# -*- coding: ascii -*-

###########################################################################
# clive, video extraction utility
# Copyright (C) 2007-2008 Toni Gundogdu
#
# clive is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
###########################################################################

## The classes for scanning fed URLs

import gzip
try:
    from cStringIO import StringIO
except ImportError:
    from StringIO import StringIO

try:
    from clive.newt import snack as newt
    from clive.urlgrabber.grabber import URLGrabber, URLGrabError
except ImportError, err:
    raise SystemExit('error: %s' % err)

from clive.unicode import tostr

__all__ = ['Scan']

## The scanning class
#
# Fetches each given page, collects the links that look like YouTube or
# Google Video page/embed URLs, hands them to the caller-supplied
# callbacks for checking and finally lets the user pick the videos to
# extract.
class Scan:

    # (needle, host) pairs: `needle` is searched case-insensitively in the
    # page; `host` is prepended to the text found at that position.
    _LINK_PATTERNS = (
        ('/watch?v=', 'youtube.com'),
        ('youtube.com/v/', ''),
        ('/googleplayer.swf?docid=', 'video.google.com'),
        ('/videoplay?docid=', 'video.google.com'),
    )

    # Characters that end a link; everything from the first occurrence
    # on is stripped as trailing garbage.
    _LINK_TERMINATORS = ('&', '<', '\n')

    # (embed form, video page form) rewrites applied to every link.
    _EMBED_TO_PAGE = (
        ('/v/', '/watch?v='),
        ('/googleplayer.swf?docid=', '/videoplay?docid='),
    )

    ## Constructor
    #
    # opts      -- options object; http_agent, http_throttle and
    #              enable_cache are read from it here, and it is passed
    #              on to DisplayVideos
    # say       -- output function, called as say(text, newline=0) and
    #              say(text, is_error=1)
    # proxy     -- passed to URLGrabber as its `proxies` argument
    # callbacks -- 4-tuple: (check_url, reset_found_urls, get_found_urls,
    #              login_if_needed)
    # cache     -- object with a write(v_info) method
    def __init__(self, opts, say, proxy, callbacks, cache):
        self._opts = opts
        self._say = say
        self._proxy = proxy
        (self._check_url_cb, self._reset_found_urls_cb,
            self._get_found_urls_cb, self._login_if_needed_cb) = callbacks
        self._cache = cache

    ## Scan URLs for identifiable video page links
    #
    # raw_urls -- iterable of page URLs; "http://" is prepended to any
    #             URL that carries neither an http:// nor https:// scheme
    #
    # Returns the user's selection from DisplayVideos. Raises SystemExit
    # if no page yielded any checked video. A URLGrabError for one page
    # is reported through `say` and the scan continues with the next.
    def scan(self, raw_urls):
        # Local import: the exception object is fetched via sys.exc_info()
        # so this block parses on every Python version.
        import sys

        found_videos = {}
        for url in raw_urls:
            lowered = url.lower()
            if not (lowered.startswith('http://')
                    or lowered.startswith('https://')):
                url = 'http://' + url
            self._say('scan: %s' % url)
            self._say('scanning... ', newline=0)
            try:
                found_urls = self._find_links(self._fetch_page(url))
                self._say(" done.\n")
                # Log-in
                self._login_if_needed_cb(found_urls)
                # Fetch video page and parse it
                self._reset_found_urls_cb()
                total = len(found_urls)
                for (i, vurl) in enumerate(found_urls):
                    # Callback stores checked URLs to nomad._found_urls
                    # using v_info dictionaries.
                    self._check_url_cb(vurl, (i, total))
                checked = self._get_found_urls_cb()
                if len(checked) > 0:
                    found_videos[url] = checked
                    # Write cache, done here in case user hits Cancel in
                    # the newt interface. Normally this would be done in
                    # Nomad._show_queue.
                    if self._opts.enable_cache:
                        for v_info in checked:
                            self._cache.write(v_info)
            except URLGrabError:
                err = sys.exc_info()[1]
                self._say('%s [%s]' % (err.strerror, url), is_error=1)
        if len(found_videos) == 0:
            raise SystemExit('error: nothing found')
        # Display for selection
        u = DisplayVideos(found_videos, self._opts)
        return u.get_selection()

    ## Collect the unique video page links found in page data
    #
    # data -- page content as a string
    #
    # Returns the links in order of discovery, without duplicates. A
    # progress mark is printed through `say` for every new link: a dot,
    # or the running count at every fifth link.
    def _find_links(self, data):
        found_urls = []
        # Lowercase once up front instead of once per search step.
        lowered = data.lower()
        for (needle, host) in self._LINK_PATTERNS:
            offset = lowered.find(needle)
            while offset != -1:
                end = data.find('"', offset)
                if end == -1:
                    # No closing quote: take the rest of the data rather
                    # than slicing to -1, which would drop the last char.
                    end = len(data)
                lnk = host + data[offset:end]
                for sep in self._LINK_TERMINATORS:
                    lnk = lnk.split(sep, 1)[0]
                for (embed, page) in self._EMBED_TO_PAGE:
                    lnk = lnk.replace(embed, page)
                if lnk not in found_urls:
                    found_urls.append(lnk)
                    count = len(found_urls)
                    mark = '.'
                    if not count % 5:
                        mark = '%d' % count
                    self._say(mark, newline=0)
                offset = lowered.find(needle, end)
        return found_urls

    ## Fetch a page and return its content
    #
    # Requests gzip encoding and decompresses the body when the response
    # carries a "content-encoding: gzip" header.
    def _fetch_page(self, url):
        g = URLGrabber(user_agent = self._opts.http_agent,
            http_headers = (('accept-encoding','gzip'),),
            throttle = self._opts.http_throttle,
            proxies = self._proxy)
        o = g.urlopen(url)
        try:
            data = o.read()
            if o.hdr.get('content-encoding') == 'gzip':
                data = gzip.GzipFile(fileobj=StringIO(data)).read()
        finally:
            # Close the connection even if reading or decoding fails.
            o.close()
        return data

## The class that displays the found videos after a scan
#
# Shows a newt checkbox tree with one entry per video and keeps showing
# it until the user has made a non-empty selection or cancelled.
class DisplayVideos:

    ## Constructor
    #
    # videos -- dict mapping a scanned page URL to a list of video
    #           dictionaries; the keys 'url', 'page_title' and 'length'
    #           are read from each
    # opts   -- options object, passed on to tostr()
    #
    # Blocks until the dialog returns a non-empty selection. An empty
    # result (nothing ticked, or "Resize" pressed) redraws the dialog.
    def __init__(self, videos, opts):
        self._last = None # Selection before hitting "Resize"
        self._videos = videos
        self._opts = opts
        while 1:
            self._selection = self._main()
            if self._selection: break

    ## Returns user selection (sequence)
    def get_selection(self):
        return self._selection

    ## Run the dialog once on a fresh screen and return its result
    def _main(self):
        # Create the screen before entering `try`: if creation fails
        # there is nothing to finish, and the real error must not be
        # masked by a NameError raised from the `finally` clause.
        scr = newt.SnackScreen()
        try:
            return self._show_videos(scr)
        finally:
            scr.finish()

    ## "Extract" handler: return the video dictionaries that were ticked
    #
    # Selection entries that match no video URL are ignored.
    def _ok(self):
        sel = []
        for url in self._ct.getSelection():
            for page in self._videos:
                for v in self._videos[page]:
                    if v['url'] == url:
                        sel.append(v)
        return sel

    ## "Cancel" handler
    def _cancel(self):
        raise SystemExit('Cancelled.')

    ## "Resize" handler
    #
    # Remembers the current selection and returns None, which makes the
    # constructor loop show the dialog again on a new screen.
    def _resize(self):
        self._last = self._ct.getSelection()
        return None

    ## Build the dialog, run it once and dispatch on the pressed button
    #
    # scr -- the newt.SnackScreen to draw on
    #
    # Returns whatever the button handler returns: a list of video
    # dictionaries for "Extract", None for "Resize". "Cancel" raises
    # SystemExit.
    def _show_videos(self, scr):
        # Floor division keeps the sizes integral on every Python version.
        w = scr.width - (scr.width // 6)
        ww = w - 12
        ct = newt.CheckboxTree(scr.height // 2, scroll=1)
        i = 0 # Index of the title node the next URL node goes under
        for page in sorted(self._videos):
            for v in self._videos[page]:
                t = v['page_title'].replace('YouTube - ','')[:w]
                try:
                    t = tostr(t, self._opts)
                except UnicodeDecodeError:
                    continue # Skip it if decoding fails for some reason
                ct.append(t)
                # Recall last state: selected | unselected
                sel = False
                if self._last and v['url'] in self._last:
                    sel = True
                u = v['url'][:ww]
                u += ' (%s)' % v['length']
                ct.addItem(u, (i, newt.snackArgs['append']),
                    v['url'], selected=sel)
                i += 1
        g = newt.GridForm(scr, 'Scan: Videos found', 1, 2)
        g.add(ct, col=0, row=0, padding=(0,0,0,1))
        b = newt.ButtonBar(scr,
            [('Resize',2), ('Cancel',0), ('Extract',1)], compact=0)
        g.add(b, col=0, row=1, padding=(0,0,0,0))
        self._ct = ct # Store for self._ok and self._resize
        # None (no button reported) is treated like "Extract".
        actions = {0:self._cancel, 1:self._ok, 2:self._resize,
            None:self._ok}
        return actions.get(b.buttonPressed(g.runOnce()))()
