diff options
Diffstat (limited to 'youtube_dl/utils.py')
| -rw-r--r-- | youtube_dl/utils.py | 83 | 
1 files changed, 60 insertions, 23 deletions
| diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index b644f4e92..f8dd9c72d 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -673,6 +673,8 @@ class ExtractorError(Exception):              expected = True          if video_id is not None:              msg = video_id + ': ' + msg +        if cause: +            msg += u' (caused by %r)' % cause          if not expected:              msg = msg + u'; please report this issue on https://yt-dl.org/bug . Be sure to call youtube-dl with the --verbose flag and include its complete output. Make sure you are using the latest version; type  youtube-dl -U  to update.'          super(ExtractorError, self).__init__(msg) @@ -799,6 +801,12 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):                  del req.headers['User-agent']              req.headers['User-agent'] = req.headers['Youtubedl-user-agent']              del req.headers['Youtubedl-user-agent'] + +        if sys.version_info < (2, 7) and '#' in req.get_full_url(): +            # Python 2.6 is brain-dead when it comes to fragments +            req._Request__original = req._Request__original.partition('#')[0] +            req._Request__r_type = req._Request__r_type.partition('#')[0] +          return req      def http_response(self, req, resp): @@ -884,6 +892,7 @@ def unified_strdate(date_str):          '%d/%m/%Y',          '%d/%m/%y',          '%Y/%m/%d %H:%M:%S', +        '%d/%m/%Y %H:%M:%S',          '%Y-%m-%d %H:%M:%S',          '%d.%m.%Y %H:%M',          '%d.%m.%Y %H.%M', @@ -1384,14 +1393,16 @@ def check_executable(exe, args=[]):  class PagedList(object): -    def __init__(self, pagefunc, pagesize): -        self._pagefunc = pagefunc -        self._pagesize = pagesize -      def __len__(self):          # This is only useful for tests          return len(self.getslice()) + +class OnDemandPagedList(PagedList): +    def __init__(self, pagefunc, pagesize): +        self._pagefunc = pagefunc +        self._pagesize = pagesize +      def getslice(self, start=0, end=None):          res = []          for pagenum in itertools.count(start // self._pagesize): @@ -1430,6 +1441,35 @@ class PagedList(object):          return res +class InAdvancePagedList(PagedList): +    def __init__(self, pagefunc, pagecount, pagesize): +        self._pagefunc = pagefunc +        self._pagecount = pagecount +        self._pagesize = pagesize + +    def getslice(self, start=0, end=None): +        res = [] +        start_page = start // self._pagesize +        end_page = ( +            self._pagecount if end is None else (end // self._pagesize + 1)) +        skip_elems = start - start_page * self._pagesize +        only_more = None if end is None else end - start +        for pagenum in range(start_page, end_page): +            page = list(self._pagefunc(pagenum)) +            if skip_elems: +                page = page[skip_elems:] +                skip_elems = None +            if only_more is not None: +                if len(page) < only_more: +                    only_more -= len(page) +                else: +                    page = page[:only_more] +                    res.extend(page) +                    break +            res.extend(page) +        return res + +  def uppercase_escape(s):      unicode_escape = codecs.getdecoder('unicode_escape')      return re.sub( @@ -1540,27 +1580,24 @@ def strip_jsonp(code):  def js_to_json(code):      def fix_kv(m): -        key = m.group(2) -        if key.startswith("'"): -            assert key.endswith("'") -            assert '"' not in key -            key = '"%s"' % key[1:-1] -        elif not key.startswith('"'): -            key = '"%s"' % key - -        value = m.group(4) -        if value.startswith("'"): -            assert value.endswith("'") -            assert '"' not in value -            value = '"%s"' % value[1:-1] - -        return m.group(1) + key + m.group(3) + value +        v = m.group(0) +        if v in ('true', 'false', 'null'): +            return v +        if v.startswith('"'): +            return v +        if v.startswith("'"): +            v = v[1:-1] +            v = re.sub(r"\\\\|\\'|\"", lambda m: { +                '\\\\': '\\\\', +                "\\'": "'", +                '"': '\\"', +            }[m.group(0)], v) +        return '"%s"' % v      res = re.sub(r'''(?x) -            ([{,]\s*) -            ("[^"]*"|\'[^\']*\'|[a-z0-9A-Z]+) -            (:\s*) -            ([0-9.]+|true|false|"[^"]*"|\'[^\']*\'|\[|\{) +        "(?:[^"\\]*(?:\\\\|\\")?)*"| +        '(?:[^'\\]*(?:\\\\|\\')?)*'| +        [a-zA-Z_][a-zA-Z_0-9]*          ''', fix_kv, code)      res = re.sub(r',(\s*\])', lambda m: m.group(1), res)      return res | 
