diff options
32 files changed, 396 insertions, 204 deletions
| @@ -534,7 +534,7 @@ Either prepend `http://www.youtube.com/watch?v=` or separate the ID from the opt  ### How can I detect whether a given URL is supported by youtube-dl? -For one, have a look at the [list of supported sites](docs/supportedsites). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/v/1234567 to http://example.com/v/1234567 ) and youtube-dl reports an URL of a service in that list as unsupported. In that case, simply report a bug. +For one, have a look at the [list of supported sites](docs/supportedsites.md). Note that it can sometimes happen that the site changes its URL scheme (say, from http://example.com/v/1234567 to http://example.com/v/1234567 ) and youtube-dl reports an URL of a service in that list as unsupported. In that case, simply report a bug.  It is *not* possible to detect whether a URL is supported or not. That's because youtube-dl contains a generic extractor which matches **all** URLs. You may be tempted to disable, exclude, or remove the generic extractor, but the generic extractor not only allows users to extract videos from lots of websites that embed a video from another service, but may also be used to extract video from a service that it's hosting itself. Therefore, we neither recommend nor support disabling, excluding, or removing the generic extractor. diff --git a/test/test_download.py b/test/test_download.py index 412f3dbce..6a149ae4f 100644 --- a/test/test_download.py +++ b/test/test_download.py @@ -89,7 +89,7 @@ def generator(test_case):          for tc in test_cases:              info_dict = tc.get('info_dict', {}) -            if not tc.get('file') and not (info_dict.get('id') and info_dict.get('ext')): +            if not (info_dict.get('id') and info_dict.get('ext')):                  raise Exception('Test definition incorrect. The output file cannot be known. Are both \'id\' and \'ext\' keys present?')          if 'skip' in test_case: @@ -116,7 +116,7 @@ def generator(test_case):          expect_warnings(ydl, test_case.get('expected_warnings', []))          def get_tc_filename(tc): -            return tc.get('file') or ydl.prepare_filename(tc.get('info_dict', {})) +            return ydl.prepare_filename(tc.get('info_dict', {}))          res_dict = None diff --git a/test/test_jsinterp.py b/test/test_jsinterp.py new file mode 100644 index 000000000..b91b8c492 --- /dev/null +++ b/test/test_jsinterp.py @@ -0,0 +1,95 @@ +#!/usr/bin/env python + +from __future__ import unicode_literals + +# Allow direct execution +import os +import sys +import unittest +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from youtube_dl.jsinterp import JSInterpreter + + +class TestJSInterpreter(unittest.TestCase): +    def test_basic(self): +        jsi = JSInterpreter('function x(){;}') +        self.assertEqual(jsi.call_function('x'), None) + +        jsi = JSInterpreter('function x3(){return 42;}') +        self.assertEqual(jsi.call_function('x3'), 42) + +    def test_calc(self): +        jsi = JSInterpreter('function x4(a){return 2*a+1;}') +        self.assertEqual(jsi.call_function('x4', 3), 7) + +    def test_empty_return(self): +        jsi = JSInterpreter('function f(){return; y()}') +        self.assertEqual(jsi.call_function('f'), None) + +    def test_morespace(self): +        jsi = JSInterpreter('function x (a) { return 2 * a + 1 ; }') +        self.assertEqual(jsi.call_function('x', 3), 7) + +        jsi = JSInterpreter('function f () { x =  2  ; return x; }') +        self.assertEqual(jsi.call_function('f'), 2) + +    def test_strange_chars(self): +        jsi = JSInterpreter('function $_xY1 ($_axY1) { var $_axY2 = $_axY1 + 1; return $_axY2; }') +        self.assertEqual(jsi.call_function('$_xY1', 20), 21) + +    def test_operators(self): +        jsi = JSInterpreter('function f(){return 1 << 5;}') +        self.assertEqual(jsi.call_function('f'), 32) + +        jsi = JSInterpreter('function f(){return 19 & 21;}') +        self.assertEqual(jsi.call_function('f'), 17) + +        jsi = JSInterpreter('function f(){return 11 >> 2;}') +        self.assertEqual(jsi.call_function('f'), 2) + +    def test_array_access(self): +        jsi = JSInterpreter('function f(){var x = [1,2,3]; x[0] = 4; x[0] = 5; x[2] = 7; return x;}') +        self.assertEqual(jsi.call_function('f'), [5, 2, 7]) + +    def test_parens(self): +        jsi = JSInterpreter('function f(){return (1) + (2) * ((( (( (((((3)))))) )) ));}') +        self.assertEqual(jsi.call_function('f'), 7) + +        jsi = JSInterpreter('function f(){return (1 + 2) * 3;}') +        self.assertEqual(jsi.call_function('f'), 9) + +    def test_assignments(self): +        jsi = JSInterpreter('function f(){var x = 20; x = 30 + 1; return x;}') +        self.assertEqual(jsi.call_function('f'), 31) + +        jsi = JSInterpreter('function f(){var x = 20; x += 30 + 1; return x;}') +        self.assertEqual(jsi.call_function('f'), 51) + +        jsi = JSInterpreter('function f(){var x = 20; x -= 30 + 1; return x;}') +        self.assertEqual(jsi.call_function('f'), -11) + +    def test_comments(self): +        jsi = JSInterpreter(''' +        function x() { +            var x = /* 1 + */ 2; +            var y = /* 30 +            * 40 */ 50; +            return x + y; +        } +        ''') +        self.assertEqual(jsi.call_function('x'), 52) + +    def test_precedence(self): +        jsi = JSInterpreter(''' +        function x() { +            var a = [10, 20, 30, 40, 50]; +            var b = 6; +            a[0]=a[b%a.length]; +            return a; +        }''') +        self.assertEqual(jsi.call_function('x'), [20, 20, 30, 40, 50]) + + +if __name__ == '__main__': +    unittest.main() diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index e5a96cad5..c18ce9660 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -25,6 +25,7 @@ if os.name == 'nt':      import ctypes  from .compat import ( +    compat_basestring,      compat_cookiejar,      compat_expanduser,      compat_http_client, @@ -1558,7 +1559,7 @@ class YoutubeDL(object):          # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)          # To work around aforementioned issue we will replace request's original URL with          # percent-encoded one -        req_is_string = isinstance(req, basestring if sys.version_info < (3, 0) else compat_str) +        req_is_string = isinstance(req, compat_basestring)          url = req if req_is_string else req.get_full_url()          url_escaped = escape_url(url) diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index 497ca52de..e989cdbbd 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -114,6 +114,26 @@ except ImportError:              string += pct_sequence.decode(encoding, errors)          return string +try: +    compat_str = unicode  # Python 2 +except NameError: +    compat_str = str + +try: +    compat_basestring = basestring  # Python 2 +except NameError: +    compat_basestring = str + +try: +    compat_chr = unichr  # Python 2 +except NameError: +    compat_chr = chr + +try: +    from xml.etree.ElementTree import ParseError as compat_xml_parse_error +except ImportError:  # Python 2.6 +    from xml.parsers.expat import ExpatError as compat_xml_parse_error +  try:      from urllib.parse import parse_qs as compat_parse_qs @@ -123,7 +143,7 @@ except ImportError:  # Python 2      def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,                     encoding='utf-8', errors='replace'): -        qs, _coerce_result = qs, unicode +        qs, _coerce_result = qs, compat_str          pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]          r = []          for name_value in pairs: @@ -163,21 +183,6 @@ except ImportError:  # Python 2          return parsed_result  try: -    compat_str = unicode  # Python 2 -except NameError: -    compat_str = str - -try: -    compat_chr = unichr  # Python 2 -except NameError: -    compat_chr = chr - -try: -    from xml.etree.ElementTree import ParseError as compat_xml_parse_error -except ImportError:  # Python 2.6 -    from xml.parsers.expat import ExpatError as compat_xml_parse_error - -try:      from shlex import quote as shlex_quote  except ImportError:  # Python < 3.3      def shlex_quote(s): @@ -362,6 +367,7 @@ def workaround_optparse_bug9161():  __all__ = [      'compat_HTTPError', +    'compat_basestring',      'compat_chr',      'compat_cookiejar',      'compat_expanduser', diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index aa58b52ab..e527ee425 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -11,6 +11,7 @@ from ..compat import (      compat_urllib_request,  )  from ..utils import ( +    encodeArgument,      encodeFilename,  ) @@ -21,23 +22,22 @@ class HlsFD(FileDownloader):          self.report_destination(filename)          tmpfilename = self.temp_name(filename) -        args = [ -            '-y', '-i', url, '-f', 'mp4', '-c', 'copy', -            '-bsf:a', 'aac_adtstoasc', -            encodeFilename(tmpfilename, for_subprocess=True)] -          ffpp = FFmpegPostProcessor(downloader=self)          program = ffpp._executable          if program is None:              self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.')              return False          ffpp.check_version() -        cmd = [program] + args -        retval = subprocess.call(cmd) +        args = [ +            encodeArgument(opt) +            for opt in (program, '-y', '-i', url, '-f', 'mp4', '-c', 'copy', '-bsf:a', 'aac_adtstoasc')] +        args.append(encodeFilename(tmpfilename, True)) + +        retval = subprocess.call(args)          if retval == 0:              fsize = os.path.getsize(encodeFilename(tmpfilename)) -            self.to_screen('\r[%s] %s bytes' % (cmd[0], fsize)) +            self.to_screen('\r[%s] %s bytes' % (args[0], fsize))              self.try_rename(tmpfilename, filename)              self._hook_progress({                  'downloaded_bytes': fsize, diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py index 967bd865c..783b53e23 100644 --- a/youtube_dl/extractor/ard.py +++ b/youtube_dl/extractor/ard.py @@ -23,13 +23,7 @@ class ARDMediathekIE(InfoExtractor):      _TESTS = [{          'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht', -        'file': '22429276.mp4', -        'md5': '469751912f1de0816a9fc9df8336476c', -        'info_dict': { -            'title': 'Vertrauen ist gut, Spionieren ist besser - Geht so deutsch-amerikanische Freundschaft?', -            'description': 'Das Erste Mediathek [ARD]: Vertrauen ist gut, Spionieren ist besser - Geht so deutsch-amerikanische Freundschaft?, Anne Will, Über die Spionage-Affäre diskutieren Clemens Binninger, Katrin Göring-Eckardt, Georg Mascolo, Andrew B. Denison und Constanze Kurz.. Das Video zur Sendung Anne Will am Mittwoch, 16.07.2014', -        }, -        'skip': 'Blocked outside of Germany', +        'only_matching': True,      }, {          'url': 'http://www.ardmediathek.de/tv/Tatort/Das-Wunder-von-Wolbeck-Video-tgl-ab-20/Das-Erste/Video?documentId=22490580&bcastId=602916',          'info_dict': { diff --git a/youtube_dl/extractor/defense.py b/youtube_dl/extractor/defense.py index 5e50c63d9..2b90bf4fc 100644 --- a/youtube_dl/extractor/defense.py +++ b/youtube_dl/extractor/defense.py @@ -1,40 +1,38 @@  from __future__ import unicode_literals -import re -import json -  from .common import InfoExtractor  class DefenseGouvFrIE(InfoExtractor):      IE_NAME = 'defense.gouv.fr' -    _VALID_URL = (r'http://.*?\.defense\.gouv\.fr/layout/set/' -                  r'ligthboxvideo/base-de-medias/webtv/(.*)') +    _VALID_URL = r'http://.*?\.defense\.gouv\.fr/layout/set/ligthboxvideo/base-de-medias/webtv/(?P<id>[^/?#]*)'      _TEST = {          'url': 'http://www.defense.gouv.fr/layout/set/ligthboxvideo/base-de-medias/webtv/attaque-chimique-syrienne-du-21-aout-2013-1', -        'file': '11213.mp4',          'md5': '75bba6124da7e63d2d60b5244ec9430c', -        "info_dict": { -            "title": "attaque-chimique-syrienne-du-21-aout-2013-1" +        'info_dict': { +            'id': '11213', +            'ext': 'mp4', +            'title': 'attaque-chimique-syrienne-du-21-aout-2013-1'          }      }      def _real_extract(self, url): -        title = re.match(self._VALID_URL, url).group(1) +        title = self._match_id(url)          webpage = self._download_webpage(url, title) +          video_id = self._search_regex(              r"flashvars.pvg_id=\"(\d+)\";",              webpage, 'ID')          json_url = ('http://static.videos.gouv.fr/brightcovehub/export/json/'                      + video_id) -        info = self._download_webpage(json_url, title, -                                      'Downloading JSON config') -        video_url = json.loads(info)['renditions'][0]['url'] - -        return {'id': video_id, -                'ext': 'mp4', -                'url': video_url, -                'title': title, -                } +        info = self._download_json(json_url, title, 'Downloading JSON config') +        video_url = info['renditions'][0]['url'] + +        return { +            'id': video_id, +            'ext': 'mp4', +            'url': video_url, +            'title': title, +        } diff --git a/youtube_dl/extractor/francetv.py b/youtube_dl/extractor/francetv.py index bbc760a49..170d68075 100644 --- a/youtube_dl/extractor/francetv.py +++ b/youtube_dl/extractor/francetv.py @@ -230,12 +230,13 @@ class FranceTVIE(FranceTVBaseInfoExtractor):  class GenerationQuoiIE(InfoExtractor):      IE_NAME = 'france2.fr:generation-quoi' -    _VALID_URL = r'https?://generation-quoi\.france2\.fr/portrait/(?P<name>.*)(\?|$)' +    _VALID_URL = r'https?://generation-quoi\.france2\.fr/portrait/(?P<id>[^/?#]+)'      _TEST = {          'url': 'http://generation-quoi.france2.fr/portrait/garde-a-vous', -        'file': 'k7FJX8VBcvvLmX4wA5Q.mp4',          'info_dict': { +            'id': 'k7FJX8VBcvvLmX4wA5Q', +            'ext': 'mp4',              'title': 'Génération Quoi - Garde à Vous',              'uploader': 'Génération Quoi',          }, @@ -243,14 +244,12 @@ class GenerationQuoiIE(InfoExtractor):              # It uses Dailymotion              'skip_download': True,          }, -        'skip': 'Only available from France',      }      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        name = mobj.group('name') -        info_url = compat_urlparse.urljoin(url, '/medias/video/%s.json' % name) -        info_json = self._download_webpage(info_url, name) +        display_id = self._match_id(url) +        info_url = compat_urlparse.urljoin(url, '/medias/video/%s.json' % display_id) +        info_json = self._download_webpage(info_url, display_id)          info = json.loads(info_json)          return self.url_result('http://www.dailymotion.com/video/%s' % info['id'],                                 ie='Dailymotion') diff --git a/youtube_dl/extractor/globo.py b/youtube_dl/extractor/globo.py index 6949a57c7..29638a194 100644 --- a/youtube_dl/extractor/globo.py +++ b/youtube_dl/extractor/globo.py @@ -70,6 +70,19 @@ class GloboIE(InfoExtractor):                  'like_count': int,              }          }, +        { +            'url': 'http://globotv.globo.com/canal-brasil/sangue-latino/t/todos-os-videos/v/ator-e-diretor-argentino-ricado-darin-fala-sobre-utopias-e-suas-perdas/3928201/', +            'md5': 'c1defca721ce25b2354e927d3e4b3dec', +            'info_dict': { +                'id': '3928201', +                'ext': 'mp4', +                'title': 'Ator e diretor argentino, Ricado Darín fala sobre utopias e suas perdas', +                'duration': 1472.906, +                'uploader': 'Canal Brasil', +                'uploader_id': 705, +                'like_count': int, +            } +        },      ]      class MD5(): @@ -381,11 +394,16 @@ class GloboIE(InfoExtractor):              signed_md5 = self.MD5.b64_md5(received_md5 + compat_str(sign_time) + padding)              signed_hash = hash_code + compat_str(received_time) + received_random + compat_str(sign_time) + padding + signed_md5 -            formats.append({ -                'url': '%s?h=%s&k=%s' % (resource['url'], signed_hash, 'flash'), -                'format_id': resource_id, -                'height': resource['height'] -            }) +            resource_url = resource['url'] +            signed_url = '%s?h=%s&k=%s' % (resource_url, signed_hash, 'flash') +            if resource_id.endswith('m3u8') or resource_url.endswith('.m3u8'): +                formats.extend(self._extract_m3u8_formats(signed_url, resource_id, 'mp4')) +            else: +                formats.append({ +                    'url': signed_url, +                    'format_id': resource_id, +                    'height': resource.get('height'), +                })          self._sort_formats(formats) diff --git a/youtube_dl/extractor/kankan.py b/youtube_dl/extractor/kankan.py index dbfe4cc03..364dc878e 100644 --- a/youtube_dl/extractor/kankan.py +++ b/youtube_dl/extractor/kankan.py @@ -13,17 +13,17 @@ class KankanIE(InfoExtractor):      _TEST = {          'url': 'http://yinyue.kankan.com/vod/48/48863.shtml', -        'file': '48863.flv',          'md5': '29aca1e47ae68fc28804aca89f29507e',          'info_dict': { +            'id': '48863', +            'ext': 'flv',              'title': 'Ready To Go',          },          'skip': 'Only available from China',      }      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('id') +        video_id = self._match_id(url)          webpage = self._download_webpage(url, video_id)          title = self._search_regex(r'(?:G_TITLE=|G_MOVIE_TITLE = )[\'"](.+?)[\'"]', webpage, 'video title') diff --git a/youtube_dl/extractor/keezmovies.py b/youtube_dl/extractor/keezmovies.py index 97dcb518a..82eddec51 100644 --- a/youtube_dl/extractor/keezmovies.py +++ b/youtube_dl/extractor/keezmovies.py @@ -7,10 +7,6 @@ from .common import InfoExtractor  from ..compat import (      compat_urllib_parse_urlparse,      compat_urllib_request, -    compat_urllib_parse, -) -from ..aes import ( -    aes_decrypt_text  ) @@ -18,9 +14,10 @@ class KeezMoviesIE(InfoExtractor):      _VALID_URL = r'https?://(?:www\.)?keezmovies\.com/video/.+?(?P<id>[0-9]+)(?:[/?&]|$)'      _TEST = {          'url': 'http://www.keezmovies.com/video/petite-asian-lady-mai-playing-in-bathtub-1214711', -        'file': '1214711.mp4',          'md5': '6e297b7e789329923fcf83abb67c9289',          'info_dict': { +            'id': '1214711', +            'ext': 'mp4',              'title': 'Petite Asian Lady Mai Playing In Bathtub',              'age_limit': 18,          } @@ -39,11 +36,10 @@ class KeezMoviesIE(InfoExtractor):              embedded_url = mobj.group(1)              return self.url_result(embedded_url) -        video_title = self._html_search_regex(r'<h1 [^>]*>([^<]+)', webpage, 'title') -        video_url = compat_urllib_parse.unquote(self._html_search_regex(r'video_url=(.+?)&', webpage, 'video_url')) -        if 'encrypted=true' in webpage: -            password = self._html_search_regex(r'video_title=(.+?)&', webpage, 'password') -            video_url = aes_decrypt_text(video_url, password, 32).decode('utf-8') +        video_title = self._html_search_regex( +            r'<h1 [^>]*>([^<]+)', webpage, 'title') +        video_url = self._html_search_regex( +            r'(?s)html5VideoPlayer = .*?src="([^"]+)"', webpage, 'video URL')          path = compat_urllib_parse_urlparse(video_url).path          extension = os.path.splitext(path)[1][1:]          format = path.split('/')[4].split('_')[:2] diff --git a/youtube_dl/extractor/la7.py b/youtube_dl/extractor/la7.py index db2028e9f..b08f6e3c9 100644 --- a/youtube_dl/extractor/la7.py +++ b/youtube_dl/extractor/la7.py @@ -1,7 +1,5 @@  from __future__ import unicode_literals -import re -  from .common import InfoExtractor  from ..utils import (      parse_duration, @@ -20,9 +18,10 @@ class LA7IE(InfoExtractor):      _TEST = {          'url': 'http://www.la7.tv/richplayer/?assetid=50355319', -        'file': '50355319.mp4',          'md5': 'ec7d1f0224d20ba293ab56cf2259651f',          'info_dict': { +            'id': '50355319', +            'ext': 'mp4',              'title': 'IL DIVO',              'description': 'Un film di Paolo Sorrentino con Toni Servillo, Anna Bonaiuto, Giulio Bosetti  e Flavio Bucci',              'duration': 6254, @@ -31,9 +30,7 @@ class LA7IE(InfoExtractor):      }      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('id') - +        video_id = self._match_id(url)          xml_url = 'http://www.la7.tv/repliche/content/index.php?contentId=%s' % video_id          doc = self._download_xml(xml_url, video_id) diff --git a/youtube_dl/extractor/macgamestore.py b/youtube_dl/extractor/macgamestore.py index b818cf50c..3cd4a3a19 100644 --- a/youtube_dl/extractor/macgamestore.py +++ b/youtube_dl/extractor/macgamestore.py @@ -1,7 +1,5 @@  from __future__ import unicode_literals -import re -  from .common import InfoExtractor  from ..utils import ExtractorError @@ -13,21 +11,22 @@ class MacGameStoreIE(InfoExtractor):      _TEST = {          'url': 'http://www.macgamestore.com/mediaviewer.php?trailer=2450', -        'file': '2450.m4v',          'md5': '8649b8ea684b6666b4c5be736ecddc61',          'info_dict': { +            'id': '2450', +            'ext': 'm4v',              'title': 'Crow',          }      }      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        video_id = mobj.group('id') - -        webpage = self._download_webpage(url, video_id, 'Downloading trailer page') +        video_id = self._match_id(url) +        webpage = self._download_webpage( +            url, video_id, 'Downloading trailer page') -        if re.search(r'>Missing Media<', webpage) is not None: -            raise ExtractorError('Trailer %s does not exist' % video_id, expected=True) +        if '>Missing Media<' in webpage: +            raise ExtractorError( +                'Trailer %s does not exist' % video_id, expected=True)          video_title = self._html_search_regex(              r'<title>MacGameStore: (.*?) Trailer</title>', webpage, 'title') diff --git a/youtube_dl/extractor/mpora.py b/youtube_dl/extractor/mpora.py index 88c9501cd..6db3c67a5 100644 --- a/youtube_dl/extractor/mpora.py +++ b/youtube_dl/extractor/mpora.py @@ -1,21 +1,19 @@  from __future__ import unicode_literals -import json -import re -  from .common import InfoExtractor  from ..utils import int_or_none  class MporaIE(InfoExtractor): -    _VALID_URL = r'^https?://(www\.)?mpora\.(?:com|de)/videos/(?P<id>[^?#/]+)' +    _VALID_URL = r'https?://(www\.)?mpora\.(?:com|de)/videos/(?P<id>[^?#/]+)'      IE_NAME = 'MPORA'      _TEST = {          'url': 'http://mpora.de/videos/AAdo8okx4wiz/embed?locale=de', -        'file': 'AAdo8okx4wiz.mp4',          'md5': 'a7a228473eedd3be741397cf452932eb',          'info_dict': { +            'id': 'AAdo8okx4wiz', +            'ext': 'mp4',              'title': 'Katy Curd -  Winter in the Forest',              'duration': 416,              'uploader': 'Peter Newman Media', @@ -23,14 +21,12 @@ class MporaIE(InfoExtractor):      }      def _real_extract(self, url): -        m = re.match(self._VALID_URL, url) -        video_id = m.group('id') - +        video_id = self._match_id(url)          webpage = self._download_webpage(url, video_id) +          data_json = self._search_regex(              r"new FM\.Player\('[^']+',\s*(\{.*?)\).player;", webpage, 'json') - -        data = json.loads(data_json) +        data = self._parse_json(data_json, video_id)          uploader = data['info_overlay'].get('username')          duration = data['video']['duration'] // 1000 diff --git a/youtube_dl/extractor/mtv.py b/youtube_dl/extractor/mtv.py index 7a3b62ebe..bc7f49ebb 100644 --- a/youtube_dl/extractor/mtv.py +++ b/youtube_dl/extractor/mtv.py @@ -79,12 +79,15 @@ class MTVServicesInfoExtractor(SubtitlesInfoExtractor):              try:                  _, _, ext = rendition.attrib['type'].partition('/')                  rtmp_video_url = rendition.find('./src').text -                formats.append({'ext': ext, -                                'url': self._transform_rtmp_url(rtmp_video_url), -                                'format_id': rendition.get('bitrate'), -                                'width': int(rendition.get('width')), -                                'height': int(rendition.get('height')), -                                }) +                if rtmp_video_url.endswith('siteunavail.png'): +                    continue +                formats.append({ +                    'ext': ext, +                    'url': self._transform_rtmp_url(rtmp_video_url), +                    'format_id': rendition.get('bitrate'), +                    'width': int(rendition.get('width')), +                    'height': int(rendition.get('height')), +                })              except (KeyError, TypeError):                  raise ExtractorError('Invalid rendition field.')          self._sort_formats(formats) @@ -240,25 +243,14 @@ class MTVIE(MTVServicesInfoExtractor):      _TESTS = [          {              'url': 'http://www.mtv.com/videos/misc/853555/ours-vh1-storytellers.jhtml', -            'file': '853555.mp4',              'md5': '850f3f143316b1e71fa56a4edfd6e0f8',              'info_dict': { +                'id': '853555', +                'ext': 'mp4',                  'title': 'Taylor Swift - "Ours (VH1 Storytellers)"',                  'description': 'Album: Taylor Swift performs "Ours" for VH1 Storytellers at Harvey Mudd College.',              },          }, -        { -            'add_ie': ['Vevo'], -            'url': 'http://www.mtv.com/videos/taylor-swift/916187/everything-has-changed-ft-ed-sheeran.jhtml', -            'file': 'USCJY1331283.mp4', -            'md5': '73b4e7fcadd88929292fe52c3ced8caf', -            'info_dict': { -                'title': 'Everything Has Changed', -                'upload_date': '20130606', -                'uploader': 'Taylor Swift', -            }, -            'skip': 'VEVO is only available in some countries', -        },      ]      def _get_thumbnail_url(self, uri, itemdoc): @@ -272,8 +264,8 @@ class MTVIE(MTVServicesInfoExtractor):              webpage = self._download_webpage(url, video_id)              # Some videos come from Vevo.com -            m_vevo = re.search(r'isVevoVideo = true;.*?vevoVideoId = "(.*?)";', -                               webpage, re.DOTALL) +            m_vevo = re.search( +                r'(?s)isVevoVideo = true;.*?vevoVideoId = "(.*?)";', webpage)              if m_vevo:                  vevo_id = m_vevo.group(1)                  self.to_screen('Vevo video detected: %s' % vevo_id) diff --git a/youtube_dl/extractor/nerdcubed.py b/youtube_dl/extractor/nerdcubed.py index efc903afa..dff78e486 100644 --- a/youtube_dl/extractor/nerdcubed.py +++ b/youtube_dl/extractor/nerdcubed.py @@ -11,6 +11,7 @@ class NerdCubedFeedIE(InfoExtractor):      _TEST = {          'url': 'http://www.nerdcubed.co.uk/feed.json',          'info_dict': { +            'id': 'nerdcubed-feed',              'title': 'nerdcubed.co.uk feed',          },          'playlist_mincount': 1300, diff --git a/youtube_dl/extractor/ringtv.py b/youtube_dl/extractor/ringtv.py index 59dc137cc..efa4afeb6 100644 --- a/youtube_dl/extractor/ringtv.py +++ b/youtube_dl/extractor/ringtv.py @@ -6,12 +6,13 @@ from .common import InfoExtractor  class RingTVIE(InfoExtractor): -    _VALID_URL = r'(?:http://)?(?:www\.)?ringtv\.craveonline\.com/(?P<type>news|videos/video)/(?P<id>[^/?#]+)' +    _VALID_URL = r'http://(?:www\.)?ringtv\.craveonline\.com/(?P<type>news|videos/video)/(?P<id>[^/?#]+)'      _TEST = {          "url": "http://ringtv.craveonline.com/news/310833-luis-collazo-says-victor-ortiz-better-not-quit-on-jan-30", -        "file": "857645.mp4",          "md5": "d25945f5df41cdca2d2587165ac28720",          "info_dict": { +            'id': '857645', +            'ext': 'mp4',              "title": 'Video: Luis Collazo says Victor Ortiz "better not quit on Jan. 30" - Ring TV',              "description": 'Luis Collazo is excited about his Jan. 30 showdown with fellow former welterweight titleholder Victor Ortiz at Barclays Center in his hometown of Brooklyn. The SuperBowl week fight headlines a Golden Boy Live! card on Fox Sports 1.',          } diff --git a/youtube_dl/extractor/rottentomatoes.py b/youtube_dl/extractor/rottentomatoes.py index c1500b82f..e8bb20a08 100644 --- a/youtube_dl/extractor/rottentomatoes.py +++ b/youtube_dl/extractor/rottentomatoes.py @@ -10,8 +10,9 @@ class RottenTomatoesIE(VideoDetectiveIE):      _TEST = {          'url': 'http://www.rottentomatoes.com/m/toy_story_3/trailers/11028566/', -        'file': '613340.mp4',          'info_dict': { +            'id': '613340', +            'ext': 'mp4',              'title': 'TOY STORY 3',              'description': 'From the creators of the beloved TOY STORY films, comes a story that will reunite the gang in a whole new way.',          }, diff --git a/youtube_dl/extractor/rutv.py b/youtube_dl/extractor/rutv.py index a73e6f331..ef766237b 100644 --- a/youtube_dl/extractor/rutv.py +++ b/youtube_dl/extractor/rutv.py @@ -162,10 +162,8 @@ class RUTVIE(InfoExtractor):                          'vbr': int(quality),                      }                  elif transport == 'm3u8': -                    fmt = { -                        'url': url, -                        'ext': 'mp4', -                    } +                    formats.extend(self._extract_m3u8_formats(url, video_id, 'mp4')) +                    continue                  else:                      fmt = {                          'url': url diff --git a/youtube_dl/extractor/servingsys.py b/youtube_dl/extractor/servingsys.py index 16dc3736b..c013d678f 100644 --- a/youtube_dl/extractor/servingsys.py +++ b/youtube_dl/extractor/servingsys.py @@ -1,7 +1,5 @@  from __future__ import unicode_literals -import re -  from .common import InfoExtractor  from ..utils import (      int_or_none, @@ -13,10 +11,15 @@ class ServingSysIE(InfoExtractor):      _TEST = {          'url': 'http://bs.serving-sys.com/BurstingPipe/adServer.bs?cn=is&c=23&pl=VAST&pli=5349193&PluID=0&pos=7135&ord=[timestamp]&cim=1?', +        'info_dict': { +            'id': '5349193', +            'title': 'AdAPPter_Hyundai_demo', +        },          'playlist': [{ -            'file': '29955898.flv',              'md5': 'baed851342df6846eb8677a60a011a0f',              'info_dict': { +                'id': '29955898', +                'ext': 'flv',                  'title': 'AdAPPter_Hyundai_demo (1)',                  'duration': 74,                  'tbr': 1378, @@ -24,9 +27,10 @@ class ServingSysIE(InfoExtractor):                  'height': 400,              },          }, { -            'file': '29907998.flv',              'md5': '979b4da2655c4bc2d81aeb915a8c5014',              'info_dict': { +                'id': '29907998', +                'ext': 'flv',                  'title': 'AdAPPter_Hyundai_demo (2)',                  'duration': 34,                  'width': 854, @@ -37,14 +41,13 @@ class ServingSysIE(InfoExtractor):          'params': {              'playlistend': 2,          }, -        'skip': 'Blocked in the US [sic]', +        '_skip': 'Blocked in the US [sic]',      }      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        pl_id = mobj.group('id') - +        pl_id = self._match_id(url)          vast_doc = self._download_xml(url, pl_id) +          title = vast_doc.find('.//AdTitle').text          media = vast_doc.find('.//MediaFile').text          info_url = self._search_regex(r'&adData=([^&]+)&', media, 'info URL') diff --git a/youtube_dl/extractor/sina.py b/youtube_dl/extractor/sina.py index a63d126d4..0891a441f 100644 --- a/youtube_dl/extractor/sina.py +++ b/youtube_dl/extractor/sina.py @@ -11,7 +11,7 @@ from ..compat import (  class SinaIE(InfoExtractor): -    _VALID_URL = r'''https?://(.*?\.)?video\.sina\.com\.cn/ +    _VALID_URL = r'''(?x)https?://(.*?\.)?video\.sina\.com\.cn/                          (                              (.+?/(((?P<pseudo_id>\d+).html)|(.*?(\#|(vid=)|b/)(?P<id>\d+?)($|&|\-))))                              | @@ -23,9 +23,10 @@ class SinaIE(InfoExtractor):      _TESTS = [          {              'url': 'http://video.sina.com.cn/news/vlist/zt/chczlj2013/?opsubject_id=top12#110028898', -            'file': '110028898.flv',              'md5': 'd65dd22ddcf44e38ce2bf58a10c3e71f',              'info_dict': { +                'id': '110028898', +                'ext': 'flv',                  'title': '《中国新闻》 朝鲜要求巴拿马立即释放被扣船员',              }          }, @@ -39,10 +40,6 @@ class SinaIE(InfoExtractor):          },      ] -    @classmethod -    def suitable(cls, url): -        return re.match(cls._VALID_URL, url, flags=re.VERBOSE) is not None -      def _extract_video(self, video_id):          data = compat_urllib_parse.urlencode({'vid': video_id})          url_doc = self._download_xml('http://v.iask.com/v_play.php?%s' % data, @@ -59,7 +56,7 @@ class SinaIE(InfoExtractor):                  }      def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE) +        mobj = re.match(self._VALID_URL, url)          video_id = mobj.group('id')          if mobj.group('token') is not None:              # The video id is in the redirected url diff --git a/youtube_dl/extractor/soundcloud.py b/youtube_dl/extractor/soundcloud.py index 5d60c4939..c5284fa67 100644 --- a/youtube_dl/extractor/soundcloud.py +++ b/youtube_dl/extractor/soundcloud.py @@ -246,6 +246,7 @@ class SoundcloudSetIE(SoundcloudIE):      _TESTS = [{          'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep',          'info_dict': { +            'id': '2284613',              'title': 'The Royal Concept EP',          },          'playlist_mincount': 6, @@ -279,7 +280,7 @@ class SoundcloudSetIE(SoundcloudIE):          return {              '_type': 'playlist',              'entries': [self._extract_info_dict(track, secret_token=token) for track in info['tracks']], -            'id': info['id'], +            'id': '%s' % info['id'],              'title': info['title'],          } diff --git a/youtube_dl/extractor/teamcoco.py b/youtube_dl/extractor/teamcoco.py index 5fa67eb8d..18a823719 100644 --- a/youtube_dl/extractor/teamcoco.py +++ b/youtube_dl/extractor/teamcoco.py @@ -10,17 +10,19 @@ class TeamcocoIE(InfoExtractor):      _TESTS = [          {              'url': 'http://teamcoco.com/video/80187/conan-becomes-a-mary-kay-beauty-consultant', -            'file': '80187.mp4',              'md5': '3f7746aa0dc86de18df7539903d399ea',              'info_dict': { +                'id': '80187', +                'ext': 'mp4',                  'title': 'Conan Becomes A Mary Kay Beauty Consultant',                  'description': 'Mary Kay is perhaps the most trusted name in female beauty, so of course Conan is a natural choice to sell their products.'              }          }, {              'url': 'http://teamcoco.com/video/louis-ck-interview-george-w-bush', -            'file': '19705.mp4',              'md5': 'cde9ba0fa3506f5f017ce11ead928f9a',              'info_dict': { +                'id': '19705', +                'ext': 'mp4',                  "description": "Louis C.K. got starstruck by George W. Bush, so what? Part one.",                  "title": "Louis C.K. Interview Pt. 1 11/3/11"              } @@ -36,7 +38,7 @@ class TeamcocoIE(InfoExtractor):          video_id = mobj.group("video_id")          if not video_id:              video_id = self._html_search_regex( -                r'data-node-id="(\d+?)"', +                r'<div\s+class="player".*?data-id="(\d+?)"',                  webpage, 'video id')          data_url = 'http://teamcoco.com/cvp/2.0/%s.xml' % video_id diff --git a/youtube_dl/extractor/teletask.py b/youtube_dl/extractor/teletask.py index e54145105..b9e2ef8ca 100644 --- a/youtube_dl/extractor/teletask.py +++ b/youtube_dl/extractor/teletask.py @@ -11,6 +11,7 @@ class TeleTaskIE(InfoExtractor):      _TEST = {          'url': 'http://www.tele-task.de/archive/video/html5/26168/',          'info_dict': { +            'id': '26168',              'title': 'Duplicate Detection',          },          'playlist': [{ @@ -34,7 +35,6 @@ class TeleTaskIE(InfoExtractor):      def _real_extract(self, url):          lecture_id = self._match_id(url) -          webpage = self._download_webpage(url, lecture_id)          title = self._html_search_regex( diff --git a/youtube_dl/extractor/toutv.py b/youtube_dl/extractor/toutv.py index 2837f9c8e..4797d1310 100644 --- a/youtube_dl/extractor/toutv.py +++ b/youtube_dl/extractor/toutv.py @@ -16,8 +16,9 @@ class TouTvIE(InfoExtractor):      _TEST = {          'url': 'http://www.tou.tv/30-vies/S04E41', -        'file': '30-vies_S04E41.mp4',          'info_dict': { +            'id': '30-vies_S04E41', +            'ext': 'mp4',              'title': '30 vies Saison 4 / Épisode 41',              'description': 'md5:da363002db82ccbe4dafeb9cab039b09',              'age_limit': 8, diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index 06b0bed41..1bb473514 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -501,9 +501,10 @@ class VimeoReviewIE(InfoExtractor):      _VALID_URL = r'https?://vimeo\.com/[^/]+/review/(?P<id>[^/]+)'      _TESTS = [{          'url': 'https://vimeo.com/user21297594/review/75524534/3c257a1b5d', -        'file': '75524534.mp4',          'md5': 'c507a72f780cacc12b2248bb4006d253',          'info_dict': { +            'id': '75524534', +            'ext': 'mp4',              'title': "DICK HARDWICK 'Comedian'",              'uploader': 'Richard Hardwick',          } diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py index 313b9c15d..c90488500 100644 --- a/youtube_dl/extractor/wdr.py +++ b/youtube_dl/extractor/wdr.py @@ -71,6 +71,9 @@ class WDRIE(InfoExtractor):          {              'url': 'http://www1.wdr.de/mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100.html',              'playlist_mincount': 146, +            'info_dict': { +                'id': 'mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100', +            }          }      ] diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 3b3678c6e..e4b26b84f 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1160,6 +1160,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):      }, {          'url': 'https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',          'info_dict': { +            'id': 'PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx',              'title': 'YDL_Empty_List',          },          'playlist_count': 0, @@ -1168,6 +1169,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):          'url': 'https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',          'info_dict': {              'title': '29C3: Not my department', +            'id': 'PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC',          },          'playlist_count': 95,      }, { @@ -1175,6 +1177,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):          'url': 'PLBB231211A4F62143',          'info_dict': {              'title': '[OLD]Team Fortress 2 (Class-based LP)', +            'id': 'PLBB231211A4F62143',          },          'playlist_mincount': 26,      }, { @@ -1182,12 +1185,14 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):          'url': 'https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q',          'info_dict': {              'title': 'Uploads from Cauchemar', +            'id': 'UUBABnxM4Ar9ten8Mdjj1j0Q',          },          'playlist_mincount': 799,      }, {          'url': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',          'info_dict': {              'title': 'YDL_safe_search', +            'id': 'PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl',          },          'playlist_count': 2,      }, { @@ -1196,6 +1201,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):          'playlist_count': 4,          'info_dict': {              'title': 'JODA15', +            'id': 'PL6IaIsEjSbf96XFRuNccS_RuEXwNdsoEu',          }      }, {          'note': 'Embedded SWF player', @@ -1203,12 +1209,14 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):          'playlist_count': 4,          'info_dict': {              'title': 'JODA7', +            'id': 'YN5VISEtHet5D4NEvfTd0zcgFk84NqFZ',          }      }, {          'note': 'Buggy playlist: the webpage has a "Load more" button but it doesn\'t have more videos',          'url': 'https://www.youtube.com/playlist?list=UUXw-G3eDE9trcvY2sBMM_aA',          'info_dict': { -                'title': 'Uploads from Interstellar Movie', +            'title': 'Uploads from Interstellar Movie', +            'id': 'UUXw-G3eDE9trcvY2sBMM_aA',          },          'playlist_mincout': 21,      }] @@ -1314,6 +1322,9 @@ class YoutubeChannelIE(InfoExtractor):          'note': 'paginated channel',          'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w',          'playlist_mincount': 91, +        'info_dict': { +            'id': 'UCKfVa3S1e4PHvxWcwyMMg8w', +        }      }]      def extract_videos_from_page(self, page): diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index b4617fbad..453e2732c 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -1,59 +1,122 @@  from __future__ import unicode_literals  import json +import operator  import re  from .utils import (      ExtractorError,  ) +_OPERATORS = [ +    ('|', operator.or_), +    ('^', operator.xor), +    ('&', operator.and_), +    ('>>', operator.rshift), +    ('<<', operator.lshift), +    ('-', operator.sub), +    ('+', operator.add), +    ('%', operator.mod), +    ('/', operator.truediv), +    ('*', operator.mul), +] +_ASSIGN_OPERATORS = [(op + '=', opfunc) for op, opfunc in _OPERATORS] +_ASSIGN_OPERATORS.append(('=', lambda cur, right: right)) + +_NAME_RE = r'[a-zA-Z_$][a-zA-Z_$0-9]*' +  class JSInterpreter(object): -    def __init__(self, code): -        self.code = code +    def __init__(self, code, objects=None): +        if objects is None: +            objects = {} +        self.code = self._remove_comments(code)          self._functions = {} -        self._objects = {} +        self._objects = objects + +    def _remove_comments(self, code): +        return re.sub(r'(?s)/\*.*?\*/', '', code) -    def interpret_statement(self, stmt, local_vars, allow_recursion=20): +    def interpret_statement(self, stmt, local_vars, allow_recursion=100):          if allow_recursion < 0:              raise ExtractorError('Recursion limit reached') -        if stmt.startswith('var '): -            stmt = stmt[len('var '):] -        ass_m = re.match(r'^(?P<out>[a-z]+)(?:\[(?P<index>[^\]]+)\])?' + -                         r'=(?P<expr>.*)$', stmt) -        if ass_m: -            if ass_m.groupdict().get('index'): -                def assign(val): -                    lvar = local_vars[ass_m.group('out')] -                    idx = self.interpret_expression( -                        ass_m.group('index'), local_vars, allow_recursion) -                    assert isinstance(idx, int) -                    lvar[idx] = val -                    return val -                expr = ass_m.group('expr') -            else: -                def assign(val): -                    local_vars[ass_m.group('out')] = val -                    return val -                expr = ass_m.group('expr') -        elif stmt.startswith('return '): -            assign = lambda v: v -            expr = stmt[len('return '):] +        should_abort = False +        stmt = stmt.lstrip() +        stmt_m = re.match(r'var\s', stmt) +        if stmt_m: +            expr = stmt[len(stmt_m.group(0)):]          else: -            # Try interpreting it as an expression -            expr = stmt -            assign = lambda v: v +            return_m = re.match(r'return(?:\s+|$)', stmt) +            if return_m: +                expr = stmt[len(return_m.group(0)):] +                should_abort = True +            else: +                # Try interpreting it as an expression +                expr = stmt          v = self.interpret_expression(expr, local_vars, allow_recursion) -        return assign(v) +        return v, should_abort      def interpret_expression(self, expr, local_vars, allow_recursion): +        expr = expr.strip() + +        if expr == '':  # Empty expression +            return None + +        if expr.startswith('('): +            parens_count = 0 +            for m in re.finditer(r'[()]', expr): +                if m.group(0) == '(': +                    parens_count += 1 +                else: +                    parens_count -= 1 +                    if parens_count == 0: +                        sub_expr = expr[1:m.start()] +                        sub_result = self.interpret_expression( +                            sub_expr, local_vars, allow_recursion) +                        remaining_expr = expr[m.end():].strip() +                        if not remaining_expr: +                            return sub_result +                        else: +                            expr = json.dumps(sub_result) + remaining_expr +                        break +            else: +                raise ExtractorError('Premature end of parens in %r' % expr) + +        for op, opfunc in _ASSIGN_OPERATORS: +            m = re.match(r'''(?x) +                (?P<out>%s)(?:\[(?P<index>[^\]]+?)\])? +                \s*%s +                (?P<expr>.*)$''' % (_NAME_RE, re.escape(op)), expr) +            if not m: +                continue +            right_val = self.interpret_expression( +                m.group('expr'), local_vars, allow_recursion - 1) + +            if m.groupdict().get('index'): +                lvar = local_vars[m.group('out')] +                idx = self.interpret_expression( +                    m.group('index'), local_vars, allow_recursion) +                assert isinstance(idx, int) +                cur = lvar[idx] +                val = opfunc(cur, right_val) +                lvar[idx] = val +                return val +            else: +                cur = local_vars.get(m.group('out')) +                val = opfunc(cur, right_val) +                local_vars[m.group('out')] = val +                return val +          if expr.isdigit():              return int(expr) -        if expr.isalpha(): -            return local_vars[expr] +        var_m = re.match( +            r'(?!if|return|true|false)(?P<name>%s)$' % _NAME_RE, +            expr) +        if var_m: +            return local_vars[var_m.group('name')]          try:              return json.loads(expr) @@ -61,7 +124,7 @@ class JSInterpreter(object):              pass          m = re.match( -            r'^(?P<var>[$a-zA-Z0-9_]+)\.(?P<member>[^(]+)(?:\(+(?P<args>[^()]*)\))?$', +            r'(?P<var>%s)\.(?P<member>[^(]+)(?:\(+(?P<args>[^()]*)\))?$' % _NAME_RE,              expr)          if m:              variable = m.group('var') @@ -114,23 +177,31 @@ class JSInterpreter(object):              return obj[member](argvals)          m = re.match( -            r'^(?P<in>[a-z]+)\[(?P<idx>.+)\]$', expr) +            r'(?P<in>%s)\[(?P<idx>.+)\]$' % _NAME_RE, expr)          if m:              val = local_vars[m.group('in')]              idx = self.interpret_expression(                  m.group('idx'), local_vars, allow_recursion - 1)              return val[idx] -        m = re.match(r'^(?P<a>.+?)(?P<op>[%])(?P<b>.+?)$', expr) -        if m: -            a = self.interpret_expression( -                m.group('a'), local_vars, allow_recursion) -            b = self.interpret_expression( -                m.group('b'), local_vars, allow_recursion) -            return a % b +        for op, opfunc in _OPERATORS: +            m = re.match(r'(?P<x>.+?)%s(?P<y>.+)' % re.escape(op), expr) +            if not m: +                continue +            x, abort = self.interpret_statement( +                m.group('x'), local_vars, allow_recursion - 1) +            if abort: +                raise ExtractorError( +                    'Premature left-side return of %s in %r' % (op, expr)) +            y, abort = self.interpret_statement( +                m.group('y'), local_vars, allow_recursion - 1) +            if abort: +                raise ExtractorError( +                    'Premature right-side return of %s in %r' % (op, expr)) +            return opfunc(x, y)          m = re.match( -            r'^(?P<func>[a-zA-Z$]+)\((?P<args>[a-z0-9,]+)\)$', expr) +            r'^(?P<func>%s)\((?P<args>[a-zA-Z0-9_$,]+)\)$' % _NAME_RE, expr)          if m:              fname = m.group('func')              argvals = tuple([ @@ -139,6 +210,7 @@ class JSInterpreter(object):              if fname not in self._functions:                  self._functions[fname] = self.extract_function(fname)              return self._functions[fname](argvals) +          raise ExtractorError('Unsupported JS expression %r' % expr)      def extract_object(self, objname): @@ -162,9 +234,11 @@ class JSInterpreter(object):      def extract_function(self, funcname):          func_m = re.search( -            (r'(?:function %s|[{;]%s\s*=\s*function)' % ( -                re.escape(funcname), re.escape(funcname))) + -            r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}', +            r'''(?x) +                (?:function\s+%s|[{;]%s\s*=\s*function)\s* +                \((?P<args>[^)]*)\)\s* +                \{(?P<code>[^}]+)\}''' % ( +                re.escape(funcname), re.escape(funcname)),              self.code)          if func_m is None:              raise ExtractorError('Could not find JS function %r' % funcname) @@ -172,10 +246,16 @@ class JSInterpreter(object):          return self.build_function(argnames, func_m.group('code')) +    def call_function(self, funcname, *args): +        f = self.extract_function(funcname) +        return f(args) +      def build_function(self, argnames, code):          def resf(args):              local_vars = dict(zip(argnames, args))              for stmt in code.split(';'): -                res = self.interpret_statement(stmt, local_vars) +                res, abort = self.interpret_statement(stmt, local_vars) +                if abort: +                    break              return res          return resf diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index a4c9813ec..4ade0554e 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -32,6 +32,7 @@ import xml.etree.ElementTree  import zlib  from .compat import ( +    compat_basestring,      compat_chr,      compat_getenv,      compat_html_entities, @@ -140,7 +141,7 @@ else:      def find_xpath_attr(node, xpath, key, val):          # Here comes the crazy part: In 2.6, if the xpath is a unicode,          # .//node does not match if a node is a direct child of . ! -        if isinstance(xpath, unicode): +        if isinstance(xpath, compat_str):              xpath = xpath.encode('ascii')          for f in node.findall(xpath): @@ -1262,7 +1263,7 @@ def float_or_none(v, scale=1, invscale=1, default=None):  def parse_duration(s): -    if not isinstance(s, basestring if sys.version_info < (3, 0) else compat_str): +    if not isinstance(s, compat_basestring):          return None      s = s.strip() @@ -1426,7 +1427,7 @@ def uppercase_escape(s):  def escape_rfc3986(s):      """Escape non-ASCII characters as suggested by RFC 3986""" -    if sys.version_info < (3, 0) and isinstance(s, unicode): +    if sys.version_info < (3, 0) and isinstance(s, compat_str):          s = s.encode('utf-8')      return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]") diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 5e6288e8e..1e469d93b 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,3 +1,3 @@  from __future__ import unicode_literals -__version__ = '2015.01.30.2' +__version__ = '2015.02.02' | 
