diff options
| -rw-r--r-- | test/test_youtube_signature.py | 6 | ||||
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 1 | ||||
| -rw-r--r-- | youtube_dl/extractor/mlb.py | 102 | ||||
| -rw-r--r-- | youtube_dl/jsinterp.py | 40 | ||||
| -rw-r--r-- | youtube_dl/version.py | 2 | 
5 files changed, 147 insertions, 4 deletions
diff --git a/test/test_youtube_signature.py b/test/test_youtube_signature.py index 8d46fe108..d95533959 100644 --- a/test/test_youtube_signature.py +++ b/test/test_youtube_signature.py @@ -34,6 +34,12 @@ _TESTS = [          u']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876',      ),      ( +        u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl0Cbn9e.js', +        u'js', +        84, +        u'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVW@YZ!"#$%&\'()*+,-./:;<=', +    ), +    (          u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js',          u'js',          u'2ACFC7A61CA478CD21425E5A57EBD73DDC78E22A.2094302436B2D377D14A3BBA23022D023B8BC25AA', diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 78b95c2a5..ca372496a 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -171,6 +171,7 @@ from .metacafe import MetacafeIE  from .metacritic import MetacriticIE  from .mit import TechTVMITIE, MITIE, OCWMITIE  from .mixcloud import MixcloudIE +from .mlb import MLBIE  from .mpora import MporaIE  from .mofosex import MofosexIE  from .mooshare import MooshareIE diff --git a/youtube_dl/extractor/mlb.py b/youtube_dl/extractor/mlb.py new file mode 100644 index 000000000..18ab2c135 --- /dev/null +++ b/youtube_dl/extractor/mlb.py @@ -0,0 +1,102 @@ +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( +    parse_duration, +    parse_iso8601, +    find_xpath_attr, +) + + +class MLBIE(InfoExtractor): +    _VALID_URL = r'http?://m\.mlb\.com/video/(?:topic/[\da-z_-]+/)?v(?P<id>n?\d+)' +    _TESTS = [ +        { +            'url': 'http://m.mlb.com/video/topic/81536970/v34496663/mianym-stanton-practices-for-the-home-run-derby', +            'md5': 'd9c022c10d21f849f49c05ae12a8a7e9', +            'info_dict': { +                'id': '34496663', +                'ext': 'mp4', +                'title': 'Stanton prepares for Derby', +                'description': 'md5:d00ce1e5fd9c9069e9c13ab4faedfa57', +                'duration': 46, +                'timestamp': 1405105800, +                'upload_date': '20140711', +                'thumbnail': 're:^https?://.*\.jpg$', +            }, +        }, +        { +            'url': 'http://m.mlb.com/video/topic/vtp_hrd_sponsor/v34578115/hrd-cespedes-wins-2014-gillette-home-run-derby', +            'md5': '0e6e73d509321e142409b695eadd541f', +            'info_dict': { +                'id': '34578115', +                'ext': 'mp4', +                'title': 'Cespedes repeats as Derby champ', +                'description': 'md5:08df253ce265d4cf6fb09f581fafad07', +                'duration': 488, +                'timestamp': 1405399936, +                'upload_date': '20140715', +                'thumbnail': 're:^https?://.*\.jpg$', +            }, +        }, +        { +            'url': 'http://m.mlb.com/video/v34577915/bautista-on-derby-captaining-duties-his-performance', +            'md5': 'b8fd237347b844365d74ea61d4245967', +            'info_dict': { +                'id': '34577915', +                'ext': 'mp4', +                'title': 'Bautista on Home Run Derby', +                'description': 'md5:b80b34031143d0986dddc64a8839f0fb', +                'duration': 52, +                'timestamp': 1405390722, +                'upload_date': '20140715', +                'thumbnail': 're:^https?://.*\.jpg$', +            }, +        }, +    ] + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        video_id = mobj.group('id') + +        detail = self._download_xml( +            'http://m.mlb.com/gen/multimedia/detail/%s/%s/%s/%s.xml' +            % (video_id[-3], video_id[-2], video_id[-1], video_id), video_id) + +        title = detail.find('./headline').text +        description = detail.find('./big-blurb').text +        duration = parse_duration(detail.find('./duration').text) +        timestamp = parse_iso8601(detail.attrib['date'][:-5]) + +        thumbnail = find_xpath_attr( +            detail, './thumbnailScenarios/thumbnailScenario', 'type', '45').text + +        formats = [] +        for media_url in detail.findall('./url'): +            playback_scenario = media_url.attrib['playback_scenario'] +            fmt = { +                'url': media_url.text, +                'format_id': playback_scenario, +            } +            m = re.search(r'(?P<vbr>\d+)K_(?P<width>\d+)X(?P<height>\d+)', playback_scenario) +            if m: +                fmt.update({ +                    'vbr': int(m.group('vbr')) * 1000, +                    'width': int(m.group('width')), +                    'height': int(m.group('height')), +                }) +            formats.append(fmt) + +        self._sort_formats(formats) + +        return { +            'id': video_id, +            'title': title, +            'description': description, +            'duration': duration, +            'timestamp': timestamp, +            'formats': formats, +            'thumbnail': thumbnail, +        } diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py index 3bbb07704..ae5bca2e6 100644 --- a/youtube_dl/jsinterp.py +++ b/youtube_dl/jsinterp.py @@ -11,6 +11,7 @@ class JSInterpreter(object):      def __init__(self, code):          self.code = code          self._functions = {} +        self._objects = {}      def interpret_statement(self, stmt, local_vars, allow_recursion=20):          if allow_recursion < 0: @@ -55,7 +56,19 @@ class JSInterpreter(object):          m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)          if m:              member = m.group('member') -            val = local_vars[m.group('in')] +            variable = m.group('in') + +            if variable not in local_vars: +                if variable not in self._objects: +                    self._objects[variable] = self.extract_object(variable) +                obj = self._objects[variable] +                key, args = member.split('(', 1) +                args = args.strip(')') +                argvals = [int(v) if v.isdigit() else local_vars[v] +                           for v in args.split(',')] +                return obj[key](argvals) + +            val = local_vars[variable]              if member == 'split("")':                  return list(val)              if member == 'join("")': @@ -97,6 +110,25 @@ class JSInterpreter(object):              return self._functions[fname](argvals)          raise ExtractorError('Unsupported JS expression %r' % expr) +    def extract_object(self, objname): +        obj = {} +        obj_m = re.search( +            (r'(?:var\s+)?%s\s*=\s*\{' % re.escape(objname)) + +            r'\s*(?P<fields>([a-zA-Z$]+\s*:\s*function\(.*?\)\s*\{.*?\})*)' + +            r'\}\s*;', +            self.code) +        fields = obj_m.group('fields') +        # Currently, it only supports function definitions +        fields_m = re.finditer( +            r'(?P<key>[a-zA-Z$]+)\s*:\s*function' +            r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}', +            fields) +        for f in fields_m: +            argnames = f.group('args').split(',') +            obj[f.group('key')] = self.build_function(argnames, f.group('code')) + +        return obj +      def extract_function(self, funcname):          func_m = re.search(              (r'(?:function %s|[{;]%s\s*=\s*function)' % ( @@ -107,10 +139,12 @@ class JSInterpreter(object):              raise ExtractorError('Could not find JS function %r' % funcname)          argnames = func_m.group('args').split(',') +        return self.build_function(argnames, func_m.group('code')) + +    def build_function(self, argnames, code):          def resf(args):              local_vars = dict(zip(argnames, args)) -            for stmt in func_m.group('code').split(';'): +            for stmt in code.split(';'):                  res = self.interpret_statement(stmt, local_vars)              return res          return resf - diff --git a/youtube_dl/version.py b/youtube_dl/version.py index 2c9591630..4d606c3d2 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2014.07.11.3' +__version__ = '2014.07.15'  | 
