about | summary | refs | log | tree | commit | diff
path: root/youtube_dl
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl')
-rw-r--r-- youtube_dl/extractor/__init__.py | 1
-rw-r--r-- youtube_dl/extractor/mlb.py | 102
-rw-r--r-- youtube_dl/jsinterp.py | 40
-rw-r--r-- youtube_dl/version.py | 2
4 files changed, 141 insertions, 4 deletions
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index 78b95c2a5..ca372496a 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -171,6 +171,7 @@ from .metacafe import MetacafeIE
from .metacritic import MetacriticIE
from .mit import TechTVMITIE, MITIE, OCWMITIE
from .mixcloud import MixcloudIE
+from .mlb import MLBIE
from .mpora import MporaIE
from .mofosex import MofosexIE
from .mooshare import MooshareIE
diff --git a/youtube_dl/extractor/mlb.py b/youtube_dl/extractor/mlb.py
new file mode 100644
index 000000000..18ab2c135
--- /dev/null
+++ b/youtube_dl/extractor/mlb.py
@@ -0,0 +1,102 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ parse_duration,
+ parse_iso8601,
+ find_xpath_attr,
+)
+
+
+class MLBIE(InfoExtractor):
+    """Information extractor for video pages on m.mlb.com.
+
+    The video id is taken from the page URL and used to fetch an XML
+    "detail" document, from which the metadata and the list of media
+    URLs are read.
+    """
+
+    # 'https?' accepts both http and https; the 'topic/<name>/' path
+    # component is optional.
+    _VALID_URL = r'https?://m\.mlb\.com/video/(?:topic/[\da-z_-]+/)?v(?P<id>n?\d+)'
+    _TESTS = [
+        {
+            'url': 'http://m.mlb.com/video/topic/81536970/v34496663/mianym-stanton-practices-for-the-home-run-derby',
+            'md5': 'd9c022c10d21f849f49c05ae12a8a7e9',
+            'info_dict': {
+                'id': '34496663',
+                'ext': 'mp4',
+                'title': 'Stanton prepares for Derby',
+                'description': 'md5:d00ce1e5fd9c9069e9c13ab4faedfa57',
+                'duration': 46,
+                'timestamp': 1405105800,
+                'upload_date': '20140711',
+                'thumbnail': r're:^https?://.*\.jpg$',
+            },
+        },
+        {
+            'url': 'http://m.mlb.com/video/topic/vtp_hrd_sponsor/v34578115/hrd-cespedes-wins-2014-gillette-home-run-derby',
+            'md5': '0e6e73d509321e142409b695eadd541f',
+            'info_dict': {
+                'id': '34578115',
+                'ext': 'mp4',
+                'title': 'Cespedes repeats as Derby champ',
+                'description': 'md5:08df253ce265d4cf6fb09f581fafad07',
+                'duration': 488,
+                'timestamp': 1405399936,
+                'upload_date': '20140715',
+                'thumbnail': r're:^https?://.*\.jpg$',
+            },
+        },
+        {
+            'url': 'http://m.mlb.com/video/v34577915/bautista-on-derby-captaining-duties-his-performance',
+            'md5': 'b8fd237347b844365d74ea61d4245967',
+            'info_dict': {
+                'id': '34577915',
+                'ext': 'mp4',
+                'title': 'Bautista on Home Run Derby',
+                'description': 'md5:b80b34031143d0986dddc64a8839f0fb',
+                'duration': 52,
+                'timestamp': 1405390722,
+                'upload_date': '20140715',
+                'thumbnail': r're:^https?://.*\.jpg$',
+            },
+        },
+    ]
+
+    def _real_extract(self, url):
+        """Return the info dict for the video page at *url*.
+
+        The result carries 'id', 'title', 'description', 'duration',
+        'timestamp', 'formats' and 'thumbnail'.  'description' and
+        'thumbnail' are None when the detail document lacks them.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        # The detail document path is built from the last three
+        # characters of the id followed by the full id.
+        detail = self._download_xml(
+            'http://m.mlb.com/gen/multimedia/detail/%s/%s/%s/%s.xml'
+            % (video_id[-3], video_id[-2], video_id[-1], video_id), video_id)
+
+        title = detail.find('./headline').text
+        # Description is optional metadata: do not abort the extraction
+        # when the node is missing.
+        description_el = detail.find('./big-blurb')
+        description = description_el.text if description_el is not None else None
+        duration = parse_duration(detail.find('./duration').text)
+        # The last five characters of the date attribute are dropped
+        # before parsing (presumably a UTC offset suffix -- TODO confirm).
+        timestamp = parse_iso8601(detail.attrib['date'][:-5])
+
+        # Thumbnail is optional as well.  NOTE(review): assumes
+        # find_xpath_attr yields None when nothing matches -- confirm.
+        thumbnail_el = find_xpath_attr(
+            detail, './thumbnailScenarios/thumbnailScenario', 'type', '45')
+        thumbnail = thumbnail_el.text if thumbnail_el is not None else None
+
+        formats = []
+        for media_url in detail.findall('./url'):
+            playback_scenario = media_url.attrib['playback_scenario']
+            fmt = {
+                'url': media_url.text,
+                'format_id': playback_scenario,
+            }
+            # Scenario names containing '<vbr>K_<width>X<height>' encode
+            # the bitrate and frame size; other scenarios carry no such
+            # information.
+            m = re.search(r'(?P<vbr>\d+)K_(?P<width>\d+)X(?P<height>\d+)', playback_scenario)
+            if m:
+                fmt.update({
+                    'vbr': int(m.group('vbr')) * 1000,
+                    'width': int(m.group('width')),
+                    'height': int(m.group('height')),
+                })
+            formats.append(fmt)
+
+        self._sort_formats(formats)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'description': description,
+            'duration': duration,
+            'timestamp': timestamp,
+            'formats': formats,
+            'thumbnail': thumbnail,
+        }
diff --git a/youtube_dl/jsinterp.py b/youtube_dl/jsinterp.py
index 3bbb07704..ae5bca2e6 100644
--- a/youtube_dl/jsinterp.py
+++ b/youtube_dl/jsinterp.py
@@ -11,6 +11,7 @@ class JSInterpreter(object):
def __init__(self, code):
self.code = code
self._functions = {}
+ self._objects = {}
def interpret_statement(self, stmt, local_vars, allow_recursion=20):
if allow_recursion < 0:
@@ -55,7 +56,19 @@ class JSInterpreter(object):
m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
if m:
member = m.group('member')
- val = local_vars[m.group('in')]
+ variable = m.group('in')
+
+ if variable not in local_vars:
+ if variable not in self._objects:
+ self._objects[variable] = self.extract_object(variable)
+ obj = self._objects[variable]
+ key, args = member.split('(', 1)
+ args = args.strip(')')
+ argvals = [int(v) if v.isdigit() else local_vars[v]
+ for v in args.split(',')]
+ return obj[key](argvals)
+
+ val = local_vars[variable]
if member == 'split("")':
return list(val)
if member == 'join("")':
@@ -97,6 +110,25 @@ class JSInterpreter(object):
return self._functions[fname](argvals)
raise ExtractorError('Unsupported JS expression %r' % expr)
+    def extract_object(self, objname):
+        """Build a dict of callables from the JS object named *objname*.
+
+        Searches self.code for a definition of the form
+        ``objname = {key:function(args){code}, ...};`` (optionally
+        preceded by ``var``) and returns a dict mapping each member
+        name to a function created with build_function.
+
+        Raises ExtractorError if no such object definition is found.
+        """
+        obj = {}
+        obj_m = re.search(
+            (r'(?:var\s+)?%s\s*=\s*\{' % re.escape(objname)) +
+            r'\s*(?P<fields>([a-zA-Z$]+\s*:\s*function\(.*?\)\s*\{.*?\})*)' +
+            r'\}\s*;',
+            self.code)
+        if obj_m is None:
+            # Same failure mode as extract_function: report what could
+            # not be found instead of failing on None.group().
+            raise ExtractorError('Could not find JS object %r' % objname)
+        fields = obj_m.group('fields')
+        # Currently, it only supports function definitions
+        fields_m = re.finditer(
+            r'(?P<key>[a-zA-Z$]+)\s*:\s*function'
+            r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
+            fields)
+        for f in fields_m:
+            argnames = f.group('args').split(',')
+            obj[f.group('key')] = self.build_function(argnames, f.group('code'))
+
+        return obj
+
def extract_function(self, funcname):
func_m = re.search(
(r'(?:function %s|[{;]%s\s*=\s*function)' % (
@@ -107,10 +139,12 @@ class JSInterpreter(object):
raise ExtractorError('Could not find JS function %r' % funcname)
argnames = func_m.group('args').split(',')
+ return self.build_function(argnames, func_m.group('code'))
+
+ def build_function(self, argnames, code):
def resf(args):
local_vars = dict(zip(argnames, args))
- for stmt in func_m.group('code').split(';'):
+ for stmt in code.split(';'):
res = self.interpret_statement(stmt, local_vars)
return res
return resf
-
diff --git a/youtube_dl/version.py b/youtube_dl/version.py
index 2c9591630..4d606c3d2 100644
--- a/youtube_dl/version.py
+++ b/youtube_dl/version.py
@@ -1,2 +1,2 @@
-__version__ = '2014.07.11.3'
+__version__ = '2014.07.15'