about summary refs log tree commit diff
path: root/youtube_dl/extractor/generic.py
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/extractor/generic.py')
-rw-r--r-- youtube_dl/extractor/generic.py 45
1 files changed, 41 insertions, 4 deletions
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 3a908d01f..2bfa20606 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -12,6 +12,7 @@ from ..utils import (
compat_urlparse,
compat_xml_parse_error,
+ determine_ext,
ExtractorError,
float_or_none,
HEADRequest,
@@ -351,7 +352,36 @@ class GenericIE(InfoExtractor):
'description': 're:'
},
'playlist_mincount': 11,
- }
+ },
+ # Multiple brightcove videos
+ # https://github.com/rg3/youtube-dl/issues/2283
+ {
+ 'url': 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html',
+ 'info_dict': {
+ 'id': 'always-never',
+ 'title': 'Always / Never - The New Yorker',
+ },
+ 'playlist_count': 3,
+ 'params': {
+ 'extract_flat': False,
+ 'skip_download': True,
+ }
+ },
+ # MLB embed
+ {
+ 'url': 'http://umpire-empire.com/index.php/topic/58125-laz-decides-no-thats-low/',
+ 'md5': '96f09a37e44da40dd083e12d9a683327',
+ 'info_dict': {
+ 'id': '33322633',
+ 'ext': 'mp4',
+ 'title': 'Ump changes call to ball',
+ 'description': 'md5:71c11215384298a172a6dcb4c2e20685',
+ 'duration': 48,
+ 'timestamp': 1401537900,
+ 'upload_date': '20140531',
+ 'thumbnail': 're:^https?://.*\.jpg$',
+ },
+ },
]
def report_download_webpage(self, video_id):
@@ -598,7 +628,7 @@ class GenericIE(InfoExtractor):
embedSWF\(?:\s*
)
(["\'])
- (?P<url>(?:https?:)?//(?:www\.)?youtube\.com/
+ (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
(?:embed|v)/.+?)
\1''', webpage)
if matches:
@@ -794,6 +824,12 @@ class GenericIE(InfoExtractor):
if mobj is not None:
return self.url_result(mobj.group('url'), 'SBS')
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>https?://m\.mlb\.com/shared/video/embed/embed\.html\?.+?)\1',
+ webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'MLB')
+
# Start with something easy: JW Player in SWFObject
found = re.findall(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
if not found:
@@ -830,13 +866,14 @@ class GenericIE(InfoExtractor):
if m_video_type is not None:
def check_video(vurl):
vpath = compat_urlparse.urlparse(vurl).path
- return '.' in vpath and not vpath.endswith('.swf')
+ vext = determine_ext(vpath)
+ return '.' in vpath and vext not in ('swf', 'png', 'jpg')
found = list(filter(
check_video,
re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)))
if not found:
# HTML5 video
- found = re.findall(r'(?s)<video[^<]*(?:>.*?<source.*?)? src="([^"]+)"', webpage)
+ found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]+)? src="([^"]+)"', webpage)
if not found:
found = re.search(
r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'