diff options
Diffstat (limited to 'youtube_dl/extractor/generic.py')
-rw-r--r-- | youtube_dl/extractor/generic.py | 15 |
1 files changed, 14 insertions, 1 deletions
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 4e6927b08..8716e4503 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1006,6 +1006,13 @@ class GenericIE(InfoExtractor): if mobj is not None: return self.url_result(mobj.group('url')) + # Look for NYTimes player + mobj = re.search( + r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//graphics8\.nytimes\.com/bcvideo/[^/]+/iframe/embed\.html.+?)\1>', + webpage) + if mobj is not None: + return self.url_result(mobj.group('url')) + # Look for Ooyala videos mobj = (re.search(r'player\.ooyala\.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or @@ -1268,10 +1275,16 @@ class GenericIE(InfoExtractor): # HTML5 video found = re.findall(r'(?s)<video[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage) if not found: + REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)' found = re.search( r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")' - r'(?:[a-z-]+="[^"]+"\s+)*?content="[0-9]{,2};url=\'?([^\'"]+)', + r'(?:[a-z-]+="[^"]+"\s+)*?content="%s' % REDIRECT_REGEX, webpage) + if not found: + # Look also in Refresh HTTP header + refresh_header = head_response.headers.get('Refresh') + if refresh_header: + found = re.search(REDIRECT_REGEX, refresh_header) if found: new_url = found.group(1) self.report_following_redirect(new_url) |