aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl/extractor/generic.py
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/extractor/generic.py')
-rw-r--r--youtube_dl/extractor/generic.py54
1 files changed, 23 insertions, 31 deletions
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 367f930dd..c16da70f1 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -155,7 +155,6 @@ class GenericIE(InfoExtractor):
# funnyordie embed
{
'url': 'http://www.theguardian.com/world/2014/mar/11/obama-zach-galifianakis-between-two-ferns',
- 'md5': '7cf780be104d40fea7bae52eed4a470e',
'info_dict': {
'id': '18e820ec3f',
'ext': 'mp4',
@@ -180,13 +179,13 @@ class GenericIE(InfoExtractor):
# Embedded TED video
{
'url': 'http://en.support.wordpress.com/videos/ted-talks/',
- 'md5': 'deeeabcc1085eb2ba205474e7235a3d5',
+ 'md5': '65fdff94098e4a607385a60c5177c638',
'info_dict': {
- 'id': '981',
+ 'id': '1969',
'ext': 'mp4',
- 'title': 'My web playroom',
- 'uploader': 'Ze Frank',
- 'description': 'md5:ddb2a40ecd6b6a147e400e535874947b',
+ 'title': 'Hidden miracles of the natural world',
+ 'uploader': 'Louie Schwartzberg',
+ 'description': 'md5:8145d19d320ff3e52f28401f4c4283b9',
}
},
# Embedded Ustream video
@@ -226,21 +225,6 @@ class GenericIE(InfoExtractor):
'skip_download': 'Requires rtmpdump'
}
},
- # smotri embed
- {
- 'url': 'http://rbctv.rbc.ru/archive/news/562949990879132.shtml',
- 'md5': 'ec40048448e9284c9a1de77bb188108b',
- 'info_dict': {
- 'id': 'v27008541fad',
- 'ext': 'mp4',
- 'title': 'Крым и Севастополь вошли в состав России',
- 'description': 'md5:fae01b61f68984c7bd2fa741e11c3175',
- 'duration': 900,
- 'upload_date': '20140318',
- 'uploader': 'rbctv_2012_4',
- 'uploader_id': 'rbctv_2012_4',
- },
- },
# Condé Nast embed
{
'url': 'http://www.wired.com/2014/04/honda-asimo/',
@@ -295,13 +279,13 @@ class GenericIE(InfoExtractor):
{
'url': 'https://play.google.com/store/apps/details?id=com.gameloft.android.ANMP.GloftA8HM',
'info_dict': {
- 'id': 'jpSGZsgga_I',
+ 'id': '4vAffPZIT44',
'ext': 'mp4',
- 'title': 'Asphalt 8: Airborne - Launch Trailer',
+ 'title': 'Asphalt 8: Airborne - Update - Welcome to Dubai!',
'uploader': 'Gameloft',
'uploader_id': 'gameloft',
- 'upload_date': '20130821',
- 'description': 'md5:87bd95f13d8be3e7da87a5f2c443106a',
+ 'upload_date': '20140828',
+ 'description': 'md5:c80da9ed3d83ae6d1876c834de03e1c4',
},
'params': {
'skip_download': True,
@@ -397,12 +381,6 @@ class GenericIE(InfoExtractor):
},
]
- def report_download_webpage(self, video_id):
- """Report webpage download."""
- if not self._downloader.params.get('test', False):
- self._downloader.report_warning('Falling back on generic information extractor.')
- super(GenericIE, self).report_download_webpage(video_id)
-
def report_following_redirect(self, new_url):
"""Report information extraction."""
self._downloader.to_screen('[redirect] Following redirect to %s' % new_url)
@@ -502,6 +480,7 @@ class GenericIE(InfoExtractor):
url, smuggled_data = unsmuggle_url(url)
force_videoid = None
+ is_intentional = smuggled_data and smuggled_data.get('to_generic')
if smuggled_data and 'force_videoid' in smuggled_data:
force_videoid = smuggled_data['force_videoid']
video_id = force_videoid
@@ -544,6 +523,9 @@ class GenericIE(InfoExtractor):
'upload_date': upload_date,
}
+ if not self._downloader.params.get('test', False) and not is_intentional:
+ self._downloader.report_warning('Falling back on generic information extractor.')
+
try:
webpage = self._download_webpage(url, video_id)
except ValueError:
@@ -657,6 +639,16 @@ class GenericIE(InfoExtractor):
return _playlist_from_matches(
matches, lambda m: unescapeHTML(m[1]))
+ # Look for embedded Dailymotion playlist player (#3822)
+ m = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.[a-z]{2,3}/widget/jukebox\?.+?)\1', webpage)
+ if m:
+ playlists = re.findall(
+ r'list\[\]=/playlist/([^/]+)/', unescapeHTML(m.group('url')))
+ if playlists:
+ return _playlist_from_matches(
+ playlists, lambda p: '//dailymotion.com/playlist/%s' % p)
+
# Look for embedded Wistia player
match = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:fast\.)?wistia\.net/embed/iframe/.+?)\1', webpage)