aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl/extractor/generic.py
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/extractor/generic.py')
-rw-r--r--youtube_dl/extractor/generic.py188
1 files changed, 165 insertions, 23 deletions
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 4aa24061c..cddd1a817 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -49,7 +49,10 @@ from .pornhub import PornHubIE
from .xhamster import XHamsterEmbedIE
from .tnaflix import TNAFlixNetworkEmbedIE
from .vimeo import VimeoIE
-from .dailymotion import DailymotionCloudIE
+from .dailymotion import (
+ DailymotionIE,
+ DailymotionCloudIE,
+)
from .onionstudios import OnionStudiosIE
from .viewlift import ViewLiftEmbedIE
from .screenwavemedia import ScreenwaveMediaIE
@@ -64,6 +67,9 @@ from .liveleak import LiveLeakIE
from .threeqsdn import ThreeQSDNIE
from .theplatform import ThePlatformIE
from .vessel import VesselIE
+from .kaltura import KalturaIE
+from .eagleplatform import EaglePlatformIE
+from .facebook import FacebookIE
class GenericIE(InfoExtractor):
@@ -920,6 +926,24 @@ class GenericIE(InfoExtractor):
},
'add_ie': ['Kaltura'],
},
+ {
+ # Kaltura embedded via quoted entry_id
+ 'url': 'https://www.oreilly.com/ideas/my-cloud-makes-pretty-pictures',
+ 'info_dict': {
+ 'id': '0_utuok90b',
+ 'ext': 'mp4',
+ 'title': '06_matthew_brender_raj_dutt',
+ 'timestamp': 1466638791,
+ 'upload_date': '20160622',
+ },
+ 'add_ie': ['Kaltura'],
+ 'expected_warnings': [
+ 'Could not send HEAD request'
+ ],
+ 'params': {
+ 'skip_download': True,
+ }
+ },
# Eagle.Platform embed (generic URL)
{
'url': 'http://lenta.ru/news/2015/03/06/navalny/',
@@ -1091,12 +1115,17 @@ class GenericIE(InfoExtractor):
# Dailymotion Cloud video
{
'url': 'http://replay.publicsenat.fr/vod/le-debat/florent-kolandjian,dominique-cena,axel-decourtye,laurence-abeille,bruno-parmentier/175910',
- 'md5': '49444254273501a64675a7e68c502681',
+ 'md5': 'dcaf23ad0c67a256f4278bce6e0bae38',
'info_dict': {
- 'id': '5585de919473990de4bee11b',
+ 'id': 'x2uy8t3',
'ext': 'mp4',
- 'title': 'Le débat',
+ 'title': 'Sauvons les abeilles ! - Le débat',
+ 'description': 'md5:d9082128b1c5277987825d684939ca26',
'thumbnail': 're:^https?://.*\.jpe?g$',
+ 'timestamp': 1434970506,
+ 'upload_date': '20150622',
+ 'uploader': 'Public Sénat',
+ 'uploader_id': 'xa9gza',
}
},
# OnionStudios embed
@@ -1220,6 +1249,102 @@ class GenericIE(InfoExtractor):
'uploader': 'www.hudl.com',
},
},
+ # twitter:player embed
+ {
+ 'url': 'http://www.theatlantic.com/video/index/484130/what-do-black-holes-sound-like/',
+ 'md5': 'a3e0df96369831de324f0778e126653c',
+ 'info_dict': {
+ 'id': '4909620399001',
+ 'ext': 'mp4',
+ 'title': 'What Do Black Holes Sound Like?',
+ 'description': 'what do black holes sound like',
+ 'upload_date': '20160524',
+ 'uploader_id': '29913724001',
+ 'timestamp': 1464107587,
+ 'uploader': 'TheAtlantic',
+ },
+ 'add_ie': ['BrightcoveLegacy'],
+ },
+ # Facebook <iframe> embed
+ {
+ 'url': 'https://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html',
+ 'md5': 'fbcde74f534176ecb015849146dd3aee',
+ 'info_dict': {
+ 'id': '599637780109885',
+ 'ext': 'mp4',
+ 'title': 'Facebook video #599637780109885',
+ },
+ },
+ # Facebook API embed
+ {
+ 'url': 'http://www.lothype.com/blue-stars-2016-preview-standstill-full-show/',
+ 'md5': 'a47372ee61b39a7b90287094d447d94e',
+ 'info_dict': {
+ 'id': '10153467542406923',
+ 'ext': 'mp4',
+ 'title': 'Facebook video #10153467542406923',
+ },
+ },
+ # Wordpress "YouTube Video Importer" plugin
+ {
+ 'url': 'http://www.lothype.com/blue-devils-drumline-stanford-lot-2016/',
+ 'md5': 'd16797741b560b485194eddda8121b48',
+ 'info_dict': {
+ 'id': 'HNTXWDXV9Is',
+ 'ext': 'mp4',
+ 'title': 'Blue Devils Drumline Stanford lot 2016',
+ 'upload_date': '20160627',
+ 'uploader_id': 'GENOCIDE8GENERAL10',
+ 'uploader': 'cylus cyrus',
+ },
+ },
+ {
+ # video stored on custom kaltura server
+ 'url': 'http://www.expansion.com/multimedia/videos.html?media=EQcM30NHIPv',
+ 'md5': '537617d06e64dfed891fa1593c4b30cc',
+ 'info_dict': {
+ 'id': '0_1iotm5bh',
+ 'ext': 'mp4',
+ 'title': 'Elecciones británicas: 5 lecciones para Rajoy',
+ 'description': 'md5:435a89d68b9760b92ce67ed227055f16',
+ 'uploader_id': 'videos.expansion@el-mundo.net',
+ 'upload_date': '20150429',
+ 'timestamp': 1430303472,
+ },
+ 'add_ie': ['Kaltura'],
+ },
+ {
+ # Non-standard Vimeo embed
+ 'url': 'https://openclassrooms.com/courses/understanding-the-web',
+ 'md5': '64d86f1c7d369afd9a78b38cbb88d80a',
+ 'info_dict': {
+ 'id': '148867247',
+ 'ext': 'mp4',
+ 'title': 'Understanding the web - Teaser',
+ 'description': 'This is "Understanding the web - Teaser" by openclassrooms on Vimeo, the home for high quality videos and the people who love them.',
+ 'upload_date': '20151214',
+ 'uploader': 'OpenClassrooms',
+ 'uploader_id': 'openclassrooms',
+ },
+ 'add_ie': ['Vimeo'],
+ },
+ # {
+ # # TODO: find another test
+ # # http://schema.org/VideoObject
+ # 'url': 'https://flipagram.com/f/nyvTSJMKId',
+ # 'md5': '888dcf08b7ea671381f00fab74692755',
+ # 'info_dict': {
+ # 'id': 'nyvTSJMKId',
+ # 'ext': 'mp4',
+ # 'title': 'Flipagram by sjuria101 featuring Midnight Memories by One Direction',
+ # 'description': '#love for cats.',
+ # 'timestamp': 1461244995,
+ # 'upload_date': '20160421',
+ # },
+ # 'params': {
+ # 'force_generic_extractor': True,
+ # },
+ # }
]
def report_following_redirect(self, new_url):
@@ -1576,12 +1701,16 @@ class GenericIE(InfoExtractor):
if matches:
return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
- # Look for embedded Dailymotion player
- matches = re.findall(
- r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage)
+ # Look for Wordpress "YouTube Video Importer" plugin
+ matches = re.findall(r'''(?x)<div[^>]+
+ class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
+ data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
if matches:
- return _playlist_from_matches(
- matches, lambda m: unescapeHTML(m[1]))
+ return _playlist_from_matches(matches, lambda m: m[-1])
+
+ matches = DailymotionIE._extract_urls(webpage)
+ if matches:
+ return _playlist_from_matches(matches)
# Look for embedded Dailymotion playlist player (#3822)
m = re.search(
@@ -1718,10 +1847,9 @@ class GenericIE(InfoExtractor):
return self.url_result(mobj.group('url'))
# Look for embedded Facebook player
- mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'), 'Facebook')
+ facebook_url = FacebookIE._extract_url(webpage)
+ if facebook_url is not None:
+ return self.url_result(facebook_url, 'Facebook')
# Look for embedded VK player
mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
@@ -1903,18 +2031,14 @@ class GenericIE(InfoExtractor):
return self.url_result(mobj.group('url'), 'Zapiks')
# Look for Kaltura embeds
- mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?(?P<q1>['\"])wid(?P=q1)\s*:\s*(?P<q2>['\"])_?(?P<partner_id>[^'\"]+)(?P=q2),.*?(?P<q3>['\"])entry_?[Ii]d(?P=q3)\s*:\s*(?P<q4>['\"])(?P<id>[^'\"]+)(?P=q4),", webpage) or
- re.search(r'(?s)(?P<q1>["\'])(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?(?P=q1).*?entry_?[Ii]d\s*:\s*(?P<q2>["\'])(?P<id>.+?)(?P=q2)', webpage))
- if mobj is not None:
- return self.url_result(smuggle_url(
- 'kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(),
- {'source_url': url}), 'Kaltura')
+ kaltura_url = KalturaIE._extract_url(webpage)
+ if kaltura_url:
+ return self.url_result(smuggle_url(kaltura_url, {'source_url': url}), KalturaIE.ie_key())
# Look for Eagle.Platform embeds
- mobj = re.search(
- r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'), 'EaglePlatform')
+ eagleplatform_url = EaglePlatformIE._extract_url(webpage)
+ if eagleplatform_url:
+ return self.url_result(eagleplatform_url, EaglePlatformIE.ie_key())
# Look for ClipYou (uses Eagle.Platform) embeds
mobj = re.search(
@@ -2060,6 +2184,24 @@ class GenericIE(InfoExtractor):
'uploader': video_uploader,
}
+ # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser
+ embed_url = self._html_search_meta('twitter:player', webpage, default=None)
+ if embed_url:
+ return self.url_result(embed_url)
+
+ # Looking for http://schema.org/VideoObject
+ json_ld = self._search_json_ld(
+ webpage, video_id, default=None, expected_type='VideoObject')
+ if json_ld and json_ld.get('url'):
+ info_dict.update({
+ 'title': video_title or info_dict['title'],
+ 'description': video_description,
+ 'thumbnail': video_thumbnail,
+ 'age_limit': age_limit
+ })
+ info_dict.update(json_ld)
+ return info_dict
+
def check_video(vurl):
if YoutubeIE.suitable(vurl):
return True