aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl/extractor/generic.py
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/extractor/generic.py')
-rw-r--r--youtube_dl/extractor/generic.py101
1 files changed, 73 insertions, 28 deletions
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 0ab2ef2d6..49b00b87e 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -97,6 +97,8 @@ from .washingtonpost import WashingtonPostIE
from .wistia import WistiaIE
from .mediaset import MediasetIE
from .joj import JojIE
+from .megaphone import MegaphoneIE
+from .vzaar import VzaarIE
class GenericIE(InfoExtractor):
@@ -574,6 +576,19 @@ class GenericIE(InfoExtractor):
},
'skip': 'movie expired',
},
+ # ooyala video embedded with http://player.ooyala.com/static/v4/production/latest/core.min.js
+ {
+ 'url': 'http://wnep.com/2017/07/22/steampunk-fest-comes-to-honesdale/',
+ 'info_dict': {
+ 'id': 'lwYWYxYzE6V5uJMjNGyKtwwiw9ZJD7t2',
+ 'ext': 'mp4',
+ 'title': 'Steampunk Fest Comes to Honesdale',
+ 'duration': 43.276,
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ },
# embed.ly video
{
'url': 'http://www.tested.com/science/weird/460206-tested-grinding-coffee-2000-frames-second/',
@@ -1504,14 +1519,27 @@ class GenericIE(InfoExtractor):
# LiveLeak embed
{
'url': 'http://www.wykop.pl/link/3088787/',
- 'md5': 'ace83b9ed19b21f68e1b50e844fdf95d',
+ 'md5': '7619da8c820e835bef21a1efa2a0fc71',
'info_dict': {
'id': '874_1459135191',
'ext': 'mp4',
'title': 'Man shows poor quality of new apartment building',
'description': 'The wall is like a sand pile.',
'uploader': 'Lake8737',
- }
+ },
+ 'add_ie': [LiveLeakIE.ie_key()],
+ },
+ # Another LiveLeak embed pattern (#13336)
+ {
+ 'url': 'https://milo.yiannopoulos.net/2017/06/concealed-carry-robbery/',
+ 'info_dict': {
+ 'id': '2eb_1496309988',
+ 'ext': 'mp4',
+ 'title': 'Thief robs place where everyone was armed',
+ 'description': 'md5:694d73ee79e535953cf2488562288eee',
+ 'uploader': 'brazilwtf',
+ },
+ 'add_ie': [LiveLeakIE.ie_key()],
},
# Duplicated embedded video URLs
{
@@ -1569,27 +1597,6 @@ class GenericIE(InfoExtractor):
'skip_download': True,
},
},
- # Nexx iFrame embed
- {
- 'url': 'http://www.spiegel.de/sptv/spiegeltv/spiegel-tv-ueber-schnellste-katapult-achterbahn-der-welt-taron-a-1137884.html',
- 'info_dict': {
- 'id': '161464',
- 'ext': 'mp4',
- 'title': 'Nervenkitzel Achterbahn',
- 'alt_title': 'Karussellbauer in Deutschland',
- 'description': 'md5:ffe7b1cc59a01f585e0569949aef73cc',
- 'release_year': 2005,
- 'creator': 'SPIEGEL TV',
- 'thumbnail': r're:^https?://.*\.jpg$',
- 'duration': 2761,
- 'timestamp': 1394021479,
- 'upload_date': '20140305',
- },
- 'params': {
- 'format': 'bestvideo',
- 'skip_download': True,
- },
- },
# Facebook <iframe> embed
{
'url': 'https://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html',
@@ -1792,6 +1799,21 @@ class GenericIE(InfoExtractor):
'playlist_mincount': 5,
},
{
+ # Limelight embed (LimelightPlayerUtil.embed)
+ 'url': 'https://tv5.ca/videos?v=xuu8qowr291ri',
+ 'info_dict': {
+ 'id': '95d035dc5c8a401588e9c0e6bd1e9c92',
+ 'ext': 'mp4',
+ 'title': '07448641',
+ 'timestamp': 1499890639,
+ 'upload_date': '20170712',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': ['LimelightMedia'],
+ },
+ {
'url': 'http://kron4.com/2017/04/28/standoff-with-walnut-creek-murder-suspect-ends-with-arrest/',
'info_dict': {
'id': 'standoff-with-walnut-creek-murder-suspect-ends-with-arrest',
@@ -1847,6 +1869,16 @@ class GenericIE(InfoExtractor):
'title': 'Стас Намин: «Мы нарушили девственность Кремля»',
},
},
+ {
+ # vzaar embed
+ 'url': 'http://help.vzaar.com/article/165-embedding-video',
+ 'md5': '7e3919d9d2620b89e3e00bec7fe8c9d4',
+ 'info_dict': {
+ 'id': '8707641',
+ 'ext': 'mp4',
+ 'title': 'Building A Business Online: Principal Chairs Q & A',
+ },
+ },
# {
# # TODO: find another test
# # http://schema.org/VideoObject
@@ -1996,7 +2028,7 @@ class GenericIE(InfoExtractor):
if head_response is not False:
# Check for redirect
- new_url = head_response.geturl()
+ new_url = compat_str(head_response.geturl())
if url != new_url:
self.report_following_redirect(new_url)
if force_videoid:
@@ -2097,7 +2129,7 @@ class GenericIE(InfoExtractor):
elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
info_dict['formats'] = self._parse_mpd_formats(
doc, video_id,
- mpd_base_url=full_response.geturl().rpartition('/')[0],
+ mpd_base_url=compat_str(full_response.geturl()).rpartition('/')[0],
mpd_url=url)
self._sort_formats(info_dict['formats'])
return info_dict
@@ -2313,6 +2345,7 @@ class GenericIE(InfoExtractor):
# Look for Ooyala videos
mobj = (re.search(r'player\.ooyala\.com/[^"?]+[?#][^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
re.search(r'OO\.Player\.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
+ re.search(r'OO\.Player\.create\.apply\(\s*OO\.Player\s*,\s*op\(\s*\[\s*[\'"][^\'"]*[\'"]\s*,\s*[\'"](?P<ec>.{32})[\'"]', webpage) or
re.search(r'SBN\.VideoLinkset\.ooyala\([\'"](?P<ec>.{32})[\'"]\)', webpage) or
re.search(r'data-ooyala-video-id\s*=\s*[\'"](?P<ec>.{32})[\'"]', webpage))
if mobj is not None:
@@ -2737,9 +2770,9 @@ class GenericIE(InfoExtractor):
self._proto_relative_url(instagram_embed_url), InstagramIE.ie_key())
# Look for LiveLeak embeds
- liveleak_url = LiveLeakIE._extract_url(webpage)
- if liveleak_url:
- return self.url_result(liveleak_url, 'LiveLeak')
+ liveleak_urls = LiveLeakIE._extract_urls(webpage)
+ if liveleak_urls:
+ return self.playlist_from_matches(liveleak_urls, video_id, video_title)
# Look for 3Q SDN embeds
threeqsdn_url = ThreeQSDNIE._extract_url(webpage)
@@ -2811,6 +2844,18 @@ class GenericIE(InfoExtractor):
return self.playlist_from_matches(
joj_urls, video_id, video_title, ie=JojIE.ie_key())
+ # Look for megaphone.fm embeds
+ mpfn_urls = MegaphoneIE._extract_urls(webpage)
+ if mpfn_urls:
+ return self.playlist_from_matches(
+ mpfn_urls, video_id, video_title, ie=MegaphoneIE.ie_key())
+
+ # Look for vzaar embeds
+ vzaar_urls = VzaarIE._extract_urls(webpage)
+ if vzaar_urls:
+ return self.playlist_from_matches(
+ vzaar_urls, video_id, video_title, ie=VzaarIE.ie_key())
+
def merge_dicts(dict1, dict2):
merged = {}
for k, v in dict1.items():