aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl/extractor/generic.py
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/extractor/generic.py')
-rw-r--r--youtube_dl/extractor/generic.py706
1 files changed, 565 insertions, 141 deletions
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index f3de738f7..bde65fa27 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -1,4 +1,4 @@
-# encoding: utf-8
+# coding: utf-8
from __future__ import unicode_literals
@@ -27,7 +27,6 @@ from ..utils import (
unified_strdate,
unsmuggle_url,
UnsupportedError,
- url_basename,
xpath_text,
)
from .brightcove import (
@@ -48,10 +47,15 @@ from .svt import SVTIE
from .pornhub import PornHubIE
from .xhamster import XHamsterEmbedIE
from .tnaflix import TNAFlixNetworkEmbedIE
+from .drtuber import DrTuberIE
+from .redtube import RedTubeIE
from .vimeo import VimeoIE
-from .dailymotion import DailymotionCloudIE
+from .dailymotion import (
+ DailymotionIE,
+ DailymotionCloudIE,
+)
from .onionstudios import OnionStudiosIE
-from .snagfilms import SnagFilmsEmbedIE
+from .viewlift import ViewLiftEmbedIE
from .screenwavemedia import ScreenwaveMediaIE
from .mtv import MTVServicesEmbeddedIE
from .pladform import PladformIE
@@ -59,7 +63,18 @@ from .videomore import VideomoreIE
from .googledrive import GoogleDriveIE
from .jwplatform import JWPlatformIE
from .digiteka import DigitekaIE
+from .arkena import ArkenaIE
from .instagram import InstagramIE
+from .liveleak import LiveLeakIE
+from .threeqsdn import ThreeQSDNIE
+from .theplatform import ThePlatformIE
+from .vessel import VesselIE
+from .kaltura import KalturaIE
+from .eagleplatform import EaglePlatformIE
+from .facebook import FacebookIE
+from .soundcloud import SoundcloudIE
+from .vbox7 import Vbox7IE
+from .dbtv import DBTVIE
class GenericIE(InfoExtractor):
@@ -90,7 +105,8 @@ class GenericIE(InfoExtractor):
},
'expected_warnings': [
'URL could be a direct video link, returning it as such.'
- ]
+ ],
+ 'skip': 'URL invalid',
},
# Direct download with broken HEAD
{
@@ -104,7 +120,8 @@ class GenericIE(InfoExtractor):
'skip_download': True, # infinite live stream
},
'expected_warnings': [
- r'501.*Not Implemented'
+ r'501.*Not Implemented',
+ r'400.*Bad Request',
],
},
# Direct link with incorrect MIME type
@@ -235,6 +252,7 @@ class GenericIE(InfoExtractor):
'ext': 'mp4',
'title': 'car-20120827-manifest',
'formats': 'mincount:9',
+ 'upload_date': '20130904',
},
'params': {
'format': 'bestvideo',
@@ -252,7 +270,8 @@ class GenericIE(InfoExtractor):
'params': {
# m3u8 downloads
'skip_download': True,
- }
+ },
+ 'skip': 'video gone',
},
# m3u8 served with Content-Type: text/plain
{
@@ -267,7 +286,8 @@ class GenericIE(InfoExtractor):
'params': {
# m3u8 downloads
'skip_download': True,
- }
+ },
+ 'skip': 'video gone',
},
# google redirect
{
@@ -352,6 +372,7 @@ class GenericIE(InfoExtractor):
'description': 'Mississauga resident David Farmer is still out of power as a result of the ice storm a month ago. To keep the house warm, Farmer cuts wood from his property for a wood burning stove downstairs.',
},
'add_ie': ['BrightcoveLegacy'],
+ 'skip': 'video gone',
},
{
'url': 'http://www.championat.com/video/football/v/87/87499.html',
@@ -405,19 +426,7 @@ class GenericIE(InfoExtractor):
'params': {
'skip_download': True,
},
- },
- # multiple ooyala embeds on SBN network websites
- {
- 'url': 'http://www.sbnation.com/college-football-recruiting/2015/2/3/7970291/national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
- 'info_dict': {
- 'id': 'national-signing-day-rationalizations-itll-be-ok-itll-be-ok',
- 'title': '25 lies you will tell yourself on National Signing Day - SBNation.com',
- },
- 'playlist_mincount': 3,
- 'params': {
- 'skip_download': True,
- },
- 'add_ie': ['Ooyala'],
+ 'skip': 'movie expired',
},
# embed.ly video
{
@@ -445,6 +454,8 @@ class GenericIE(InfoExtractor):
'title': 'Between Two Ferns with Zach Galifianakis: President Barack Obama',
'description': 'Episode 18: President Barack Obama sits down with Zach Galifianakis for his most memorable interview yet.',
},
+ # HEAD requests lead to endless 301, while GET is OK
+ 'expected_warnings': ['301'],
},
# RUTV embed
{
@@ -474,7 +485,7 @@ class GenericIE(InfoExtractor):
'url': 'http://www.vestifinance.ru/articles/25753',
'info_dict': {
'id': '25753',
- 'title': 'Вести Экономика ― Прямые трансляции с Форума-выставки "Госзаказ-2013"',
+ 'title': 'Прямые трансляции с Форума-выставки "Госзаказ-2013"',
},
'playlist': [{
'info_dict': {
@@ -519,6 +530,9 @@ class GenericIE(InfoExtractor):
'title': '[NSFL] [FM15] which pumiscer was this ( vid ) ( alfa as fuck srx )',
},
'playlist_mincount': 7,
+ # This forum does not allow <iframe> syntaxes anymore
+ # Now HTML tags are displayed as-is
+ 'skip': 'No videos on this page',
},
# Embedded TED video
{
@@ -567,7 +581,8 @@ class GenericIE(InfoExtractor):
},
'params': {
'skip_download': 'Requires rtmpdump'
- }
+ },
+ 'skip': 'video gone',
},
# francetv embed
{
@@ -607,7 +622,11 @@ class GenericIE(InfoExtractor):
'id': 'k2mm4bCdJ6CQ2i7c8o2',
'ext': 'mp4',
'title': 'Le Zap de Spi0n n°216 - Zapping du Web',
+ 'description': 'md5:faf028e48a461b8b7fad38f1e104b119',
'uploader': 'Spi0n',
+ 'uploader_id': 'xgditw',
+ 'upload_date': '20140425',
+ 'timestamp': 1398441542,
},
'add_ie': ['Dailymotion'],
},
@@ -630,13 +649,15 @@ class GenericIE(InfoExtractor):
},
# MTVSercices embed
{
- 'url': 'http://www.gametrailers.com/news-post/76093/north-america-europe-is-getting-that-mario-kart-8-mercedes-dlc-too',
- 'md5': '35727f82f58c76d996fc188f9755b0d5',
+ 'url': 'http://www.vulture.com/2016/06/new-key-peele-sketches-released.html',
+ 'md5': 'ca1aef97695ef2c1d6973256a57e5252',
'info_dict': {
- 'id': '0306a69b-8adf-4fb5-aace-75f8e8cbfca9',
+ 'id': '769f7ec0-0692-4d62-9b45-0d88074bffc1',
'ext': 'mp4',
- 'title': 'Review',
- 'description': 'Mario\'s life in the fast lane has never looked so good.',
+ 'title': 'Key and Peele|October 10, 2012|2|203|Liam Neesons - Uncensored',
+ 'description': 'Two valets share their love for movie star Liam Neesons.',
+ 'timestamp': 1349922600,
+ 'upload_date': '20121011',
},
},
# YouTube embed via <data-embed-url="">
@@ -722,15 +743,18 @@ class GenericIE(InfoExtractor):
},
# Wistia embed
{
- 'url': 'http://education-portal.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
- 'md5': '8788b683c777a5cf25621eaf286d0c23',
+ 'url': 'http://study.com/academy/lesson/north-american-exploration-failed-colonies-of-spain-france-england.html#lesson',
+ 'md5': '1953f3a698ab51cfc948ed3992a0b7ff',
'info_dict': {
- 'id': '1cfaf6b7ea',
+ 'id': '6e2wtrbdaf',
'ext': 'mov',
- 'title': 'md5:51364a8d3d009997ba99656004b5e20d',
- 'duration': 643.0,
- 'filesize': 182808282,
- 'uploader': 'education-portal.com',
+ 'title': 'paywall_north-american-exploration-failed-colonies-of-spain-france-england',
+ 'description': 'a Paywall Videos video from Remilon',
+ 'duration': 644.072,
+ 'uploader': 'study.com',
+ 'timestamp': 1459678540,
+ 'upload_date': '20160403',
+ 'filesize': 24687186,
},
},
{
@@ -739,11 +763,30 @@ class GenericIE(InfoExtractor):
'info_dict': {
'id': 'uxjb0lwrcz',
'ext': 'mp4',
- 'title': 'Conversation about Hexagonal Rails Part 1 - ThoughtWorks',
+ 'title': 'Conversation about Hexagonal Rails Part 1',
+ 'description': 'a Martin Fowler video from ThoughtWorks',
'duration': 1715.0,
'uploader': 'thoughtworks.wistia.com',
+ 'timestamp': 1401832161,
+ 'upload_date': '20140603',
},
},
+ # Wistia standard embed (async)
+ {
+ 'url': 'https://www.getdrip.com/university/brennan-dunn-drip-workshop/',
+ 'info_dict': {
+ 'id': '807fafadvk',
+ 'ext': 'mp4',
+ 'title': 'Drip Brennan Dunn Workshop',
+ 'description': 'a JV Webinars video from getdrip-1',
+ 'duration': 4986.95,
+ 'timestamp': 1463607249,
+ 'upload_date': '20160518',
+ },
+ 'params': {
+ 'skip_download': True,
+ }
+ },
# Soundcloud embed
{
'url': 'http://nakedsecurity.sophos.com/2014/10/29/sscc-171-are-you-sure-that-1234-is-a-bad-password-podcast/',
@@ -756,6 +799,15 @@ class GenericIE(InfoExtractor):
'upload_date': '20141029',
}
},
+ # Soundcloud multiple embeds
+ {
+ 'url': 'http://www.guitarplayer.com/lessons/1014/legato-workout-one-hour-to-more-fluid-performance---tab/52809',
+ 'info_dict': {
+ 'id': '52809',
+ 'title': 'Guitar Essentials: Legato Workout—One-Hour to Fluid Performance | TAB + AUDIO',
+ },
+ 'playlist_mincount': 7,
+ },
# Livestream embed
{
'url': 'http://www.esa.int/Our_Activities/Space_Science/Rosetta/Philae_comet_touch-down_webcast',
@@ -766,6 +818,19 @@ class GenericIE(InfoExtractor):
'title': 'Rosetta #CometLanding webcast HL 10',
}
},
+ # Another Livestream embed, without 'new.' in URL
+ {
+ 'url': 'https://www.freespeech.org/',
+ 'info_dict': {
+ 'id': '123537347',
+ 'ext': 'mp4',
+ 'title': 're:^FSTV [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
+ },
+ 'params': {
+ # Live stream
+ 'skip_download': True,
+ },
+ },
# LazyYT
{
'url': 'http://discourse.ubuntu.com/t/unity-8-desktop-mode-windows-on-mir/1986',
@@ -818,6 +883,7 @@ class GenericIE(InfoExtractor):
'description': 'md5:601cb790edd05908957dae8aaa866465',
'upload_date': '20150220',
},
+ 'skip': 'All The Daily Show URLs now redirect to http://www.cc.com/shows/',
},
# jwplayer YouTube
{
@@ -850,18 +916,6 @@ class GenericIE(InfoExtractor):
'title': 'EP3S5 - Bon Appétit - Baqueira Mi Corazon !',
}
},
- # Kaltura embed
- {
- 'url': 'http://www.monumentalnetwork.com/videos/john-carlson-postgame-2-25-15',
- 'info_dict': {
- 'id': '1_eergr3h1',
- 'ext': 'mp4',
- 'upload_date': '20150226',
- 'uploader_id': 'MonumentalSports-Kaltura@perfectsensedigital.com',
- 'timestamp': int,
- 'title': 'John Carlson Postgame 2/25/15',
- },
- },
# Kaltura embed (different embed code)
{
'url': 'http://www.premierchristianradio.com/Shows/Saturday/Unbelievable/Conference-Videos/Os-Guinness-Is-It-Fools-Talk-Unbelievable-Conference-2014',
@@ -887,9 +941,41 @@ class GenericIE(InfoExtractor):
'uploader_id': 'echojecka',
},
},
+ # Kaltura embed with single quotes
+ {
+ 'url': 'http://fod.infobase.com/p_ViewPlaylist.aspx?AssignmentID=NUN8ZY',
+ 'info_dict': {
+ 'id': '0_izeg5utt',
+ 'ext': 'mp4',
+ 'title': '35871',
+ 'timestamp': 1355743100,
+ 'upload_date': '20121217',
+ 'uploader_id': 'batchUser',
+ },
+ 'add_ie': ['Kaltura'],
+ },
+ {
+ # Kaltura embedded via quoted entry_id
+ 'url': 'https://www.oreilly.com/ideas/my-cloud-makes-pretty-pictures',
+ 'info_dict': {
+ 'id': '0_utuok90b',
+ 'ext': 'mp4',
+ 'title': '06_matthew_brender_raj_dutt',
+ 'timestamp': 1466638791,
+ 'upload_date': '20160622',
+ },
+ 'add_ie': ['Kaltura'],
+ 'expected_warnings': [
+ 'Could not send HEAD request'
+ ],
+ 'params': {
+ 'skip_download': True,
+ }
+ },
# Eagle.Platform embed (generic URL)
{
'url': 'http://lenta.ru/news/2015/03/06/navalny/',
+ # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
'info_dict': {
'id': '227304',
'ext': 'mp4',
@@ -904,6 +990,7 @@ class GenericIE(InfoExtractor):
# ClipYou (Eagle.Platform) embed (custom URL)
{
'url': 'http://muz-tv.ru/play/7129/',
+ # Not checking MD5 as sometimes the direct HTTP link results in 404 and HLS is used
'info_dict': {
'id': '12820',
'ext': 'mp4',
@@ -992,18 +1079,36 @@ class GenericIE(InfoExtractor):
'ext': 'flv',
'title': "PFT Live: New leader in the 'new-look' defense",
'description': 'md5:65a19b4bbfb3b0c0c5768bed1dfad74e',
+ 'uploader': 'NBCU-SPORTS',
+ 'upload_date': '20140107',
+ 'timestamp': 1389118457,
+ },
+ },
+ # NBC News embed
+ {
+ 'url': 'http://www.vulture.com/2016/06/letterman-couldnt-care-less-about-late-night.html',
+ 'md5': '1aa589c675898ae6d37a17913cf68d66',
+ 'info_dict': {
+ 'id': '701714499682',
+ 'ext': 'mp4',
+ 'title': 'PREVIEW: On Assignment: David Letterman',
+ 'description': 'A preview of Tom Brokaw\'s interview with David Letterman as part of the On Assignment series powered by Dateline. Airs Sunday June 12 at 7/6c.',
},
},
# UDN embed
{
- 'url': 'http://www.udn.com/news/story/7314/822787',
+ 'url': 'https://video.udn.com/news/300346',
'md5': 'fd2060e988c326991037b9aff9df21a6',
'info_dict': {
'id': '300346',
'ext': 'mp4',
'title': '中一中男師變性 全校師生力挺',
'thumbnail': 're:^https?://.*\.jpg$',
- }
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
},
# Ooyala embed
{
@@ -1020,20 +1125,6 @@ class GenericIE(InfoExtractor):
'skip_download': True,
}
},
- # Contains a SMIL manifest
- {
- 'url': 'http://www.telewebion.com/fa/1263668/%D9%82%D8%B1%D8%B9%D9%87%E2%80%8C%DA%A9%D8%B4%DB%8C-%D9%84%DB%8C%DA%AF-%D9%82%D9%87%D8%B1%D9%85%D8%A7%D9%86%D8%A7%D9%86-%D8%A7%D8%B1%D9%88%D9%BE%D8%A7/%2B-%D9%81%D9%88%D8%AA%D8%A8%D8%A7%D9%84.html',
- 'info_dict': {
- 'id': 'file',
- 'ext': 'flv',
- 'title': '+ Football: Lottery Champions League Europe',
- 'uploader': 'www.telewebion.com',
- },
- 'params': {
- # rtmpe downloads
- 'skip_download': True,
- }
- },
# Brightcove URL in single quotes
{
'url': 'http://www.sportsnet.ca/baseball/mlb/sn-presents-russell-martin-world-citizen/',
@@ -1044,17 +1135,25 @@ class GenericIE(InfoExtractor):
'title': 'SN Presents: Russell Martin, World Citizen',
'description': 'To understand why he was the Toronto Blue Jays’ top off-season priority is to appreciate his background and upbringing in Montreal, where he first developed his baseball skills. Written and narrated by Stephen Brunt.',
'uploader': 'Rogers Sportsnet',
+ 'uploader_id': '1704050871',
+ 'upload_date': '20150525',
+ 'timestamp': 1432570283,
},
},
# Dailymotion Cloud video
{
'url': 'http://replay.publicsenat.fr/vod/le-debat/florent-kolandjian,dominique-cena,axel-decourtye,laurence-abeille,bruno-parmentier/175910',
- 'md5': '49444254273501a64675a7e68c502681',
+ 'md5': 'dcaf23ad0c67a256f4278bce6e0bae38',
'info_dict': {
- 'id': '5585de919473990de4bee11b',
+ 'id': 'x2uy8t3',
'ext': 'mp4',
- 'title': 'Le débat',
+ 'title': 'Sauvons les abeilles ! - Le débat',
+ 'description': 'md5:d9082128b1c5277987825d684939ca26',
'thumbnail': 're:^https?://.*\.jpe?g$',
+ 'timestamp': 1434970506,
+ 'upload_date': '20150622',
+ 'uploader': 'Public Sénat',
+ 'uploader_id': 'xa9gza',
}
},
# OnionStudios embed
@@ -1111,36 +1210,212 @@ class GenericIE(InfoExtractor):
'duration': 51690,
},
},
- # JWPlayer with M3U8
+ # Brightcove embed, with no valid 'renditions' but valid 'IOSRenditions'
+ # This video can't be played in browsers if Flash disabled and UA set to iPhone, which is actually a false alarm
{
- 'url': 'http://ren.tv/novosti/2015-09-25/sluchaynyy-prohozhiy-poymal-avtougonshchika-v-murmanske-video',
+ 'url': 'https://dl.dropboxusercontent.com/u/29092637/interview.html',
'info_dict': {
- 'id': 'playlist',
+ 'id': '4785848093001',
'ext': 'mp4',
- 'title': 'Случайный прохожий поймал автоугонщика в Мурманске. ВИДЕО | РЕН ТВ',
- 'uploader': 'ren.tv',
+ 'title': 'The Cardinal Pell Interview',
+ 'description': 'Sky News Contributor Andrew Bolt interviews George Pell in Rome, following the Cardinal\'s evidence before the Royal Commission into Child Abuse. ',
+ 'uploader': 'GlobeCast Australia - GlobeStream',
+ 'uploader_id': '2733773828001',
+ 'upload_date': '20160304',
+ 'timestamp': 1457083087,
},
'params': {
# m3u8 downloads
'skip_download': True,
+ },
+ },
+ # Another form of arte.tv embed
+ {
+ 'url': 'http://www.tv-replay.fr/redirection/09-04-16/arte-reportage-arte-11508975.html',
+ 'md5': '850bfe45417ddf221288c88a0cffe2e2',
+ 'info_dict': {
+ 'id': '030273-562_PLUS7-F',
+ 'ext': 'mp4',
+ 'title': 'ARTE Reportage - Nulle part, en France',
+ 'description': 'md5:e3a0e8868ed7303ed509b9e3af2b870d',
+ 'upload_date': '20160409',
+ },
+ },
+ # LiveLeak embed
+ {
+ 'url': 'http://www.wykop.pl/link/3088787/',
+ 'md5': 'ace83b9ed19b21f68e1b50e844fdf95d',
+ 'info_dict': {
+ 'id': '874_1459135191',
+ 'ext': 'mp4',
+ 'title': 'Man shows poor quality of new apartment building',
+ 'description': 'The wall is like a sand pile.',
+ 'uploader': 'Lake8737',
}
},
- # Brightcove embed, with no valid 'renditions' but valid 'IOSRenditions'
- # This video can't be played in browsers if Flash disabled and UA set to iPhone, which is actually a false alarm
+ # Duplicated embedded video URLs
{
- 'url': 'https://dl.dropboxusercontent.com/u/29092637/interview.html',
+ 'url': 'http://www.hudl.com/athlete/2538180/highlights/149298443',
'info_dict': {
- 'id': '4785848093001',
+ 'id': '149298443_480_16c25b74_2',
'ext': 'mp4',
- 'title': 'The Cardinal Pell Interview',
- 'description': 'Sky News Contributor Andrew Bolt interviews George Pell in Rome, following the Cardinal\'s evidence before the Royal Commission into Child Abuse. ',
- 'uploader': 'GlobeCast Australia - GlobeStream',
+ 'title': 'vs. Blue Orange Spring Game',
+ 'uploader': 'www.hudl.com',
+ },
+ },
+ # twitter:player:stream embed
+ {
+ 'url': 'http://www.rtl.be/info/video/589263.aspx?CategoryID=288',
+ 'info_dict': {
+ 'id': 'master',
+ 'ext': 'mp4',
+ 'title': 'Une nouvelle espèce de dinosaure découverte en Argentine',
+ 'uploader': 'www.rtl.be',
},
'params': {
# m3u8 downloads
'skip_download': True,
},
},
+ # twitter:player embed
+ {
+ 'url': 'http://www.theatlantic.com/video/index/484130/what-do-black-holes-sound-like/',
+ 'md5': 'a3e0df96369831de324f0778e126653c',
+ 'info_dict': {
+ 'id': '4909620399001',
+ 'ext': 'mp4',
+ 'title': 'What Do Black Holes Sound Like?',
+ 'description': 'what do black holes sound like',
+ 'upload_date': '20160524',
+ 'uploader_id': '29913724001',
+ 'timestamp': 1464107587,
+ 'uploader': 'TheAtlantic',
+ },
+ 'add_ie': ['BrightcoveLegacy'],
+ },
+ # Facebook <iframe> embed
+ {
+ 'url': 'https://www.hostblogger.de/blog/archives/6181-Auto-jagt-Betonmischer.html',
+ 'md5': 'fbcde74f534176ecb015849146dd3aee',
+ 'info_dict': {
+ 'id': '599637780109885',
+ 'ext': 'mp4',
+ 'title': 'Facebook video #599637780109885',
+ },
+ },
+ # Facebook API embed
+ {
+ 'url': 'http://www.lothype.com/blue-stars-2016-preview-standstill-full-show/',
+ 'md5': 'a47372ee61b39a7b90287094d447d94e',
+ 'info_dict': {
+ 'id': '10153467542406923',
+ 'ext': 'mp4',
+ 'title': 'Facebook video #10153467542406923',
+ },
+ },
+ # Wordpress "YouTube Video Importer" plugin
+ {
+ 'url': 'http://www.lothype.com/blue-devils-drumline-stanford-lot-2016/',
+ 'md5': 'd16797741b560b485194eddda8121b48',
+ 'info_dict': {
+ 'id': 'HNTXWDXV9Is',
+ 'ext': 'mp4',
+ 'title': 'Blue Devils Drumline Stanford lot 2016',
+ 'upload_date': '20160627',
+ 'uploader_id': 'GENOCIDE8GENERAL10',
+ 'uploader': 'cylus cyrus',
+ },
+ },
+ {
+ # video stored on custom kaltura server
+ 'url': 'http://www.expansion.com/multimedia/videos.html?media=EQcM30NHIPv',
+ 'md5': '537617d06e64dfed891fa1593c4b30cc',
+ 'info_dict': {
+ 'id': '0_1iotm5bh',
+ 'ext': 'mp4',
+ 'title': 'Elecciones británicas: 5 lecciones para Rajoy',
+ 'description': 'md5:435a89d68b9760b92ce67ed227055f16',
+ 'uploader_id': 'videos.expansion@el-mundo.net',
+ 'upload_date': '20150429',
+ 'timestamp': 1430303472,
+ },
+ 'add_ie': ['Kaltura'],
+ },
+ {
+ # Non-standard Vimeo embed
+ 'url': 'https://openclassrooms.com/courses/understanding-the-web',
+ 'md5': '64d86f1c7d369afd9a78b38cbb88d80a',
+ 'info_dict': {
+ 'id': '148867247',
+ 'ext': 'mp4',
+ 'title': 'Understanding the web - Teaser',
+ 'description': 'This is "Understanding the web - Teaser" by openclassrooms on Vimeo, the home for high quality videos and the people who love them.',
+ 'upload_date': '20151214',
+ 'uploader': 'OpenClassrooms',
+ 'uploader_id': 'openclassrooms',
+ },
+ 'add_ie': ['Vimeo'],
+ },
+ {
+ # generic vimeo embed that requires original URL passed as Referer
+ 'url': 'http://racing4everyone.eu/2016/07/30/formula-1-2016-round12-germany/',
+ 'only_matching': True,
+ },
+ {
+ 'url': 'https://support.arkena.com/display/PLAY/Ways+to+embed+your+video',
+ 'md5': 'b96f2f71b359a8ecd05ce4e1daa72365',
+ 'info_dict': {
+ 'id': 'b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe',
+ 'ext': 'mp4',
+ 'title': 'Big Buck Bunny',
+ 'description': 'Royalty free test video',
+ 'timestamp': 1432816365,
+ 'upload_date': '20150528',
+ 'is_live': False,
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': [ArkenaIE.ie_key()],
+ },
+ {
+ 'url': 'http://nova.bg/news/view/2016/08/16/156543/%D0%BD%D0%B0-%D0%BA%D0%BE%D1%81%D1%8A%D0%BC-%D0%BE%D1%82-%D0%B2%D0%B7%D1%80%D0%B8%D0%B2-%D0%BE%D1%82%D1%86%D0%B5%D0%BF%D0%B8%D1%85%D0%B0-%D1%86%D1%8F%D0%BB-%D0%BA%D0%B2%D0%B0%D1%80%D1%82%D0%B0%D0%BB-%D0%B7%D0%B0%D1%80%D0%B0%D0%B4%D0%B8-%D0%B8%D0%B7%D1%82%D0%B8%D1%87%D0%B0%D0%BD%D0%B5-%D0%BD%D0%B0-%D0%B3%D0%B0%D0%B7-%D0%B2-%D0%BF%D0%BB%D0%BE%D0%B2%D0%B4%D0%B8%D0%B2/',
+ 'info_dict': {
+ 'id': '1c7141f46c',
+ 'ext': 'mp4',
+ 'title': 'НА КОСЪМ ОТ ВЗРИВ: Изтичане на газ на бензиностанция в Пловдив',
+ },
+ 'params': {
+ 'skip_download': True,
+ },
+ 'add_ie': [Vbox7IE.ie_key()],
+ },
+ {
+ # DBTV embeds
+ 'url': 'http://www.dagbladet.no/2016/02/23/nyheter/nordlys/ski/troms/ver/43254897/',
+ 'info_dict': {
+ 'id': '43254897',
+ 'title': 'Etter ett års planlegging, klaffet endelig alt: - Jeg måtte ta en liten dans',
+ },
+ 'playlist_mincount': 3,
+ },
+ # {
+ # # TODO: find another test
+ # # http://schema.org/VideoObject
+ # 'url': 'https://flipagram.com/f/nyvTSJMKId',
+ # 'md5': '888dcf08b7ea671381f00fab74692755',
+ # 'info_dict': {
+ # 'id': 'nyvTSJMKId',
+ # 'ext': 'mp4',
+ # 'title': 'Flipagram by sjuria101 featuring Midnight Memories by One Direction',
+ # 'description': '#love for cats.',
+ # 'timestamp': 1461244995,
+ # 'upload_date': '20160421',
+ # },
+ # 'params': {
+ # 'force_generic_extractor': True,
+ # },
+ # }
]
def report_following_redirect(self, new_url):
@@ -1261,7 +1536,7 @@ class GenericIE(InfoExtractor):
force_videoid = smuggled_data['force_videoid']
video_id = force_videoid
else:
- video_id = compat_urllib_parse_unquote(os.path.splitext(url.rstrip('/').split('/')[-1])[0])
+ video_id = self._generic_id(url)
self.to_screen('%s: Requesting header' % video_id)
@@ -1290,7 +1565,7 @@ class GenericIE(InfoExtractor):
info_dict = {
'id': video_id,
- 'title': compat_urllib_parse_unquote(os.path.splitext(url_basename(url))[0]),
+ 'title': self._generic_title(url),
'upload_date': unified_strdate(head_response.headers.get('Last-Modified'))
}
@@ -1361,6 +1636,10 @@ class GenericIE(InfoExtractor):
doc = compat_etree_fromstring(webpage.encode('utf-8'))
if doc.tag == 'rss':
return self._extract_rss(url, video_id, doc)
+ elif doc.tag == 'SmoothStreamingMedia':
+ info_dict['formats'] = self._parse_ism_formats(doc, url)
+ self._sort_formats(info_dict['formats'])
+ return info_dict
elif re.match(r'^(?:{[^}]+})?smil$', doc.tag):
smil = self._parse_smil(doc, url, video_id)
self._sort_formats(smil['formats'])
@@ -1369,7 +1648,9 @@ class GenericIE(InfoExtractor):
return self.playlist_result(self._parse_xspf(doc, video_id), video_id)
elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):
info_dict['formats'] = self._parse_mpd_formats(
- doc, video_id, mpd_base_url=url.rpartition('/')[0])
+ doc, video_id,
+ mpd_base_url=full_response.geturl().rpartition('/')[0],
+ mpd_url=url)
self._sort_formats(info_dict['formats'])
return info_dict
elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag):
@@ -1395,7 +1676,8 @@ class GenericIE(InfoExtractor):
# Site Name | Video Title
# Video Title - Tagline | Site Name
# and so on and so forth; it's just not practical
- video_title = self._html_search_regex(
+ video_title = self._og_search_title(
+ webpage, default=None) or self._html_search_regex(
r'(?s)<title>(.*?)</title>', webpage, 'video title',
default='video')
@@ -1413,6 +1695,9 @@ class GenericIE(InfoExtractor):
video_uploader = self._search_regex(
r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
+ video_description = self._og_search_description(webpage, default=None)
+ video_thumbnail = self._og_search_thumbnail(webpage, default=None)
+
# Helper method
def _playlist_from_matches(matches, getter=None, ie=None):
urlrs = orderedSet(
@@ -1443,6 +1728,16 @@ class GenericIE(InfoExtractor):
if bc_urls:
return _playlist_from_matches(bc_urls, ie='BrightcoveNew')
+ # Look for ThePlatform embeds
+ tp_urls = ThePlatformIE._extract_urls(webpage)
+ if tp_urls:
+ return _playlist_from_matches(tp_urls, ie='ThePlatform')
+
+ # Look for Vessel embeds
+ vessel_urls = VesselIE._extract_urls(webpage)
+ if vessel_urls:
+ return _playlist_from_matches(vessel_urls, ie=VesselIE.ie_key())
+
# Look for embedded rtl.nl player
matches = re.findall(
r'<iframe[^>]+?src="((?:https?:)?//(?:www\.)?rtl\.nl/system/videoplayer/[^"]+(?:video_)?embed[^"]+)"',
@@ -1450,9 +1745,9 @@ class GenericIE(InfoExtractor):
if matches:
return _playlist_from_matches(matches, ie='RtlNl')
- vimeo_url = VimeoIE._extract_vimeo_url(url, webpage)
- if vimeo_url is not None:
- return self.url_result(vimeo_url)
+ vimeo_urls = VimeoIE._extract_urls(url, webpage)
+ if vimeo_urls:
+ return _playlist_from_matches(vimeo_urls, ie=VimeoIE.ie_key())
vid_me_embed_url = self._search_regex(
r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
@@ -1483,12 +1778,16 @@ class GenericIE(InfoExtractor):
if matches:
return _playlist_from_matches(matches, lambda m: unescapeHTML(m))
- # Look for embedded Dailymotion player
- matches = re.findall(
- r'<(?:(?:embed|iframe)[^>]+?src=|input[^>]+id=[\'"]dmcloudUrlEmissionSelect[\'"][^>]+value=)(["\'])(?P<url>(?:https?:)?//(?:www\.)?dailymotion\.com/(?:embed|swf)/video/.+?)\1', webpage)
+ # Look for Wordpress "YouTube Video Importer" plugin
+ matches = re.findall(r'''(?x)<div[^>]+
+ class=(?P<q1>[\'"])[^\'"]*\byvii_single_video_player\b[^\'"]*(?P=q1)[^>]+
+ data-video_id=(?P<q2>[\'"])([^\'"]+)(?P=q2)''', webpage)
if matches:
- return _playlist_from_matches(
- matches, lambda m: unescapeHTML(m[1]))
+ return _playlist_from_matches(matches, lambda m: m[-1])
+
+ matches = DailymotionIE._extract_urls(webpage)
+ if matches:
+ return _playlist_from_matches(matches)
# Look for embedded Dailymotion playlist player (#3822)
m = re.search(
@@ -1511,21 +1810,26 @@ class GenericIE(InfoExtractor):
'url': embed_url,
'ie_key': 'Wistia',
'uploader': video_uploader,
- 'title': video_title,
- 'id': video_id,
}
match = re.search(r'(?:id=["\']wistia_|data-wistia-?id=["\']|Wistia\.embed\(["\'])(?P<id>[^"\']+)', webpage)
if match:
return {
'_type': 'url_transparent',
- 'url': 'http://fast.wistia.net/embed/iframe/{0:}'.format(match.group('id')),
+ 'url': 'wistia:%s' % match.group('id'),
'ie_key': 'Wistia',
'uploader': video_uploader,
- 'title': video_title,
- 'id': match.group('id')
}
+ match = re.search(
+ r'''(?sx)
+ <script[^>]+src=(["'])(?:https?:)?//fast\.wistia\.com/assets/external/E-v1\.js\1[^>]*>.*?
+ <div[^>]+class=(["']).*?\bwistia_async_(?P<id>[a-z0-9]+)\b.*?\2
+ ''', webpage)
+ if match:
+ return self.url_result(self._proto_relative_url(
+ 'wistia:%s' % match.group('id')), 'Wistia')
+
# Look for SVT player
svt_url = SVTIE._extract_url(webpage)
if svt_url:
@@ -1620,10 +1924,9 @@ class GenericIE(InfoExtractor):
return self.url_result(mobj.group('url'))
# Look for embedded Facebook player
- mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>https://www\.facebook\.com/video/embed.+?)\1', webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'), 'Facebook')
+ facebook_url = FacebookIE._extract_url(webpage)
+ if facebook_url is not None:
+ return self.url_result(facebook_url, 'Facebook')
# Look for embedded VK player
mobj = re.search(r'<iframe[^>]+?src=(["\'])(?P<url>https?://vk\.com/video_ext\.php.+?)\1', webpage)
@@ -1680,11 +1983,6 @@ class GenericIE(InfoExtractor):
if sportbox_urls:
return _playlist_from_matches(sportbox_urls, ie='SportBoxEmbed')
- # Look for embedded PornHub player
- pornhub_url = PornHubIE._extract_url(webpage)
- if pornhub_url:
- return self.url_result(pornhub_url, 'PornHub')
-
# Look for embedded XHamster player
xhamster_urls = XHamsterEmbedIE._extract_urls(webpage)
if xhamster_urls:
@@ -1695,6 +1993,21 @@ class GenericIE(InfoExtractor):
if tnaflix_urls:
return _playlist_from_matches(tnaflix_urls, ie=TNAFlixNetworkEmbedIE.ie_key())
+ # Look for embedded PornHub player
+ pornhub_urls = PornHubIE._extract_urls(webpage)
+ if pornhub_urls:
+ return _playlist_from_matches(pornhub_urls, ie=PornHubIE.ie_key())
+
+ # Look for embedded DrTuber player
+ drtuber_urls = DrTuberIE._extract_urls(webpage)
+ if drtuber_urls:
+ return _playlist_from_matches(drtuber_urls, ie=DrTuberIE.ie_key())
+
+ # Look for embedded RedTube player
+ redtube_urls = RedTubeIE._extract_urls(webpage)
+ if redtube_urls:
+ return _playlist_from_matches(redtube_urls, ie=RedTubeIE.ie_key())
+
# Look for embedded Tvigle player
mobj = re.search(
r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//cloud\.tvigle\.ru/video/.+?)\1', webpage)
@@ -1715,7 +2028,7 @@ class GenericIE(InfoExtractor):
# Look for embedded arte.tv player
mobj = re.search(
- r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
+ r'<(?:script|iframe) [^>]*?src="(?P<url>http://www\.arte\.tv/(?:playerv2/embed|arte_vp/index)[^"]+)"',
webpage)
if mobj is not None:
return self.url_result(mobj.group('url'), 'ArteTVEmbed')
@@ -1738,20 +2051,9 @@ class GenericIE(InfoExtractor):
return self.url_result(myvi_url)
# Look for embedded soundcloud player
- mobj = re.search(
- r'<iframe\s+(?:[a-zA-Z0-9_-]+="[^"]+"\s+)*src="(?P<url>https?://(?:w\.)?soundcloud\.com/player[^"]+)"',
- webpage)
- if mobj is not None:
- url = unescapeHTML(mobj.group('url'))
- return self.url_result(url)
-
- # Look for embedded vulture.com player
- mobj = re.search(
- r'<iframe src="(?P<url>https?://video\.vulture\.com/[^"]+)"',
- webpage)
- if mobj is not None:
- url = unescapeHTML(mobj.group('url'))
- return self.url_result(url, ie='Vulture')
+ soundcloud_urls = SoundcloudIE._extract_urls(webpage)
+ if soundcloud_urls:
+ return _playlist_from_matches(soundcloud_urls, getter=unescapeHTML, ie=SoundcloudIE.ie_key())
# Look for embedded mtvservices player
mtvservices_url = MTVServicesEmbeddedIE._extract_url(webpage)
@@ -1801,7 +2103,7 @@ class GenericIE(InfoExtractor):
return self.url_result(self._proto_relative_url(mobj.group('url'), scheme='http:'), 'CondeNast')
mobj = re.search(
- r'<iframe[^>]+src="(?P<url>https?://new\.livestream\.com/[^"]+/player[^"]+)"',
+ r'<iframe[^>]+src="(?P<url>https?://(?:new\.)?livestream\.com/[^"]+/player[^"]+)"',
webpage)
if mobj is not None:
return self.url_result(mobj.group('url'), 'Livestream')
@@ -1813,18 +2115,14 @@ class GenericIE(InfoExtractor):
return self.url_result(mobj.group('url'), 'Zapiks')
# Look for Kaltura embeds
- mobj = (re.search(r"(?s)kWidget\.(?:thumb)?[Ee]mbed\(\{.*?'wid'\s*:\s*'_?(?P<partner_id>[^']+)',.*?'entry_?[Ii]d'\s*:\s*'(?P<id>[^']+)',", webpage) or
- re.search(r'(?s)(?P<q1>["\'])(?:https?:)?//cdnapi(?:sec)?\.kaltura\.com/.*?(?:p|partner_id)/(?P<partner_id>\d+).*?(?P=q1).*?entry_?[Ii]d\s*:\s*(?P<q2>["\'])(?P<id>.+?)(?P=q2)', webpage))
- if mobj is not None:
- return self.url_result(smuggle_url(
- 'kaltura:%(partner_id)s:%(id)s' % mobj.groupdict(),
- {'source_url': url}), 'Kaltura')
+ kaltura_url = KalturaIE._extract_url(webpage)
+ if kaltura_url:
+ return self.url_result(smuggle_url(kaltura_url, {'source_url': url}), KalturaIE.ie_key())
# Look for Eagle.Platform embeds
- mobj = re.search(
- r'<iframe[^>]+src="(?P<url>https?://.+?\.media\.eagleplatform\.com/index/player\?.+?)"', webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'), 'EaglePlatform')
+ eagleplatform_url = EaglePlatformIE._extract_url(webpage)
+ if eagleplatform_url:
+ return self.url_result(eagleplatform_url, EaglePlatformIE.ie_key())
# Look for ClipYou (uses Eagle.Platform) embeds
mobj = re.search(
@@ -1865,6 +2163,12 @@ class GenericIE(InfoExtractor):
if nbc_sports_url:
return self.url_result(nbc_sports_url, 'NBCSportsVPlayer')
+ # Look for NBC News embeds
+ nbc_news_embed_url = re.search(
+ r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//www\.nbcnews\.com/widget/video-embed/[^"\']+)\1', webpage)
+ if nbc_news_embed_url:
+ return self.url_result(nbc_news_embed_url.group('url'), 'NBCNews')
+
# Look for Google Drive embeds
google_drive_url = GoogleDriveIE._extract_url(webpage)
if google_drive_url:
@@ -1892,10 +2196,10 @@ class GenericIE(InfoExtractor):
if onionstudios_url:
return self.url_result(onionstudios_url)
- # Look for SnagFilms embeds
- snagfilms_url = SnagFilmsEmbedIE._extract_url(webpage)
- if snagfilms_url:
- return self.url_result(snagfilms_url)
+ # Look for ViewLift embeds
+ viewlift_url = ViewLiftEmbedIE._extract_url(webpage)
+ if viewlift_url:
+ return self.url_result(viewlift_url)
# Look for JWPlatform embeds
jwplatform_url = JWPlatformIE._extract_url(webpage)
@@ -1912,6 +2216,11 @@ class GenericIE(InfoExtractor):
if digiteka_url:
return self.url_result(self._proto_relative_url(digiteka_url), DigitekaIE.ie_key())
+ # Look for Arkena embeds
+ arkena_url = ArkenaIE._extract_url(webpage)
+ if arkena_url:
+ return self.url_result(arkena_url, ArkenaIE.ie_key())
+
# Look for Limelight embeds
mobj = re.search(r'LimelightPlayer\.doLoad(Media|Channel|ChannelList)\(["\'](?P<id>[a-z0-9]{32})', webpage)
if mobj:
@@ -1940,17 +2249,107 @@ class GenericIE(InfoExtractor):
return self.url_result(
self._proto_relative_url(unescapeHTML(mobj.group(1))), 'Vine')
+ # Look for VODPlatform embeds
+ mobj = re.search(
+ r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?vod-platform\.net/[eE]mbed/.+?)\1',
+ webpage)
+ if mobj is not None:
+ return self.url_result(
+ self._proto_relative_url(unescapeHTML(mobj.group('url'))), 'VODPlatform')
+
+ # Look for Mangomolo embeds
+ mobj = re.search(
+ r'''(?x)<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?admin\.mangomolo\.com/analytics/index\.php/customers/embed/
+ (?:
+ video\?.*?\bid=(?P<video_id>\d+)|
+ index\?.*?\bchannelid=(?P<channel_id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)
+ ).+?)\1''', webpage)
+ if mobj is not None:
+ info = {
+ '_type': 'url_transparent',
+ 'url': self._proto_relative_url(unescapeHTML(mobj.group('url'))),
+ 'title': video_title,
+ 'description': video_description,
+ 'thumbnail': video_thumbnail,
+ 'uploader': video_uploader,
+ }
+ video_id = mobj.group('video_id')
+ if video_id:
+ info.update({
+ 'ie_key': 'MangomoloVideo',
+ 'id': video_id,
+ })
+ else:
+ info.update({
+ 'ie_key': 'MangomoloLive',
+ 'id': mobj.group('channel_id'),
+ })
+ return info
+
# Look for Instagram embeds
instagram_embed_url = InstagramIE._extract_embed_url(webpage)
if instagram_embed_url is not None:
- return self.url_result(instagram_embed_url, InstagramIE.ie_key())
+ return self.url_result(
+ self._proto_relative_url(instagram_embed_url), InstagramIE.ie_key())
+
+ # Look for LiveLeak embeds
+ liveleak_url = LiveLeakIE._extract_url(webpage)
+ if liveleak_url:
+ return self.url_result(liveleak_url, 'LiveLeak')
+
+ # Look for 3Q SDN embeds
+ threeqsdn_url = ThreeQSDNIE._extract_url(webpage)
+ if threeqsdn_url:
+ return {
+ '_type': 'url_transparent',
+ 'ie_key': ThreeQSDNIE.ie_key(),
+ 'url': self._proto_relative_url(threeqsdn_url),
+ 'title': video_title,
+ 'description': video_description,
+ 'thumbnail': video_thumbnail,
+ 'uploader': video_uploader,
+ }
+
+ # Look for VBOX7 embeds
+ vbox7_url = Vbox7IE._extract_url(webpage)
+ if vbox7_url:
+ return self.url_result(vbox7_url, Vbox7IE.ie_key())
+
+ # Look for DBTV embeds
+ dbtv_urls = DBTVIE._extract_urls(webpage)
+ if dbtv_urls:
+ return _playlist_from_matches(dbtv_urls, ie=DBTVIE.ie_key())
+
+ # Looking for http://schema.org/VideoObject
+ json_ld = self._search_json_ld(
+ webpage, video_id, default={}, expected_type='VideoObject')
+ if json_ld.get('url'):
+ info_dict.update({
+ 'title': video_title or info_dict['title'],
+ 'description': video_description,
+ 'thumbnail': video_thumbnail,
+ 'age_limit': age_limit
+ })
+ info_dict.update(json_ld)
+ return info_dict
+
+ # Look for HTML5 media
+ entries = self._parse_html5_media_entries(url, webpage, video_id, m3u8_id='hls')
+ if entries:
+ for entry in entries:
+ entry.update({
+ 'id': video_id,
+ 'title': video_title,
+ })
+ self._sort_formats(entry['formats'])
+ return self.playlist_result(entries)
def check_video(vurl):
if YoutubeIE.suitable(vurl):
return True
vpath = compat_urlparse.urlparse(vurl).path
vext = determine_ext(vpath)
- return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml')
+ return '.' in vpath and vext not in ('swf', 'png', 'jpg', 'srt', 'sbv', 'sub', 'vtt', 'ttml', 'js')
def filter_video(urls):
return list(filter(check_video, urls))
@@ -1988,6 +2387,9 @@ class GenericIE(InfoExtractor):
r"cinerama\.embedPlayer\(\s*\'[^']+\',\s*'([^']+)'", webpage)
if not found:
# Try to find twitter cards info
+ # twitter:player:stream should be checked before twitter:player since
+ # it is expected to contain a raw stream (see
+ # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
found = filter_video(re.findall(
r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage))
if not found:
@@ -1998,9 +2400,6 @@ class GenericIE(InfoExtractor):
if m_video_type is not None:
found = filter_video(re.findall(r'<meta.*?property="og:video".*?content="(.*?)"', webpage))
if not found:
- # HTML5 video
- found = re.findall(r'(?s)<(?:video|audio)[^<]*(?:>.*?<source[^>]*)?\s+src=["\'](.*?)["\']', webpage)
- if not found:
REDIRECT_REGEX = r'[0-9]{,2};\s*(?:URL|url)=\'?([^\'"]+)'
found = re.search(
r'(?i)<meta\s+(?=(?:[a-z-]+="[^"]+"\s+)*http-equiv="refresh")'
@@ -2021,11 +2420,21 @@ class GenericIE(InfoExtractor):
'_type': 'url',
'url': new_url,
}
+
+ if not found:
+ # twitter:player is a https URL to iframe player that may or may not
+ # be supported by youtube-dl thus this is checked the very last (see
+ # https://dev.twitter.com/cards/types/player#On_twitter.com_via_desktop_browser)
+ embed_url = self._html_search_meta('twitter:player', webpage, default=None)
+ if embed_url:
+ return self.url_result(embed_url)
+
if not found:
raise UnsupportedError(url)
entries = []
- for video_url in found:
+ for video_url in orderedSet(found):
+ video_url = unescapeHTML(video_url)
video_url = video_url.replace('\\/', '/')
video_url = compat_urlparse.urljoin(url, video_url)
video_id = compat_urllib_parse_unquote(os.path.basename(video_url))
@@ -2056,6 +2465,21 @@ class GenericIE(InfoExtractor):
entry_info_dict['formats'] = self._extract_mpd_formats(video_url, video_id)
elif ext == 'f4m':
entry_info_dict['formats'] = self._extract_f4m_formats(video_url, video_id)
+ elif re.search(r'(?i)\.(?:ism|smil)/manifest', video_url) and video_url != url:
+ # Just matching .ism/manifest is not enough to be reliably sure
+ # whether it's actually an ISM manifest or some other streaming
+ # manifest since there are various streaming URL formats
+ # possible (see [1]) as well as some other shenanigans like
+ # .smil/manifest URLs that actually serve an ISM (see [2]) and
+ # so on.
+ # Thus the most reasonable way to solve this is to delegate
+ # to generic extractor in order to look into the contents of
+ # the manifest itself.
+ # 1. https://azure.microsoft.com/en-us/documentation/articles/media-services-deliver-content-overview/#streaming-url-formats
+ # 2. https://svs.itworkscdn.net/lbcivod/smil:itwfcdn/lbci/170976.smil/Manifest
+ entry_info_dict = self.url_result(
+ smuggle_url(video_url, {'to_generic': True}),
+ GenericIE.ie_key())
else:
entry_info_dict['url'] = video_url