diff options
Diffstat (limited to 'youtube_dl/extractor/generic.py')
-rw-r--r-- | youtube_dl/extractor/generic.py | 89 |
1 files changed, 64 insertions, 25 deletions
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index e7ee31877..cf245d9cd 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -25,6 +25,7 @@ from ..utils import ( from .brightcove import BrightcoveIE from .ooyala import OoyalaIE from .rutv import RUTVIE +from .smotri import SmotriIE class GenericIE(InfoExtractor): @@ -81,6 +82,17 @@ class GenericIE(InfoExtractor): }, 'add_ie': ['Brightcove'], }, + { + 'url': 'http://www.championat.com/video/football/v/87/87499.html', + 'md5': 'fb973ecf6e4a78a67453647444222983', + 'info_dict': { + 'id': '3414141473001', + 'ext': 'mp4', + 'title': 'Видео. Удаление Дзагоева (ЦСКА)', + 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"', + 'uploader': 'Championat', + }, + }, # Direct link to a video { 'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4', @@ -102,20 +114,6 @@ class GenericIE(InfoExtractor): 'title': '2cc213299525360.mov', # that's what we get }, }, - # second style of embedded ooyala videos - { - 'url': 'http://www.smh.com.au/tv/business/show/financial-review-sunday/behind-the-scenes-financial-review-sunday--4350201.html', - 'info_dict': { - 'id': '13djJjYjptA1XpPx8r9kuzPyj3UZH0Uk', - 'ext': 'mp4', - 'title': 'Behind-the-scenes: Financial Review Sunday ', - 'description': 'Step inside Channel Nine studios for an exclusive tour of its upcoming financial business show.', - }, - 'params': { - # m3u8 download - 'skip_download': True, - }, - }, # google redirect { 'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE', @@ -186,6 +184,17 @@ class GenericIE(InfoExtractor): 'description': 'md5:ddb2a40ecd6b6a147e400e535874947b', } }, + # Embeded Ustream video + { + 'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm', + 'md5': '27b99cdb639c9b12a79bca876a073417', + 'info_dict': { + 'id': '45734260', + 'ext': 'flv', + 'uploader': 'AU SPA: The NSA and Privacy', + 'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman' + } + }, # nowvideo embed hidden behind percent encoding { 'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/', @@ -212,6 +221,21 @@ class GenericIE(InfoExtractor): 'skip_download': 'Requires rtmpdump' } }, + # smotri embed + { + 'url': 'http://rbctv.rbc.ru/archive/news/562949990879132.shtml', + 'md5': 'ec40048448e9284c9a1de77bb188108b', + 'info_dict': { + 'id': 'v27008541fad', + 'ext': 'mp4', + 'title': 'Крым и Севастополь вошли в состав России', + 'description': 'md5:fae01b61f68984c7bd2fa741e11c3175', + 'duration': 900, + 'upload_date': '20140318', + 'uploader': 'rbctv_2012_4', + 'uploader_id': 'rbctv_2012_4', + }, + }, ] def report_download_webpage(self, video_id): @@ -300,13 +324,16 @@ class GenericIE(InfoExtractor): if not parsed_url.scheme: default_search = self._downloader.params.get('default_search') if default_search is None: - default_search = 'auto' + default_search = 'auto_warning' - if default_search == 'auto': + if default_search in ('auto', 'auto_warning'): if '/' in url: self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http') return self.url_result('http://' + url) else: + if default_search == 'auto_warning': + self._downloader.report_warning( + 'Falling back to youtube search for %s . Set --default-search to "auto" to suppress this warning.' % url) return self.url_result('ytsearch:' + url) else: assert ':' in default_search @@ -484,17 +511,18 @@ class GenericIE(InfoExtractor): if mobj is not None: return self.url_result(mobj.group(1), 'Mpora') - # Look for embedded NovaMov player + # Look for embedded NovaMov-based player mobj = re.search( - r'<iframe[^>]+?src=(["\'])(?P<url>http://(?:(?:embed|www)\.)?novamov\.com/embed\.php.+?)\1', webpage) + r'''(?x)<iframe[^>]+?src=(["\']) + (?P<url>http://(?:(?:embed|www)\.)? + (?:novamov\.com| + nowvideo\.(?:ch|sx|eu|at|ag|co)| + videoweed\.(?:es|com)| + movshare\.(?:net|sx|ag)| + divxstage\.(?:eu|net|ch|co|at|ag)) + /embed\.php.+?)\1''', webpage) if mobj is not None: - return self.url_result(mobj.group('url'), 'NovaMov') - - # Look for embedded NowVideo player - mobj = re.search( - r'<iframe[^>]+?src=(["\'])(?P<url>http://(?:(?:embed|www)\.)?nowvideo\.(?:ch|sx|eu)/embed\.php.+?)\1', webpage) - if mobj is not None: - return self.url_result(mobj.group('url'), 'NowVideo') + return self.url_result(mobj.group('url')) # Look for embedded Facebook player mobj = re.search( @@ -540,6 +568,12 @@ class GenericIE(InfoExtractor): if mobj is not None: return self.url_result(mobj.group('url'), 'TED') + # Look for embedded Ustream videos + mobj = re.search( + r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage) + if mobj is not None: + return self.url_result(mobj.group('url'), 'Ustream') + # Look for embedded arte.tv player mobj = re.search( r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"', @@ -547,6 +581,11 @@ class GenericIE(InfoExtractor): if mobj is not None: return self.url_result(mobj.group('url'), 'ArteTVEmbed') + # Look for embedded smotri.com player + smotri_url = SmotriIE._extract_url(webpage) + if smotri_url: + return self.url_result(smotri_url, 'Smotri') + # Start with something easy: JW Player in SWFObject mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage) if mobj is None: |