about | summary | refs | log | tree | commit | diff
path: root/youtube_dl/extractor/generic.py
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/extractor/generic.py')
-rw-r--r-- youtube_dl/extractor/generic.py 89
1 files changed, 64 insertions, 25 deletions
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index e7ee31877..cf245d9cd 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -25,6 +25,7 @@ from ..utils import (
from .brightcove import BrightcoveIE
from .ooyala import OoyalaIE
from .rutv import RUTVIE
+from .smotri import SmotriIE
class GenericIE(InfoExtractor):
@@ -81,6 +82,17 @@ class GenericIE(InfoExtractor):
},
'add_ie': ['Brightcove'],
},
+ {
+ 'url': 'http://www.championat.com/video/football/v/87/87499.html',
+ 'md5': 'fb973ecf6e4a78a67453647444222983',
+ 'info_dict': {
+ 'id': '3414141473001',
+ 'ext': 'mp4',
+ 'title': 'Видео. Удаление Дзагоева (ЦСКА)',
+ 'description': 'Онлайн-трансляция матча ЦСКА - "Волга"',
+ 'uploader': 'Championat',
+ },
+ },
# Direct link to a video
{
'url': 'http://media.w3.org/2010/05/sintel/trailer.mp4',
@@ -102,20 +114,6 @@ class GenericIE(InfoExtractor):
'title': '2cc213299525360.mov', # that's what we get
},
},
- # second style of embedded ooyala videos
- {
- 'url': 'http://www.smh.com.au/tv/business/show/financial-review-sunday/behind-the-scenes-financial-review-sunday--4350201.html',
- 'info_dict': {
- 'id': '13djJjYjptA1XpPx8r9kuzPyj3UZH0Uk',
- 'ext': 'mp4',
- 'title': 'Behind-the-scenes: Financial Review Sunday ',
- 'description': 'Step inside Channel Nine studios for an exclusive tour of its upcoming financial business show.',
- },
- 'params': {
- # m3u8 download
- 'skip_download': True,
- },
- },
# google redirect
{
'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
@@ -186,6 +184,17 @@ class GenericIE(InfoExtractor):
'description': 'md5:ddb2a40ecd6b6a147e400e535874947b',
}
},
+ # Embedded Ustream video
+ {
+ 'url': 'http://www.american.edu/spa/pti/nsa-privacy-janus-2014.cfm',
+ 'md5': '27b99cdb639c9b12a79bca876a073417',
+ 'info_dict': {
+ 'id': '45734260',
+ 'ext': 'flv',
+ 'uploader': 'AU SPA: The NSA and Privacy',
+ 'title': 'NSA and Privacy Forum Debate featuring General Hayden and Barton Gellman'
+ }
+ },
# nowvideo embed hidden behind percent encoding
{
'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
@@ -212,6 +221,21 @@ class GenericIE(InfoExtractor):
'skip_download': 'Requires rtmpdump'
}
},
+ # smotri embed
+ {
+ 'url': 'http://rbctv.rbc.ru/archive/news/562949990879132.shtml',
+ 'md5': 'ec40048448e9284c9a1de77bb188108b',
+ 'info_dict': {
+ 'id': 'v27008541fad',
+ 'ext': 'mp4',
+ 'title': 'Крым и Севастополь вошли в состав России',
+ 'description': 'md5:fae01b61f68984c7bd2fa741e11c3175',
+ 'duration': 900,
+ 'upload_date': '20140318',
+ 'uploader': 'rbctv_2012_4',
+ 'uploader_id': 'rbctv_2012_4',
+ },
+ },
]
def report_download_webpage(self, video_id):
@@ -300,13 +324,16 @@ class GenericIE(InfoExtractor):
if not parsed_url.scheme:
default_search = self._downloader.params.get('default_search')
if default_search is None:
- default_search = 'auto'
+ default_search = 'auto_warning'
- if default_search == 'auto':
+ if default_search in ('auto', 'auto_warning'):
if '/' in url:
self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http')
return self.url_result('http://' + url)
else:
+ if default_search == 'auto_warning':
+ self._downloader.report_warning(
+ 'Falling back to youtube search for %s . Set --default-search to "auto" to suppress this warning.' % url)
return self.url_result('ytsearch:' + url)
else:
assert ':' in default_search
@@ -484,17 +511,18 @@ class GenericIE(InfoExtractor):
if mobj is not None:
return self.url_result(mobj.group(1), 'Mpora')
- # Look for embedded NovaMov player
+ # Look for embedded NovaMov-based player
mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>http://(?:(?:embed|www)\.)?novamov\.com/embed\.php.+?)\1', webpage)
+ r'''(?x)<iframe[^>]+?src=(["\'])
+ (?P<url>http://(?:(?:embed|www)\.)?
+ (?:novamov\.com|
+ nowvideo\.(?:ch|sx|eu|at|ag|co)|
+ videoweed\.(?:es|com)|
+ movshare\.(?:net|sx|ag)|
+ divxstage\.(?:eu|net|ch|co|at|ag))
+ /embed\.php.+?)\1''', webpage)
if mobj is not None:
- return self.url_result(mobj.group('url'), 'NovaMov')
-
- # Look for embedded NowVideo player
- mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>http://(?:(?:embed|www)\.)?nowvideo\.(?:ch|sx|eu)/embed\.php.+?)\1', webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'), 'NowVideo')
+ return self.url_result(mobj.group('url'))
# Look for embedded Facebook player
mobj = re.search(
@@ -540,6 +568,12 @@ class GenericIE(InfoExtractor):
if mobj is not None:
return self.url_result(mobj.group('url'), 'TED')
+ # Look for embedded Ustream videos
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>http://www\.ustream\.tv/embed/.+?)\1', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'Ustream')
+
# Look for embedded arte.tv player
mobj = re.search(
r'<script [^>]*?src="(?P<url>http://www\.arte\.tv/playerv2/embed[^"]+)"',
@@ -547,6 +581,11 @@ class GenericIE(InfoExtractor):
if mobj is not None:
return self.url_result(mobj.group('url'), 'ArteTVEmbed')
+ # Look for embedded smotri.com player
+ smotri_url = SmotriIE._extract_url(webpage)
+ if smotri_url:
+ return self.url_result(smotri_url, 'Smotri')
+
# Start with something easy: JW Player in SWFObject
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
if mobj is None: