aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl/extractor/generic.py
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/extractor/generic.py')
-rw-r--r--youtube_dl/extractor/generic.py49
1 files changed, 40 insertions, 9 deletions
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index 8065b5611..4d649fe71 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -102,6 +102,20 @@ class GenericIE(InfoExtractor):
'title': '2cc213299525360.mov', # that's what we get
},
},
+ # second style of embedded ooyala videos
+ {
+ 'url': 'http://www.smh.com.au/tv/business/show/financial-review-sunday/behind-the-scenes-financial-review-sunday--4350201.html',
+ 'info_dict': {
+ 'id': '13djJjYjptA1XpPx8r9kuzPyj3UZH0Uk',
+ 'ext': 'mp4',
+ 'title': 'Behind-the-scenes: Financial Review Sunday ',
+ 'description': 'Step inside Channel Nine studios for an exclusive tour of its upcoming financial business show.',
+ },
+ 'params': {
+ # m3u8 download
+ 'skip_download': True,
+ },
+ },
# google redirect
{
'url': 'http://www.google.com/url?sa=t&rct=j&q=&esrc=s&source=web&cd=1&cad=rja&ved=0CCUQtwIwAA&url=http%3A%2F%2Fwww.youtube.com%2Fwatch%3Fv%3DcmQHVoWB5FY&ei=F-sNU-LLCaXk4QT52ICQBQ&usg=AFQjCNEw4hL29zgOohLXvpJ-Bdh2bils1Q&bvm=bv.61965928,d.bGE',
@@ -171,7 +185,18 @@ class GenericIE(InfoExtractor):
'uploader': 'Ze Frank',
'description': 'md5:ddb2a40ecd6b6a147e400e535874947b',
}
- }
+ },
+ # nowvideo embed hidden behind percent encoding
+ {
+ 'url': 'http://www.waoanime.tv/the-super-dimension-fortress-macross-episode-1/',
+ 'md5': '2baf4ddd70f697d94b1c18cf796d5107',
+ 'info_dict': {
+ 'id': '06e53103ca9aa',
+ 'ext': 'flv',
+ 'title': 'Macross Episode 001 Watch Macross Episode 001 onl',
+ 'description': 'No description',
+ },
+ },
]
def report_download_webpage(self, video_id):
@@ -323,6 +348,11 @@ class GenericIE(InfoExtractor):
except compat_xml_parse_error:
pass
+ # Sometimes embedded video player is hidden behind percent encoding
+ # (e.g. https://github.com/rg3/youtube-dl/issues/2448)
+ # Unescaping the whole page allows to handle those cases in a generic way
+ webpage = compat_urllib_parse.unquote(webpage)
+
# it's tempting to parse this further, but you would
# have to take into account all the variations like
# Video Title - Site Name
@@ -424,9 +454,10 @@ class GenericIE(InfoExtractor):
return self.url_result(mobj.group('url'))
# Look for Ooyala videos
- mobj = re.search(r'player.ooyala.com/[^"?]+\?[^"]*?(?:embedCode|ec)=([^"&]+)', webpage)
+ mobj = (re.search(r'player.ooyala.com/[^"?]+\?[^"]*?(?:embedCode|ec)=(?P<ec>[^"&]+)', webpage) or
+ re.search(r'OO.Player.create\([\'"].*?[\'"],\s*[\'"](?P<ec>.{32})[\'"]', webpage))
if mobj is not None:
- return OoyalaIE._build_url_result(mobj.group(1))
+ return OoyalaIE._build_url_result(mobj.group('ec'))
# Look for Aparat videos
mobj = re.search(r'<iframe src="(http://www\.aparat\.com/video/[^"]+)"', webpage)
@@ -488,6 +519,12 @@ class GenericIE(InfoExtractor):
if rutv_url:
return self.url_result(rutv_url, 'RUTV')
+ # Look for embedded TED player
+ mobj = re.search(
+ r'<iframe[^>]+?src=(["\'])(?P<url>http://embed\.ted\.com/.+?)\1', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group('url'), 'TED')
+
# Start with something easy: JW Player in SWFObject
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
if mobj is None:
@@ -500,12 +537,6 @@ class GenericIE(InfoExtractor):
# Broaden the search a little bit: JWPlayer JS loader
mobj = re.search(r'[^A-Za-z0-9]?file["\']?:\s*["\'](http(?![^\'"]+\.[0-9]+[\'"])[^\'"]+)["\']', webpage)
- # Look for embedded TED player
- mobj = re.search(
- r'<iframe[^>]+?src=(["\'])(?P<url>http://embed\.ted\.com/.+?)\1', webpage)
- if mobj is not None:
- return self.url_result(mobj.group('url'), 'TED')
-
if mobj is None:
# Try to find twitter cards info
mobj = re.search(r'<meta (?:property|name)="twitter:player:stream" (?:content|value)="(.+?)"', webpage)