aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl/extractor/generic.py
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/extractor/generic.py')
-rw-r--r--youtube_dl/extractor/generic.py40
1 files changed, 31 insertions, 9 deletions
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py
index fd32370c2..bdb4f58d6 100644
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -11,12 +11,14 @@ from ..utils import (
compat_urlparse,
ExtractorError,
+ HEADRequest,
smuggle_url,
unescapeHTML,
unified_strdate,
url_basename,
)
from .brightcove import BrightcoveIE
+from .ooyala import OoyalaIE
class GenericIE(InfoExtractor):
@@ -83,7 +85,17 @@ class GenericIE(InfoExtractor):
u'title': u'trailer',
u'upload_date': u'20100513',
}
- }
+ },
+ # ooyala video
+ {
+ u'url': u'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219',
+ u'md5': u'5644c6ca5d5782c1d0d350dad9bd840c',
+ u'info_dict': {
+ u'id': u'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ',
+ u'ext': u'mp4',
+ u'title': u'2cc213299525360.mov', #that's what we get
+ },
+ },
]
def report_download_webpage(self, video_id):
@@ -98,21 +110,18 @@ class GenericIE(InfoExtractor):
def _send_head(self, url):
"""Check if it is a redirect, like url shorteners, in case return the new url."""
- class HeadRequest(compat_urllib_request.Request):
- def get_method(self):
- return "HEAD"
class HEADRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
"""
Subclass the HTTPRedirectHandler to make it use our
- HeadRequest also on the redirected URL
+ HEADRequest also on the redirected URL
"""
def redirect_request(self, req, fp, code, msg, headers, newurl):
if code in (301, 302, 303, 307):
newurl = newurl.replace(' ', '%20')
newheaders = dict((k,v) for k,v in req.headers.items()
if k.lower() not in ("content-length", "content-type"))
- return HeadRequest(newurl,
+ return HEADRequest(newurl,
headers=newheaders,
origin_req_host=req.get_origin_req_host(),
unverifiable=True)
@@ -141,7 +150,7 @@ class GenericIE(InfoExtractor):
compat_urllib_request.HTTPErrorProcessor, compat_urllib_request.HTTPSHandler]:
opener.add_handler(handler())
- response = opener.open(HeadRequest(url))
+ response = opener.open(HEADRequest(url))
if response is None:
raise ExtractorError(u'Invalid URL protocol')
return response
@@ -222,8 +231,11 @@ class GenericIE(InfoExtractor):
return self.url_result(surl, 'Vimeo')
# Look for embedded YouTube player
- matches = re.findall(
- r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?youtube\.com/embed/.+?)\1', webpage)
+ matches = re.findall(r'''(?x)
+ (?:<iframe[^>]+?src=|embedSWF\(\s*)
+ (["\'])(?P<url>(?:https?:)?//(?:www\.)?youtube\.com/
+ (?:embed|v)/.+?)
+ \1''', webpage)
if matches:
urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Youtube')
for tuppl in matches]
@@ -277,6 +289,16 @@ class GenericIE(InfoExtractor):
if mobj is not None:
return self.url_result(mobj.group('url'))
+ # Look for Ooyala videos
+ mobj = re.search(r'player.ooyala.com/[^"?]+\?[^"]*?(?:embedCode|ec)=([^"&]+)', webpage)
+ if mobj is not None:
+ return OoyalaIE._build_url_result(mobj.group(1))
+
+ # Look for Aparat videos
+ mobj = re.search(r'<iframe src="(http://www.aparat.com/video/[^"]+)"', webpage)
+ if mobj is not None:
+ return self.url_result(mobj.group(1), 'Aparat')
+
# Start with something easy: JW Player in SWFObject
mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)
if mobj is None: