diff options
Diffstat (limited to 'youtube_dl/extractor/generic.py')
| -rw-r--r-- | youtube_dl/extractor/generic.py | 40 | 
1 files changed, 31 insertions, 9 deletions
| diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index fd32370c2..bdb4f58d6 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -11,12 +11,14 @@ from ..utils import (      compat_urlparse,      ExtractorError, +    HEADRequest,      smuggle_url,      unescapeHTML,      unified_strdate,      url_basename,  )  from .brightcove import BrightcoveIE +from .ooyala import OoyalaIE  class GenericIE(InfoExtractor): @@ -83,7 +85,17 @@ class GenericIE(InfoExtractor):                  u'title': u'trailer',                  u'upload_date': u'20100513',              } -        } +        }, +        # ooyala video +        { +            u'url': u'http://www.rollingstone.com/music/videos/norwegian-dj-cashmere-cat-goes-spartan-on-with-me-premiere-20131219', +            u'md5': u'5644c6ca5d5782c1d0d350dad9bd840c', +            u'info_dict': { +                u'id': u'BwY2RxaTrTkslxOfcan0UCf0YqyvWysJ', +                u'ext': u'mp4', +                u'title': u'2cc213299525360.mov', #that's what we get +            }, +        },      ]      def report_download_webpage(self, video_id): @@ -98,21 +110,18 @@ class GenericIE(InfoExtractor):      def _send_head(self, url):          """Check if it is a redirect, like url shorteners, in case return the new url.""" -        class HeadRequest(compat_urllib_request.Request): -            def get_method(self): -                return "HEAD"          class HEADRedirectHandler(compat_urllib_request.HTTPRedirectHandler):              """              Subclass the HTTPRedirectHandler to make it use our -            HeadRequest also on the redirected URL +            HEADRequest also on the redirected URL              """              def redirect_request(self, req, fp, code, msg, headers, newurl):                  if code in (301, 302, 303, 307):                      newurl = newurl.replace(' ', '%20')                      newheaders = dict((k,v) for k,v in req.headers.items()                                        if k.lower() not in ("content-length", "content-type")) -                    return HeadRequest(newurl, +                    return HEADRequest(newurl,                                         headers=newheaders,                                         origin_req_host=req.get_origin_req_host(),                                         unverifiable=True) @@ -141,7 +150,7 @@ class GenericIE(InfoExtractor):                          compat_urllib_request.HTTPErrorProcessor, compat_urllib_request.HTTPSHandler]:              opener.add_handler(handler()) -        response = opener.open(HeadRequest(url)) +        response = opener.open(HEADRequest(url))          if response is None:              raise ExtractorError(u'Invalid URL protocol')          return response @@ -222,8 +231,11 @@ class GenericIE(InfoExtractor):              return self.url_result(surl, 'Vimeo')          # Look for embedded YouTube player -        matches = re.findall( -            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//(?:www\.)?youtube\.com/embed/.+?)\1', webpage) +        matches = re.findall(r'''(?x) +            (?:<iframe[^>]+?src=|embedSWF\(\s*) +            (["\'])(?P<url>(?:https?:)?//(?:www\.)?youtube\.com/ +                (?:embed|v)/.+?) +            \1''', webpage)          if matches:              urlrs = [self.url_result(unescapeHTML(tuppl[1]), 'Youtube')                       for tuppl in matches] @@ -277,6 +289,16 @@ class GenericIE(InfoExtractor):          if mobj is not None:              return self.url_result(mobj.group('url')) +        # Look for Ooyala videos +        mobj = re.search(r'player.ooyala.com/[^"?]+\?[^"]*?(?:embedCode|ec)=([^"&]+)', webpage) +        if mobj is not None: +            return OoyalaIE._build_url_result(mobj.group(1)) + +        # Look for Aparat videos +        mobj = re.search(r'<iframe src="(http://www.aparat.com/video/[^"]+)"', webpage) +        if mobj is not None: +            return self.url_result(mobj.group(1), 'Aparat') +          # Start with something easy: JW Player in SWFObject          mobj = re.search(r'flashvars: [\'"](?:.*&)?file=(http[^\'"&]*)', webpage)          if mobj is None: | 
