diff options
Diffstat (limited to 'youtube_dl/extractor/generic.py')
| -rw-r--r-- | youtube_dl/extractor/generic.py | 22 | 
1 files changed, 18 insertions, 4 deletions
| diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index b633e896c..d034a11bb 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -7,12 +7,14 @@ from .common import InfoExtractor  from ..utils import (      compat_urllib_error,      compat_urllib_parse, +    compat_urllib_parse_urlparse,      compat_urllib_request,      ExtractorError,  )  from .brightcove import BrightcoveIE +  class GenericIE(InfoExtractor):      IE_DESC = u'Generic downloader that works on some sites'      _VALID_URL = r'.*' @@ -23,7 +25,7 @@ class GenericIE(InfoExtractor):              u'file': u'13601338388002.mp4',              u'md5': u'85b90ccc9d73b4acd9138d3af4c27f89',              u'info_dict': { -                u"uploader": u"www.hodiho.fr",  +                u"uploader": u"www.hodiho.fr",                  u"title": u"R\u00e9gis plante sa Jeep"              }          }, @@ -107,8 +109,13 @@ class GenericIE(InfoExtractor):          return new_url      def _real_extract(self, url): -        new_url = self._test_redirect(url) -        if new_url: return [self.url_result(new_url)] +        try: +            new_url = self._test_redirect(url) +            if new_url: +                return [self.url_result(new_url)] +        except compat_urllib_error.HTTPError: +            # This may be a stupid server that doesn't like HEAD, our UA, or so +            pass          video_id = url.split('/')[-1]          try: @@ -119,7 +126,7 @@ class GenericIE(InfoExtractor):              raise ExtractorError(u'Invalid URL: %s' % url)          self.report_extraction(video_id) -        # Look for BrigthCove: +        # Look for BrightCove:          m_brightcove = re.search(r'<object.+?class=([\'"]).*?BrightcoveExperience.*?\1.+?</object>', webpage, re.DOTALL)          if m_brightcove is not None:              self.to_screen(u'Brightcove video detected.') @@ -145,6 +152,9 @@ class GenericIE(InfoExtractor):              if m_video_type is not None:                  mobj = re.search(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)          if mobj is None: +            # HTML5 video +            mobj = re.search(r'<video[^<]*>.*?<source .*?src="([^"]+)"', webpage, flags=re.DOTALL) +        if mobj is None:              raise ExtractorError(u'Invalid URL: %s' % url)          # It's possible that one of the regexes @@ -153,6 +163,10 @@ class GenericIE(InfoExtractor):              raise ExtractorError(u'Invalid URL: %s' % url)          video_url = compat_urllib_parse.unquote(mobj.group(1)) +        if video_url.startswith('//'): +            video_url = compat_urllib_parse_urlparse(url).scheme + ':' + video_url +        if '://' not in video_url: +            video_url = url + ('' if url.endswith('/') else '/') + video_url          video_id = os.path.basename(video_url)          # here's a fun little line of code for you: | 
