diff options
Diffstat (limited to 'youtube_dl/extractor/generic.py')
| -rw-r--r-- | youtube_dl/extractor/generic.py | 20 | 
1 file changed, 17 insertions, 3 deletions
| diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index b633e896c..8488dca05 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -7,12 +7,14 @@ from .common import InfoExtractor  from ..utils import (      compat_urllib_error,      compat_urllib_parse, +    compat_urllib_parse_urlparse,      compat_urllib_request,      ExtractorError,  )  from .brightcove import BrightcoveIE +  class GenericIE(InfoExtractor):      IE_DESC = u'Generic downloader that works on some sites'      _VALID_URL = r'.*' @@ -23,7 +25,7 @@ class GenericIE(InfoExtractor):              u'file': u'13601338388002.mp4',              u'md5': u'85b90ccc9d73b4acd9138d3af4c27f89',              u'info_dict': { -                u"uploader": u"www.hodiho.fr",  +                u"uploader": u"www.hodiho.fr",                  u"title": u"R\u00e9gis plante sa Jeep"              }          }, @@ -107,8 +109,13 @@ class GenericIE(InfoExtractor):          return new_url      def _real_extract(self, url): -        new_url = self._test_redirect(url) -        if new_url: return [self.url_result(new_url)] +        try: +            new_url = self._test_redirect(url) +            if new_url: +                return [self.url_result(new_url)] +        except compat_urllib_error.HTTPError: +            # This may be a stupid server that doesn't like HEAD, our UA, or so +            pass          video_id = url.split('/')[-1]          try: @@ -145,6 +152,9 @@ class GenericIE(InfoExtractor):              if m_video_type is not None:                  mobj = re.search(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)          if mobj is None: +            # HTML5 video +            mobj = re.search(r'<video[^<]*>.*?<source .*?src="([^"]+)"', webpage, flags=re.DOTALL) +        if mobj is None:              raise ExtractorError(u'Invalid URL: %s' % url)          # It's possible that one of the regexes @@ -153,6 +163,10 @@ class 
GenericIE(InfoExtractor):              raise ExtractorError(u'Invalid URL: %s' % url)          video_url = compat_urllib_parse.unquote(mobj.group(1)) +        if video_url.startswith('//'): +            video_url = compat_urllib_parse_urlparse(url).scheme + ':' + video_url +        if '://' not in video_url: +            video_url = url + ('' if url.endswith('/') else '/') + video_url          video_id = os.path.basename(video_url)          # here's a fun little line of code for you: | 
