diff options
Diffstat (limited to 'youtube_dl/extractor/generic.py')
| -rw-r--r-- | youtube_dl/extractor/generic.py | 18 | 
1 file changed, 13 insertions, 5 deletions
| diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index da016f7ee..f92e61fea 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -8,11 +8,13 @@ from ..utils import (      compat_urllib_error,      compat_urllib_parse,      compat_urllib_request, +    compat_urlparse,      ExtractorError,  )  from .brightcove import BrightcoveIE +  class GenericIE(InfoExtractor):      IE_DESC = u'Generic downloader that works on some sites'      _VALID_URL = r'.*' @@ -23,7 +25,7 @@ class GenericIE(InfoExtractor):              u'file': u'13601338388002.mp4',              u'md5': u'85b90ccc9d73b4acd9138d3af4c27f89',              u'info_dict': { -                u"uploader": u"www.hodiho.fr",  +                u"uploader": u"www.hodiho.fr",                  u"title": u"R\u00e9gis plante sa Jeep"              }          }, @@ -107,6 +109,11 @@ class GenericIE(InfoExtractor):          return new_url      def _real_extract(self, url): +        parsed_url = compat_urlparse.urlparse(url) +        if not parsed_url.scheme: +            self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http') +            return self.url_result('http://' + url) +          try:              new_url = self._test_redirect(url)              if new_url: @@ -124,7 +131,7 @@ class GenericIE(InfoExtractor):              raise ExtractorError(u'Invalid URL: %s' % url)          self.report_extraction(video_id) -        # Look for BrigthCove: +        # Look for BrightCove:          m_brightcove = re.search(r'<object.+?class=([\'"]).*?BrightcoveExperience.*?\1.+?</object>', webpage, re.DOTALL)          if m_brightcove is not None:              self.to_screen(u'Brightcove video detected.') @@ -151,7 +158,7 @@ class GenericIE(InfoExtractor):                  mobj = re.search(r'<meta.*?property="og:video".*?content="(.*?)"', webpage)          if mobj is None:              # HTML5 video -            mobj = 
re.search(r'<video[^<]*>.*?<source .*?src="([^"]+)"', webpage, flags=re.DOTALL) +            mobj = re.search(r'<video[^<]*(?:>.*?<source.*?)? src="([^"]+)"', webpage, flags=re.DOTALL)          if mobj is None:              raise ExtractorError(u'Invalid URL: %s' % url) @@ -160,8 +167,9 @@ class GenericIE(InfoExtractor):          if mobj.group(1) is None:              raise ExtractorError(u'Invalid URL: %s' % url) -        video_url = compat_urllib_parse.unquote(mobj.group(1)) -        video_id = os.path.basename(video_url) +        video_url = mobj.group(1) +        video_url = compat_urlparse.urljoin(url, video_url) +        video_id = compat_urllib_parse.unquote(os.path.basename(video_url))          # here's a fun little line of code for you:          video_extension = os.path.splitext(video_id)[1][1:] | 
