diff options
Diffstat (limited to 'youtube_dl/extractor/generic.py')
-rw-r--r-- | youtube_dl/extractor/generic.py | 18 |
1 files changed, 13 insertions, 5 deletions
diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index da016f7ee..f92e61fea 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -8,11 +8,13 @@ from ..utils import ( compat_urllib_error, compat_urllib_parse, compat_urllib_request, + compat_urlparse, ExtractorError, ) from .brightcove import BrightcoveIE + class GenericIE(InfoExtractor): IE_DESC = u'Generic downloader that works on some sites' _VALID_URL = r'.*' @@ -23,7 +25,7 @@ class GenericIE(InfoExtractor): u'file': u'13601338388002.mp4', u'md5': u'85b90ccc9d73b4acd9138d3af4c27f89', u'info_dict': { - u"uploader": u"www.hodiho.fr", + u"uploader": u"www.hodiho.fr", u"title": u"R\u00e9gis plante sa Jeep" } }, @@ -107,6 +109,11 @@ class GenericIE(InfoExtractor): return new_url def _real_extract(self, url): + parsed_url = compat_urlparse.urlparse(url) + if not parsed_url.scheme: + self._downloader.report_warning('The url doesn\'t specify the protocol, trying with http') + return self.url_result('http://' + url) + try: new_url = self._test_redirect(url) if new_url: @@ -124,7 +131,7 @@ class GenericIE(InfoExtractor): raise ExtractorError(u'Invalid URL: %s' % url) self.report_extraction(video_id) - # Look for BrigthCove: + # Look for BrightCove: m_brightcove = re.search(r'<object.+?class=([\'"]).*?BrightcoveExperience.*?\1.+?</object>', webpage, re.DOTALL) if m_brightcove is not None: self.to_screen(u'Brightcove video detected.') @@ -151,7 +158,7 @@ class GenericIE(InfoExtractor): mobj = re.search(r'<meta.*?property="og:video".*?content="(.*?)"', webpage) if mobj is None: # HTML5 video - mobj = re.search(r'<video[^<]*>.*?<source .*?src="([^"]+)"', webpage, flags=re.DOTALL) + mobj = re.search(r'<video[^<]*(?:>.*?<source.*?)? src="([^"]+)"', webpage, flags=re.DOTALL) if mobj is None: raise ExtractorError(u'Invalid URL: %s' % url) @@ -160,8 +167,9 @@ class GenericIE(InfoExtractor): if mobj.group(1) is None: raise ExtractorError(u'Invalid URL: %s' % url) - video_url = compat_urllib_parse.unquote(mobj.group(1)) - video_id = os.path.basename(video_url) + video_url = mobj.group(1) + video_url = compat_urlparse.urljoin(url, video_url) + video_id = compat_urllib_parse.unquote(os.path.basename(video_url)) # here's a fun little line of code for you: video_extension = os.path.splitext(video_id)[1][1:] |