From 0838239e8e454c55903f3d69560cf53e25ce69f4 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 24 Aug 2013 22:49:52 +0200 Subject: [generic] Support double slash URLs (Fixes #1309) --- youtube_dl/extractor/generic.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'youtube_dl/extractor/generic.py') diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index da016f7ee..ccbbdd255 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -7,12 +7,14 @@ from .common import InfoExtractor from ..utils import ( compat_urllib_error, compat_urllib_parse, + compat_urllib_parse_urlparse, compat_urllib_request, ExtractorError, ) from .brightcove import BrightcoveIE + class GenericIE(InfoExtractor): IE_DESC = u'Generic downloader that works on some sites' _VALID_URL = r'.*' @@ -23,7 +25,7 @@ class GenericIE(InfoExtractor): u'file': u'13601338388002.mp4', u'md5': u'85b90ccc9d73b4acd9138d3af4c27f89', u'info_dict': { - u"uploader": u"www.hodiho.fr", + u"uploader": u"www.hodiho.fr", u"title": u"R\u00e9gis plante sa Jeep" } }, @@ -161,6 +163,8 @@ class GenericIE(InfoExtractor): raise ExtractorError(u'Invalid URL: %s' % url) video_url = compat_urllib_parse.unquote(mobj.group(1)) + if video_url.startswith('//'): + video_url = compat_urllib_parse_urlparse(url).scheme + ':' + video_url video_id = os.path.basename(video_url) # here's a fun little line of code for you: -- cgit v1.2.3 From 9585f890f8c0eff70eb874c7962dc30baea1049c Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Sat, 24 Aug 2013 22:56:37 +0200 Subject: [generic] add support for relative URLs (Fixes #1308) --- youtube_dl/extractor/generic.py | 2 ++ 1 file changed, 2 insertions(+) (limited to 'youtube_dl/extractor/generic.py') diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index ccbbdd255..8488dca05 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -165,6 +165,8 @@ class GenericIE(InfoExtractor): video_url = compat_urllib_parse.unquote(mobj.group(1)) if video_url.startswith('//'): video_url = compat_urllib_parse_urlparse(url).scheme + ':' + video_url + if '://' not in video_url: + video_url = url + ('' if url.endswith('/') else '/') + video_url video_id = os.path.basename(video_url) # here's a fun little line of code for you: -- cgit v1.2.3 From 627a91a9a827b48270d3f5e288404388946f0733 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Mon, 26 Aug 2013 21:29:31 +0200 Subject: [generic] small typo --- youtube_dl/extractor/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'youtube_dl/extractor/generic.py') diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 8488dca05..d034a11bb 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -126,7 +126,7 @@ class GenericIE(InfoExtractor): raise ExtractorError(u'Invalid URL: %s' % url) self.report_extraction(video_id) - # Look for BrigthCove: + # Look for BrightCove: m_brightcove = re.search(r'', webpage, re.DOTALL) if m_brightcove is not None: self.to_screen(u'Brightcove video detected.') -- cgit v1.2.3 From ae3531adf926998d42d1fb52453491c85e33b5f0 Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 28 Aug 2013 12:04:44 +0200 Subject: [generic] Fix URL concatenation When the url is something like http://example.org/foo/bar?x=y and the added is file/video.mp4 , we want http://example.org/foo/file/video.mp4 Fixes #1268. --- youtube_dl/extractor/generic.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'youtube_dl/extractor/generic.py') diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index d034a11bb..bfc9bff49 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -166,7 +166,12 @@ class GenericIE(InfoExtractor): if video_url.startswith('//'): video_url = compat_urllib_parse_urlparse(url).scheme + ':' + video_url if '://' not in video_url: - video_url = url + ('' if url.endswith('/') else '/') + video_url + up = compat_urllib_parse_urlparse(url) + if video_url.startswith('/'): + video_url = up.scheme + '://' + up.netloc + video_url + else: # relative path + video_url = (up.scheme + '://' + up.netloc + + up.path.rpartition('/')[0] + '/' + video_url) video_id = os.path.basename(video_url) # here's a fun little line of code for you: -- cgit v1.2.3 From a5caba1eb02665cdc982d6be4a933aafd79243de Mon Sep 17 00:00:00 2001 From: Philipp Hagemeister Date: Wed, 28 Aug 2013 12:47:27 +0200 Subject: [generic] simply use urljoin --- youtube_dl/extractor/generic.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'youtube_dl/extractor/generic.py') diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index bfc9bff49..dc4dea4ad 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -7,8 +7,8 @@ from .common import InfoExtractor from ..utils import ( compat_urllib_error, compat_urllib_parse, - compat_urllib_parse_urlparse, compat_urllib_request, + compat_urlparse, ExtractorError, ) @@ -163,15 +163,7 @@ class GenericIE(InfoExtractor): raise ExtractorError(u'Invalid URL: %s' % url) video_url = compat_urllib_parse.unquote(mobj.group(1)) - if video_url.startswith('//'): - video_url = compat_urllib_parse_urlparse(url).scheme + ':' + video_url - if '://' not in video_url: - up = compat_urllib_parse_urlparse(url) - if video_url.startswith('/'): - video_url = up.scheme + '://' + up.netloc + video_url - else: # relative path - video_url = (up.scheme + '://' + up.netloc + - up.path.rpartition('/')[0] + '/' + video_url) + video_url = compat_urlparse.urljoin(url, video_url) video_id = os.path.basename(video_url) # here's a fun little line of code for you: -- cgit v1.2.3 From 08e291b54d8aaa34300c02e70ff86aaa36820a62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= Date: Thu, 5 Sep 2013 18:02:17 +0200 Subject: [generic] Recognize html5 video in the format '