From 448ef1f31c8bcc1550cf907fd46e31026ec981b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 18 Oct 2015 09:11:02 +0600 Subject: [extractor/common] Allow angle brackets in attributes in _og_regexes (#7215) --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'youtube_dl/extractor/common.py') diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index a0c4af92f..4365077f1 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -645,7 +645,7 @@ class InfoExtractor(object): # Helper functions for extracting OpenGraph info @staticmethod def _og_regexes(prop): - content_re = r'content=(?:"([^>]+?)"|\'([^>]+?)\'|\s*([^\s"\'=<>`]+?))' + content_re = r'content=(?:"([^"]+?)"|\'([^\']+?)\'|\s*([^\s"\'=<>`]+?))' property_re = (r'(?:name|property)=(?:\'og:%(prop)s\'|"og:%(prop)s"|\s*og:%(prop)s\b)' % {'prop': re.escape(prop)}) template = r'<meta[^>]+?%s[^>]+?%s' -- cgit v1.2.3 From 02835c6bf4403a907c058d43220a83b3b427e181 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 18 Oct 2015 09:34:54 +0600 Subject: [extractor/common] Document repost_count --- youtube_dl/extractor/common.py | 1 + 1 file changed, 1 insertion(+) (limited to 'youtube_dl/extractor/common.py') diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 4365077f1..6169fbbeb 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -172,6 +172,7 @@ class InfoExtractor(object): view_count: How many users have watched the video on the platform. like_count: Number of positive ratings of the video dislike_count: Number of negative ratings of the video + repost_count: Number of reposts of the video average_rating: Average rating give by users, the scale used depends on the webpage comment_count: Number of comments on the video comments: A list of comments, each with one or more of the following -- cgit v1.2.3 From 943a1e24b896a64c869bbd302f32fe5bd2afec96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 24 Oct 2015 16:25:04 +0600 Subject: [extractor/common] Use more generic URLError in _is_valid_url --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'youtube_dl/extractor/common.py') diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 6169fbbeb..720033ddf 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -842,7 +842,7 @@ class InfoExtractor(object): self._request_webpage(url, video_id, 'Checking %s URL' % item) return True except ExtractorError as e: - if isinstance(e.cause, compat_HTTPError): + if isinstance(e.cause, compat_urllib_error.URLError): self.to_screen( '%s: %s URL is invalid, skipping' % (video_id, item)) return False -- cgit v1.2.3 From 865d1fbafc671815904b2ba3da76544d66c593c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sat, 24 Oct 2015 12:39:23 +0200 Subject: [extractor/common] Remove unused import --- youtube_dl/extractor/common.py | 1 - 1 file changed, 1 deletion(-) (limited to 'youtube_dl/extractor/common.py') diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 720033ddf..04b699972 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -16,7 +16,6 @@ from ..compat import ( compat_cookiejar, compat_cookies, compat_getpass, - compat_HTTPError, compat_http_client, compat_urllib_error, compat_urllib_parse, -- cgit v1.2.3 From 3711304510d3be6a5f9b2b18084aad8687e78001 Mon Sep 17 00:00:00 2001 From: remitamine <remitamine@gmail.com> Date: Tue, 8 Sep 2015 19:35:41 +0100 Subject: [extractor/common] get the redirected m3u8_url in _extract_m3u8_formats --- youtube_dl/extractor/common.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'youtube_dl/extractor/common.py') diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 04b699972..10c0d5d1f 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -943,13 +943,14 @@ class InfoExtractor(object): if re.match(r'^https?://', u) else compat_urlparse.urljoin(m3u8_url, u)) - m3u8_doc = self._download_webpage( + m3u8_doc, urlh = self._download_webpage_handle( m3u8_url, video_id, note=note or 'Downloading m3u8 information', errnote=errnote or 'Failed to download m3u8 information', fatal=fatal) if m3u8_doc is False: return m3u8_doc + m3u8_url = urlh.geturl() last_info = None last_media = None kv_rex = re.compile( -- cgit v1.2.3 From 36e6f62cd0883f0f486d1666d010e5d9e6d515bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Sun, 25 Oct 2015 20:04:55 +0100 Subject: Use a wrapper around xml.etree.ElementTree.fromstring in python 2.x (#7178) Attributes aren't unicode objects, so they couldn't be directly used in info_dict fields (for example '--write-description' doesn't work with bytes). --- youtube_dl/extractor/common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'youtube_dl/extractor/common.py') diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 10c0d5d1f..52523d7b2 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -10,7 +10,6 @@ import re import socket import sys import time -import xml.etree.ElementTree from ..compat import ( compat_cookiejar, @@ -23,6 +22,7 @@ from ..compat import ( compat_urllib_request, compat_urlparse, compat_str, + compat_etree_fromstring, ) from ..utils import ( NO_DEFAULT, @@ -461,7 +461,7 @@ class InfoExtractor(object): return xml_string if transform_source: xml_string = transform_source(xml_string) - return xml.etree.ElementTree.fromstring(xml_string.encode('utf-8')) + return compat_etree_fromstring(xml_string.encode('utf-8')) def _download_json(self, url_or_request, video_id, note='Downloading JSON metadata', -- cgit v1.2.3 From dc519b5421366a8cac681455a817ae25f7f4aa83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 31 Oct 2015 23:12:57 +0600 Subject: [extractor/common] Make ie_key and IE_NAME return unicode string --- youtube_dl/extractor/common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'youtube_dl/extractor/common.py') diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 10c0d5d1f..59c3fa8dc 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -310,11 +310,11 @@ class InfoExtractor(object): @classmethod def ie_key(cls): """A string for getting the InfoExtractor with get_info_extractor""" - return cls.__name__[:-2] + return compat_str(cls.__name__[:-2]) @property def IE_NAME(self): - return type(self).__name__[:-2] + return compat_str(type(self).__name__[:-2]) def _request_webpage(self, url_or_request, video_id, note=None, errnote=None, fatal=True): """ Returns the response handle """ -- cgit v1.2.3 From dbd82a1d4fff1655920e111cc25a7fd526d7bf9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 1 Nov 2015 00:01:34 +0600 Subject: [extractor/common] Fix m3u8 extraction on failure --- youtube_dl/extractor/common.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'youtube_dl/extractor/common.py') diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 59c3fa8dc..1f09fbb47 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -943,13 +943,14 @@ class InfoExtractor(object): if re.match(r'^https?://', u) else compat_urlparse.urljoin(m3u8_url, u)) - m3u8_doc, urlh = self._download_webpage_handle( + res = self._download_webpage_handle( m3u8_url, video_id, note=note or 'Downloading m3u8 information', errnote=errnote or 'Failed to download m3u8 information', fatal=fatal) - if m3u8_doc is False: - return m3u8_doc + if res is False: + return res + m3u8_doc, urlh = res m3u8_url = urlh.geturl() last_info = None last_media = None -- cgit v1.2.3 From 019839faaa28d6233097a669d063a3a42f9fbbc7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 21 Nov 2015 18:01:39 +0600 Subject: [extractor/common] Use baseURL from f4m manifest for recursive manifest extraction --- youtube_dl/extractor/common.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'youtube_dl/extractor/common.py') diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 5e263f8b5..71bdcad5a 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -891,6 +891,11 @@ class InfoExtractor(object): if not media_nodes: manifest_version = '2.0' media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media') + base_url = xpath_text( + manifest, ['{http://ns.adobe.com/f4m/1.0}baseURL', '{http://ns.adobe.com/f4m/2.0}baseURL'], + 'base URL', default=None) + if base_url: + base_url = base_url.strip() for i, media_el in enumerate(media_nodes): if manifest_version == '2.0': media_url = media_el.attrib.get('href') or media_el.attrib.get('url') @@ -898,7 +903,7 @@ class InfoExtractor(object): continue manifest_url = ( media_url if media_url.startswith('http://') or media_url.startswith('https://') - else ('/'.join(manifest_url.split('/')[:-1]) + '/' + media_url)) + else ((base_url or '/'.join(manifest_url.split('/')[:-1])) + '/' + media_url)) # If media_url is itself a f4m manifest do the recursive extraction # since bitrates in parent manifest (this one) and media_url manifest # may differ leading to inability to resolve the format by requested -- cgit v1.2.3 From 5c2266df4b9aeb7881ed8c026a038e2a25e43734 Mon Sep 17 00:00:00 2001 From: Sergey M? <dstftw@gmail.com> Date: Sat, 21 Nov 2015 22:18:17 +0600 Subject: Switch codebase to use sanitized_Request instead of compat_urllib_request.Request [downloader/dash] Use sanitized_Request [downloader/http] Use sanitized_Request [atresplayer] Use sanitized_Request [bambuser] Use sanitized_Request [bliptv] Use sanitized_Request [brightcove] Use sanitized_Request [cbs] Use sanitized_Request [ceskatelevize] Use sanitized_Request [collegerama] Use sanitized_Request [extractor/common] Use sanitized_Request [crunchyroll] Use sanitized_Request [dailymotion] Use sanitized_Request [dcn] Use sanitized_Request [dramafever] Use sanitized_Request [dumpert] Use sanitized_Request [eitb] Use sanitized_Request [escapist] Use sanitized_Request [everyonesmixtape] Use sanitized_Request [extremetube] Use sanitized_Request [facebook] Use sanitized_Request [fc2] Use sanitized_Request [flickr] Use sanitized_Request [4tube] Use sanitized_Request [gdcvault] Use sanitized_Request [extractor/generic] Use sanitized_Request [hearthisat] Use sanitized_Request [hotnewhiphop] Use sanitized_Request [hypem] Use sanitized_Request [iprima] Use sanitized_Request [ivi] Use sanitized_Request [keezmovies] Use sanitized_Request [letv] Use sanitized_Request [lynda] Use sanitized_Request [metacafe] Use sanitized_Request [minhateca] Use sanitized_Request [miomio] Use sanitized_Request [meovideo] Use sanitized_Request [mofosex] Use sanitized_Request [moniker] Use sanitized_Request [mooshare] Use sanitized_Request [movieclips] Use sanitized_Request [mtv] Use sanitized_Request [myvideo] Use sanitized_Request [neteasemusic] Use sanitized_Request [nfb] Use sanitized_Request [niconico] Use sanitized_Request [noco] Use sanitized_Request [nosvideo] Use sanitized_Request [novamov] Use sanitized_Request [nowness] Use sanitized_Request [nuvid] Use sanitized_Request [played] Use sanitized_Request [pluralsight] Use sanitized_Request [pornhub] Use sanitized_Request [pornotube] Use sanitized_Request [primesharetv] Use sanitized_Request [promptfile] Use sanitized_Request [qqmusic] Use sanitized_Request [rtve] Use sanitized_Request [safari] Use sanitized_Request [sandia] Use sanitized_Request [shared] Use sanitized_Request [sharesix] Use sanitized_Request [sina] Use sanitized_Request [smotri] Use sanitized_Request [sohu] Use sanitized_Request [spankwire] Use sanitized_Request [sportdeutschland] Use sanitized_Request [streamcloud] Use sanitized_Request [streamcz] Use sanitized_Request [tapely] Use sanitized_Request [tube8] Use sanitized_Request [tubitv] Use sanitized_Request [twitch] Use sanitized_Request [twitter] Use sanitized_Request [udemy] Use sanitized_Request [vbox7] Use sanitized_Request [veoh] Use sanitized_Request [vessel] Use sanitized_Request [vevo] Use sanitized_Request [viddler] Use sanitized_Request [videomega] Use sanitized_Request [viewvster] Use sanitized_Request [viki] Use sanitized_Request [vk] Use sanitized_Request [vodlocker] Use sanitized_Request [voicerepublic] Use sanitized_Request [wistia] Use sanitized_Request [xfileshare] Use sanitized_Request [xtube] Use sanitized_Request [xvideos] Use sanitized_Request [yandexmusic] Use sanitized_Request [youku] Use sanitized_Request [youporn] Use sanitized_Request [youtube] Use sanitized_Request [patreon] Use sanitized_Request [extractor/common] Remove unused import [nfb] PEP 8 --- youtube_dl/extractor/common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'youtube_dl/extractor/common.py') diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 71bdcad5a..eb9bfa3d1 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -19,7 +19,6 @@ from ..compat import ( compat_urllib_error, compat_urllib_parse, compat_urllib_parse_urlparse, - compat_urllib_request, compat_urlparse, compat_str, compat_etree_fromstring, @@ -37,6 +36,7 @@ from ..utils import ( int_or_none, RegexNotFoundError, sanitize_filename, + sanitized_Request, unescapeHTML, unified_strdate, url_basename, @@ -1285,7 +1285,7 @@ class InfoExtractor(object): def _get_cookies(self, url): """ Return a compat_cookies.SimpleCookie with the cookies for the url """ - req = compat_urllib_request.Request(url) + req = sanitized_Request(url) self._downloader.cookiejar.add_cookie_header(req) return compat_cookies.SimpleCookie(req.get_header('Cookie')) -- cgit v1.2.3 From 62d231c00493c329a78a30b15a336f131fc392f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Thu, 3 Dec 2015 20:55:02 +0600 Subject: [extractor/common] Clarify duration can be float --- youtube_dl/extractor/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'youtube_dl/extractor/common.py') diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index eb9bfa3d1..6ab2d68d6 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -167,7 +167,7 @@ class InfoExtractor(object): "ext" will be calculated from URL if missing automatic_captions: Like 'subtitles', used by the YoutubeIE for automatically generated captions - duration: Length of the video in seconds, as an integer. + duration: Length of the video in seconds, as an integer or float. view_count: How many users have watched the video on the platform. like_count: Number of positive ratings of the video dislike_count: Number of negative ratings of the video -- cgit v1.2.3