diff options
Diffstat (limited to 'youtube_dl')
53 files changed, 447 insertions, 217 deletions
diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py index c3783337a..db0da5828 100644 --- a/youtube_dl/compat.py +++ b/youtube_dl/compat.py @@ -75,42 +75,73 @@ except ImportError: import BaseHTTPServer as compat_http_server try: + from urllib.parse import unquote_to_bytes as compat_urllib_parse_unquote_to_bytes from urllib.parse import unquote as compat_urllib_parse_unquote -except ImportError: - def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'): - if string == '': + from urllib.parse import unquote_plus as compat_urllib_parse_unquote_plus +except ImportError: # Python 2 + _asciire = re.compile('([\x00-\x7f]+)') if sys.version_info < (2, 7) else compat_urllib_parse._asciire + + # HACK: The following are the correct unquote_to_bytes, unquote and unquote_plus + # implementations from cpython 3.4.3's stdlib. Python 2's version + # is apparently broken (see https://github.com/rg3/youtube-dl/pull/6244) + + def compat_urllib_parse_unquote_to_bytes(string): + """unquote_to_bytes('abc%20def') -> b'abc def'.""" + # Note: strings are encoded as UTF-8. This is only an issue if it contains + # unescaped non-ASCII characters, which URIs should not. + if not string: + # Is it a string-like object? + string.split + return b'' + if isinstance(string, unicode): + string = string.encode('utf-8') + bits = string.split(b'%') + if len(bits) == 1: return string - res = string.split('%') - if len(res) == 1: + res = [bits[0]] + append = res.append + for item in bits[1:]: + try: + append(compat_urllib_parse._hextochr[item[:2]]) + append(item[2:]) + except KeyError: + append(b'%') + append(item) + return b''.join(res) + + def compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'): + """Replace %xx escapes by their single-character equivalent. The optional + encoding and errors parameters specify how to decode percent-encoded + sequences into Unicode characters, as accepted by the bytes.decode() + method. + By default, percent-encoded sequences are decoded with UTF-8, and invalid + sequences are replaced by a placeholder character. + + unquote('abc%20def') -> 'abc def'. + """ + if '%' not in string: + string.split return string if encoding is None: encoding = 'utf-8' if errors is None: errors = 'replace' - # pct_sequence: contiguous sequence of percent-encoded bytes, decoded - pct_sequence = b'' - string = res[0] - for item in res[1:]: - try: - if not item: - raise ValueError - pct_sequence += item[:2].decode('hex') - rest = item[2:] - if not rest: - # This segment was just a single percent-encoded character. - # May be part of a sequence of code units, so delay decoding. - # (Stored in pct_sequence). - continue - except ValueError: - rest = '%' + item - # Encountered non-percent-encoded characters. Flush the current - # pct_sequence. - string += pct_sequence.decode(encoding, errors) + rest - pct_sequence = b'' - if pct_sequence: - # Flush the final pct_sequence - string += pct_sequence.decode(encoding, errors) - return string + bits = _asciire.split(string) + res = [bits[0]] + append = res.append + for i in range(1, len(bits), 2): + append(compat_urllib_parse_unquote_to_bytes(bits[i]).decode(encoding, errors)) + append(bits[i + 1]) + return ''.join(res) + + def compat_urllib_parse_unquote_plus(string, encoding='utf-8', errors='replace'): + """Like unquote(), but also replace plus signs by spaces, as required for + unquoting HTML form values. + + unquote_plus('%7e/abc+def') -> '~/abc def' + """ + string = string.replace('+', ' ') + return compat_urllib_parse_unquote(string, encoding, errors) try: compat_str = unicode # Python 2 @@ -422,6 +453,8 @@ __all__ = [ 'compat_urllib_error', 'compat_urllib_parse', 'compat_urllib_parse_unquote', + 'compat_urllib_parse_unquote_plus', + 'compat_urllib_parse_unquote_to_bytes', 'compat_urllib_parse_urlparse', 'compat_urllib_request', 'compat_urlparse', diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index 3f4f23521..06f21064b 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -389,7 +389,8 @@ from .npo import ( NPOLiveIE, NPORadioIE, NPORadioFragmentIE, - TegenlichtVproIE, + VPROIE, + WNLIE ) from .nrk import ( NRKIE, diff --git a/youtube_dl/extractor/bet.py b/youtube_dl/extractor/bet.py index 26b934543..03dad4636 100644 --- a/youtube_dl/extractor/bet.py +++ b/youtube_dl/extractor/bet.py @@ -1,7 +1,7 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_urllib_parse +from ..compat import compat_urllib_parse_unquote from ..utils import ( xpath_text, xpath_with_ns, @@ -57,7 +57,7 @@ class BetIE(InfoExtractor): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) - media_url = compat_urllib_parse.unquote(self._search_regex( + media_url = compat_urllib_parse_unquote(self._search_regex( [r'mediaURL\s*:\s*"([^"]+)"', r"var\s+mrssMediaUrl\s*=\s*'([^']+)'"], webpage, 'media URL')) diff --git a/youtube_dl/extractor/bliptv.py b/youtube_dl/extractor/bliptv.py index fb56cd78d..a69ee482b 100644 --- a/youtube_dl/extractor/bliptv.py +++ b/youtube_dl/extractor/bliptv.py @@ -14,6 +14,8 @@ from ..utils import ( int_or_none, parse_iso8601, unescapeHTML, + xpath_text, + xpath_with_ns, ) @@ -23,10 +25,10 @@ class BlipTVIE(InfoExtractor): _TESTS = [ { 'url': 'http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352', - 'md5': 'c6934ad0b6acf2bd920720ec888eb812', + 'md5': '80baf1ec5c3d2019037c1c707d676b9f', 'info_dict': { 'id': '5779306', - 'ext': 'mov', + 'ext': 'm4v', 'title': 'CBR EXCLUSIVE: "Gotham City Imposters" Bats VS Jokerz Short 3', 'description': 'md5:9bc31f227219cde65e47eeec8d2dc596', 'timestamp': 1323138843, @@ -100,6 +102,20 @@ class BlipTVIE(InfoExtractor): 'vcodec': 'none', } }, + { + # missing duration + 'url': 'http://blip.tv/rss/flash/6700880', + 'info_dict': { + 'id': '6684191', + 'ext': 'm4v', + 'title': 'Cowboy Bebop: Gateway Shuffle Review', + 'description': 'md5:3acc480c0f9ae157f5fe88547ecaf3f8', + 'timestamp': 1386639757, + 'upload_date': '20131210', + 'uploader': 'sfdebris', + 'uploader_id': '706520', + } + } ] @staticmethod @@ -128,35 +144,34 @@ class BlipTVIE(InfoExtractor): rss = self._download_xml('http://blip.tv/rss/flash/%s' % video_id, video_id, 'Downloading video RSS') - def blip(s): - return '{http://blip.tv/dtd/blip/1.0}%s' % s - - def media(s): - return '{http://search.yahoo.com/mrss/}%s' % s - - def itunes(s): - return '{http://www.itunes.com/dtds/podcast-1.0.dtd}%s' % s + def _x(p): + return xpath_with_ns(p, { + 'blip': 'http://blip.tv/dtd/blip/1.0', + 'media': 'http://search.yahoo.com/mrss/', + 'itunes': 'http://www.itunes.com/dtds/podcast-1.0.dtd', + }) item = rss.find('channel/item') - video_id = item.find(blip('item_id')).text - title = item.find('./title').text - description = clean_html(compat_str(item.find(blip('puredescription')).text)) - timestamp = parse_iso8601(item.find(blip('datestamp')).text) - uploader = item.find(blip('user')).text - uploader_id = item.find(blip('userid')).text - duration = int(item.find(blip('runtime')).text) - media_thumbnail = item.find(media('thumbnail')) - thumbnail = media_thumbnail.get('url') if media_thumbnail is not None else item.find(itunes('image')).text - categories = [category.text for category in item.findall('category')] + video_id = xpath_text(item, _x('blip:item_id'), 'video id') or lookup_id + title = xpath_text(item, 'title', 'title', fatal=True) + description = clean_html(xpath_text(item, _x('blip:puredescription'), 'description')) + timestamp = parse_iso8601(xpath_text(item, _x('blip:datestamp'), 'timestamp')) + uploader = xpath_text(item, _x('blip:user'), 'uploader') + uploader_id = xpath_text(item, _x('blip:userid'), 'uploader id') + duration = int_or_none(xpath_text(item, _x('blip:runtime'), 'duration')) + media_thumbnail = item.find(_x('media:thumbnail')) + thumbnail = (media_thumbnail.get('url') if media_thumbnail is not None + else xpath_text(item, 'image', 'thumbnail')) + categories = [category.text for category in item.findall('category') if category is not None] formats = [] subtitles_urls = {} - media_group = item.find(media('group')) - for media_content in media_group.findall(media('content')): + media_group = item.find(_x('media:group')) + for media_content in media_group.findall(_x('media:content')): url = media_content.get('url') - role = media_content.get(blip('role')) + role = media_content.get(_x('blip:role')) msg = self._download_webpage( url + '?showplayer=20140425131715&referrer=http://blip.tv&mask=7&skin=flashvars&view=url', video_id, 'Resolving URL for %s' % role) @@ -175,8 +190,8 @@ class BlipTVIE(InfoExtractor): 'url': real_url, 'format_id': role, 'format_note': media_type, - 'vcodec': media_content.get(blip('vcodec')) or 'none', - 'acodec': media_content.get(blip('acodec')), + 'vcodec': media_content.get(_x('blip:vcodec')) or 'none', + 'acodec': media_content.get(_x('blip:acodec')), 'filesize': media_content.get('filesize'), 'width': int_or_none(media_content.get('width')), 'height': int_or_none(media_content.get('height')), diff --git a/youtube_dl/extractor/ceskatelevize.py b/youtube_dl/extractor/ceskatelevize.py index 65f6be623..dda583680 100644 --- a/youtube_dl/extractor/ceskatelevize.py +++ b/youtube_dl/extractor/ceskatelevize.py @@ -7,6 +7,7 @@ from .common import InfoExtractor from ..compat import ( compat_urllib_request, compat_urllib_parse, + compat_urllib_parse_unquote, compat_urllib_parse_urlparse, ) from ..utils import ( @@ -88,7 +89,7 @@ class CeskaTelevizeIE(InfoExtractor): if playlist_url == 'error_region': raise ExtractorError(NOT_AVAILABLE_STRING, expected=True) - req = compat_urllib_request.Request(compat_urllib_parse.unquote(playlist_url)) + req = compat_urllib_request.Request(compat_urllib_parse_unquote(playlist_url)) req.add_header('Referer', url) playlist = self._download_json(req, video_id) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 82f5de2d8..5a2d0d995 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -27,7 +27,9 @@ from ..utils import ( bug_reports_message, clean_html, compiled_regex_type, + determine_ext, ExtractorError, + fix_xml_ampersands, float_or_none, int_or_none, RegexNotFoundError, @@ -706,10 +708,23 @@ class InfoExtractor(object): 'twitter card player') @staticmethod - def _form_hidden_inputs(html): - return dict(re.findall( - r'<input\s+type="hidden"\s+name="([^"]+)"\s+(?:id="[^"]+"\s+)?value="([^"]*)"', - html)) + def _hidden_inputs(html): + return dict([ + (input.group('name'), input.group('value')) for input in re.finditer( + r'''(?x) + <input\s+ + type=(?P<q_hidden>["\'])hidden(?P=q_hidden)\s+ + name=(?P<q_name>["\'])(?P<name>.+?)(?P=q_name)\s+ + (?:id=(?P<q_id>["\']).+?(?P=q_id)\s+)? + value=(?P<q_value>["\'])(?P<value>.*?)(?P=q_value) + ''', html) + ]) + + def _form_hidden_inputs(self, form_id, html): + form = self._search_regex( + r'(?s)<form[^>]+?id=(["\'])%s\1[^>]*>(?P<form>.+?)</form>' % form_id, + html, '%s form' % form_id, group='form') + return self._hidden_inputs(form) def _sort_formats(self, formats, field_preference=None): if not formats: @@ -821,10 +836,14 @@ class InfoExtractor(object): self.to_screen(msg) time.sleep(timeout) - def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None): + def _extract_f4m_formats(self, manifest_url, video_id, preference=None, f4m_id=None, + transform_source=lambda s: fix_xml_ampersands(s).strip()): manifest = self._download_xml( manifest_url, video_id, 'Downloading f4m manifest', - 'Unable to download f4m manifest') + 'Unable to download f4m manifest', + # Some manifests may be malformed, e.g. prosiebensat1 generated manifests + # (see https://github.com/rg3/youtube-dl/issues/6215#issuecomment-121704244) + transform_source=transform_source) formats = [] manifest_version = '1.0' @@ -834,8 +853,19 @@ class InfoExtractor(object): media_nodes = manifest.findall('{http://ns.adobe.com/f4m/2.0}media') for i, media_el in enumerate(media_nodes): if manifest_version == '2.0': - manifest_url = ('/'.join(manifest_url.split('/')[:-1]) + '/' + - (media_el.attrib.get('href') or media_el.attrib.get('url'))) + media_url = media_el.attrib.get('href') or media_el.attrib.get('url') + if not media_url: + continue + manifest_url = ( + media_url if media_url.startswith('http://') or media_url.startswith('https://') + else ('/'.join(manifest_url.split('/')[:-1]) + '/' + media_url)) + # If media_url is itself a f4m manifest do the recursive extraction + # since bitrates in parent manifest (this one) and media_url manifest + # may differ leading to inability to resolve the format by requested + # bitrate in f4m downloader + if determine_ext(manifest_url) == 'f4m': + formats.extend(self._extract_f4m_formats(manifest_url, video_id, preference, f4m_id)) + continue tbr = int_or_none(media_el.attrib.get('bitrate')) formats.append({ 'format_id': '-'.join(filter(None, [f4m_id, compat_str(i if tbr is None else tbr)])), diff --git a/youtube_dl/extractor/crunchyroll.py b/youtube_dl/extractor/crunchyroll.py index 73f1e22ef..d1b6d7366 100644 --- a/youtube_dl/extractor/crunchyroll.py +++ b/youtube_dl/extractor/crunchyroll.py @@ -12,6 +12,7 @@ from math import pow, sqrt, floor from .common import InfoExtractor from ..compat import ( compat_urllib_parse, + compat_urllib_parse_unquote, compat_urllib_request, ) from ..utils import ( @@ -254,7 +255,7 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text video_upload_date = unified_strdate(video_upload_date) video_uploader = self._html_search_regex(r'<div>\s*Publisher:(.+?)</div>', webpage, 'video_uploader', fatal=False, flags=re.DOTALL) - playerdata_url = compat_urllib_parse.unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, 'playerdata_url')) + playerdata_url = compat_urllib_parse_unquote(self._html_search_regex(r'"config_url":"([^"]+)', webpage, 'playerdata_url')) playerdata_req = compat_urllib_request.Request(playerdata_url) playerdata_req.data = compat_urllib_parse.urlencode({'current_page': webpage_url}) playerdata_req.add_header('Content-Type', 'application/x-www-form-urlencoded') diff --git a/youtube_dl/extractor/dailymotion.py b/youtube_dl/extractor/dailymotion.py index 8852f0add..1a41c0db1 100644 --- a/youtube_dl/extractor/dailymotion.py +++ b/youtube_dl/extractor/dailymotion.py @@ -53,6 +53,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor): 'uploader': 'IGN', 'title': 'Steam Machine Models, Pricing Listed on Steam Store - IGN News', 'upload_date': '20150306', + 'duration': 74, } }, # Vevo video @@ -164,6 +165,7 @@ class DailymotionIE(DailymotionBaseInfoExtractor): 'thumbnail': info['thumbnail_url'], 'age_limit': age_limit, 'view_count': view_count, + 'duration': info['duration'] } def _get_subtitles(self, video_id, webpage): diff --git a/youtube_dl/extractor/dfb.py b/youtube_dl/extractor/dfb.py index 8049779b0..263532cc6 100644 --- a/youtube_dl/extractor/dfb.py +++ b/youtube_dl/extractor/dfb.py @@ -3,42 +3,47 @@ from __future__ import unicode_literals import re from .common import InfoExtractor +from ..utils import unified_strdate class DFBIE(InfoExtractor): IE_NAME = 'tv.dfb.de' - _VALID_URL = r'https?://tv\.dfb\.de/video/[^/]+/(?P<id>\d+)' + _VALID_URL = r'https?://tv\.dfb\.de/video/(?P<display_id>[^/]+)/(?P<id>\d+)' _TEST = { - 'url': 'http://tv.dfb.de/video/highlights-des-empfangs-in-berlin/9070/', + 'url': 'http://tv.dfb.de/video/u-19-em-stimmen-zum-spiel-gegen-russland/11633/', # The md5 is different each time 'info_dict': { - 'id': '9070', + 'id': '11633', + 'display_id': 'u-19-em-stimmen-zum-spiel-gegen-russland', 'ext': 'flv', - 'title': 'Highlights des Empfangs in Berlin', - 'upload_date': '20140716', + 'title': 'U 19-EM: Stimmen zum Spiel gegen Russland', + 'upload_date': '20150714', }, } def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) video_id = mobj.group('id') + display_id = mobj.group('display_id') - webpage = self._download_webpage(url, video_id) + webpage = self._download_webpage(url, display_id) player_info = self._download_xml( 'http://tv.dfb.de/server/hd_video.php?play=%s' % video_id, - video_id) + display_id) video_info = player_info.find('video') - f4m_info = self._download_xml(self._proto_relative_url(video_info.find('url').text.strip()), video_id) + f4m_info = self._download_xml( + self._proto_relative_url(video_info.find('url').text.strip()), display_id) token_el = f4m_info.find('token') manifest_url = token_el.attrib['url'] + '?' + 'hdnea=' + token_el.attrib['auth'] + '&hdcore=3.2.0' + formats = self._extract_f4m_formats(manifest_url, display_id) return { 'id': video_id, + 'display_id': display_id, 'title': video_info.find('title').text, - 'url': manifest_url, - 'ext': 'flv', 'thumbnail': self._og_search_thumbnail(webpage), - 'upload_date': ''.join(video_info.find('time_date').text.split('.')[::-1]), + 'upload_date': unified_strdate(video_info.find('time_date').text), + 'formats': formats, } diff --git a/youtube_dl/extractor/dramafever.py b/youtube_dl/extractor/dramafever.py index ca41a3abf..38e6597c8 100644 --- a/youtube_dl/extractor/dramafever.py +++ b/youtube_dl/extractor/dramafever.py @@ -23,8 +23,23 @@ class DramaFeverBaseIE(InfoExtractor): _LOGIN_URL = 'https://www.dramafever.com/accounts/login/' _NETRC_MACHINE = 'dramafever' + _CONSUMER_SECRET = 'DA59dtVXYLxajktV' + + _consumer_secret = None + + def _get_consumer_secret(self): + mainjs = self._download_webpage( + 'http://www.dramafever.com/static/51afe95/df2014/scripts/main.js', + None, 'Downloading main.js', fatal=False) + if not mainjs: + return self._CONSUMER_SECRET + return self._search_regex( + r"var\s+cs\s*=\s*'([^']+)'", mainjs, + 'consumer secret', default=self._CONSUMER_SECRET) + def _real_initialize(self): self._login() + self._consumer_secret = self._get_consumer_secret() def _login(self): (username, password) = self._get_login_info() @@ -119,6 +134,23 @@ class DramaFeverIE(DramaFeverBaseIE): 'url': href, }] + series_id, episode_number = video_id.split('.') + episode_info = self._download_json( + # We only need a single episode info, so restricting page size to one episode + # and dealing with page number as with episode number + r'http://www.dramafever.com/api/4/episode/series/?cs=%s&series_id=%s&page_number=%s&page_size=1' + % (self._consumer_secret, series_id, episode_number), + video_id, 'Downloading episode info JSON', fatal=False) + if episode_info: + value = episode_info.get('value') + if value: + subfile = value[0].get('subfile') or value[0].get('new_subfile') + if subfile and subfile != 'http://www.dramafever.com/st/': + subtitles.setdefault('English', []).append({ + 'ext': 'srt', + 'url': subfile, + }) + return { 'id': video_id, 'title': title, @@ -152,27 +184,14 @@ class DramaFeverSeriesIE(DramaFeverBaseIE): 'playlist_count': 20, }] - _CONSUMER_SECRET = 'DA59dtVXYLxajktV' _PAGE_SIZE = 60 # max is 60 (see http://api.drama9.com/#get--api-4-episode-series-) - def _get_consumer_secret(self, video_id): - mainjs = self._download_webpage( - 'http://www.dramafever.com/static/51afe95/df2014/scripts/main.js', - video_id, 'Downloading main.js', fatal=False) - if not mainjs: - return self._CONSUMER_SECRET - return self._search_regex( - r"var\s+cs\s*=\s*'([^']+)'", mainjs, - 'consumer secret', default=self._CONSUMER_SECRET) - def _real_extract(self, url): series_id = self._match_id(url) - consumer_secret = self._get_consumer_secret(series_id) - series = self._download_json( 'http://www.dramafever.com/api/4/series/query/?cs=%s&series_id=%s' - % (consumer_secret, series_id), + % (self._consumer_secret, series_id), series_id, 'Downloading series JSON')['series'][series_id] title = clean_html(series['name']) @@ -182,7 +201,7 @@ class DramaFeverSeriesIE(DramaFeverBaseIE): for page_num in itertools.count(1): episodes = self._download_json( 'http://www.dramafever.com/api/4/episode/series/?cs=%s&series_id=%s&page_size=%d&page_number=%d' - % (consumer_secret, series_id, self._PAGE_SIZE, page_num), + % (self._consumer_secret, series_id, self._PAGE_SIZE, page_num), series_id, 'Downloading episodes JSON page #%d' % page_num) for episode in episodes.get('value', []): episode_url = episode.get('episode_url') diff --git a/youtube_dl/extractor/ehow.py b/youtube_dl/extractor/ehow.py index 9cb1bf301..b1cd4f5d4 100644 --- a/youtube_dl/extractor/ehow.py +++ b/youtube_dl/extractor/ehow.py @@ -1,9 +1,7 @@ from __future__ import unicode_literals -from ..compat import ( - compat_urllib_parse, -) from .common import InfoExtractor +from ..compat import compat_urllib_parse_unquote class EHowIE(InfoExtractor): @@ -26,7 +24,7 @@ class EHowIE(InfoExtractor): webpage = self._download_webpage(url, video_id) video_url = self._search_regex( r'(?:file|source)=(http[^\'"&]*)', webpage, 'video URL') - final_url = compat_urllib_parse.unquote(video_url) + final_url = compat_urllib_parse_unquote(video_url) uploader = self._html_search_meta('uploader', webpage) title = self._og_search_title(webpage).replace(' | eHow', '') diff --git a/youtube_dl/extractor/facebook.py b/youtube_dl/extractor/facebook.py index 82dc27bc6..e17bb9aea 100644 --- a/youtube_dl/extractor/facebook.py +++ b/youtube_dl/extractor/facebook.py @@ -9,7 +9,7 @@ from ..compat import ( compat_http_client, compat_str, compat_urllib_error, - compat_urllib_parse, + compat_urllib_parse_unquote, compat_urllib_request, ) from ..utils import ( @@ -136,7 +136,7 @@ class FacebookIE(InfoExtractor): else: raise ExtractorError('Cannot parse data') data = dict(json.loads(m.group(1))) - params_raw = compat_urllib_parse.unquote(data['params']) + params_raw = compat_urllib_parse_unquote(data['params']) params = json.loads(params_raw) video_data = params['video_data'][0] diff --git a/youtube_dl/extractor/gamespot.py b/youtube_dl/extractor/gamespot.py index 2d33fa7f5..b3f1bafcc 100644 --- a/youtube_dl/extractor/gamespot.py +++ b/youtube_dl/extractor/gamespot.py @@ -5,7 +5,7 @@ import json from .common import InfoExtractor from ..compat import ( - compat_urllib_parse, + compat_urllib_parse_unquote, compat_urlparse, ) from ..utils import ( @@ -75,7 +75,7 @@ class GameSpotIE(InfoExtractor): return { 'id': data_video['guid'], 'display_id': page_id, - 'title': compat_urllib_parse.unquote(data_video['title']), + 'title': compat_urllib_parse_unquote(data_video['title']), 'formats': formats, 'description': self._html_search_meta('description', webpage), 'thumbnail': self._og_search_thumbnail(webpage), diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 392ad3648..a62287e50 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -8,7 +8,6 @@ import re from .common import InfoExtractor from .youtube import YoutubeIE from ..compat import ( - compat_urllib_parse, compat_urllib_parse_unquote, compat_urllib_request, compat_urlparse, @@ -1115,7 +1114,7 @@ class GenericIE(InfoExtractor): # Sometimes embedded video player is hidden behind percent encoding # (e.g. https://github.com/rg3/youtube-dl/issues/2448) # Unescaping the whole page allows to handle those cases in a generic way - webpage = compat_urllib_parse.unquote(webpage) + webpage = compat_urllib_parse_unquote(webpage) # it's tempting to parse this further, but you would # have to take into account all the variations like @@ -1369,7 +1368,7 @@ class GenericIE(InfoExtractor): return self.url_result(mobj.group('url')) mobj = re.search(r'class=["\']embedly-embed["\'][^>]src=["\'][^"\']*url=(?P<url>[^&]+)', webpage) if mobj is not None: - return self.url_result(compat_urllib_parse.unquote(mobj.group('url'))) + return self.url_result(compat_urllib_parse_unquote(mobj.group('url'))) # Look for funnyordie embed matches = re.findall(r'<iframe[^>]+?src="(https?://(?:www\.)?funnyordie\.com/embed/[^"]+)"', webpage) @@ -1682,7 +1681,7 @@ class GenericIE(InfoExtractor): entries = [] for video_url in found: video_url = compat_urlparse.urljoin(url, video_url) - video_id = compat_urllib_parse.unquote(os.path.basename(video_url)) + video_id = compat_urllib_parse_unquote(os.path.basename(video_url)) # Sometimes, jwplayer extraction will result in a YouTube URL if YoutubeIE.suitable(video_url): diff --git a/youtube_dl/extractor/gorillavid.py b/youtube_dl/extractor/gorillavid.py index aabf07a20..f006f0cb1 100644 --- a/youtube_dl/extractor/gorillavid.py +++ b/youtube_dl/extractor/gorillavid.py @@ -78,7 +78,7 @@ class GorillaVidIE(InfoExtractor): if re.search(self._FILE_NOT_FOUND_REGEX, webpage) is not None: raise ExtractorError('Video %s does not exist' % video_id, expected=True) - fields = self._form_hidden_inputs(webpage) + fields = self._hidden_inputs(webpage) if fields['op'] == 'download1': countdown = int_or_none(self._search_regex( diff --git a/youtube_dl/extractor/hostingbulk.py b/youtube_dl/extractor/hostingbulk.py index 63f579592..a3154cfde 100644 --- a/youtube_dl/extractor/hostingbulk.py +++ b/youtube_dl/extractor/hostingbulk.py @@ -58,7 +58,7 @@ class HostingBulkIE(InfoExtractor): r'<img src="([^"]+)".+?class="pic"', webpage, 'thumbnail', fatal=False) - fields = self._form_hidden_inputs(webpage) + fields = self._hidden_inputs(webpage) request = compat_urllib_request.Request(url, urlencode_postdata(fields)) request.add_header('Content-type', 'application/x-www-form-urlencoded') diff --git a/youtube_dl/extractor/howstuffworks.py b/youtube_dl/extractor/howstuffworks.py index e97339121..663e6632a 100644 --- a/youtube_dl/extractor/howstuffworks.py +++ b/youtube_dl/extractor/howstuffworks.py @@ -10,7 +10,7 @@ from ..utils import ( class HowStuffWorksIE(InfoExtractor): - _VALID_URL = r'https?://[\da-z-]+\.howstuffworks\.com/(?:[^/]+/)*\d+-(?P<id>.+?)-video\.htm' + _VALID_URL = r'https?://[\da-z-]+\.howstuffworks\.com/(?:[^/]+/)*(?:\d+-)?(?P<id>.+?)-video\.htm' _TESTS = [ { 'url': 'http://adventure.howstuffworks.com/5266-cool-jobs-iditarod-musher-video.htm', @@ -46,6 +46,10 @@ class HowStuffWorksIE(InfoExtractor): 'thumbnail': 're:^https?://.*\.jpg$', }, }, + { + 'url': 'http://shows.howstuffworks.com/stuff-to-blow-your-mind/optical-illusions-video.htm', + 'only_matching': True, + } ] def _real_extract(self, url): diff --git a/youtube_dl/extractor/infoq.py b/youtube_dl/extractor/infoq.py index 91a1b3ccb..71cfd12c5 100644 --- a/youtube_dl/extractor/infoq.py +++ b/youtube_dl/extractor/infoq.py @@ -4,7 +4,7 @@ import base64 from .common import InfoExtractor from ..compat import ( - compat_urllib_parse, + compat_urllib_parse_unquote, compat_urlparse, ) @@ -39,7 +39,7 @@ class InfoQIE(InfoExtractor): # Extract video URL encoded_id = self._search_regex( r"jsclassref\s*=\s*'([^']*)'", webpage, 'encoded id') - real_id = compat_urllib_parse.unquote(base64.b64decode(encoded_id.encode('ascii')).decode('utf-8')) + real_id = compat_urllib_parse_unquote(base64.b64decode(encoded_id.encode('ascii')).decode('utf-8')) playpath = 'mp4:' + real_id video_filename = playpath.split('/')[-1] diff --git a/youtube_dl/extractor/jeuxvideo.py b/youtube_dl/extractor/jeuxvideo.py index d0720ff56..1df084d87 100644 --- a/youtube_dl/extractor/jeuxvideo.py +++ b/youtube_dl/extractor/jeuxvideo.py @@ -8,9 +8,9 @@ from .common import InfoExtractor class JeuxVideoIE(InfoExtractor): - _VALID_URL = r'http://.*?\.jeuxvideo\.com/.*/(.*?)-\d+\.htm' + _VALID_URL = r'http://.*?\.jeuxvideo\.com/.*/(.*?)\.htm' - _TEST = { + _TESTS = [{ 'url': 'http://www.jeuxvideo.com/reportages-videos-jeux/0004/00046170/tearaway-playstation-vita-gc-2013-tearaway-nous-presente-ses-papiers-d-identite-00115182.htm', 'md5': '046e491afb32a8aaac1f44dd4ddd54ee', 'info_dict': { @@ -19,7 +19,10 @@ class JeuxVideoIE(InfoExtractor): 'title': 'Tearaway : GC 2013 : Tearaway nous présente ses papiers d\'identité', 'description': 'Lorsque les développeurs de LittleBigPlanet proposent un nouveau titre, on ne peut que s\'attendre à un résultat original et fort attrayant.', }, - } + }, { + 'url': 'http://www.jeuxvideo.com/videos/chroniques/434220/l-histoire-du-jeu-video-la-saturn.htm', + 'only_matching': True, + }] def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) diff --git a/youtube_dl/extractor/karaoketv.py b/youtube_dl/extractor/karaoketv.py index e3b43ff8d..06daf5a89 100644 --- a/youtube_dl/extractor/karaoketv.py +++ b/youtube_dl/extractor/karaoketv.py @@ -2,7 +2,7 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_urllib_parse +from ..compat import compat_urllib_parse_unquote_plus from ..utils import ( js_to_json, ) @@ -24,7 +24,7 @@ class KaraoketvIE(InfoExtractor): webpage = self._download_webpage(url, video_id) page_video_url = self._og_search_video_url(webpage, video_id) - config_json = compat_urllib_parse.unquote_plus(self._search_regex( + config_json = compat_urllib_parse_unquote_plus(self._search_regex( r'config=(.*)', page_video_url, 'configuration')) urls_info_json = self._download_json( diff --git a/youtube_dl/extractor/malemotion.py b/youtube_dl/extractor/malemotion.py index 0b85a59d1..92511a671 100644 --- a/youtube_dl/extractor/malemotion.py +++ b/youtube_dl/extractor/malemotion.py @@ -2,9 +2,7 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import ( - compat_urllib_parse, -) +from ..compat import compat_urllib_parse_unquote class MalemotionIE(InfoExtractor): @@ -24,7 +22,7 @@ class MalemotionIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - video_url = compat_urllib_parse.unquote(self._search_regex( + video_url = compat_urllib_parse_unquote(self._search_regex( r'<source type="video/mp4" src="(.+?)"', webpage, 'video URL')) video_title = self._html_search_regex( r'<title>(.*?)</title', webpage, 'title') diff --git a/youtube_dl/extractor/metacafe.py b/youtube_dl/extractor/metacafe.py index 8bc333b02..6e2e73a51 100644 --- a/youtube_dl/extractor/metacafe.py +++ b/youtube_dl/extractor/metacafe.py @@ -6,6 +6,7 @@ from .common import InfoExtractor from ..compat import ( compat_parse_qs, compat_urllib_parse, + compat_urllib_parse_unquote, compat_urllib_request, ) from ..utils import ( @@ -155,7 +156,7 @@ class MetacafeIE(InfoExtractor): video_url = None mobj = re.search(r'(?m)&mediaURL=([^&]+)', webpage) if mobj is not None: - mediaURL = compat_urllib_parse.unquote(mobj.group(1)) + mediaURL = compat_urllib_parse_unquote(mobj.group(1)) video_ext = mediaURL[-3:] # Extract gdaKey if available diff --git a/youtube_dl/extractor/mitele.py b/youtube_dl/extractor/mitele.py index 7091f3335..852d72266 100644 --- a/youtube_dl/extractor/mitele.py +++ b/youtube_dl/extractor/mitele.py @@ -5,6 +5,7 @@ import json from .common import InfoExtractor from ..compat import ( compat_urllib_parse, + compat_urllib_parse_unquote, compat_urlparse, ) from ..utils import ( @@ -48,7 +49,7 @@ class MiTeleIE(InfoExtractor): domain = 'http://' + domain info_url = compat_urlparse.urljoin( domain, - compat_urllib_parse.unquote(embed_data['flashvars']['host']) + compat_urllib_parse_unquote(embed_data['flashvars']['host']) ) info_el = self._download_xml(info_url, episode).find('./video/info') diff --git a/youtube_dl/extractor/mixcloud.py b/youtube_dl/extractor/mixcloud.py index 425a4ccf1..d47aeceda 100644 --- a/youtube_dl/extractor/mixcloud.py +++ b/youtube_dl/extractor/mixcloud.py @@ -3,9 +3,7 @@ from __future__ import unicode_literals import re from .common import InfoExtractor -from ..compat import ( - compat_urllib_parse, -) +from ..compat import compat_urllib_parse_unquote from ..utils import ( ExtractorError, HEADRequest, @@ -60,7 +58,7 @@ class MixcloudIE(InfoExtractor): mobj = re.match(self._VALID_URL, url) uploader = mobj.group(1) cloudcast_name = mobj.group(2) - track_id = compat_urllib_parse.unquote('-'.join((uploader, cloudcast_name))) + track_id = compat_urllib_parse_unquote('-'.join((uploader, cloudcast_name))) webpage = self._download_webpage(url, track_id) diff --git a/youtube_dl/extractor/mofosex.py b/youtube_dl/extractor/mofosex.py index 2cec12d35..9bf99a54a 100644 --- a/youtube_dl/extractor/mofosex.py +++ b/youtube_dl/extractor/mofosex.py @@ -5,9 +5,9 @@ import re from .common import InfoExtractor from ..compat import ( + compat_urllib_parse_unquote, compat_urllib_parse_urlparse, compat_urllib_request, - compat_urllib_parse, ) @@ -34,7 +34,7 @@ class MofosexIE(InfoExtractor): webpage = self._download_webpage(req, video_id) video_title = self._html_search_regex(r'<h1>(.+?)<', webpage, 'title') - video_url = compat_urllib_parse.unquote(self._html_search_regex(r'flashvars.video_url = \'([^\']+)', webpage, 'video_url')) + video_url = compat_urllib_parse_unquote(self._html_search_regex(r'flashvars.video_url = \'([^\']+)', webpage, 'video_url')) path = compat_urllib_parse_urlparse(video_url).path extension = os.path.splitext(path)[1][1:] format = path.split('/')[5].split('_')[:2] diff --git a/youtube_dl/extractor/myspass.py b/youtube_dl/extractor/myspass.py index 5b9b9fbcd..4557a2b13 100644 --- a/youtube_dl/extractor/myspass.py +++ b/youtube_dl/extractor/myspass.py @@ -35,7 +35,8 @@ class MySpassIE(InfoExtractor): # get metadata metadata_url = META_DATA_URL_TEMPLATE % video_id - metadata = self._download_xml(metadata_url, video_id) + metadata = self._download_xml( + metadata_url, video_id, transform_source=lambda s: s.strip()) # extract values from metadata url_flv_el = metadata.find('url_flv') diff --git a/youtube_dl/extractor/myvideo.py b/youtube_dl/extractor/myvideo.py index 5e754fcff..c96f472a3 100644 --- a/youtube_dl/extractor/myvideo.py +++ b/youtube_dl/extractor/myvideo.py @@ -10,6 +10,7 @@ from .common import InfoExtractor from ..compat import ( compat_ord, compat_urllib_parse, + compat_urllib_parse_unquote, compat_urllib_request, ) from ..utils import ( @@ -107,7 +108,7 @@ class MyVideoIE(InfoExtractor): if not a == '_encxml': params[a] = b else: - encxml = compat_urllib_parse.unquote(b) + encxml = compat_urllib_parse_unquote(b) if not params.get('domain'): params['domain'] = 'www.myvideo.de' xmldata_url = '%s?%s' % (encxml, compat_urllib_parse.urlencode(params)) @@ -135,7 +136,7 @@ class MyVideoIE(InfoExtractor): video_url = None mobj = re.search('connectionurl=\'(.*?)\'', dec_data) if mobj: - video_url = compat_urllib_parse.unquote(mobj.group(1)) + video_url = compat_urllib_parse_unquote(mobj.group(1)) if 'myvideo2flash' in video_url: self.report_warning( 'Rewriting URL to use unencrypted rtmp:// ...', @@ -147,10 +148,10 @@ class MyVideoIE(InfoExtractor): mobj = re.search('path=\'(http.*?)\' source=\'(.*?)\'', dec_data) if mobj is None: raise ExtractorError('unable to extract url') - video_url = compat_urllib_parse.unquote(mobj.group(1)) + compat_urllib_parse.unquote(mobj.group(2)) + video_url = compat_urllib_parse_unquote(mobj.group(1)) + compat_urllib_parse_unquote(mobj.group(2)) video_file = self._search_regex('source=\'(.*?)\'', dec_data, 'video file') - video_file = compat_urllib_parse.unquote(video_file) + video_file = compat_urllib_parse_unquote(video_file) if not video_file.endswith('f4m'): ppath, prefix = video_file.split('.') @@ -159,7 +160,7 @@ class MyVideoIE(InfoExtractor): video_playpath = '' video_swfobj = self._search_regex('swfobject.embedSWF\(\'(.+?)\'', webpage, 'swfobj') - video_swfobj = compat_urllib_parse.unquote(video_swfobj) + video_swfobj = compat_urllib_parse_unquote(video_swfobj) video_title = self._html_search_regex("<h1(?: class='globalHd')?>(.*?)</h1>", webpage, 'title') diff --git a/youtube_dl/extractor/nowtv.py b/youtube_dl/extractor/nowtv.py index 173e46cd8..0b5ff4760 100644 --- a/youtube_dl/extractor/nowtv.py +++ b/youtube_dl/extractor/nowtv.py @@ -133,7 +133,7 @@ class NowTVIE(InfoExtractor): station = mobj.group('station') info = self._download_json( - 'https://api.nowtv.de/v3/movies/%s?fields=*,format,files' % display_id, + 'https://api.nowtv.de/v3/movies/%s?fields=id,title,free,geoblocked,articleLong,articleShort,broadcastStartDate,seoUrl,duration,format,files' % display_id, display_id) video_id = compat_str(info['id']) diff --git a/youtube_dl/extractor/npo.py b/youtube_dl/extractor/npo.py index 62d12b7a6..0c2d02c10 100644 --- a/youtube_dl/extractor/npo.py +++ b/youtube_dl/extractor/npo.py @@ -1,5 +1,7 @@ from __future__ import unicode_literals +import re + from .common import InfoExtractor from ..utils import ( fix_xml_ampersands, @@ -7,7 +9,6 @@ from ..utils import ( qualities, strip_jsonp, unified_strdate, - url_basename, ) @@ -37,8 +38,21 @@ class NPOBaseIE(InfoExtractor): class NPOIE(NPOBaseIE): - IE_NAME = 'npo.nl' - _VALID_URL = r'https?://(?:www\.)?npo\.nl/(?!live|radio)[^/]+/[^/]+/(?P<id>[^/?]+)' + IE_NAME = 'npo' + IE_DESC = 'npo.nl and ntr.nl' + _VALID_URL = r'''(?x) + (?: + npo:| + https?:// + (?:www\.)? + (?: + npo\.nl/(?!live|radio)(?:[^/]+/){2}| + ntr\.nl/(?:[^/]+/){2,}| + omroepwnl\.nl/video/fragment/[^/]+__ + ) + ) + (?P<id>[^/?#]+) + ''' _TESTS = [ { @@ -58,7 +72,7 @@ class NPOIE(NPOBaseIE): 'info_dict': { 'id': 'VARA_101191800', 'ext': 'm4v', - 'title': 'De Mega Mike & Mega Thomas show', + 'title': 'De Mega Mike & Mega Thomas show: The best of.', 'description': 'md5:3b74c97fc9d6901d5a665aac0e5400f4', 'upload_date': '20090227', 'duration': 2400, @@ -70,8 +84,8 @@ class NPOIE(NPOBaseIE): 'info_dict': { 'id': 'VPWON_1169289', 'ext': 'm4v', - 'title': 'Tegenlicht', - 'description': 'md5:d6476bceb17a8c103c76c3b708f05dd1', + 'title': 'Tegenlicht: De toekomst komt uit Afrika', + 'description': 'md5:52cf4eefbc96fffcbdc06d024147abea', 'upload_date': '20130225', 'duration': 3000, }, @@ -100,6 +114,30 @@ class NPOIE(NPOBaseIE): 'title': 'Hoe gaat Europa verder na Parijs?', }, }, + { + 'url': 'http://www.ntr.nl/Aap-Poot-Pies/27/detail/Aap-poot-pies/VPWON_1233944#content', + 'md5': '01c6a2841675995da1f0cf776f03a9c3', + 'info_dict': { + 'id': 'VPWON_1233944', + 'ext': 'm4v', + 'title': 'Aap, poot, pies', + 'description': 'md5:c9c8005d1869ae65b858e82c01a91fde', + 'upload_date': '20150508', + 'duration': 599, + }, + }, + { + 'url': 'http://www.omroepwnl.nl/video/fragment/vandaag-de-dag-verkiezingen__POMS_WNL_853698', + 'md5': 'd30cd8417b8b9bca1fdff27428860d08', + 'info_dict': { + 'id': 'POW_00996502', + 'ext': 'm4v', + 'title': '''"Dit is wel een 'landslide'..."''', + 'description': 'md5:f8d66d537dfb641380226e31ca57b8e8', + 'upload_date': '20150508', + 'duration': 462, + }, + } ] def _real_extract(self, url): @@ -114,6 +152,18 @@ class NPOIE(NPOBaseIE): transform_source=strip_jsonp, ) + # For some videos actual video id (prid) is different (e.g. for + # http://www.omroepwnl.nl/video/fragment/vandaag-de-dag-verkiezingen__POMS_WNL_853698 + # video id is POMS_WNL_853698 but prid is POW_00996502) + video_id = metadata.get('prid') or video_id + + # titel is too generic in some cases so utilize aflevering_titel as well + # when available (e.g. http://tegenlicht.vpro.nl/afleveringen/2014-2015/access-to-africa.html) + title = metadata['titel'] + sub_title = metadata.get('aflevering_titel') + if sub_title and sub_title != title: + title += ': %s' % sub_title + token = self._get_token(video_id) formats = [] @@ -186,8 +236,8 @@ class NPOIE(NPOBaseIE): return { 'id': video_id, - 'title': metadata['titel'], - 'description': metadata['info'], + 'title': title, + 'description': metadata.get('info'), 'thumbnail': metadata.get('images', [{'url': None}])[-1]['url'], 'upload_date': unified_strdate(metadata.get('gidsdatum')), 'duration': parse_duration(metadata.get('tijdsduur')), @@ -356,9 +406,8 @@ class NPORadioFragmentIE(InfoExtractor): } -class TegenlichtVproIE(NPOIE): - IE_NAME = 'tegenlicht.vpro.nl' - _VALID_URL = r'https?://tegenlicht\.vpro\.nl/afleveringen/.*?' +class VPROIE(NPOIE): + _VALID_URL = r'https?://(?:www\.)?(?:tegenlicht\.)?vpro\.nl/(?:[^/]+/){2,}(?P<id>[^/]+)\.html' _TESTS = [ { @@ -367,17 +416,72 @@ class TegenlichtVproIE(NPOIE): 'info_dict': { 'id': 'VPWON_1169289', 'ext': 'm4v', - 'title': 'Tegenlicht', - 'description': 'md5:d6476bceb17a8c103c76c3b708f05dd1', + 'title': 'De toekomst komt uit Afrika', + 'description': 'md5:52cf4eefbc96fffcbdc06d024147abea', 'upload_date': '20130225', }, }, + { + 'url': 'http://www.vpro.nl/programmas/2doc/2015/sergio-herman.html', + 'info_dict': { + 'id': 'sergio-herman', + 'title': 'Sergio Herman: Fucking perfect', + }, + 'playlist_count': 2, + }, + { + # playlist with youtube embed + 'url': 'http://www.vpro.nl/programmas/2doc/2015/education-education.html', + 'info_dict': { + 'id': 'education-education', + 'title': '2Doc', + }, + 'playlist_count': 2, + } ] def _real_extract(self, url): - name = url_basename(url) - webpage = self._download_webpage(url, name) - urn = self._html_search_meta('mediaurn', webpage) - info_page = self._download_json( - 'http://rs.vpro.nl/v2/api/media/%s.json' % urn, name) - return self._get_info(info_page['mid']) + playlist_id = self._match_id(url) + + webpage = self._download_webpage(url, playlist_id) + + entries = [ + self.url_result('npo:%s' % video_id if not video_id.startswith('http') else video_id) + for video_id in re.findall(r'data-media-id="([^"]+)"', webpage) + ] + + playlist_title = self._search_regex( + r'<title>\s*([^>]+?)\s*-\s*Teledoc\s*-\s*VPRO\s*</title>', + webpage, 'playlist title', default=None) or self._og_search_title(webpage) + + return self.playlist_result(entries, playlist_id, playlist_title) + + +class WNLIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?omroepwnl\.nl/video/detail/(?P<id>[^/]+)__\d+' + + _TEST = { + 'url': 'http://www.omroepwnl.nl/video/detail/vandaag-de-dag-6-mei__060515', + 'info_dict': { + 'id': 'vandaag-de-dag-6-mei', + 'title': 'Vandaag de Dag 6 mei', + }, + 'playlist_count': 4, + } + + def _real_extract(self, url): + playlist_id = self._match_id(url) + + webpage = self._download_webpage(url, playlist_id) + + entries = [ + self.url_result('npo:%s' % video_id, 'NPO') + for video_id, part in re.findall( + r'<a[^>]+href="([^"]+)"[^>]+class="js-mid"[^>]*>(Deel \d+)', webpage) + ] + + playlist_title = self._html_search_regex( + r'(?s)<h1[^>]+class="subject"[^>]*>(.+?)</h1>', + webpage, 'playlist title') + + return self.playlist_result(entries, playlist_id, playlist_title) diff --git a/youtube_dl/extractor/odnoklassniki.py b/youtube_dl/extractor/odnoklassniki.py index 6c7149fe3..215ffe87b 100644 --- a/youtube_dl/extractor/odnoklassniki.py +++ b/youtube_dl/extractor/odnoklassniki.py @@ -2,7 +2,7 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import compat_urllib_parse +from ..compat import compat_urllib_parse_unquote from ..utils import ( unified_strdate, int_or_none, @@ -62,7 +62,7 @@ class OdnoklassnikiIE(InfoExtractor): metadata = self._parse_json(metadata, video_id) else: metadata = self._download_json( - compat_urllib_parse.unquote(flashvars['metadataUrl']), + compat_urllib_parse_unquote(flashvars['metadataUrl']), video_id, 'Downloading metadata JSON') movie = metadata['movie'] diff --git a/youtube_dl/extractor/openfilm.py b/youtube_dl/extractor/openfilm.py index 2249657eb..d2ceedd01 100644 --- a/youtube_dl/extractor/openfilm.py +++ b/youtube_dl/extractor/openfilm.py @@ -3,9 +3,9 @@ from __future__ import unicode_literals import json from .common import InfoExtractor +from ..compat import compat_urllib_parse_unquote_plus from ..utils import ( parse_iso8601, - compat_urllib_parse, parse_age_limit, int_or_none, ) @@ -37,7 +37,7 @@ class OpenFilmIE(InfoExtractor): webpage = self._download_webpage(url, display_id) - player = compat_urllib_parse.unquote_plus( + player = compat_urllib_parse_unquote_plus( self._og_search_video_url(webpage)) video = json.loads(self._search_regex( diff --git a/youtube_dl/extractor/photobucket.py b/youtube_dl/extractor/photobucket.py index c66db3cdc..788411ccc 100644 --- a/youtube_dl/extractor/photobucket.py +++ b/youtube_dl/extractor/photobucket.py @@ -4,7 +4,7 @@ import json import re from .common import InfoExtractor -from ..compat import compat_urllib_parse +from ..compat import compat_urllib_parse_unquote class PhotobucketIE(InfoExtractor): @@ -34,7 +34,7 @@ class PhotobucketIE(InfoExtractor): info_json = self._search_regex(r'Pb\.Data\.Shared\.put\(Pb\.Data\.Shared\.MEDIA, (.*?)\);', webpage, 'info json') info = json.loads(info_json) - url = compat_urllib_parse.unquote(self._html_search_regex(r'file=(.+\.mp4)', info['linkcodes']['html'], 'url')) + url = compat_urllib_parse_unquote(self._html_search_regex(r'file=(.+\.mp4)', info['linkcodes']['html'], 'url')) return { 'id': video_id, 'url': url, diff --git a/youtube_dl/extractor/played.py b/youtube_dl/extractor/played.py index 9fe1524f2..8a1c296dd 100644 --- a/youtube_dl/extractor/played.py +++ b/youtube_dl/extractor/played.py @@ -38,7 +38,7 @@ class PlayedIE(InfoExtractor): if m_error: raise ExtractorError(m_error.group('msg'), expected=True) - data = self._form_hidden_inputs(orig_webpage) + data = self._hidden_inputs(orig_webpage) self._sleep(2, video_id) diff --git a/youtube_dl/extractor/playvid.py b/youtube_dl/extractor/playvid.py index c3e667e9e..2eb4fd96d 100644 --- a/youtube_dl/extractor/playvid.py +++ b/youtube_dl/extractor/playvid.py @@ -4,7 +4,8 @@ import re from .common import InfoExtractor from ..compat import ( - compat_urllib_parse, + compat_urllib_parse_unquote, + compat_urllib_parse_unquote_plus, ) from ..utils import ( clean_html, @@ -44,7 +45,7 @@ class PlayvidIE(InfoExtractor): flashvars = self._html_search_regex( r'flashvars="(.+?)"', webpage, 'flashvars') - infos = compat_urllib_parse.unquote(flashvars).split(r'&') + infos = compat_urllib_parse_unquote(flashvars).split(r'&') for info in infos: videovars_match = re.match(r'^video_vars\[(.+?)\]=(.+?)$', info) if videovars_match: @@ -52,7 +53,7 @@ class PlayvidIE(InfoExtractor): val = videovars_match.group(2) if key == 'title': - video_title = compat_urllib_parse.unquote_plus(val) + video_title = compat_urllib_parse_unquote_plus(val) if key == 'duration': try: duration = int(val) diff --git a/youtube_dl/extractor/pornhub.py b/youtube_dl/extractor/pornhub.py index 8172bc997..0b7886840 100644 --- a/youtube_dl/extractor/pornhub.py +++ b/youtube_dl/extractor/pornhub.py @@ -5,7 +5,8 @@ import re from .common import InfoExtractor from ..compat import ( - compat_urllib_parse, + compat_urllib_parse_unquote, + compat_urllib_parse_unquote_plus, compat_urllib_parse_urlparse, compat_urllib_request, ) @@ -69,7 +70,7 @@ class PornHubIE(InfoExtractor): webpage, 'uploader', fatal=False) thumbnail = self._html_search_regex(r'"image_url":"([^"]+)', webpage, 'thumbnail', fatal=False) if thumbnail: - thumbnail = compat_urllib_parse.unquote(thumbnail) + thumbnail = compat_urllib_parse_unquote(thumbnail) view_count = self._extract_count( r'<span class="count">([\d,\.]+)</span> views', webpage, 'view') @@ -80,9 +81,9 @@ class PornHubIE(InfoExtractor): comment_count = self._extract_count( r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment') - video_urls = list(map(compat_urllib_parse.unquote, re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage))) + video_urls = list(map(compat_urllib_parse_unquote, re.findall(r'"quality_[0-9]{3}p":"([^"]+)', webpage))) if webpage.find('"encrypted":true') != -1: - password = compat_urllib_parse.unquote_plus( + password = compat_urllib_parse_unquote_plus( self._search_regex(r'"video_title":"([^"]+)', webpage, 'password')) video_urls = list(map(lambda s: aes_decrypt_text(s, password, 32).decode('utf-8'), video_urls)) diff --git a/youtube_dl/extractor/primesharetv.py b/youtube_dl/extractor/primesharetv.py index 9aa0c862a..304359dc5 100644 --- a/youtube_dl/extractor/primesharetv.py +++ b/youtube_dl/extractor/primesharetv.py @@ -29,7 +29,7 @@ class PrimeShareTVIE(InfoExtractor): if '>File not exist<' in webpage: raise ExtractorError('Video %s does not exist' % video_id, expected=True) - fields = self._form_hidden_inputs(webpage) + fields = self._hidden_inputs(webpage) headers = { 'Referer': url, diff --git a/youtube_dl/extractor/promptfile.py b/youtube_dl/extractor/promptfile.py index 81a63c7fc..8190ed676 100644 --- a/youtube_dl/extractor/promptfile.py +++ b/youtube_dl/extractor/promptfile.py @@ -35,7 +35,7 @@ class PromptFileIE(InfoExtractor): raise ExtractorError('Video %s does not exist' % video_id, expected=True) - fields = self._form_hidden_inputs(webpage) + fields = self._hidden_inputs(webpage) post = compat_urllib_parse.urlencode(fields) req = compat_urllib_request.Request(url, post) req.add_header('Content-type', 'application/x-www-form-urlencoded') diff --git a/youtube_dl/extractor/prosiebensat1.py b/youtube_dl/extractor/prosiebensat1.py index 536a42dc8..fec008ce7 100644 --- a/youtube_dl/extractor/prosiebensat1.py +++ b/youtube_dl/extractor/prosiebensat1.py @@ -9,8 +9,9 @@ from ..compat import ( compat_urllib_parse, ) from ..utils import ( - unified_strdate, + determine_ext, int_or_none, + unified_strdate, ) @@ -21,6 +22,11 @@ class ProSiebenSat1IE(InfoExtractor): _TESTS = [ { + # Tests changes introduced in https://github.com/rg3/youtube-dl/pull/6242 + # in response to fixing https://github.com/rg3/youtube-dl/issues/6215: + # - malformed f4m manifest support + # - proper handling of URLs starting with `https?://` in 2.0 manifests + # - recursive child f4m manifests extraction 'url': 'http://www.prosieben.de/tv/circus-halligalli/videos/218-staffel-2-episode-18-jahresrueckblick-ganze-folge', 'info_dict': { 'id': '2104602', @@ -208,7 +214,7 @@ class ProSiebenSat1IE(InfoExtractor): clip_id = self._html_search_regex(self._CLIPID_REGEXES, webpage, 'clip id') access_token = 'prosieben' - client_name = 'kolibri-1.12.6' + client_name = 'kolibri-2.0.19-splec4' client_location = url videos_api_url = 'http://vas.sim-technik.de/vas/live/v2/videos?%s' % compat_urllib_parse.urlencode({ @@ -275,8 +281,9 @@ class ProSiebenSat1IE(InfoExtractor): for source in urls_sources: protocol = source['protocol'] + source_url = source['url'] if protocol == 'rtmp' or protocol == 'rtmpe': - mobj = re.search(r'^(?P<url>rtmpe?://[^/]+)/(?P<path>.+)$', source['url']) + mobj = re.search(r'^(?P<url>rtmpe?://[^/]+)/(?P<path>.+)$', source_url) if not mobj: continue path = mobj.group('path') @@ -293,9 +300,11 @@ class ProSiebenSat1IE(InfoExtractor): 'ext': 'mp4', 'format_id': '%s_%s' % (source['cdn'], source['bitrate']), }) + elif 'f4mgenerator' in source_url or determine_ext(source_url) == 'f4m': + formats.extend(self._extract_f4m_formats(source_url, clip_id)) else: formats.append({ - 'url': source['url'], + 'url': source_url, 'vbr': fix_bitrate(source['bitrate']), }) diff --git a/youtube_dl/extractor/shared.py b/youtube_dl/extractor/shared.py index 6e2b94e7d..a07677686 100644 --- a/youtube_dl/extractor/shared.py +++ b/youtube_dl/extractor/shared.py @@ -34,7 +34,7 @@ class SharedIE(InfoExtractor): raise ExtractorError( 'Video %s does not exist' % video_id, expected=True) - download_form = self._form_hidden_inputs(webpage) + download_form = self._hidden_inputs(webpage) request = compat_urllib_request.Request( url, compat_urllib_parse.urlencode(download_form)) request.add_header('Content-Type', 'application/x-www-form-urlencoded') diff --git a/youtube_dl/extractor/spankwire.py b/youtube_dl/extractor/spankwire.py index bff75d6b2..5fa6faf18 100644 --- a/youtube_dl/extractor/spankwire.py +++ b/youtube_dl/extractor/spankwire.py @@ -4,7 +4,7 @@ import re from .common import InfoExtractor from ..compat import ( - compat_urllib_parse, + compat_urllib_parse_unquote, compat_urllib_parse_urlparse, compat_urllib_request, ) @@ -68,7 +68,7 @@ class SpankwireIE(InfoExtractor): webpage, 'comment count', fatal=False)) video_urls = list(map( - compat_urllib_parse.unquote, + compat_urllib_parse_unquote, re.findall(r'playerData\.cdnPath[0-9]{3,}\s*=\s*(?:encodeURIComponent\()?["\']([^"\']+)["\']', webpage))) if webpage.find('flashvars\.encrypted = "true"') != -1: password = self._search_regex( diff --git a/youtube_dl/extractor/twitch.py b/youtube_dl/extractor/twitch.py index af2b798fb..92b6dc1b8 100644 --- a/youtube_dl/extractor/twitch.py +++ b/youtube_dl/extractor/twitch.py @@ -59,7 +59,7 @@ class TwitchBaseIE(InfoExtractor): login_page = self._download_webpage( self._LOGIN_URL, None, 'Downloading login page') - login_form = self._form_hidden_inputs(login_page) + login_form = self._hidden_inputs(login_page) login_form.update({ 'login': username.encode('utf-8'), diff --git a/youtube_dl/extractor/udemy.py b/youtube_dl/extractor/udemy.py index 4667ed83b..e2bab52fe 100644 --- a/youtube_dl/extractor/udemy.py +++ b/youtube_dl/extractor/udemy.py @@ -15,7 +15,8 @@ from ..utils import ( class UdemyIE(InfoExtractor): IE_NAME = 'udemy' _VALID_URL = r'https?://www\.udemy\.com/(?:[^#]+#/lecture/|lecture/view/?\?lectureId=)(?P<id>\d+)' - _LOGIN_URL = 'https://www.udemy.com/join/login-submit/' + _LOGIN_URL = 'https://www.udemy.com/join/login-popup/?displayType=ajax&showSkipButton=1' + _ORIGIN_URL = 'https://www.udemy.com' _NETRC_MACHINE = 'udemy' _TESTS = [{ @@ -74,29 +75,33 @@ class UdemyIE(InfoExtractor): expected=True) login_popup = self._download_webpage( - 'https://www.udemy.com/join/login-popup?displayType=ajax&showSkipButton=1', None, - 'Downloading login popup') + self._LOGIN_URL, None, 'Downloading login popup') if login_popup == '<div class="run-command close-popup redirect" data-url="https://www.udemy.com/"></div>': return - csrf = self._html_search_regex( - r'<input type="hidden" name="csrf" value="(.+?)"', - login_popup, 'csrf token') + login_form = self._form_hidden_inputs('login-form', login_popup) + + login_form.update({ + 'email': username.encode('utf-8'), + 'password': password.encode('utf-8'), + }) - login_form = { - 'email': username, - 'password': password, - 'csrf': csrf, - 'displayType': 'json', - 'isSubmitted': '1', - } request = compat_urllib_request.Request( self._LOGIN_URL, compat_urllib_parse.urlencode(login_form).encode('utf-8')) - response = self._download_json( + request.add_header('Referer', self._ORIGIN_URL) + request.add_header('Origin', self._ORIGIN_URL) + + response = self._download_webpage( request, None, 'Logging in as %s' % username) - if 'returnUrl' not in response: + if all(logout_pattern not in response + for logout_pattern in ['href="https://www.udemy.com/user/logout/', '>Logout<']): + error = self._html_search_regex( + r'(?s)<div[^>]+class="form-errors[^"]*">(.+?)</div>', + response, 'error message', default=None) + if error: + raise ExtractorError('Unable to login: %s' % error, expected=True) raise ExtractorError('Unable to log in') def _real_extract(self, url): diff --git a/youtube_dl/extractor/veehd.py b/youtube_dl/extractor/veehd.py index 346edf485..0d8d832cc 100644 --- a/youtube_dl/extractor/veehd.py +++ b/youtube_dl/extractor/veehd.py @@ -5,6 +5,7 @@ import json from .common import InfoExtractor from ..compat import ( + compat_urllib_parse_unquote, compat_urlparse, ) from ..utils import ( @@ -76,7 +77,7 @@ class VeeHDIE(InfoExtractor): if config_json: config = json.loads(config_json) - video_url = compat_urlparse.unquote(config['clip']['url']) + video_url = compat_urllib_parse_unquote(config['clip']['url']) if not video_url: video_url = self._html_search_regex( diff --git a/youtube_dl/extractor/vimeo.py b/youtube_dl/extractor/vimeo.py index d63c03183..10d6745af 100644 --- a/youtube_dl/extractor/vimeo.py +++ b/youtube_dl/extractor/vimeo.py @@ -452,7 +452,7 @@ class VimeoChannelIE(InfoExtractor): password = self._downloader.params.get('videopassword', None) if password is None: raise ExtractorError('This album is protected by a password, use the --video-password option', expected=True) - fields = self._form_hidden_inputs(login_form) + fields = self._hidden_inputs(login_form) token = self._search_regex(r'xsrft[\s=:"\']+([^"\']+)', webpage, 'login token') fields['token'] = token fields['password'] = password diff --git a/youtube_dl/extractor/vk.py b/youtube_dl/extractor/vk.py index 8ac3aeac0..8f677cae3 100644 --- a/youtube_dl/extractor/vk.py +++ b/youtube_dl/extractor/vk.py @@ -168,7 +168,7 @@ class VKIE(InfoExtractor): login_page = self._download_webpage( 'https://vk.com', None, 'Downloading login page') - login_form = self._form_hidden_inputs(login_page) + login_form = self._hidden_inputs(login_page) login_form.update({ 'email': username.encode('cp1251'), diff --git a/youtube_dl/extractor/vodlocker.py b/youtube_dl/extractor/vodlocker.py index 4804692bf..ccf1928b5 100644 --- a/youtube_dl/extractor/vodlocker.py +++ b/youtube_dl/extractor/vodlocker.py @@ -26,7 +26,7 @@ class VodlockerIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - fields = self._form_hidden_inputs(webpage) + fields = self._hidden_inputs(webpage) if fields['op'] == 'download1': self._sleep(3, video_id) # they do detect when requests happen too fast! diff --git a/youtube_dl/extractor/xbef.py b/youtube_dl/extractor/xbef.py index 80c48c37d..4ff99e5ca 100644 --- a/youtube_dl/extractor/xbef.py +++ b/youtube_dl/extractor/xbef.py @@ -1,9 +1,7 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import ( - compat_urllib_parse, -) +from ..compat import compat_urllib_parse_unquote class XBefIE(InfoExtractor): @@ -30,7 +28,7 @@ class XBefIE(InfoExtractor): config_url_enc = self._download_webpage( 'http://xbef.com/Main/GetVideoURLEncoded/%s' % video_id, video_id, note='Retrieving config URL') - config_url = compat_urllib_parse.unquote(config_url_enc) + config_url = compat_urllib_parse_unquote(config_url_enc) config = self._download_xml( config_url, video_id, note='Retrieving config') diff --git a/youtube_dl/extractor/xnxx.py b/youtube_dl/extractor/xnxx.py index 79ed6c744..5a41f8ffa 100644 --- a/youtube_dl/extractor/xnxx.py +++ b/youtube_dl/extractor/xnxx.py @@ -2,9 +2,7 @@ from __future__ import unicode_literals from .common import InfoExtractor -from ..compat import ( - compat_urllib_parse, -) +from ..compat import compat_urllib_parse_unquote class XNXXIE(InfoExtractor): @@ -26,7 +24,7 @@ class XNXXIE(InfoExtractor): video_url = self._search_regex(r'flv_url=(.*?)&', webpage, 'video URL') - video_url = compat_urllib_parse.unquote(video_url) + video_url = compat_urllib_parse_unquote(video_url) video_title = self._html_search_regex(r'<title>(.*?)\s+-\s+XNXX.COM', webpage, 'title') diff --git a/youtube_dl/extractor/xtube.py b/youtube_dl/extractor/xtube.py index 1644f53c8..779e4f46a 100644 --- a/youtube_dl/extractor/xtube.py +++ b/youtube_dl/extractor/xtube.py @@ -5,7 +5,7 @@ import re from .common import InfoExtractor from ..compat import ( compat_urllib_request, - compat_urllib_parse, + compat_urllib_parse_unquote, ) from ..utils import ( parse_duration, @@ -59,7 +59,7 @@ class XTubeIE(InfoExtractor): for format_id, video_url in re.findall( r'flashvars\.quality_(.+?)\s*=\s*"([^"]+)"', webpage): fmt = { - 'url': compat_urllib_parse.unquote(video_url), + 'url': compat_urllib_parse_unquote(video_url), 'format_id': format_id, } m = re.search(r'^(?P<height>\d+)[pP]', format_id) @@ -68,7 +68,7 @@ class XTubeIE(InfoExtractor): formats.append(fmt) if not formats: - video_url = compat_urllib_parse.unquote(self._search_regex( + video_url = compat_urllib_parse_unquote(self._search_regex( r'flashvars\.video_url\s*=\s*"([^"]+)"', webpage, 'video URL')) formats.append({'url': video_url}) diff --git a/youtube_dl/extractor/xvideos.py b/youtube_dl/extractor/xvideos.py index d8415bed4..5dcf2fdd1 100644 --- a/youtube_dl/extractor/xvideos.py +++ b/youtube_dl/extractor/xvideos.py @@ -4,7 +4,7 @@ import re from .common import InfoExtractor from ..compat import ( - compat_urllib_parse, + compat_urllib_parse_unquote, compat_urllib_request, ) from ..utils import ( @@ -37,7 +37,7 @@ class XVideosIE(InfoExtractor): if mobj: raise ExtractorError('%s said: %s' % (self.IE_NAME, clean_html(mobj.group(1))), expected=True) - video_url = compat_urllib_parse.unquote( + video_url = compat_urllib_parse_unquote( self._search_regex(r'flv_url=(.+?)&', webpage, 'video URL')) video_title = self._html_search_regex( r'<title>(.*?)\s+-\s+XVID', webpage, 'title') diff --git a/youtube_dl/extractor/ynet.py b/youtube_dl/extractor/ynet.py index 894678a23..869f3e819 100644 --- a/youtube_dl/extractor/ynet.py +++ b/youtube_dl/extractor/ynet.py @@ -5,7 +5,7 @@ import re import json from .common import InfoExtractor -from ..compat import compat_urllib_parse +from ..compat import compat_urllib_parse_unquote_plus class YnetIE(InfoExtractor): @@ -34,7 +34,7 @@ class YnetIE(InfoExtractor): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) - content = compat_urllib_parse.unquote_plus(self._og_search_video_url(webpage)) + content = compat_urllib_parse_unquote_plus(self._og_search_video_url(webpage)) config = json.loads(self._search_regex(r'config=({.+?})$', content, 'video config')) f4m_url = config['clip']['url'] title = self._og_search_title(webpage) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 3c629d38a..e7f5c7861 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -17,6 +17,8 @@ from ..compat import ( compat_chr, compat_parse_qs, compat_urllib_parse, + compat_urllib_parse_unquote, + compat_urllib_parse_unquote_plus, compat_urllib_request, compat_urlparse, compat_str, @@ -865,7 +867,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # Extract original video URL from URL with redirection, like age verification, using next_url parameter mobj = re.search(self._NEXT_URL_RE, url) if mobj: - url = proto + '://www.youtube.com/' + compat_urllib_parse.unquote(mobj.group(1)).lstrip('/') + url = proto + '://www.youtube.com/' + compat_urllib_parse_unquote(mobj.group(1)).lstrip('/') video_id = self.extract_id(url) # Get video webpage @@ -973,7 +975,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): # uploader if 'author' not in video_info: raise ExtractorError('Unable to extract uploader name') - video_uploader = compat_urllib_parse.unquote_plus(video_info['author'][0]) + video_uploader = compat_urllib_parse_unquote_plus(video_info['author'][0]) # uploader_id video_uploader_id = None @@ -1000,7 +1002,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): self._downloader.report_warning('unable to extract video thumbnail') video_thumbnail = None else: # don't panic if we can't find it - video_thumbnail = compat_urllib_parse.unquote_plus(video_info['thumbnail_url'][0]) + video_thumbnail = compat_urllib_parse_unquote_plus(video_info['thumbnail_url'][0]) # upload date upload_date = self._html_search_meta( @@ -1062,7 +1064,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): self._downloader.report_warning('unable to extract video duration') video_duration = None else: - video_duration = int(compat_urllib_parse.unquote_plus(video_info['length_seconds'][0])) + video_duration = int(compat_urllib_parse_unquote_plus(video_info['length_seconds'][0])) # annotations video_annotations = None @@ -1609,7 +1611,7 @@ class YoutubeSearchURLIE(InfoExtractor): def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) - query = compat_urllib_parse.unquote_plus(mobj.group('query')) + query = compat_urllib_parse_unquote_plus(mobj.group('query')) webpage = self._download_webpage(url, query) result_code = self._search_regex( diff --git a/youtube_dl/postprocessor/ffmpeg.py b/youtube_dl/postprocessor/ffmpeg.py index 1ecce22e7..1f723908b 100644 --- a/youtube_dl/postprocessor/ffmpeg.py +++ b/youtube_dl/postprocessor/ffmpeg.py @@ -265,7 +265,7 @@ class FFmpegExtractAudioPP(FFmpegPostProcessor): # If we download foo.mp3 and convert it to... foo.mp3, then don't delete foo.mp3, silly. if (new_path == path or (self._nopostoverwrites and os.path.exists(encodeFilename(new_path)))): - self._downloader.to_screen('[youtube] Post-process file %s exists, skipping' % new_path) + self._downloader.to_screen('[ffmpeg] Post-process file %s exists, skipping' % new_path) return [], information try: |