From 648e6a1ffe45ceae2995c3f9ec6a9413aad55640 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 18 Oct 2015 00:11:34 +0600 Subject: [youtube] Generalize playlist entries extraction (Closes #6699, closes #6992) --- youtube_dl/extractor/youtube.py | 121 +++++++++++++++++----------------------- 1 file changed, 52 insertions(+), 69 deletions(-) (limited to 'youtube_dl/extractor/youtube.py') diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index b252e36e1..08e821362 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -178,6 +178,52 @@ class YoutubeBaseInfoExtractor(InfoExtractor): return +class YoutubePlaylistBaseInfoExtractor(InfoExtractor): + # Extract the video ids from the playlist pages + def _entries(self, page, playlist_id): + more_widget_html = content_html = page + for page_num in itertools.count(1): + for video_id, video_title in self.extract_videos_from_page(content_html): + yield self.url_result( + video_id, 'Youtube', video_id=video_id, + video_title=video_title) + + mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html) + if not mobj: + break + + more = self._download_json( + 'https://youtube.com/%s' % mobj.group('more'), playlist_id, + 'Downloading page #%s' % page_num, + transform_source=uppercase_escape) + content_html = more['content_html'] + if not content_html.strip(): + # Some webpages show a "Load more" button but they don't + # have more videos + break + more_widget_html = more['load_more_widget_html'] + + def extract_videos_from_page(self, page): + ids_in_page = [] + titles_in_page = [] + for mobj in re.finditer(self._VIDEO_RE, page): + # The link with index 0 is not the first video of the playlist (not sure if still actual) + if 'index' in mobj.groupdict() and mobj.group('id') == '0': + continue + video_id = mobj.group('id') + video_title = unescapeHTML(mobj.group('title')) + if video_title: + video_title = video_title.strip() + try: + 
idx = ids_in_page.index(video_id) + if video_title and not titles_in_page[idx]: + titles_in_page[idx] = video_title + except ValueError: + ids_in_page.append(video_id) + titles_in_page.append(video_title) + return zip(ids_in_page, titles_in_page) + + class YoutubeIE(YoutubeBaseInfoExtractor): IE_DESC = 'YouTube.com' _VALID_URL = r"""(?x)^ @@ -1419,7 +1465,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): } -class YoutubePlaylistIE(YoutubeBaseInfoExtractor): +class YoutubePlaylistIE(YoutubeBaseInfoExtractor, YoutubePlaylistBaseInfoExtractor): IE_DESC = 'YouTube.com playlists' _VALID_URL = r"""(?x)(?: (?:https?://)? @@ -1440,7 +1486,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): ((?:PL|LL|EC|UU|FL|RD|UL)[0-9A-Za-z-_]{10,}) )""" _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s' - _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)' + _VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})&[^"]*?index=(?P<index>\d+)(?:[^>]+>(?P<title>[^<]+))?' IE_NAME = 'youtube:playlist' _TESTS = [{ 'url': 'https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re', @@ -1557,37 +1603,11 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): else: self.report_warning('Youtube gives an alert message: ' + match) - # Extract the video ids from the playlist pages - def _entries(): - more_widget_html = content_html = page - for page_num in itertools.count(1): - matches = re.finditer(self._VIDEO_RE, content_html) - # We remove the duplicates and the link with index 0 - # (it's not the first video of the playlist) - new_ids = orderedSet(m.group('id') for m in matches if m.group('index') != '0') - for vid_id in new_ids: - yield self.url_result(vid_id, 'Youtube', video_id=vid_id) - - mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html) - if not mobj: - break - - more = self._download_json( - 'https://youtube.com/%s' % mobj.group('more'), playlist_id, - 'Downloading page #%s' % page_num, - transform_source=uppercase_escape) - 
content_html = more['content_html'] - if not content_html.strip(): - # Some webpages show a "Load more" button but they don't - # have more videos - break - more_widget_html = more['load_more_widget_html'] - playlist_title = self._html_search_regex( r'(?s)<h1 class="pl-header-title[^"]*">\s*(.*?)\s*</h1>', page, 'title') - return self.playlist_result(_entries(), playlist_id, playlist_title) + return self.playlist_result(self._entries(page, playlist_id), playlist_id, playlist_title) def _real_extract(self, url): # Extract playlist id @@ -1613,10 +1633,11 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor): return self._extract_playlist(playlist_id) -class YoutubeChannelIE(InfoExtractor): +class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor): IE_DESC = 'YouTube.com channels' _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)' _TEMPLATE_URL = 'https://www.youtube.com/channel/%s/videos' + _VIDEO_RE = r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?' 
IE_NAME = 'youtube:channel' _TESTS = [{ 'note': 'paginated channel', @@ -1627,22 +1648,6 @@ class YoutubeChannelIE(InfoExtractor): } }] - @staticmethod - def extract_videos_from_page(page): - ids_in_page = [] - titles_in_page = [] - for mobj in re.finditer(r'(?:title="(?P<title>[^"]+)"[^>]+)?href="/watch\?v=(?P<id>[0-9A-Za-z_-]+)&?', page): - video_id = mobj.group('id') - video_title = unescapeHTML(mobj.group('title')) - try: - idx = ids_in_page.index(video_id) - if video_title and not titles_in_page[idx]: - titles_in_page[idx] = video_title - except ValueError: - ids_in_page.append(video_id) - titles_in_page.append(video_title) - return zip(ids_in_page, titles_in_page) - def _real_extract(self, url): channel_id = self._match_id(url) @@ -1685,29 +1690,7 @@ class YoutubeChannelIE(InfoExtractor): for video_id, video_title in self.extract_videos_from_page(channel_page)] return self.playlist_result(entries, channel_id) - def _entries(): - more_widget_html = content_html = channel_page - for pagenum in itertools.count(1): - - for video_id, video_title in self.extract_videos_from_page(content_html): - yield self.url_result( - video_id, 'Youtube', video_id=video_id, - video_title=video_title) - - mobj = re.search( - r'data-uix-load-more-href="/?(?P<more>[^"]+)"', - more_widget_html) - if not mobj: - break - - more = self._download_json( - 'https://youtube.com/%s' % mobj.group('more'), channel_id, - 'Downloading page #%s' % (pagenum + 1), - transform_source=uppercase_escape) - content_html = more['content_html'] - more_widget_html = more['load_more_widget_html'] - - return self.playlist_result(_entries(), channel_id) + return self.playlist_result(self._entries(channel_page, channel_id), channel_id) class YoutubeUserIE(YoutubeChannelIE): -- cgit v1.2.3 From 9170ca5b16f3420892ff06bbe5cccf1679eb75e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Fri, 23 Oct 2015 14:16:08 +0200 Subject: 
[youtube:channel] Fix test --- youtube_dl/extractor/youtube.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'youtube_dl/extractor/youtube.py') diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 08e821362..bae1b1117 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1644,7 +1644,8 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor): 'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w', 'playlist_mincount': 91, 'info_dict': { - 'id': 'UCKfVa3S1e4PHvxWcwyMMg8w', + 'id': 'UUKfVa3S1e4PHvxWcwyMMg8w', + 'title': 'Uploads from lex will', } }] -- cgit v1.2.3 From 5c43afd40f8ba101e0cf90b8fcb5713b378a62c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Fri, 23 Oct 2015 14:23:45 +0200 Subject: [youtube:channel] Support age restricted channels (fixes #7277) --- youtube_dl/extractor/youtube.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'youtube_dl/extractor/youtube.py') diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index bae1b1117..d7eda7aa7 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1647,6 +1647,15 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor): 'id': 'UUKfVa3S1e4PHvxWcwyMMg8w', 'title': 'Uploads from lex will', } + }, { + 'note': 'Age restricted channel', + # from https://www.youtube.com/user/DeusExOfficial + 'url': 'https://www.youtube.com/channel/UCs0ifCMCm1icqRbqhUINa0w', + 'playlist_mincount': 64, + 'info_dict': { + 'id': 'UUs0ifCMCm1icqRbqhUINa0w', + 'title': 'Uploads from Deus Ex', + }, }] def _real_extract(self, url): @@ -1667,7 +1676,7 @@ class YoutubeChannelIE(YoutubePlaylistBaseInfoExtractor): 'channelId', channel_page, 'channel id', default=None) if not channel_playlist_id: channel_playlist_id = self._search_regex( - 
r'data-channel-external-id="([^"]+)"', + r'data-(?:channel-external-|yt)id="([^"]+)"', channel_page, 'channel id', default=None) if channel_playlist_id and channel_playlist_id.startswith('UC'): playlist_id = 'UU' + channel_playlist_id[2:] -- cgit v1.2.3 From 44b2264feae331eeb34e83eed1387def3d61a437 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 4 Nov 2015 22:12:24 +0600 Subject: [youtube] Prefer video_info with token available --- youtube_dl/extractor/youtube.py | 2 ++ 1 file changed, 2 insertions(+) (limited to 'youtube_dl/extractor/youtube.py') diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index d7eda7aa7..5eeb3c663 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1107,6 +1107,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if not video_info: video_info = get_video_info if 'token' in get_video_info: + if 'token' not in video_info: + video_info = get_video_info break if 'token' not in video_info: if 'reason' in video_info: -- cgit v1.2.3 From 89ea063eebae84792a7ccb968533ff8bf6a41d56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Wed, 4 Nov 2015 22:49:23 +0600 Subject: [youtube] Clarify rationale for preferring a video info with token (#7362) --- youtube_dl/extractor/youtube.py | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'youtube_dl/extractor/youtube.py') diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 5eeb3c663..e2a43299f 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1107,6 +1107,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if not video_info: video_info = get_video_info if 'token' in get_video_info: + # Different get_video_info requests may report different results, e.g. 
+ # some may report video unavailability, but some may serve it without + # any complaint (see https://github.com/rg3/youtube-dl/issues/7362, + # the original webpage as well as el=info and el=embedded get_video_info + # requests report video unavailability due to geo restriction while + # el=detailpage succeeds and returns valid data). This is probably + # due to YouTube measures against IP ranges of hosting providers. + # Working around by preferring the first succeeded video_info containing + # the token if no such video_info yet was found. if 'token' not in video_info: video_info = get_video_info break -- cgit v1.2.3 From 50f84a9ae171233c08ada41e03f6555c5ed95236 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Tue, 10 Nov 2015 12:55:01 +0800 Subject: [youtube] Support new base.js html5 player --- youtube_dl/extractor/youtube.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'youtube_dl/extractor/youtube.py') diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index e2a43299f..687e0b4db 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -703,7 +703,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def _extract_signature_function(self, video_id, player_url, example_sig): id_m = re.match( - r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?)?\.(?P<ext>[a-z]+)$', + r'.*?-(?P<id>[a-zA-Z0-9_-]+)(?:/watch_as3|/html5player(?:-new)?|/base)?\.(?P<ext>[a-z]+)$', player_url) if not id_m: raise ExtractorError('Cannot identify player %r' % player_url) @@ -1343,7 +1343,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): player_desc = 'flash player %s' % player_version else: player_version = self._search_regex( - r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js', + [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js', r'(?:www|player)-([^/]+)/base\.js'], player_url, 'html5 player', fatal=False) player_desc = 'html5 player %s' % player_version -- cgit v1.2.3 
From 63b4295d20a5b98a9fca4dc3ce132b26408110d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jaime=20Marqui=CC=81nez=20Ferra=CC=81ndiz?= <jaime.marquinez.ferrandiz@gmail.com> Date: Wed, 18 Nov 2015 18:28:05 +0100 Subject: [youtube:playlist] fix title extraction (fixes #7544 and #7545) --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'youtube_dl/extractor/youtube.py') diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 687e0b4db..364ca102a 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1615,7 +1615,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor, YoutubePlaylistBaseInfoExtract self.report_warning('Youtube gives an alert message: ' + match) playlist_title = self._html_search_regex( - r'(?s)<h1 class="pl-header-title[^"]*">\s*(.*?)\s*</h1>', + r'(?s)<h1 class="pl-header-title[^"]*"[^>]*>\s*(.*?)\s*</h1>', page, 'title') return self.playlist_result(self._entries(page, playlist_id), playlist_id, playlist_title) -- cgit v1.2.3 From 0c14841585db1baa2f9a4a5ff263035977cf0964 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 22 Nov 2015 04:17:07 +0600 Subject: [youtube:user:playlists] Add extractor (Closes #3817) --- youtube_dl/extractor/youtube.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'youtube_dl/extractor/youtube.py') diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 364ca102a..abc67f07f 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -224,6 +224,17 @@ class YoutubePlaylistBaseInfoExtractor(InfoExtractor): return zip(ids_in_page, titles_in_page) +class YoutubePlaylistsBaseInfoExtractor(InfoExtractor): + def _real_extract(self, url): + playlist_id = self._match_id(url) + webpage = self._download_webpage(url, playlist_id) + entries = [ + self.url_result(compat_urlparse.urljoin(url, playlist), 
'YoutubePlaylist') + for playlist in re.findall(r'href="(/playlist\?list=.+?)"', webpage)] + title = self._og_search_title(webpage, fatal=False) + return self.playlist_result(entries, playlist_id, title) + + class YoutubeIE(YoutubeBaseInfoExtractor): IE_DESC = 'YouTube.com' _VALID_URL = r"""(?x)^ @@ -1742,6 +1753,21 @@ class YoutubeUserIE(YoutubeChannelIE): return super(YoutubeUserIE, cls).suitable(url) +class YoutubeUserPlaylistsIE(YoutubePlaylistsBaseInfoExtractor): + IE_DESC = 'YouTube.com user playlists' + _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/user/(?P<id>[^/]+)/playlists' + IE_NAME = 'youtube:user:playlists' + + _TEST = { + 'url': 'http://www.youtube.com/user/ThirstForScience/playlists', + 'playlist_mincount': 4, + 'info_dict': { + 'id': 'ThirstForScience', + 'title': 'Thirst for Science', + }, + } + + class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE): IE_DESC = 'YouTube.com searches' # there doesn't appear to be a real limit, for example if you search for -- cgit v1.2.3 From 136dadde9543a80f490b26c822dcfdff5541c335 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 22 Nov 2015 04:18:20 +0600 Subject: [youtube:show] Rework in terms of playlists base extractor --- youtube_dl/extractor/youtube.py | 25 ++++--------------------- 1 file changed, 4 insertions(+), 21 deletions(-) (limited to 'youtube_dl/extractor/youtube.py') diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index abc67f07f..c56f8a0a2 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1863,7 +1863,7 @@ class YoutubeSearchURLIE(InfoExtractor): } -class YoutubeShowIE(InfoExtractor): +class YoutubeShowIE(YoutubePlaylistsBaseInfoExtractor): IE_DESC = 'YouTube.com (multi-season) shows' _VALID_URL = r'https?://www\.youtube\.com/show/(?P<id>[^?#]*)' IE_NAME = 'youtube:show' @@ -1877,26 +1877,9 @@ class YoutubeShowIE(InfoExtractor): }] def _real_extract(self, url): - mobj = 
re.match(self._VALID_URL, url) - playlist_id = mobj.group('id') - webpage = self._download_webpage( - 'https://www.youtube.com/show/%s/playlists' % playlist_id, playlist_id, 'Downloading show webpage') - # There's one playlist for each season of the show - m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage)) - self.to_screen('%s: Found %s seasons' % (playlist_id, len(m_seasons))) - entries = [ - self.url_result( - 'https://www.youtube.com' + season.group(1), 'YoutubePlaylist') - for season in m_seasons - ] - title = self._og_search_title(webpage, fatal=False) - - return { - '_type': 'playlist', - 'id': playlist_id, - 'title': title, - 'entries': entries, - } + playlist_id = self._match_id(url) + return super(YoutubeShowIE, self)._real_extract( + 'https://www.youtube.com/show/%s/playlists' % playlist_id) class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor): -- cgit v1.2.3 From 061a75edd6bab2f30978b458fe7402ff9e9c02a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 22 Nov 2015 05:01:01 +0600 Subject: [youtube] Extract base for entry list extractors and support multi page lists of playlists --- youtube_dl/extractor/youtube.py | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) (limited to 'youtube_dl/extractor/youtube.py') diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index c56f8a0a2..8352ad1da 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -178,15 +178,13 @@ class YoutubeBaseInfoExtractor(InfoExtractor): return -class YoutubePlaylistBaseInfoExtractor(InfoExtractor): - # Extract the video ids from the playlist pages +class YoutubeEntryListBaseInfoExtractor(InfoExtractor): + # Extract entries from page with "Load more" button def _entries(self, page, playlist_id): more_widget_html = content_html = page for page_num in itertools.count(1): - for video_id, video_title in 
self.extract_videos_from_page(content_html): - yield self.url_result( - video_id, 'Youtube', video_id=video_id, - video_title=video_title) + for entry in self._process_page(content_html): + yield entry mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html) if not mobj: @@ -203,6 +201,12 @@ class YoutubePlaylistBaseInfoExtractor(InfoExtractor): break more_widget_html = more['load_more_widget_html'] + +class YoutubePlaylistBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor): + def _process_page(self, content): + for video_id, video_title in self.extract_videos_from_page(content): + yield self.url_result(video_id, 'Youtube', video_id, video_title) + def extract_videos_from_page(self, page): ids_in_page = [] titles_in_page = [] @@ -224,15 +228,17 @@ class YoutubePlaylistBaseInfoExtractor(InfoExtractor): return zip(ids_in_page, titles_in_page) -class YoutubePlaylistsBaseInfoExtractor(InfoExtractor): +class YoutubePlaylistsBaseInfoExtractor(YoutubeEntryListBaseInfoExtractor): + def _process_page(self, content): + for playlist_id in re.findall(r'href="/?playlist\?list=(.+?)"', content): + yield self.url_result( + 'https://www.youtube.com/playlist?list=%s' % playlist_id, 'YoutubePlaylist') + def _real_extract(self, url): playlist_id = self._match_id(url) webpage = self._download_webpage(url, playlist_id) - entries = [ - self.url_result(compat_urlparse.urljoin(url, playlist), 'YoutubePlaylist') - for playlist in re.findall(r'href="(/playlist\?list=.+?)"', webpage)] title = self._og_search_title(webpage, fatal=False) - return self.playlist_result(entries, playlist_id, title) + return self.playlist_result(self._entries(webpage, playlist_id), playlist_id, title) class YoutubeIE(YoutubeBaseInfoExtractor): -- cgit v1.2.3 From e568c2233e7b4b27c9a5c56322ab7633a5f0b1f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 22 Nov 2015 05:03:23 +0600 Subject: [youtube] Add test for multi page list of playlists --- 
youtube_dl/extractor/youtube.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'youtube_dl/extractor/youtube.py') diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 8352ad1da..4a0ff6e9c 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1764,14 +1764,22 @@ class YoutubeUserPlaylistsIE(YoutubePlaylistsBaseInfoExtractor): _VALID_URL = r'https?://(?:\w+\.)?youtube\.com/user/(?P<id>[^/]+)/playlists' IE_NAME = 'youtube:user:playlists' - _TEST = { + _TESTS = [{ 'url': 'http://www.youtube.com/user/ThirstForScience/playlists', 'playlist_mincount': 4, 'info_dict': { 'id': 'ThirstForScience', 'title': 'Thirst for Science', }, - } + }, { + # with "Load more" button + 'url': 'http://www.youtube.com/user/igorkle1/playlists?view=1&sort=dd', + 'playlist_mincount': 70, + 'info_dict': { + 'id': 'igorkle1', + 'title': 'Игорь Клейнер', + }, + }] class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE): -- cgit v1.2.3 From 3cfd000849208b58dab4f78d1486d3f24552009e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Lalinsk=C3=BD?= <lukas@oxygene.sk> Date: Sun, 22 Nov 2015 13:14:35 +0100 Subject: [youtube] More explicit player config JSON extraction (fixes #7468) --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'youtube_dl/extractor/youtube.py') diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 687e0b4db..21731188a 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1074,7 +1074,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): age_gate = False video_info = None # Try looking directly into the video webpage - mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});', video_webpage) + mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});ytplayer', video_webpage) if mobj: json_code = uppercase_escape(mobj.group(1)) ytplayer_config = json.loads(json_code) -- cgit 
v1.2.3 From 0e49d9a6b0216555c2a3ee063ae3d1c6d09edbd4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Lalinsk=C3=BD?= <lukas@oxygene.sk> Date: Sun, 22 Nov 2015 13:49:33 +0100 Subject: [youtube] Fall back to the original regex for ytplayer.config --- youtube_dl/extractor/youtube.py | 39 +++++++++++++++++++++++++++++++-------- 1 file changed, 31 insertions(+), 8 deletions(-) (limited to 'youtube_dl/extractor/youtube.py') diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 21731188a..7e74d2368 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -674,7 +674,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor): { 'url': 'http://vid.plus/FlRa-iH7PGw', 'only_matching': True, - } + }, + { + # Title with JS-like syntax "};" + 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg', + 'info_dict': { + 'id': 'lsguqyKfVQg', + 'ext': 'mp4', + 'title': '{dark walk}; Loki/AC/Dishonored; collab w/Elflover21', + 'description': 'md5:8085699c11dc3f597ce0410b0dcbb34a', + 'upload_date': '20151119', + 'uploader_id': 'IronSoulElf', + 'uploader': 'IronSoulElf', + }, + 'params': { + 'skip_download': True, + }, + }, ] def __init__(self, *args, **kwargs): @@ -858,16 +874,25 @@ class YoutubeIE(YoutubeBaseInfoExtractor): return {} return sub_lang_list + def _get_ytplayer_config(self, webpage): + patterns = [ + r';ytplayer\.config\s*=\s*({.*?});ytplayer', + r';ytplayer\.config\s*=\s*({.*?});', + ] + for pattern in patterns: + config = self._search_regex(pattern, webpage, 'ytconfig.player', default=None) + if config is not None: + return json.loads(uppercase_escape(config)) + def _get_automatic_captions(self, video_id, webpage): """We need the webpage for getting the captions url, pass it as an argument to speed up the process.""" self.to_screen('%s: Looking for automatic captions' % video_id) - mobj = re.search(r';ytplayer.config = ({.*?});', webpage) + player_config = self._get_ytplayer_config(webpage) err_msg = 'Couldn\'t 
find automatic captions for %s' % video_id - if mobj is None: + if player_config is None: self._downloader.report_warning(err_msg) return {} - player_config = json.loads(mobj.group(1)) try: args = player_config['args'] caption_url = args['ttsurl'] @@ -1074,10 +1099,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): age_gate = False video_info = None # Try looking directly into the video webpage - mobj = re.search(r';ytplayer\.config\s*=\s*({.*?});ytplayer', video_webpage) - if mobj: - json_code = uppercase_escape(mobj.group(1)) - ytplayer_config = json.loads(json_code) + ytplayer_config = self._get_ytplayer_config(video_webpage) + if ytplayer_config is not None: args = ytplayer_config['args'] if args.get('url_encoded_fmt_stream_map'): # Convert to the same format returned by compat_parse_qs -- cgit v1.2.3 From b41631c4e6e56afb2427513c84df1b13681cf4c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Lalinsk=C3=BD?= <lukas@oxygene.sk> Date: Sun, 22 Nov 2015 13:53:26 +0100 Subject: [youtube] Send the list of patterns directly to _search_regex --- youtube_dl/extractor/youtube.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'youtube_dl/extractor/youtube.py') diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 7e74d2368..247769067 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -879,10 +879,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): r';ytplayer\.config\s*=\s*({.*?});ytplayer', r';ytplayer\.config\s*=\s*({.*?});', ] - for pattern in patterns: - config = self._search_regex(pattern, webpage, 'ytconfig.player', default=None) - if config is not None: - return json.loads(uppercase_escape(config)) + config = self._search_regex(patterns, webpage, 'ytconfig.player', default=None) + if config is not None: + return json.loads(uppercase_escape(config)) def _get_automatic_captions(self, video_id, webpage): """We need the webpage for getting the captions url, pass it as an -- 
cgit v1.2.3 From a72778d364022612ba88bdfd9affef0d7b0ca864 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 23 Nov 2015 21:00:06 +0600 Subject: [youtube] Improve ytplayer.config extraction --- youtube_dl/extractor/youtube.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) (limited to 'youtube_dl/extractor/youtube.py') diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 1580c54fe..052f6922a 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -891,22 +891,24 @@ class YoutubeIE(YoutubeBaseInfoExtractor): return {} return sub_lang_list - def _get_ytplayer_config(self, webpage): - patterns = [ - r';ytplayer\.config\s*=\s*({.*?});ytplayer', - r';ytplayer\.config\s*=\s*({.*?});', - ] - config = self._search_regex(patterns, webpage, 'ytconfig.player', default=None) - if config is not None: - return json.loads(uppercase_escape(config)) + def _get_ytplayer_config(self, video_id, webpage): + patterns = ( + r';ytplayer\.config\s*=\s*({.+?});ytplayer', + r';ytplayer\.config\s*=\s*({.+?});', + ) + config = self._search_regex( + patterns, webpage, 'ytplayer.config', default=None) + if config: + return self._parse_json( + uppercase_escape(config), video_id, fatal=False) def _get_automatic_captions(self, video_id, webpage): """We need the webpage for getting the captions url, pass it as an argument to speed up the process.""" self.to_screen('%s: Looking for automatic captions' % video_id) - player_config = self._get_ytplayer_config(webpage) + player_config = self._get_ytplayer_config(video_id, webpage) err_msg = 'Couldn\'t find automatic captions for %s' % video_id - if player_config is None: + if not player_config: self._downloader.report_warning(err_msg) return {} try: @@ -1115,8 +1117,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): age_gate = False video_info = None # Try looking directly into the video webpage - ytplayer_config = 
self._get_ytplayer_config(video_webpage) - if ytplayer_config is not None: + ytplayer_config = self._get_ytplayer_config(video_id, video_webpage) + if ytplayer_config: args = ytplayer_config['args'] if args.get('url_encoded_fmt_stream_map'): # Convert to the same format returned by compat_parse_qs -- cgit v1.2.3 From 61f92af1cfacb9a5a6e368d0093fb71dbac0af6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 23 Nov 2015 21:02:37 +0600 Subject: [youtube] Add test with '};' in tags --- youtube_dl/extractor/youtube.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'youtube_dl/extractor/youtube.py') diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 052f6922a..824335d0a 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -693,7 +693,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'only_matching': True, }, { - # Title with JS-like syntax "};" + # Title with JS-like syntax "};" (see https://github.com/rg3/youtube-dl/issues/7468) 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg', 'info_dict': { 'id': 'lsguqyKfVQg', @@ -708,6 +708,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'skip_download': True, }, }, + { + # Tags with '};' (see https://github.com/rg3/youtube-dl/issues/7468) + 'url': 'https://www.youtube.com/watch?v=Ms7iBXnlUO8', + 'only_matching': True, + }, ] def __init__(self, *args, **kwargs): -- cgit v1.2.3 From 526b3b071632bc3c840ae4dd3579e015f41df6f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 23 Nov 2015 21:14:03 +0600 Subject: [youtube] Clarify ytplayer.config extraction rationale --- youtube_dl/extractor/youtube.py | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'youtube_dl/extractor/youtube.py') diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 824335d0a..5482aac3b 100644 --- a/youtube_dl/extractor/youtube.py +++ 
b/youtube_dl/extractor/youtube.py @@ -898,6 +898,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor): def _get_ytplayer_config(self, video_id, webpage): patterns = ( + # User data may contain arbitrary character sequences that may affect + # JSON extraction with regex, e.g. when '};' is contained the second + # regex won't capture the whole JSON. Yet working around by trying more + # concrete regex first keeping in mind proper quoted string handling + # to be implemented in future that will replace this workaround (see + # https://github.com/rg3/youtube-dl/issues/7468, + # https://github.com/rg3/youtube-dl/pull/7599) r';ytplayer\.config\s*=\s*({.+?});ytplayer', r';ytplayer\.config\s*=\s*({.+?});', ) -- cgit v1.2.3 From 94bfcd23b7e2488b5e4bbf076965ab9ed980f1ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 23 Nov 2015 21:35:23 +0600 Subject: [youtube] Fix test --- youtube_dl/extractor/youtube.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'youtube_dl/extractor/youtube.py') diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 5482aac3b..e0e345496 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -426,7 +426,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'title': 'Principal Sexually Assaults A Teacher - Episode 117 - 8th June 2012', 'description': 'md5:09b78bd971f1e3e289601dfba15ca4f7', 'uploader': 'SET India', - 'uploader_id': 'setindia' + 'uploader_id': 'setindia', + 'age_limit': 18, } }, { -- cgit v1.2.3 From 9022726446c659f2bc38556105991e9797e0c8c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 23 Nov 2015 21:37:21 +0600 Subject: [youtube] Fix test --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'youtube_dl/extractor/youtube.py') diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index e0e345496..0246050c2 
100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -564,7 +564,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'info_dict': { 'id': 'lqQg6PlCWgI', 'ext': 'mp4', - 'upload_date': '20120724', + 'upload_date': '20150827', 'uploader_id': 'olympic', 'description': 'HO09 - Women - GER-AUS - Hockey - 31 July 2012 - London 2012 Olympic Games', 'uploader': 'Olympics', -- cgit v1.2.3 From 5c2266df4b9aeb7881ed8c026a038e2a25e43734 Mon Sep 17 00:00:00 2001 From: Sergey M? <dstftw@gmail.com> Date: Sat, 21 Nov 2015 22:18:17 +0600 Subject: Switch codebase to use sanitized_Request instead of compat_urllib_request.Request [downloader/dash] Use sanitized_Request [downloader/http] Use sanitized_Request [atresplayer] Use sanitized_Request [bambuser] Use sanitized_Request [bliptv] Use sanitized_Request [brightcove] Use sanitized_Request [cbs] Use sanitized_Request [ceskatelevize] Use sanitized_Request [collegerama] Use sanitized_Request [extractor/common] Use sanitized_Request [crunchyroll] Use sanitized_Request [dailymotion] Use sanitized_Request [dcn] Use sanitized_Request [dramafever] Use sanitized_Request [dumpert] Use sanitized_Request [eitb] Use sanitized_Request [escapist] Use sanitized_Request [everyonesmixtape] Use sanitized_Request [extremetube] Use sanitized_Request [facebook] Use sanitized_Request [fc2] Use sanitized_Request [flickr] Use sanitized_Request [4tube] Use sanitized_Request [gdcvault] Use sanitized_Request [extractor/generic] Use sanitized_Request [hearthisat] Use sanitized_Request [hotnewhiphop] Use sanitized_Request [hypem] Use sanitized_Request [iprima] Use sanitized_Request [ivi] Use sanitized_Request [keezmovies] Use sanitized_Request [letv] Use sanitized_Request [lynda] Use sanitized_Request [metacafe] Use sanitized_Request [minhateca] Use sanitized_Request [miomio] Use sanitized_Request [meovideo] Use sanitized_Request [mofosex] Use sanitized_Request [moniker] Use sanitized_Request [mooshare] Use sanitized_Request 
[movieclips] Use sanitized_Request [mtv] Use sanitized_Request [myvideo] Use sanitized_Request [neteasemusic] Use sanitized_Request [nfb] Use sanitized_Request [niconico] Use sanitized_Request [noco] Use sanitized_Request [nosvideo] Use sanitized_Request [novamov] Use sanitized_Request [nowness] Use sanitized_Request [nuvid] Use sanitized_Request [played] Use sanitized_Request [pluralsight] Use sanitized_Request [pornhub] Use sanitized_Request [pornotube] Use sanitized_Request [primesharetv] Use sanitized_Request [promptfile] Use sanitized_Request [qqmusic] Use sanitized_Request [rtve] Use sanitized_Request [safari] Use sanitized_Request [sandia] Use sanitized_Request [shared] Use sanitized_Request [sharesix] Use sanitized_Request [sina] Use sanitized_Request [smotri] Use sanitized_Request [sohu] Use sanitized_Request [spankwire] Use sanitized_Request [sportdeutschland] Use sanitized_Request [streamcloud] Use sanitized_Request [streamcz] Use sanitized_Request [tapely] Use sanitized_Request [tube8] Use sanitized_Request [tubitv] Use sanitized_Request [twitch] Use sanitized_Request [twitter] Use sanitized_Request [udemy] Use sanitized_Request [vbox7] Use sanitized_Request [veoh] Use sanitized_Request [vessel] Use sanitized_Request [vevo] Use sanitized_Request [viddler] Use sanitized_Request [videomega] Use sanitized_Request [viewvster] Use sanitized_Request [viki] Use sanitized_Request [vk] Use sanitized_Request [vodlocker] Use sanitized_Request [voicerepublic] Use sanitized_Request [wistia] Use sanitized_Request [xfileshare] Use sanitized_Request [xtube] Use sanitized_Request [xvideos] Use sanitized_Request [yandexmusic] Use sanitized_Request [youku] Use sanitized_Request [youporn] Use sanitized_Request [youtube] Use sanitized_Request [patreon] Use sanitized_Request [extractor/common] Remove unused import [nfb] PEP 8 --- youtube_dl/extractor/youtube.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'youtube_dl/extractor/youtube.py') diff 
--git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 0246050c2..cfe9eed55 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -20,7 +20,6 @@ from ..compat import ( compat_urllib_parse_unquote, compat_urllib_parse_unquote_plus, compat_urllib_parse_urlparse, - compat_urllib_request, compat_urlparse, compat_str, ) @@ -35,6 +34,7 @@ from ..utils import ( orderedSet, parse_duration, remove_start, + sanitized_Request, smuggle_url, str_to_int, unescapeHTML, @@ -114,7 +114,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): login_data = compat_urllib_parse.urlencode(encode_dict(login_form_strs)).encode('ascii') - req = compat_urllib_request.Request(self._LOGIN_URL, login_data) + req = sanitized_Request(self._LOGIN_URL, login_data) login_results = self._download_webpage( req, None, note='Logging in', errnote='unable to log in', fatal=False) @@ -147,7 +147,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): tfa_data = compat_urllib_parse.urlencode(encode_dict(tfa_form_strs)).encode('ascii') - tfa_req = compat_urllib_request.Request(self._TWOFACTOR_URL, tfa_data) + tfa_req = sanitized_Request(self._TWOFACTOR_URL, tfa_data) tfa_results = self._download_webpage( tfa_req, None, note='Submitting TFA code', errnote='unable to submit tfa', fatal=False) -- cgit v1.2.3 From 313dfc45f500db3acc348d58f431197ce7c153a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Lalinsk=C3=BD?= <lukas@oxygene.sk> Date: Sat, 28 Nov 2015 01:07:07 +0100 Subject: [youtube] Ignore yt:stretch with zero width/height --- youtube_dl/extractor/youtube.py | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) (limited to 'youtube_dl/extractor/youtube.py') diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index cfe9eed55..726b5ba0a 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -714,6 +714,22 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'url': 
'https://www.youtube.com/watch?v=Ms7iBXnlUO8', 'only_matching': True, }, + { + # Video with yt:stretch=17:0 + 'url': 'https://www.youtube.com/watch?v=Q39EVAstoRM', + 'info_dict': { + 'id': 'Q39EVAstoRM', + 'ext': 'mp4', + 'title': 'Clash Of Clans#14 Dicas De Ataque Para CV 4', + 'description': 'md5:ee18a25c350637c8faff806845bddee9', + 'upload_date': '20151107', + 'uploader_id': 'UCCr7TALkRbo3EtFzETQF1LA', + 'uploader': 'CH GAMER DROID', + }, + 'params': { + 'skip_download': True, + }, + }, ] def __init__(self, *args, **kwargs): @@ -1496,10 +1512,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor): r'<meta\s+property="og:video:tag".*?content="yt:stretch=(?P<w>[0-9]+):(?P<h>[0-9]+)">', video_webpage) if stretched_m: - ratio = float(stretched_m.group('w')) / float(stretched_m.group('h')) - for f in formats: - if f.get('vcodec') != 'none': - f['stretched_ratio'] = ratio + w = float(stretched_m.group('w')) + h = float(stretched_m.group('h')) + if w > 0 and h > 0: + ratio = float(stretched_m.group('w')) / float(stretched_m.group('h')) + for f in formats: + if f.get('vcodec') != 'none': + f['stretched_ratio'] = ratio self._sort_formats(formats) -- cgit v1.2.3 From 41f24c321d04108a32f457d5d5445f2cfce705a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Lalinsk=C3=BD?= <lukas@oxygene.sk> Date: Sat, 28 Nov 2015 08:16:46 +0100 Subject: [youtube] Use the existing `w` and `h` variables --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'youtube_dl/extractor/youtube.py') diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 726b5ba0a..9da8d4bc5 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1515,7 +1515,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): w = float(stretched_m.group('w')) h = float(stretched_m.group('h')) if w > 0 and h > 0: - ratio = float(stretched_m.group('w')) / float(stretched_m.group('h')) + ratio = w / h for f in formats: if 
f.get('vcodec') != 'none': f['stretched_ratio'] = ratio -- cgit v1.2.3 From 5faf9fed7e1c7922578467cfd48db5867ef9b91b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sat, 28 Nov 2015 18:50:21 +0600 Subject: [youtube] Clarify rationale for yt:stretch validation --- youtube_dl/extractor/youtube.py | 2 ++ 1 file changed, 2 insertions(+) (limited to 'youtube_dl/extractor/youtube.py') diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 9da8d4bc5..1c2420a33 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1514,6 +1514,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if stretched_m: w = float(stretched_m.group('w')) h = float(stretched_m.group('h')) + # yt:stretch may hold invalid ratio data (e.g. for Q39EVAstoRM ratio is 17:0). + # We will only process correct ratios. if w > 0 and h > 0: ratio = w / h for f in formats: -- cgit v1.2.3 From ac5a69af45307b583a9a6088abe5939bec18d562 Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 29 Nov 2015 12:44:24 +0800 Subject: [youtube] Disable compression for live streams --- youtube_dl/extractor/youtube.py | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'youtube_dl/extractor/youtube.py') diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 1c2420a33..52f4fe36d 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1475,6 +1475,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor): manifest_url = video_info['hlsvp'][0] url_map = self._extract_from_m3u8(manifest_url, video_id) formats = _map_to_format_list(url_map) + # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming + for a_format in formats: + if 'http_headers' not in a_format: + a_format['http_headers'] = {} + a_format['http_headers']['Youtubedl-no-compression'] = True else: raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map 
information found in video info') -- cgit v1.2.3 From 049d71d8745014bf5ec23e25e51d6b92556baa8c Mon Sep 17 00:00:00 2001 From: Yen Chi Hsuan <yan12125@gmail.com> Date: Sun, 29 Nov 2015 19:52:48 +0800 Subject: [youtube] Simplify and make sure header values are strings --- youtube_dl/extractor/youtube.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'youtube_dl/extractor/youtube.py') diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 52f4fe36d..4f375e2c8 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1477,9 +1477,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): formats = _map_to_format_list(url_map) # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming for a_format in formats: - if 'http_headers' not in a_format: - a_format['http_headers'] = {} - a_format['http_headers']['Youtubedl-no-compression'] = True + a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True' else: raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info') -- cgit v1.2.3 From 040ac686798fdc922157cca64d654933e3f6d096 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 29 Nov 2015 21:01:59 +0600 Subject: [youtube] Extend _VALID_URL (Closes #7694) --- youtube_dl/extractor/youtube.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'youtube_dl/extractor/youtube.py') diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 4f375e2c8..55a06eb68 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -258,7 +258,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): |(?: # or the v= param in all its forms (?:(?:watch|movie)(?:_popup)?(?:\.php)?/?)? # preceding watch(_popup|.php) or nothing (like /?v=xxxx) (?:\?|\#!?) # the params delimiter ? or # or #! - (?:.*?&)?? 
# any other preceding param (like /?s=tuff&v=xxxx) + (?:.*?[&;])?? # any other preceding param (like /?s=tuff&v=xxxx or ?s=tuff&v=V36LpHqtcDY) v= ) )) @@ -730,6 +730,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'skip_download': True, }, }, + { + 'url': 'https://www.youtube.com/watch?feature=player_embedded&amp;v=V36LpHqtcDY', + 'only_matching': True, + } ] def __init__(self, *args, **kwargs): -- cgit v1.2.3 From 2e1b92854000662e554413df0c34c1cbc0d7ffc8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Sun, 29 Nov 2015 21:04:11 +0600 Subject: [youtube:playlist] Extend _VALID_URL --- youtube_dl/extractor/youtube.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'youtube_dl/extractor/youtube.py') diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 55a06eb68..032691e7f 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1566,7 +1566,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor, YoutubePlaylistBaseInfoExtract youtube\.com/ (?: (?:course|view_play_list|my_playlists|artist|playlist|watch|embed/videoseries) - \? (?:.*?&)*? (?:p|a|list)= + \? (?:.*?[&;])*? 
(?:p|a|list)= | p/ ) ( -- cgit v1.2.3 From 4c6b4764f0260808d321cfb6cca1daa5e3eb13d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= <dstftw@gmail.com> Date: Mon, 30 Nov 2015 20:42:05 +0600 Subject: [youtube] Clarify itag 272 possible resolutions (#7699) --- youtube_dl/extractor/youtube.py | 1 + 1 file changed, 1 insertion(+) (limited to 'youtube_dl/extractor/youtube.py') diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 032691e7f..9b39505ba 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -346,6 +346,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): '247': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, '248': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, '271': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, + # itag 272 videos are either 3840x2160 (e.g. RtoitU2A-3E) or 7680x4320 (sLprVF6d7Ug) '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'}, '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'vp9'}, -- cgit v1.2.3