diff options
author | Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> | 2014-12-12 22:23:54 +0100 |
---|---|---|
committer | Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> | 2014-12-12 22:23:54 +0100 |
commit | 23d3608c6b5b50cbbc81314d18824c4951f8af27 (patch) | |
tree | 981e733829b5734e549643563ff0a3e3e225a524 /youtube_dl/extractor | |
parent | baa7081d68996377e44225c74a1ec05e801617a2 (diff) |
[youtube:channel] Fix extraction (fixes #4435)
It now uses the same pagination system as playlists.
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- | youtube_dl/extractor/youtube.py | 21 |
1 file changed, 13 insertions, 8 deletions
```diff
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 7b6179a2a..a7bdce72f 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1269,8 +1269,6 @@ class YoutubeTopListIE(YoutubePlaylistIE):
 class YoutubeChannelIE(InfoExtractor):
     IE_DESC = 'YouTube.com channels'
     _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'
-    _MORE_PAGES_INDICATOR = 'yt-uix-load-more'
-    _MORE_PAGES_URL = 'https://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'
     IE_NAME = 'youtube:channel'
     _TESTS = [{
         'note': 'paginated channel',
@@ -1307,20 +1305,27 @@ class YoutubeChannelIE(InfoExtractor):
             return self.playlist_result(entries, channel_id)
 
         def _entries():
+            more_widget_html = content_html = channel_page
             for pagenum in itertools.count(1):
-                url = self._MORE_PAGES_URL % (pagenum, channel_id)
-                page = self._download_json(
-                    url, channel_id, note='Downloading page #%s' % pagenum,
-                    transform_source=uppercase_escape)
 
-                ids_in_page = self.extract_videos_from_page(page['content_html'])
+                ids_in_page = self.extract_videos_from_page(content_html)
                 for video_id in ids_in_page:
                     yield self.url_result(
                         video_id, 'Youtube', video_id=video_id)
 
-                if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
+                mobj = re.search(
+                    r'data-uix-load-more-href="/?(?P<more>[^"]+)"',
+                    more_widget_html)
+                if not mobj:
                     break
 
+                more = self._download_json(
+                    'https://youtube.com/%s' % mobj.group('more'), channel_id,
+                    'Downloading page #%s' % (pagenum + 1),
+                    transform_source=uppercase_escape)
+                content_html = more['content_html']
+                more_widget_html = more['load_more_widget_html']
+
         return self.playlist_result(_entries(), channel_id)
 
 
```