aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>2013-11-15 11:51:45 +0100
committerJaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>2013-11-15 11:51:45 +0100
commitb9643eed7c9b081c91e72257b380ccbd92555254 (patch)
tree9dab7c071ad224f488c7e4074abcef7547c0400e
parentfeee2ecfa9fbc6fd34246c7e167ac9542ae7def2 (diff)
downloadyoutube-dl-b9643eed7c9b081c91e72257b380ccbd92555254.tar.xz
[youtube:channel] Fix the extraction of autogenerated channels
The ajax pages are empty, now it looks directly in the channel's /videos page
-rw-r--r--youtube_dl/extractor/youtube.py37
1 files changed, 24 insertions, 13 deletions
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 7d7aeb461..8c0e6f252 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1594,20 +1594,31 @@ class YoutubeChannelIE(InfoExtractor):
# Download channel page
channel_id = mobj.group(1)
video_ids = []
+ url = 'https://www.youtube.com/channel/%s/videos' % channel_id
+ channel_page = self._download_webpage(url, channel_id)
+ if re.search(r'channel-header-autogenerated-label', channel_page) is not None:
+ autogenerated = True
+ else:
+ autogenerated = False
- # Download all channel pages using the json-based channel_ajax query
- for pagenum in itertools.count(1):
- url = self._MORE_PAGES_URL % (pagenum, channel_id)
- page = self._download_webpage(url, channel_id,
- u'Downloading page #%s' % pagenum)
-
- page = json.loads(page)
-
- ids_in_page = self.extract_videos_from_page(page['content_html'])
- video_ids.extend(ids_in_page)
-
- if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
- break
+ if autogenerated:
+ # The videos are contained in a single page
+ # the ajax pages can't be used, they are empty
+ video_ids = self.extract_videos_from_page(channel_page)
+ else:
+ # Download all channel pages using the json-based channel_ajax query
+ for pagenum in itertools.count(1):
+ url = self._MORE_PAGES_URL % (pagenum, channel_id)
+ page = self._download_webpage(url, channel_id,
+ u'Downloading page #%s' % pagenum)
+
+ page = json.loads(page)
+
+ ids_in_page = self.extract_videos_from_page(page['content_html'])
+ video_ids.extend(ids_in_page)
+
+ if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']:
+ break
self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))