aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>2016-04-17 17:07:57 +0200
committerJaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com>2016-04-17 17:07:57 +0200
commit1b6182d8f759546b54a675123901348361bad979 (patch)
treee3552f926d004d6bb22605cb2f34e05793540210
parent7bab22a4029feb651d08f4b631359a0d1aaffd71 (diff)
[youtube:playlist] Fetch all the videos in a mix (fixes #3837)
Since there doesn't seem to be any indication, it stops when there aren't new videos in the webpage.
-rw-r--r--test/test_youtube_lists.py2
-rw-r--r--youtube_dl/extractor/youtube.py28
2 files changed, 21 insertions, 9 deletions
diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py
index 47df0f348..af1c45421 100644
--- a/test/test_youtube_lists.py
+++ b/test/test_youtube_lists.py
@@ -44,7 +44,7 @@ class TestYoutubeLists(unittest.TestCase):
ie = YoutubePlaylistIE(dl)
result = ie.extract('https://www.youtube.com/watch?v=W01L70IGBgE&index=2&list=RDOQpdSVF_k_w')
entries = result['entries']
- self.assertTrue(len(entries) >= 20)
+ self.assertTrue(len(entries) >= 50)
original_video = entries[0]
self.assertEqual(original_video['id'], 'OQpdSVF_k_w')
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 44c1191bd..a4dd628a1 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1818,20 +1818,32 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
def _extract_mix(self, playlist_id):
# The mixes are generated from a single video
# the id of the playlist is just 'RD' + video_id
- url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
- webpage = self._download_webpage(
- url, playlist_id, 'Downloading Youtube mix')
+ ids = []
+ last_id = playlist_id[-11:]
+ for n in itertools.count(1):
+ url = 'https://youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
+ webpage = self._download_webpage(
+ url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
+ new_ids = orderedSet(re.findall(
+ r'''(?xs)data-video-username=".*?".*?
+ href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
+ webpage))
+ # Fetch new pages until all the videos are repeated, it seems that
+ # there are always 51 unique videos.
+ new_ids = [_id for _id in new_ids if _id not in ids]
+ if not new_ids:
+ break
+ ids.extend(new_ids)
+ last_id = ids[-1]
+
+ url_results = self._ids_to_results(ids)
+
search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
title_span = (
search_title('playlist-title') or
search_title('title long-title') or
search_title('title'))
title = clean_html(title_span)
- ids = orderedSet(re.findall(
- r'''(?xs)data-video-username=".*?".*?
- href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s''' % re.escape(playlist_id),
- webpage))
- url_results = self._ids_to_results(ids)
return self.playlist_result(url_results, playlist_id, title)