diff options
| -rw-r--r-- | test/test_youtube_lists.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/youtube.py | 28 | 
2 files changed, 21 insertions, 9 deletions
diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py
index 47df0f348..af1c45421 100644
--- a/test/test_youtube_lists.py
+++ b/test/test_youtube_lists.py
@@ -44,7 +44,7 @@ class TestYoutubeLists(unittest.TestCase):
         ie = YoutubePlaylistIE(dl)
         result = ie.extract('https://www.youtube.com/watch?v=W01L70IGBgE&index=2&list=RDOQpdSVF_k_w')
         entries = result['entries']
-        self.assertTrue(len(entries) >= 20)
+        self.assertTrue(len(entries) >= 50)
         original_video = entries[0]
         self.assertEqual(original_video['id'], 'OQpdSVF_k_w')
 
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 44c1191bd..a4dd628a1 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1818,20 +1818,32 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor):
     def _extract_mix(self, playlist_id):
         # The mixes are generated from a single video
         # the id of the playlist is just 'RD' + video_id
-        url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
-        webpage = self._download_webpage(
-            url, playlist_id, 'Downloading Youtube mix')
+        ids = []
+        last_id = playlist_id[-11:]
+        for n in itertools.count(1):
+            url = 'https://youtube.com/watch?v=%s&list=%s' % (last_id, playlist_id)
+            webpage = self._download_webpage(
+                url, playlist_id, 'Downloading page {0} of Youtube mix'.format(n))
+            new_ids = orderedSet(re.findall(
+                r'''(?xs)data-video-username=".*?".*?
+                           href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id),
+                webpage))
+            # Fetch new pages until all the videos are repeated, it seems that
+            # there are always 51 unique videos.
+            new_ids = [_id for _id in new_ids if _id not in ids]
+            if not new_ids:
+                break
+            ids.extend(new_ids)
+            last_id = ids[-1]
+
+        url_results = self._ids_to_results(ids)
+
         search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
         title_span = (
             search_title('playlist-title') or
             search_title('title long-title') or
             search_title('title'))
         title = clean_html(title_span)
-        ids = orderedSet(re.findall(
-            r'''(?xs)data-video-username=".*?".*?
-                       href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id),
-            webpage))
-        url_results = self._ids_to_results(ids)
 
         return self.playlist_result(url_results, playlist_id, title)
 
