diff options
| author | Philipp Hagemeister <phihag@phihag.de> | 2013-11-22 20:11:54 +0100 | 
|---|---|---|
| committer | Philipp Hagemeister <phihag@phihag.de> | 2013-11-22 20:11:54 +0100 | 
| commit | 3f8ced5144a76a3f9ab7ee8cd06cc79bb75dc564 (patch) | |
| tree | 240d40bd76180dafa9dccf4e9b33e5f87456185d | |
| parent | 00ea0f11eb76e7a67648790524a50f7254b9578f (diff) | |
| parent | 880e1c529de1d0f7f0a065afc4148320894a25b4 (diff) | |
Merge remote-tracking branch 'jaimeMF/yt-playlists'
| -rw-r--r-- | test/test_youtube_lists.py | 14 | ||||
| -rw-r--r-- | youtube_dl/extractor/youtube.py | 48 | 
2 files changed, 24 insertions, 38 deletions
```diff
diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py
index 4b7a7847b..50ad52695 100644
--- a/test/test_youtube_lists.py
+++ b/test/test_youtube_lists.py
@@ -27,7 +27,7 @@ class TestYoutubeLists(unittest.TestCase):
     def test_youtube_playlist(self):
         dl = FakeYDL()
         ie = YoutubePlaylistIE(dl)
-        result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')[0]
+        result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')
         self.assertIsPlaylist(result)
         self.assertEqual(result['title'], 'ytdl test PL')
         ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]
@@ -44,13 +44,13 @@ class TestYoutubeLists(unittest.TestCase):
     def test_issue_673(self):
         dl = FakeYDL()
         ie = YoutubePlaylistIE(dl)
-        result = ie.extract('PLBB231211A4F62143')[0]
+        result = ie.extract('PLBB231211A4F62143')
         self.assertTrue(len(result['entries']) > 25)
 
     def test_youtube_playlist_long(self):
         dl = FakeYDL()
         ie = YoutubePlaylistIE(dl)
-        result = ie.extract('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')[0]
+        result = ie.extract('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
         self.assertIsPlaylist(result)
         self.assertTrue(len(result['entries']) >= 799)
 
@@ -58,7 +58,7 @@ class TestYoutubeLists(unittest.TestCase):
         #651
         dl = FakeYDL()
         ie = YoutubePlaylistIE(dl)
-        result = ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')[0]
+        result = ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
         ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]
         self.assertFalse('pElCt5oNDuI' in ytie_results)
         self.assertFalse('KdPEApIVdWM' in ytie_results)
@@ -66,7 +66,7 @@ class TestYoutubeLists(unittest.TestCase):
     def test_youtube_playlist_empty(self):
         dl = FakeYDL()
         ie = YoutubePlaylistIE(dl)
-        result = ie.extract('https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx')[0]
+        result = ie.extract('https://www.youtube.com/playlist?list=PLtPgu7CB4gbZDA7i_euNxn75ISqxwZPYx')
         self.assertIsPlaylist(result)
         self.assertEqual(len(result['entries']), 0)
 
@@ -74,7 +74,7 @@ class TestYoutubeLists(unittest.TestCase):
         dl = FakeYDL()
         ie = YoutubePlaylistIE(dl)
         # TODO find a > 100 (paginating?) videos course
-        result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')[0]
+        result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
         entries = result['entries']
         self.assertEqual(YoutubeIE()._extract_id(entries[0]['url']), 'j9WZyLZCBzs')
         self.assertEqual(len(entries), 25)
@@ -99,7 +99,7 @@ class TestYoutubeLists(unittest.TestCase):
     def test_youtube_safe_search(self):
         dl = FakeYDL()
         ie = YoutubePlaylistIE(dl)
-        result = ie.extract('PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl')[0]
+        result = ie.extract('PLtPgu7CB4gbY9oDN3drwC3cMbJggS7dKl')
         self.assertEqual(len(result['entries']), 2)
 
     def test_youtube_show(self):
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 41838237c..9b09793eb 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1510,7 +1510,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
             })
         return results
 
-class YoutubePlaylistIE(InfoExtractor):
+class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
     IE_DESC = u'YouTube.com playlists'
     _VALID_URL = r"""(?:
                         (?:https?://)?
@@ -1526,8 +1526,9 @@ class YoutubePlaylistIE(InfoExtractor):
                      |
                         ((?:PL|EC|UU|FL)[0-9A-Za-z-_]{10,})
                      )"""
-    _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/playlists/%s?max-results=%i&start-index=%i&v=2&alt=json&safeSearch=none'
-    _MAX_RESULTS = 50
+    _TEMPLATE_URL = 'https://www.youtube.com/playlist?list=%s&page=%s'
+    _MORE_PAGES_INDICATOR = r'data-link-type="next"'
+    _VIDEO_RE = r'href="/watch\?v=([0-9A-Za-z_-]{11})&'
     IE_NAME = u'youtube:playlist'
 
     @classmethod
@@ -1535,6 +1536,9 @@ class YoutubePlaylistIE(InfoExtractor):
         """Receives a URL and returns True if suitable for this IE."""
         return re.match(cls._VALID_URL, url, re.VERBOSE) is not None
 
+    def _real_initialize(self):
+        self._login()
+
     def _real_extract(self, url):
         # Extract playlist id
         mobj = re.match(self._VALID_URL, url, re.VERBOSE)
@@ -1552,41 +1556,23 @@ class YoutubePlaylistIE(InfoExtractor):
             else:
                 self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id))
 
-        # Download playlist videos from API
-        videos = []
+        # Extract the video ids from the playlist pages
+        ids = []
 
         for page_num in itertools.count(1):
-            start_index = self._MAX_RESULTS * (page_num - 1) + 1
-            if start_index >= 1000:
-                self._downloader.report_warning(u'Max number of results reached')
-                break
-            url = self._TEMPLATE_URL % (playlist_id, self._MAX_RESULTS, start_index)
+            url = self._TEMPLATE_URL % (playlist_id, page_num)
             page = self._download_webpage(url, playlist_id, u'Downloading page #%s' % page_num)
+            # The ids are duplicated
+            new_ids = orderedSet(re.findall(self._VIDEO_RE, page))
+            ids.extend(new_ids)
 
-            try:
-                response = json.loads(page)
-            except ValueError as err:
-                raise ExtractorError(u'Invalid JSON in API response: ' + compat_str(err))
-
-            if 'feed' not in response:
-                raise ExtractorError(u'Got a malformed response from YouTube API')
-            playlist_title = response['feed']['title']['$t']
-            if 'entry' not in response['feed']:
-                # Number of videos is a multiple of self._MAX_RESULTS
+            if re.search(self._MORE_PAGES_INDICATOR, page) is None:
                 break
 
-            for entry in response['feed']['entry']:
-                index = entry['yt$position']['$t']
-                if 'media$group' in entry and 'yt$videoid' in entry['media$group']:
-                    videos.append((
-                        index,
-                        'https://www.youtube.com/watch?v=' + entry['media$group']['yt$videoid']['$t']
-                    ))
-
-        videos = [v[1] for v in sorted(videos)]
+        playlist_title = self._og_search_title(page)
 
-        url_results = [self.url_result(vurl, 'Youtube') for vurl in videos]
-        return [self.playlist_result(url_results, playlist_id, playlist_title)]
+        url_results = [self.url_result(vid, 'Youtube') for vid in ids]
+        return self.playlist_result(url_results, playlist_id, playlist_title)
 
 
 class YoutubeChannelIE(InfoExtractor):
```
