diff options
| -rw-r--r-- | test/test_youtube_lists.py | 39 | ||||
| -rw-r--r-- | youtube_dl/extractor/youtube.py | 71 | 
2 files changed, 62 insertions, 48 deletions
diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py index 1fa99f88b..410f9edc2 100644 --- a/test/test_youtube_lists.py +++ b/test/test_youtube_lists.py @@ -10,7 +10,6 @@ from test.helper import FakeYDL  from youtube_dl.extractor import ( -    YoutubeUserIE,      YoutubePlaylistIE,      YoutubeIE,      YoutubeChannelIE, @@ -43,28 +42,6 @@ class TestYoutubeLists(unittest.TestCase):          self.assertEqual(len(entries), 25)          self.assertEqual(YoutubeIE().extract_id(entries[-1]['url']), 'rYefUsYuEp0') -    def test_youtube_channel(self): -        dl = FakeYDL() -        ie = YoutubeChannelIE(dl) -        #test paginated channel -        result = ie.extract('https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w') -        self.assertTrue(len(result['entries']) > 90) -        #test autogenerated channel -        result = ie.extract('https://www.youtube.com/channel/HCtnHdj3df7iM/videos') -        self.assertTrue(len(result['entries']) >= 18) - -    def test_youtube_user(self): -        dl = FakeYDL() -        ie = YoutubeUserIE(dl) -        result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation') -        self.assertTrue(len(result['entries']) >= 320) - -    def test_youtube_show(self): -        dl = FakeYDL() -        ie = YoutubeShowIE(dl) -        result = ie.extract('http://www.youtube.com/show/airdisasters') -        self.assertTrue(len(result) >= 3) -      def test_youtube_mix(self):          dl = FakeYDL()          ie = YoutubePlaylistIE(dl) @@ -83,21 +60,5 @@ class TestYoutubeLists(unittest.TestCase):          entries = result['entries']          self.assertEqual(len(entries), 100) -    def test_youtube_toplist(self): -        dl = FakeYDL() -        ie = YoutubeTopListIE(dl) -        result = ie.extract('yttoplist:music:Trending') -        entries = result['entries'] -        self.assertTrue(len(entries) >= 5) - -    def test_youtube_search_url(self): -        dl = FakeYDL() -        ie = YoutubeSearchURLIE(dl) -        result = ie.extract('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video') -        entries = result['entries'] -        self.assertIsPlaylist(result) -        self.assertEqual(result['title'], 'youtube-dl test video') -        self.assertTrue(len(entries) >= 5) -  if __name__ == '__main__':      unittest.main() diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 0257ee2f9..2ef76b69b 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1160,16 +1160,25 @@ class YoutubeTopListIE(YoutubePlaylistIE):      IE_DESC = ('YouTube.com top lists, "yttoplist:{channel}:{list title}"'          ' (Example: "yttoplist:music:Top Tracks")')      _VALID_URL = r'yttoplist:(?P<chann>.*?):(?P<title>.*?)$' -    _TESTS = [] +    _TESTS = [{ +        'url': 'yttoplist:music:Trending', +        'playlist_mincount': 5, +        'skip': 'Only works for logged-in users', +    }]      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url)          channel = mobj.group('chann')          title = mobj.group('title')          query = compat_urllib_parse.urlencode({'title': title}) -        playlist_re = 'href="([^"]+?%s.*?)"' % re.escape(query) -        channel_page = self._download_webpage('https://www.youtube.com/%s' % channel, title) -        link = self._html_search_regex(playlist_re, channel_page, 'list') +        channel_page = self._download_webpage( +            'https://www.youtube.com/%s' % channel, title) +        link = self._html_search_regex( +            r'''(?x) +                <a\s+href="([^"]+)".*?>\s* +                <span\s+class="branded-page-module-title-text">\s* +                <span[^>]*>.*?%s.*?</span>''' % re.escape(query), +            channel_page, 'list')          url = compat_urlparse.urljoin('https://www.youtube.com/', link)          video_re = r'data-index="\d+".*?data-video-id="([0-9A-Za-z_-]{11})"' @@ -1195,6 +1204,11 @@ class YoutubeChannelIE(InfoExtractor):      _MORE_PAGES_INDICATOR = 'yt-uix-load-more'      _MORE_PAGES_URL = 'https://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s'      IE_NAME = 'youtube:channel' +    _TESTS = [{ +        'note': 'paginated channel', +        'url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w', +        'playlist_mincount': 91, +    }]      def extract_videos_from_page(self, page):          ids_in_page = [] @@ -1253,6 +1267,17 @@ class YoutubeUserIE(InfoExtractor):      _GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'      IE_NAME = 'youtube:user' +    _TESTS = [{ +        'url': 'https://www.youtube.com/user/TheLinuxFoundation', +        'playlist_mincount': 320, +        'info_dict': { +            'title': 'TheLinuxFoundation', +        } +    }, { +        'url': 'ytuser:phihag', +        'only_matching': True, +    }] +      @classmethod      def suitable(cls, url):          # Don't return True if the url can be extracted with other youtube @@ -1361,6 +1386,13 @@ class YoutubeSearchURLIE(InfoExtractor):      IE_DESC = 'YouTube.com search URLs'      IE_NAME = 'youtube:search_url'      _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]|$)' +    _TESTS = [{ +        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', +        'playlist_mincount': 5, +        'info_dict': { +            'title': 'youtube-dl test video', +        } +    }]      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url) @@ -1395,17 +1427,38 @@ class YoutubeSearchURLIE(InfoExtractor):  class YoutubeShowIE(InfoExtractor):      IE_DESC = 'YouTube.com (multi-season) shows' -    _VALID_URL = r'https?://www\.youtube\.com/show/(.*)' +    _VALID_URL = r'https?://www\.youtube\.com/show/(?P<id>[^?#]*)'      IE_NAME = 'youtube:show' +    _TESTS = [{ +        'url': 'http://www.youtube.com/show/airdisasters', +        'playlist_mincount': 3, +        'info_dict': { +            'id': 'airdisasters', +            'title': 'Air Disasters', +        } +    }]      def _real_extract(self, url):          mobj = re.match(self._VALID_URL, url) -        show_name = mobj.group(1) -        webpage = self._download_webpage(url, show_name, 'Downloading show webpage') +        playlist_id = mobj.group('id') +        webpage = self._download_webpage( +            url, playlist_id, 'Downloading show webpage')          # There's one playlist for each season of the show          m_seasons = list(re.finditer(r'href="(/playlist\?list=.*?)"', webpage)) -        self.to_screen('%s: Found %s seasons' % (show_name, len(m_seasons))) -        return [self.url_result('https://www.youtube.com' + season.group(1), 'YoutubePlaylist') for season in m_seasons] +        self.to_screen('%s: Found %s seasons' % (playlist_id, len(m_seasons))) +        entries = [ +            self.url_result( +                'https://www.youtube.com' + season.group(1), 'YoutubePlaylist') +            for season in m_seasons +        ] +        title = self._og_search_title(webpage, fatal=False) + +        return { +            '_type': 'playlist', +            'id': playlist_id, +            'title': title, +            'entries': entries, +        }  class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):  | 
