diff options
Diffstat (limited to 'youtube_dl/extractor/qqmusic.py')
-rw-r--r-- | youtube_dl/extractor/qqmusic.py | 55 |
1 files changed, 55 insertions, 0 deletions
diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py index 174c8e0ae..d4a85d8c3 100644 --- a/youtube_dl/extractor/qqmusic.py +++ b/youtube_dl/extractor/qqmusic.py @@ -9,6 +9,7 @@ from .common import InfoExtractor from ..utils import ( strip_jsonp, unescapeHTML, + js_to_json, ) from ..compat import compat_urllib_request @@ -168,3 +169,57 @@ class QQMusicAlbumIE(QQPlaylistBaseIE): album_page, 'album details', default=None) return self.playlist_result(entries, mid, album_name, album_detail) + + +class QQMusicToplistIE(QQPlaylistBaseIE): + _VALID_URL = r'http://y\.qq\.com/#type=toplist&p=(?P<id>(top|global)_[0-9]+)' + + _TESTS = [{ + 'url': 'http://y.qq.com/#type=toplist&p=global_12', + 'info_dict': { + 'id': 'global_12', + 'title': 'itunes榜', + }, + 'playlist_count': 10, + }, { + 'url': 'http://y.qq.com/#type=toplist&p=top_6', + 'info_dict': { + 'id': 'top_6', + 'title': 'QQ音乐巅峰榜·欧美', + }, + 'playlist_count': 100, + }] + + @staticmethod + def strip_qq_jsonp(code): + return js_to_json(re.sub(r'^MusicJsonCallback\((.*?)\)/\*.+?\*/$', r'\1', code)) + + def _real_extract(self, url): + list_id = self._match_id(url) + + list_type = list_id.split("_")[0] + num_id = list_id.split("_")[1] + + list_page = self._download_webpage("http://y.qq.com/y/static/toplist/index/%s.html" % list_id, list_id, 'Download toplist page') + entries = [] + if list_type == 'top': + list = self._download_json( + "http://y.qq.com/y/static/toplist/json/top/%s/1.js" % num_id, + list_id, note='Retrieve toplist json', errnote='Unable to get toplist json', transform_source=self.strip_qq_jsonp) + + for song in list['l']: + s = song['s'] + song_mid = s.split("|")[20] + entries.append(self.url_result( + 'http://y.qq.com/#type=song&mid=' + song_mid, 'QQMusic', + song_mid)) + + elif list_type == 'global': + entries = self.get_entries_from_page(list_page) + + list_name = self._html_search_regex( + r'<h2 id="top_name">([^\']+)</h2>', list_page, 'top list name', + default=None) + list_desc = None + + return self.playlist_result(entries, list_id, list_name, list_desc)
\ No newline at end of file |