diff options
Diffstat (limited to 'youtube_dl/extractor/qqmusic.py')
| -rw-r--r-- | youtube_dl/extractor/qqmusic.py | 102 | 
1 files changed, 87 insertions, 15 deletions
diff --git a/youtube_dl/extractor/qqmusic.py b/youtube_dl/extractor/qqmusic.py index bafa81c21..1654a641f 100644 --- a/youtube_dl/extractor/qqmusic.py +++ b/youtube_dl/extractor/qqmusic.py @@ -9,12 +9,14 @@ from .common import InfoExtractor  from ..utils import (      strip_jsonp,      unescapeHTML, +    clean_html,  )  from ..compat import compat_urllib_request  class QQMusicIE(InfoExtractor):      IE_NAME = 'qqmusic' +    IE_DESC = 'QQ音乐'      _VALID_URL = r'http://y.qq.com/#type=song&mid=(?P<id>[0-9A-Za-z]+)'      _TESTS = [{          'url': 'http://y.qq.com/#type=song&mid=004295Et37taLD', @@ -26,6 +28,20 @@ class QQMusicIE(InfoExtractor):              'upload_date': '20141227',              'creator': '林俊杰',              'description': 'md5:d327722d0361576fde558f1ac68a7065', +            'thumbnail': 're:^https?://.*\.jpg$', +        } +    }, { +        'note': 'There is no mp3-320 version of this song.', +        'url': 'http://y.qq.com/#type=song&mid=004MsGEo3DdNxV', +        'md5': 'fa3926f0c585cda0af8fa4f796482e3e', +        'info_dict': { +            'id': '004MsGEo3DdNxV', +            'ext': 'mp3', +            'title': '如果', +            'upload_date': '20050626', +            'creator': '李季美', +            'description': 'md5:46857d5ed62bc4ba84607a805dccf437', +            'thumbnail': 're:^https?://.*\.jpg$',          }      }] @@ -68,6 +84,14 @@ class QQMusicIE(InfoExtractor):          if lrc_content:              lrc_content = lrc_content.replace('\\n', '\n') +        thumbnail_url = None +        albummid = self._search_regex( +            [r'albummid:\'([0-9a-zA-Z]+)\'', r'"albummid":"([0-9a-zA-Z]+)"'], +            detail_info_page, 'album mid', default=None) +        if albummid: +            thumbnail_url = "http://i.gtimg.cn/music/photo/mid_album_500/%s/%s/%s.jpg" \ +                            % (albummid[-2:-1], albummid[-1], albummid) +          guid = self.m_r_get_ruin()          vkey = self._download_json( @@ -85,6 +109,7 @@ class QQMusicIE(InfoExtractor):                  'preference': details['preference'],                  'abr': details.get('abr'),              }) +        self._check_formats(formats, mid)          self._sort_formats(formats)          return { @@ -94,6 +119,7 @@ class QQMusicIE(InfoExtractor):              'upload_date': publish_time,              'creator': singer,              'description': lrc_content, +            'thumbnail': thumbnail_url,          } @@ -117,6 +143,7 @@ class QQPlaylistBaseIE(InfoExtractor):  class QQMusicSingerIE(QQPlaylistBaseIE):      IE_NAME = 'qqmusic:singer' +    IE_DESC = 'QQ音乐 - 歌手'      _VALID_URL = r'http://y.qq.com/#type=singer&mid=(?P<id>[0-9A-Za-z]+)'      _TEST = {          'url': 'http://y.qq.com/#type=singer&mid=001BLpXF2DyJe2', @@ -161,39 +188,50 @@ class QQMusicSingerIE(QQPlaylistBaseIE):  class QQMusicAlbumIE(QQPlaylistBaseIE):      IE_NAME = 'qqmusic:album' +    IE_DESC = 'QQ音乐 - 专辑'      _VALID_URL = r'http://y.qq.com/#type=album&mid=(?P<id>[0-9A-Za-z]+)' -    _TEST = { -        'url': 'http://y.qq.com/#type=album&mid=000gXCTb2AhRR1&play=0', +    _TESTS = [{ +        'url': 'http://y.qq.com/#type=album&mid=000gXCTb2AhRR1',          'info_dict': {              'id': '000gXCTb2AhRR1',              'title': '我们都是这样长大的', -            'description': 'md5:d216c55a2d4b3537fe4415b8767d74d6', +            'description': 'md5:179c5dce203a5931970d306aa9607ea6',          },          'playlist_count': 4, -    } +    }, { +        'url': 'http://y.qq.com/#type=album&mid=002Y5a3b3AlCu3', +        'info_dict': { +            'id': '002Y5a3b3AlCu3', +            'title': '그리고...', +            'description': 'md5:a48823755615508a95080e81b51ba729', +        }, +        'playlist_count': 8, +    }]      def _real_extract(self, url):          mid = self._match_id(url) -        album_page = self._download_webpage( -            self.qq_static_url('album', mid), mid, 'Download album page') +        album = self._download_json( +            'http://i.y.qq.com/v8/fcg-bin/fcg_v8_album_info_cp.fcg?albummid=%s&format=json' % mid, +            mid, 'Download album page')['data'] -        entries = self.get_entries_from_page(album_page) - -        album_name = self._html_search_regex( -            r"albumname\s*:\s*'([^']+)',", album_page, 'album name', -            default=None) - -        album_detail = self._html_search_regex( -            r'<div class="album_detail close_detail">\s*<p>((?:[^<>]+(?:<br />)?)+)</p>', -            album_page, 'album details', default=None) +        entries = [ +            self.url_result( +                'http://y.qq.com/#type=song&mid=' + song['songmid'], 'QQMusic', song['songmid'] +            ) for song in album['list'] +        ] +        album_name = album.get('name') +        album_detail = album.get('desc') +        if album_detail is not None: +            album_detail = album_detail.strip()          return self.playlist_result(entries, mid, album_name, album_detail)  class QQMusicToplistIE(QQPlaylistBaseIE):      IE_NAME = 'qqmusic:toplist' +    IE_DESC = 'QQ音乐 - 排行榜'      _VALID_URL = r'http://y\.qq\.com/#type=toplist&p=(?P<id>(top|global)_[0-9]+)'      _TESTS = [{ @@ -243,3 +281,37 @@ class QQMusicToplistIE(QQPlaylistBaseIE):          list_name = topinfo.get('ListName')          list_description = topinfo.get('info')          return self.playlist_result(entries, list_id, list_name, list_description) + + +class QQMusicPlaylistIE(QQPlaylistBaseIE): +    IE_NAME = 'qqmusic:playlist' +    IE_DESC = 'QQ音乐 - 歌单' +    _VALID_URL = r'http://y\.qq\.com/#type=taoge&id=(?P<id>[0-9]+)' + +    _TEST = { +        'url': 'http://y.qq.com/#type=taoge&id=3462654915', +        'info_dict': { +            'id': '3462654915', +            'title': '韩国5月新歌精选下旬', +            'description': 'md5:d2c9d758a96b9888cf4fe82f603121d4', +        }, +        'playlist_count': 40, +    } + +    def _real_extract(self, url): +        list_id = self._match_id(url) + +        list_json = self._download_json( +            'http://i.y.qq.com/qzone-music/fcg-bin/fcg_ucc_getcdinfo_byids_cp.fcg?type=1&json=1&utf8=1&onlysong=0&disstid=%s' +            % list_id, list_id, 'Download list page', +            transform_source=strip_jsonp)['cdlist'][0] + +        entries = [ +            self.url_result( +                'http://y.qq.com/#type=song&mid=' + song['songmid'], 'QQMusic', song['songmid'] +            ) for song in list_json['songlist'] +        ] + +        list_name = list_json.get('dissname') +        list_description = clean_html(unescapeHTML(list_json.get('desc'))) +        return self.playlist_result(entries, list_id, list_name, list_description)  | 
