diff options
Diffstat (limited to 'youtube_dl')
| -rw-r--r-- | youtube_dl/extractor/kuwo.py | 53 | 
1 files changed, 17 insertions, 36 deletions
diff --git a/youtube_dl/extractor/kuwo.py b/youtube_dl/extractor/kuwo.py
index 6a96a1aa4..82d5f3f95 100644
--- a/youtube_dl/extractor/kuwo.py
+++ b/youtube_dl/extractor/kuwo.py
@@ -180,71 +180,52 @@ class KuwoChartIE(InfoExtractor):
 
 class KuwoSingerIE(InfoExtractor):
     IE_NAME = 'kuwo:singer'
-    _VALID_URL = r'http://www\.kuwo\.cn/mingxing/(?P<id>[^/]+?)/$'
-    _TEST = {
+    _VALID_URL = r'http://www\.kuwo\.cn/mingxing/(?P<id>[^/]+)'
+    _TESTS = [{
         'url': 'http://www.kuwo.cn/mingxing/bruno+mars/',
         'info_dict': {
             'id': 'bruno+mars',
             'title': 'Bruno Mars',
         },
         'playlist_count': 10,
-    }
-
-    def _real_extract(self, url):
-        singer_id = self._match_id(url)
-        webpage = self._download_webpage(
-            url, singer_id, note='Download singer info',
-            errnote='Unable to get singer info')
-
-        singer_name = self._html_search_regex(
-            r'姓名:<span>(.+?)</span>', webpage, 'singer name')
-
-        entries = [
-            self.url_result("http://www.kuwo.cn/yinyue/%s/" % song_id, 'Kuwo', song_id)
-            for song_id in re.findall(
-                r'<a href="http://www\.kuwo\.cn/yinyue/([0-9]+)/" .+?>.+?</a>',
-                webpage, flags=re.DOTALL)
-        ]
-        return self.playlist_result(entries, singer_id, singer_name)
-
-
-class KuwoSingerMusicIE(InfoExtractor):
-    IE_NAME = 'kuwo:singermusic'
-    _VALID_URL = r'http://www\.kuwo\.cn/mingxing/(?P<id>[^/]+?)/music(_[0-9]+)?.htm'
-    _TEST = {
+    }, {
         'url': 'http://www.kuwo.cn/mingxing/Ali/music.htm',
         'info_dict': {
             'id': 'Ali',
-            'title': 'Ali的热门歌曲',
+            'title': 'Ali',
         },
         'playlist_mincount': 95,
-    }
+    }]
 
     def _real_extract(self, url):
         singer_id = self._match_id(url)
+        webpage = self._download_webpage(
+            url, singer_id, note='Download singer info',
+            errnote='Unable to get singer info')
+
+        singer_name = self._html_search_regex(
+            r'<div class="title clearfix">[\n\s\t]*?<h1>(.+?)<span', webpage, 'singer name'
+        )
 
-        list_name = None
         entries = []
+        first_page_only = False if re.match(r'.+/music(?:_[0-9]+)?\.htm', url) else True
         for page_num in itertools.count(1):
             webpage = self._download_webpage(
                 'http://www.kuwo.cn/mingxing/%s/music_%d.htm' % (singer_id, page_num),
                 singer_id, note='Download song list page #%d' % page_num,
                 errnote='Unable to get song list page #%d' % page_num)
 
-            if list_name is None:
-                list_name = self._html_search_regex(
-                    r'<h1>([^<>]+)<span>', webpage, 'list name')
-
             entries.extend([
                 self.url_result("http://www.kuwo.cn/yinyue/%s/" % song_id, 'Kuwo', song_id)
                 for song_id in re.findall(
                     r'<p class="m_name"><a href="http://www\.kuwo\.cn/yinyue/([0-9]+)/',
                     webpage)
-            ])
-            if not re.search(r'<a href="[^"]+">下一页</a>', webpage):
+            ][:10 if first_page_only else None])
+
+            if first_page_only or not re.search(r'<a href="[^"]+">下一页</a>', webpage):
                 break
 
-        return self.playlist_result(entries, singer_id, list_name)
+        return self.playlist_result(entries, singer_id, singer_name)
 
 
 class KuwoCategoryIE(InfoExtractor):
