From c44c7895b8774fb819b0b664bfcf64a7ebeea4e8 Mon Sep 17 00:00:00 2001 From: remitamine Date: Fri, 25 Sep 2015 11:28:26 +0100 Subject: [kuwo] fix title extraction and update test --- youtube_dl/extractor/kuwo.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'youtube_dl/extractor/kuwo.py') diff --git a/youtube_dl/extractor/kuwo.py b/youtube_dl/extractor/kuwo.py index fa233377d..51137a982 100644 --- a/youtube_dl/extractor/kuwo.py +++ b/youtube_dl/extractor/kuwo.py @@ -57,6 +57,7 @@ class KuwoIE(KuwoBaseIE): 'upload_date': '20080122', 'description': 'md5:ed13f58e3c3bf3f7fd9fbc4e5a7aa75c' }, + 'skip': 'this song has been offline because of copyright issues', }, { 'url': 'http://www.kuwo.cn/yinyue/6446136/', 'info_dict': { @@ -78,7 +79,7 @@ class KuwoIE(KuwoBaseIE): errnote='Unable to get song detail info') song_name = self._html_search_regex( - r']+title="([^"]+)">', webpage, 'song name') + r'(?s)class="[^"]*title[^"]*".*?]+title="([^"]+)"', webpage, 'song name') singer_name = self._html_search_regex( r']+class="s_img">\s*]+title="([^>]+)"', webpage, 'singer name', fatal=False) -- cgit v1.2.3 From 3d09aa4c82100649279d979f9910a8c84ba301ff Mon Sep 17 00:00:00 2001 From: remitamine Date: Fri, 25 Sep 2015 11:40:32 +0100 Subject: [kuwo] extract title inside element with class title exactly --- youtube_dl/extractor/kuwo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'youtube_dl/extractor/kuwo.py') diff --git a/youtube_dl/extractor/kuwo.py b/youtube_dl/extractor/kuwo.py index 51137a982..a3c260838 100644 --- a/youtube_dl/extractor/kuwo.py +++ b/youtube_dl/extractor/kuwo.py @@ -79,7 +79,7 @@ class KuwoIE(KuwoBaseIE): errnote='Unable to get song detail info') song_name = self._html_search_regex( - r'(?s)class="[^"]*title[^"]*".*?]+title="([^"]+)"', webpage, 'song name') + r'(?s)class="(?:[^" ]+ +)*title(?: +[^" ]+)*".*?]+title="([^"]+)"', webpage, 'song name') singer_name = self._html_search_regex( r']+class="s_img">\s*]+title="([^>]+)"', webpage, 'singer name', fatal=False) -- cgit v1.2.3 From 5db34f680f93917ef79ba59d501b9f82e6d44330 Mon Sep 17 00:00:00 2001 From: remitamine Date: Sat, 26 Sep 2015 10:31:32 +0100 Subject: [kuwo] check for the offline error page --- youtube_dl/extractor/kuwo.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'youtube_dl/extractor/kuwo.py') diff --git a/youtube_dl/extractor/kuwo.py b/youtube_dl/extractor/kuwo.py index a3c260838..36cb265ab 100644 --- a/youtube_dl/extractor/kuwo.py +++ b/youtube_dl/extractor/kuwo.py @@ -77,9 +77,11 @@ class KuwoIE(KuwoBaseIE): webpage = self._download_webpage( url, song_id, note='Download song detail info', errnote='Unable to get song detail info') + if '对不起,该歌曲由于版权问题已被下线,将返回网站首页' in webpage: + raise ExtractorError('this song has been offline because of copyright issues') song_name = self._html_search_regex( - r'(?s)class="(?:[^" ]+ +)*title(?: +[^" ]+)*".*?]+title="([^"]+)"', webpage, 'song name') + r'(?s)class="(?:[^"\s]+\s+)*title(?:\s+[^"\s]+)*".*?]+title="([^"]+)"', webpage, 'song name') singer_name = self._html_search_regex( r']+class="s_img">\s*]+title="([^>]+)"', webpage, 'singer name', fatal=False) -- cgit v1.2.3 From 7193650641f3fd84872b231e48263752d10b37b7 Mon Sep 17 00:00:00 2001 From: remitamine Date: Sat, 26 Sep 2015 11:44:35 +0100 Subject: [kuwo] treat the offline error as an expected ExtractorError --- youtube_dl/extractor/kuwo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'youtube_dl/extractor/kuwo.py') diff --git a/youtube_dl/extractor/kuwo.py b/youtube_dl/extractor/kuwo.py index 36cb265ab..0c8ed5d07 100644 --- a/youtube_dl/extractor/kuwo.py +++ b/youtube_dl/extractor/kuwo.py @@ -78,7 +78,7 @@ class KuwoIE(KuwoBaseIE): url, song_id, note='Download song detail info', errnote='Unable to get song detail info') if '对不起,该歌曲由于版权问题已被下线,将返回网站首页' in webpage: - raise ExtractorError('this song has been offline because of copyright issues') + raise ExtractorError('this song has been offline because of copyright issues', expected=True) song_name = self._html_search_regex( r'(?s)class="(?:[^"\s]+\s+)*title(?:\s+[^"\s]+)*".*?]+title="([^"]+)"', webpage, 'song name') -- cgit v1.2.3