diff options
author | Sergey M․ <dstftw@gmail.com> | 2016-08-31 00:29:49 +0700 |
---|---|---|
committer | Sergey M․ <dstftw@gmail.com> | 2016-08-31 00:29:49 +0700 |
commit | 64fc49aba018ebd51627ddcc92f8fa88f2c499cc (patch) | |
tree | 1ab4bb33b300365e31ff32413096db90b8ed7be1 | |
parent | 245023a86145f7074dacdab4c735dea268d766ce (diff) |
[bandcamp:album] Fix title extraction (Closes #10455)
-rw-r--r-- | youtube_dl/extractor/bandcamp.py | 16 |
1 file changed, 14 insertions, 2 deletions
```diff
diff --git a/youtube_dl/extractor/bandcamp.py b/youtube_dl/extractor/bandcamp.py
index 991ab0676..249c3d956 100644
--- a/youtube_dl/extractor/bandcamp.py
+++ b/youtube_dl/extractor/bandcamp.py
@@ -162,6 +162,15 @@ class BandcampAlbumIE(InfoExtractor):
             'uploader_id': 'dotscale',
         },
         'playlist_mincount': 7,
+    }, {
+        # with escaped quote in title
+        'url': 'https://jstrecords.bandcamp.com/album/entropy-ep',
+        'info_dict': {
+            'title': '"Entropy" EP',
+            'uploader_id': 'jstrecords',
+            'id': 'entropy-ep',
+        },
+        'playlist_mincount': 3,
     }]
 
     def _real_extract(self, url):
@@ -176,8 +185,11 @@ class BandcampAlbumIE(InfoExtractor):
         entries = [
             self.url_result(compat_urlparse.urljoin(url, t_path), ie=BandcampIE.ie_key())
             for t_path in tracks_paths]
-        title = self._search_regex(
-            r'album_title\s*:\s*"(.*?)"', webpage, 'title', fatal=False)
+        title = self._html_search_regex(
+            r'album_title\s*:\s*"((?:\\.|[^"\\])+?)"',
+            webpage, 'title', fatal=False)
+        if title:
+            title = title.replace(r'\"', '"')
         return {
             '_type': 'playlist',
             'uploader_id': uploader_id,
```