diff options
author | Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> | 2014-02-23 17:17:36 +0100 |
---|---|---|
committer | Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> | 2014-02-23 17:17:36 +0100 |
commit | bc2f773b4f738c7fdaac1786d4796c69c5b57295 (patch) | |
tree | 657cf6fb181e1f9ddc6c9831e534b830aa68c694 /youtube_dl/extractor | |
parent | f919201ecccc6ac208b86d281439eb669d373765 (diff) |
[youtube:playlist] Fix mixes extraction (fixes #2444)
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- | youtube_dl/extractor/youtube.py | 13 |
1 file changed, 9 insertions, 4 deletions
```diff
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index cf0bd4ae3..f868b1929 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1488,11 +1488,16 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
         # the id of the playlist is just 'RD' + video_id
         url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[-11:], playlist_id)
         webpage = self._download_webpage(url, playlist_id, u'Downloading Youtube mix')
-        title_span = (get_element_by_attribute('class', 'title long-title', webpage) or
-            get_element_by_attribute('class', 'title ', webpage))
+        search_title = lambda class_name: get_element_by_attribute('class', class_name, webpage)
+        title_span = (search_title('playlist-title') or
+            search_title('title long-title') or search_title('title'))
         title = clean_html(title_span)
-        video_re = r'data-index="\d+".*?href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s' % re.escape(playlist_id)
-        ids = orderedSet(re.findall(video_re, webpage))
+        video_re = r'''(?x)data-index="\d+".*?
+            data-video-username="(.*?)".*?
+            href="/watch\?v=([0-9A-Za-z_-]{11})&[^"]*?list=%s''' % re.escape(playlist_id)
+        matches = orderedSet(re.findall(video_re, webpage, flags=re.DOTALL))
+        # Some of the videos may have beend deleted, their username field is empty
+        ids = [video_id for (username, video_id) in matches if username]
         url_results = self._ids_to_results(ids)
         return self.playlist_result(url_results, playlist_id, title)
 
```