about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> 2013-11-26 21:35:03 +0100
committer Jaime Marquínez Ferrándiz <jaime.marquinez.ferrandiz@gmail.com> 2013-11-26 21:35:03 +0100
commit 652cdaa269725dfbf9effdc18a8fd0b369100399 (patch)
tree dbadaf3e287b6df6eda2cd71e3db6f86c72cfc97
parent e26f8712289c727a43d74a4669aee4924b9f75f2 (diff)
download youtube-dl-652cdaa269725dfbf9effdc18a8fd0b369100399.tar.xz
[youtube:playlist] Add support for YouTube mixes (fixes #1839)
-rw-r--r-- test/test_youtube_lists.py 9
-rw-r--r-- youtube_dl/extractor/youtube.py 24
2 files changed, 31 insertions, 2 deletions
diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py
index 8fd073f31..95f07d129 100644
--- a/test/test_youtube_lists.py
+++ b/test/test_youtube_lists.py
@@ -107,5 +107,14 @@ class TestYoutubeLists(unittest.TestCase):
result = ie.extract('http://www.youtube.com/show/airdisasters')
self.assertTrue(len(result) >= 3)
+ def test_youtube_mix(self):
+ dl = FakeYDL()
+ ie = YoutubePlaylistIE(dl)
+ result = ie.extract('http://www.youtube.com/watch?v=lLJf9qJHR3E&list=RDrjFaenf1T-Y')
+ entries = result['entries']
+ self.assertTrue(len(entries) >= 20)
+ original_video = entries[0]
+ self.assertEqual(original_video['id'], 'rjFaenf1T-Y')
+
if __name__ == '__main__':
unittest.main()
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index a76a9071a..9ef5fecce 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -28,6 +28,7 @@ from ..utils import (
clean_html,
get_cachedir,
get_element_by_id,
+ get_element_by_attribute,
ExtractorError,
unescapeHTML,
unified_strdate,
@@ -1537,6 +1538,22 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
def _real_initialize(self):
self._login()
+ def _ids_to_results(self, ids):
+ return [self.url_result(vid_id, 'Youtube', video_id=vid_id)
+ for vid_id in ids]
+
+ def _extract_mix(self, playlist_id):
+ # The mixes are generated from a single video
+ # the id of the playlist is just 'RD' + video_id
+ url = 'https://youtube.com/watch?v=%s&list=%s' % (playlist_id[2:], playlist_id)
+ webpage = self._download_webpage(url, playlist_id, u'Downloading Youtube mix')
+ title = clean_html(get_element_by_attribute('class', 'title long-title', webpage))
+ video_re = r'data-index="\d+".*?href="/watch\?v=([0-9A-Za-z_-]{11})&amp;[^"]*?list=%s' % re.escape(playlist_id)
+ ids = orderedSet(re.findall(video_re, webpage))
+ url_results = self._ids_to_results(ids)
+
+ return self.playlist_result(url_results, playlist_id, title)
+
def _real_extract(self, url):
# Extract playlist id
mobj = re.match(self._VALID_URL, url, re.VERBOSE)
@@ -1554,6 +1571,10 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
else:
self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id))
+ if len(playlist_id) == 13: # 'RD' + 11 characters for the video id
+ # Mixes require a custom extraction process
+ return self._extract_mix(playlist_id)
+
# Extract the video ids from the playlist pages
ids = []
@@ -1571,8 +1592,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
playlist_title = self._og_search_title(page)
- url_results = [self.url_result(vid_id, 'Youtube', video_id=vid_id)
- for vid_id in ids]
+ url_results = self._ids_to_results(ids)
return self.playlist_result(url_results, playlist_id, playlist_title)