aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--test/test_YoutubeDL.py46
-rwxr-xr-xyoutube_dl/YoutubeDL.py4
-rw-r--r--youtube_dl/extractor/ccc.py4
-rw-r--r--youtube_dl/extractor/youtube.py153
4 files changed, 96 insertions, 111 deletions
diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py
index 82b827536..a13c09ef4 100644
--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@@ -12,6 +12,7 @@ import copy
from test.helper import FakeYDL, assertRegexpMatches
from youtube_dl import YoutubeDL
+from youtube_dl.compat import compat_str
from youtube_dl.extractor import YoutubeIE
from youtube_dl.postprocessor.common import PostProcessor
from youtube_dl.utils import match_filter_func
@@ -507,6 +508,51 @@ class TestYoutubeDL(unittest.TestCase):
res = get_videos(f)
self.assertEqual(res, ['1'])
+ def test_playlist_items_selection(self):
+ entries = [{
+ 'id': compat_str(i),
+ 'title': compat_str(i),
+ 'url': TEST_URL,
+ } for i in range(1, 5)]
+ playlist = {
+ '_type': 'playlist',
+ 'id': 'test',
+ 'entries': entries,
+ 'extractor': 'test:playlist',
+ 'extractor_key': 'test:playlist',
+ 'webpage_url': 'http://example.com',
+ }
+
+ def get_ids(params):
+ ydl = YDL(params)
+ # make a copy because the dictionary can be modified
+ ydl.process_ie_result(playlist.copy())
+ return [int(v['id']) for v in ydl.downloaded_info_dicts]
+
+ result = get_ids({})
+ self.assertEqual(result, [1, 2, 3, 4])
+
+ result = get_ids({'playlistend': 10})
+ self.assertEqual(result, [1, 2, 3, 4])
+
+ result = get_ids({'playlistend': 2})
+ self.assertEqual(result, [1, 2])
+
+ result = get_ids({'playliststart': 10})
+ self.assertEqual(result, [])
+
+ result = get_ids({'playliststart': 2})
+ self.assertEqual(result, [2, 3, 4])
+
+ result = get_ids({'playlist_items': '2-4'})
+ self.assertEqual(result, [2, 3, 4])
+
+ result = get_ids({'playlist_items': '2,4'})
+ self.assertEqual(result, [2, 4])
+
+ result = get_ids({'playlist_items': '10'})
+ self.assertEqual(result, [])
+
if __name__ == '__main__':
unittest.main()
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 691f3e09f..5df889945 100755
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -759,7 +759,9 @@ class YoutubeDL(object):
if isinstance(ie_entries, list):
n_all_entries = len(ie_entries)
if playlistitems:
- entries = [ie_entries[i - 1] for i in playlistitems]
+ entries = [
+ ie_entries[i - 1] for i in playlistitems
+ if -n_all_entries <= i - 1 < n_all_entries]
else:
entries = ie_entries[playliststart:playlistend]
n_entries = len(entries)
diff --git a/youtube_dl/extractor/ccc.py b/youtube_dl/extractor/ccc.py
index 2a5d4be18..6924eac70 100644
--- a/youtube_dl/extractor/ccc.py
+++ b/youtube_dl/extractor/ccc.py
@@ -16,7 +16,7 @@ class CCCIE(InfoExtractor):
_TEST = {
'url': 'http://media.ccc.de/browse/congress/2013/30C3_-_5443_-_en_-_saal_g_-_201312281830_-_introduction_to_processor_design_-_byterazor.html#video',
- 'md5': '205a365d0d57c0b1e43a12c9ffe8f9be',
+ 'md5': '3a1eda8f3a29515d27f5adb967d7e740',
'info_dict': {
'id': '20131228183',
'ext': 'mp4',
@@ -51,7 +51,7 @@ class CCCIE(InfoExtractor):
matches = re.finditer(r'''(?xs)
<(?:span|div)\s+class='label\s+filetype'>(?P<format>.*?)</(?:span|div)>\s*
- <a\s+href='(?P<http_url>[^']+)'>\s*
+ <a\s+download\s+href='(?P<http_url>[^']+)'>\s*
(?:
.*?
<a\s+href='(?P<torrent_url>[^']+\.torrent)'
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index e58184adc..1f9940cf5 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -49,6 +49,11 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
# YouTube sets the expire time to about two months
expire_time=time.time() + 2 * 30 * 24 * 3600)
+ def _ids_to_results(self, ids):
+ return [
+ self.url_result(vid_id, 'Youtube', video_id=vid_id)
+ for vid_id in ids]
+
def _login(self):
"""
Attempt to log in to YouTube.
@@ -1261,11 +1266,6 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
def _real_initialize(self):
self._login()
- def _ids_to_results(self, ids):
- return [
- self.url_result(vid_id, 'Youtube', video_id=vid_id)
- for vid_id in ids]
-
def _extract_mix(self, playlist_id):
# The mixes are generated from a single video
# the id of the playlist is just 'RD' + video_id
@@ -1601,20 +1601,10 @@ class YoutubeShowIE(InfoExtractor):
class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
"""
- Base class for extractors that fetch info from
- http://www.youtube.com/feed_ajax
+ Base class for feed extractors
Subclasses must define the _FEED_NAME and _PLAYLIST_TITLE properties.
"""
_LOGIN_REQUIRED = True
- # use action_load_personal_feed instead of action_load_system_feed
- _PERSONAL_FEED = False
-
- @property
- def _FEED_TEMPLATE(self):
- action = 'action_load_system_feed'
- if self._PERSONAL_FEED:
- action = 'action_load_personal_feed'
- return 'https://www.youtube.com/feed_ajax?%s=1&feed_name=%s&paging=%%s' % (action, self._FEED_NAME)
@property
def IE_NAME(self):
@@ -1624,67 +1614,23 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
self._login()
def _real_extract(self, url):
- feed_entries = []
- paging = 0
- for i in itertools.count(1):
- info = self._download_json(
- self._FEED_TEMPLATE % paging,
- '%s feed' % self._FEED_NAME,
- 'Downloading page %s' % i,
- transform_source=uppercase_escape)
- feed_html = info.get('feed_html') or info.get('content_html')
- load_more_widget_html = info.get('load_more_widget_html') or feed_html
- m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
- ids = orderedSet(m.group(1) for m in m_ids)
- feed_entries.extend(
- self.url_result(video_id, 'Youtube', video_id=video_id)
- for video_id in ids)
- mobj = re.search(
- r'data-uix-load-more-href="/?[^"]+paging=(?P<paging>\d+)',
- load_more_widget_html)
- if mobj is None:
- break
- paging = mobj.group('paging')
- return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)
-
-
-class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
- IE_NAME = 'youtube:recommended'
- IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
- _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
- _FEED_NAME = 'recommended'
- _PLAYLIST_TITLE = 'Youtube Recommended videos'
-
-
-class YoutubeWatchLaterIE(YoutubePlaylistIE):
- IE_NAME = 'youtube:watchlater'
- IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
- _VALID_URL = r'https?://www\.youtube\.com/(?:feed/watch_later|playlist\?list=WL)|:ytwatchlater'
-
- _TESTS = [] # override PlaylistIE tests
-
- def _real_extract(self, url):
- return self._extract_playlist('WL')
-
-
-class YoutubeHistoryIE(YoutubePlaylistIE):
- IE_NAME = 'youtube:history'
- IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
- _VALID_URL = 'https?://www\.youtube\.com/feed/history|:ythistory'
- _TESTS = []
-
- def _real_extract(self, url):
- title = 'Youtube History'
- page = self._download_webpage('https://www.youtube.com/feed/history', title)
+ page = self._download_webpage(
+ 'https://www.youtube.com/feed/%s' % self._FEED_NAME, self._PLAYLIST_TITLE)
# The extraction process is the same as for playlists, but the regex
# for the video ids doesn't contain an index
ids = []
more_widget_html = content_html = page
-
for page_num in itertools.count(1):
matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)
- new_ids = orderedSet(matches)
+
+ # 'recommended' feed has infinite 'load more' and each new portion spins
+ # the same videos in (sometimes) slightly different order, so we'll check
+ # for unicity and break when portion has no new videos
+ new_ids = filter(lambda video_id: video_id not in ids, orderedSet(matches))
+ if not new_ids:
+ break
+
ids.extend(new_ids)
mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
@@ -1692,17 +1638,25 @@ class YoutubeHistoryIE(YoutubePlaylistIE):
break
more = self._download_json(
- 'https://youtube.com/%s' % mobj.group('more'), title,
+ 'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
'Downloading page #%s' % page_num,
transform_source=uppercase_escape)
content_html = more['content_html']
more_widget_html = more['load_more_widget_html']
- return {
- '_type': 'playlist',
- 'title': title,
- 'entries': self._ids_to_results(ids),
- }
+ return self.playlist_result(
+ self._ids_to_results(ids), playlist_title=self._PLAYLIST_TITLE)
+
+
+class YoutubeWatchLaterIE(YoutubePlaylistIE):
+ IE_NAME = 'youtube:watchlater'
+ IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
+ _VALID_URL = r'https?://www\.youtube\.com/(?:feed/watch_later|playlist\?list=WL)|:ytwatchlater'
+
+ _TESTS = [] # override PlaylistIE tests
+
+ def _real_extract(self, url):
+ return self._extract_playlist('WL')
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
@@ -1717,42 +1671,25 @@ class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
return self.url_result(playlist_id, 'YoutubePlaylist')
-class YoutubeSubscriptionsIE(YoutubePlaylistIE):
- IE_NAME = 'youtube:subscriptions'
- IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
- _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
- _TESTS = []
-
- def _real_extract(self, url):
- title = 'Youtube Subscriptions'
- page = self._download_webpage('https://www.youtube.com/feed/subscriptions', title)
-
- # The extraction process is the same as for playlists, but the regex
- # for the video ids doesn't contain an index
- ids = []
- more_widget_html = content_html = page
+class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
+ IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
+ _VALID_URL = r'https?://www\.youtube\.com/feed/recommended|:ytrec(?:ommended)?'
+ _FEED_NAME = 'recommended'
+ _PLAYLIST_TITLE = 'Youtube Recommended videos'
- for page_num in itertools.count(1):
- matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)
- new_ids = orderedSet(matches)
- ids.extend(new_ids)
- mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)
- if not mobj:
- break
+class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
+ IE_DESC = 'YouTube.com subscriptions feed, "ytsubs" keyword (requires authentication)'
+ _VALID_URL = r'https?://www\.youtube\.com/feed/subscriptions|:ytsubs(?:criptions)?'
+ _FEED_NAME = 'subscriptions'
+ _PLAYLIST_TITLE = 'Youtube Subscriptions'
- more = self._download_json(
- 'https://youtube.com/%s' % mobj.group('more'), title,
- 'Downloading page #%s' % page_num,
- transform_source=uppercase_escape)
- content_html = more['content_html']
- more_widget_html = more['load_more_widget_html']
- return {
- '_type': 'playlist',
- 'title': title,
- 'entries': self._ids_to_results(ids),
- }
+class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
+ IE_DESC = 'Youtube watch history, ":ythistory" for short (requires authentication)'
+ _VALID_URL = 'https?://www\.youtube\.com/feed/history|:ythistory'
+ _FEED_NAME = 'history'
+ _PLAYLIST_TITLE = 'Youtube History'
class YoutubeTruncatedURLIE(InfoExtractor):