about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Philipp Hagemeister <phihag@phihag.de> 2013-11-22 22:46:46 +0100
committer Philipp Hagemeister <phihag@phihag.de> 2013-11-22 22:46:46 +0100
commit 7012b23c947fc1ed146e314a30d3c70a5fde70e7 (patch)
tree c42cb6445eb221bdb416454a29b44608517351b5
parent 50123be4211e2c16aa5d2fc9ebadbaf72a9becce (diff)
Match --download-archive during playlist processing (Fixes #1745)
-rw-r--r-- test/test_youtube_lists.py 6
-rw-r--r-- youtube_dl/YoutubeDL.py 43
-rw-r--r-- youtube_dl/extractor/common.py 4
-rw-r--r-- youtube_dl/extractor/youtube.py 26
4 files changed, 52 insertions, 27 deletions
diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py
index 50ad52695..938517a2d 100644
--- a/test/test_youtube_lists.py
+++ b/test/test_youtube_lists.py
@@ -84,16 +84,16 @@ class TestYoutubeLists(unittest.TestCase):
dl = FakeYDL()
ie = YoutubeChannelIE(dl)
#test paginated channel
- result = ie.extract('https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w')[0]
+ result = ie.extract('https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w')
self.assertTrue(len(result['entries']) > 90)
#test autogenerated channel
- result = ie.extract('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')[0]
+ result = ie.extract('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')
self.assertTrue(len(result['entries']) >= 18)
def test_youtube_user(self):
dl = FakeYDL()
ie = YoutubeUserIE(dl)
- result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation')[0]
+ result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation')
self.assertTrue(len(result['entries']) >= 320)
def test_youtube_safe_search(self):
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 2700051cf..beb7d0cd1 100644
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -355,15 +355,17 @@ class YoutubeDL(object):
def _match_entry(self, info_dict):
""" Returns None iff the file should be downloaded """
- title = info_dict['title']
- matchtitle = self.params.get('matchtitle', False)
- if matchtitle:
- if not re.search(matchtitle, title, re.IGNORECASE):
- return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
- rejecttitle = self.params.get('rejecttitle', False)
- if rejecttitle:
- if re.search(rejecttitle, title, re.IGNORECASE):
- return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
+ if 'title' in info_dict:
+ # This can happen when we're just evaluating the playlist
+ title = info_dict['title']
+ matchtitle = self.params.get('matchtitle', False)
+ if matchtitle:
+ if not re.search(matchtitle, title, re.IGNORECASE):
+ return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
+ rejecttitle = self.params.get('rejecttitle', False)
+ if rejecttitle:
+ if re.search(rejecttitle, title, re.IGNORECASE):
+ return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
date = info_dict.get('upload_date', None)
if date is not None:
dateRange = self.params.get('daterange', DateRange())
@@ -374,8 +376,8 @@ class YoutubeDL(object):
if age_limit < info_dict.get('age_limit', 0):
return u'Skipping "' + title + '" because it is age restricted'
if self.in_download_archive(info_dict):
- return (u'%(title)s has already been recorded in archive'
- % info_dict)
+ return (u'%s has already been recorded in archive'
+ % info_dict.get('title', info_dict.get('id', u'video')))
return None
@staticmethod
@@ -454,7 +456,7 @@ class YoutubeDL(object):
ie_key=ie_result.get('ie_key'),
extra_info=extra_info)
elif result_type == 'playlist':
- self.add_extra_info(ie_result, extra_info)
+
# We process each entry in the playlist
playlist = ie_result.get('title', None) or ie_result.get('id', None)
self.to_screen(u'[download] Downloading playlist: %s' % playlist)
@@ -484,6 +486,12 @@ class YoutubeDL(object):
'webpage_url': ie_result['webpage_url'],
'extractor_key': ie_result['extractor_key'],
}
+
+ reason = self._match_entry(entry)
+ if reason is not None:
+ self.to_screen(u'[download] ' + reason)
+ continue
+
entry_result = self.process_ie_result(entry,
download=download,
extra_info=extra)
@@ -810,7 +818,16 @@ class YoutubeDL(object):
fn = self.params.get('download_archive')
if fn is None:
return False
- vid_id = info_dict['extractor'] + u' ' + info_dict['id']
+ extractor = info_dict.get('extractor_id')
+ if extractor is None:
+ if 'id' in info_dict:
+ extractor = info_dict.get('ie_key') # key in a playlist
+ if extractor is None:
+ return False # Incomplete video information
+ # Future-proof against any change in case
+ # and backwards compatibility with prior versions
+ extractor = extractor.lower()
+ vid_id = extractor + u' ' + info_dict['id']
try:
with locked_file(fn, 'r', encoding='utf-8') as archive_file:
for line in archive_file:
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index eb3435c77..3cebeaf29 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -229,12 +229,14 @@ class InfoExtractor(object):
self.to_screen(u'Logging in')
#Methods for following #608
- def url_result(self, url, ie=None):
+ def url_result(self, url, ie=None, video_id=None):
"""Returns a url that points to a page that should be processed"""
#TODO: ie should be the class used for getting the info
video_info = {'_type': 'url',
'url': url,
'ie_key': ie}
+ if video_id is not None:
+ video_info['id'] = video_id
return video_info
def playlist_result(self, entries, playlist_id=None, playlist_title=None):
"""Returns a playlist"""
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 9b09793eb..126688652 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1552,7 +1552,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
video_id = query_dict['v'][0]
if self._downloader.params.get('noplaylist'):
self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id)
- return self.url_result('https://www.youtube.com/watch?v=' + video_id, 'Youtube')
+ return self.url_result(video_id, 'Youtube', video_id=video_id)
else:
self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id))
@@ -1571,7 +1571,8 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):
playlist_title = self._og_search_title(page)
- url_results = [self.url_result(vid, 'Youtube') for vid in ids]
+ url_results = [self.url_result(video_id, 'Youtube', video_id=video_id)
+ for video_id in ids]
return self.playlist_result(url_results, playlist_id, playlist_title)
@@ -1626,9 +1627,9 @@ class YoutubeChannelIE(InfoExtractor):
self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids)))
- urls = ['http://www.youtube.com/watch?v=%s' % id for id in video_ids]
- url_entries = [self.url_result(eurl, 'Youtube') for eurl in urls]
- return [self.playlist_result(url_entries, channel_id)]
+ url_entries = [self.url_result(video_id, 'Youtube', video_id=video_id)
+ for video_id in video_ids]
+ return self.playlist_result(url_entries, channel_id)
class YoutubeUserIE(InfoExtractor):
@@ -1692,9 +1693,11 @@ class YoutubeUserIE(InfoExtractor):
if len(ids_in_page) < self._GDATA_PAGE_SIZE:
break
- urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids]
- url_results = [self.url_result(rurl, 'Youtube') for rurl in urls]
- return [self.playlist_result(url_results, playlist_title = username)]
+ url_results = [
+ self.url_result(video_id, 'Youtube', video_id=video_id)
+ for video_id in video_ids]
+ return self.playlist_result(url_results, playlist_title=username)
+
class YoutubeSearchIE(SearchInfoExtractor):
IE_DESC = u'YouTube.com searches'
@@ -1735,7 +1738,8 @@ class YoutubeSearchIE(SearchInfoExtractor):
if len(video_ids) > n:
video_ids = video_ids[:n]
- videos = [self.url_result('http://www.youtube.com/watch?v=%s' % id, 'Youtube') for id in video_ids]
+ videos = [self.url_result(video_id, 'Youtube', video_id=video_id)
+ for video_id in video_ids]
return self.playlist_result(videos, query)
class YoutubeSearchDateIE(YoutubeSearchIE):
@@ -1795,7 +1799,9 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
feed_html = info['feed_html']
m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)
ids = orderedSet(m.group(1) for m in m_ids)
- feed_entries.extend(self.url_result(id, 'Youtube') for id in ids)
+ feed_entries.extend(
+ self.url_result(video_id, 'Youtube', video_id=video_id)
+ for video_id in ids)
if info['paging'] is None:
break
return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE)