diff options
| -rw-r--r-- | test/test_youtube_lists.py | 6 | ||||
| -rw-r--r-- | youtube_dl/YoutubeDL.py | 43 | ||||
| -rw-r--r-- | youtube_dl/extractor/common.py | 4 | ||||
| -rw-r--r-- | youtube_dl/extractor/youtube.py | 26 | 
4 files changed, 52 insertions, 27 deletions
| diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py index 50ad52695..938517a2d 100644 --- a/test/test_youtube_lists.py +++ b/test/test_youtube_lists.py @@ -84,16 +84,16 @@ class TestYoutubeLists(unittest.TestCase):          dl = FakeYDL()          ie = YoutubeChannelIE(dl)          #test paginated channel -        result = ie.extract('https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w')[0] +        result = ie.extract('https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w')          self.assertTrue(len(result['entries']) > 90)          #test autogenerated channel -        result = ie.extract('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')[0] +        result = ie.extract('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')          self.assertTrue(len(result['entries']) >= 18)      def test_youtube_user(self):          dl = FakeYDL()          ie = YoutubeUserIE(dl) -        result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation')[0] +        result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation')          self.assertTrue(len(result['entries']) >= 320)      def test_youtube_safe_search(self): diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 2700051cf..beb7d0cd1 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -355,15 +355,17 @@ class YoutubeDL(object):      def _match_entry(self, info_dict):          """ Returns None iff the file should be downloaded """ -        title = info_dict['title'] -        matchtitle = self.params.get('matchtitle', False) -        if matchtitle: -            if not re.search(matchtitle, title, re.IGNORECASE): -                return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"' -        rejecttitle = self.params.get('rejecttitle', False) -        if rejecttitle: -            if re.search(rejecttitle, title, re.IGNORECASE): -                return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"' +        if 'title' in info_dict: +            # This can happen when we're just evaluating the playlist +            title = info_dict['title'] +            matchtitle = self.params.get('matchtitle', False) +            if matchtitle: +                if not re.search(matchtitle, title, re.IGNORECASE): +                    return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"' +            rejecttitle = self.params.get('rejecttitle', False) +            if rejecttitle: +                if re.search(rejecttitle, title, re.IGNORECASE): +                    return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'          date = info_dict.get('upload_date', None)          if date is not None:              dateRange = self.params.get('daterange', DateRange()) @@ -374,8 +376,8 @@ class YoutubeDL(object):              if age_limit < info_dict.get('age_limit', 0):                  return u'Skipping "' + title + '" because it is age restricted'          if self.in_download_archive(info_dict): -            return (u'%(title)s has already been recorded in archive' -                    % info_dict) +            return (u'%s has already been recorded in archive' +                    % info_dict.get('title', info_dict.get('id', u'video')))          return None      @staticmethod @@ -454,7 +456,7 @@ class YoutubeDL(object):                                       ie_key=ie_result.get('ie_key'),                                       extra_info=extra_info)          elif result_type == 'playlist': -            self.add_extra_info(ie_result, extra_info) +              # We process each entry in the playlist              playlist = ie_result.get('title', None) or ie_result.get('id', None)              self.to_screen(u'[download] Downloading playlist: %s' % playlist) @@ -484,6 +486,12 @@ class YoutubeDL(object):                      'webpage_url': ie_result['webpage_url'],                      'extractor_key': ie_result['extractor_key'],                  } + +                reason = self._match_entry(entry) +                if reason is not None: +                    self.to_screen(u'[download] ' + reason) +                    continue +                  entry_result = self.process_ie_result(entry,                                                        download=download,                                                        extra_info=extra) @@ -810,7 +818,16 @@ class YoutubeDL(object):          fn = self.params.get('download_archive')          if fn is None:              return False -        vid_id = info_dict['extractor'] + u' ' + info_dict['id'] +        extractor = info_dict.get('extractor_id') +        if extractor is None: +            if 'id' in info_dict: +                extractor = info_dict.get('ie_key')  # key in a playlist +        if extractor is None: +            return False  # Incomplete video information +        # Future-proof against any change in case +        # and backwards compatibility with prior versions +        extractor = extractor.lower() +        vid_id = extractor + u' ' + info_dict['id']          try:              with locked_file(fn, 'r', encoding='utf-8') as archive_file:                  for line in archive_file: diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index eb3435c77..3cebeaf29 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -229,12 +229,14 @@ class InfoExtractor(object):          self.to_screen(u'Logging in')      #Methods for following #608 -    def url_result(self, url, ie=None): +    def url_result(self, url, ie=None, video_id=None):          """Returns a url that points to a page that should be processed"""          #TODO: ie should be the class used for getting the info          video_info = {'_type': 'url',                        'url': url,                        'ie_key': ie} +        if video_id is not None: +            video_info['id'] = video_id          return video_info      def playlist_result(self, entries, playlist_id=None, playlist_title=None):          """Returns a playlist""" diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 9b09793eb..126688652 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1552,7 +1552,7 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):              video_id = query_dict['v'][0]              if self._downloader.params.get('noplaylist'):                  self.to_screen(u'Downloading just video %s because of --no-playlist' % video_id) -                return self.url_result('https://www.youtube.com/watch?v=' + video_id, 'Youtube') +                return self.url_result(video_id, 'Youtube', video_id=video_id)              else:                  self.to_screen(u'Downloading playlist PL%s - add --no-playlist to just download video %s' % (playlist_id, video_id)) @@ -1571,7 +1571,8 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):          playlist_title = self._og_search_title(page) -        url_results = [self.url_result(vid, 'Youtube') for vid in ids] +        url_results = [self.url_result(video_id, 'Youtube', video_id=video_id) +                       for video_id in ids]          return self.playlist_result(url_results, playlist_id, playlist_title) @@ -1626,9 +1627,9 @@ class YoutubeChannelIE(InfoExtractor):          self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids))) -        urls = ['http://www.youtube.com/watch?v=%s' % id for id in video_ids] -        url_entries = [self.url_result(eurl, 'Youtube') for eurl in urls] -        return [self.playlist_result(url_entries, channel_id)] +        url_entries = [self.url_result(video_id, 'Youtube', video_id=video_id) +                       for video_id in video_ids] +        return self.playlist_result(url_entries, channel_id)  class YoutubeUserIE(InfoExtractor): @@ -1692,9 +1693,11 @@ class YoutubeUserIE(InfoExtractor):              if len(ids_in_page) < self._GDATA_PAGE_SIZE:                  break -        urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids] -        url_results = [self.url_result(rurl, 'Youtube') for rurl in urls] -        return [self.playlist_result(url_results, playlist_title = username)] +        url_results = [ +            self.url_result(video_id, 'Youtube', video_id=video_id) +            for video_id in video_ids] +        return self.playlist_result(url_results, playlist_title=username) +  class YoutubeSearchIE(SearchInfoExtractor):      IE_DESC = u'YouTube.com searches' @@ -1735,7 +1738,8 @@ class YoutubeSearchIE(SearchInfoExtractor):          if len(video_ids) > n:              video_ids = video_ids[:n] -        videos = [self.url_result('http://www.youtube.com/watch?v=%s' % id, 'Youtube') for id in video_ids] +        videos = [self.url_result(video_id, 'Youtube', video_id=video_id) +                  for video_id in video_ids]          return self.playlist_result(videos, query)  class YoutubeSearchDateIE(YoutubeSearchIE): @@ -1795,7 +1799,9 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):              feed_html = info['feed_html']              m_ids = re.finditer(r'"/watch\?v=(.*?)["&]', feed_html)              ids = orderedSet(m.group(1) for m in m_ids) -            feed_entries.extend(self.url_result(id, 'Youtube') for id in ids) +            feed_entries.extend( +                self.url_result(video_id, 'Youtube', video_id=video_id) +                for video_id in ids)              if info['paging'] is None:                  break          return self.playlist_result(feed_entries, playlist_title=self._PLAYLIST_TITLE) | 
