diff options
| -rw-r--r-- | test/test_youtube_lists.py | 43 | ||||
| -rw-r--r-- | youtube_dl/FileDownloader.py | 113 | ||||
| -rwxr-xr-x | youtube_dl/InfoExtractors.py | 47 | 
3 files changed, 131 insertions, 72 deletions
| diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py index f4705bc5b..9e91484f8 100644 --- a/test/test_youtube_lists.py +++ b/test/test_youtube_lists.py @@ -10,6 +10,7 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))  from youtube_dl.InfoExtractors import YoutubeUserIE, YoutubePlaylistIE, YoutubeIE  from youtube_dl.utils import * +from youtube_dl.FileDownloader import FileDownloader  PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "parameters.json")  with io.open(PARAMETERS_FILE, encoding='utf-8') as pf: @@ -22,7 +23,7 @@ proxy_handler = compat_urllib_request.ProxyHandler()  opener = compat_urllib_request.build_opener(proxy_handler, cookie_processor, YoutubeDLHandler())  compat_urllib_request.install_opener(opener) -class FakeDownloader(object): +class FakeDownloader(FileDownloader):      def __init__(self):          self.result = []          self.params = parameters @@ -30,35 +31,42 @@ class FakeDownloader(object):          print(s)      def trouble(self, s):          raise Exception(s) -    def download(self, x): -        self.result.append(x) +    def extract_info(self, url): +        self.result.append(url) +        return url  class TestYoutubeLists(unittest.TestCase): +    def assertIsPlaylist(self,info): +        """Make sure the info has '_type' set to 'playlist'""" +        self.assertEqual(info['_type'], 'playlist') +      def test_youtube_playlist(self):          dl = FakeDownloader()          ie = YoutubePlaylistIE(dl) -        ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re') -        ytie_results = [YoutubeIE()._extract_id(r[0]) for r in dl.result] +        result = ie.extract('https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re')[0] +        self.assertIsPlaylist(result) +        ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]          self.assertEqual(ytie_results, [ 'bV9L5Ht9LgY', 'FXxLjLQi3Fg', 'tU3Bgo5qJZE'])      def test_issue_673(self):          dl = FakeDownloader()          ie = YoutubePlaylistIE(dl) -        ie.extract('PLBB231211A4F62143') -        self.assertTrue(len(dl.result) > 40) +        result = ie.extract('PLBB231211A4F62143')[0] +        self.assertTrue(len(result['entries']) > 40)      def test_youtube_playlist_long(self):          dl = FakeDownloader()          ie = YoutubePlaylistIE(dl) -        ie.extract('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q') -        self.assertTrue(len(dl.result) >= 799) +        result = ie.extract('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')[0] +        self.assertIsPlaylist(result) +        self.assertTrue(len(result['entries']) >= 799)      def test_youtube_playlist_with_deleted(self):          #651          dl = FakeDownloader()          ie = YoutubePlaylistIE(dl) -        ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC') -        ytie_results = [YoutubeIE()._extract_id(r[0]) for r in dl.result] +        result = ie.extract('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')[0] +        ytie_results = [YoutubeIE()._extract_id(url['url']) for url in result['entries']]          self.assertFalse('pElCt5oNDuI' in ytie_results)          self.assertFalse('KdPEApIVdWM' in ytie_results) @@ -66,10 +74,11 @@ class TestYoutubeLists(unittest.TestCase):          dl = FakeDownloader()          ie = YoutubePlaylistIE(dl)          # TODO find a > 100 (paginating?) videos course -        ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8') -        self.assertEqual(YoutubeIE()._extract_id(dl.result[0][0]), 'j9WZyLZCBzs') -        self.assertEqual(len(dl.result), 25) -        self.assertEqual(YoutubeIE()._extract_id(dl.result[-1][0]), 'rYefUsYuEp0') +        result = ie.extract('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')[0] +        entries = result['entries'] +        self.assertEqual(YoutubeIE()._extract_id(entries[0]['url']), 'j9WZyLZCBzs') +        self.assertEqual(len(entries), 25) +        self.assertEqual(YoutubeIE()._extract_id(entries[-1]['url']), 'rYefUsYuEp0')      def test_youtube_channel(self):          # I give up, please find a channel that does paginate and test this like test_youtube_playlist_long @@ -78,8 +87,8 @@ class TestYoutubeLists(unittest.TestCase):      def test_youtube_user(self):          dl = FakeDownloader()          ie = YoutubeUserIE(dl) -        ie.extract('https://www.youtube.com/user/TheLinuxFoundation') -        self.assertTrue(len(dl.result) >= 320) +        result = ie.extract('https://www.youtube.com/user/TheLinuxFoundation')[0] +        self.assertTrue(len(result['entries']) >= 320)  if __name__ == '__main__':      unittest.main() diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index 725d4a016..6af2acbee 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -396,6 +396,72 @@ class FileDownloader(object):              if re.search(rejecttitle, title, re.IGNORECASE):                  return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'          return None +         +    def extract_info(self, url): +        ''' +        Returns a list with a dictionary for each video we find. +         ''' +        suitable_found = False +        for ie in self._ies: +            # Go to next InfoExtractor if not suitable +            if not ie.suitable(url): +                continue + +            # Warn if the _WORKING attribute is False +            if not ie.working(): +                self.to_stderr(u'WARNING: the program functionality for this site has been marked as broken, ' +                               u'and will probably not work. If you want to go on, use the -i option.') + +            # Suitable InfoExtractor found +            suitable_found = True + +            # Extract information from URL and process it +            try: +                ie_results = ie.extract(url) +                results = self.process_ie_results(ie_results, ie) +                return results +            except ExtractorError as de: # An error we somewhat expected +                self.trouble(u'ERROR: ' + compat_str(de), de.format_traceback()) +                break +            except Exception as e: +                if self.params.get('ignoreerrors', False): +                    self.trouble(u'ERROR: ' + compat_str(e), tb=compat_str(traceback.format_exc())) +                    break +                else: +                    raise +        if not suitable_found: +                self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url) +    def extract_info_iterable(self, urls): +        ''' +            Return the videos founded for the urls +        ''' +        results = [] +        for url in urls: +            results.extend(self.extract_info(url)) +        return results +         +    def process_ie_results(self, ie_results, ie): +        """ +        Take the results of the ie and return a list of videos. +        For url elements it will seartch the suitable ie and get the videos +        For playlist elements it will process each of the elements of the 'entries' key +        """ +        results = []  +        for result in ie_results or []: +            result_type = result.get('_type', 'video') #If not given we suppose it's a video, support the dafault old system +            if result_type == 'video': +                if not 'extractor' in result: +                    #The extractor has already been set somewhere else +                    result['extractor'] = ie.IE_NAME +                results.append(result) +            elif result_type == 'url': +                #We get the videos pointed by the url +                results.extend(self.extract_info(result['url'])) +            elif result_type == 'playlist': +                #We process each entry in the playlist +                entries_result = self.process_ie_results(result['entries'], ie) +                results.extend(entries_result) +        return results      def process_info(self, info_dict):          """Process a single dictionary returned by an InfoExtractor.""" @@ -528,49 +594,14 @@ class FileDownloader(object):              raise SameFileError(self.params['outtmpl'])          for url in url_list: -            suitable_found = False -            for ie in self._ies: -                # Go to next InfoExtractor if not suitable -                if not ie.suitable(url): -                    continue - -                # Warn if the _WORKING attribute is False -                if not ie.working(): -                    self.report_warning(u'the program functionality for this site has been marked as broken, ' -                                        u'and will probably not work. If you want to go on, use the -i option.') +            videos = self.extract_info(url) -                # Suitable InfoExtractor found -                suitable_found = True - -                # Extract information from URL and process it +            for video in videos or []:                  try: -                    videos = ie.extract(url) -                except ExtractorError as de: # An error we somewhat expected -                    self.trouble(u'ERROR: ' + compat_str(de), de.format_traceback()) -                    break -                except Exception as e: -                    if self.params.get('ignoreerrors', False): -                        self.trouble(u'ERROR: ' + compat_str(e), tb=compat_str(traceback.format_exc())) -                        break -                    else: -                        raise - -                if len(videos or []) > 1 and self.fixed_template(): -                    raise SameFileError(self.params['outtmpl']) - -                for video in videos or []: -                    video['extractor'] = ie.IE_NAME -                    try: -                        self.increment_downloads() -                        self.process_info(video) -                    except UnavailableVideoError: -                        self.trouble(u'\nERROR: unable to download video') - -                # Suitable InfoExtractor had been found; go to next URL -                break - -            if not suitable_found: -                self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url) +                    self.increment_downloads() +                    self.process_info(video) +                except UnavailableVideoError: +                    self.trouble(u'\nERROR: unable to download video')          return self._download_retcode diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index 835428f32..dd4a776e4 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -134,6 +134,24 @@ class InfoExtractor(object):              encoding = 'utf-8'          webpage_bytes = urlh.read()          return webpage_bytes.decode(encoding, 'replace') +         +    #Methods for following #608 +    #They set the correct value of the '_type' key +    def video_result(self, video_info): +        """Returns a video""" +        video_info['_type'] = 'video' +        return video_info +    def url_result(self, url, ie=None): +        """Returns a url that points to a page that should be processed""" +        #TODO: ie should be the class used for getting the info +        video_info = {'_type': 'url', +                      'url': url} +        return video_info +    def playlist_result(self, entries): +        """Returns a playlist""" +        video_info = {'_type': 'playlist', +                      'entries': entries} +        return video_info  class YoutubeIE(InfoExtractor): @@ -1331,7 +1349,7 @@ class GenericIE(InfoExtractor):          self._downloader.to_screen(u'[redirect] Following redirect to %s' % new_url)      def _test_redirect(self, url): -        """Check if it is a redirect, like url shorteners, in case restart chain.""" +        """Check if it is a redirect, like url shorteners, in case return the new url."""          class HeadRequest(compat_urllib_request.Request):              def get_method(self):                  return "HEAD" @@ -1382,11 +1400,11 @@ class GenericIE(InfoExtractor):              return False          self.report_following_redirect(new_url) -        self._downloader.download([new_url]) -        return True +        return new_url      def _real_extract(self, url): -        if self._test_redirect(url): return +        new_url = self._test_redirect(url) +        if new_url: return [self.url_result(new_url)]          video_id = url.split('/')[-1]          try: @@ -1789,9 +1807,8 @@ class YoutubePlaylistIE(InfoExtractor):          else:              self._downloader.to_screen(u'[youtube] PL %s: Found %i videos, downloading %i' % (playlist_id, total, len(videos))) -        for video in videos: -            self._downloader.download([video]) -        return +        url_results = [self.url_result(url) for url in videos] +        return [self.playlist_result(url_results)]  class YoutubeChannelIE(InfoExtractor): @@ -1841,9 +1858,9 @@ class YoutubeChannelIE(InfoExtractor):          self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids))) -        for id in video_ids: -            self._downloader.download(['http://www.youtube.com/watch?v=%s' % id]) -        return +        urls = ['http://www.youtube.com/watch?v=%s' % id for id in video_ids] +        url_entries = [self.url_result(url) for url in urls] +        return [self.playlist_result(url_entries)]  class YoutubeUserIE(InfoExtractor): @@ -1925,8 +1942,9 @@ class YoutubeUserIE(InfoExtractor):          self._downloader.to_screen(u"[youtube] user %s: Collected %d video ids (downloading %d of them)" %                  (username, all_ids_count, len(video_ids))) -        for video_id in video_ids: -            self._downloader.download(['http://www.youtube.com/watch?v=%s' % video_id]) +        urls = ['http://www.youtube.com/watch?v=%s' % video_id for video_id in video_ids] +        url_results = [self.url_result(url) for url in urls] +        return [self.playlist_result(url_results)]  class BlipTVUserIE(InfoExtractor): @@ -2016,8 +2034,9 @@ class BlipTVUserIE(InfoExtractor):          self._downloader.to_screen(u"[%s] user %s: Collected %d video ids (downloading %d of them)" %                  (self.IE_NAME, username, all_ids_count, len(video_ids))) -        for video_id in video_ids: -            self._downloader.download([u'http://blip.tv/'+video_id]) +        urls = [u'http://blip.tv/%s' % video_id for video_id in video_ids] +        url_entries = [self.url_result(url) for url in urls] +        return [self.playlist_result(url_entries)]  class DepositFilesIE(InfoExtractor): | 
