diff options
| author | Sergey M․ <dstftw@gmail.com> | 2021-01-16 01:46:41 +0700 | 
|---|---|---|
| committer | Sergey M․ <dstftw@gmail.com> | 2021-01-16 01:46:41 +0700 | 
| commit | 9c9b45814582cc763d45cda2742f9574f254c6a0 (patch) | |
| tree | ddc3d721ec3943f4c72a1f5c4484d37ce940ba9d | |
| parent | 9d50f862323ed3d7c1ccd014c16d5d25b05de925 (diff) | |
[YoutubeDL] Protect from infinite recursion due to recursively nested playlists (closes #27833)
| -rwxr-xr-x | youtube_dl/YoutubeDL.py | 236 | 
1 files changed, 129 insertions, 107 deletions
| diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 0ed4bc6ba..efd42fa63 100755 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -338,6 +338,8 @@ class YoutubeDL(object):      _pps = []      _download_retcode = None      _num_downloads = None +    _playlist_level = 0 +    _playlist_urls = set()      _screen_file = None      def __init__(self, params=None, auto_init=True): @@ -906,115 +908,23 @@ class YoutubeDL(object):              return self.process_ie_result(                  new_result, download=download, extra_info=extra_info)          elif result_type in ('playlist', 'multi_video'): -            # We process each entry in the playlist -            playlist = ie_result.get('title') or ie_result.get('id') -            self.to_screen('[download] Downloading playlist: %s' % playlist) - -            playlist_results = [] - -            playliststart = self.params.get('playliststart', 1) - 1 -            playlistend = self.params.get('playlistend') -            # For backwards compatibility, interpret -1 as whole list -            if playlistend == -1: -                playlistend = None - -            playlistitems_str = self.params.get('playlist_items') -            playlistitems = None -            if playlistitems_str is not None: -                def iter_playlistitems(format): -                    for string_segment in format.split(','): -                        if '-' in string_segment: -                            start, end = string_segment.split('-') -                            for item in range(int(start), int(end) + 1): -                                yield int(item) -                        else: -                            yield int(string_segment) -                playlistitems = orderedSet(iter_playlistitems(playlistitems_str)) - -            ie_entries = ie_result['entries'] - -            def make_playlistitems_entries(list_ie_entries): -                num_entries = len(list_ie_entries) -                return [ -                    list_ie_entries[i - 1] for i in playlistitems -                    if -num_entries <= i - 1 < num_entries] - -            def report_download(num_entries): +            # Protect from infinite recursion due to recursively nested playlists +            # (see https://github.com/ytdl-org/youtube-dl/issues/27833) +            webpage_url = ie_result['webpage_url'] +            if webpage_url in self._playlist_urls:                  self.to_screen( -                    '[%s] playlist %s: Downloading %d videos' % -                    (ie_result['extractor'], playlist, num_entries)) - -            if isinstance(ie_entries, list): -                n_all_entries = len(ie_entries) -                if playlistitems: -                    entries = make_playlistitems_entries(ie_entries) -                else: -                    entries = ie_entries[playliststart:playlistend] -                n_entries = len(entries) -                self.to_screen( -                    '[%s] playlist %s: Collected %d video ids (downloading %d of them)' % -                    (ie_result['extractor'], playlist, n_all_entries, n_entries)) -            elif isinstance(ie_entries, PagedList): -                if playlistitems: -                    entries = [] -                    for item in playlistitems: -                        entries.extend(ie_entries.getslice( -                            item - 1, item -                        )) -                else: -                    entries = ie_entries.getslice( -                        playliststart, playlistend) -                n_entries = len(entries) -                report_download(n_entries) -            else:  # iterable -                if playlistitems: -                    entries = make_playlistitems_entries(list(itertools.islice( -                        ie_entries, 0, max(playlistitems)))) -                else: -                    entries = list(itertools.islice( -                        ie_entries, playliststart, playlistend)) -                n_entries = len(entries) -                report_download(n_entries) - -            if self.params.get('playlistreverse', False): -                entries = entries[::-1] - -            if self.params.get('playlistrandom', False): -                random.shuffle(entries) - -            x_forwarded_for = ie_result.get('__x_forwarded_for_ip') - -            for i, entry in enumerate(entries, 1): -                self.to_screen('[download] Downloading video %s of %s' % (i, n_entries)) -                # This __x_forwarded_for_ip thing is a bit ugly but requires -                # minimal changes -                if x_forwarded_for: -                    entry['__x_forwarded_for_ip'] = x_forwarded_for -                extra = { -                    'n_entries': n_entries, -                    'playlist': playlist, -                    'playlist_id': ie_result.get('id'), -                    'playlist_title': ie_result.get('title'), -                    'playlist_uploader': ie_result.get('uploader'), -                    'playlist_uploader_id': ie_result.get('uploader_id'), -                    'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart, -                    'extractor': ie_result['extractor'], -                    'webpage_url': ie_result['webpage_url'], -                    'webpage_url_basename': url_basename(ie_result['webpage_url']), -                    'extractor_key': ie_result['extractor_key'], -                } - -                reason = self._match_entry(entry, incomplete=True) -                if reason is not None: -                    self.to_screen('[download] ' + reason) -                    continue +                    '[download] Skipping already downloaded playlist: %s' +                    % ie_result.get('title') or ie_result.get('id')) +                return -                entry_result = self.__process_iterable_entry(entry, download, extra) -                # TODO: skip failed (empty) entries? -                playlist_results.append(entry_result) -            ie_result['entries'] = playlist_results -            self.to_screen('[download] Finished downloading playlist: %s' % playlist) -            return ie_result +            self._playlist_level += 1 +            self._playlist_urls.add(webpage_url) +            try: +                return self.__process_playlist(ie_result, download) +            finally: +                self._playlist_level -= 1 +                if not self._playlist_level: +                    self._playlist_urls.clear()          elif result_type == 'compat_list':              self.report_warning(                  'Extractor %s returned a compat_list result. ' @@ -1039,6 +949,118 @@ class YoutubeDL(object):          else:              raise Exception('Invalid result type: %s' % result_type) +    def __process_playlist(self, ie_result, download): +        # We process each entry in the playlist +        playlist = ie_result.get('title') or ie_result.get('id') + +        self.to_screen('[download] Downloading playlist: %s' % playlist) + +        playlist_results = [] + +        playliststart = self.params.get('playliststart', 1) - 1 +        playlistend = self.params.get('playlistend') +        # For backwards compatibility, interpret -1 as whole list +        if playlistend == -1: +            playlistend = None + +        playlistitems_str = self.params.get('playlist_items') +        playlistitems = None +        if playlistitems_str is not None: +            def iter_playlistitems(format): +                for string_segment in format.split(','): +                    if '-' in string_segment: +                        start, end = string_segment.split('-') +                        for item in range(int(start), int(end) + 1): +                            yield int(item) +                    else: +                        yield int(string_segment) +            playlistitems = orderedSet(iter_playlistitems(playlistitems_str)) + +        ie_entries = ie_result['entries'] + +        def make_playlistitems_entries(list_ie_entries): +            num_entries = len(list_ie_entries) +            return [ +                list_ie_entries[i - 1] for i in playlistitems +                if -num_entries <= i - 1 < num_entries] + +        def report_download(num_entries): +            self.to_screen( +                '[%s] playlist %s: Downloading %d videos' % +                (ie_result['extractor'], playlist, num_entries)) + +        if isinstance(ie_entries, list): +            n_all_entries = len(ie_entries) +            if playlistitems: +                entries = make_playlistitems_entries(ie_entries) +            else: +                entries = ie_entries[playliststart:playlistend] +            n_entries = len(entries) +            self.to_screen( +                '[%s] playlist %s: Collected %d video ids (downloading %d of them)' % +                (ie_result['extractor'], playlist, n_all_entries, n_entries)) +        elif isinstance(ie_entries, PagedList): +            if playlistitems: +                entries = [] +                for item in playlistitems: +                    entries.extend(ie_entries.getslice( +                        item - 1, item +                    )) +            else: +                entries = ie_entries.getslice( +                    playliststart, playlistend) +            n_entries = len(entries) +            report_download(n_entries) +        else:  # iterable +            if playlistitems: +                entries = make_playlistitems_entries(list(itertools.islice( +                    ie_entries, 0, max(playlistitems)))) +            else: +                entries = list(itertools.islice( +                    ie_entries, playliststart, playlistend)) +            n_entries = len(entries) +            report_download(n_entries) + +        if self.params.get('playlistreverse', False): +            entries = entries[::-1] + +        if self.params.get('playlistrandom', False): +            random.shuffle(entries) + +        x_forwarded_for = ie_result.get('__x_forwarded_for_ip') + +        for i, entry in enumerate(entries, 1): +            self.to_screen('[download] Downloading video %s of %s' % (i, n_entries)) +            # This __x_forwarded_for_ip thing is a bit ugly but requires +            # minimal changes +            if x_forwarded_for: +                entry['__x_forwarded_for_ip'] = x_forwarded_for +            extra = { +                'n_entries': n_entries, +                'playlist': playlist, +                'playlist_id': ie_result.get('id'), +                'playlist_title': ie_result.get('title'), +                'playlist_uploader': ie_result.get('uploader'), +                'playlist_uploader_id': ie_result.get('uploader_id'), +                'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart, +                'extractor': ie_result['extractor'], +                'webpage_url': ie_result['webpage_url'], +                'webpage_url_basename': url_basename(ie_result['webpage_url']), +                'extractor_key': ie_result['extractor_key'], +            } + +            reason = self._match_entry(entry, incomplete=True) +            if reason is not None: +                self.to_screen('[download] ' + reason) +                continue + +            entry_result = self.__process_iterable_entry(entry, download, extra) +            # TODO: skip failed (empty) entries? +            playlist_results.append(entry_result) +        ie_result['entries'] = playlist_results +        self.to_screen('[download] Finished downloading playlist: %s' % playlist) +        return ie_result +      @__handle_extraction_exceptions      def __process_iterable_entry(self, entry, download, extra_info):          return self.process_ie_result( | 
