diff options
author | Simon Sawicki <contact@grub4k.xyz> | 2024-11-27 00:05:07 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-11-27 00:05:07 +0100 |
commit | e0500cbf796323551bbabe5b8ed8c75a511ba47a (patch) | |
tree | f95e6d4c36bafc7e471c9a9fb14453a109533d16 | |
parent | 4b5eec0aaa7c02627f27a386591b735b90e681a8 (diff) |
[ie] Handle fragmented formats in `_remove_duplicate_formats` (#11637)
Authored by: Grub4K
-rw-r--r-- | yt_dlp/extractor/common.py | 20 |
1 file changed, 17 insertions, 3 deletions
```diff
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 28a3adf93..ce79e0b62 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -1854,12 +1854,26 @@ class InfoExtractor:
 
     @staticmethod
     def _remove_duplicate_formats(formats):
-        format_urls = set()
+        seen_urls = set()
+        seen_fragment_urls = set()
         unique_formats = []
         for f in formats:
-            if f['url'] not in format_urls:
-                format_urls.add(f['url'])
+            fragments = f.get('fragments')
+            if callable(fragments):
                 unique_formats.append(f)
+
+            elif fragments:
+                fragment_urls = frozenset(
+                    fragment.get('url') or urljoin(f['fragment_base_url'], fragment['path'])
+                    for fragment in fragments)
+                if fragment_urls not in seen_fragment_urls:
+                    seen_fragment_urls.add(fragment_urls)
+                    unique_formats.append(f)
+
+            elif f['url'] not in seen_urls:
+                seen_urls.add(f['url'])
+                unique_formats.append(f)
+
         formats[:] = unique_formats
 
     def _is_valid_url(self, url, video_id, item='video', headers={}):
```