about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Simon Sawicki <contact@grub4k.xyz> 2024-11-27 00:05:07 +0100
committer GitHub <noreply@github.com> 2024-11-27 00:05:07 +0100
commit e0500cbf796323551bbabe5b8ed8c75a511ba47a (patch)
tree f95e6d4c36bafc7e471c9a9fb14453a109533d16
parent 4b5eec0aaa7c02627f27a386591b735b90e681a8 (diff)
[ie] Handle fragmented formats in `_remove_duplicate_formats` (#11637)
Authored by: Grub4K
-rw-r--r-- yt_dlp/extractor/common.py 20
1 files changed, 17 insertions, 3 deletions
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 28a3adf93..ce79e0b62 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -1854,12 +1854,26 @@ class InfoExtractor:
@staticmethod
def _remove_duplicate_formats(formats):
- format_urls = set()
+ seen_urls = set()
+ seen_fragment_urls = set()
unique_formats = []
for f in formats:
- if f['url'] not in format_urls:
- format_urls.add(f['url'])
+ fragments = f.get('fragments')
+ if callable(fragments):
unique_formats.append(f)
+
+ elif fragments:
+ fragment_urls = frozenset(
+ fragment.get('url') or urljoin(f['fragment_base_url'], fragment['path'])
+ for fragment in fragments)
+ if fragment_urls not in seen_fragment_urls:
+ seen_fragment_urls.add(fragment_urls)
+ unique_formats.append(f)
+
+ elif f['url'] not in seen_urls:
+ seen_urls.add(f['url'])
+ unique_formats.append(f)
+
formats[:] = unique_formats
def _is_valid_url(self, url, video_id, item='video', headers={}):