diff options
author | remitamine <remitamine@gmail.com> | 2016-02-09 17:15:41 +0100 |
---|---|---|
committer | remitamine <remitamine@gmail.com> | 2016-02-09 17:15:41 +0100 |
commit | d413095f7e14f146f66d5f0cb828fb55aa4cb323 (patch) | |
tree | 963026344873dfb3d755d23719da29e2acb7d79e /youtube_dl/extractor/common.py | |
parent | 1bedf4de06300ea67655aaec8d83cf4af36a156f (diff) |
[extractor/common] remove duplicated formats and subtitles in smil manifests
Diffstat (limited to 'youtube_dl/extractor/common.py')
-rw-r--r-- | youtube_dl/extractor/common.py | 8 |
1 file changed, 7 insertions, 1 deletion
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 9c0421de7..1143f6dbb 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1186,6 +1186,7 @@ class InfoExtractor(object): http_count = 0 m3u8_count = 0 + src_urls = [] videos = smil.findall(self._xpath_ns('.//video', namespace)) for video in videos: src = video.get('src') @@ -1222,6 +1223,9 @@ class InfoExtractor(object): continue src_url = src if src.startswith('http') else compat_urlparse.urljoin(base, src) + if src_url in src_urls: + continue + src_urls.append(src_url) if proto == 'm3u8' or src_ext == 'm3u8': m3u8_formats = self._extract_m3u8_formats( @@ -1267,11 +1271,13 @@ class InfoExtractor(object): return formats def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'): + urls = [] subtitles = {} for num, textstream in enumerate(smil.findall(self._xpath_ns('.//textstream', namespace))): src = textstream.get('src') - if not src: + if not src or src in urls: continue + urls.append(src) ext = textstream.get('ext') or determine_ext(src) if not ext: type_ = textstream.get('type') |