aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl/extractor
diff options
context:
space:
mode:
author remitamine <remitamine@gmail.com> 2016-02-09 17:15:41 +0100
committer remitamine <remitamine@gmail.com> 2016-02-09 17:15:41 +0100
commit d413095f7e14f146f66d5f0cb828fb55aa4cb323 (patch)
tree 963026344873dfb3d755d23719da29e2acb7d79e /youtube_dl/extractor
parent 1bedf4de06300ea67655aaec8d83cf4af36a156f (diff)
[extractor/common] remove duplicated formats and subtitles in smil manifests
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- youtube_dl/extractor/common.py 8
1 files changed, 7 insertions, 1 deletions
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 9c0421de7..1143f6dbb 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1186,6 +1186,7 @@ class InfoExtractor(object):
http_count = 0
m3u8_count = 0
+ src_urls = []
videos = smil.findall(self._xpath_ns('.//video', namespace))
for video in videos:
src = video.get('src')
@@ -1222,6 +1223,9 @@ class InfoExtractor(object):
continue
src_url = src if src.startswith('http') else compat_urlparse.urljoin(base, src)
+ if src_url in src_urls:
+ continue
+ src_urls.append(src_url)
if proto == 'm3u8' or src_ext == 'm3u8':
m3u8_formats = self._extract_m3u8_formats(
@@ -1267,11 +1271,13 @@ class InfoExtractor(object):
return formats
def _parse_smil_subtitles(self, smil, namespace=None, subtitles_lang='en'):
+ urls = []
subtitles = {}
for num, textstream in enumerate(smil.findall(self._xpath_ns('.//textstream', namespace))):
src = textstream.get('src')
- if not src:
+ if not src or src in urls:
continue
+ urls.append(src)
ext = textstream.get('ext') or determine_ext(src)
if not ext:
type_ = textstream.get('type')