aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Yen Chi Hsuan <yan12125@gmail.com> 2016-09-24 14:20:42 +0800
committer Yen Chi Hsuan <yan12125@gmail.com> 2016-09-24 14:20:42 +0800
commit 5968d7d2fe619e85eb424d6e47d000f0b295d4a2 (patch)
tree 4904d66b01becf7fb71e5e3cf0c91a5448ae9feb
parent e6332059ac66bfc91ed18e5b15d9238e4283ee7a (diff)
[extractor/common] Improved support for HTML5 subtitles
Ref: #10625

Strictly speaking, <track>s with kind=captions are not subtitles [1]. openload misuses this attribute, and I expect there will be more examples, so I added support for it in common.py.

Also allow extracting information from subtitles-only <video> or <audio> tags, which is the case for openload.

[1] https://www.w3.org/TR/html5/embedded-content-0.html#attr-track-kind
-rw-r--r-- ChangeLog 6
-rw-r--r-- youtube_dl/extractor/common.py 4
2 files changed, 8 insertions, 2 deletions
diff --git a/ChangeLog b/ChangeLog
index a1c4df479..ebe4ff0e8 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+version <unreleased>
+
+Core
++ Improved support for HTML5 subtitles
+
+
version 2016.09.24
Core
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 9c8991542..5cb4479ec 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1828,7 +1828,7 @@ class InfoExtractor(object):
for track_tag in re.findall(r'<track[^>]+>', media_content):
track_attributes = extract_attributes(track_tag)
kind = track_attributes.get('kind')
- if not kind or kind == 'subtitles':
+ if not kind or kind in ('subtitles', 'captions'):
src = track_attributes.get('src')
if not src:
continue
@@ -1836,7 +1836,7 @@ class InfoExtractor(object):
media_info['subtitles'].setdefault(lang, []).append({
'url': absolute_url(src),
})
- if media_info['formats']:
+ if media_info['formats'] or media_info['subtitles']:
entries.append(media_info)
return entries