diff options
| author | Yen Chi Hsuan <yan12125@gmail.com> | 2016-09-24 14:20:42 +0800 | 
|---|---|---|
| committer | Yen Chi Hsuan <yan12125@gmail.com> | 2016-09-24 14:20:42 +0800 | 
| commit | 5968d7d2fe619e85eb424d6e47d000f0b295d4a2 (patch) | |
| tree | 4904d66b01becf7fb71e5e3cf0c91a5448ae9feb | |
| parent | e6332059ac66bfc91ed18e5b15d9238e4283ee7a (diff) | |
[extractor/common] Improved support for HTML5 subtitles
Ref: #10625
Strictly speaking, <track>s with kind=captions are not subtitles. [1]
openload misuses this attribute, and I expect other sites will do the
same, so I am adding support for it in common.py.
Also allow extracting information from <video> or <audio> tags that have
subtitles but no formats, which is the case for openload.
[1] https://www.w3.org/TR/html5/embedded-content-0.html#attr-track-kind
| -rw-r--r-- | ChangeLog | 6 | ||||
| -rw-r--r-- | youtube_dl/extractor/common.py | 4 | 
2 files changed, 8 insertions, 2 deletions
| @@ -1,3 +1,9 @@ +version <unreleased> + +Core ++ Improved support for HTML5 subtitles + +  version 2016.09.24  Core diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 9c8991542..5cb4479ec 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1828,7 +1828,7 @@ class InfoExtractor(object):                  for track_tag in re.findall(r'<track[^>]+>', media_content):                      track_attributes = extract_attributes(track_tag)                      kind = track_attributes.get('kind') -                    if not kind or kind == 'subtitles': +                    if not kind or kind in ('subtitles', 'captions'):                          src = track_attributes.get('src')                          if not src:                              continue @@ -1836,7 +1836,7 @@ class InfoExtractor(object):                          media_info['subtitles'].setdefault(lang, []).append({                              'url': absolute_url(src),                          }) -            if media_info['formats']: +            if media_info['formats'] or media_info['subtitles']:                  entries.append(media_info)          return entries | 
