aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Sergey M․ <dstftw@gmail.com> 2020-12-12 04:48:56 +0700
committer Sergey M․ <dstftw@gmail.com> 2020-12-12 04:52:35 +0700
commit 3ae02d4a626b6966ba9678827c5bfc0e8804a2aa (patch)
tree 974dfa554e3e7ec5d55affce48bc09bed6cf6a61
parent 59fea63dc2636b2bde10d56116c273cd5d516eed (diff)
[wdr] Extend subtitles extraction and improve overall extraction (closes #22672, closes #22723)
-rw-r--r-- youtube_dl/extractor/wdr.py 21
1 file changed, 17 insertions, 4 deletions
diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py
index ba97d983b..2903d189e 100644
--- a/youtube_dl/extractor/wdr.py
+++ b/youtube_dl/extractor/wdr.py
@@ -17,6 +17,7 @@ from ..utils import (
unified_strdate,
update_url_query,
urlhandle_detect_ext,
+ url_or_none,
)
@@ -42,16 +43,20 @@ class WDRIE(InfoExtractor):
is_live = metadata.get('mediaType') == 'live'
tracker_data = metadata['trackerData']
+ title = tracker_data['trackerClipTitle']
+
media_resource = metadata['mediaResource']
formats = []
# check if the metadata contains a direct URL to a file
- for kind, media_resource in media_resource.items():
+ for kind, media in media_resource.items():
+ if not isinstance(media, dict):
+ continue
if kind not in ('dflt', 'alt'):
continue
- for tag_name, medium_url in media_resource.items():
+ for tag_name, medium_url in media.items():
if tag_name not in ('videoURL', 'audioURL'):
continue
@@ -88,8 +93,16 @@ class WDRIE(InfoExtractor):
'url': caption_url,
'ext': 'ttml',
}]
-
- title = tracker_data['trackerClipTitle']
+ captions_hash = media_resource.get('captionsHash')
+ if isinstance(captions_hash, dict):
+ for ext, format_url in captions_hash.items():
+ format_url = url_or_none(format_url)
+ if not format_url:
+ continue
+ subtitles.setdefault('de', []).append({
+ 'url': format_url,
+ 'ext': determine_ext(format_url, None) or ext,
+ })
return {
'id': tracker_data.get('trackerClipId', video_id),