diff options
| author | Sergey M․ <dstftw@gmail.com> | 2020-12-12 04:48:56 +0700 | 
|---|---|---|
| committer | Sergey M․ <dstftw@gmail.com> | 2020-12-12 04:52:35 +0700 | 
| commit | 3ae02d4a626b6966ba9678827c5bfc0e8804a2aa (patch) | |
| tree | 974dfa554e3e7ec5d55affce48bc09bed6cf6a61 | |
| parent | 59fea63dc2636b2bde10d56116c273cd5d516eed (diff) | |
[wdr] Extend subtitles extraction and improve overall extraction (closes #22672, closes #22723)
| -rw-r--r-- | youtube_dl/extractor/wdr.py | 21 | 
1 files changed, 17 insertions, 4 deletions
```diff
diff --git a/youtube_dl/extractor/wdr.py b/youtube_dl/extractor/wdr.py
index ba97d983b..2903d189e 100644
--- a/youtube_dl/extractor/wdr.py
+++ b/youtube_dl/extractor/wdr.py
@@ -17,6 +17,7 @@ from ..utils import (
     unified_strdate,
     update_url_query,
     urlhandle_detect_ext,
+    url_or_none,
 )
 
 
@@ -42,16 +43,20 @@ class WDRIE(InfoExtractor):
         is_live = metadata.get('mediaType') == 'live'
 
         tracker_data = metadata['trackerData']
+        title = tracker_data['trackerClipTitle']
+
         media_resource = metadata['mediaResource']
 
         formats = []
 
         # check if the metadata contains a direct URL to a file
-        for kind, media_resource in media_resource.items():
+        for kind, media in media_resource.items():
+            if not isinstance(media, dict):
+                continue
             if kind not in ('dflt', 'alt'):
                 continue
 
-            for tag_name, medium_url in media_resource.items():
+            for tag_name, medium_url in media.items():
                 if tag_name not in ('videoURL', 'audioURL'):
                     continue
 
@@ -88,8 +93,16 @@ class WDRIE(InfoExtractor):
                 'url': caption_url,
                 'ext': 'ttml',
             }]
-
-        title = tracker_data['trackerClipTitle']
+        captions_hash = media_resource.get('captionsHash')
+        if isinstance(captions_hash, dict):
+            for ext, format_url in captions_hash.items():
+                format_url = url_or_none(format_url)
+                if not format_url:
+                    continue
+                subtitles.setdefault('de', []).append({
+                    'url': format_url,
+                    'ext': determine_ext(format_url, None) or ext,
+                })
 
         return {
             'id': tracker_data.get('trackerClipId', video_id),
```
