diff options
author | Sergey M․ <dstftw@gmail.com> | 2017-04-18 22:21:38 +0700 |
---|---|---|
committer | Sergey M․ <dstftw@gmail.com> | 2017-04-18 22:21:38 +0700 |
commit | bae1404893341ed89f4c9b556aa4068c13ed9f7a (patch) | |
tree | d257ba09528378febbc582d920afd1a146e2d621 /youtube_dl/extractor | |
parent | 06d0ad9a4e2266b1cc74b45a59a53fad3f23fe15 (diff) |
[extractor/common] Add support for video of WebPage context in _json_ld (closes #12778)
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- | youtube_dl/extractor/common.py | 33 |
1 file changed, 21 insertions, 12 deletions
```diff
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 12e010a0d..61d97ab72 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -976,6 +976,22 @@ class InfoExtractor(object):
             return info
         if isinstance(json_ld, dict):
             json_ld = [json_ld]
+
+        def extract_video_object(e):
+            assert e['@type'] == 'VideoObject'
+            info.update({
+                'url': e.get('contentUrl'),
+                'title': unescapeHTML(e.get('name')),
+                'description': unescapeHTML(e.get('description')),
+                'thumbnail': e.get('thumbnailUrl') or e.get('thumbnailURL'),
+                'duration': parse_duration(e.get('duration')),
+                'timestamp': unified_timestamp(e.get('uploadDate')),
+                'filesize': float_or_none(e.get('contentSize')),
+                'tbr': int_or_none(e.get('bitrate')),
+                'width': int_or_none(e.get('width')),
+                'height': int_or_none(e.get('height')),
+            })
+
         for e in json_ld:
             if e.get('@context') == 'http://schema.org':
                 item_type = e.get('@type')
@@ -1000,18 +1016,11 @@ class InfoExtractor(object):
                         'description': unescapeHTML(e.get('articleBody')),
                     })
                 elif item_type == 'VideoObject':
-                    info.update({
-                        'url': e.get('contentUrl'),
-                        'title': unescapeHTML(e.get('name')),
-                        'description': unescapeHTML(e.get('description')),
-                        'thumbnail': e.get('thumbnailUrl') or e.get('thumbnailURL'),
-                        'duration': parse_duration(e.get('duration')),
-                        'timestamp': unified_timestamp(e.get('uploadDate')),
-                        'filesize': float_or_none(e.get('contentSize')),
-                        'tbr': int_or_none(e.get('bitrate')),
-                        'width': int_or_none(e.get('width')),
-                        'height': int_or_none(e.get('height')),
-                    })
+                    extract_video_object(e)
+                elif item_type == 'WebPage':
+                    video = e.get('video')
+                    if isinstance(video, dict) and video.get('@type') == 'VideoObject':
+                        extract_video_object(video)
                 break
         return dict((k, v) for k, v in info.items() if v is not None)
 
```