about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Sergey M․ <dstftw@gmail.com> 2017-04-18 22:21:38 +0700
committer Sergey M․ <dstftw@gmail.com> 2017-04-18 22:21:38 +0700
commit bae1404893341ed89f4c9b556aa4068c13ed9f7a (patch)
tree d257ba09528378febbc582d920afd1a146e2d621
parent 06d0ad9a4e2266b1cc74b45a59a53fad3f23fe15 (diff)
download youtube-dl-bae1404893341ed89f4c9b556aa4068c13ed9f7a.tar.xz
[extractor/common] Add support for video of WebPage context in _json_ld (closes #12778)
-rw-r--r-- youtube_dl/extractor/common.py 33
1 files changed, 21 insertions, 12 deletions
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 12e010a0d..61d97ab72 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -976,6 +976,22 @@ class InfoExtractor(object):
return info
if isinstance(json_ld, dict):
json_ld = [json_ld]
+
+ def extract_video_object(e):
+ assert e['@type'] == 'VideoObject'
+ info.update({
+ 'url': e.get('contentUrl'),
+ 'title': unescapeHTML(e.get('name')),
+ 'description': unescapeHTML(e.get('description')),
+ 'thumbnail': e.get('thumbnailUrl') or e.get('thumbnailURL'),
+ 'duration': parse_duration(e.get('duration')),
+ 'timestamp': unified_timestamp(e.get('uploadDate')),
+ 'filesize': float_or_none(e.get('contentSize')),
+ 'tbr': int_or_none(e.get('bitrate')),
+ 'width': int_or_none(e.get('width')),
+ 'height': int_or_none(e.get('height')),
+ })
+
for e in json_ld:
if e.get('@context') == 'http://schema.org':
item_type = e.get('@type')
@@ -1000,18 +1016,11 @@ class InfoExtractor(object):
'description': unescapeHTML(e.get('articleBody')),
})
elif item_type == 'VideoObject':
- info.update({
- 'url': e.get('contentUrl'),
- 'title': unescapeHTML(e.get('name')),
- 'description': unescapeHTML(e.get('description')),
- 'thumbnail': e.get('thumbnailUrl') or e.get('thumbnailURL'),
- 'duration': parse_duration(e.get('duration')),
- 'timestamp': unified_timestamp(e.get('uploadDate')),
- 'filesize': float_or_none(e.get('contentSize')),
- 'tbr': int_or_none(e.get('bitrate')),
- 'width': int_or_none(e.get('width')),
- 'height': int_or_none(e.get('height')),
- })
+ extract_video_object(e)
+ elif item_type == 'WebPage':
+ video = e.get('video')
+ if isinstance(video, dict) and video.get('@type') == 'VideoObject':
+ extract_video_object(video)
break
return dict((k, v) for k, v in info.items() if v is not None)