[extractor/common] Add support for video of WebPage context in _json_ld (closes #12778)

author: Sergey M․ <dstftw@gmail.com> 2017-04-18 22:21:38 +0700
committer: Sergey M․ <dstftw@gmail.com> 2017-04-18 22:21:38 +0700
commit: bae1404893341ed89f4c9b556aa4068c13ed9f7a (patch)
tree: d257ba09528378febbc582d920afd1a146e2d621 /youtube_dl/extractor/common.py
parent: 06d0ad9a4e2266b1cc74b45a59a53fad3f23fe15 (diff)
1 file changed, 21 insertions, 12 deletions
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 12e010a0d..61d97ab72 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -976,6 +976,22 @@ class InfoExtractor(object):
             return info
         if isinstance(json_ld, dict):
             json_ld = [json_ld]
+
+        def extract_video_object(e):
+            assert e['@type'] == 'VideoObject'
+            info.update({
+                'url': e.get('contentUrl'),
+                'title': unescapeHTML(e.get('name')),
+                'description': unescapeHTML(e.get('description')),
+                'thumbnail': e.get('thumbnailUrl') or e.get('thumbnailURL'),
+                'duration': parse_duration(e.get('duration')),
+                'timestamp': unified_timestamp(e.get('uploadDate')),
+                'filesize': float_or_none(e.get('contentSize')),
+                'tbr': int_or_none(e.get('bitrate')),
+                'width': int_or_none(e.get('width')),
+                'height': int_or_none(e.get('height')),
+            })
+
         for e in json_ld:
             if e.get('@context') == 'http://schema.org':
                 item_type = e.get('@type')
@@ -1000,18 +1016,11 @@ class InfoExtractor(object):
                         'description': unescapeHTML(e.get('articleBody')),
                     })
                 elif item_type == 'VideoObject':
-                    info.update({
-                        'url': e.get('contentUrl'),
-                        'title': unescapeHTML(e.get('name')),
-                        'description': unescapeHTML(e.get('description')),
-                        'thumbnail': e.get('thumbnailUrl') or e.get('thumbnailURL'),
-                        'duration': parse_duration(e.get('duration')),
-                        'timestamp': unified_timestamp(e.get('uploadDate')),
-                        'filesize': float_or_none(e.get('contentSize')),
-                        'tbr': int_or_none(e.get('bitrate')),
-                        'width': int_or_none(e.get('width')),
-                        'height': int_or_none(e.get('height')),
-                    })
+                    extract_video_object(e)
+                elif item_type == 'WebPage':
+                    video = e.get('video')
+                    if isinstance(video, dict) and video.get('@type') == 'VideoObject':
+                        extract_video_object(video)
                 break
         return dict((k, v) for k, v in info.items() if v is not None)
author	Sergey M․ <dstftw@gmail.com>	2017-04-18 22:21:38 +0700
committer	Sergey M․ <dstftw@gmail.com>	2017-04-18 22:21:38 +0700
commit	bae1404893341ed89f4c9b556aa4068c13ed9f7a (patch)
tree	d257ba09528378febbc582d920afd1a146e2d621 /youtube_dl/extractor/common.py
parent	06d0ad9a4e2266b1cc74b45a59a53fad3f23fe15 (diff)