about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Sergey M․ <dstftw@gmail.com> 2016-08-05 23:14:32 +0700
committer: Sergey M․ <dstftw@gmail.com> 2016-08-05 23:14:32 +0700
commit: 46933a15d69a0079e66a04475659d0cbd5e5f08d (patch)
tree: 61f54d851b3cfb2b21a36b4cda9e04d211562df9
parent: 3859ebeee6d6448240176ef5e4c20f6b1d1db795 (diff)
download: youtube-dl-46933a15d69a0079e66a04475659d0cbd5e5f08d.tar.xz
[extractor/common] Support root JSON-LD lists (Closes #10203)
-rw-r--r-- youtube_dl/extractor/common.py 76
1 files changed, 41 insertions, 35 deletions
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 2d337d614..70909fc1c 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -828,41 +828,47 @@ class InfoExtractor(object):
if not json_ld:
return {}
info = {}
- if json_ld.get('@context') == 'http://schema.org':
- item_type = json_ld.get('@type')
- if expected_type is not None and expected_type != item_type:
- return info
- if item_type == 'TVEpisode':
- info.update({
- 'episode': unescapeHTML(json_ld.get('name')),
- 'episode_number': int_or_none(json_ld.get('episodeNumber')),
- 'description': unescapeHTML(json_ld.get('description')),
- })
- part_of_season = json_ld.get('partOfSeason')
- if isinstance(part_of_season, dict) and part_of_season.get('@type') == 'TVSeason':
- info['season_number'] = int_or_none(part_of_season.get('seasonNumber'))
- part_of_series = json_ld.get('partOfSeries')
- if isinstance(part_of_series, dict) and part_of_series.get('@type') == 'TVSeries':
- info['series'] = unescapeHTML(part_of_series.get('name'))
- elif item_type == 'Article':
- info.update({
- 'timestamp': parse_iso8601(json_ld.get('datePublished')),
- 'title': unescapeHTML(json_ld.get('headline')),
- 'description': unescapeHTML(json_ld.get('articleBody')),
- })
- elif item_type == 'VideoObject':
- info.update({
- 'url': json_ld.get('contentUrl'),
- 'title': unescapeHTML(json_ld.get('name')),
- 'description': unescapeHTML(json_ld.get('description')),
- 'thumbnail': json_ld.get('thumbnailUrl'),
- 'duration': parse_duration(json_ld.get('duration')),
- 'timestamp': unified_timestamp(json_ld.get('uploadDate')),
- 'filesize': float_or_none(json_ld.get('contentSize')),
- 'tbr': int_or_none(json_ld.get('bitrate')),
- 'width': int_or_none(json_ld.get('width')),
- 'height': int_or_none(json_ld.get('height')),
- })
+ if not isinstance(json_ld, (list, tuple, dict)):
+ return info
+ if isinstance(json_ld, dict):
+ json_ld = [json_ld]
+ for e in json_ld:
+ if e.get('@context') == 'http://schema.org':
+ item_type = e.get('@type')
+ if expected_type is not None and expected_type != item_type:
+ return info
+ if item_type == 'TVEpisode':
+ info.update({
+ 'episode': unescapeHTML(e.get('name')),
+ 'episode_number': int_or_none(e.get('episodeNumber')),
+ 'description': unescapeHTML(e.get('description')),
+ })
+ part_of_season = e.get('partOfSeason')
+ if isinstance(part_of_season, dict) and part_of_season.get('@type') == 'TVSeason':
+ info['season_number'] = int_or_none(part_of_season.get('seasonNumber'))
+ part_of_series = e.get('partOfSeries')
+ if isinstance(part_of_series, dict) and part_of_series.get('@type') == 'TVSeries':
+ info['series'] = unescapeHTML(part_of_series.get('name'))
+ elif item_type == 'Article':
+ info.update({
+ 'timestamp': parse_iso8601(e.get('datePublished')),
+ 'title': unescapeHTML(e.get('headline')),
+ 'description': unescapeHTML(e.get('articleBody')),
+ })
+ elif item_type == 'VideoObject':
+ info.update({
+ 'url': e.get('contentUrl'),
+ 'title': unescapeHTML(e.get('name')),
+ 'description': unescapeHTML(e.get('description')),
+ 'thumbnail': e.get('thumbnailUrl'),
+ 'duration': parse_duration(e.get('duration')),
+ 'timestamp': unified_timestamp(e.get('uploadDate')),
+ 'filesize': float_or_none(e.get('contentSize')),
+ 'tbr': int_or_none(e.get('bitrate')),
+ 'width': int_or_none(e.get('width')),
+ 'height': int_or_none(e.get('height')),
+ })
+ break
return dict((k, v) for k, v in info.items() if v is not None)
@staticmethod