diff options
author | Sergey M․ <dstftw@gmail.com> | 2016-08-05 23:14:32 +0700 |
---|---|---|
committer | Sergey M․ <dstftw@gmail.com> | 2016-08-05 23:14:32 +0700 |
commit | 46933a15d69a0079e66a04475659d0cbd5e5f08d (patch) | |
tree | 61f54d851b3cfb2b21a36b4cda9e04d211562df9 /youtube_dl/extractor | |
parent | 3859ebeee6d6448240176ef5e4c20f6b1d1db795 (diff) |
[extractor/common] Support root JSON-LD lists (Closes #10203)
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- | youtube_dl/extractor/common.py | 76 |
1 files changed, 41 insertions, 35 deletions
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 2d337d614..70909fc1c 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -828,41 +828,47 @@ class InfoExtractor(object): if not json_ld: return {} info = {} - if json_ld.get('@context') == 'http://schema.org': - item_type = json_ld.get('@type') - if expected_type is not None and expected_type != item_type: - return info - if item_type == 'TVEpisode': - info.update({ - 'episode': unescapeHTML(json_ld.get('name')), - 'episode_number': int_or_none(json_ld.get('episodeNumber')), - 'description': unescapeHTML(json_ld.get('description')), - }) - part_of_season = json_ld.get('partOfSeason') - if isinstance(part_of_season, dict) and part_of_season.get('@type') == 'TVSeason': - info['season_number'] = int_or_none(part_of_season.get('seasonNumber')) - part_of_series = json_ld.get('partOfSeries') - if isinstance(part_of_series, dict) and part_of_series.get('@type') == 'TVSeries': - info['series'] = unescapeHTML(part_of_series.get('name')) - elif item_type == 'Article': - info.update({ - 'timestamp': parse_iso8601(json_ld.get('datePublished')), - 'title': unescapeHTML(json_ld.get('headline')), - 'description': unescapeHTML(json_ld.get('articleBody')), - }) - elif item_type == 'VideoObject': - info.update({ - 'url': json_ld.get('contentUrl'), - 'title': unescapeHTML(json_ld.get('name')), - 'description': unescapeHTML(json_ld.get('description')), - 'thumbnail': json_ld.get('thumbnailUrl'), - 'duration': parse_duration(json_ld.get('duration')), - 'timestamp': unified_timestamp(json_ld.get('uploadDate')), - 'filesize': float_or_none(json_ld.get('contentSize')), - 'tbr': int_or_none(json_ld.get('bitrate')), - 'width': int_or_none(json_ld.get('width')), - 'height': int_or_none(json_ld.get('height')), - }) + if not isinstance(json_ld, (list, tuple, dict)): + return info + if isinstance(json_ld, dict): + json_ld = [json_ld] + for e in json_ld: + if e.get('@context') == 'http://schema.org': + item_type = e.get('@type') + if expected_type is not None and expected_type != item_type: + return info + if item_type == 'TVEpisode': + info.update({ + 'episode': unescapeHTML(e.get('name')), + 'episode_number': int_or_none(e.get('episodeNumber')), + 'description': unescapeHTML(e.get('description')), + }) + part_of_season = e.get('partOfSeason') + if isinstance(part_of_season, dict) and part_of_season.get('@type') == 'TVSeason': + info['season_number'] = int_or_none(part_of_season.get('seasonNumber')) + part_of_series = e.get('partOfSeries') + if isinstance(part_of_series, dict) and part_of_series.get('@type') == 'TVSeries': + info['series'] = unescapeHTML(part_of_series.get('name')) + elif item_type == 'Article': + info.update({ + 'timestamp': parse_iso8601(e.get('datePublished')), + 'title': unescapeHTML(e.get('headline')), + 'description': unescapeHTML(e.get('articleBody')), + }) + elif item_type == 'VideoObject': + info.update({ + 'url': e.get('contentUrl'), + 'title': unescapeHTML(e.get('name')), + 'description': unescapeHTML(e.get('description')), + 'thumbnail': e.get('thumbnailUrl'), + 'duration': parse_duration(e.get('duration')), + 'timestamp': unified_timestamp(e.get('uploadDate')), + 'filesize': float_or_none(e.get('contentSize')), + 'tbr': int_or_none(e.get('bitrate')), + 'width': int_or_none(e.get('width')), + 'height': int_or_none(e.get('height')), + }) + break return dict((k, v) for k, v in info.items() if v is not None) @staticmethod |