diff options
| author | Sergey M․ <dstftw@gmail.com> | 2016-08-05 23:14:32 +0700 | 
|---|---|---|
| committer | Sergey M․ <dstftw@gmail.com> | 2016-08-05 23:14:32 +0700 | 
| commit | 46933a15d69a0079e66a04475659d0cbd5e5f08d (patch) | |
| tree | 61f54d851b3cfb2b21a36b4cda9e04d211562df9 /youtube_dl/extractor/common.py | |
| parent | 3859ebeee6d6448240176ef5e4c20f6b1d1db795 (diff) | |
[extractor/common] Support root JSON-LD lists (Closes #10203)
Diffstat (limited to 'youtube_dl/extractor/common.py')
| -rw-r--r-- | youtube_dl/extractor/common.py | 76 | 
1 file changed, 41 insertions, 35 deletions
```diff
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 2d337d614..70909fc1c 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -828,41 +828,47 @@ class InfoExtractor(object):
         if not json_ld:
             return {}
         info = {}
-        if json_ld.get('@context') == 'http://schema.org':
-            item_type = json_ld.get('@type')
-            if expected_type is not None and expected_type != item_type:
-                return info
-            if item_type == 'TVEpisode':
-                info.update({
-                    'episode': unescapeHTML(json_ld.get('name')),
-                    'episode_number': int_or_none(json_ld.get('episodeNumber')),
-                    'description': unescapeHTML(json_ld.get('description')),
-                })
-                part_of_season = json_ld.get('partOfSeason')
-                if isinstance(part_of_season, dict) and part_of_season.get('@type') == 'TVSeason':
-                    info['season_number'] = int_or_none(part_of_season.get('seasonNumber'))
-                part_of_series = json_ld.get('partOfSeries')
-                if isinstance(part_of_series, dict) and part_of_series.get('@type') == 'TVSeries':
-                    info['series'] = unescapeHTML(part_of_series.get('name'))
-            elif item_type == 'Article':
-                info.update({
-                    'timestamp': parse_iso8601(json_ld.get('datePublished')),
-                    'title': unescapeHTML(json_ld.get('headline')),
-                    'description': unescapeHTML(json_ld.get('articleBody')),
-                })
-            elif item_type == 'VideoObject':
-                info.update({
-                    'url': json_ld.get('contentUrl'),
-                    'title': unescapeHTML(json_ld.get('name')),
-                    'description': unescapeHTML(json_ld.get('description')),
-                    'thumbnail': json_ld.get('thumbnailUrl'),
-                    'duration': parse_duration(json_ld.get('duration')),
-                    'timestamp': unified_timestamp(json_ld.get('uploadDate')),
-                    'filesize': float_or_none(json_ld.get('contentSize')),
-                    'tbr': int_or_none(json_ld.get('bitrate')),
-                    'width': int_or_none(json_ld.get('width')),
-                    'height': int_or_none(json_ld.get('height')),
-                })
+        if not isinstance(json_ld, (list, tuple, dict)):
+            return info
+        if isinstance(json_ld, dict):
+            json_ld = [json_ld]
+        for e in json_ld:
+            if e.get('@context') == 'http://schema.org':
+                item_type = e.get('@type')
+                if expected_type is not None and expected_type != item_type:
+                    return info
+                if item_type == 'TVEpisode':
+                    info.update({
+                        'episode': unescapeHTML(e.get('name')),
+                        'episode_number': int_or_none(e.get('episodeNumber')),
+                        'description': unescapeHTML(e.get('description')),
+                    })
+                    part_of_season = e.get('partOfSeason')
+                    if isinstance(part_of_season, dict) and part_of_season.get('@type') == 'TVSeason':
+                        info['season_number'] = int_or_none(part_of_season.get('seasonNumber'))
+                    part_of_series = e.get('partOfSeries')
+                    if isinstance(part_of_series, dict) and part_of_series.get('@type') == 'TVSeries':
+                        info['series'] = unescapeHTML(part_of_series.get('name'))
+                elif item_type == 'Article':
+                    info.update({
+                        'timestamp': parse_iso8601(e.get('datePublished')),
+                        'title': unescapeHTML(e.get('headline')),
+                        'description': unescapeHTML(e.get('articleBody')),
+                    })
+                elif item_type == 'VideoObject':
+                    info.update({
+                        'url': e.get('contentUrl'),
+                        'title': unescapeHTML(e.get('name')),
+                        'description': unescapeHTML(e.get('description')),
+                        'thumbnail': e.get('thumbnailUrl'),
+                        'duration': parse_duration(e.get('duration')),
+                        'timestamp': unified_timestamp(e.get('uploadDate')),
+                        'filesize': float_or_none(e.get('contentSize')),
+                        'tbr': int_or_none(e.get('bitrate')),
+                        'width': int_or_none(e.get('width')),
+                        'height': int_or_none(e.get('height')),
+                    })
+                break
         return dict((k, v) for k, v in info.items() if v is not None)
 
     @staticmethod
```
