diff options
| author | Remita Amine <remitamine@gmail.com> | 2020-11-16 18:57:33 +0100 | 
|---|---|---|
| committer | Remita Amine <remitamine@gmail.com> | 2020-11-16 18:57:56 +0100 | 
| commit | 9448a203126105d6462299bddbe3a6a32bc017fd (patch) | |
| tree | b1dfad92a58fdf5413e241bd8996ad820e2262f4 | |
| parent | 3f1748b9445e9d9367d29221c4b7bf9b88895e4e (diff) | |
[condenast] fix extraction and extract subtitles
| -rw-r--r-- | youtube_dl/extractor/condenast.py | 27 | 
1 files changed, 23 insertions, 4 deletions
```diff
diff --git a/youtube_dl/extractor/condenast.py b/youtube_dl/extractor/condenast.py
index ed278fefc..d5e77af32 100644
--- a/youtube_dl/extractor/condenast.py
+++ b/youtube_dl/extractor/condenast.py
@@ -16,6 +16,8 @@ from ..utils import (
     mimetype2ext,
     orderedSet,
     parse_iso8601,
+    strip_or_none,
+    try_get,
 )
 
 
@@ -82,6 +84,7 @@ class CondeNastIE(InfoExtractor):
             'uploader': 'gq',
             'upload_date': '20170321',
             'timestamp': 1490126427,
+            'description': 'How much grimmer would things be if these people were competent?',
         },
     }, {
         # JS embed
@@ -93,7 +96,7 @@ class CondeNastIE(InfoExtractor):
             'title': '3D printed TSA Travel Sentry keys really do open TSA locks',
             'uploader': 'arstechnica',
             'upload_date': '20150916',
-            'timestamp': 1442434955,
+            'timestamp': 1442434920,
         }
     }, {
         'url': 'https://player.cnevids.com/inline/video/59138decb57ac36b83000005.js?target=js-cne-player',
@@ -196,6 +199,13 @@ class CondeNastIE(InfoExtractor):
             })
         self._sort_formats(formats)
 
+        subtitles = {}
+        for t, caption in video_info.get('captions', {}).items():
+            caption_url = caption.get('src')
+            if not (t in ('vtt', 'srt', 'tml') and caption_url):
+                continue
+            subtitles.setdefault('en', []).append({'url': caption_url})
+
         return {
             'id': video_id,
             'formats': formats,
@@ -208,6 +218,7 @@ class CondeNastIE(InfoExtractor):
             'season': video_info.get('season_title'),
             'timestamp': parse_iso8601(video_info.get('premiere_date')),
             'categories': video_info.get('categories'),
+            'subtitles': subtitles,
         }
 
     def _real_extract(self, url):
@@ -225,8 +236,16 @@ class CondeNastIE(InfoExtractor):
         if url_type == 'series':
             return self._extract_series(url, webpage)
         else:
-            params = self._extract_video_params(webpage, display_id)
-            info = self._search_json_ld(
-                webpage, display_id, fatal=False)
+            video = try_get(self._parse_json(self._search_regex(
+                r'__PRELOADED_STATE__\s*=\s*({.+?});', webpage,
+                'preload state', '{}'), display_id),
+                lambda x: x['transformed']['video'])
+            if video:
+                params = {'videoId': video['id']}
+                info = {'description': strip_or_none(video.get('description'))}
+            else:
+                params = self._extract_video_params(webpage, display_id)
+                info = self._search_json_ld(
+                    webpage, display_id, fatal=False)
             info.update(self._extract_video(params))
             return info
```
