[condenast] fix extraction and extract subtitles

author: Remita Amine <remitamine@gmail.com> 2020-11-16 18:57:33 +0100
committer: Remita Amine <remitamine@gmail.com> 2020-11-16 18:57:56 +0100
commit: 9448a203126105d6462299bddbe3a6a32bc017fd (patch)
tree: b1dfad92a58fdf5413e241bd8996ad820e2262f4
parent: 3f1748b9445e9d9367d29221c4b7bf9b88895e4e (diff)
1 file changed, 23 insertions, 4 deletions
diff --git a/youtube_dl/extractor/condenast.py b/youtube_dl/extractor/condenast.py
index ed278fefc..d5e77af32 100644
--- a/youtube_dl/extractor/condenast.py
+++ b/youtube_dl/extractor/condenast.py
@@ -16,6 +16,8 @@ from ..utils import (
     mimetype2ext,
     orderedSet,
     parse_iso8601,
+    strip_or_none,
+    try_get,
 )
 
 
@@ -82,6 +84,7 @@ class CondeNastIE(InfoExtractor):
             'uploader': 'gq',
             'upload_date': '20170321',
             'timestamp': 1490126427,
+            'description': 'How much grimmer would things be if these people were competent?',
         },
     }, {
         # JS embed
@@ -93,7 +96,7 @@ class CondeNastIE(InfoExtractor):
             'title': '3D printed TSA Travel Sentry keys really do open TSA locks',
             'uploader': 'arstechnica',
             'upload_date': '20150916',
-            'timestamp': 1442434955,
+            'timestamp': 1442434920,
         }
     }, {
         'url': 'https://player.cnevids.com/inline/video/59138decb57ac36b83000005.js?target=js-cne-player',
@@ -196,6 +199,13 @@ class CondeNastIE(InfoExtractor):
             })
         self._sort_formats(formats)
 
+        subtitles = {}
+        for t, caption in video_info.get('captions', {}).items():
+            caption_url = caption.get('src')
+            if not (t in ('vtt', 'srt', 'tml') and caption_url):
+                continue
+            subtitles.setdefault('en', []).append({'url': caption_url})
+
         return {
             'id': video_id,
             'formats': formats,
@@ -208,6 +218,7 @@ class CondeNastIE(InfoExtractor):
             'season': video_info.get('season_title'),
             'timestamp': parse_iso8601(video_info.get('premiere_date')),
             'categories': video_info.get('categories'),
+            'subtitles': subtitles,
         }
 
     def _real_extract(self, url):
@@ -225,8 +236,16 @@ class CondeNastIE(InfoExtractor):
         if url_type == 'series':
             return self._extract_series(url, webpage)
         else:
-            params = self._extract_video_params(webpage, display_id)
-            info = self._search_json_ld(
-                webpage, display_id, fatal=False)
+            video = try_get(self._parse_json(self._search_regex(
+                r'__PRELOADED_STATE__\s*=\s*({.+?});', webpage,
+                'preload state', '{}'), display_id),
+                lambda x: x['transformed']['video'])
+            if video:
+                params = {'videoId': video['id']}
+                info = {'description': strip_or_none(video.get('description'))}
+            else:
+                params = self._extract_video_params(webpage, display_id)
+                info = self._search_json_ld(
+                    webpage, display_id, fatal=False)
             info.update(self._extract_video(params))
             return info
author	Remita Amine <remitamine@gmail.com>	2020-11-16 18:57:33 +0100
committer	Remita Amine <remitamine@gmail.com>	2020-11-16 18:57:56 +0100
commit	9448a203126105d6462299bddbe3a6a32bc017fd (patch)
tree	b1dfad92a58fdf5413e241bd8996ad820e2262f4
parent	3f1748b9445e9d9367d29221c4b7bf9b88895e4e (diff)