aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author garret <garret1317@yandex.com> 2023-10-07 00:05:48 +0100
committer GitHub <noreply@github.com> 2023-10-06 23:05:48 +0000
commit e831c80e8b2fc025b3b67d82974cc59e3526fdc8 (patch)
tree e07fad5c45252a953217959abef19c57574329b0
parent 0e722f2f3ca42e634fd7b06ee70b16bf833ce132 (diff)
[ie/nhk] Fix VOD extraction (#8249)
Closes #8242
Authored by: garret1317
-rw-r--r-- yt_dlp/extractor/nhk.py 46
1 files changed, 43 insertions, 3 deletions
diff --git a/yt_dlp/extractor/nhk.py b/yt_dlp/extractor/nhk.py
index fbd6a18f6..bcbc2279f 100644
--- a/yt_dlp/extractor/nhk.py
+++ b/yt_dlp/extractor/nhk.py
@@ -28,6 +28,44 @@ class NhkBaseIE(InfoExtractor):
m_id, lang, '/all' if is_video else ''),
m_id, query={'apikey': 'EJfK8jdS57GqlupFgAfAAwr573q01y6k'})['data']['episodes'] or []
+    def _get_api_info(self, refresh=True):
+        """Return the stream API endpoint and token as a dict.
+
+        With ``refresh=False`` this only returns whatever is stored in the
+        'nhk'/'api_info' cache entry (possibly nothing). With ``refresh=True``
+        the player script is downloaded, the values are scraped from it and
+        the cache entry is rewritten.
+
+        Returns a dict with the keys 'url' and 'token' when refreshed.
+        """
+        if not refresh:
+            return self.cache.load('nhk', 'api_info')
+
+        # Reset the cached entry before re-scraping; presumably so that a
+        # failed refresh does not leave stale values behind.
+        self.cache.store('nhk', 'api_info', {})
+        movie_player_js = self._download_webpage(
+            'https://movie-a.nhk.or.jp/world/player/js/movie-player.js', None,
+            note='Downloading stream API information')
+        # The label is passed as the `name` argument. Previously it was passed
+        # after an extra `None`, which (assuming the usual
+        # `_search_regex(pattern, string, name, default=...)` signature) made
+        # the label the *default*, so a regex miss returned and cached the
+        # label text as the URL/token instead of reporting a failure.
+        api_info = {
+            'url': self._search_regex(
+                r'prod:[^;]+\bapiUrl:\s*[\'"]([^\'"]+)[\'"]', movie_player_js, 'stream API url'),
+            'token': self._search_regex(
+                r'prod:[^;]+\btoken:\s*[\'"]([^\'"]+)[\'"]', movie_player_js, 'stream API token'),
+        }
+        self.cache.store('nhk', 'api_info', api_info)
+        return api_info
+
+    def _extract_formats_and_subtitles(self, vod_id):
+        """Resolve the HLS stream for ``vod_id``.
+
+        Returns whatever ``_extract_m3u8_formats_and_subtitles`` returns for
+        the resolved stream URL (unpacked by the caller as formats and
+        subtitles).
+
+        Raises ExtractorError when no stream URL is found with either the
+        cached or the freshly scraped API information.
+        """
+        # First pass uses cached API info; second pass re-scrapes it.
+        for refresh in (False, True):
+            api_info = self._get_api_info(refresh) or {}
+            # Read 'url' without mutating the dict, and treat an entry that
+            # lacks it like a missing one so the refresh pass still runs
+            # (a bare pop('url') would raise KeyError here).
+            api_url = api_info.get('url')
+            if not api_url:
+                continue
+
+            # Everything except the endpoint itself is sent as a query parameter
+            api_params = {key: value for key, value in api_info.items() if key != 'url'}
+            stream_url = traverse_obj(
+                self._download_json(
+                    api_url, vod_id, 'Downloading stream url info', fatal=False, query={
+                        **api_params,
+                        'type': 'json',
+                        'optional_id': vod_id,
+                        'active_flg': 1,
+                    }),
+                ('meta', 0, 'movie_url', ('mb_auto', 'auto_sp', 'auto_pc'), {url_or_none}), get_all=False)
+            if stream_url:
+                return self._extract_m3u8_formats_and_subtitles(stream_url, vod_id)
+
+        raise ExtractorError('Unable to extract stream url')
+
def _extract_episode_info(self, url, episode=None):
fetch_episode = episode is None
lang, m_type, episode_id = NhkVodIE._match_valid_url(url).groups()
@@ -67,12 +105,14 @@ class NhkBaseIE(InfoExtractor):
}
if is_video:
vod_id = episode['vod_id']
+ formats, subs = self._extract_formats_and_subtitles(vod_id)
+
info.update({
- '_type': 'url_transparent',
- 'ie_key': 'Piksel',
- 'url': 'https://movie-s.nhk.or.jp/v/refid/nhkworld/prefid/' + vod_id,
'id': vod_id,
+ 'formats': formats,
+ 'subtitles': subs,
})
+
else:
if fetch_episode:
audio_path = episode['audio']['audio']