aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author HobbyistDev <105957301+HobbyistDev@users.noreply.github.com> 2024-01-19 08:27:25 +0900
committer GitHub <noreply@github.com> 2024-01-19 00:27:25 +0100
commit cf6413e840476c15e5b166dc2f7cc2a90a4a9aad (patch)
tree e1173cfb68602d992b5bc1abaec0b95d80378eb8
parent 5498729c59b03a9511c64552da3ba2f802166f8d (diff)
[ie/BiliIntl] Fix and improve subtitles extraction (#7077)
Closes #7075, Closes #6664
Authored by: HobbyistDev, itachi-19, dirkf, seproDev
Co-authored-by: itachi-19 <16500619+itachi-19@users.noreply.github.com>
Co-authored-by: dirkf <fieldhouse@gmx.net>
Co-authored-by: sepro <4618135+seproDev@users.noreply.github.com>
-rw-r--r-- yt_dlp/extractor/bilibili.py 42
1 file changed, 29 insertions, 13 deletions
diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py
index bc25dc75e..5475b3650 100644
--- a/yt_dlp/extractor/bilibili.py
+++ b/yt_dlp/extractor/bilibili.py
@@ -18,6 +18,7 @@ from ..utils import (
OnDemandPagedList,
bool_or_none,
clean_html,
+ determine_ext,
filter_dict,
float_or_none,
format_field,
@@ -1658,19 +1659,34 @@ class BiliIntlBaseIE(InfoExtractor):
'aid': aid,
})) or {}
subtitles = {}
- for sub in sub_json.get('subtitles') or []:
- sub_url = sub.get('url')
- if not sub_url:
- continue
- sub_data = self._download_json(
- sub_url, ep_id or aid, errnote='Unable to download subtitles', fatal=False,
- note='Downloading subtitles%s' % f' for {sub["lang"]}' if sub.get('lang') else '')
- if not sub_data:
- continue
- subtitles.setdefault(sub.get('lang_key', 'en'), []).append({
- 'ext': 'srt',
- 'data': self.json2srt(sub_data)
- })
+ fetched_urls = set()
+ for sub in traverse_obj(sub_json, (('subtitles', 'video_subtitle'), ..., {dict})):
+ for url in traverse_obj(sub, ((None, 'ass', 'srt'), 'url', {url_or_none})):
+ if url in fetched_urls:
+ continue
+ fetched_urls.add(url)
+ sub_ext = determine_ext(url)
+ sub_lang = sub.get('lang_key') or 'en'
+
+ if sub_ext == 'ass':
+ subtitles.setdefault(sub_lang, []).append({
+ 'ext': 'ass',
+ 'url': url,
+ })
+ elif sub_ext == 'json':
+ sub_data = self._download_json(
+ url, ep_id or aid, fatal=False,
+ note=f'Downloading subtitles{format_field(sub, "lang", " for %s")} ({sub_lang})',
+ errnote='Unable to download subtitles')
+
+ if sub_data:
+ subtitles.setdefault(sub_lang, []).append({
+ 'ext': 'srt',
+ 'data': self.json2srt(sub_data),
+ })
+ else:
+ self.report_warning('Unexpected subtitle extension', ep_id or aid)
+
return subtitles
def _get_formats(self, *, ep_id=None, aid=None):