aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: dirkf <fieldhouse@gmx.net> 2025-09-28 07:09:31 +0100
committer: dirkf <fieldhouse@gmx.net> 2025-09-29 02:15:05 +0100
commit: 4222c6d78b63440849ad6a886a2ff5b607f3feae (patch)
tree: 3fcc54e2aabf37b9ecfbe3987a35e48971bda458
parent: 2735d1bf1d1f947891776330cb58d792d03cf436 (diff)
[YouTube] Extract fallback title and description from initial data
Based on yt-dlp/yt-dlp#14078, thx bashonly
-rw-r--r-- youtube_dl/extractor/youtube.py | 31
1 file changed, 25 insertions, 6 deletions
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index d32c9df99..dea109eae 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -2849,6 +2849,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
initial_data = self._call_api(
'next', {'videoId': video_id}, video_id, fatal=False)
+ initial_sdcr = None
if initial_data:
chapters = self._extract_chapters_from_json(
initial_data, video_id, duration)
@@ -2976,12 +2977,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
info['track'] = mrr_contents_text
# this is not extraction but spelunking!
- carousel_lockups = traverse_obj(
- initial_data,
- ('engagementPanels', Ellipsis, 'engagementPanelSectionListRenderer',
- 'content', 'structuredDescriptionContentRenderer', 'items', Ellipsis,
- 'videoDescriptionMusicSectionRenderer', 'carouselLockups', Ellipsis),
- expected_type=dict) or []
+ initial_sdcr = traverse_obj(initial_data, (
+ 'engagementPanels', Ellipsis, 'engagementPanelSectionListRenderer',
+ 'content', 'structuredDescriptionContentRenderer', T(dict)),
+ get_all=False)
+ carousel_lockups = traverse_obj(initial_sdcr, (
+ 'items', Ellipsis, 'videoDescriptionMusicSectionRenderer',
+ 'carouselLockups', Ellipsis, T(dict))) or []
# try to reproduce logic from metadataRowContainerRenderer above (if it still is)
fields = (('ALBUM', 'album'), ('ARTIST', 'artist'), ('SONG', 'track'), ('LICENSES', 'license'))
# multiple_songs ?
@@ -3006,6 +3008,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
self.mark_watched(video_id, player_response)
+ # Fallbacks for missing metadata
+ if initial_sdcr:
+ if info.get('description') is None:
+ info['description'] = traverse_obj(initial_sdcr, (
+ 'items', Ellipsis, 'expandableVideoDescriptionBodyRenderer',
+ 'attributedDescriptionBodyText', 'content', T(compat_str)),
+ get_all=False)
+ # videoDescriptionHeaderRenderer also has publishDate/channel/handle/ucid, but not needed
+ if info.get('title') is None:
+ info['title'] = traverse_obj(
+ (initial_sdcr, initial_data),
+ (0, 'items', Ellipsis, 'videoDescriptionHeaderRenderer', T(dict)),
+ (1, 'playerOverlays', 'playerOverlayRenderer', 'videoDetails',
+ 'playerOverlayVideoDetailsRenderer', T(dict)),
+ expected_type=lambda x: self._get_text(x, 'title'),
+ get_all=False)
+
return merge_dicts(
info, {
'uploader_id': self._extract_uploader_id(owner_profile_url),