diff options
author | Sergey M․ <dstftw@gmail.com> | 2020-11-20 23:21:52 +0700 |
---|---|---|
committer | Sergey M․ <dstftw@gmail.com> | 2020-11-20 23:21:52 +0700 |
commit | b31b5f4434b52816f3a5a1ae2cbe1d162be0fbd0 (patch) | |
tree | 34ca3998ccf5d8b12a35b728106234833e8ce7cb /youtube_dl | |
parent | 86f2fa1590991fffae7b1daacae9164771312c0b (diff) |
[youtube] Improve yt initial data extraction (closes #27093)
Diffstat (limited to 'youtube_dl')
-rw-r--r-- | youtube_dl/extractor/youtube.py | 22 |
1 files changed, 20 insertions, 2 deletions
```diff
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 79f87aa85..a85aede8e 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -283,6 +283,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
         },
     }
 
+    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
+
     def _call_api(self, ep, query, video_id):
         data = self._DEFAULT_API_DATA.copy()
         data.update(query)
@@ -299,8 +301,8 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
     def _extract_yt_initial_data(self, video_id, webpage):
         return self._parse_json(
             self._search_regex(
-                r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;',
-                webpage, 'yt initial data'),
+                (r'%s\s*\n' % self._YT_INITIAL_DATA_RE,
+                 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
             video_id)
 
 
@@ -1066,6 +1068,22 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'skip_download': True,
             },
         },
+        {
+            # with '};' inside yt initial data (see https://github.com/ytdl-org/youtube-dl/issues/27093)
+            'url': 'https://www.youtube.com/watch?v=CHqg6qOn4no',
+            'info_dict': {
+                'id': 'CHqg6qOn4no',
+                'ext': 'mp4',
+                'title': 'Part 77 Sort a list of simple types in c#',
+                'description': 'md5:b8746fa52e10cdbf47997903f13b20dc',
+                'upload_date': '20130831',
+                'uploader_id': 'kudvenkat',
+                'uploader': 'kudvenkat',
+            },
+            'params': {
+                'skip_download': True,
+            },
+        },
     ]
 
     def __init__(self, *args, **kwargs):
```