diff options
Diffstat (limited to 'yt_dlp/extractor/youtube/_search.py')
-rw-r--r-- | yt_dlp/extractor/youtube/_search.py | 167 |
1 files changed, 167 insertions, 0 deletions
# Source: yt_dlp/extractor/youtube/_search.py (new file, mode 100644, index be10a20da)
import urllib.parse

from ._tab import YoutubeTabBaseInfoExtractor
from ..common import SearchInfoExtractor
from ...utils import join_nonempty, parse_qs


# Search-key extractor for the `ytsearch` prefix (e.g. `ytsearch5:<query>`).
class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    IE_DESC = 'YouTube search'
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    _SEARCH_PARAMS = 'EgIQAfABAQ=='  # Videos only
    _TESTS = [{
        'url': 'ytsearch5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }, {
        'note': 'Suicide/self-harm search warning',
        'url': 'ytsearch1:i hate myself and i wanna die',
        'playlist_count': 1,
        'info_dict': {
            'id': 'i hate myself and i wanna die',
            'title': 'i hate myself and i wanna die',
        },
    }]


# Search-key extractor for the `ytsearchdate` prefix; differs from
# YoutubeSearchIE only in its name, description and search params.
class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube search, newest videos first'
    _SEARCH_PARAMS = 'CAISAhAB8AEB'  # Videos only, sorted by date
    _TESTS = [{
        'url': 'ytsearchdate5:youtube-dl test video',
        'playlist_count': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }]


# Extractor for youtube.com `/results` and `/search` URLs that carry a
# `search_query` or `q` parameter.
class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
    IE_DESC = 'YouTube search URLs with sorting and filter support'
    IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'youtube-dl test video',
            'title': 'youtube-dl test video',
        },
    }, {
        'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
        'playlist_mincount': 5,
        'info_dict': {
            'id': 'python',
            'title': 'python',
        },
    }, {
        'url': 'https://www.youtube.com/results?search_query=%23cats',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '#cats',
            'title': '#cats',
            # The test suite does not have support for nested playlists
            # 'entries': [{
            #     'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
            #     'title': '#cats',
            # }],
        },
    }, {
        # Channel results
        'url': 'https://www.youtube.com/results?search_query=kurzgesagt&sp=EgIQAg%253D%253D',
        'info_dict': {
            'id': 'kurzgesagt',
            'title': 'kurzgesagt',
        },
        'playlist': [{
            'info_dict': {
                '_type': 'url',
                'id': 'UCsXVk37bltHxD1rDPwtNM8Q',
                'url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
                'ie_key': 'YoutubeTab',
                'channel': 'Kurzgesagt – In a Nutshell',
                'description': 'md5:4ae48dfa9505ffc307dad26342d06bfc',
                'title': 'Kurzgesagt – In a Nutshell',
                'channel_id': 'UCsXVk37bltHxD1rDPwtNM8Q',
                # No longer available for search as it is set to the handle.
                # 'playlist_count': int,
                'channel_url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
                'thumbnails': list,
                'uploader_id': '@kurzgesagt',
                'uploader_url': 'https://www.youtube.com/@kurzgesagt',
                'uploader': 'Kurzgesagt – In a Nutshell',
                'channel_is_verified': True,
                'channel_follower_count': int,
            },
        }],
        'params': {'extract_flat': True, 'playlist_items': '1'},
        'playlist_mincount': 1,
    }, {
        'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist of search results for the query found in *url*.

        The query is read from `search_query` (preferred) or `q` and doubles
        as both the playlist id and title. An optional `sp` value is handed
        to `_search_results` unchanged; `None` is passed when it is absent.
        """
        url_params = parse_qs(url)
        search_terms = url_params.get('search_query') or url_params.get('q')
        query = search_terms[0]
        sp = url_params.get('sp', (None,))[0]
        entries = self._search_results(query, sp)
        return self.playlist_result(entries, query, query)


# Extractor for music.youtube.com `/search` URLs. A result section can be
# selected either with an `sp` parameter or with a `#<section>` fragment.
class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
    IE_DESC = 'YouTube music search URLs with selectable sections, e.g. #songs'
    IE_NAME = 'youtube:music:search_url'
    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
    _TESTS = [{
        'url': 'https://music.youtube.com/search?q=royalty+free+music',
        'playlist_count': 16,
        'info_dict': {
            'id': 'royalty free music',
            'title': 'royalty free music',
        },
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - songs',
            'title': 'royalty free music - songs',
        },
        'params': {'extract_flat': 'in_playlist'},
    }, {
        'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
        'playlist_mincount': 30,
        'info_dict': {
            'id': 'royalty free music - community playlists',
            'title': 'royalty free music - community playlists',
        },
        'params': {'extract_flat': 'in_playlist'},
    }]

    # Section name (as written in the URL fragment) -> `sp` search params value
    _SECTIONS = {
        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
    }

    def _real_extract(self, url):
        """Return a playlist of music search results for the query in *url*.

        An explicit `sp` parameter takes precedence: it is used as the search
        params, and the section label is its `_SECTIONS` name when it matches
        one, otherwise the raw `sp` value. Without `sp`, the text after the
        first `#` is URL-decoded, lower-cased and looked up in `_SECTIONS`;
        an unknown or missing fragment means no section. The playlist id and
        title are "<query> - <section>", or just the query without a section.
        """
        url_params = parse_qs(url)
        search_terms = url_params.get('search_query') or url_params.get('q')
        query = search_terms[0]
        params = url_params.get('sp', (None,))[0]

        if params:
            section = next(
                (name for name, token in self._SECTIONS.items() if token == params),
                params)
        else:
            # Text between the first and second '#', or '' if there is no '#'
            pieces = url.split('#')
            fragment = pieces[1] if len(pieces) > 1 else ''
            section = urllib.parse.unquote_plus(fragment).lower()
            params = self._SECTIONS.get(section)
            if not params:
                section = None

        title = join_nonempty(query, section, delim=' - ')
        entries = self._search_results(query, params, default_client='web_music')
        return self.playlist_result(entries, title, title)