about summary refs log tree commit diff
path: root/yt_dlp/extractor/youtube/_search.py
diff options
context:
space:
mode:
Diffstat (limited to 'yt_dlp/extractor/youtube/_search.py')
-rw-r--r-- yt_dlp/extractor/youtube/_search.py 167
1 files changed, 167 insertions, 0 deletions
diff --git a/yt_dlp/extractor/youtube/_search.py b/yt_dlp/extractor/youtube/_search.py
new file mode 100644
index 000000000..be10a20da
--- /dev/null
+++ b/yt_dlp/extractor/youtube/_search.py
@@ -0,0 +1,167 @@
+import urllib.parse
+
+from ._tab import YoutubeTabBaseInfoExtractor
+from ..common import SearchInfoExtractor
+from ...utils import join_nonempty, parse_qs
+
+
+class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
+    # Keyword-search extractor. Only configuration and fixtures are declared
+    # here; this class defines no methods of its own.
+    IE_NAME = 'youtube:search'
+    IE_DESC = 'YouTube search'
+
+    # Prefix seen in the fixture URLs below (e.g. 'ytsearch5:<query>').
+    _SEARCH_KEY = 'ytsearch'
+    # Videos only
+    _SEARCH_PARAMS = 'EgIQAfABAQ=='
+
+    _TESTS = [
+        {
+            'url': 'ytsearch5:youtube-dl test video',
+            'playlist_count': 5,
+            'info_dict': {
+                'id': 'youtube-dl test video',
+                'title': 'youtube-dl test video',
+            },
+        },
+        {
+            'note': 'Suicide/self-harm search warning',
+            'url': 'ytsearch1:i hate myself and i wanna die',
+            'playlist_count': 1,
+            'info_dict': {
+                'id': 'i hate myself and i wanna die',
+                'title': 'i hate myself and i wanna die',
+            },
+        },
+    ]
+
+
+class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
+    # Date-ordered sibling of YoutubeSearchIE. Only configuration and
+    # fixtures are declared here; this class defines no methods of its own.
+    IE_NAME = ':'.join((YoutubeSearchIE.IE_NAME, 'date'))
+    IE_DESC = 'YouTube search, newest videos first'
+
+    # Prefix seen in the fixture URL below (e.g. 'ytsearchdate5:<query>').
+    _SEARCH_KEY = 'ytsearchdate'
+    # Videos only, sorted by date
+    _SEARCH_PARAMS = 'CAISAhAB8AEB'
+
+    _TESTS = [
+        {
+            'url': 'ytsearchdate5:youtube-dl test video',
+            'playlist_count': 5,
+            'info_dict': {
+                'id': 'youtube-dl test video',
+                'title': 'youtube-dl test video',
+            },
+        },
+    ]
+
+
+class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
+    # Handles /results and /search page URLs that carry the search text in a
+    # 'search_query' or 'q' query parameter (see _VALID_URL).
+    IE_NAME = '{}_url'.format(YoutubeSearchIE.IE_NAME)
+    IE_DESC = 'YouTube search URLs with sorting and filter support'
+    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
+
+    _TESTS = [
+        {
+            'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
+            'playlist_mincount': 5,
+            'info_dict': {
+                'id': 'youtube-dl test video',
+                'title': 'youtube-dl test video',
+            },
+        },
+        {
+            'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D',
+            'playlist_mincount': 5,
+            'info_dict': {
+                'id': 'python',
+                'title': 'python',
+            },
+        },
+        {
+            'url': 'https://www.youtube.com/results?search_query=%23cats',
+            'playlist_mincount': 1,
+            'info_dict': {
+                'id': '#cats',
+                'title': '#cats',
+                # The test suite does not have support for nested playlists
+                # 'entries': [{
+                #     'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
+                #     'title': '#cats',
+                # }],
+            },
+        },
+        {
+            # Channel results
+            'url': 'https://www.youtube.com/results?search_query=kurzgesagt&sp=EgIQAg%253D%253D',
+            'info_dict': {
+                'id': 'kurzgesagt',
+                'title': 'kurzgesagt',
+            },
+            'playlist': [
+                {
+                    'info_dict': {
+                        '_type': 'url',
+                        'id': 'UCsXVk37bltHxD1rDPwtNM8Q',
+                        'url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
+                        'ie_key': 'YoutubeTab',
+                        'channel': 'Kurzgesagt – In a Nutshell',
+                        'description': 'md5:4ae48dfa9505ffc307dad26342d06bfc',
+                        'title': 'Kurzgesagt – In a Nutshell',
+                        'channel_id': 'UCsXVk37bltHxD1rDPwtNM8Q',
+                        # No longer available for search as it is set to the handle.
+                        # 'playlist_count': int,
+                        'channel_url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
+                        'thumbnails': list,
+                        'uploader_id': '@kurzgesagt',
+                        'uploader_url': 'https://www.youtube.com/@kurzgesagt',
+                        'uploader': 'Kurzgesagt – In a Nutshell',
+                        'channel_is_verified': True,
+                        'channel_follower_count': int,
+                    },
+                },
+            ],
+            'params': {'extract_flat': True, 'playlist_items': '1'},
+            'playlist_mincount': 1,
+        },
+        {
+            'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
+            'only_matching': True,
+        },
+    ]
+
+    def _real_extract(self, url):
+        # Turn a search-page URL into a playlist whose id and title are both
+        # the search text.
+        url_args = parse_qs(url)
+        # 'search_query' wins when both spellings are present; 'q' is the
+        # fallback. _VALID_URL requires one of them to appear in the URL.
+        search_values = url_args.get('search_query') or url_args.get('q')
+        query = search_values[0]
+        # The optional 'sp' value is forwarded as-is; None when it is absent.
+        sp_value = url_args.get('sp', (None,))[0]
+        entries = self._search_results(query, sp_value)
+        return self.playlist_result(entries, query, query)
+
+
+class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
+    # Handles music.youtube.com/search URLs. A result section can be chosen
+    # either with an 'sp' query parameter or with a '#<section name>' fragment.
+    IE_NAME = 'youtube:music:search_url'
+    IE_DESC = 'YouTube music search URLs with selectable sections, e.g. #songs'
+    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
+
+    # Section name (lower case, as matched against the URL fragment) mapped to
+    # the 'sp' token that selects it.
+    _SECTIONS = {
+        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
+        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
+        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
+        # NOTE(review): this token is 33 characters including '==', whereas
+        # its siblings are 28 - confirm the padding before relying on it.
+        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
+        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
+        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
+    }
+
+    _TESTS = [
+        {
+            'url': 'https://music.youtube.com/search?q=royalty+free+music',
+            'playlist_count': 16,
+            'info_dict': {
+                'id': 'royalty free music',
+                'title': 'royalty free music',
+            },
+        },
+        {
+            'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
+            'playlist_mincount': 30,
+            'info_dict': {
+                'id': 'royalty free music - songs',
+                'title': 'royalty free music - songs',
+            },
+            'params': {'extract_flat': 'in_playlist'},
+        },
+        {
+            'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
+            'playlist_mincount': 30,
+            'info_dict': {
+                'id': 'royalty free music - community playlists',
+                'title': 'royalty free music - community playlists',
+            },
+            'params': {'extract_flat': 'in_playlist'},
+        },
+    ]
+
+    def _real_extract(self, url):
+        # Turn a music search URL into a playlist titled '<query>' or
+        # '<query> - <section>'; the same string is used as the playlist id.
+        url_args = parse_qs(url)
+        query = (url_args.get('search_query') or url_args.get('q'))[0]
+        params = url_args.get('sp', (None,))[0]
+
+        if not params:
+            # No 'sp' given: read the section name from the text between the
+            # first and second '#' of the URL (empty when there is no '#').
+            pieces = url.split('#')
+            fragment = pieces[1] if len(pieces) > 1 else ''
+            section = urllib.parse.unquote_plus(fragment).lower()
+            params = self._SECTIONS.get(section)
+            if not params:
+                # Unknown or missing section name: leave it out of the title.
+                section = None
+        else:
+            # Explicit 'sp': label it with the matching section name, falling
+            # back to the raw token when it is not one of the known values.
+            section = params
+            for name, token in self._SECTIONS.items():
+                if token == params:
+                    section = name
+                    break
+
+        title = join_nonempty(query, section, delim=' - ')
+        entries = self._search_results(query, params, default_client='web_music')
+        return self.playlist_result(entries, title, title)