diff options
| -rw-r--r-- | youtube_dl/extractor/youtube.py | 112 | 
1 file changed, 76 insertions(+), 36 deletions(-)
class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
    """Search extractor backed by YouTube's youtubei/v1/search (InnerTube) API.

    Results are produced lazily, page by page: the first response is parsed
    from ``twoColumnSearchResultsRenderer`` and subsequent pages are fetched
    by echoing the continuation token back in the POST body.
    """
    _MAX_RESULTS = float('inf')
    IE_NAME = 'youtube:search'
    _SEARCH_KEY = 'ytsearch'
    # Optional pre-encoded filter blob sent as the `params` field of the
    # search request (e.g. sort order); subclasses override it.
    _SEARCH_PARAMS = None
    _TESTS = []

    def _entries(self, query, n):
        """Yield up to *n* url_transparent video results for *query*.

        Stops early when the API returns nothing usable or runs out of
        continuation tokens.
        """
        data = {
            'context': {
                'client': {
                    'clientName': 'WEB',
                    'clientVersion': '2.20201021.03.00',
                }
            },
            'query': query,
        }
        if self._SEARCH_PARAMS:
            data['params'] = self._SEARCH_PARAMS
        total = 0
        for page_num in itertools.count(1):
            # fatal=False: a failed page download ends iteration instead of
            # aborting the whole playlist with an exception.
            search = self._download_json(
                'https://www.youtube.com/youtubei/v1/search?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
                video_id='query "%s"' % query,
                note='Downloading page %s' % page_num,
                errnote='Unable to download API page', fatal=False,
                data=json.dumps(data).encode('utf8'),
                headers={'content-type': 'application/json'})
            if not search:
                break
            # First page nests results under twoColumnSearchResultsRenderer;
            # continuation pages use appendContinuationItemsAction instead.
            slr_contents = try_get(
                search,
                (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'],
                 lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']),
                list)
            if not slr_contents:
                break
            isr_contents = try_get(
                slr_contents,
                lambda x: x[0]['itemSectionRenderer']['contents'],
                list)
            if not isr_contents:
                break
            for content in isr_contents:
                if not isinstance(content, dict):
                    continue
                # Skip non-video renderers (channels, playlists, shelves, ...).
                video = content.get('videoRenderer')
                if not isinstance(video, dict):
                    continue
                video_id = video.get('videoId')
                if not video_id:
                    continue
                title = try_get(video, lambda x: x['title']['runs'][0]['text'], compat_str)
                description = try_get(video, lambda x: x['descriptionSnippet']['runs'][0]['text'], compat_str)
                duration = parse_duration(try_get(video, lambda x: x['lengthText']['simpleText'], compat_str))
                view_count_text = try_get(video, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
                # Strip whitespace AND thousands-separator commas before
                # matching, so e.g. "1,234,567 views" parses as 1234567 —
                # with only \s stripped, ^(\d+) stopped at the first comma
                # and reported a view count of 1.
                view_count = int_or_none(self._search_regex(
                    r'^(\d+)', re.sub(r'[\s,]', '', view_count_text),
                    'view count', default=None))
                uploader = try_get(video, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
                total += 1
                yield {
                    '_type': 'url_transparent',
                    'ie_key': YoutubeIE.ie_key(),
                    'id': video_id,
                    'url': video_id,
                    'title': title,
                    'description': description,
                    'duration': duration,
                    'view_count': view_count,
                    'uploader': uploader,
                }
                if total == n:
                    return
            token = try_get(
                slr_contents,
                lambda x: x[1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
                compat_str)
            if not token:
                break
            data['continuation'] = token

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""
        return self.playlist_result(self._entries(query, n), query)


class YoutubeSearchDateIE(YoutubeSearchIE):
    IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
    _SEARCH_KEY = 'ytsearchdate'
    IE_DESC = 'YouTube.com searches, newest videos first'
    # URL-encoded 'CAI=' — the InnerTube filter for sorting by upload date.
    _SEARCH_PARAMS = 'CAI%3D'
