diff options
Diffstat (limited to 'youtube_dl/extractor/youtube.py')
| -rw-r--r-- | youtube_dl/extractor/youtube.py | 51 | 
1 file changed, 26 insertions, 25 deletions
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index b2ae08418..f1a5c0077 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1458,54 +1458,55 @@ class YoutubeUserIE(YoutubeChannelIE):
             return super(YoutubeUserIE, cls).suitable(url)
 
 
-class YoutubeSearchIE(SearchInfoExtractor):
+class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE):
     IE_DESC = 'YouTube.com searches'
-    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc'
-    _MAX_RESULTS = 1000
+    # there doesn't appear to be a real limit, for example if you search for
+    # 'python' you get more than 8.000.000 results
+    _MAX_RESULTS = float('inf')
     IE_NAME = 'youtube:search'
     _SEARCH_KEY = 'ytsearch'
+    _EXTRA_QUERY_ARGS = {}
 
     def _get_n_results(self, query, n):
         """Get a specified number of results for a query"""
-        video_ids = []
-        pagenum = 0
+        videos = []
         limit = n
 
-        PAGE_SIZE = 50
-        while (PAGE_SIZE * pagenum) < limit:
-            result_url = self._API_URL % (
-                compat_urllib_parse.quote_plus(query.encode('utf-8')),
-                max((PAGE_SIZE * pagenum) + 1), 2)
-            data_json = self._download_webpage(
+        for pagenum in itertools.count(1):
+            url_query = {
+                'search_query': query,
+                'page': pagenum,
+                'spf': 'navigate',
+            }
+            url_query.update(self._EXTRA_QUERY_ARGS)
+            result_url = 'https://www.youtube.com/results?' + compat_urllib_parse.urlencode(url_query)
+            data = self._download_json(
                 result_url, video_id='query "%s"' % query,
-                note='Downloading page %s' % (pagenum + 1),
+                note='Downloading page %s' % pagenum,
                 errnote='Unable to download API page')
-            data = json.loads(data_json)
-            api_response = data['data']
+            html_content = data[1]['body']['content']
 
-            if 'items' not in api_response:
+            if 'class="search-message' in html_content:
                 raise ExtractorError(
                     '[youtube] No video results', expected=True)
 
-            new_ids = list(video['id'] for video in api_response['items'])
-            video_ids += new_ids
-
-            limit = min(n, api_response['totalItems'])
-            pagenum += 1
+            new_videos = self._ids_to_results(orderedSet(re.findall(
+                r'href="/watch\?v=(.{11})', html_content)))
+            videos += new_videos
+            if not new_videos or len(videos) > limit:
+                break
 
-        if len(video_ids) > n:
-            video_ids = video_ids[:n]
-        videos = [self.url_result(video_id, 'Youtube', video_id=video_id)
-                  for video_id in video_ids]
+        if len(videos) > n:
+            videos = videos[:n]
         return self.playlist_result(videos, query)
 
 
 class YoutubeSearchDateIE(YoutubeSearchIE):
     IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
-    _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published'
     _SEARCH_KEY = 'ytsearchdate'
     IE_DESC = 'YouTube.com searches, newest videos first'
+    _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
 
 
 class YoutubeSearchURLIE(InfoExtractor):
