author | Yen Chi Hsuan <yan12125@gmail.com> | 2017-02-03 01:28:24 +0800
committer | Yen Chi Hsuan <yan12125@gmail.com> | 2017-02-03 01:28:24 +0800
commit | a22b2fd19bd8c08d50f884d1903486d4f00f76ec (patch)
tree | 5b1b33f9e525f1f170f3e347229981eea7a1471e /youtube_dl/extractor
parent | c54c01f82dba6d3e982c73c81ad71c49f31d8af1 (diff)
[youtube] Fix ytsearch* when cookies are provided
Closes #11924
The API with the `page` parameter is no longer used in browsers, and
YouTube always returns {'reload': 'now'} when cookies are provided.
See http://youtube.github.io/spfjs/documentation/start/ for how SPF
works. Basically, appending an `spf` parameter to the static link yields
the corresponding dynamic link.
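To make that concrete, here is a minimal standalone sketch (not part of the patch) of the behaviour the fix relies on: requesting the static results URL with an extra `spf=navigate` parameter returns a JSON payload whose rendered HTML sits under `data[1]['body']['content']`, the same field the extractor already reads. The helper name `fetch_results_fragment` and the use of the standard-library `urllib` are illustrative assumptions, and the endpoint reflects the 2017-era behaviour described above.

```python
# Minimal sketch of the SPF request the patch depends on (assumed helper,
# not youtube-dl code; reflects the 2017-era endpoint behaviour).
import json
from urllib.parse import urlencode
from urllib.request import urlopen


def fetch_results_fragment(query):
    # Static link for a search query...
    static_url = 'https://www.youtube.com/results?' + urlencode({'search_query': query})
    # ...plus the `spf` parameter yields the corresponding dynamic (JSON) link.
    dynamic_url = static_url + '&spf=navigate'
    with urlopen(dynamic_url) as resp:
        data = json.loads(resp.read().decode('utf-8'))
    # The extractor reads the rendered search-results HTML from this field;
    # the exact payload shape here is inferred from that code, not from SPF docs.
    return data[1]['body']['content']
```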
Diffstat (limited to 'youtube_dl/extractor')
-rw-r--r-- | youtube_dl/extractor/youtube.py | 22
1 file changed, 14 insertions, 8 deletions
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 0e67fdd12..f2f751104 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -2348,18 +2348,18 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE):
         videos = []
         limit = n
 
+        url_query = {
+            'search_query': query.encode('utf-8'),
+        }
+        url_query.update(self._EXTRA_QUERY_ARGS)
+        result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)
+
         for pagenum in itertools.count(1):
-            url_query = {
-                'search_query': query.encode('utf-8'),
-                'page': pagenum,
-                'spf': 'navigate',
-            }
-            url_query.update(self._EXTRA_QUERY_ARGS)
-            result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)
             data = self._download_json(
                 result_url, video_id='query "%s"' % query,
                 note='Downloading page %s' % pagenum,
-                errnote='Unable to download API page')
+                errnote='Unable to download API page',
+                query={'spf': 'navigate'})
 
             html_content = data[1]['body']['content']
             if 'class="search-message' in html_content:
@@ -2371,6 +2371,12 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE):
             videos += new_videos
             if not new_videos or len(videos) > limit:
                 break
+            next_link = self._html_search_regex(
+                r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
+                html_content, 'next link', default=None)
+            if next_link is None:
+                break
+            result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)
 
         if len(videos) > n:
             videos = videos[:n]
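For context, the pagination pattern the patch switches to can be sketched on its own: rather than incrementing a `page` parameter, keep requesting whatever URL the "Next" button points at (its `sp=` token carries the continuation state) until no such link is found. The regex is the one added in the diff; `iter_result_pages` and the `fetch_fragment` callable are hypothetical stand-ins for the extractor's download helpers.

```python
# Standalone sketch of the follow-the-next-link loop introduced by the patch
# (assumed helper names; the regex is the one added to youtube.py).
import re
from urllib.parse import urljoin

NEXT_LINK_RE = r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next'


def iter_result_pages(first_url, fetch_fragment):
    """Yield one rendered HTML fragment per results page.

    `fetch_fragment` is assumed to be a callable that takes a full URL and
    returns the rendered search-results HTML (e.g. via an SPF request as in
    the earlier sketch).
    """
    url = first_url
    while True:
        html = fetch_fragment(url)
        yield html
        m = re.search(NEXT_LINK_RE, html)
        if m is None:
            # No "Next" button on the page -> last page reached.
            break
        # Follow the server-provided continuation link instead of guessing
        # a page number.
        url = urljoin('https://www.youtube.com/', m.group(1))
```

Letting the server hand back the next URL avoids relying on the `page` parameter, which YouTube no longer honours once cookies are sent.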