aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Yen Chi Hsuan <yan12125@gmail.com> 2017-02-03 01:28:24 +0800
committer Yen Chi Hsuan <yan12125@gmail.com> 2017-02-03 01:28:24 +0800
commit a22b2fd19bd8c08d50f884d1903486d4f00f76ec (patch)
tree 5b1b33f9e525f1f170f3e347229981eea7a1471e
parent c54c01f82dba6d3e982c73c81ad71c49f31d8af1 (diff)
[youtube] Fix ytsearch* when cookies are provided
Closes #11924. The API with `page` is no longer used in browsers, and YouTube always returns {'reload': 'now'} when cookies are provided. See http://youtube.github.io/spfjs/documentation/start/ for how SPF works. Basically, appending a `spf` parameter to a static link yields the corresponding dynamic link.
-rw-r--r-- ChangeLog 1
-rw-r--r-- youtube_dl/extractor/youtube.py 22
2 files changed, 15 insertions, 8 deletions
diff --git a/ChangeLog b/ChangeLog
index c27907f51..c80126cfb 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,6 +1,7 @@
version <unreleased>
Extractors
+* [youtube] Fix ytsearch when cookies are provided (#11924)
+ [bilibili] Support new Bangumi URLs (#11845)
version 2017.02.01
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 0e67fdd12..f2f751104 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -2348,18 +2348,18 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE):
videos = []
limit = n
+ url_query = {
+ 'search_query': query.encode('utf-8'),
+ }
+ url_query.update(self._EXTRA_QUERY_ARGS)
+ result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)
+
for pagenum in itertools.count(1):
- url_query = {
- 'search_query': query.encode('utf-8'),
- 'page': pagenum,
- 'spf': 'navigate',
- }
- url_query.update(self._EXTRA_QUERY_ARGS)
- result_url = 'https://www.youtube.com/results?' + compat_urllib_parse_urlencode(url_query)
data = self._download_json(
result_url, video_id='query "%s"' % query,
note='Downloading page %s' % pagenum,
- errnote='Unable to download API page')
+ errnote='Unable to download API page',
+ query={'spf': 'navigate'})
html_content = data[1]['body']['content']
if 'class="search-message' in html_content:
@@ -2371,6 +2371,12 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE):
videos += new_videos
if not new_videos or len(videos) > limit:
break
+ next_link = self._html_search_regex(
+ r'href="(/results\?[^"]*\bsp=[^"]+)"[^>]*>\s*<span[^>]+class="[^"]*\byt-uix-button-content\b[^"]*"[^>]*>Next',
+ html_content, 'next link', default=None)
+ if next_link is None:
+ break
+ result_url = compat_urlparse.urljoin('https://www.youtube.com/', next_link)
if len(videos) > n:
videos = videos[:n]