aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPhilipp Hagemeister <phihag@phihag.de>2014-03-04 03:32:28 +0100
committerPhilipp Hagemeister <phihag@phihag.de>2014-03-04 03:32:28 +0100
commitc9ae7b95659df18a296752f31162e57d29777a80 (patch)
tree38134be9e17934b41849e69872479201761f97c3
parent86fb4347f7decdc2c6e58a4ec4cb14986536ea67 (diff)
downloadyoutube-dl-c9ae7b95659df18a296752f31162e57d29777a80.tar.xz
[youtube] Add support for search result URLs (Fixes #2495)
-rw-r--r--test/test_all_urls.py4
-rw-r--r--test/test_youtube_lists.py10
-rw-r--r--youtube_dl/YoutubeDL.py2
-rw-r--r--youtube_dl/extractor/__init__.py15
-rw-r--r--youtube_dl/extractor/youtube.py40
5 files changed, 62 insertions, 9 deletions
diff --git a/test/test_all_urls.py b/test/test_all_urls.py
index 7a78005a3..5c17a39fb 100644
--- a/test/test_all_urls.py
+++ b/test/test_all_urls.py
@@ -71,6 +71,10 @@ class TestAllURLsMatching(unittest.TestCase):
def test_youtube_truncated(self):
self.assertMatch('http://www.youtube.com/watch?', ['youtube:truncated_url'])
+ def test_youtube_search_matching(self):
+ self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
+ self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url'])
+
def test_justin_tv_channelid_matching(self):
self.assertTrue(JustinTVIE.suitable(u"justin.tv/vanillatv"))
self.assertTrue(JustinTVIE.suitable(u"twitch.tv/vanillatv"))
diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py
index 5eccc11ff..7d3b9c705 100644
--- a/test/test_youtube_lists.py
+++ b/test/test_youtube_lists.py
@@ -16,6 +16,7 @@ from youtube_dl.extractor import (
YoutubeChannelIE,
YoutubeShowIE,
YoutubeTopListIE,
+ YoutubeSearchURLIE,
)
@@ -133,5 +134,14 @@ class TestYoutubeLists(unittest.TestCase):
entries = result['entries']
self.assertTrue(len(entries) >= 5)
+ def test_youtube_search_url(self):
+ dl = FakeYDL()
+ ie = YoutubeSearchURLIE(dl)
+ result = ie.extract('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video')
+ entries = result['entries']
+ self.assertIsPlaylist(result)
+ self.assertEqual(result['title'], 'youtube-dl test video')
+ self.assertTrue(len(entries) >= 5)
+
if __name__ == '__main__':
unittest.main()
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
index 47205efae..bbfdfb444 100644
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -675,7 +675,7 @@ class YoutubeDL(object):
info_dict['playlist'] = None
info_dict['playlist_index'] = None
- if 'display_id' not in info_dict:
+ if 'display_id' not in info_dict and 'id' in info_dict:
info_dict['display_id'] = info_dict['id']
# This extractors handle format selection themselves
diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py
index e6755151c..f35ee4941 100644
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -285,19 +285,20 @@ from .youku import YoukuIE
from .youporn import YouPornIE
from .youtube import (
YoutubeIE,
+ YoutubeChannelIE,
+ YoutubeFavouritesIE,
+ YoutubeHistoryIE,
YoutubePlaylistIE,
- YoutubeSearchIE,
+ YoutubeRecommendedIE,
YoutubeSearchDateIE,
- YoutubeUserIE,
- YoutubeChannelIE,
+ YoutubeSearchIE,
+ YoutubeSearchURLIE,
YoutubeShowIE,
YoutubeSubscriptionsIE,
- YoutubeRecommendedIE,
+ YoutubeTopListIE,
YoutubeTruncatedURLIE,
+ YoutubeUserIE,
YoutubeWatchLaterIE,
- YoutubeFavouritesIE,
- YoutubeHistoryIE,
- YoutubeTopListIE,
)
from .zdf import ZDFIE
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index b40a45384..166a0cf70 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1645,7 +1645,7 @@ class YoutubeChannelIE(InfoExtractor):
class YoutubeUserIE(InfoExtractor):
IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)'
- _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
+ _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'
_TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/users/%s'
_GDATA_PAGE_SIZE = 50
_GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json'
@@ -1744,12 +1744,50 @@ class YoutubeSearchIE(SearchInfoExtractor):
for video_id in video_ids]
return self.playlist_result(videos, query)
+
class YoutubeSearchDateIE(YoutubeSearchIE):
IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
_API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published'
_SEARCH_KEY = 'ytsearchdate'
IE_DESC = u'YouTube.com searches, newest videos first'
+
+class YoutubeSearchURLIE(InfoExtractor):
+ IE_DESC = u'YouTube.com search URLs'
+ IE_NAME = u'youtube:search_url'
+ _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]|$)'
+
+ def _real_extract(self, url):
+ mobj = re.match(self._VALID_URL, url)
+ query = compat_urllib_parse.unquote_plus(mobj.group('query'))
+
+ webpage = self._download_webpage(url, query)
+ result_code = self._search_regex(
+ r'(?s)<ol id="search-results"(.*?)</ol>', webpage, u'result HTML')
+
+ part_codes = re.findall(
+ r'(?s)<h3 class="yt-lockup-title">(.*?)</h3>', result_code)
+ entries = []
+ for part_code in part_codes:
+ part_title = self._html_search_regex(
+ r'(?s)title="([^"]+)"', part_code, 'item title', fatal=False)
+ part_url_snippet = self._html_search_regex(
+ r'(?s)href="([^"]+)"', part_code, 'item URL')
+ part_url = compat_urlparse.urljoin(
+ 'https://www.youtube.com/', part_url_snippet)
+ entries.append({
+ '_type': 'url',
+ 'url': part_url,
+ 'title': part_title,
+ })
+
+ return {
+ '_type': 'playlist',
+ 'entries': entries,
+ 'title': query,
+ }
+
+
class YoutubeShowIE(InfoExtractor):
IE_DESC = u'YouTube.com (multi-season) shows'
_VALID_URL = r'https?://www\.youtube\.com/show/(.*)'