diff options
| -rw-r--r-- | test/test_all_urls.py | 4 | ||||
| -rw-r--r-- | test/test_youtube_lists.py | 10 | ||||
| -rw-r--r-- | youtube_dl/YoutubeDL.py | 2 | ||||
| -rw-r--r-- | youtube_dl/extractor/__init__.py | 15 | ||||
| -rw-r--r-- | youtube_dl/extractor/youtube.py | 40 | 
5 files changed, 62 insertions, 9 deletions
diff --git a/test/test_all_urls.py b/test/test_all_urls.py index 7a78005a3..5c17a39fb 100644 --- a/test/test_all_urls.py +++ b/test/test_all_urls.py @@ -71,6 +71,10 @@ class TestAllURLsMatching(unittest.TestCase):      def test_youtube_truncated(self):          self.assertMatch('http://www.youtube.com/watch?', ['youtube:truncated_url']) +    def test_youtube_search_matching(self): +        self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url']) +        self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url']) +      def test_justin_tv_channelid_matching(self):          self.assertTrue(JustinTVIE.suitable(u"justin.tv/vanillatv"))          self.assertTrue(JustinTVIE.suitable(u"twitch.tv/vanillatv")) diff --git a/test/test_youtube_lists.py b/test/test_youtube_lists.py index 5eccc11ff..7d3b9c705 100644 --- a/test/test_youtube_lists.py +++ b/test/test_youtube_lists.py @@ -16,6 +16,7 @@ from youtube_dl.extractor import (      YoutubeChannelIE,      YoutubeShowIE,      YoutubeTopListIE, +    YoutubeSearchURLIE,  ) @@ -133,5 +134,14 @@ class TestYoutubeLists(unittest.TestCase):          entries = result['entries']          self.assertTrue(len(entries) >= 5) +    def test_youtube_search_url(self): +        dl = FakeYDL() +        ie = YoutubeSearchURLIE(dl) +        result = ie.extract('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video') +        entries = result['entries'] +        self.assertIsPlaylist(result) +        self.assertEqual(result['title'], 'youtube-dl test video') +        self.assertTrue(len(entries) >= 5) +  if __name__ == '__main__':      unittest.main() diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py index 47205efae..bbfdfb444 100644 --- a/youtube_dl/YoutubeDL.py +++ b/youtube_dl/YoutubeDL.py @@ -675,7 +675,7 @@ class YoutubeDL(object):              info_dict['playlist'] = None              info_dict['playlist_index'] = None -        if 'display_id' not in info_dict: +        if 'display_id' not in info_dict and 'id' in info_dict:              info_dict['display_id'] = info_dict['id']          # This extractors handle format selection themselves diff --git a/youtube_dl/extractor/__init__.py b/youtube_dl/extractor/__init__.py index e6755151c..f35ee4941 100644 --- a/youtube_dl/extractor/__init__.py +++ b/youtube_dl/extractor/__init__.py @@ -285,19 +285,20 @@ from .youku import YoukuIE  from .youporn import YouPornIE  from .youtube import (      YoutubeIE, +    YoutubeChannelIE, +    YoutubeFavouritesIE, +    YoutubeHistoryIE,      YoutubePlaylistIE, -    YoutubeSearchIE, +    YoutubeRecommendedIE,      YoutubeSearchDateIE, -    YoutubeUserIE, -    YoutubeChannelIE, +    YoutubeSearchIE, +    YoutubeSearchURLIE,      YoutubeShowIE,      YoutubeSubscriptionsIE, -    YoutubeRecommendedIE, +    YoutubeTopListIE,      YoutubeTruncatedURLIE, +    YoutubeUserIE,      YoutubeWatchLaterIE, -    YoutubeFavouritesIE, -    YoutubeHistoryIE, -    YoutubeTopListIE,  )  from .zdf import ZDFIE diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index b40a45384..166a0cf70 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1645,7 +1645,7 @@ class YoutubeChannelIE(InfoExtractor):  class YoutubeUserIE(InfoExtractor):      IE_DESC = u'YouTube.com user videos (URL or "ytuser" keyword)' -    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)' +    _VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:user/)?(?!(?:attribution_link|watch|results)(?:$|[^a-z_A-Z0-9-])))|ytuser:)(?!feed/)([A-Za-z0-9_-]+)'      _TEMPLATE_URL = 'https://gdata.youtube.com/feeds/api/users/%s'      _GDATA_PAGE_SIZE = 50      _GDATA_URL = 'https://gdata.youtube.com/feeds/api/users/%s/uploads?max-results=%d&start-index=%d&alt=json' @@ -1744,12 +1744,50 @@ class YoutubeSearchIE(SearchInfoExtractor):                    for video_id in video_ids]          return self.playlist_result(videos, query) +  class YoutubeSearchDateIE(YoutubeSearchIE):      IE_NAME = YoutubeSearchIE.IE_NAME + ':date'      _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published'      _SEARCH_KEY = 'ytsearchdate'      IE_DESC = u'YouTube.com searches, newest videos first' + +class YoutubeSearchURLIE(InfoExtractor): +    IE_DESC = u'YouTube.com search URLs' +    IE_NAME = u'youtube:search_url' +    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?search_query=(?P<query>[^&]+)(?:[&]|$)' + +    def _real_extract(self, url): +        mobj = re.match(self._VALID_URL, url) +        query = compat_urllib_parse.unquote_plus(mobj.group('query')) + +        webpage = self._download_webpage(url, query) +        result_code = self._search_regex( +            r'(?s)<ol id="search-results"(.*?)</ol>', webpage, u'result HTML') + +        part_codes = re.findall( +            r'(?s)<h3 class="yt-lockup-title">(.*?)</h3>', result_code) +        entries = [] +        for part_code in part_codes: +            part_title = self._html_search_regex( +                r'(?s)title="([^"]+)"', part_code, 'item title', fatal=False) +            part_url_snippet = self._html_search_regex( +                r'(?s)href="([^"]+)"', part_code, 'item URL') +            part_url = compat_urlparse.urljoin( +                'https://www.youtube.com/', part_url_snippet) +            entries.append({ +                '_type': 'url', +                'url': part_url, +                'title': part_title, +            }) + +        return { +            '_type': 'playlist', +            'entries': entries, +            'title': query, +        } + +  class YoutubeShowIE(InfoExtractor):      IE_DESC = u'YouTube.com (multi-season) shows'      _VALID_URL = r'https?://www\.youtube\.com/show/(.*)'  | 
