diff options
Diffstat (limited to 'youtube_dl/extractor/youtube.py')
-rw-r--r-- | youtube_dl/extractor/youtube.py | 52 |
1 files changed, 17 insertions, 35 deletions
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index dc601de52..c992cba97 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -340,18 +340,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): } }, { - u"url": u"http://www.youtube.com/watch?v=1ltcDfZMA3U", - u"file": u"1ltcDfZMA3U.mp4", - u"note": u"Test VEVO video (#897)", - u"info_dict": { - u"upload_date": u"20070518", - u"title": u"Maps - It Will Find You", - u"description": u"Music video by Maps performing It Will Find You.", - u"uploader": u"MuteUSA", - u"uploader_id": u"MuteUSA" - } - }, - { u"url": u"http://www.youtube.com/watch?v=UxxajLWwzqY", u"file": u"UxxajLWwzqY.mp4", u"note": u"Test generic use_cipher_signature video (#897)", @@ -1094,7 +1082,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): else: raise ExtractorError(u'Unable to decrypt signature, key length %d not supported; retrying might work' % (len(s))) - def _get_available_subtitles(self, video_id): + def _get_available_subtitles(self, video_id, webpage): try: sub_list = self._download_webpage( 'http://video.google.com/timedtext?hl=en&type=list&v=%s' % video_id, @@ -1497,7 +1485,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor): 'subtitles': video_subtitles, 'duration': video_duration, 'age_limit': 18 if age_gate else 0, - 'annotations': video_annotations + 'annotations': video_annotations, + 'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id, }) return results @@ -1583,7 +1572,6 @@ class YoutubePlaylistIE(InfoExtractor): class YoutubeChannelIE(InfoExtractor): IE_DESC = u'YouTube.com channels' _VALID_URL = r"^(?:https?://)?(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/([0-9A-Za-z_-]+)" - _TEMPLATE_URL = 'http://www.youtube.com/channel/%s/videos?sort=da&flow=list&view=0&page=%s&gl=US&hl=en' _MORE_PAGES_INDICATOR = 'yt-uix-load-more' _MORE_PAGES_URL = 'http://www.youtube.com/c4_browse_ajax?action_load_more_videos=1&flow=list&paging=%s&view=0&sort=da&channel_id=%s' IE_NAME = u'youtube:channel' @@ -1604,30 +1592,20 @@ class YoutubeChannelIE(InfoExtractor): # Download channel page channel_id = mobj.group(1) video_ids = [] - pagenum = 1 - - url = self._TEMPLATE_URL % (channel_id, pagenum) - page = self._download_webpage(url, channel_id, - u'Downloading page #%s' % pagenum) - # Extract video identifiers - ids_in_page = self.extract_videos_from_page(page) - video_ids.extend(ids_in_page) + # Download all channel pages using the json-based channel_ajax query + for pagenum in itertools.count(1): + url = self._MORE_PAGES_URL % (pagenum, channel_id) + page = self._download_webpage(url, channel_id, + u'Downloading page #%s' % pagenum) - # Download any subsequent channel pages using the json-based channel_ajax query - if self._MORE_PAGES_INDICATOR in page: - for pagenum in itertools.count(1): - url = self._MORE_PAGES_URL % (pagenum, channel_id) - page = self._download_webpage(url, channel_id, - u'Downloading page #%s' % pagenum) + page = json.loads(page) - page = json.loads(page) - - ids_in_page = self.extract_videos_from_page(page['content_html']) - video_ids.extend(ids_in_page) + ids_in_page = self.extract_videos_from_page(page['content_html']) + video_ids.extend(ids_in_page) - if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']: - break + if self._MORE_PAGES_INDICATOR not in page['load_more_widget_html']: + break self._downloader.to_screen(u'[youtube] Channel %s: Found %i videos' % (channel_id, len(video_ids))) @@ -1743,6 +1721,10 @@ class YoutubeSearchIE(SearchInfoExtractor): videos = [self.url_result('http://www.youtube.com/watch?v=%s' % id, 'Youtube') for id in video_ids] return self.playlist_result(videos, query) +class YoutubeSearchDateIE(YoutubeSearchIE): + _API_URL = 'https://gdata.youtube.com/feeds/api/videos?q=%s&start-index=%i&max-results=50&v=2&alt=jsonc&orderby=published' + _SEARCH_KEY = 'ytsearchdate' + IE_DESC = u'YouTube.com searches, newest videos first' class YoutubeShowIE(InfoExtractor): IE_DESC = u'YouTube.com (multi-season) shows' |