diff options
Diffstat (limited to 'youtube_dl/InfoExtractors.py')
-rwxr-xr-x | youtube_dl/InfoExtractors.py | 65 |
1 files changed, 48 insertions, 17 deletions
diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py
index 7c9f09f77..6a6545c9b 100755
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@@ -152,6 +152,10 @@ class InfoExtractor(object):
         """Report information extraction."""
         self.to_screen(u'%s: Extracting information' % id_or_name)
 
+    def report_age_confirmation(self):
+        """Report attempt to confirm age."""
+        self.to_screen(u'Confirming age')
+
     #Methods for following #608
     #They set the correct value of the '_type' key
     def video_result(self, video_info):
@@ -250,10 +254,6 @@ class YoutubeIE(InfoExtractor):
         """Report attempt to log in."""
         self.to_screen(u'Logging in')
 
-    def report_age_confirmation(self):
-        """Report attempt to confirm age."""
-        self.to_screen(u'Confirming age')
-
     def report_video_webpage_download(self, video_id):
         """Report attempt to download video webpage."""
         self.to_screen(u'%s: Downloading video webpage' % video_id)
@@ -622,8 +622,7 @@ class YoutubeIE(InfoExtractor):
                 format_list = available_formats
             existing_formats = [x for x in format_list if x in url_map]
             if len(existing_formats) == 0:
-                self._downloader.report_error(u'no known formats available for video')
-                return
+                raise ExtractorError(u'no known formats available for video')
             if self._downloader.params.get('listformats', None):
                 self._print_formats(existing_formats)
                 return
@@ -643,11 +642,9 @@ class YoutubeIE(InfoExtractor):
                         video_url_list = [(rf, url_map[rf])]
                         break
                 if video_url_list is None:
-                    self._downloader.report_error(u'requested format not available')
-                    return
+                    raise ExtractorError(u'requested format not available')
         else:
-            self._downloader.report_error(u'no conn or url_encoded_fmt_stream_map information found in video info')
-            return
+            raise ExtractorError(u'no conn or url_encoded_fmt_stream_map information found in video info')
 
         results = []
         for format_param, video_real_url in video_url_list:
@@ -690,10 +687,6 @@ class MetacafeIE(InfoExtractor):
         """Report disclaimer retrieval."""
         self.to_screen(u'Retrieving disclaimer')
 
-    def report_age_confirmation(self):
-        """Report attempt to confirm age."""
-        self.to_screen(u'Confirming age')
-
     def report_download_webpage(self, video_id):
         """Report webpage download."""
         self.to_screen(u'%s: Downloading webpage' % video_id)
@@ -3659,10 +3652,13 @@ class SteamIE(InfoExtractor):
 
     def _real_extract(self, url):
         m = re.match(self._VALID_URL, url, re.VERBOSE)
-        urlRE = r"'movie_(?P<videoID>\d+)': \{\s*FILENAME: \"(?P<videoURL>[\w:/\.\?=]+)\"(,\s*MOVIE_NAME: \"(?P<videoName>[\w:/\.\?=\+-]+)\")?\s*\},"
         gameID = m.group('gameID')
-        videourl = 'http://store.steampowered.com/video/%s/' % gameID
+        videourl = 'http://store.steampowered.com/agecheck/video/%s/?snr=1_agecheck_agecheck__age-gate&ageDay=1&ageMonth=January&ageYear=1970' % gameID
+        self.report_age_confirmation()
         webpage = self._download_webpage(videourl, gameID)
+        game_title = re.search(r'<h2 class="pageheader">(?P<game_title>.*?)</h2>', webpage).group('game_title')
+
+        urlRE = r"'movie_(?P<videoID>\d+)': \{\s*FILENAME: \"(?P<videoURL>[\w:/\.\?=]+)\"(,\s*MOVIE_NAME: \"(?P<videoName>[\w:/\.\?=\+-]+)\")?\s*\},"
         mweb = re.finditer(urlRE, webpage)
         namesRE = r'<span class="title">(?P<videoName>.+?)</span>'
         titles = re.finditer(namesRE, webpage)
@@ -3684,7 +3680,7 @@ class SteamIE(InfoExtractor):
                 'thumbnail': video_thumb
                   }
             videos.append(info)
-        return videos
+        return [self.playlist_result(videos, gameID, game_title)]
 
 class UstreamIE(InfoExtractor):
     _VALID_URL = r'https?://www\.ustream\.tv/recorded/(?P<videoID>\d+)'
@@ -4331,6 +4327,40 @@ class ARDIE(InfoExtractor):
             info["url"] = stream["video_url"]
         return [info]
 
+class TumblrIE(InfoExtractor):
+    _VALID_URL = r'http://(?P<blog_name>.*?).tumblr.com/((post)|(video))/(?P<id>\d*)/(.*?)'
+
+    def _real_extract(self, url):
+        m_url = re.match(self._VALID_URL, url)
+        video_id = m_url.group('id')
+        blog = m_url.group('blog_name')
+
+        url = 'http://%s.tumblr.com/post/%s/' % (blog, video_id)
+        webpage = self._download_webpage(url, video_id)
+
+        re_video = r'src=\\x22(?P<video_url>http://%s.tumblr.com/video_file/%s/(.*?))\\x22 type=\\x22video/(?P<ext>.*?)\\x22' % (blog, video_id)
+        video = re.search(re_video, webpage)
+        if video is None:
+            self.to_screen("No video founded")
+            return []
+        video_url = video.group('video_url')
+        ext = video.group('ext')
+
+        re_thumb = r'posters(.*?)\[\\x22(?P<thumb>.*?)\\x22' # We pick the first poster
+        thumb = re.search(re_thumb, webpage).group('thumb').replace('\\', '')
+
+        # The only place where you can get a title, it's not complete,
+        # but searching in other places doesn't work for all videos
+        re_title = r'<title>(.*?) - (?P<title>.*?)</title>'
+        title = unescapeHTML(re.search(re_title, webpage).group('title'))
+
+        return [{'id': video_id,
+                 'url': video_url,
+                 'title': title,
+                 'thumbnail': thumb,
+                 'ext': ext
+                 }]
+
 def gen_extractors():
     """
     Return a list of an instance of every supported extractor.
@@ -4385,6 +4415,7 @@ def gen_extractors():
         SpiegelIE(),
         LiveLeakIE(),
         ARDIE(),
+        TumblrIE(),
         GenericIE()
     ]
 