diff options
| -rw-r--r-- | LATEST_VERSION | 2 | ||||
| -rwxr-xr-x | youtube-dl | 158 | 
2 files changed, 159 insertions, 1 deletions
| diff --git a/LATEST_VERSION b/LATEST_VERSION index 382e966d8..7ed3afa00 100644 --- a/LATEST_VERSION +++ b/LATEST_VERSION @@ -1 +1 @@ -2010.01.06 +2010.01.15 diff --git a/youtube-dl b/youtube-dl index 640ed4bdc..732ebce1d 100755 --- a/youtube-dl +++ b/youtube-dl @@ -2,6 +2,7 @@  # -*- coding: utf-8 -*-  # Author: Ricardo Garcia Gonzalez  # Author: Danny Colligan +# Author: Benjamin Johnson  # License: Public domain code  import htmlentitydefs  import httplib @@ -936,6 +937,159 @@ class MetacafeIE(InfoExtractor):  			self._downloader.trouble(u'ERROR: format not available for video') +class GoogleIE(InfoExtractor): +	"""Information extractor for video.google.com.""" + +	_VALID_URL = r'(?:http://)?video\.google\.com/videoplay\?docid=([^\&]+).*' + +	def __init__(self, downloader=None): +		InfoExtractor.__init__(self, downloader) + +	@staticmethod +	def suitable(url): +		return (re.match(GoogleIE._VALID_URL, url) is not None) + +	def report_download_webpage(self, video_id): +		"""Report webpage download.""" +		self._downloader.to_stdout(u'[video.google] %s: Downloading webpage' % video_id) + +	def report_extraction(self, video_id): +		"""Report information extraction.""" +		self._downloader.to_stdout(u'[video.google] %s: Extracting information' % video_id) + +	def _real_initialize(self): +		return + +	def _real_extract(self, url): +		# Extract id from URL +		mobj = re.match(self._VALID_URL, url) +		if mobj is None: +			self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) +			return + +		video_id = mobj.group(1) + +		video_extension = 'mp4' + +		# Retrieve video webpage to extract further information +		request = urllib2.Request('http://video.google.com/videoplay?docid=%s' % video_id) +		try: +			self.report_download_webpage(video_id) +			webpage = urllib2.urlopen(request).read() +		except (urllib2.URLError, httplib.HTTPException, socket.error), err: +			self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) +			return + +		# Extract URL, uploader, and title from webpage +		self.report_extraction(video_id) +		mobj = re.search(r"download_url:'(.*)'", webpage) +		if mobj is None: +			self._downloader.trouble(u'ERROR: unable to extract media URL') +			return +		mediaURL = urllib.unquote(mobj.group(1)) +		mediaURL = mediaURL.replace('\\x3d', '\x3d') +		mediaURL = mediaURL.replace('\\x26', '\x26') + +		video_url = mediaURL + +		mobj = re.search(r'<title>(.*)</title>', webpage) +		if mobj is None: +			self._downloader.trouble(u'ERROR: unable to extract title') +			return +		video_title = mobj.group(1).decode('utf-8') + +		# Google Video doesn't show uploader nicknames? +		video_uploader = 'uploader' + +		try: +			# Process video information +			self._downloader.process_info({ +				'id':		video_id.decode('utf-8'), +				'url':		video_url.decode('utf-8'), +				'uploader':	video_uploader.decode('utf-8'), +				'title':	video_title.decode('utf-8'), +				'stitle':	video_title.decode('utf-8'), +				'ext':		video_extension.decode('utf-8'), +			}) +		except UnavailableFormatError: +			self._downloader.trouble(u'ERROR: format not available for video') + + +class PhotobucketIE(InfoExtractor): +	"""Information extractor for photobucket.com.""" + +	_VALID_URL = r'(?:http://)?(?:[a-z0-9]+\.)?photobucket\.com/.*[\?\&]current=(.*\.flv)' + +	def __init__(self, downloader=None): +		InfoExtractor.__init__(self, downloader) + +	@staticmethod +	def suitable(url): +		return (re.match(PhotobucketIE._VALID_URL, url) is not None) + +	def report_download_webpage(self, video_id): +		"""Report webpage download.""" +		self._downloader.to_stdout(u'[photobucket] %s: Downloading webpage' % video_id) + +	def report_extraction(self, video_id): +		"""Report information extraction.""" +		self._downloader.to_stdout(u'[photobucket] %s: Extracting information' % video_id) + +	def _real_initialize(self): +		return + +	def _real_extract(self, url): +		# Extract id from URL +		mobj = re.match(self._VALID_URL, url) +		if mobj is None: +			self._downloader.trouble(u'ERROR: Invalid URL: %s' % url) +			return + +		video_id = mobj.group(1) + +		video_extension = 'flv' + +		# Retrieve video webpage to extract further information +		request = urllib2.Request(url) +		try: +			self.report_download_webpage(video_id) +			webpage = urllib2.urlopen(request).read() +		except (urllib2.URLError, httplib.HTTPException, socket.error), err: +			self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) +			return + +		# Extract URL, uploader, and title from webpage +		self.report_extraction(video_id) +		mobj = re.search(r'<link rel="video_src" href=".*\?file=([^"]+)" />', webpage) +		if mobj is None: +			self._downloader.trouble(u'ERROR: unable to extract media URL') +			return +		mediaURL = urllib.unquote(mobj.group(1)) + +		video_url = mediaURL + +		mobj = re.search(r'<title>(.*) video by (.*) - Photobucket</title>', webpage) +		if mobj is None: +			self._downloader.trouble(u'ERROR: unable to extract title') +			return +		video_title = mobj.group(1).decode('utf-8') + +		video_uploader = mobj.group(2).decode('utf-8') + +		try: +			# Process video information +			self._downloader.process_info({ +				'id':		video_id.decode('utf-8'), +				'url':		video_url.decode('utf-8'), +				'uploader':	video_uploader.decode('utf-8'), +				'title':	video_title.decode('utf-8'), +				'stitle':	video_title.decode('utf-8'), +				'ext':		video_extension.decode('utf-8'), +			}) +		except UnavailableFormatError: +			self._downloader.trouble(u'ERROR: format not available for video') + +  class YoutubeSearchIE(InfoExtractor):  	"""Information Extractor for YouTube search queries."""  	_VALID_QUERY = r'ytsearch(\d+|all)?:[\s\S]+' @@ -1314,6 +1468,8 @@ if __name__ == '__main__':  		youtube_pl_ie = YoutubePlaylistIE(youtube_ie)  		youtube_user_ie = YoutubeUserIE(youtube_ie)  		youtube_search_ie = YoutubeSearchIE(youtube_ie) +		google_ie = GoogleIE() +		photobucket_ie = PhotobucketIE()  		# File downloader  		fd = FileDownloader({ @@ -1339,6 +1495,8 @@ if __name__ == '__main__':  		fd.add_info_extractor(youtube_user_ie)  		fd.add_info_extractor(metacafe_ie)  		fd.add_info_extractor(youtube_ie) +		fd.add_info_extractor(google_ie) +		fd.add_info_extractor(photobucket_ie)  		# Update version  		if opts.update_self: | 
