diff options
| author | Philipp Hagemeister <phihag@phihag.de> | 2012-08-01 12:26:00 -0700 | 
|---|---|---|
| committer | Philipp Hagemeister <phihag@phihag.de> | 2012-08-01 12:26:00 -0700 | 
| commit | a171dbfc2703b8d7c726024cdc21875b628b09d5 (patch) | |
| tree | 78876e704130151e1c82a08f0d7b417bcea1f359 | |
| parent | dee5d76923b15ed82ba5fbd1cdde3cf113e0477c (diff) | |
| parent | 11a141dec91a28a883203bf2c97750438b932efa (diff) | |
Merge pull request #386 from FiloSottile/blip
Blip.tv
| -rw-r--r-- | youtube_dl/InfoExtractors.py | 95 | ||||
| -rw-r--r-- | youtube_dl/__init__.py | 1 | 
2 files changed, 95 insertions, 1 deletion
| diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index 0835c447c..ddb4aa16b 100644 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -1622,6 +1622,98 @@ class YoutubeUserIE(InfoExtractor):  			self._downloader.download(['http://www.youtube.com/watch?v=%s' % video_id]) +class BlipTVUserIE(InfoExtractor): +	"""Information Extractor for blip.tv users.""" + +	_VALID_URL = r'(?:(?:(?:https?://)?(?:\w+\.)?blip\.tv/)|bliptvuser:)([^/]+)/*$' +	_PAGE_SIZE = 12 +	IE_NAME = u'blip.tv:user' + +	def __init__(self, downloader=None): +		InfoExtractor.__init__(self, downloader) + +	def report_download_page(self, username, pagenum): +		"""Report attempt to download user page.""" +		self._downloader.to_screen(u'[%s] user %s: Downloading video ids from page %d' % +				(self.IE_NAME, username, pagenum)) + +	def _real_extract(self, url): +		# Extract username +		mobj = re.match(self._VALID_URL, url) +		if mobj is None: +			self._downloader.trouble(u'ERROR: invalid url: %s' % url) +			return + +		username = mobj.group(1) + +		page_base = 'http://m.blip.tv/pr/show_get_full_episode_list?users_id=%s&lite=0&esi=1' + +		request = urllib2.Request(url) + +		try: +			page = urllib2.urlopen(request).read().decode('utf-8') +			mobj = re.search(r'data-users-id="([^"]+)"', page) +			page_base = page_base % mobj.group(1) +		except (urllib2.URLError, httplib.HTTPException, socket.error), err: +			self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) +			return + + +		# Download video ids using BlipTV Ajax calls. Result size per +		# query is limited (currently to 12 videos) so we need to query +		# page by page until there are no video ids - it means we got +		# all of them. 
+ +		video_ids = [] +		pagenum = 1 + +		while True: +			self.report_download_page(username, pagenum) + +			request = urllib2.Request( page_base + "&page=" + str(pagenum) ) + +			try: +				page = urllib2.urlopen(request).read().decode('utf-8') +			except (urllib2.URLError, httplib.HTTPException, socket.error), err: +				self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) +				return + +			# Extract video identifiers +			ids_in_page = [] + +			for mobj in re.finditer(r'href="/([^"]+)"', page): +				if mobj.group(1) not in ids_in_page: +					ids_in_page.append(unescapeHTML(mobj.group(1))) + +			video_ids.extend(ids_in_page) + +			# A little optimization - if current page is not +			# "full", ie. does not contain PAGE_SIZE video ids then +			# we can assume that this page is the last one - there +			# are no more ids on further pages - no need to query +			# again. + +			if len(ids_in_page) < self._PAGE_SIZE: +				break + +			pagenum += 1 + +		all_ids_count = len(video_ids) +		playliststart = self._downloader.params.get('playliststart', 1) - 1 +		playlistend = self._downloader.params.get('playlistend', -1) + +		if playlistend == -1: +			video_ids = video_ids[playliststart:] +		else: +			video_ids = video_ids[playliststart:playlistend] + +		self._downloader.to_screen(u"[%s] user %s: Collected %d video ids (downloading %d of them)" % +				(self.IE_NAME, username, all_ids_count, len(video_ids))) + +		for video_id in video_ids: +			self._downloader.download([u'http://blip.tv/'+video_id]) + +  class DepositFilesIE(InfoExtractor):  	"""Information extractor for depositfiles.com""" @@ -1920,7 +2012,7 @@ class BlipTVIE(InfoExtractor):  		else:  			cchar = '?'  		
json_url = url + cchar + 'skin=json&version=2&no_wrap=1' -		request = urllib2.Request(json_url) +		request = urllib2.Request(json_url.encode('utf-8'))  		self.report_extraction(mobj.group(1))  		info = None  		try: @@ -1978,6 +2070,7 @@ class BlipTVIE(InfoExtractor):  				self._downloader.trouble(u'ERROR: unable to parse video information: %s' % repr(err))  				return +		std_headers['User-Agent'] = 'iTunes/10.6.1'  		return [info] diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index f10822db1..86951840d 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -338,6 +338,7 @@ def gen_extractors():  		YahooSearchIE(),  		DepositFilesIE(),  		FacebookIE(), +		BlipTVUserIE(),  		BlipTVIE(),  		VimeoIE(),  		MyVideoIE(), | 
