diff options
| -rw-r--r-- | README.md | 4 | ||||
| -rw-r--r-- | test/test_download.py | 127 | ||||
| -rw-r--r-- | test/test_utils.py | 12 | ||||
| -rw-r--r-- | youtube-dl.1 | 4 | ||||
| -rw-r--r-- | youtube_dl/FileDownloader.py | 10 | ||||
| -rw-r--r-- | youtube_dl/InfoExtractors.py | 22 | ||||
| -rw-r--r-- | youtube_dl/__init__.py | 4 | ||||
| -rw-r--r-- | youtube_dl/utils.py | 13 | 
8 files changed, 154 insertions, 42 deletions
| @@ -47,8 +47,8 @@ which means you can modify it, redistribute it or use it however you like.                               %(extractor)s for the provider (youtube, metacafe,                               etc), %(id)s for the video id and %% for a literal                               percent. Use - to output to stdout. -    --restrict-filenames     Avoid some characters such as "&" and spaces in -                             filenames +    --restrict-filenames     Restrict filenames to only ASCII characters, and +                             avoid "&" and spaces in filenames      -a, --batch-file FILE    file containing URLs to download ('-' for stdin)      -w, --no-overwrites      do not overwrite files      -c, --continue           resume partially downloaded files diff --git a/test/test_download.py b/test/test_download.py index 545afb922..d1d6b119b 100644 --- a/test/test_download.py +++ b/test/test_download.py @@ -7,6 +7,9 @@ import json  from youtube_dl.FileDownloader import FileDownloader  from youtube_dl.InfoExtractors  import YoutubeIE, DailymotionIE  from youtube_dl.InfoExtractors import  MetacafeIE, BlipTVIE +from youtube_dl.InfoExtractors import  XVideosIE, VimeoIE +from youtube_dl.InfoExtractors import  SoundcloudIE, StanfordOpenClassroomIE +from youtube_dl.InfoExtractors import  CollegeHumorIE, XNXXIE  class DownloadTest(unittest.TestCase): @@ -30,10 +33,33 @@ class DownloadTest(unittest.TestCase):  	BLIP_URL = "http://blip.tv/cbr/cbr-exclusive-gotham-city-imposters-bats-vs-jokerz-short-3-5796352"  	BLIP_FILE = "5779306.m4v" -	XVIDEO_MD5 = "" -	XVIDEO_URL = "" -	XVIDEO_FILE = "" +	XVIDEO_MD5 = "1ab4dedc01f771cb2a65e91caa801aaf" +	XVIDEO_URL = "http://www.xvideos.com/video939581/funny_porns_by_s_-1" +	XVIDEO_FILE = "939581.flv" +	VIMEO_MD5 = "1ab4dedc01f771cb2a65e91caa801aaf" +	VIMEO_URL = "http://vimeo.com/14160053" +	VIMEO_FILE = "" + +	VIMEO2_MD5 = "" +	VIMEO2_URL = "http://player.vimeo.com/video/47019590" +	VIMEO2_FILE = "" + +	SOUNDCLOUD_MD5 = "ce3775768ebb6432fa8495d446a078ed" +	SOUNDCLOUD_URL = "http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy" +	SOUNDCLOUD_FILE = "n6FLbx6ZzMiu.mp3" + +	STANDFORD_MD5 = "22c8206291368c4e2c9c1a307f0ea0f4" +	STANDFORD_URL = "http://openclassroom.stanford.edu/MainFolder/VideoPage.php?course=PracticalUnix&video=intro-environment&speed=100" +	STANDFORD_FILE = "PracticalUnix_intro-environment.mp4" + +	COLLEGEHUMOR_MD5 = "" +	COLLEGEHUMOR_URL = "http://www.collegehumor.com/video/6830834/mitt-romney-style-gangnam-style-parody" +	COLLEGEHUMOR_FILE = "" + +	XNXX_MD5 = "5f0469c8d1dfd1bc38c8e6deb5e0a21d" +	XNXX_URL = "http://video.xnxx.com/video1135332/lida_naked_funny_actress_5_" +	XNXX_FILE = "1135332.flv"  	def test_youtube(self):  		#let's download a file from youtube @@ -72,6 +98,73 @@ class DownloadTest(unittest.TestCase):  		md5_down_file = md5_for_file(DownloadTest.BLIP_FILE)  		self.assertEqual(md5_down_file, DownloadTest.BLIP_MD5) +	def test_xvideo(self): +		with open(DownloadTest.PARAMETERS_FILE) as f: +			fd = FileDownloader(json.load(f)) +		fd.add_info_extractor(XVideosIE()) +		fd.download([DownloadTest.XVIDEO_URL]) +		self.assertTrue(os.path.exists(DownloadTest.XVIDEO_FILE)) +		md5_down_file = md5_for_file(DownloadTest.XVIDEO_FILE) +		self.assertEqual(md5_down_file, DownloadTest.XVIDEO_MD5) + +	def test_vimeo(self): +		#skipped for the moment produce an error +		return +		with open(DownloadTest.PARAMETERS_FILE) as f: +			fd = FileDownloader(json.load(f)) +		fd.add_info_extractor(VimeoIE()) +		fd.download([DownloadTest.VIMEO_URL]) +		self.assertTrue(os.path.exists(DownloadTest.VIMEO_FILE)) +		md5_down_file = md5_for_file(DownloadTest.VIMEO_FILE) +		self.assertEqual(md5_down_file, DownloadTest.VIMEO_MD5) + +	def test_vimeo2(self): +		#skipped for the moment produce an error +		return +		with open(DownloadTest.PARAMETERS_FILE) as f: +			fd = FileDownloader(json.load(f)) +		fd.add_info_extractor(VimeoIE()) +		fd.download([DownloadTest.VIMEO2_URL]) +		self.assertTrue(os.path.exists(DownloadTest.VIMEO2_FILE)) +		md5_down_file = md5_for_file(DownloadTest.VIMEO2_FILE) +		self.assertEqual(md5_down_file, DownloadTest.VIMEO2_MD5) + +	def test_soundcloud(self): +		with open(DownloadTest.PARAMETERS_FILE) as f: +			fd = FileDownloader(json.load(f)) +		fd.add_info_extractor(SoundcloudIE()) +		fd.download([DownloadTest.SOUNDCLOUD_URL]) +		self.assertTrue(os.path.exists(DownloadTest.SOUNDCLOUD_FILE)) +		md5_down_file = md5_for_file(DownloadTest.SOUNDCLOUD_FILE) +		self.assertEqual(md5_down_file, DownloadTest.SOUNDCLOUD_MD5) + +	def test_standford(self): +		with open(DownloadTest.PARAMETERS_FILE) as f: +			fd = FileDownloader(json.load(f)) +		fd.add_info_extractor(StanfordOpenClassroomIE()) +		fd.download([DownloadTest.STANDFORD_URL]) +		self.assertTrue(os.path.exists(DownloadTest.STANDFORD_FILE)) +		md5_down_file = md5_for_file(DownloadTest.STANDFORD_FILE) +		self.assertEqual(md5_down_file, DownloadTest.STANDFORD_MD5) + +	def test_collegehumor(self): +		with open(DownloadTest.PARAMETERS_FILE) as f: +			fd = FileDownloader(json.load(f)) +		fd.add_info_extractor(CollegeHumorIE()) +		fd.download([DownloadTest.COLLEGEHUMOR_URL]) +		self.assertTrue(os.path.exists(DownloadTest.COLLEGEHUMOR_FILE)) +		md5_down_file = md5_for_file(DownloadTest.COLLEGEHUMOR_FILE) +		self.assertEqual(md5_down_file, DownloadTest.COLLEGEHUMOR_MD5) + +	def test_xnxx(self): +		with open(DownloadTest.PARAMETERS_FILE) as f: +			fd = FileDownloader(json.load(f)) +		fd.add_info_extractor(XNXXIE()) +		fd.download([DownloadTest.XNXX_URL]) +		self.assertTrue(os.path.exists(DownloadTest.XNXX_FILE)) +		md5_down_file = md5_for_file(DownloadTest.XNXX_FILE) +		self.assertEqual(md5_down_file, DownloadTest.XNXX_MD5) +  	def tearDown(self):  		if os.path.exists(DownloadTest.YOUTUBE_FILE):  			os.remove(DownloadTest.YOUTUBE_FILE) @@ -81,13 +174,25 @@ class DownloadTest(unittest.TestCase):  			os.remove(DownloadTest.METACAFE_FILE)  		if os.path.exists(DownloadTest.BLIP_FILE):  			os.remove(DownloadTest.BLIP_FILE) +		if os.path.exists(DownloadTest.XVIDEO_FILE): +			os.remove(DownloadTest.XVIDEO_FILE) +		if os.path.exists(DownloadTest.VIMEO_FILE): +			os.remove(DownloadTest.VIMEO_FILE) +		if os.path.exists(DownloadTest.SOUNDCLOUD_FILE): +			os.remove(DownloadTest.SOUNDCLOUD_FILE) +		if os.path.exists(DownloadTest.STANDFORD_FILE): +			os.remove(DownloadTest.STANDFORD_FILE) +		if os.path.exists(DownloadTest.COLLEGEHUMOR_FILE): +			os.remove(DownloadTest.COLLEGEHUMOR_FILE) +		if os.path.exists(DownloadTest.XNXX_FILE): +			os.remove(DownloadTest.XNXX_FILE)  def md5_for_file(filename, block_size=2**20): -    with open(filename) as f: -        md5 = hashlib.md5() -        while True: -            data = f.read(block_size) -            if not data: -                break -            md5.update(data) -            return md5.hexdigest() +	with open(filename) as f: +		md5 = hashlib.md5() +		while True: +			data = f.read(block_size) +			if not data: +				break +			md5.update(data) +			return md5.hexdigest() diff --git a/test/test_utils.py b/test/test_utils.py index 0a435ddc5..4208ee653 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -22,10 +22,10 @@ class TestUtil(unittest.TestCase):  		self.assertEqual(sanitize_filename(u'123'), u'123') -		self.assertEqual(u'abc-de', sanitize_filename(u'abc/de')) +		self.assertEqual(u'abc_de', sanitize_filename(u'abc/de'))  		self.assertFalse(u'/' in sanitize_filename(u'abc/de///')) -		self.assertEqual(u'abc-de', sanitize_filename(u'abc/<>\\*|de')) +		self.assertEqual(u'abc_de', sanitize_filename(u'abc/<>\\*|de'))  		self.assertEqual(u'xxx', sanitize_filename(u'xxx/<>\\*|'))  		self.assertEqual(u'yes no', sanitize_filename(u'yes? no'))  		self.assertEqual(u'this - that', sanitize_filename(u'this: that')) @@ -45,17 +45,19 @@ class TestUtil(unittest.TestCase):  		self.assertEqual(sanitize_filename(u'123', restricted=True), u'123') -		self.assertEqual(u'abc-de', sanitize_filename(u'abc/de', restricted=True)) +		self.assertEqual(u'abc_de', sanitize_filename(u'abc/de', restricted=True))  		self.assertFalse(u'/' in sanitize_filename(u'abc/de///', restricted=True)) -		self.assertEqual(u'abc-de', sanitize_filename(u'abc/<>\\*|de', restricted=True)) +		self.assertEqual(u'abc_de', sanitize_filename(u'abc/<>\\*|de', restricted=True))  		self.assertEqual(u'xxx', sanitize_filename(u'xxx/<>\\*|', restricted=True))  		self.assertEqual(u'yes_no', sanitize_filename(u'yes? no', restricted=True))  		self.assertEqual(u'this_-_that', sanitize_filename(u'this: that', restricted=True)) +		self.assertEqual(sanitize_filename(u'aäb', restricted=True), u'a_b') +		self.assertTrue(sanitize_filename(u'ö', restricted=True) != u'') # No empty filename +  		forbidden = u'"\0\\/&: \'\t\n'  		for fc in forbidden: -			print('input: ' + fc + ', result: ' + repr(sanitize_filename(fc, restricted=True)))  			for fbc in forbidden:  				self.assertTrue(fbc not in sanitize_filename(fc, restricted=True)) diff --git a/youtube-dl.1 b/youtube-dl.1 index 64120a8d2..ae303b672 100644 --- a/youtube-dl.1 +++ b/youtube-dl.1 @@ -59,8 +59,8 @@ redistribute it or use it however you like.  \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ %(extractor)s\ for\ the\ provider\ (youtube,\ metacafe,  \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ etc),\ %(id)s\ for\ the\ video\ id\ and\ %%\ for\ a\ literal  \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ percent.\ Use\ -\ to\ output\ to\ stdout. ---restrict-filenames\ \ \ \ \ Avoid\ some\ characters\ such\ as\ "&"\ and\ spaces\ in -\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ filenames +--restrict-filenames\ \ \ \ \ Restrict\ filenames\ to\ only\ ASCII\ characters,\ and +\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ avoid\ "&"\ and\ spaces\ in\ filenames  -a,\ --batch-file\ FILE\ \ \ \ file\ containing\ URLs\ to\ download\ (\[aq]-\[aq]\ for\ stdin)  -w,\ --no-overwrites\ \ \ \ \ \ do\ not\ overwrite\ files  -c,\ --continue\ \ \ \ \ \ \ \ \ \ \ resume\ partially\ downloaded\ files diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index 4c79be432..b6aebe4ac 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -94,6 +94,9 @@ class FileDownloader(object):  		self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]  		self.params = params +		if '%(stitle)s' in self.params['outtmpl']: +			self.to_stderr(u'WARNING: %(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.') +  	@staticmethod  	def format_bytes(bytes):  		if bytes is None: @@ -322,9 +325,8 @@ class FileDownloader(object):  		"""Generate the output filename."""  		try:  			template_dict = dict(info_dict) -			template_dict['epoch'] = unicode(long(time.time())) +			template_dict['epoch'] = unicode(int(time.time()))  			template_dict['autonumber'] = unicode('%05d' % self._num_downloads) -			template_dict['title'] = template_dict['stitle'] # Keep both for backwards compatibility  			filename = self.params['outtmpl'] % template_dict  			return filename  		except (ValueError, KeyError), err: @@ -350,7 +352,8 @@ class FileDownloader(object):  	def process_info(self, info_dict):  		"""Process a single dictionary returned by an InfoExtractor.""" -		info_dict['stitle'] = sanitize_filename(info_dict['title'], self.params.get('restrictfilenames')) +		# Keep for backwards compatibility +		info_dict['stitle'] = info_dict['title']  		reason = self._match_entry(info_dict)  		if reason is not None: @@ -363,6 +366,7 @@ class FileDownloader(object):  				raise MaxDownloadsReached()  		filename = self.prepare_filename(info_dict) +		filename = sanitize_filename(filename, self.params.get('restrictfilenames'))  		# Forced printings  		if self.params.get('forcetitle', False): diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index e9d8ad5b3..13b04ab5b 100644 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -2291,7 +2291,6 @@ class ComedyCentralIE(InfoExtractor):  			print('%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'mp4'), self._video_dimensions.get(x, '???'))) -  	def _real_extract(self, url):  		mobj = re.match(self._VALID_URL, url)  		if mobj is None: @@ -2395,8 +2394,8 @@ class ComedyCentralIE(InfoExtractor):  				continue  			if self._downloader.params.get('listformats', None): -			    self._print_formats([i[0] for i in turls]) -		            return +				self._print_formats([i[0] for i in turls]) +				return  			# For now, just pick the highest bitrate  			format,video_url = turls[-1] @@ -2406,20 +2405,17 @@ class ComedyCentralIE(InfoExtractor):  			# Select format if we can find one  			for f,v in turls: -			    if f == req_format: -			      format, video_url = f, v -			      break - -			# Patch to download from alternative CDN, which does not  -                        # break on current RTMPDump builds -             +				if f == req_format: +					format, video_url = f, v +					break +			# Patch to download from alternative CDN, which does not +			# break on current RTMPDump builds  			broken_cdn = "rtmpe://viacomccstrmfs.fplive.net/viacomccstrm/gsp.comedystor/"  			better_cdn = "rtmpe://cp10740.edgefcs.net/ondemand/mtvnorigin/gsp.comedystor/" -             +  			if video_url.startswith(broken_cdn): -                            video_url = video_url.replace(broken_cdn, better_cdn) -                     +				video_url = video_url.replace(broken_cdn, better_cdn)  			effTitle = showId + u'-' + epTitle  			info = { diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index cbf1dd1a7..7cc17af93 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -274,7 +274,7 @@ def parseOpts():  			dest='outtmpl', metavar='TEMPLATE', help='output filename template. Use %(title)s to get the title, %(uploader)s for the uploader name, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id and %% for a literal percent. Use - to output to stdout.')  	filesystem.add_option('--restrict-filenames',  			action='store_true', dest='restrictfilenames', -			help='Avoid some characters such as "&" and spaces in filenames', default=False) +			help='Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames', default=False)  	filesystem.add_option('-a', '--batch-file',  			dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)')  	filesystem.add_option('-w', '--no-overwrites', @@ -532,7 +532,7 @@ def _real_main():  			parser.error(u'you must provide at least one URL')  		else:  			sys.exit() -	 +  	try:  		retcode = fd.download(all_urls)  	except MaxDownloadsReached: diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 1f60d34ae..3339f56ec 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -207,15 +207,20 @@ def sanitize_filename(s, restricted=False):  		elif char == ':':  			return '_-' if restricted else ' -'  		elif char in '\\/|*<>': -			return '-' +			return '_'  		if restricted and (char in '&\'' or char.isspace()):  			return '_' +		if restricted and ord(char) > 127: +			return '_'  		return char  	result = u''.join(map(replace_insane, s)) -	while '--' in result: -		result = result.replace('--', '-') -	return result.strip('-') +	while '__' in result: +		result = result.replace('__', '_') +	result = result.strip('_') +	if not result: +		result = '_' +	return result  def orderedSet(iterable):  	""" Remove all duplicates from the input iterable """ | 
