diff options
Diffstat (limited to 'youtube_dl')
| -rw-r--r-- | youtube_dl/FileDownloader.py | 129 | ||||
| -rw-r--r-- | youtube_dl/InfoExtractors.py | 216 | ||||
| -rw-r--r-- | youtube_dl/PostProcessor.py | 6 | ||||
| -rw-r--r-- | youtube_dl/__init__.py | 62 | ||||
| -rw-r--r-- | youtube_dl/utils.py | 38 |
5 files changed, 293 insertions, 158 deletions
diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index 38c6a519a..bd0f75773 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -13,7 +13,7 @@ import urllib2 if os.name == 'nt': import ctypes - + from utils import * @@ -44,37 +44,40 @@ class FileDownloader(object): Available options: - username: Username for authentication purposes. - password: Password for authentication purposes. - usenetrc: Use netrc for authentication instead. - quiet: Do not print messages to stdout. - forceurl: Force printing final URL. - forcetitle: Force printing title. - forcethumbnail: Force printing thumbnail URL. - forcedescription: Force printing description. - forcefilename: Force printing final filename. - simulate: Do not download the video files. - format: Video format code. - format_limit: Highest quality format to try. - outtmpl: Template for output names. - ignoreerrors: Do not stop on download errors. - ratelimit: Download speed limit, in bytes/sec. - nooverwrites: Prevent overwriting files. - retries: Number of times to retry for HTTP error 5xx - continuedl: Try to continue downloads if possible. - noprogress: Do not print the progress bar. - playliststart: Playlist item to start at. - playlistend: Playlist item to end at. - matchtitle: Download only matching titles. - rejecttitle: Reject downloads for matching titles. - logtostderr: Log messages to stderr instead of stdout. - consoletitle: Display progress in console window's titlebar. - nopart: Do not use temporary .part files. - updatetime: Use the Last-modified header to set output file timestamps. - writedescription: Write the video description to a .description file - writeinfojson: Write the video description to a .info.json file - writesubtitles: Write the video subtitles to a .srt file - subtitleslang: Language of the subtitles to download + username: Username for authentication purposes. + password: Password for authentication purposes. + usenetrc: Use netrc for authentication instead. + quiet: Do not print messages to stdout. + forceurl: Force printing final URL. + forcetitle: Force printing title. + forcethumbnail: Force printing thumbnail URL. + forcedescription: Force printing description. + forcefilename: Force printing final filename. + simulate: Do not download the video files. + format: Video format code. + format_limit: Highest quality format to try. + outtmpl: Template for output names. + restrictfilenames: Do not allow "&" and spaces in file names + ignoreerrors: Do not stop on download errors. + ratelimit: Download speed limit, in bytes/sec. + nooverwrites: Prevent overwriting files. + retries: Number of times to retry for HTTP error 5xx + buffersize: Size of download buffer in bytes. + noresizebuffer: Do not automatically resize the download buffer. + continuedl: Try to continue downloads if possible. + noprogress: Do not print the progress bar. + playliststart: Playlist item to start at. + playlistend: Playlist item to end at. + matchtitle: Download only matching titles. + rejecttitle: Reject downloads for matching titles. + logtostderr: Log messages to stderr instead of stdout. + consoletitle: Display progress in console window's titlebar. + nopart: Do not use temporary .part files. + updatetime: Use the Last-modified header to set output file timestamps. + writedescription: Write the video description to a .description file + writeinfojson: Write the video description to a .info.json file + writesubtitles: Write the video subtitles to a .srt file + subtitleslang: Language of the subtitles to download """ params = None @@ -93,6 +96,9 @@ class FileDownloader(object): self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)] self.params = params + if '%(stitle)s' in self.params['outtmpl']: + self.to_stderr(u'WARNING: %(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.') + @staticmethod def format_bytes(bytes): if bytes is None: @@ -139,23 +145,23 @@ class FileDownloader(object): new_min = max(bytes / 2.0, 1.0) new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB if elapsed_time < 0.001: - return long(new_max) + return int(new_max) rate = bytes / elapsed_time if rate > new_max: - return long(new_max) + return int(new_max) if rate < new_min: - return long(new_min) - return long(rate) + return int(new_min) + return int(rate) @staticmethod def parse_bytes(bytestr): - """Parse a string indicating a byte quantity into a long integer.""" + """Parse a string indicating a byte quantity into an integer.""" matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr) if matchobj is None: return None number = float(matchobj.group(1)) multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower()) - return long(round(number * multiplier)) + return int(round(number * multiplier)) def add_info_extractor(self, ie): """Add an InfoExtractor object to the end of the list.""" @@ -173,7 +179,6 @@ class FileDownloader(object): if not self.params.get('quiet', False): terminator = [u'\n', u''][skip_eol] output = message + terminator - if 'b' not in self._screen_file.mode or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr output = output.encode(preferredencoding(), 'ignore') self._screen_file.write(output) @@ -181,7 +186,8 @@ class FileDownloader(object): def to_stderr(self, message): """Print message to stderr.""" - print >>sys.stderr, message.encode(preferredencoding()) + assert type(message) == type(u'') + sys.stderr.write((message + u'\n').encode(preferredencoding())) def to_cons_title(self, message): """Set console/terminal window title to message.""" @@ -321,7 +327,7 @@ class FileDownloader(object): """Generate the output filename.""" try: template_dict = dict(info_dict) - template_dict['epoch'] = unicode(long(time.time())) + template_dict['epoch'] = unicode(int(time.time())) template_dict['autonumber'] = unicode('%05d' % self._num_downloads) filename = self.params['outtmpl'] % template_dict return filename @@ -334,17 +340,22 @@ class FileDownloader(object): title = info_dict['title'] matchtitle = self.params.get('matchtitle', False) - if matchtitle and not re.search(matchtitle, title, re.IGNORECASE): - return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"' + if matchtitle: + matchtitle = matchtitle.decode('utf8') + if not re.search(matchtitle, title, re.IGNORECASE): + return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"' rejecttitle = self.params.get('rejecttitle', False) - if rejecttitle and re.search(rejecttitle, title, re.IGNORECASE): - return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"' + if rejecttitle: + rejecttitle = rejecttitle.decode('utf8') + if re.search(rejecttitle, title, re.IGNORECASE): + return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"' return None def process_info(self, info_dict): """Process a single dictionary returned by an InfoExtractor.""" - info_dict['stitle'] = sanitize_filename(info_dict['title']) + # Keep for backwards compatibility + info_dict['stitle'] = info_dict['title'] reason = self._match_entry(info_dict) if reason is not None: @@ -357,20 +368,21 @@ class FileDownloader(object): raise MaxDownloadsReached() filename = self.prepare_filename(info_dict) - + filename = sanitize_filename(filename, self.params.get('restrictfilenames')) + # Forced printings if self.params.get('forcetitle', False): - print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace') + print(info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')) if self.params.get('forceurl', False): - print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace') + print(info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace')) if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict: - print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace') + print(info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace')) if self.params.get('forcedescription', False) and 'description' in info_dict: - print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace') + print(info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace')) if self.params.get('forcefilename', False) and filename is not None: - print filename.encode(preferredencoding(), 'xmlcharrefreplace') + print(filename.encode(preferredencoding(), 'xmlcharrefreplace')) if self.params.get('forceformat', False): - print info_dict['format'].encode(preferredencoding(), 'xmlcharrefreplace') + print(info_dict['format'].encode(preferredencoding(), 'xmlcharrefreplace')) # Do nothing else if in simulate mode if self.params.get('simulate', False): @@ -399,10 +411,10 @@ class FileDownloader(object): except (OSError, IOError): self.trouble(u'ERROR: Cannot write description file ' + descfn) return - + if self.params.get('writesubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']: # subtitles download errors are already managed as troubles in relevant IE - # that way it will silently go on when used with unsupporting IE + # that way it will silently go on when used with unsupporting IE try: srtfn = filename.rsplit('.', 1)[0] + u'.srt' self.report_writesubtitles(srtfn) @@ -448,7 +460,7 @@ class FileDownloader(object): except (ContentTooShortError, ), err: self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded)) return - + if success: try: self.post_process(filename, info_dict) @@ -634,7 +646,7 @@ class FileDownloader(object): data_len = long(data_len) + resume_len data_len_str = self.format_bytes(data_len) byte_counter = 0 + resume_len - block_size = 1024 + block_size = self.params.get('buffersize', 1024) start = time.time() while True: # Download and write @@ -660,7 +672,8 @@ class FileDownloader(object): except (IOError, OSError), err: self.trouble(u'\nERROR: unable to write data: %s' % str(err)) return False - block_size = self.best_block_size(after - before, len(data_block)) + if not self.params.get('noresizebuffer', False): + block_size = self.best_block_size(after - before, len(data_block)) # Progress message speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len) diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index 88973cce8..13b04ab5b 100644 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -102,6 +102,7 @@ class YoutubeIE(InfoExtractor): (?:https?://)? # http(s):// (optional) (?:youtu\.be/|(?:\w+\.)?youtube(?:-nocookie)?\.com/| tube\.majestyc\.net/) # the various hostnames, with wildcard subdomains + (?:.*?\#/)? # handle anchor (#/) redirect urls (?!view_play_list|my_playlists|artist|playlist) # ignore playlist URLs (?: # the various things that can precede the ID: (?:(?:v|embed|e)/) # v/ or embed/ or e/ @@ -212,9 +213,9 @@ class YoutubeIE(InfoExtractor): return srt def _print_formats(self, formats): - print 'Available formats:' + print('Available formats:') for x in formats: - print '%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'flv'), self._video_dimensions.get(x, '???')) + print('%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'flv'), self._video_dimensions.get(x, '???'))) def _real_initialize(self): if self._downloader is None: @@ -237,7 +238,7 @@ class YoutubeIE(InfoExtractor): else: raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE) except (IOError, netrc.NetrcParseError), err: - self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err)) + self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % compat_str(err)) return # Set language @@ -246,7 +247,7 @@ class YoutubeIE(InfoExtractor): self.report_lang() urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err)) + self._downloader.to_stderr(u'WARNING: unable to set language: %s' % compat_str(err)) return # No authentication to be performed @@ -269,7 +270,7 @@ class YoutubeIE(InfoExtractor): self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password') return except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err)) + self._downloader.to_stderr(u'WARNING: unable to log in: %s' % compat_str(err)) return # Confirm age @@ -282,7 +283,7 @@ class YoutubeIE(InfoExtractor): self.report_age_confirmation() age_results = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to confirm age: %s' % compat_str(err)) return def _real_extract(self, url): @@ -304,7 +305,7 @@ class YoutubeIE(InfoExtractor): try: video_webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err)) return # Attempt to extract SWF player URL @@ -326,7 +327,7 @@ class YoutubeIE(InfoExtractor): if 'token' in video_info: break except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % compat_str(err)) return if 'token' not in video_info: if 'reason' in video_info: @@ -389,7 +390,7 @@ class YoutubeIE(InfoExtractor): try: srt_list = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - raise Trouble(u'WARNING: unable to download video subtitles: %s' % str(err)) + raise Trouble(u'WARNING: unable to download video subtitles: %s' % compat_str(err)) srt_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', srt_list) srt_lang_list = dict((l[1], l[0]) for l in srt_lang_list) if not srt_lang_list: @@ -406,13 +407,19 @@ class YoutubeIE(InfoExtractor): try: srt_xml = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - raise Trouble(u'WARNING: unable to download video subtitles: %s' % str(err)) + raise Trouble(u'WARNING: unable to download video subtitles: %s' % compat_str(err)) if not srt_xml: raise Trouble(u'WARNING: unable to download video subtitles') video_subtitles = self._closed_captions_xml_to_srt(srt_xml.decode('utf-8')) except Trouble as trouble: self._downloader.trouble(trouble[0]) + if 'length_seconds' not in video_info: + self._downloader.trouble(u'WARNING: unable to extract video duration') + video_duration = '' + else: + video_duration = urllib.unquote_plus(video_info['length_seconds'][0]) + # token video_token = urllib.unquote_plus(video_info['token'][0]) @@ -479,7 +486,8 @@ class YoutubeIE(InfoExtractor): 'thumbnail': video_thumbnail.decode('utf-8'), 'description': video_description, 'player_url': player_url, - 'subtitles': video_subtitles + 'subtitles': video_subtitles, + 'duration': video_duration }) return results @@ -518,7 +526,7 @@ class MetacafeIE(InfoExtractor): self.report_disclaimer() disclaimer = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % compat_str(err)) return # Confirm age @@ -531,7 +539,7 @@ class MetacafeIE(InfoExtractor): self.report_age_confirmation() disclaimer = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to confirm age: %s' % compat_str(err)) return def _real_extract(self, url): @@ -555,7 +563,7 @@ class MetacafeIE(InfoExtractor): self.report_download_webpage(video_id) webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % compat_str(err)) return # Extract URL, uploader and title from webpage @@ -595,7 +603,7 @@ class MetacafeIE(InfoExtractor): return video_title = mobj.group(1).decode('utf-8') - mobj = re.search(r'(?ms)By:\s*<a .*?>(.+?)<', webpage) + mobj = re.search(r'submitter=(.*?);', webpage) if mobj is None: self._downloader.trouble(u'ERROR: unable to extract uploader nickname') return @@ -648,7 +656,7 @@ class DailymotionIE(InfoExtractor): self.report_download_webpage(video_id) webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % compat_str(err)) return # Extract URL, uploader and title from webpage @@ -684,9 +692,14 @@ class DailymotionIE(InfoExtractor): video_title = unescapeHTML(mobj.group('title').decode('utf-8')) video_uploader = u'NA' - mobj = re.search(r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a></span>', webpage) + mobj = re.search(r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>', webpage) if mobj is None: - self._downloader.trouble(u'WARNING: unable to extract uploader nickname') + # lookin for official user + mobj_official = re.search(r'<span rel="author"[^>]+?>([^<]+?)</span>', webpage) + if mobj_official is None: + self._downloader.trouble(u'WARNING: unable to extract uploader nickname') + else: + video_uploader = mobj_official.group(1) else: video_uploader = mobj.group(1) @@ -741,7 +754,7 @@ class GoogleIE(InfoExtractor): self.report_download_webpage(video_id) webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) return # Extract URL, uploader, and title from webpage @@ -780,7 +793,7 @@ class GoogleIE(InfoExtractor): try: webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) return mobj = re.search(r'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage) if mobj is None: @@ -836,7 +849,7 @@ class PhotobucketIE(InfoExtractor): self.report_download_webpage(video_id) webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) return # Extract URL, uploader, and title from webpage @@ -906,7 +919,7 @@ class YahooIE(InfoExtractor): try: webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) return mobj = re.search(r'\("id", "([0-9]+)"\);', webpage) @@ -930,7 +943,7 @@ class YahooIE(InfoExtractor): self.report_download_webpage(video_id) webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) return # Extract uploader and title from webpage @@ -988,7 +1001,7 @@ class YahooIE(InfoExtractor): self.report_download_webpage(video_id) webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) return # Extract media URL from playlist XML @@ -1017,7 +1030,7 @@ class VimeoIE(InfoExtractor): """Information extractor for vimeo.com.""" # _VALID_URL matches Vimeo URLs - _VALID_URL = r'(?:https?://)?(?:(?:www|player).)?vimeo\.com/(?:groups/[^/]+/)?(?:videos?/)?([0-9]+)' + _VALID_URL = r'(?:https?://)?(?:(?:www|player).)?vimeo\.com/(?:(?:groups|album)/[^/]+/)?(?:videos?/)?([0-9]+)' IE_NAME = u'vimeo' def __init__(self, downloader=None): @@ -1046,7 +1059,7 @@ class VimeoIE(InfoExtractor): self.report_download_webpage(video_id) webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) return # Now we begin extracting as much information as we can from what we @@ -1087,21 +1100,32 @@ class VimeoIE(InfoExtractor): timestamp = config['request']['timestamp'] # Vimeo specific: extract video codec and quality information + # First consider quality, then codecs, then take everything # TODO bind to format param codecs = [('h264', 'mp4'), ('vp8', 'flv'), ('vp6', 'flv')] - for codec in codecs: - if codec[0] in config["video"]["files"]: - video_codec = codec[0] - video_extension = codec[1] - if 'hd' in config["video"]["files"][codec[0]]: quality = 'hd' - else: quality = 'sd' + files = { 'hd': [], 'sd': [], 'other': []} + for codec_name, codec_extension in codecs: + if codec_name in config["video"]["files"]: + if 'hd' in config["video"]["files"][codec_name]: + files['hd'].append((codec_name, codec_extension, 'hd')) + elif 'sd' in config["video"]["files"][codec_name]: + files['sd'].append((codec_name, codec_extension, 'sd')) + else: + files['other'].append((codec_name, codec_extension, config["video"]["files"][codec_name][0])) + + for quality in ('hd', 'sd', 'other'): + if len(files[quality]) > 0: + video_quality = files[quality][0][2] + video_codec = files[quality][0][0] + video_extension = files[quality][0][1] + self._downloader.to_screen(u'[vimeo] %s: Downloading %s file at %s quality' % (video_id, video_codec.upper(), video_quality)) break else: self._downloader.trouble(u'ERROR: no known codec found') return video_url = "http://player.vimeo.com/play_redirect?clip_id=%s&sig=%s&time=%s&quality=%s&codecs=%s&type=moogaloop_local&embed_location=" \ - %(video_id, sig, timestamp, quality, video_codec.upper()) + %(video_id, sig, timestamp, video_quality, video_codec.upper()) return [{ 'id': video_id, @@ -1201,7 +1225,7 @@ class GenericIE(InfoExtractor): self.report_download_webpage(video_id) webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) return except ValueError, err: # since this is the last-resort InfoExtractor, if @@ -1322,7 +1346,7 @@ class YoutubeSearchIE(InfoExtractor): try: data = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download API page: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to download API page: %s' % compat_str(err)) return api_response = json.loads(data)['data'] @@ -1399,7 +1423,7 @@ class GoogleSearchIE(InfoExtractor): try: page = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err)) return # Extract video identifiers @@ -1482,7 +1506,7 @@ class YahooSearchIE(InfoExtractor): try: page = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err)) return # Extract video identifiers @@ -1508,7 +1532,7 @@ class YahooSearchIE(InfoExtractor): class YoutubePlaylistIE(InfoExtractor): """Information Extractor for YouTube playlists.""" - _VALID_URL = r'(?:https?://)?(?:\w+\.)?youtube\.com/(?:(?:course|view_play_list|my_playlists|artist|playlist)\?.*?(p|a|list)=|user/.*?/user/|p/|user/.*?#[pg]/c/)(?:PL|EC)?([0-9A-Za-z-_]+)(?:/.*?/([0-9A-Za-z_-]+))?.*' + _VALID_URL = r'(?:(?:https?://)?(?:\w+\.)?youtube\.com/(?:(?:course|view_play_list|my_playlists|artist|playlist)\?.*?(p|a|list)=|user/.*?/user/|p/|user/.*?#[pg]/c/)(?:PL|EC)?|PL|EC)([0-9A-Za-z-_]+)(?:/.*?/([0-9A-Za-z_-]+))?.*' _TEMPLATE_URL = 'http://www.youtube.com/%s?%s=%s&page=%s&gl=US&hl=en' _VIDEO_INDICATOR_TEMPLATE = r'/watch\?v=(.+?)&([^&"]+&)*list=.*?%s' _MORE_PAGES_INDICATOR = r'yt-uix-pager-next' @@ -1552,7 +1576,7 @@ class YoutubePlaylistIE(InfoExtractor): try: page = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err)) return # Extract video identifiers @@ -1609,7 +1633,7 @@ class YoutubeChannelIE(InfoExtractor): try: page = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err)) return # Extract video identifiers @@ -1672,7 +1696,7 @@ class YoutubeUserIE(InfoExtractor): try: page = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err)) return # Extract video identifiers @@ -1744,7 +1768,7 @@ class BlipTVUserIE(InfoExtractor): mobj = re.search(r'data-users-id="([^"]+)"', page) page_base = page_base % mobj.group(1) except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err)) return @@ -1832,7 +1856,7 @@ class DepositFilesIE(InfoExtractor): self.report_download_webpage(file_id) webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: Unable to retrieve file webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: Unable to retrieve file webpage: %s' % compat_str(err)) return # Search for the real file URL @@ -1949,7 +1973,7 @@ class FacebookIE(InfoExtractor): else: raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE) except (IOError, netrc.NetrcParseError), err: - self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err)) + self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % compat_str(err)) return if useremail is None: @@ -1969,7 +1993,7 @@ class FacebookIE(InfoExtractor): self._downloader.to_stderr(u'WARNING: unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.') return except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err)) + self._downloader.to_stderr(u'WARNING: unable to log in: %s' % compat_str(err)) return def _real_extract(self, url): @@ -1986,7 +2010,7 @@ class FacebookIE(InfoExtractor): page = urllib2.urlopen(request) video_webpage = page.read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err)) return # Start extracting information @@ -2120,13 +2144,13 @@ class BlipTVIE(InfoExtractor): 'urlhandle': urlh } except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % compat_str(err)) return if info is None: # Regular URL try: json_code = urlh.read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to read video info webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to read video info webpage: %s' % compat_str(err)) return try: @@ -2194,7 +2218,7 @@ class MyVideoIE(InfoExtractor): self.report_download_webpage(video_id) webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) return self.report_extraction(video_id) @@ -2229,6 +2253,25 @@ class ComedyCentralIE(InfoExtractor): _VALID_URL = r'^(:(?P<shortname>tds|thedailyshow|cr|colbert|colbertnation|colbertreport))|(https?://)?(www\.)?(?P<showname>thedailyshow|colbertnation)\.com/full-episodes/(?P<episode>.*)$' IE_NAME = u'comedycentral' + _available_formats = ['3500', '2200', '1700', '1200', '750', '400'] + + _video_extensions = { + '3500': 'mp4', + '2200': 'mp4', + '1700': 'mp4', + '1200': 'mp4', + '750': 'mp4', + '400': 'mp4', + } + _video_dimensions = { + '3500': '1280x720', + '2200': '960x540', + '1700': '768x432', + '1200': '640x360', + '750': '512x288', + '400': '384x216', + } + def report_extraction(self, episode_id): self._downloader.to_screen(u'[comedycentral] %s: Extracting information' % episode_id) @@ -2241,6 +2284,13 @@ class ComedyCentralIE(InfoExtractor): def report_player_url(self, episode_id): self._downloader.to_screen(u'[comedycentral] %s: Determining player URL' % episode_id) + + def _print_formats(self, formats): + print('Available formats:') + for x in formats: + print('%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'mp4'), self._video_dimensions.get(x, '???'))) + + def _real_extract(self, url): mobj = re.match(self._VALID_URL, url) if mobj is None: @@ -2281,10 +2331,19 @@ class ComedyCentralIE(InfoExtractor): epTitle = mobj.group('episode') mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*episode.*?:.*?))"', html) + if len(mMovieParams) == 0: - self._downloader.trouble(u'ERROR: unable to find Flash URL in webpage ' + url) - return + # The Colbert Report embeds the information in a without + # a URL prefix; so extract the alternate reference + # and then add the URL prefix manually. + altMovieParams = re.findall('data-mgid="([^"]*episode.*?:.*?)"', html) + if len(altMovieParams) == 0: + self._downloader.trouble(u'ERROR: unable to find Flash URL in webpage ' + url) + return + else: + mMovieParams = [("http://media.mtvnservices.com/" + altMovieParams[0], altMovieParams[0])] + playerUrl_raw = mMovieParams[0][0] self.report_player_url(epTitle) try: @@ -2333,10 +2392,31 @@ class ComedyCentralIE(InfoExtractor): if len(turls) == 0: self._downloader.trouble(u'\nERROR: unable to download ' + mediaId + ': No videos found') continue + + if self._downloader.params.get('listformats', None): + self._print_formats([i[0] for i in turls]) + return # For now, just pick the highest bitrate format,video_url = turls[-1] + # Get the format arg from the arg stream + req_format = self._downloader.params.get('format', None) + + # Select format if we can find one + for f,v in turls: + if f == req_format: + format, video_url = f, v + break + + # Patch to download from alternative CDN, which does not + # break on current RTMPDump builds + broken_cdn = "rtmpe://viacomccstrmfs.fplive.net/viacomccstrm/gsp.comedystor/" + better_cdn = "rtmpe://cp10740.edgefcs.net/ondemand/mtvnorigin/gsp.comedystor/" + + if video_url.startswith(broken_cdn): + video_url = video_url.replace(broken_cdn, better_cdn) + effTitle = showId + u'-' + epTitle info = { 'id': shortMediaId, @@ -2348,7 +2428,7 @@ class ComedyCentralIE(InfoExtractor): 'format': format, 'thumbnail': None, 'description': officialTitle, - 'player_url': playerUrl + 'player_url': None #playerUrl } results.append(info) @@ -2456,7 +2536,7 @@ class CollegeHumorIE(InfoExtractor): try: webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err)) return m = re.search(r'id="video:(?P<internalvideoid>[0-9]+)"', webpage) @@ -2475,7 +2555,7 @@ class CollegeHumorIE(InfoExtractor): try: metaXml = urllib2.urlopen(xmlUrl).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download video info XML: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to download video info XML: %s' % compat_str(err)) return mdoc = xml.etree.ElementTree.fromstring(metaXml) @@ -2521,7 +2601,7 @@ class XVideosIE(InfoExtractor): try: webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err)) return self.report_extraction(video_id) @@ -2607,7 +2687,7 @@ class SoundcloudIE(InfoExtractor): try: webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err)) return self.report_extraction('%s/%s' % (uploader, slug_title)) @@ -2634,7 +2714,7 @@ class SoundcloudIE(InfoExtractor): mobj = re.search('track-description-value"><p>(.*?)</p>', webpage) if mobj: description = mobj.group(1) - + # upload date upload_date = None mobj = re.search("pretty-date'>on ([\w]+ [\d]+, [\d]+ \d+:\d+)</abbr></h2>", webpage) @@ -2642,7 +2722,7 @@ class SoundcloudIE(InfoExtractor): try: upload_date = datetime.datetime.strptime(mobj.group(1), '%B %d, %Y %H:%M').strftime('%Y%m%d') except Exception, e: - self._downloader.to_stderr(str(e)) + self._downloader.to_stderr(compat_str(e)) # for soundcloud, a request to a cross domain is required for cookies request = urllib2.Request('http://media.soundcloud.com/crossdomain.xml', std_headers) @@ -2686,7 +2766,7 @@ class InfoQIE(InfoExtractor): try: webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err)) return self.report_extraction(url) @@ -2772,15 +2852,15 @@ class MixcloudIE(InfoExtractor): return None def _print_formats(self, formats): - print 'Available formats:' + print('Available formats:') for fmt in formats.keys(): for b in formats[fmt]: try: ext = formats[fmt][b][0] - print '%s\t%s\t[%s]' % (fmt, b, ext.split('.')[-1]) + print('%s\t%s\t[%s]' % (fmt, b, ext.split('.')[-1])) except TypeError: # we have no bitrate info ext = formats[fmt][0] - print '%s\t%s\t[%s]' % (fmt, '??', ext.split('.')[-1]) + print('%s\t%s\t[%s]' % (fmt, '??', ext.split('.')[-1])) break def _real_extract(self, url): @@ -2800,7 +2880,7 @@ class MixcloudIE(InfoExtractor): self.report_download_json(file_url) jsonData = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: Unable to retrieve file: %s' % str(err)) + self._downloader.trouble(u'ERROR: Unable to retrieve file: %s' % compat_str(err)) return # parse JSON @@ -2984,7 +3064,7 @@ class MTVIE(InfoExtractor): try: webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err)) return mobj = re.search(r'<meta name="mtv_vt" content="([^"]+)"/>', webpage) @@ -3017,7 +3097,7 @@ class MTVIE(InfoExtractor): try: metadataXml = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download video metadata: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to download video metadata: %s' % compat_str(err)) return mdoc = xml.etree.ElementTree.fromstring(metadataXml) @@ -3104,7 +3184,7 @@ class YoukuIE(InfoExtractor): self.report_download_webpage(video_id) jsondata = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error) as err: - self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) return self.report_extraction(video_id) @@ -3280,7 +3360,7 @@ class GooglePlusIE(InfoExtractor): try: webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: Unable to retrieve entry webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: Unable to retrieve entry webpage: %s' % compat_str(err)) return # Extract update date @@ -3322,7 +3402,7 @@ class GooglePlusIE(InfoExtractor): try: webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) return self.report_extract_vid_page(video_page) diff --git a/youtube_dl/PostProcessor.py b/youtube_dl/PostProcessor.py index f2e2aa1fa..0501cc7f6 100644 --- a/youtube_dl/PostProcessor.py +++ b/youtube_dl/PostProcessor.py @@ -73,7 +73,7 @@ class FFmpegExtractAudioPP(PostProcessor): def detect_executables(): def executable(exe): try: - subprocess.check_output([exe, '-version']) + subprocess.Popen([exe, '-version'], stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate() except OSError: return False return exe @@ -146,7 +146,7 @@ class FFmpegExtractAudioPP(PostProcessor): if int(self._preferredquality) < 10: more_opts += [self._exes['avconv'] and '-q:a' or '-aq', self._preferredquality] else: - more_opts += [self._exes['avconv'] and '-b:a' or '-ab', self._preferredquality] + more_opts += [self._exes['avconv'] and '-b:a' or '-ab', self._preferredquality + 'k'] else: # We convert the audio (lossy) acodec = {'mp3': 'libmp3lame', 'aac': 'aac', 'm4a': 'aac', 'vorbis': 'libvorbis', 'wav': None}[self._preferredcodec] @@ -156,7 +156,7 @@ class FFmpegExtractAudioPP(PostProcessor): if int(self._preferredquality) < 10: more_opts += [self._exes['avconv'] and '-q:a' or '-aq', self._preferredquality] else: - more_opts += [self._exes['avconv'] and '-b:a' or '-ab', self._preferredquality] + more_opts += [self._exes['avconv'] and '-b:a' or '-ab', self._preferredquality + 'k'] if self._preferredcodec == 'aac': more_opts += ['-f', 'adts'] if self._preferredcodec == 'm4a': diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index a52d69a30..92478aa6b 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -1,6 +1,8 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- +from __future__ import with_statement + __authors__ = ( 'Ricardo Garcia Gonzalez', 'Danny Colligan', @@ -19,7 +21,7 @@ __authors__ = ( ) __license__ = 'Public Domain' -__version__ = '2012.10.09' +__version__ = '2012.11.28' UPDATE_URL = 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl' UPDATE_URL_VERSION = 'https://raw.github.com/rg3/youtube-dl/master/LATEST_VERSION' @@ -46,7 +48,7 @@ from PostProcessor import * def updateSelf(downloader, filename): ''' Update the program file with the latest version from the repository ''' # Note: downloader only used for options - + if not os.access(filename, os.W_OK): sys.exit('ERROR: no write permissions on %s' % filename) @@ -64,7 +66,7 @@ def updateSelf(downloader, filename): directory = os.path.dirname(exe) if not os.access(directory, os.W_OK): sys.exit('ERROR: no write permissions on %s' % directory) - + try: urlh = urllib2.urlopen(UPDATE_URL_EXE) newcontent = urlh.read() @@ -73,20 +75,18 @@ def updateSelf(downloader, filename): outf.write(newcontent) except (IOError, OSError), err: sys.exit('ERROR: unable to download latest version') - + try: bat = os.path.join(directory, 'youtube-dl-updater.bat') b = open(bat, 'w') - - print >> b, """ + b.write(""" echo Updating youtube-dl... ping 127.0.0.1 -n 5 -w 1000 > NUL move /Y "%s.new" "%s" del "%s" - """ %(exe, exe, bat) - + \n""" %(exe, exe, bat)) b.close() - + os.startfile(bat) except (IOError, OSError), err: sys.exit('ERROR: unable to overwrite current version') @@ -187,6 +187,11 @@ def parseOpts(): dest='ratelimit', metavar='LIMIT', help='download rate limit (e.g. 50k or 44.6m)') general.add_option('-R', '--retries', dest='retries', metavar='RETRIES', help='number of retries (default is %default)', default=10) + general.add_option('--buffer-size', + dest='buffersize', metavar='SIZE', help='size of download buffer (e.g. 1024 or 16k) (default is %default)', default="1024") + general.add_option('--no-resize-buffer', + action='store_true', dest='noresizebuffer', + help='do not automatically adjust the buffer size. By default, the buffer size is automatically resized from an initial value of SIZE.', default=False) general.add_option('--dump-user-agent', action='store_true', dest='dump_user_agent', help='display the current browser identification', default=False) @@ -263,13 +268,18 @@ def parseOpts(): filesystem.add_option('-t', '--title', action='store_true', dest='usetitle', help='use title in file name', default=False) + filesystem.add_option('--id', + action='store_true', dest='useid', help='use video ID in file name', default=False) filesystem.add_option('-l', '--literal', - action='store_true', dest='useliteral', help='use literal title in file name', default=False) + action='store_true', dest='usetitle', help='[deprecated] alias of --title', default=False) filesystem.add_option('-A', '--auto-number', action='store_true', dest='autonumber', help='number downloaded files starting from 00000', default=False) filesystem.add_option('-o', '--output', - dest='outtmpl', metavar='TEMPLATE', help='output filename template. Use %(stitle)s to get the title, %(uploader)s for the uploader name, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id and %% for a literal percent. Use - to output to stdout.') + dest='outtmpl', metavar='TEMPLATE', help='output filename template. Use %(title)s to get the title, %(uploader)s for the uploader name, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id and %% for a literal percent. Use - to output to stdout.') + filesystem.add_option('--restrict-filenames', + action='store_true', dest='restrictfilenames', + help='Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames', default=False) filesystem.add_option('-a', '--batch-file', dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)') filesystem.add_option('-w', '--no-overwrites', @@ -294,7 +304,7 @@ def parseOpts(): help='write video metadata to a .info.json file', default=False) - postproc.add_option('--extract-audio', action='store_true', dest='extractaudio', default=False, + postproc.add_option('-x', '--extract-audio', action='store_true', dest='extractaudio', default=False, help='convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)') postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best', help='"best", "aac", "vorbis", "mp3", "m4a", or "wav"; best by default') @@ -422,10 +432,10 @@ def _real_main(): parser.error(u'using .netrc conflicts with giving username/password') if opts.password is not None and opts.username is None: parser.error(u'account username missing') - if opts.outtmpl is not None and (opts.useliteral or opts.usetitle or opts.autonumber): - parser.error(u'using output template conflicts with using title, literal title or auto number') - if opts.usetitle and opts.useliteral: - parser.error(u'using title conflicts with using literal title') + if opts.outtmpl is not None and (opts.usetitle or opts.autonumber or opts.useid): + parser.error(u'using output template conflicts with using title, video ID or auto number') + if opts.usetitle and opts.useid: + parser.error(u'using title conflicts with using video ID') if opts.username is not None and opts.password is None: opts.password = getpass.getpass(u'Type account password and press return:') if opts.ratelimit is not None: @@ -438,6 +448,11 @@ def _real_main(): opts.retries = long(opts.retries) except (TypeError, ValueError), err: parser.error(u'invalid retry count specified') + if opts.buffersize is not None: + numeric_buffersize = FileDownloader.parse_bytes(opts.buffersize) + if numeric_buffersize is None: + parser.error(u'invalid buffer size specified') + opts.buffersize = numeric_buffersize try: opts.playliststart = int(opts.playliststart) if opts.playliststart <= 0: @@ -476,19 +491,20 @@ def _real_main(): 'format_limit': opts.format_limit, 'listformats': opts.listformats, 'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding())) - or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s') - or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s') + or (opts.format == '-1' and opts.usetitle and u'%(title)s-%(id)s-%(format)s.%(ext)s') or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s') - or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s') - or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s') - or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s') - or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s') + or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s') + or (opts.usetitle and u'%(title)s-%(id)s.%(ext)s') + or (opts.useid and u'%(id)s.%(ext)s') or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s') or u'%(id)s.%(ext)s'), + 'restrictfilenames': opts.restrictfilenames, 'ignoreerrors': opts.ignoreerrors, 'ratelimit': opts.ratelimit, 'nooverwrites': opts.nooverwrites, 'retries': opts.retries, + 'buffersize': opts.buffersize, + 'noresizebuffer': opts.noresizebuffer, 'continuedl': opts.continue_dl, 'noprogress': opts.noprogress, 'playliststart': opts.playliststart, @@ -528,7 +544,7 @@ def _real_main(): parser.error(u'you must provide at least one URL') else: sys.exit() - + try: retcode = fd.download(all_urls) except MaxDownloadsReached: diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 839da17d0..4ace22c2f 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -26,6 +26,11 @@ std_headers = { 'Accept-Language': 'en-us,en;q=0.5', } +try: + compat_str = unicode # Python 2 +except NameError: + compat_str = str + def preferredencoding(): """Get preferred encoding. @@ -83,7 +88,6 @@ class IDParser(HTMLParser.HTMLParser): HTMLParser.HTMLParser.__init__(self) def error(self, message): - print >> sys.stderr, self.getpos() if self.error_count > 10 or self.started: raise HTMLParser.HTMLParseError(message, self.getpos()) self.rawdata = '\n'.join(self.html.split('\n')[self.getpos()[0]:]) # skip one line @@ -190,14 +194,36 @@ def timeconvert(timestr): if timetuple is not None: timestamp = email.utils.mktime_tz(timetuple) return timestamp - -def sanitize_filename(s): - """Sanitizes a string so it could be used as part of a filename.""" + +def sanitize_filename(s, restricted=False): + """Sanitizes a string so it could be used as part of a filename. + If restricted is set, use a stricter subset of allowed characters. + """ def replace_insane(char): - if char in u' .\\/|?*<>:"' or ord(char) < 32: + if char == '?' or ord(char) < 32 or ord(char) == 127: + return '' + elif char == '"': + return '' if restricted else '\'' + elif char == ':': + return '_-' if restricted else ' -' + elif char in '\\/|*<>': + return '_' + if restricted and (char in '&\'' or char.isspace()): + return '_' + if restricted and ord(char) > 127: return '_' return char - return u''.join(map(replace_insane, s)).strip('_') + + result = u''.join(map(replace_insane, s)) + while '__' in result: + result = result.replace('__', '_') + result = result.strip('_') + # Common case of "Foreign band name - English song title" + if restricted and result.startswith('-_'): + result = result[2:] + if not result: + result = '_' + return result def orderedSet(iterable): """ Remove all duplicates from the input iterable """ |
