diff options
Diffstat (limited to 'youtube_dl')
-rw-r--r-- | youtube_dl/FileDownloader.py | 105 | ||||
-rw-r--r-- | youtube_dl/InfoExtractors.py | 142 | ||||
-rw-r--r-- | youtube_dl/PostProcessor.py | 4 | ||||
-rw-r--r-- | youtube_dl/__init__.py | 46 | ||||
-rw-r--r-- | youtube_dl/utils.py | 30 |
5 files changed, 185 insertions, 142 deletions
diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index ed5a79f13..4c79be432 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -44,37 +44,38 @@ class FileDownloader(object): Available options: - username: Username for authentication purposes. - password: Password for authentication purposes. - usenetrc: Use netrc for authentication instead. - quiet: Do not print messages to stdout. - forceurl: Force printing final URL. - forcetitle: Force printing title. - forcethumbnail: Force printing thumbnail URL. - forcedescription: Force printing description. - forcefilename: Force printing final filename. - simulate: Do not download the video files. - format: Video format code. - format_limit: Highest quality format to try. - outtmpl: Template for output names. - ignoreerrors: Do not stop on download errors. - ratelimit: Download speed limit, in bytes/sec. - nooverwrites: Prevent overwriting files. - retries: Number of times to retry for HTTP error 5xx - continuedl: Try to continue downloads if possible. - noprogress: Do not print the progress bar. - playliststart: Playlist item to start at. - playlistend: Playlist item to end at. - matchtitle: Download only matching titles. - rejecttitle: Reject downloads for matching titles. - logtostderr: Log messages to stderr instead of stdout. - consoletitle: Display progress in console window's titlebar. - nopart: Do not use temporary .part files. - updatetime: Use the Last-modified header to set output file timestamps. - writedescription: Write the video description to a .description file - writeinfojson: Write the video description to a .info.json file - writesubtitles: Write the video subtitles to a .srt file - subtitleslang: Language of the subtitles to download + username: Username for authentication purposes. + password: Password for authentication purposes. + usenetrc: Use netrc for authentication instead. + quiet: Do not print messages to stdout. + forceurl: Force printing final URL. + forcetitle: Force printing title. + forcethumbnail: Force printing thumbnail URL. + forcedescription: Force printing description. + forcefilename: Force printing final filename. + simulate: Do not download the video files. + format: Video format code. + format_limit: Highest quality format to try. + outtmpl: Template for output names. + restrictfilenames: Do not allow "&" and spaces in file names + ignoreerrors: Do not stop on download errors. + ratelimit: Download speed limit, in bytes/sec. + nooverwrites: Prevent overwriting files. + retries: Number of times to retry for HTTP error 5xx + continuedl: Try to continue downloads if possible. + noprogress: Do not print the progress bar. + playliststart: Playlist item to start at. + playlistend: Playlist item to end at. + matchtitle: Download only matching titles. + rejecttitle: Reject downloads for matching titles. + logtostderr: Log messages to stderr instead of stdout. + consoletitle: Display progress in console window's titlebar. + nopart: Do not use temporary .part files. + updatetime: Use the Last-modified header to set output file timestamps. + writedescription: Write the video description to a .description file + writeinfojson: Write the video description to a .info.json file + writesubtitles: Write the video subtitles to a .srt file + subtitleslang: Language of the subtitles to download """ params = None @@ -139,23 +140,23 @@ class FileDownloader(object): new_min = max(bytes / 2.0, 1.0) new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB if elapsed_time < 0.001: - return long(new_max) + return int(new_max) rate = bytes / elapsed_time if rate > new_max: - return long(new_max) + return int(new_max) if rate < new_min: - return long(new_min) - return long(rate) + return int(new_min) + return int(rate) @staticmethod def parse_bytes(bytestr): - """Parse a string indicating a byte quantity into a long integer.""" + """Parse a string indicating a byte quantity into an integer.""" matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr) if matchobj is None: return None number = float(matchobj.group(1)) multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower()) - return long(round(number * multiplier)) + return int(round(number * multiplier)) def add_info_extractor(self, ie): """Add an InfoExtractor object to the end of the list.""" @@ -180,7 +181,8 @@ class FileDownloader(object): def to_stderr(self, message): """Print message to stderr.""" - print >>sys.stderr, message.encode(preferredencoding()) + assert type(message) == type(u'') + sys.stderr.write((message + u'\n').encode(preferredencoding())) def to_cons_title(self, message): """Set console/terminal window title to message.""" @@ -322,6 +324,7 @@ class FileDownloader(object): template_dict = dict(info_dict) template_dict['epoch'] = unicode(long(time.time())) template_dict['autonumber'] = unicode('%05d' % self._num_downloads) + template_dict['title'] = template_dict['stitle'] # Keep both for backwards compatibility filename = self.params['outtmpl'] % template_dict return filename except (ValueError, KeyError), err: @@ -333,17 +336,21 @@ class FileDownloader(object): title = info_dict['title'] matchtitle = self.params.get('matchtitle', False) - if matchtitle and not re.search(matchtitle, title, re.IGNORECASE): - return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"' + if matchtitle: + matchtitle = matchtitle.decode('utf8') + if not re.search(matchtitle, title, re.IGNORECASE): + return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"' rejecttitle = self.params.get('rejecttitle', False) - if rejecttitle and re.search(rejecttitle, title, re.IGNORECASE): - return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"' + if rejecttitle: + rejecttitle = rejecttitle.decode('utf8') + if re.search(rejecttitle, title, re.IGNORECASE): + return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"' return None def process_info(self, info_dict): """Process a single dictionary returned by an InfoExtractor.""" - info_dict['stitle'] = sanitize_filename(info_dict['title']) + info_dict['stitle'] = sanitize_filename(info_dict['title'], self.params.get('restrictfilenames')) reason = self._match_entry(info_dict) if reason is not None: @@ -359,17 +366,17 @@ class FileDownloader(object): # Forced printings if self.params.get('forcetitle', False): - print info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace') + print(info_dict['title'].encode(preferredencoding(), 'xmlcharrefreplace')) if self.params.get('forceurl', False): - print info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace') + print(info_dict['url'].encode(preferredencoding(), 'xmlcharrefreplace')) if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict: - print info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace') + print(info_dict['thumbnail'].encode(preferredencoding(), 'xmlcharrefreplace')) if self.params.get('forcedescription', False) and 'description' in info_dict: - print info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace') + print(info_dict['description'].encode(preferredencoding(), 'xmlcharrefreplace')) if self.params.get('forcefilename', False) and filename is not None: - print filename.encode(preferredencoding(), 'xmlcharrefreplace') + print(filename.encode(preferredencoding(), 'xmlcharrefreplace')) if self.params.get('forceformat', False): - print info_dict['format'].encode(preferredencoding(), 'xmlcharrefreplace') + print(info_dict['format'].encode(preferredencoding(), 'xmlcharrefreplace')) # Do nothing else if in simulate mode if self.params.get('simulate', False): diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index 6e59ba8fd..35ba6cc5c 100644 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -213,9 +213,9 @@ class YoutubeIE(InfoExtractor): return srt def _print_formats(self, formats): - print 'Available formats:' + print('Available formats:') for x in formats: - print '%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'flv'), self._video_dimensions.get(x, '???')) + print('%s\t:\t%s\t[%s]' %(x, self._video_extensions.get(x, 'flv'), self._video_dimensions.get(x, '???'))) def _real_initialize(self): if self._downloader is None: @@ -238,7 +238,7 @@ class YoutubeIE(InfoExtractor): else: raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE) except (IOError, netrc.NetrcParseError), err: - self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err)) + self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % compat_str(err)) return # Set language @@ -247,7 +247,7 @@ class YoutubeIE(InfoExtractor): self.report_lang() urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.to_stderr(u'WARNING: unable to set language: %s' % str(err)) + self._downloader.to_stderr(u'WARNING: unable to set language: %s' % compat_str(err)) return # No authentication to be performed @@ -270,7 +270,7 @@ class YoutubeIE(InfoExtractor): self._downloader.to_stderr(u'WARNING: unable to log in: bad username or password') return except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err)) + self._downloader.to_stderr(u'WARNING: unable to log in: %s' % compat_str(err)) return # Confirm age @@ -283,7 +283,7 @@ class YoutubeIE(InfoExtractor): self.report_age_confirmation() age_results = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to confirm age: %s' % compat_str(err)) return def _real_extract(self, url): @@ -305,7 +305,7 @@ class YoutubeIE(InfoExtractor): try: video_webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err)) return # Attempt to extract SWF player URL @@ -327,7 +327,7 @@ class YoutubeIE(InfoExtractor): if 'token' in video_info: break except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % compat_str(err)) return if 'token' not in video_info: if 'reason' in video_info: @@ -390,7 +390,7 @@ class YoutubeIE(InfoExtractor): try: srt_list = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - raise Trouble(u'WARNING: unable to download video subtitles: %s' % str(err)) + raise Trouble(u'WARNING: unable to download video subtitles: %s' % compat_str(err)) srt_lang_list = re.findall(r'name="([^"]*)"[^>]+lang_code="([\w\-]+)"', srt_list) srt_lang_list = dict((l[1], l[0]) for l in srt_lang_list) if not srt_lang_list: @@ -407,7 +407,7 @@ class YoutubeIE(InfoExtractor): try: srt_xml = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - raise Trouble(u'WARNING: unable to download video subtitles: %s' % str(err)) + raise Trouble(u'WARNING: unable to download video subtitles: %s' % compat_str(err)) if not srt_xml: raise Trouble(u'WARNING: unable to download video subtitles') video_subtitles = self._closed_captions_xml_to_srt(srt_xml.decode('utf-8')) @@ -526,7 +526,7 @@ class MetacafeIE(InfoExtractor): self.report_disclaimer() disclaimer = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to retrieve disclaimer: %s' % compat_str(err)) return # Confirm age @@ -539,7 +539,7 @@ class MetacafeIE(InfoExtractor): self.report_age_confirmation() disclaimer = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to confirm age: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to confirm age: %s' % compat_str(err)) return def _real_extract(self, url): @@ -563,7 +563,7 @@ class MetacafeIE(InfoExtractor): self.report_download_webpage(video_id) webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % compat_str(err)) return # Extract URL, uploader and title from webpage @@ -603,7 +603,7 @@ class MetacafeIE(InfoExtractor): return video_title = mobj.group(1).decode('utf-8') - mobj = re.search(r'(?ms)By:\s*<a .*?>(.+?)<', webpage) + mobj = re.search(r'submitter=(.*?);', webpage) if mobj is None: self._downloader.trouble(u'ERROR: unable to extract uploader nickname') return @@ -656,7 +656,7 @@ class DailymotionIE(InfoExtractor): self.report_download_webpage(video_id) webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable retrieve video webpage: %s' % compat_str(err)) return # Extract URL, uploader and title from webpage @@ -692,9 +692,14 @@ class DailymotionIE(InfoExtractor): video_title = unescapeHTML(mobj.group('title').decode('utf-8')) video_uploader = u'NA' - mobj = re.search(r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a></span>', webpage) + mobj = re.search(r'(?im)<span class="owner[^\"]+?">[^<]+?<a [^>]+?>([^<]+?)</a>', webpage) if mobj is None: - self._downloader.trouble(u'WARNING: unable to extract uploader nickname') + # lookin for official user + mobj_official = re.search(r'<span rel="author"[^>]+?>([^<]+?)</span>', webpage) + if mobj_official is None: + self._downloader.trouble(u'WARNING: unable to extract uploader nickname') + else: + video_uploader = mobj_official.group(1) else: video_uploader = mobj.group(1) @@ -749,7 +754,7 @@ class GoogleIE(InfoExtractor): self.report_download_webpage(video_id) webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) return # Extract URL, uploader, and title from webpage @@ -788,7 +793,7 @@ class GoogleIE(InfoExtractor): try: webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) return mobj = re.search(r'<img class=thumbnail-img (?:.* )?src=(http.*)>', webpage) if mobj is None: @@ -844,7 +849,7 @@ class PhotobucketIE(InfoExtractor): self.report_download_webpage(video_id) webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) return # Extract URL, uploader, and title from webpage @@ -914,7 +919,7 @@ class YahooIE(InfoExtractor): try: webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) return mobj = re.search(r'\("id", "([0-9]+)"\);', webpage) @@ -938,7 +943,7 @@ class YahooIE(InfoExtractor): self.report_download_webpage(video_id) webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) return # Extract uploader and title from webpage @@ -996,7 +1001,7 @@ class YahooIE(InfoExtractor): self.report_download_webpage(video_id) webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) return # Extract media URL from playlist XML @@ -1025,7 +1030,7 @@ class VimeoIE(InfoExtractor): """Information extractor for vimeo.com.""" # _VALID_URL matches Vimeo URLs - _VALID_URL = r'(?:https?://)?(?:(?:www|player).)?vimeo\.com/(?:groups/[^/]+/)?(?:videos?/)?([0-9]+)' + _VALID_URL = r'(?:https?://)?(?:(?:www|player).)?vimeo\.com/(?:(?:groups|album)/[^/]+/)?(?:videos?/)?([0-9]+)' IE_NAME = u'vimeo' def __init__(self, downloader=None): @@ -1054,7 +1059,7 @@ class VimeoIE(InfoExtractor): self.report_download_webpage(video_id) webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) return # Now we begin extracting as much information as we can from what we @@ -1095,21 +1100,32 @@ class VimeoIE(InfoExtractor): timestamp = config['request']['timestamp'] # Vimeo specific: extract video codec and quality information + # First consider quality, then codecs, then take everything # TODO bind to format param codecs = [('h264', 'mp4'), ('vp8', 'flv'), ('vp6', 'flv')] - for codec in codecs: - if codec[0] in config["video"]["files"]: - video_codec = codec[0] - video_extension = codec[1] - if 'hd' in config["video"]["files"][codec[0]]: quality = 'hd' - else: quality = 'sd' + files = { 'hd': [], 'sd': [], 'other': []} + for codec_name, codec_extension in codecs: + if codec_name in config["video"]["files"]: + if 'hd' in config["video"]["files"][codec_name]: + files['hd'].append((codec_name, codec_extension, 'hd')) + elif 'sd' in config["video"]["files"][codec_name]: + files['sd'].append((codec_name, codec_extension, 'sd')) + else: + files['other'].append((codec_name, codec_extension, config["video"]["files"][codec_name][0])) + + for quality in ('hd', 'sd', 'other'): + if len(files[quality]) > 0: + video_quality = files[quality][0][2] + video_codec = files[quality][0][0] + video_extension = files[quality][0][1] + self._downloader.to_screen(u'[vimeo] %s: Downloading %s file at %s quality' % (video_id, video_codec.upper(), video_quality)) break else: self._downloader.trouble(u'ERROR: no known codec found') return video_url = "http://player.vimeo.com/play_redirect?clip_id=%s&sig=%s&time=%s&quality=%s&codecs=%s&type=moogaloop_local&embed_location=" \ - %(video_id, sig, timestamp, quality, video_codec.upper()) + %(video_id, sig, timestamp, video_quality, video_codec.upper()) return [{ 'id': video_id, @@ -1209,7 +1225,7 @@ class GenericIE(InfoExtractor): self.report_download_webpage(video_id) webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) return except ValueError, err: # since this is the last-resort InfoExtractor, if @@ -1330,7 +1346,7 @@ class YoutubeSearchIE(InfoExtractor): try: data = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download API page: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to download API page: %s' % compat_str(err)) return api_response = json.loads(data)['data'] @@ -1407,7 +1423,7 @@ class GoogleSearchIE(InfoExtractor): try: page = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err)) return # Extract video identifiers @@ -1490,7 +1506,7 @@ class YahooSearchIE(InfoExtractor): try: page = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err)) return # Extract video identifiers @@ -1560,7 +1576,7 @@ class YoutubePlaylistIE(InfoExtractor): try: page = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err)) return # Extract video identifiers @@ -1617,7 +1633,7 @@ class YoutubeChannelIE(InfoExtractor): try: page = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err)) return # Extract video identifiers @@ -1680,7 +1696,7 @@ class YoutubeUserIE(InfoExtractor): try: page = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err)) return # Extract video identifiers @@ -1752,7 +1768,7 @@ class BlipTVUserIE(InfoExtractor): mobj = re.search(r'data-users-id="([^"]+)"', page) page_base = page_base % mobj.group(1) except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err)) return @@ -1840,7 +1856,7 @@ class DepositFilesIE(InfoExtractor): self.report_download_webpage(file_id) webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: Unable to retrieve file webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: Unable to retrieve file webpage: %s' % compat_str(err)) return # Search for the real file URL @@ -1957,7 +1973,7 @@ class FacebookIE(InfoExtractor): else: raise netrc.NetrcParseError('No authenticators for %s' % self._NETRC_MACHINE) except (IOError, netrc.NetrcParseError), err: - self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % str(err)) + self._downloader.to_stderr(u'WARNING: parsing .netrc: %s' % compat_str(err)) return if useremail is None: @@ -1977,7 +1993,7 @@ class FacebookIE(InfoExtractor): self._downloader.to_stderr(u'WARNING: unable to log in: bad username/password, or exceded login rate limit (~3/min). Check credentials or wait.') return except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.to_stderr(u'WARNING: unable to log in: %s' % str(err)) + self._downloader.to_stderr(u'WARNING: unable to log in: %s' % compat_str(err)) return def _real_extract(self, url): @@ -1994,7 +2010,7 @@ class FacebookIE(InfoExtractor): page = urllib2.urlopen(request) video_webpage = page.read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err)) return # Start extracting information @@ -2128,13 +2144,13 @@ class BlipTVIE(InfoExtractor): 'urlhandle': urlh } except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to download video info webpage: %s' % compat_str(err)) return if info is None: # Regular URL try: json_code = urlh.read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to read video info webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to read video info webpage: %s' % compat_str(err)) return try: @@ -2202,7 +2218,7 @@ class MyVideoIE(InfoExtractor): self.report_download_webpage(video_id) webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) return self.report_extraction(video_id) @@ -2464,7 +2480,7 @@ class CollegeHumorIE(InfoExtractor): try: webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err)) return m = re.search(r'id="video:(?P<internalvideoid>[0-9]+)"', webpage) @@ -2483,7 +2499,7 @@ class CollegeHumorIE(InfoExtractor): try: metaXml = urllib2.urlopen(xmlUrl).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download video info XML: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to download video info XML: %s' % compat_str(err)) return mdoc = xml.etree.ElementTree.fromstring(metaXml) @@ -2529,7 +2545,7 @@ class XVideosIE(InfoExtractor): try: webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err)) return self.report_extraction(video_id) @@ -2615,7 +2631,7 @@ class SoundcloudIE(InfoExtractor): try: webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err)) return self.report_extraction('%s/%s' % (uploader, slug_title)) @@ -2642,7 +2658,7 @@ class SoundcloudIE(InfoExtractor): mobj = re.search('track-description-value"><p>(.*?)</p>', webpage) if mobj: description = mobj.group(1) - + # upload date upload_date = None mobj = re.search("pretty-date'>on ([\w]+ [\d]+, [\d]+ \d+:\d+)</abbr></h2>", webpage) @@ -2650,7 +2666,7 @@ class SoundcloudIE(InfoExtractor): try: upload_date = datetime.datetime.strptime(mobj.group(1), '%B %d, %Y %H:%M').strftime('%Y%m%d') except Exception, e: - self._downloader.to_stderr(str(e)) + self._downloader.to_stderr(compat_str(e)) # for soundcloud, a request to a cross domain is required for cookies request = urllib2.Request('http://media.soundcloud.com/crossdomain.xml', std_headers) @@ -2694,7 +2710,7 @@ class InfoQIE(InfoExtractor): try: webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err)) return self.report_extraction(url) @@ -2780,15 +2796,15 @@ class MixcloudIE(InfoExtractor): return None def _print_formats(self, formats): - print 'Available formats:' + print('Available formats:') for fmt in formats.keys(): for b in formats[fmt]: try: ext = formats[fmt][b][0] - print '%s\t%s\t[%s]' % (fmt, b, ext.split('.')[-1]) + print('%s\t%s\t[%s]' % (fmt, b, ext.split('.')[-1])) except TypeError: # we have no bitrate info ext = formats[fmt][0] - print '%s\t%s\t[%s]' % (fmt, '??', ext.split('.')[-1]) + print('%s\t%s\t[%s]' % (fmt, '??', ext.split('.')[-1])) break def _real_extract(self, url): @@ -2808,7 +2824,7 @@ class MixcloudIE(InfoExtractor): self.report_download_json(file_url) jsonData = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: Unable to retrieve file: %s' % str(err)) + self._downloader.trouble(u'ERROR: Unable to retrieve file: %s' % compat_str(err)) return # parse JSON @@ -2992,7 +3008,7 @@ class MTVIE(InfoExtractor): try: webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to download video webpage: %s' % compat_str(err)) return mobj = re.search(r'<meta name="mtv_vt" content="([^"]+)"/>', webpage) @@ -3025,7 +3041,7 @@ class MTVIE(InfoExtractor): try: metadataXml = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: unable to download video metadata: %s' % str(err)) + self._downloader.trouble(u'ERROR: unable to download video metadata: %s' % compat_str(err)) return mdoc = xml.etree.ElementTree.fromstring(metadataXml) @@ -3112,7 +3128,7 @@ class YoukuIE(InfoExtractor): self.report_download_webpage(video_id) jsondata = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error) as err: - self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) return self.report_extraction(video_id) @@ -3288,7 +3304,7 @@ class GooglePlusIE(InfoExtractor): try: webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: Unable to retrieve entry webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: Unable to retrieve entry webpage: %s' % compat_str(err)) return # Extract update date @@ -3330,7 +3346,7 @@ class GooglePlusIE(InfoExtractor): try: webpage = urllib2.urlopen(request).read() except (urllib2.URLError, httplib.HTTPException, socket.error), err: - self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % str(err)) + self._downloader.trouble(u'ERROR: Unable to retrieve video webpage: %s' % compat_str(err)) return self.report_extract_vid_page(video_page) diff --git a/youtube_dl/PostProcessor.py b/youtube_dl/PostProcessor.py index d14fe71a2..0501cc7f6 100644 --- a/youtube_dl/PostProcessor.py +++ b/youtube_dl/PostProcessor.py @@ -146,7 +146,7 @@ class FFmpegExtractAudioPP(PostProcessor): if int(self._preferredquality) < 10: more_opts += [self._exes['avconv'] and '-q:a' or '-aq', self._preferredquality] else: - more_opts += [self._exes['avconv'] and '-b:a' or '-ab', self._preferredquality] + more_opts += [self._exes['avconv'] and '-b:a' or '-ab', self._preferredquality + 'k'] else: # We convert the audio (lossy) acodec = {'mp3': 'libmp3lame', 'aac': 'aac', 'm4a': 'aac', 'vorbis': 'libvorbis', 'wav': None}[self._preferredcodec] @@ -156,7 +156,7 @@ class FFmpegExtractAudioPP(PostProcessor): if int(self._preferredquality) < 10: more_opts += [self._exes['avconv'] and '-q:a' or '-aq', self._preferredquality] else: - more_opts += [self._exes['avconv'] and '-b:a' or '-ab', self._preferredquality] + more_opts += [self._exes['avconv'] and '-b:a' or '-ab', self._preferredquality + 'k'] if self._preferredcodec == 'aac': more_opts += ['-f', 'adts'] if self._preferredcodec == 'm4a': diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index ad9a06c55..cbf1dd1a7 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -21,7 +21,7 @@ __authors__ = ( ) __license__ = 'Public Domain' -__version__ = '2012.10.09' +__version__ = '2012.11.27' UPDATE_URL = 'https://raw.github.com/rg3/youtube-dl/master/youtube-dl' UPDATE_URL_VERSION = 'https://raw.github.com/rg3/youtube-dl/master/LATEST_VERSION' @@ -48,7 +48,7 @@ from PostProcessor import * def updateSelf(downloader, filename): ''' Update the program file with the latest version from the repository ''' # Note: downloader only used for options - + if not os.access(filename, os.W_OK): sys.exit('ERROR: no write permissions on %s' % filename) @@ -66,7 +66,7 @@ def updateSelf(downloader, filename): directory = os.path.dirname(exe) if not os.access(directory, os.W_OK): sys.exit('ERROR: no write permissions on %s' % directory) - + try: urlh = urllib2.urlopen(UPDATE_URL_EXE) newcontent = urlh.read() @@ -75,20 +75,18 @@ def updateSelf(downloader, filename): outf.write(newcontent) except (IOError, OSError), err: sys.exit('ERROR: unable to download latest version') - + try: bat = os.path.join(directory, 'youtube-dl-updater.bat') b = open(bat, 'w') - - print >> b, """ + b.write(""" echo Updating youtube-dl... ping 127.0.0.1 -n 5 -w 1000 > NUL move /Y "%s.new" "%s" del "%s" - """ %(exe, exe, bat) - + \n""" %(exe, exe, bat)) b.close() - + os.startfile(bat) except (IOError, OSError), err: sys.exit('ERROR: unable to overwrite current version') @@ -265,13 +263,18 @@ def parseOpts(): filesystem.add_option('-t', '--title', action='store_true', dest='usetitle', help='use title in file name', default=False) + filesystem.add_option('--id', + action='store_true', dest='useid', help='use video ID in file name', default=False) filesystem.add_option('-l', '--literal', - action='store_true', dest='useliteral', help='use literal title in file name', default=False) + action='store_true', dest='usetitle', help='[deprecated] alias of --title', default=False) filesystem.add_option('-A', '--auto-number', action='store_true', dest='autonumber', help='number downloaded files starting from 00000', default=False) filesystem.add_option('-o', '--output', - dest='outtmpl', metavar='TEMPLATE', help='output filename template. Use %(stitle)s to get the title, %(uploader)s for the uploader name, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id and %% for a literal percent. Use - to output to stdout.') + dest='outtmpl', metavar='TEMPLATE', help='output filename template. Use %(title)s to get the title, %(uploader)s for the uploader name, %(autonumber)s to get an automatically incremented number, %(ext)s for the filename extension, %(upload_date)s for the upload date (YYYYMMDD), %(extractor)s for the provider (youtube, metacafe, etc), %(id)s for the video id and %% for a literal percent. Use - to output to stdout.') + filesystem.add_option('--restrict-filenames', + action='store_true', dest='restrictfilenames', + help='Avoid some characters such as "&" and spaces in filenames', default=False) filesystem.add_option('-a', '--batch-file', dest='batchfile', metavar='FILE', help='file containing URLs to download (\'-\' for stdin)') filesystem.add_option('-w', '--no-overwrites', @@ -296,7 +299,7 @@ def parseOpts(): help='write video metadata to a .info.json file', default=False) - postproc.add_option('--extract-audio', action='store_true', dest='extractaudio', default=False, + postproc.add_option('-x', '--extract-audio', action='store_true', dest='extractaudio', default=False, help='convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)') postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best', help='"best", "aac", "vorbis", "mp3", "m4a", or "wav"; best by default') @@ -424,10 +427,10 @@ def _real_main(): parser.error(u'using .netrc conflicts with giving username/password') if opts.password is not None and opts.username is None: parser.error(u'account username missing') - if opts.outtmpl is not None and (opts.useliteral or opts.usetitle or opts.autonumber): - parser.error(u'using output template conflicts with using title, literal title or auto number') - if opts.usetitle and opts.useliteral: - parser.error(u'using title conflicts with using literal title') + if opts.outtmpl is not None and (opts.usetitle or opts.autonumber or opts.useid): + parser.error(u'using output template conflicts with using title, video ID or auto number') + if opts.usetitle and opts.useid: + parser.error(u'using title conflicts with using video ID') if opts.username is not None and opts.password is None: opts.password = getpass.getpass(u'Type account password and press return:') if opts.ratelimit is not None: @@ -478,15 +481,14 @@ def _real_main(): 'format_limit': opts.format_limit, 'listformats': opts.listformats, 'outtmpl': ((opts.outtmpl is not None and opts.outtmpl.decode(preferredencoding())) - or (opts.format == '-1' and opts.usetitle and u'%(stitle)s-%(id)s-%(format)s.%(ext)s') - or (opts.format == '-1' and opts.useliteral and u'%(title)s-%(id)s-%(format)s.%(ext)s') + or (opts.format == '-1' and opts.usetitle and u'%(title)s-%(id)s-%(format)s.%(ext)s') or (opts.format == '-1' and u'%(id)s-%(format)s.%(ext)s') - or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(stitle)s-%(id)s.%(ext)s') - or (opts.useliteral and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s') - or (opts.usetitle and u'%(stitle)s-%(id)s.%(ext)s') - or (opts.useliteral and u'%(title)s-%(id)s.%(ext)s') + or (opts.usetitle and opts.autonumber and u'%(autonumber)s-%(title)s-%(id)s.%(ext)s') + or (opts.usetitle and u'%(title)s-%(id)s.%(ext)s') + or (opts.useid and u'%(id)s.%(ext)s') or (opts.autonumber and u'%(autonumber)s-%(id)s.%(ext)s') or u'%(id)s.%(ext)s'), + 'restrictfilenames': opts.restrictfilenames, 'ignoreerrors': opts.ignoreerrors, 'ratelimit': opts.ratelimit, 'nooverwrites': opts.nooverwrites, diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 839da17d0..1f60d34ae 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -26,6 +26,11 @@ std_headers = { 'Accept-Language': 'en-us,en;q=0.5', } +try: + compat_str = unicode # Python 2 +except NameError: + compat_str = str + def preferredencoding(): """Get preferred encoding. @@ -83,7 +88,6 @@ class IDParser(HTMLParser.HTMLParser): HTMLParser.HTMLParser.__init__(self) def error(self, message): - print >> sys.stderr, self.getpos() if self.error_count > 10 or self.started: raise HTMLParser.HTMLParseError(message, self.getpos()) self.rawdata = '\n'.join(self.html.split('\n')[self.getpos()[0]:]) # skip one line @@ -190,14 +194,28 @@ def timeconvert(timestr): if timetuple is not None: timestamp = email.utils.mktime_tz(timetuple) return timestamp - -def sanitize_filename(s): - """Sanitizes a string so it could be used as part of a filename.""" + +def sanitize_filename(s, restricted=False): + """Sanitizes a string so it could be used as part of a filename. + If restricted is set, use a stricter subset of allowed characters. + """ def replace_insane(char): - if char in u' .\\/|?*<>:"' or ord(char) < 32: + if char == '?' or ord(char) < 32 or ord(char) == 127: + return '' + elif char == '"': + return '' if restricted else '\'' + elif char == ':': + return '_-' if restricted else ' -' + elif char in '\\/|*<>': + return '-' + if restricted and (char in '&\'' or char.isspace()): return '_' return char - return u''.join(map(replace_insane, s)).strip('_') + + result = u''.join(map(replace_insane, s)) + while '--' in result: + result = result.replace('--', '-') + return result.strip('-') def orderedSet(iterable): """ Remove all duplicates from the input iterable """ |