diff options
Diffstat (limited to 'youtube_dl')
-rw-r--r-- | youtube_dl/FileDownloader.py | 23 | ||||
-rwxr-xr-x | youtube_dl/InfoExtractors.py | 66 | ||||
-rw-r--r-- | youtube_dl/PostProcessor.py | 114 | ||||
-rw-r--r-- | youtube_dl/__init__.py | 23 | ||||
-rw-r--r-- | youtube_dl/utils.py | 3 | ||||
-rw-r--r-- | youtube_dl/version.py | 2 |
6 files changed, 135 insertions, 96 deletions
diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index be9e4918e..51df4c175 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -81,6 +81,7 @@ class FileDownloader(object): writesubtitles: Write the video subtitles to a .srt file subtitleslang: Language of the subtitles to download test: Download only first bytes to test the downloader. + keepvideo: Keep the video file after post-processing """ params = None @@ -529,13 +530,27 @@ class FileDownloader(object): return self._download_retcode def post_process(self, filename, ie_info): - """Run the postprocessing chain on the given file.""" + """Run all the postprocessors on the given file.""" info = dict(ie_info) info['filepath'] = filename + keep_video = None for pp in self._pps: - info = pp.run(info) - if info is None: - break + try: + keep_video_wish,new_info = pp.run(info) + if keep_video_wish is not None: + if keep_video_wish: + keep_video = keep_video_wish + elif keep_video is None: + # No clear decision yet, let IE decide + keep_video = keep_video_wish + except PostProcessingError as e: + self.to_stderr(u'ERROR: ' + e.msg) + if keep_video is False and not self.params.get('keepvideo', False): + try: + self.to_stderr(u'Deleting original file %s (pass -k to keep)' % filename) + os.remove(encodeFilename(filename)) + except (IOError, OSError): + self.to_stderr(u'WARNING: Unable to remove downloaded video file') def _download_with_rtmpdump(self, filename, url, player_url, page_url): self.report_destination(filename) diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py index 83be8313f..092bfef22 100755 --- a/youtube_dl/InfoExtractors.py +++ b/youtube_dl/InfoExtractors.py @@ -2333,7 +2333,6 @@ class ComedyCentralIE(InfoExtractor): (the-colbert-report-(videos|collections)/(?P<clipID>[0-9]+)/[^/]*/(?P<cntitle>.*?)) |(watch/(?P<date>[^/]*)/(?P<tdstitle>.*))))) $""" - IE_NAME = u'comedycentral' _available_formats = ['3500', '2200', '1700', '1200', '750', '400'] @@ -2361,16 +2360,12 @@ class ComedyCentralIE(InfoExtractor): def report_extraction(self, episode_id): self._downloader.to_screen(u'[comedycentral] %s: Extracting information' % episode_id) - def report_config_download(self, episode_id): - self._downloader.to_screen(u'[comedycentral] %s: Downloading configuration' % episode_id) + def report_config_download(self, episode_id, media_id): + self._downloader.to_screen(u'[comedycentral] %s: Downloading configuration for %s' % (episode_id, media_id)) def report_index_download(self, episode_id): self._downloader.to_screen(u'[comedycentral] %s: Downloading show index' % episode_id) - def report_player_url(self, episode_id): - self._downloader.to_screen(u'[comedycentral] %s: Determining player URL' % episode_id) - - def _print_formats(self, formats): print('Available formats:') for x in formats: @@ -2409,6 +2404,7 @@ class ComedyCentralIE(InfoExtractor): try: htmlHandle = compat_urllib_request.urlopen(req) html = htmlHandle.read() + webpage = html.decode('utf-8') except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: self._downloader.trouble(u'ERROR: unable to download webpage: %s' % compat_str(err)) return @@ -2423,29 +2419,20 @@ class ComedyCentralIE(InfoExtractor): return epTitle = mobj.group('episode') - mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*(?:episode|video).*?:.*?))"', html) + mMovieParams = re.findall('(?:<param name="movie" value="|var url = ")(http://media.mtvnservices.com/([^"]*(?:episode|video).*?:.*?))"', webpage) if len(mMovieParams) == 0: # The Colbert Report embeds the information in a without # a URL prefix; so extract the alternate reference # and then add the URL prefix manually. - altMovieParams = re.findall('data-mgid="([^"]*(?:episode|video).*?:.*?)"', html) + altMovieParams = re.findall('data-mgid="([^"]*(?:episode|video).*?:.*?)"', webpage) if len(altMovieParams) == 0: self._downloader.trouble(u'ERROR: unable to find Flash URL in webpage ' + url) return else: mMovieParams = [("http://media.mtvnservices.com/" + altMovieParams[0], altMovieParams[0])] - playerUrl_raw = mMovieParams[0][0] - self.report_player_url(epTitle) - try: - urlHandle = compat_urllib_request.urlopen(playerUrl_raw) - playerUrl = urlHandle.geturl() - except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: - self._downloader.trouble(u'ERROR: unable to find out player URL: ' + compat_str(err)) - return - uri = mMovieParams[0][1] indexUrl = 'http://shadow.comedycentral.com/feeds/video_player/mrss/?' + compat_urllib_parse.urlencode({'uri': uri}) self.report_index_download(epTitle) @@ -2459,7 +2446,7 @@ class ComedyCentralIE(InfoExtractor): idoc = xml.etree.ElementTree.fromstring(indexXml) itemEls = idoc.findall('.//item') - for itemEl in itemEls: + for partNum,itemEl in enumerate(itemEls): mediaId = itemEl.findall('./guid')[0].text shortMediaId = mediaId.split(':')[-1] showId = mediaId.split(':')[-2].replace('.com', '') @@ -2469,7 +2456,7 @@ class ComedyCentralIE(InfoExtractor): configUrl = ('http://www.comedycentral.com/global/feeds/entertainment/media/mediaGenEntertainment.jhtml?' + compat_urllib_parse.urlencode({'uri': mediaId})) configReq = compat_urllib_request.Request(configUrl) - self.report_config_download(epTitle) + self.report_config_download(epTitle, shortMediaId) try: configXml = compat_urllib_request.urlopen(configReq).read() except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: @@ -2491,7 +2478,7 @@ class ComedyCentralIE(InfoExtractor): return # For now, just pick the highest bitrate - format,video_url = turls[-1] + format,rtmp_video_url = turls[-1] # Get the format arg from the arg stream req_format = self._downloader.params.get('format', None) @@ -2499,18 +2486,16 @@ class ComedyCentralIE(InfoExtractor): # Select format if we can find one for f,v in turls: if f == req_format: - format, video_url = f, v + format, rtmp_video_url = f, v break - # Patch to download from alternative CDN, which does not - # break on current RTMPDump builds - broken_cdn = "rtmpe://viacomccstrmfs.fplive.net/viacomccstrm/gsp.comedystor/" - better_cdn = "rtmpe://cp10740.edgefcs.net/ondemand/mtvnorigin/gsp.comedystor/" - - if video_url.startswith(broken_cdn): - video_url = video_url.replace(broken_cdn, better_cdn) + m = re.match(r'^rtmpe?://.*?/(?P<finalid>gsp.comedystor/.*)$', rtmp_video_url) + if not m: + raise ExtractorError(u'Cannot transform RTMP url') + base = 'http://mtvnmobile.vo.llnwd.net/kip0/_pxn=1+_pxI0=Ripod-h264+_pxL0=undefined+_pxM0=+_pxK=18639+_pxE=mp4/44620/mtvnorigin/' + video_url = base + m.group('finalid') - effTitle = showId + u'-' + epTitle + effTitle = showId + u'-' + epTitle + u' part ' + compat_str(partNum+1) info = { 'id': shortMediaId, 'url': video_url, @@ -2521,9 +2506,7 @@ class ComedyCentralIE(InfoExtractor): 'format': format, 'thumbnail': None, 'description': officialTitle, - 'player_url': None #playerUrl } - results.append(info) return results @@ -2603,7 +2586,6 @@ class EscapistIE(InfoExtractor): return [info] - class CollegeHumorIE(InfoExtractor): """Information extractor for collegehumor.com""" @@ -3542,17 +3524,23 @@ class JustinTVIE(InfoExtractor): return response = json.loads(webpage) + if type(response) != list: + error_text = response.get('error', 'unknown error') + self._downloader.trouble(u'ERROR: Justin.tv API: %s' % error_text) + return info = [] for clip in response: video_url = clip['video_file_url'] if video_url: video_extension = os.path.splitext(video_url)[1][1:] - video_date = re.sub('-', '', clip['created_on'][:10]) + video_date = re.sub('-', '', clip['start_time'][:10]) + video_uploader_id = clip.get('user_id', clip.get('channel_id')) info.append({ 'id': clip['id'], 'url': video_url, 'title': clip['title'], - 'uploader': clip.get('user_id', clip.get('channel_id')), + 'uploader': clip.get('channel_name', video_uploader_id), + 'uploader_id': video_uploader_id, 'upload_date': video_date, 'ext': video_extension, }) @@ -3571,7 +3559,7 @@ class JustinTVIE(InfoExtractor): paged = True api += '/channel/archives/%s.json' else: - api += '/clip/show/%s.json' + api += '/broadcast/by_archive/%s.json' api = api % (video_id,) self.report_extraction(video_id) @@ -3711,11 +3699,11 @@ class SteamIE(InfoExtractor): } videos.append(info) return videos - + class UstreamIE(InfoExtractor): - _VALID_URL = r'http://www.ustream.tv/recorded/(?P<videoID>\d+)' + _VALID_URL = r'https?://www\.ustream\.tv/recorded/(?P<videoID>\d+)' IE_NAME = u'ustream' - + def _real_extract(self, url): m = re.match(self._VALID_URL, url) video_id = m.group('videoID') diff --git a/youtube_dl/PostProcessor.py b/youtube_dl/PostProcessor.py index a04828518..545b6992b 100644 --- a/youtube_dl/PostProcessor.py +++ b/youtube_dl/PostProcessor.py @@ -45,31 +45,24 @@ class PostProcessor(object): one has an extra field called "filepath" that points to the downloaded file. - When this method returns None, the postprocessing chain is - stopped. However, this method may return an information - dictionary that will be passed to the next postprocessing - object in the chain. It can be the one it received after - changing some fields. + This method returns a tuple, the first element of which describes + whether the original file should be kept (i.e. not deleted - None for + no preference), and the second of which is the updated information. In addition, this method may raise a PostProcessingError - exception that will be taken into account by the downloader - it was called from. + exception if post processing fails. """ - return information # by default, do nothing + return None, information # by default, keep file and do nothing -class AudioConversionError(BaseException): - def __init__(self, message): - self.message = message +class FFmpegPostProcessorError(PostProcessingError): + pass -class FFmpegExtractAudioPP(PostProcessor): - def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, keepvideo=False, nopostoverwrites=False): +class AudioConversionError(PostProcessingError): + pass + +class FFmpegPostProcessor(PostProcessor): + def __init__(self,downloader=None): PostProcessor.__init__(self, downloader) - if preferredcodec is None: - preferredcodec = 'best' - self._preferredcodec = preferredcodec - self._preferredquality = preferredquality - self._keepvideo = keepvideo - self._nopostoverwrites = nopostoverwrites self._exes = self.detect_executables() @staticmethod @@ -83,10 +76,37 @@ class FFmpegExtractAudioPP(PostProcessor): programs = ['avprobe', 'avconv', 'ffmpeg', 'ffprobe'] return dict((program, executable(program)) for program in programs) + def run_ffmpeg(self, path, out_path, opts): + if not self._exes['ffmpeg'] and not self._exes['avconv']: + raise FFmpegPostProcessorError(u'ffmpeg or avconv not found. Please install one.') + cmd = ([self._exes['avconv'] or self._exes['ffmpeg'], '-y', '-i', encodeFilename(path)] + + opts + + [encodeFilename(self._ffmpeg_filename_argument(out_path))]) + p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + stdout,stderr = p.communicate() + if p.returncode != 0: + msg = stderr.strip().split('\n')[-1] + raise FFmpegPostProcessorError(msg.decode('utf-8', 'replace')) + + def _ffmpeg_filename_argument(self, fn): + # ffmpeg broke --, see https://ffmpeg.org/trac/ffmpeg/ticket/2127 for details + if fn.startswith(u'-'): + return u'./' + fn + return fn + +class FFmpegExtractAudioPP(FFmpegPostProcessor): + def __init__(self, downloader=None, preferredcodec=None, preferredquality=None, nopostoverwrites=False): + FFmpegPostProcessor.__init__(self, downloader) + if preferredcodec is None: + preferredcodec = 'best' + self._preferredcodec = preferredcodec + self._preferredquality = preferredquality + self._nopostoverwrites = nopostoverwrites + def get_audio_codec(self, path): if not self._exes['ffprobe'] and not self._exes['avprobe']: return None try: - cmd = [self._exes['avprobe'] or self._exes['ffprobe'], '-show_streams', '--', encodeFilename(path)] + cmd = [self._exes['avprobe'] or self._exes['ffprobe'], '-show_streams', encodeFilename(self._ffmpeg_filename_argument(path))] handle = subprocess.Popen(cmd, stderr=compat_subprocess_get_DEVNULL(), stdout=subprocess.PIPE) output = handle.communicate()[0] if handle.wait() != 0: @@ -108,22 +128,18 @@ class FFmpegExtractAudioPP(PostProcessor): acodec_opts = [] else: acodec_opts = ['-acodec', codec] - cmd = ([self._exes['avconv'] or self._exes['ffmpeg'], '-y', '-i', encodeFilename(path), '-vn'] - + acodec_opts + more_opts + - ['--', encodeFilename(out_path)]) - p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - stdout,stderr = p.communicate() - if p.returncode != 0: - msg = stderr.strip().split('\n')[-1] - raise AudioConversionError(msg) + opts = ['-vn'] + acodec_opts + more_opts + try: + FFmpegPostProcessor.run_ffmpeg(self, path, out_path, opts) + except FFmpegPostProcessorError as err: + raise AudioConversionError(err.message) def run(self, information): path = information['filepath'] filecodec = self.get_audio_codec(path) if filecodec is None: - self._downloader.to_stderr(u'WARNING: unable to obtain file audio codec with ffprobe') - return None + raise PostProcessingError(u'WARNING: unable to obtain file audio codec with ffprobe') more_opts = [] if self._preferredcodec == 'best' or self._preferredcodec == filecodec or (self._preferredcodec == 'm4a' and filecodec == 'aac'): @@ -132,7 +148,7 @@ class FFmpegExtractAudioPP(PostProcessor): acodec = 'copy' extension = self._preferredcodec more_opts = [self._exes['avconv'] and '-bsf:a' or '-absf', 'aac_adtstoasc'] - elif filecodec in ['aac', 'mp3', 'vorbis']: + elif filecodec in ['aac', 'mp3', 'vorbis', 'opus']: # Lossless if possible acodec = 'copy' extension = filecodec @@ -152,7 +168,7 @@ class FFmpegExtractAudioPP(PostProcessor): more_opts += [self._exes['avconv'] and '-b:a' or '-ab', self._preferredquality + 'k'] else: # We convert the audio (lossy) - acodec = {'mp3': 'libmp3lame', 'aac': 'aac', 'm4a': 'aac', 'vorbis': 'libvorbis', 'wav': None}[self._preferredcodec] + acodec = {'mp3': 'libmp3lame', 'aac': 'aac', 'm4a': 'aac', 'opus': 'opus', 'vorbis': 'libvorbis', 'wav': None}[self._preferredcodec] extension = self._preferredcodec more_opts = [] if self._preferredquality is not None: @@ -181,10 +197,10 @@ class FFmpegExtractAudioPP(PostProcessor): except: etype,e,tb = sys.exc_info() if isinstance(e, AudioConversionError): - self._downloader.to_stderr(u'ERROR: audio conversion failed: ' + e.message) + msg = u'audio conversion failed: ' + e.message else: - self._downloader.to_stderr(u'ERROR: error running ' + (self._exes['avconv'] and 'avconv' or 'ffmpeg')) - return None + msg = u'error running ' + (self._exes['avconv'] and 'avconv' or 'ffmpeg') + raise PostProcessingError(msg) # Try to update the date time for extracted audio file. if information.get('filetime') is not None: @@ -193,12 +209,24 @@ class FFmpegExtractAudioPP(PostProcessor): except: self._downloader.to_stderr(u'WARNING: Cannot update utime of audio file') - if not self._keepvideo: - try: - os.remove(encodeFilename(path)) - except (IOError, OSError): - self._downloader.to_stderr(u'WARNING: Unable to remove downloaded video file') - return None - information['filepath'] = new_path - return information + return False,information + +class FFmpegVideoConvertor(FFmpegPostProcessor): + def __init__(self, downloader=None,preferedformat=None): + super(FFmpegVideoConvertor, self).__init__(downloader) + self._preferedformat=preferedformat + + def run(self, information): + path = information['filepath'] + prefix, sep, ext = path.rpartition(u'.') + outpath = prefix + sep + self._preferedformat + if information['ext'] == self._preferedformat: + self._downloader.to_screen(u'[ffmpeg] Not converting video file %s - already is in target format %s' % (path, self._preferedformat)) + return True,information + self._downloader.to_screen(u'['+'ffmpeg'+'] Converting video from %s to %s, Destination: ' % (information['ext'], self._preferedformat) +outpath) + self.run_ffmpeg(path, outpath, []) + information['filepath'] = outpath + information['format'] = self._preferedformat + information['ext'] = self._preferedformat + return False,information diff --git a/youtube_dl/__init__.py b/youtube_dl/__init__.py index 1d914709f..ae12128b9 100644 --- a/youtube_dl/__init__.py +++ b/youtube_dl/__init__.py @@ -175,7 +175,6 @@ def parseOpts(): action='store', dest='subtitleslang', metavar='LANG', help='language of the closed captions to download (optional) use IETF language tags like \'en\'') - verbosity.add_option('-q', '--quiet', action='store_true', dest='quiet', help='activates quiet mode', default=False) verbosity.add_option('-s', '--simulate', @@ -248,9 +247,11 @@ def parseOpts(): postproc.add_option('-x', '--extract-audio', action='store_true', dest='extractaudio', default=False, help='convert video files to audio-only files (requires ffmpeg or avconv and ffprobe or avprobe)') postproc.add_option('--audio-format', metavar='FORMAT', dest='audioformat', default='best', - help='"best", "aac", "vorbis", "mp3", "m4a", or "wav"; best by default') + help='"best", "aac", "vorbis", "mp3", "m4a", "opus", or "wav"; best by default') postproc.add_option('--audio-quality', metavar='QUALITY', dest='audioquality', default='5', help='ffmpeg/avconv audio quality specification, insert a value between 0 (better) and 9 (worse) for VBR or a specific bitrate like 128K (default 5)') + postproc.add_option('--recode-video', metavar='FORMAT', dest='recodevideo', default=None, + help='Encode the video to another format if necessary (currently supported: mp4|flv|ogg|webm)') postproc.add_option('-k', '--keep-video', action='store_true', dest='keepvideo', default=False, help='keeps the video file on disk after the post-processing; the video is erased by default') postproc.add_option('--no-post-overwrites', action='store_true', dest='nopostoverwrites', default=False, @@ -278,6 +279,10 @@ def parseOpts(): def _real_main(): parser, opts, args = parseOpts() + # Update version + if opts.update_self: + update_self(fd.to_screen, opts.verbose, sys.argv[0]) + # Open appropriate CookieJar if opts.cookiefile is None: jar = compat_cookiejar.CookieJar() @@ -370,12 +375,15 @@ def _real_main(): except (TypeError, ValueError) as err: parser.error(u'invalid playlist end number specified') if opts.extractaudio: - if opts.audioformat not in ['best', 'aac', 'mp3', 'vorbis', 'm4a', 'wav']: + if opts.audioformat not in ['best', 'aac', 'mp3', 'm4a', 'opus', 'vorbis', 'wav']: parser.error(u'invalid audio format specified') if opts.audioquality: opts.audioquality = opts.audioquality.strip('k').strip('K') if not opts.audioquality.isdigit(): parser.error(u'invalid audio quality specified') + if opts.recodevideo is not None: + if opts.recodevideo not in ['mp4', 'flv', 'webm', 'ogg']: + parser.error(u'invalid video recode format specified') if sys.version_info < (3,): # In Python 2, sys.argv is a bytestring (also note http://bugs.python.org/issue2128 for Windows systems) @@ -432,6 +440,7 @@ def _real_main(): 'prefer_free_formats': opts.prefer_free_formats, 'verbose': opts.verbose, 'test': opts.test, + 'keepvideo': opts.keepvideo, }) if opts.verbose: @@ -453,11 +462,9 @@ def _real_main(): # PostProcessors if opts.extractaudio: - fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, keepvideo=opts.keepvideo, nopostoverwrites=opts.nopostoverwrites)) - - # Update version - if opts.update_self: - update_self(fd.to_screen, opts.verbose, sys.argv[0]) + fd.add_post_processor(FFmpegExtractAudioPP(preferredcodec=opts.audioformat, preferredquality=opts.audioquality, nopostoverwrites=opts.nopostoverwrites)) + if opts.recodevideo: + fd.add_post_processor(FFmpegVideoConvertor(preferedformat=opts.recodevideo)) # Maybe do nothing if len(all_urls) < 1: diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index 8f856ee8c..0e37390a2 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -450,7 +450,8 @@ class PostProcessingError(Exception): This exception may be raised by PostProcessor's .run() method to indicate an error in the postprocessing task. """ - pass + def __init__(self, msg): + self.msg = msg class MaxDownloadsReached(Exception): """ --max-downloads limit has been reached. """ diff --git a/youtube_dl/version.py b/youtube_dl/version.py index a4e9d2478..d8e82f4cd 100644 --- a/youtube_dl/version.py +++ b/youtube_dl/version.py @@ -1,2 +1,2 @@ -__version__ = '2013.01.02' +__version__ = '2013.01.11' |