diff options
Diffstat (limited to 'youtube_dl/FileDownloader.py')
| -rw-r--r-- | youtube_dl/FileDownloader.py | 542 | 
1 files changed, 14 insertions, 528 deletions
| diff --git a/youtube_dl/FileDownloader.py b/youtube_dl/FileDownloader.py index f21c66c29..445f3e85e 100644 --- a/youtube_dl/FileDownloader.py +++ b/youtube_dl/FileDownloader.py @@ -1,9 +1,6 @@  import math -import io  import os  import re -import shutil -import socket  import subprocess  import sys  import time @@ -13,103 +10,45 @@ if os.name == 'nt':      import ctypes  from .utils import * -from .extractor import get_info_extractor  class FileDownloader(object):      """File Downloader class.      File downloader objects are the ones responsible of downloading the -    actual video file and writing it to disk if the user has requested -    it, among some other tasks. In most cases there should be one per -    program. As, given a video URL, the downloader doesn't know how to -    extract all the needed information, task that InfoExtractors do, it -    has to pass the URL to one of them. - -    For this, file downloader objects have a method that allows -    InfoExtractors to be registered in a given order. When it is passed -    a URL, the file downloader handles it to the first InfoExtractor it -    finds that reports being able to handle it. The InfoExtractor extracts -    all the information about the video or videos the URL refers to, and -    asks the FileDownloader to process the video information, possibly -    downloading the video. +    actual video file and writing it to disk.      File downloaders accept a lot of parameters. In order not to saturate      the object constructor with arguments, it receives a dictionary of -    options instead. These options are available through the params -    attribute for the InfoExtractors to use. The FileDownloader also -    registers itself as the downloader in charge for the InfoExtractors -    that are added to it, so this is a "mutual registration". +    options instead.      Available options: -    username:          Username for authentication purposes. -    password:          Password for authentication purposes. -    usenetrc:          Use netrc for authentication instead. +    verbose:           Print additional info to stdout.      quiet:             Do not print messages to stdout. -    forceurl:          Force printing final URL. -    forcetitle:        Force printing title. -    forceid:           Force printing ID. -    forcethumbnail:    Force printing thumbnail URL. -    forcedescription:  Force printing description. -    forcefilename:     Force printing final filename. -    simulate:          Do not download the video files. -    format:            Video format code. -    format_limit:      Highest quality format to try. -    outtmpl:           Template for output names. -    restrictfilenames: Do not allow "&" and spaces in file names -    ignoreerrors:      Do not stop on download errors.      ratelimit:         Download speed limit, in bytes/sec. -    nooverwrites:      Prevent overwriting files.      retries:           Number of times to retry for HTTP error 5xx      buffersize:        Size of download buffer in bytes.      noresizebuffer:    Do not automatically resize the download buffer.      continuedl:        Try to continue downloads if possible.      noprogress:        Do not print the progress bar. -    playliststart:     Playlist item to start at. -    playlistend:       Playlist item to end at. -    matchtitle:        Download only matching titles. -    rejecttitle:       Reject downloads for matching titles.      logtostderr:       Log messages to stderr instead of stdout.      consoletitle:      Display progress in console window's titlebar.      nopart:            Do not use temporary .part files.      updatetime:        Use the Last-modified header to set output file timestamps. -    writedescription:  Write the video description to a .description file -    writeinfojson:     Write the video description to a .info.json file -    writethumbnail:    Write the thumbnail image to a file -    writesubtitles:    Write the video subtitles to a file -    allsubtitles:      Downloads all the subtitles of the video -    listsubtitles:     Lists all available subtitles for the video -    subtitlesformat:   Subtitle format [sbv/srt] (default=srt) -    subtitleslang:     Language of the subtitles to download      test:              Download only first bytes to test the downloader. -    keepvideo:         Keep the video file after post-processing      min_filesize:      Skip files smaller than this size      max_filesize:      Skip files larger than this size -    daterange:         A DateRange object, download only if the upload_date is in the range. -    skip_download:     Skip the actual download of the video file      """      params = None -    _ies = [] -    _pps = [] -    _download_retcode = None -    _num_downloads = None -    _screen_file = None -    def __init__(self, params): +    def __init__(self, ydl, params):          """Create a FileDownloader object with the given options.""" -        self._ies = [] -        self._pps = [] +        self.ydl = ydl          self._progress_hooks = [] -        self._download_retcode = 0 -        self._num_downloads = 0 -        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]          self.params = params -        if '%(stitle)s' in self.params['outtmpl']: -            self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.') -      @staticmethod      def format_bytes(bytes):          if bytes is None: @@ -174,34 +113,11 @@ class FileDownloader(object):          multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())          return int(round(number * multiplier)) -    def add_info_extractor(self, ie): -        """Add an InfoExtractor object to the end of the list.""" -        self._ies.append(ie) -        ie.set_downloader(self) - -    def add_post_processor(self, pp): -        """Add a PostProcessor object to the end of the chain.""" -        self._pps.append(pp) -        pp.set_downloader(self) - -    def to_screen(self, message, skip_eol=False): -        """Print message to stdout if not in quiet mode.""" -        assert type(message) == type(u'') -        if not self.params.get('quiet', False): -            terminator = [u'\n', u''][skip_eol] -            output = message + terminator -            if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr -                output = output.encode(preferredencoding(), 'ignore') -            self._screen_file.write(output) -            self._screen_file.flush() +    def to_screen(self, *args, **kargs): +        self.ydl.to_screen(*args, **kargs)      def to_stderr(self, message): -        """Print message to stderr.""" -        assert type(message) == type(u'') -        output = message + u'\n' -        if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr -            output = output.encode(preferredencoding()) -        sys.stderr.write(output) +        self.ydl.to_screen(message)      def to_cons_title(self, message):          """Set console/terminal window title to message.""" @@ -214,63 +130,14 @@ class FileDownloader(object):          elif 'TERM' in os.environ:              self.to_screen('\033]0;%s\007' % message, skip_eol=True) -    def fixed_template(self): -        """Checks if the output template is fixed.""" -        return (re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None) - -    def trouble(self, message=None, tb=None): -        """Determine action to take when a download problem appears. +    def trouble(self, *args, **kargs): +        self.ydl.trouble(*args, **kargs) -        Depending on if the downloader has been configured to ignore -        download errors or not, this method may throw an exception or -        not when errors are found, after printing the message. +    def report_warning(self, *args, **kargs): +        self.ydl.report_warning(*args, **kargs) -        tb, if given, is additional traceback information. -        """ -        if message is not None: -            self.to_stderr(message) -        if self.params.get('verbose'): -            if tb is None: -                if sys.exc_info()[0]:  # if .trouble has been called from an except block -                    tb = u'' -                    if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]: -                        tb += u''.join(traceback.format_exception(*sys.exc_info()[1].exc_info)) -                    tb += compat_str(traceback.format_exc()) -                else: -                    tb_data = traceback.format_list(traceback.extract_stack()) -                    tb = u''.join(tb_data) -            self.to_stderr(tb) -        if not self.params.get('ignoreerrors', False): -            if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]: -                exc_info = sys.exc_info()[1].exc_info -            else: -                exc_info = sys.exc_info() -            raise DownloadError(message, exc_info) -        self._download_retcode = 1 - -    def report_warning(self, message): -        ''' -        Print the message to stderr, it will be prefixed with 'WARNING:' -        If stderr is a tty file the 'WARNING:' will be colored -        ''' -        if sys.stderr.isatty() and os.name != 'nt': -            _msg_header=u'\033[0;33mWARNING:\033[0m' -        else: -            _msg_header=u'WARNING:' -        warning_message=u'%s %s' % (_msg_header,message) -        self.to_stderr(warning_message) - -    def report_error(self, message, tb=None): -        ''' -        Do the same as trouble, but prefixes the message with 'ERROR:', colored -        in red if stderr is a tty file. -        ''' -        if sys.stderr.isatty() and os.name != 'nt': -            _msg_header = u'\033[0;31mERROR:\033[0m' -        else: -            _msg_header = u'ERROR:' -        error_message = u'%s %s' % (_msg_header, message) -        self.trouble(error_message, tb) +    def report_error(self, *args, **kargs): +        self.ydl.error(*args, **kargs)      def slow_down(self, start_time, byte_counter):          """Sleep if the download speed is over the rate limit.""" @@ -326,18 +193,6 @@ class FileDownloader(object):              pass          return filetime -    def report_writedescription(self, descfn): -        """ Report that the description file is being written """ -        self.to_screen(u'[info] Writing video description to: ' + descfn) - -    def report_writesubtitles(self, sub_filename): -        """ Report that the subtitles file is being written """ -        self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename) - -    def report_writeinfojson(self, infofn): -        """ Report that the metadata file has been written """ -        self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn) -      def report_destination(self, filename):          """Report destination filename."""          self.to_screen(u'[download] Destination: ' + filename) @@ -382,375 +237,6 @@ class FileDownloader(object):          else:              self.to_screen(u'') -    def increment_downloads(self): -        """Increment the ordinal that assigns a number to each file.""" -        self._num_downloads += 1 - -    def prepare_filename(self, info_dict): -        """Generate the output filename.""" -        try: -            template_dict = dict(info_dict) - -            template_dict['epoch'] = int(time.time()) -            autonumber_size = self.params.get('autonumber_size') -            if autonumber_size is None: -                autonumber_size = 5 -            autonumber_templ = u'%0' + str(autonumber_size) + u'd' -            template_dict['autonumber'] = autonumber_templ % self._num_downloads -            if template_dict['playlist_index'] is not None: -                template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index'] - -            sanitize = lambda k,v: sanitize_filename( -                u'NA' if v is None else compat_str(v), -                restricted=self.params.get('restrictfilenames'), -                is_id=(k==u'id')) -            template_dict = dict((k, sanitize(k, v)) for k,v in template_dict.items()) - -            filename = self.params['outtmpl'] % template_dict -            return filename -        except KeyError as err: -            self.report_error(u'Erroneous output template') -            return None -        except ValueError as err: -            self.report_error(u'Insufficient system charset ' + repr(preferredencoding())) -            return None - -    def _match_entry(self, info_dict): -        """ Returns None iff the file should be downloaded """ - -        title = info_dict['title'] -        matchtitle = self.params.get('matchtitle', False) -        if matchtitle: -            if not re.search(matchtitle, title, re.IGNORECASE): -                return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"' -        rejecttitle = self.params.get('rejecttitle', False) -        if rejecttitle: -            if re.search(rejecttitle, title, re.IGNORECASE): -                return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"' -        date = info_dict.get('upload_date', None) -        if date is not None: -            dateRange = self.params.get('daterange', DateRange()) -            if date not in dateRange: -                return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange) -        return None -         -    def extract_info(self, url, download=True, ie_key=None, extra_info={}): -        ''' -        Returns a list with a dictionary for each video we find. -        If 'download', also downloads the videos. -        extra_info is a dict containing the extra values to add to each result -         ''' -         -        if ie_key: -            ie = get_info_extractor(ie_key)() -            ie.set_downloader(self) -            ies = [ie] -        else: -            ies = self._ies - -        for ie in ies: -            if not ie.suitable(url): -                continue - -            if not ie.working(): -                self.report_warning(u'The program functionality for this site has been marked as broken, ' -                                    u'and will probably not work.') - -            try: -                ie_result = ie.extract(url) -                if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here) -                    break -                if isinstance(ie_result, list): -                    # Backwards compatibility: old IE result format -                    for result in ie_result: -                        result.update(extra_info) -                    ie_result = { -                        '_type': 'compat_list', -                        'entries': ie_result, -                    } -                else: -                    ie_result.update(extra_info) -                if 'extractor' not in ie_result: -                    ie_result['extractor'] = ie.IE_NAME -                return self.process_ie_result(ie_result, download=download) -            except ExtractorError as de: # An error we somewhat expected -                self.report_error(compat_str(de), de.format_traceback()) -                break -            except Exception as e: -                if self.params.get('ignoreerrors', False): -                    self.report_error(compat_str(e), tb=compat_str(traceback.format_exc())) -                    break -                else: -                    raise -        else: -            self.report_error(u'no suitable InfoExtractor: %s' % url) -         -    def process_ie_result(self, ie_result, download=True, extra_info={}): -        """ -        Take the result of the ie(may be modified) and resolve all unresolved -        references (URLs, playlist items). - -        It will also download the videos if 'download'. -        Returns the resolved ie_result. -        """ - -        result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system -        if result_type == 'video': -            if 'playlist' not in ie_result: -                # It isn't part of a playlist -                ie_result['playlist'] = None -                ie_result['playlist_index'] = None -            if download: -                self.process_info(ie_result) -            return ie_result -        elif result_type == 'url': -            # We have to add extra_info to the results because it may be -            # contained in a playlist -            return self.extract_info(ie_result['url'], -                                     download, -                                     ie_key=ie_result.get('ie_key'), -                                     extra_info=extra_info) -        elif result_type == 'playlist': -            # We process each entry in the playlist -            playlist = ie_result.get('title', None) or ie_result.get('id', None) -            self.to_screen(u'[download] Downloading playlist: %s'  % playlist) - -            playlist_results = [] - -            n_all_entries = len(ie_result['entries']) -            playliststart = self.params.get('playliststart', 1) - 1 -            playlistend = self.params.get('playlistend', -1) - -            if playlistend == -1: -                entries = ie_result['entries'][playliststart:] -            else: -                entries = ie_result['entries'][playliststart:playlistend] - -            n_entries = len(entries) - -            self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" % -                (ie_result['extractor'], playlist, n_all_entries, n_entries)) - -            for i,entry in enumerate(entries,1): -                self.to_screen(u'[download] Downloading video #%s of %s' %(i, n_entries)) -                extra = { -                         'playlist': playlist,  -                         'playlist_index': i + playliststart, -                         } -                if not 'extractor' in entry: -                    # We set the extractor, if it's an url it will be set then to -                    # the new extractor, but if it's already a video we must make -                    # sure it's present: see issue #877 -                    entry['extractor'] = ie_result['extractor'] -                entry_result = self.process_ie_result(entry, -                                                      download=download, -                                                      extra_info=extra) -                playlist_results.append(entry_result) -            ie_result['entries'] = playlist_results -            return ie_result -        elif result_type == 'compat_list': -            def _fixup(r): -                r.setdefault('extractor', ie_result['extractor']) -                return r -            ie_result['entries'] = [ -                self.process_ie_result(_fixup(r), download=download) -                for r in ie_result['entries'] -            ] -            return ie_result -        else: -            raise Exception('Invalid result type: %s' % result_type) - -    def process_info(self, info_dict): -        """Process a single resolved IE result.""" - -        assert info_dict.get('_type', 'video') == 'video' -        #We increment the download the download count here to match the previous behaviour. -        self.increment_downloads() - -        info_dict['fulltitle'] = info_dict['title'] -        if len(info_dict['title']) > 200: -            info_dict['title'] = info_dict['title'][:197] + u'...' - -        # Keep for backwards compatibility -        info_dict['stitle'] = info_dict['title'] - -        if not 'format' in info_dict: -            info_dict['format'] = info_dict['ext'] - -        reason = self._match_entry(info_dict) -        if reason is not None: -            self.to_screen(u'[download] ' + reason) -            return - -        max_downloads = self.params.get('max_downloads') -        if max_downloads is not None: -            if self._num_downloads > int(max_downloads): -                raise MaxDownloadsReached() - -        filename = self.prepare_filename(info_dict) - -        # Forced printings -        if self.params.get('forcetitle', False): -            compat_print(info_dict['title']) -        if self.params.get('forceid', False): -            compat_print(info_dict['id']) -        if self.params.get('forceurl', False): -            compat_print(info_dict['url']) -        if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict: -            compat_print(info_dict['thumbnail']) -        if self.params.get('forcedescription', False) and 'description' in info_dict: -            compat_print(info_dict['description']) -        if self.params.get('forcefilename', False) and filename is not None: -            compat_print(filename) -        if self.params.get('forceformat', False): -            compat_print(info_dict['format']) - -        # Do nothing else if in simulate mode -        if self.params.get('simulate', False): -            return - -        if filename is None: -            return - -        try: -            dn = os.path.dirname(encodeFilename(filename)) -            if dn != '' and not os.path.exists(dn): -                os.makedirs(dn) -        except (OSError, IOError) as err: -            self.report_error(u'unable to create directory ' + compat_str(err)) -            return - -        if self.params.get('writedescription', False): -            try: -                descfn = filename + u'.description' -                self.report_writedescription(descfn) -                with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile: -                    descfile.write(info_dict['description']) -            except (OSError, IOError): -                self.report_error(u'Cannot write description file ' + descfn) -                return - -        if self.params.get('writesubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']: -            # subtitles download errors are already managed as troubles in relevant IE -            # that way it will silently go on when used with unsupporting IE -            subtitle = info_dict['subtitles'][0] -            (sub_error, sub_lang, sub) = subtitle -            sub_format = self.params.get('subtitlesformat') -            if sub_error: -                self.report_warning("Some error while getting the subtitles") -            else: -                try: -                    sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format -                    self.report_writesubtitles(sub_filename) -                    with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile: -                        subfile.write(sub) -                except (OSError, IOError): -                    self.report_error(u'Cannot write subtitles file ' + descfn) -                    return - -        if self.params.get('allsubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']: -            subtitles = info_dict['subtitles'] -            sub_format = self.params.get('subtitlesformat') -            for subtitle in subtitles: -                (sub_error, sub_lang, sub) = subtitle -                if sub_error: -                    self.report_warning("Some error while getting the subtitles") -                else: -                    try: -                        sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format -                        self.report_writesubtitles(sub_filename) -                        with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile: -                                subfile.write(sub) -                    except (OSError, IOError): -                        self.report_error(u'Cannot write subtitles file ' + descfn) -                        return - -        if self.params.get('writeinfojson', False): -            infofn = filename + u'.info.json' -            self.report_writeinfojson(infofn) -            try: -                json_info_dict = dict((k, v) for k,v in info_dict.items() if not k in ['urlhandle']) -                write_json_file(json_info_dict, encodeFilename(infofn)) -            except (OSError, IOError): -                self.report_error(u'Cannot write metadata to JSON file ' + infofn) -                return - -        if self.params.get('writethumbnail', False): -            if 'thumbnail' in info_dict: -                thumb_format = info_dict['thumbnail'].rpartition(u'/')[2].rpartition(u'.')[2] -                if not thumb_format: -                    thumb_format = 'jpg' -                thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format -                self.to_screen(u'[%s] %s: Downloading thumbnail ...' % -                               (info_dict['extractor'], info_dict['id'])) -                uf = compat_urllib_request.urlopen(info_dict['thumbnail']) -                with open(thumb_filename, 'wb') as thumbf: -                    shutil.copyfileobj(uf, thumbf) -                self.to_screen(u'[%s] %s: Writing thumbnail to: %s' % -                               (info_dict['extractor'], info_dict['id'], thumb_filename)) - -        if not self.params.get('skip_download', False): -            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)): -                success = True -            else: -                try: -                    success = self._do_download(filename, info_dict) -                except (OSError, IOError) as err: -                    raise UnavailableVideoError() -                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err: -                    self.report_error(u'unable to download video data: %s' % str(err)) -                    return -                except (ContentTooShortError, ) as err: -                    self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded)) -                    return - -            if success: -                try: -                    self.post_process(filename, info_dict) -                except (PostProcessingError) as err: -                    self.report_error(u'postprocessing: %s' % str(err)) -                    return - -    def download(self, url_list): -        """Download a given list of URLs.""" -        if len(url_list) > 1 and self.fixed_template(): -            raise SameFileError(self.params['outtmpl']) - -        for url in url_list: -            try: -                #It also downloads the videos -                videos = self.extract_info(url) -            except UnavailableVideoError: -                self.report_error(u'unable to download video') -            except MaxDownloadsReached: -                self.to_screen(u'[info] Maximum number of downloaded files reached.') -                raise - -        return self._download_retcode - -    def post_process(self, filename, ie_info): -        """Run all the postprocessors on the given file.""" -        info = dict(ie_info) -        info['filepath'] = filename -        keep_video = None -        for pp in self._pps: -            try: -                keep_video_wish,new_info = pp.run(info) -                if keep_video_wish is not None: -                    if keep_video_wish: -                        keep_video = keep_video_wish -                    elif keep_video is None: -                        # No clear decision yet, let IE decide -                        keep_video = keep_video_wish -            except PostProcessingError as e: -                self.to_stderr(u'ERROR: ' + e.msg) -        if keep_video is False and not self.params.get('keepvideo', False): -            try: -                self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename) -                os.remove(encodeFilename(filename)) -            except (IOError, OSError): -                self.report_warning(u'Unable to remove downloaded video file') -      def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url):          self.report_destination(filename)          tmpfilename = self.temp_name(filename) | 
