diff options
Diffstat (limited to 'youtube_dl/downloader/fragment.py')
| -rw-r--r-- | youtube_dl/downloader/fragment.py | 122 | 
1 files changed, 109 insertions, 13 deletions
| diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py index 56f975266..bccc8ecc1 100644 --- a/youtube_dl/downloader/fragment.py +++ b/youtube_dl/downloader/fragment.py @@ -2,6 +2,7 @@ from __future__ import division, unicode_literals  import os  import time +import json  from .common import FileDownloader  from .http import HttpFD @@ -28,15 +29,37 @@ class FragmentFD(FileDownloader):                          and hlsnative only)      skip_unavailable_fragments:                          Skip unavailable fragments (DASH and hlsnative only) +    keep_fragments:     Keep downloaded fragments on disk after downloading is +                        finished + +    For each incomplete fragment download youtube-dl keeps on disk a special +    bookkeeping file with download state and metadata (in future such files will +    be used for any incomplete download handled by youtube-dl). This file is +    used to properly handle resuming, check download file consistency and detect +    potential errors. The file has a .ytdl extension and represents a standard +    JSON file of the following format: + +    extractor: +        Dictionary of extractor related data. TBD. + +    downloader: +        Dictionary of downloader related data. May contain following data: +            current_fragment: +                Dictionary with current (being downloaded) fragment data: +                index:  0-based index of current fragment among all fragments +            fragment_count: +                Total count of fragments + +    This feature is experimental and file format may change in future.      """ -    def report_retry_fragment(self, err, fragment_name, count, retries): +    def report_retry_fragment(self, err, frag_index, count, retries):          self.to_screen( -            '[download] Got server HTTP error: %s. Retrying fragment %s (attempt %d of %s)...' -            % (error_to_compat_str(err), fragment_name, count, self.format_retries(retries))) +            '[download] Got server HTTP error: %s. Retrying fragment %d (attempt %d of %s)...' +            % (error_to_compat_str(err), frag_index, count, self.format_retries(retries))) -    def report_skip_fragment(self, fragment_name): -        self.to_screen('[download] Skipping fragment %s...' % fragment_name) +    def report_skip_fragment(self, frag_index): +        self.to_screen('[download] Skipping fragment %d...' % frag_index)      def _prepare_url(self, info_dict, url):          headers = info_dict.get('http_headers') @@ -46,6 +69,51 @@ class FragmentFD(FileDownloader):          self._prepare_frag_download(ctx)          self._start_frag_download(ctx) +    @staticmethod +    def __do_ytdl_file(ctx): +        return not ctx['live'] and not ctx['tmpfilename'] == '-' + +    def _read_ytdl_file(self, ctx): +        stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'r') +        ctx['fragment_index'] = json.loads(stream.read())['downloader']['current_fragment']['index'] +        stream.close() + +    def _write_ytdl_file(self, ctx): +        frag_index_stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'w') +        downloader = { +            'current_fragment': { +                'index': ctx['fragment_index'], +            }, +        } +        if ctx.get('fragment_count') is not None: +            downloader['fragment_count'] = ctx['fragment_count'] +        frag_index_stream.write(json.dumps({'downloader': downloader})) +        frag_index_stream.close() + +    def _download_fragment(self, ctx, frag_url, info_dict, headers=None): +        fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], ctx['fragment_index']) +        success = ctx['dl'].download(fragment_filename, { +            'url': frag_url, +            'http_headers': headers or info_dict.get('http_headers'), +        }) +        if not success: +            return False, None +        down, frag_sanitized = sanitize_open(fragment_filename, 'rb') +        ctx['fragment_filename_sanitized'] = frag_sanitized +        frag_content = down.read() +        down.close() +        return True, frag_content + +    def _append_fragment(self, ctx, frag_content): +        try: +            ctx['dest_stream'].write(frag_content) +        finally: +            if self.__do_ytdl_file(ctx): +                self._write_ytdl_file(ctx) +            if not self.params.get('keep_fragments', False): +                os.remove(ctx['fragment_filename_sanitized']) +            del ctx['fragment_filename_sanitized'] +      def _prepare_frag_download(self, ctx):          if 'live' not in ctx:              ctx['live'] = False @@ -66,11 +134,36 @@ class FragmentFD(FileDownloader):              }          )          tmpfilename = self.temp_name(ctx['filename']) -        dest_stream, tmpfilename = sanitize_open(tmpfilename, 'wb') +        open_mode = 'wb' +        resume_len = 0 + +        # Establish possible resume length +        if os.path.isfile(encodeFilename(tmpfilename)): +            open_mode = 'ab' +            resume_len = os.path.getsize(encodeFilename(tmpfilename)) + +        # Should be initialized before ytdl file check +        ctx.update({ +            'tmpfilename': tmpfilename, +            'fragment_index': 0, +        }) + +        if self.__do_ytdl_file(ctx): +            if os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))): +                self._read_ytdl_file(ctx) +            else: +                self._write_ytdl_file(ctx) +            if ctx['fragment_index'] > 0: +                assert resume_len > 0 + +        dest_stream, tmpfilename = sanitize_open(tmpfilename, open_mode) +          ctx.update({              'dl': dl,              'dest_stream': dest_stream,              'tmpfilename': tmpfilename, +            # Total complete fragments downloaded so far in bytes +            'complete_frags_downloaded_bytes': resume_len,          })      def _start_frag_download(self, ctx): @@ -79,9 +172,9 @@ class FragmentFD(FileDownloader):          # hook          state = {              'status': 'downloading', -            'downloaded_bytes': 0, -            'frag_index': 0, -            'frag_count': total_frags, +            'downloaded_bytes': ctx['complete_frags_downloaded_bytes'], +            'fragment_index': ctx['fragment_index'], +            'fragment_count': total_frags,              'filename': ctx['filename'],              'tmpfilename': ctx['tmpfilename'],          } @@ -89,8 +182,6 @@ class FragmentFD(FileDownloader):          start = time.time()          ctx.update({              'started': start, -            # Total complete fragments downloaded so far in bytes -            'complete_frags_downloaded_bytes': 0,              # Amount of fragment's bytes downloaded by the time of the previous              # frag progress hook invocation              'prev_frag_downloaded_bytes': 0, @@ -106,11 +197,12 @@ class FragmentFD(FileDownloader):              if not ctx['live']:                  estimated_size = (                      (ctx['complete_frags_downloaded_bytes'] + frag_total_bytes) / -                    (state['frag_index'] + 1) * total_frags) +                    (state['fragment_index'] + 1) * total_frags)                  state['total_bytes_estimate'] = estimated_size              if s['status'] == 'finished': -                state['frag_index'] += 1 +                state['fragment_index'] += 1 +                ctx['fragment_index'] = state['fragment_index']                  state['downloaded_bytes'] += frag_total_bytes - ctx['prev_frag_downloaded_bytes']                  ctx['complete_frags_downloaded_bytes'] = state['downloaded_bytes']                  ctx['prev_frag_downloaded_bytes'] = 0 @@ -132,6 +224,10 @@ class FragmentFD(FileDownloader):      def _finish_frag_download(self, ctx):          ctx['dest_stream'].close() +        if self.__do_ytdl_file(ctx): +            ytdl_filename = encodeFilename(self.ytdl_filename(ctx['filename'])) +            if os.path.isfile(ytdl_filename): +                os.remove(ytdl_filename)          elapsed = time.time() - ctx['started']          self.try_rename(ctx['tmpfilename'], ctx['filename'])          fsize = os.path.getsize(encodeFilename(ctx['filename'])) | 
