diff options
author | Tithen-Firion <Tithen-Firion@users.noreply.github.com> | 2017-05-04 11:00:06 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-05-04 11:00:06 +0200 |
commit | c89267d31ad99eb5b1a87cd354de5280a2a087b1 (patch) | |
tree | 8bb3b01cd088d0646089344bddd3d4ff272c0065 /youtube_dl/downloader | |
parent | 7552f96352f35cd877e52fd0770b77ba1856fc62 (diff) | |
parent | 0c265486016b06342fb257966474ce591667aaff (diff) |
Merge branch 'master' into openload-phantomjs-method
Diffstat (limited to 'youtube_dl/downloader')
-rw-r--r-- | youtube_dl/downloader/common.py | 34 | ||||
-rw-r--r-- | youtube_dl/downloader/dash.py | 43 | ||||
-rw-r--r-- | youtube_dl/downloader/external.py | 12 | ||||
-rw-r--r-- | youtube_dl/downloader/f4m.py | 33 | ||||
-rw-r--r-- | youtube_dl/downloader/fragment.py | 122 | ||||
-rw-r--r-- | youtube_dl/downloader/hls.py | 34 | ||||
-rw-r--r-- | youtube_dl/downloader/ism.py | 34 |
7 files changed, 182 insertions, 130 deletions
diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py index 2c4470a95..5d6621147 100644 --- a/youtube_dl/downloader/common.py +++ b/youtube_dl/downloader/common.py @@ -187,6 +187,9 @@ class FileDownloader(object): return filename[:-len('.part')] return filename + def ytdl_filename(self, filename): + return filename + '.ytdl' + def try_rename(self, old_filename, new_filename): try: if old_filename == new_filename: @@ -327,21 +330,22 @@ class FileDownloader(object): os.path.exists(encodeFilename(filename)) ) - continuedl_and_exists = ( - self.params.get('continuedl', True) and - os.path.isfile(encodeFilename(filename)) and - not self.params.get('nopart', False) - ) - - # Check file already present - if filename != '-' and (nooverwrites_and_exists or continuedl_and_exists): - self.report_file_already_downloaded(filename) - self._hook_progress({ - 'filename': filename, - 'status': 'finished', - 'total_bytes': os.path.getsize(encodeFilename(filename)), - }) - return True + if not hasattr(filename, 'write'): + continuedl_and_exists = ( + self.params.get('continuedl', True) and + os.path.isfile(encodeFilename(filename)) and + not self.params.get('nopart', False) + ) + + # Check file already present + if filename != '-' and (nooverwrites_and_exists or continuedl_and_exists): + self.report_file_already_downloaded(filename) + self._hook_progress({ + 'filename': filename, + 'status': 'finished', + 'total_bytes': os.path.getsize(encodeFilename(filename)), + }) + return True min_sleep_interval = self.params.get('sleep_interval') if min_sleep_interval: diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py index e2ddc369e..7491fdad8 100644 --- a/youtube_dl/downloader/dash.py +++ b/youtube_dl/downloader/dash.py @@ -1,13 +1,7 @@ from __future__ import unicode_literals -import os - from .fragment import FragmentFD from ..compat import compat_urllib_error -from ..utils import ( - sanitize_open, - encodeFilename, -) class DashSegmentsFD(FragmentFD): @@ -28,31 +22,24 @@ class DashSegmentsFD(FragmentFD): self._prepare_and_start_frag_download(ctx) - segments_filenames = [] - fragment_retries = self.params.get('fragment_retries', 0) skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) - def process_segment(segment, tmp_filename, num): - segment_url = segment['url'] - segment_name = 'Frag%d' % num - target_filename = '%s-%s' % (tmp_filename, segment_name) + frag_index = 0 + for i, segment in enumerate(segments): + frag_index += 1 + if frag_index <= ctx['fragment_index']: + continue # In DASH, the first segment contains necessary headers to # generate a valid MP4 file, so always abort for the first segment - fatal = num == 0 or not skip_unavailable_fragments + fatal = i == 0 or not skip_unavailable_fragments count = 0 while count <= fragment_retries: try: - success = ctx['dl'].download(target_filename, { - 'url': segment_url, - 'http_headers': info_dict.get('http_headers'), - }) + success, frag_content = self._download_fragment(ctx, segment['url'], info_dict) if not success: return False - down, target_sanitized = sanitize_open(target_filename, 'rb') - ctx['dest_stream'].write(down.read()) - down.close() - segments_filenames.append(target_sanitized) + self._append_fragment(ctx, frag_content) break except compat_urllib_error.HTTPError as err: # YouTube may often return 404 HTTP error for a fragment causing the @@ -63,22 +50,14 @@ class DashSegmentsFD(FragmentFD): # HTTP error. count += 1 if count <= fragment_retries: - self.report_retry_fragment(err, segment_name, count, fragment_retries) + self.report_retry_fragment(err, frag_index, count, fragment_retries) if count > fragment_retries: if not fatal: - self.report_skip_fragment(segment_name) - return True + self.report_skip_fragment(frag_index) + continue self.report_error('giving up after %s fragment retries' % fragment_retries) return False - return True - - for i, segment in enumerate(segments): - if not process_segment(segment, ctx['tmpfilename'], i): - return False self._finish_frag_download(ctx) - for segment_file in segments_filenames: - os.remove(encodeFilename(segment_file)) - return True diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index e13cf547d..e78169a0d 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -29,7 +29,17 @@ class ExternalFD(FileDownloader): self.report_destination(filename) tmpfilename = self.temp_name(filename) - retval = self._call_downloader(tmpfilename, info_dict) + try: + retval = self._call_downloader(tmpfilename, info_dict) + except KeyboardInterrupt: + if not info_dict.get('is_live'): + raise + # Live stream downloading cancellation should be considered as + # correct and expected termination thus all postprocessing + # should take place + retval = 0 + self.to_screen('[%s] Interrupted by user' % self.get_basename()) + if retval == 0: fsize = os.path.getsize(encodeFilename(tmpfilename)) self.to_screen('\r[%s] Downloaded %s bytes' % (self.get_basename(), fsize)) diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py index 688e086eb..c8fde9a89 100644 --- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -3,7 +3,6 @@ from __future__ import division, unicode_literals import base64 import io import itertools -import os import time from .fragment import FragmentFD @@ -16,9 +15,7 @@ from ..compat import ( compat_struct_unpack, ) from ..utils import ( - encodeFilename, fix_xml_ampersands, - sanitize_open, xpath_text, ) @@ -366,17 +363,21 @@ class F4mFD(FragmentFD): dest_stream = ctx['dest_stream'] - write_flv_header(dest_stream) - if not live: - write_metadata_tag(dest_stream, metadata) + if ctx['complete_frags_downloaded_bytes'] == 0: + write_flv_header(dest_stream) + if not live: + write_metadata_tag(dest_stream, metadata) base_url_parsed = compat_urllib_parse_urlparse(base_url) self._start_frag_download(ctx) - frags_filenames = [] + frag_index = 0 while fragments_list: seg_i, frag_i = fragments_list.pop(0) + frag_index += 1 + if frag_index <= ctx['fragment_index']: + continue name = 'Seg%d-Frag%d' % (seg_i, frag_i) query = [] if base_url_parsed.query: @@ -386,17 +387,10 @@ class F4mFD(FragmentFD): if info_dict.get('extra_param_to_segment_url'): query.append(info_dict['extra_param_to_segment_url']) url_parsed = base_url_parsed._replace(path=base_url_parsed.path + name, query='&'.join(query)) - frag_filename = '%s-%s' % (ctx['tmpfilename'], name) try: - success = ctx['dl'].download(frag_filename, { - 'url': url_parsed.geturl(), - 'http_headers': info_dict.get('http_headers'), - }) + success, down_data = self._download_fragment(ctx, url_parsed.geturl(), info_dict) if not success: return False - (down, frag_sanitized) = sanitize_open(frag_filename, 'rb') - down_data = down.read() - down.close() reader = FlvReader(down_data) while True: try: @@ -411,12 +405,8 @@ class F4mFD(FragmentFD): break raise if box_type == b'mdat': - dest_stream.write(box_data) + self._append_fragment(ctx, box_data) break - if live: - os.remove(encodeFilename(frag_sanitized)) - else: - frags_filenames.append(frag_sanitized) except (compat_urllib_error.HTTPError, ) as err: if live and (err.code == 404 or err.code == 410): # We didn't keep up with the live window. Continue @@ -436,7 +426,4 @@ class F4mFD(FragmentFD): self._finish_frag_download(ctx) - for frag_file in frags_filenames: - os.remove(encodeFilename(frag_file)) - return True diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py index 56f975266..bccc8ecc1 100644 --- a/youtube_dl/downloader/fragment.py +++ b/youtube_dl/downloader/fragment.py @@ -2,6 +2,7 @@ from __future__ import division, unicode_literals import os import time +import json from .common import FileDownloader from .http import HttpFD @@ -28,15 +29,37 @@ class FragmentFD(FileDownloader): and hlsnative only) skip_unavailable_fragments: Skip unavailable fragments (DASH and hlsnative only) + keep_fragments: Keep downloaded fragments on disk after downloading is + finished + + For each incomplete fragment download youtube-dl keeps on disk a special + bookkeeping file with download state and metadata (in future such files will + be used for any incomplete download handled by youtube-dl). This file is + used to properly handle resuming, check download file consistency and detect + potential errors. The file has a .ytdl extension and represents a standard + JSON file of the following format: + + extractor: + Dictionary of extractor related data. TBD. + + downloader: + Dictionary of downloader related data. May contain following data: + current_fragment: + Dictionary with current (being downloaded) fragment data: + index: 0-based index of current fragment among all fragments + fragment_count: + Total count of fragments + + This feature is experimental and file format may change in future. """ - def report_retry_fragment(self, err, fragment_name, count, retries): + def report_retry_fragment(self, err, frag_index, count, retries): self.to_screen( - '[download] Got server HTTP error: %s. Retrying fragment %s (attempt %d of %s)...' - % (error_to_compat_str(err), fragment_name, count, self.format_retries(retries))) + '[download] Got server HTTP error: %s. Retrying fragment %d (attempt %d of %s)...' + % (error_to_compat_str(err), frag_index, count, self.format_retries(retries))) - def report_skip_fragment(self, fragment_name): - self.to_screen('[download] Skipping fragment %s...' % fragment_name) + def report_skip_fragment(self, frag_index): + self.to_screen('[download] Skipping fragment %d...' % frag_index) def _prepare_url(self, info_dict, url): headers = info_dict.get('http_headers') @@ -46,6 +69,51 @@ class FragmentFD(FileDownloader): self._prepare_frag_download(ctx) self._start_frag_download(ctx) + @staticmethod + def __do_ytdl_file(ctx): + return not ctx['live'] and not ctx['tmpfilename'] == '-' + + def _read_ytdl_file(self, ctx): + stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'r') + ctx['fragment_index'] = json.loads(stream.read())['downloader']['current_fragment']['index'] + stream.close() + + def _write_ytdl_file(self, ctx): + frag_index_stream, _ = sanitize_open(self.ytdl_filename(ctx['filename']), 'w') + downloader = { + 'current_fragment': { + 'index': ctx['fragment_index'], + }, + } + if ctx.get('fragment_count') is not None: + downloader['fragment_count'] = ctx['fragment_count'] + frag_index_stream.write(json.dumps({'downloader': downloader})) + frag_index_stream.close() + + def _download_fragment(self, ctx, frag_url, info_dict, headers=None): + fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], ctx['fragment_index']) + success = ctx['dl'].download(fragment_filename, { + 'url': frag_url, + 'http_headers': headers or info_dict.get('http_headers'), + }) + if not success: + return False, None + down, frag_sanitized = sanitize_open(fragment_filename, 'rb') + ctx['fragment_filename_sanitized'] = frag_sanitized + frag_content = down.read() + down.close() + return True, frag_content + + def _append_fragment(self, ctx, frag_content): + try: + ctx['dest_stream'].write(frag_content) + finally: + if self.__do_ytdl_file(ctx): + self._write_ytdl_file(ctx) + if not self.params.get('keep_fragments', False): + os.remove(ctx['fragment_filename_sanitized']) + del ctx['fragment_filename_sanitized'] + def _prepare_frag_download(self, ctx): if 'live' not in ctx: ctx['live'] = False @@ -66,11 +134,36 @@ class FragmentFD(FileDownloader): } ) tmpfilename = self.temp_name(ctx['filename']) - dest_stream, tmpfilename = sanitize_open(tmpfilename, 'wb') + open_mode = 'wb' + resume_len = 0 + + # Establish possible resume length + if os.path.isfile(encodeFilename(tmpfilename)): + open_mode = 'ab' + resume_len = os.path.getsize(encodeFilename(tmpfilename)) + + # Should be initialized before ytdl file check + ctx.update({ + 'tmpfilename': tmpfilename, + 'fragment_index': 0, + }) + + if self.__do_ytdl_file(ctx): + if os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))): + self._read_ytdl_file(ctx) + else: + self._write_ytdl_file(ctx) + if ctx['fragment_index'] > 0: + assert resume_len > 0 + + dest_stream, tmpfilename = sanitize_open(tmpfilename, open_mode) + ctx.update({ 'dl': dl, 'dest_stream': dest_stream, 'tmpfilename': tmpfilename, + # Total complete fragments downloaded so far in bytes + 'complete_frags_downloaded_bytes': resume_len, }) def _start_frag_download(self, ctx): @@ -79,9 +172,9 @@ class FragmentFD(FileDownloader): # hook state = { 'status': 'downloading', - 'downloaded_bytes': 0, - 'frag_index': 0, - 'frag_count': total_frags, + 'downloaded_bytes': ctx['complete_frags_downloaded_bytes'], + 'fragment_index': ctx['fragment_index'], + 'fragment_count': total_frags, 'filename': ctx['filename'], 'tmpfilename': ctx['tmpfilename'], } @@ -89,8 +182,6 @@ class FragmentFD(FileDownloader): start = time.time() ctx.update({ 'started': start, - # Total complete fragments downloaded so far in bytes - 'complete_frags_downloaded_bytes': 0, # Amount of fragment's bytes downloaded by the time of the previous # frag progress hook invocation 'prev_frag_downloaded_bytes': 0, @@ -106,11 +197,12 @@ class FragmentFD(FileDownloader): if not ctx['live']: estimated_size = ( (ctx['complete_frags_downloaded_bytes'] + frag_total_bytes) / - (state['frag_index'] + 1) * total_frags) + (state['fragment_index'] + 1) * total_frags) state['total_bytes_estimate'] = estimated_size if s['status'] == 'finished': - state['frag_index'] += 1 + state['fragment_index'] += 1 + ctx['fragment_index'] = state['fragment_index'] state['downloaded_bytes'] += frag_total_bytes - ctx['prev_frag_downloaded_bytes'] ctx['complete_frags_downloaded_bytes'] = state['downloaded_bytes'] ctx['prev_frag_downloaded_bytes'] = 0 @@ -132,6 +224,10 @@ class FragmentFD(FileDownloader): def _finish_frag_download(self, ctx): ctx['dest_stream'].close() + if self.__do_ytdl_file(ctx): + ytdl_filename = encodeFilename(self.ytdl_filename(ctx['filename'])) + if os.path.isfile(ytdl_filename): + os.remove(ytdl_filename) elapsed = time.time() - ctx['started'] self.try_rename(ctx['tmpfilename'], ctx['filename']) fsize = os.path.getsize(encodeFilename(ctx['filename'])) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index d0a5f7ba4..0e29c8a2a 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -1,6 +1,5 @@ from __future__ import unicode_literals -import os.path import re import binascii try: @@ -18,8 +17,6 @@ from ..compat import ( compat_struct_pack, ) from ..utils import ( - encodeFilename, - sanitize_open, parse_m3u8_attributes, update_url_query, ) @@ -103,17 +100,18 @@ class HlsFD(FragmentFD): media_sequence = 0 decrypt_info = {'METHOD': 'NONE'} byte_range = {} - frags_filenames = [] + frag_index = 0 for line in s.splitlines(): line = line.strip() if line: if not line.startswith('#'): + frag_index += 1 + if frag_index <= ctx['fragment_index']: + continue frag_url = ( line if re.match(r'^https?://', line) else compat_urlparse.urljoin(man_url, line)) - frag_name = 'Frag%d' % i - frag_filename = '%s-%s' % (ctx['tmpfilename'], frag_name) if extra_query: frag_url = update_url_query(frag_url, extra_query) count = 0 @@ -122,15 +120,10 @@ class HlsFD(FragmentFD): headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end']) while count <= fragment_retries: try: - success = ctx['dl'].download(frag_filename, { - 'url': frag_url, - 'http_headers': headers, - }) + success, frag_content = self._download_fragment( + ctx, frag_url, info_dict, headers) if not success: return False - down, frag_sanitized = sanitize_open(frag_filename, 'rb') - frag_content = down.read() - down.close() break except compat_urllib_error.HTTPError as err: # Unavailable (possibly temporary) fragments may be served. @@ -139,28 +132,29 @@ class HlsFD(FragmentFD): # https://github.com/rg3/youtube-dl/issues/10448). count += 1 if count <= fragment_retries: - self.report_retry_fragment(err, frag_name, count, fragment_retries) + self.report_retry_fragment(err, frag_index, count, fragment_retries) if count > fragment_retries: if skip_unavailable_fragments: i += 1 media_sequence += 1 - self.report_skip_fragment(frag_name) + self.report_skip_fragment(frag_index) continue self.report_error( 'giving up after %s fragment retries' % fragment_retries) return False if decrypt_info['METHOD'] == 'AES-128': iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence) + decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen(decrypt_info['URI']).read() frag_content = AES.new( decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content) - ctx['dest_stream'].write(frag_content) - frags_filenames.append(frag_sanitized) + self._append_fragment(ctx, frag_content) # We only download the first fragment during the test if test: break i += 1 media_sequence += 1 elif line.startswith('#EXT-X-KEY'): + decrypt_url = decrypt_info.get('URI') decrypt_info = parse_m3u8_attributes(line[11:]) if decrypt_info['METHOD'] == 'AES-128': if 'IV' in decrypt_info: @@ -170,7 +164,8 @@ class HlsFD(FragmentFD): man_url, decrypt_info['URI']) if extra_query: decrypt_info['URI'] = update_url_query(decrypt_info['URI'], extra_query) - decrypt_info['KEY'] = self.ydl.urlopen(decrypt_info['URI']).read() + if decrypt_url != decrypt_info['URI']: + decrypt_info['KEY'] = None elif line.startswith('#EXT-X-MEDIA-SEQUENCE'): media_sequence = int(line[22:]) elif line.startswith('#EXT-X-BYTERANGE'): @@ -183,7 +178,4 @@ class HlsFD(FragmentFD): self._finish_frag_download(ctx) - for frag_file in frags_filenames: - os.remove(encodeFilename(frag_file)) - return True diff --git a/youtube_dl/downloader/ism.py b/youtube_dl/downloader/ism.py index 63a636cb7..5f6f9faef 100644 --- a/youtube_dl/downloader/ism.py +++ b/youtube_dl/downloader/ism.py @@ -1,6 +1,5 @@ from __future__ import unicode_literals -import os import time import struct import binascii @@ -8,10 +7,6 @@ import io from .fragment import FragmentFD from ..compat import compat_urllib_error -from ..utils import ( - sanitize_open, - encodeFilename, -) u8 = struct.Struct(b'>B') @@ -225,50 +220,39 @@ class IsmFD(FragmentFD): self._prepare_and_start_frag_download(ctx) - segments_filenames = [] - fragment_retries = self.params.get('fragment_retries', 0) skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) track_written = False + frag_index = 0 for i, segment in enumerate(segments): - segment_url = segment['url'] - segment_name = 'Frag%d' % i - target_filename = '%s-%s' % (ctx['tmpfilename'], segment_name) + frag_index += 1 + if frag_index <= ctx['fragment_index']: + continue count = 0 while count <= fragment_retries: try: - success = ctx['dl'].download(target_filename, { - 'url': segment_url, - 'http_headers': info_dict.get('http_headers'), - }) + success, frag_content = self._download_fragment(ctx, segment['url'], info_dict) if not success: return False - down, target_sanitized = sanitize_open(target_filename, 'rb') - down_data = down.read() if not track_written: - tfhd_data = extract_box_data(down_data, [b'moof', b'traf', b'tfhd']) + tfhd_data = extract_box_data(frag_content, [b'moof', b'traf', b'tfhd']) info_dict['_download_params']['track_id'] = u32.unpack(tfhd_data[4:8])[0] write_piff_header(ctx['dest_stream'], info_dict['_download_params']) track_written = True - ctx['dest_stream'].write(down_data) - down.close() - segments_filenames.append(target_sanitized) + self._append_fragment(ctx, frag_content) break except compat_urllib_error.HTTPError as err: count += 1 if count <= fragment_retries: - self.report_retry_fragment(err, segment_name, count, fragment_retries) + self.report_retry_fragment(err, frag_index, count, fragment_retries) if count > fragment_retries: if skip_unavailable_fragments: - self.report_skip_fragment(segment_name) + self.report_skip_fragment(frag_index) continue self.report_error('giving up after %s fragment retries' % fragment_retries) return False self._finish_frag_download(ctx) - for segment_file in segments_filenames: - os.remove(encodeFilename(segment_file)) - return True |