diff options
Diffstat (limited to 'youtube_dl/downloader')
-rw-r--r-- | youtube_dl/downloader/__init__.py | 2 | ||||
-rw-r--r-- | youtube_dl/downloader/dash.py | 66 | ||||
-rw-r--r-- | youtube_dl/downloader/external.py | 40 | ||||
-rw-r--r-- | youtube_dl/downloader/f4m.py | 98 | ||||
-rw-r--r-- | youtube_dl/downloader/fragment.py | 111 | ||||
-rw-r--r-- | youtube_dl/downloader/hls.py | 81 | ||||
-rw-r--r-- | youtube_dl/downloader/http.py | 19 | ||||
-rw-r--r-- | youtube_dl/downloader/rtmp.py | 2 |
8 files changed, 287 insertions, 132 deletions
diff --git a/youtube_dl/downloader/__init__.py b/youtube_dl/downloader/__init__.py index f110830c4..dccc59212 100644 --- a/youtube_dl/downloader/__init__.py +++ b/youtube_dl/downloader/__init__.py @@ -8,6 +8,7 @@ from .hls import NativeHlsFD from .http import HttpFD from .rtsp import RtspFD from .rtmp import RtmpFD +from .dash import DashSegmentsFD from ..utils import ( determine_protocol, @@ -20,6 +21,7 @@ PROTOCOL_MAP = { 'mms': RtspFD, 'rtsp': RtspFD, 'f4m': F4mFD, + 'http_dash_segments': DashSegmentsFD, } diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py new file mode 100644 index 000000000..8b6fa2753 --- /dev/null +++ b/youtube_dl/downloader/dash.py @@ -0,0 +1,66 @@ +from __future__ import unicode_literals + +import re + +from .common import FileDownloader +from ..compat import compat_urllib_request + + +class DashSegmentsFD(FileDownloader): + """ + Download segments in a DASH manifest + """ + def real_download(self, filename, info_dict): + self.report_destination(filename) + tmpfilename = self.temp_name(filename) + base_url = info_dict['url'] + segment_urls = info_dict['segment_urls'] + + is_test = self.params.get('test', False) + remaining_bytes = self._TEST_FILE_SIZE if is_test else None + byte_counter = 0 + + def append_url_to_file(outf, target_url, target_name, remaining_bytes=None): + self.to_screen('[DashSegments] %s: Downloading %s' % (info_dict['id'], target_name)) + req = compat_urllib_request.Request(target_url) + if remaining_bytes is not None: + req.add_header('Range', 'bytes=0-%d' % (remaining_bytes - 1)) + + data = self.ydl.urlopen(req).read() + + if remaining_bytes is not None: + data = data[:remaining_bytes] + + outf.write(data) + return len(data) + + def combine_url(base_url, target_url): + if re.match(r'^https?://', target_url): + return target_url + return '%s%s%s' % (base_url, '' if base_url.endswith('/') else '/', target_url) + + with open(tmpfilename, 'wb') as outf: + append_url_to_file( + outf, combine_url(base_url, info_dict['initialization_url']), + 'initialization segment') + for i, segment_url in enumerate(segment_urls): + segment_len = append_url_to_file( + outf, combine_url(base_url, segment_url), + 'segment %d / %d' % (i + 1, len(segment_urls)), + remaining_bytes) + byte_counter += segment_len + if remaining_bytes is not None: + remaining_bytes -= segment_len + if remaining_bytes <= 0: + break + + self.try_rename(tmpfilename, filename) + + self._hook_progress({ + 'downloaded_bytes': byte_counter, + 'total_bytes': byte_counter, + 'filename': filename, + 'status': 'finished', + }) + + return True diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index 7ca2d3143..6c310346c 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -45,11 +45,13 @@ class ExternalFD(FileDownloader): def supports(cls, info_dict): return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps') - def _source_address(self, command_option): - source_address = self.params.get('source_address') - if source_address is None: + def _option(self, command_option, param): + param = self.params.get(param) + if param is None: return [] - return [command_option, source_address] + if isinstance(param, bool): + return [command_option] + return [command_option, param] def _configuration_args(self, default=[]): ex_args = self.params.get('external_downloader_args') @@ -77,7 +79,17 @@ class CurlFD(ExternalFD): cmd = [self.exe, '--location', '-o', tmpfilename] for key, val in info_dict['http_headers'].items(): cmd += ['--header', '%s: %s' % (key, val)] - cmd += self._source_address('--interface') + cmd += self._option('--interface', 'source_address') + cmd += self._configuration_args() + cmd += ['--', info_dict['url']] + return cmd + + +class AxelFD(ExternalFD): + def _make_cmd(self, tmpfilename, info_dict): + cmd = [self.exe, '-o', tmpfilename] + for key, val in info_dict['http_headers'].items(): + cmd += ['-H', '%s: %s' % (key, val)] cmd += self._configuration_args() cmd += ['--', info_dict['url']] return cmd @@ -88,7 +100,9 @@ class WgetFD(ExternalFD): cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies'] for key, val in info_dict['http_headers'].items(): cmd += ['--header', '%s: %s' % (key, val)] - cmd += self._source_address('--bind-address') + cmd += self._option('--bind-address', 'source_address') + cmd += self._option('--proxy', 'proxy') + cmd += self._option('--no-check-certificate', 'nocheckcertificate') cmd += self._configuration_args() cmd += ['--', info_dict['url']] return cmd @@ -105,10 +119,19 @@ class Aria2cFD(ExternalFD): cmd += ['--out', os.path.basename(tmpfilename)] for key, val in info_dict['http_headers'].items(): cmd += ['--header', '%s: %s' % (key, val)] - cmd += self._source_address('--interface') + cmd += self._option('--interface', 'source_address') + cmd += self._option('--all-proxy', 'proxy') cmd += ['--', info_dict['url']] return cmd + +class HttpieFD(ExternalFD): + def _make_cmd(self, tmpfilename, info_dict): + cmd = ['http', '--download', '--output', tmpfilename, info_dict['url']] + for key, val in info_dict['http_headers'].items(): + cmd += ['%s:%s' % (key, val)] + return cmd + _BY_NAME = dict( (klass.get_basename(), klass) for name, klass in globals().items() @@ -123,5 +146,6 @@ def list_external_downloaders(): def get_external_downloader(external_downloader): """ Given the name of the executable, see whether we support the given downloader . """ - bn = os.path.basename(external_downloader) + # Drop .exe extension on Windows + bn = os.path.splitext(os.path.basename(external_downloader))[0] return _BY_NAME[bn] diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py index 3cb07e15f..f478fc03c 100644 --- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -7,8 +7,7 @@ import os import time import xml.etree.ElementTree as etree -from .common import FileDownloader -from .http import HttpFD +from .fragment import FragmentFD from ..compat import ( compat_urlparse, compat_urllib_error, @@ -16,8 +15,6 @@ from ..compat import ( from ..utils import ( struct_pack, struct_unpack, - encodeFilename, - sanitize_open, xpath_text, ) @@ -226,16 +223,13 @@ def _add_ns(prop): return '{http://ns.adobe.com/f4m/1.0}%s' % prop -class HttpQuietDownloader(HttpFD): - def to_screen(self, *args, **kargs): - pass - - -class F4mFD(FileDownloader): +class F4mFD(FragmentFD): """ A downloader for f4m manifests or AdobeHDS. """ + FD_NAME = 'f4m' + def _get_unencrypted_media(self, doc): media = doc.findall(_add_ns('media')) if not media: @@ -288,7 +282,7 @@ class F4mFD(FileDownloader): def real_download(self, filename, info_dict): man_url = info_dict['url'] requested_bitrate = info_dict.get('tbr') - self.to_screen('[download] Downloading f4m manifest') + self.to_screen('[%s] Downloading f4m manifest' % self.FD_NAME) manifest = self.ydl.urlopen(man_url).read() doc = etree.fromstring(manifest) @@ -320,67 +314,20 @@ class F4mFD(FileDownloader): # For some akamai manifests we'll need to add a query to the fragment url akamai_pv = xpath_text(doc, _add_ns('pv-2.0')) - self.report_destination(filename) - http_dl = HttpQuietDownloader( - self.ydl, - { - 'continuedl': True, - 'quiet': True, - 'noprogress': True, - 'ratelimit': self.params.get('ratelimit', None), - 'test': self.params.get('test', False), - } - ) - tmpfilename = self.temp_name(filename) - (dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb') + ctx = { + 'filename': filename, + 'total_frags': total_frags, + } + + self._prepare_frag_download(ctx) + + dest_stream = ctx['dest_stream'] write_flv_header(dest_stream) if not live: write_metadata_tag(dest_stream, metadata) - # This dict stores the download progress, it's updated by the progress - # hook - state = { - 'status': 'downloading', - 'downloaded_bytes': 0, - 'frag_index': 0, - 'frag_count': total_frags, - 'filename': filename, - 'tmpfilename': tmpfilename, - } - start = time.time() - - def frag_progress_hook(s): - if s['status'] not in ('downloading', 'finished'): - return - - frag_total_bytes = s.get('total_bytes', 0) - if s['status'] == 'finished': - state['downloaded_bytes'] += frag_total_bytes - state['frag_index'] += 1 - - estimated_size = ( - (state['downloaded_bytes'] + frag_total_bytes) / - (state['frag_index'] + 1) * total_frags) - time_now = time.time() - state['total_bytes_estimate'] = estimated_size - state['elapsed'] = time_now - start - - if s['status'] == 'finished': - progress = self.calc_percent(state['frag_index'], total_frags) - else: - frag_downloaded_bytes = s['downloaded_bytes'] - frag_progress = self.calc_percent(frag_downloaded_bytes, - frag_total_bytes) - progress = self.calc_percent(state['frag_index'], total_frags) - progress += frag_progress / float(total_frags) - - state['eta'] = self.calc_eta( - start, time_now, estimated_size, state['downloaded_bytes'] + frag_downloaded_bytes) - state['speed'] = s.get('speed') - self._hook_progress(state) - - http_dl.add_progress_hook(frag_progress_hook) + self._start_frag_download(ctx) frags_filenames = [] while fragments_list: @@ -391,9 +338,9 @@ class F4mFD(FileDownloader): url += '?' + akamai_pv.strip(';') if info_dict.get('extra_param_to_segment_url'): url += info_dict.get('extra_param_to_segment_url') - frag_filename = '%s-%s' % (tmpfilename, name) + frag_filename = '%s-%s' % (ctx['tmpfilename'], name) try: - success = http_dl.download(frag_filename, {'url': url}) + success = ctx['dl'].download(frag_filename, {'url': url}) if not success: return False (down, frag_sanitized) = sanitize_open(frag_filename, 'rb') @@ -426,20 +373,9 @@ class F4mFD(FileDownloader): msg = 'Missed %d fragments' % (fragments_list[0][1] - (frag_i + 1)) self.report_warning(msg) - dest_stream.close() + self._finish_frag_download(ctx) - elapsed = time.time() - start - self.try_rename(tmpfilename, filename) for frag_file in frags_filenames: os.remove(encodeFilename(frag_file)) - fsize = os.path.getsize(encodeFilename(filename)) - self._hook_progress({ - 'downloaded_bytes': fsize, - 'total_bytes': fsize, - 'filename': filename, - 'status': 'finished', - 'elapsed': elapsed, - }) - return True diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py new file mode 100644 index 000000000..5a64b29ee --- /dev/null +++ b/youtube_dl/downloader/fragment.py @@ -0,0 +1,111 @@ +from __future__ import division, unicode_literals + +import os +import time + +from .common import FileDownloader +from .http import HttpFD +from ..utils import ( + encodeFilename, + sanitize_open, +) + + +class HttpQuietDownloader(HttpFD): + def to_screen(self, *args, **kargs): + pass + + +class FragmentFD(FileDownloader): + """ + A base file downloader class for fragmented media (e.g. f4m/m3u8 manifests). + """ + + def _prepare_and_start_frag_download(self, ctx): + self._prepare_frag_download(ctx) + self._start_frag_download(ctx) + + def _prepare_frag_download(self, ctx): + self.to_screen('[%s] Total fragments: %d' % (self.FD_NAME, ctx['total_frags'])) + self.report_destination(ctx['filename']) + dl = HttpQuietDownloader( + self.ydl, + { + 'continuedl': True, + 'quiet': True, + 'noprogress': True, + 'ratelimit': self.params.get('ratelimit', None), + 'retries': self.params.get('retries', 0), + 'test': self.params.get('test', False), + } + ) + tmpfilename = self.temp_name(ctx['filename']) + dest_stream, tmpfilename = sanitize_open(tmpfilename, 'wb') + ctx.update({ + 'dl': dl, + 'dest_stream': dest_stream, + 'tmpfilename': tmpfilename, + }) + + def _start_frag_download(self, ctx): + total_frags = ctx['total_frags'] + # This dict stores the download progress, it's updated by the progress + # hook + state = { + 'status': 'downloading', + 'downloaded_bytes': 0, + 'frag_index': 0, + 'frag_count': total_frags, + 'filename': ctx['filename'], + 'tmpfilename': ctx['tmpfilename'], + } + start = time.time() + ctx['started'] = start + + def frag_progress_hook(s): + if s['status'] not in ('downloading', 'finished'): + return + + frag_total_bytes = s.get('total_bytes', 0) + if s['status'] == 'finished': + state['downloaded_bytes'] += frag_total_bytes + state['frag_index'] += 1 + + estimated_size = ( + (state['downloaded_bytes'] + frag_total_bytes) / + (state['frag_index'] + 1) * total_frags) + time_now = time.time() + state['total_bytes_estimate'] = estimated_size + state['elapsed'] = time_now - start + + if s['status'] == 'finished': + progress = self.calc_percent(state['frag_index'], total_frags) + else: + frag_downloaded_bytes = s['downloaded_bytes'] + frag_progress = self.calc_percent(frag_downloaded_bytes, + frag_total_bytes) + progress = self.calc_percent(state['frag_index'], total_frags) + progress += frag_progress / float(total_frags) + + state['eta'] = self.calc_eta( + start, time_now, estimated_size, state['downloaded_bytes'] + frag_downloaded_bytes) + state['speed'] = s.get('speed') + self._hook_progress(state) + + ctx['dl'].add_progress_hook(frag_progress_hook) + + return start + + def _finish_frag_download(self, ctx): + ctx['dest_stream'].close() + elapsed = time.time() - ctx['started'] + self.try_rename(ctx['tmpfilename'], ctx['filename']) + fsize = os.path.getsize(encodeFilename(ctx['filename'])) + + self._hook_progress({ + 'downloaded_bytes': fsize, + 'total_bytes': fsize, + 'filename': ctx['filename'], + 'status': 'finished', + 'elapsed': elapsed, + }) diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index 8be4f4249..2b6c3370f 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -4,12 +4,11 @@ import os import re import subprocess -from ..postprocessor.ffmpeg import FFmpegPostProcessor from .common import FileDownloader -from ..compat import ( - compat_urlparse, - compat_urllib_request, -) +from .fragment import FragmentFD + +from ..compat import compat_urlparse +from ..postprocessor.ffmpeg import FFmpegPostProcessor from ..utils import ( encodeArgument, encodeFilename, @@ -33,6 +32,8 @@ class HlsFD(FileDownloader): for opt in (ffpp.executable, '-y', '-i', url, '-f', 'mp4', '-c', 'copy', '-bsf:a', 'aac_adtstoasc')] args.append(encodeFilename(tmpfilename, True)) + self._debug_cmd(args) + retval = subprocess.call(args) if retval == 0: fsize = os.path.getsize(encodeFilename(tmpfilename)) @@ -51,54 +52,50 @@ class HlsFD(FileDownloader): return False -class NativeHlsFD(FileDownloader): +class NativeHlsFD(FragmentFD): """ A more limited implementation that does not require ffmpeg """ + FD_NAME = 'hlsnative' + def real_download(self, filename, info_dict): - url = info_dict['url'] - self.report_destination(filename) - tmpfilename = self.temp_name(filename) + man_url = info_dict['url'] + self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME) + manifest = self.ydl.urlopen(man_url).read() - self.to_screen( - '[hlsnative] %s: Downloading m3u8 manifest' % info_dict['id']) - data = self.ydl.urlopen(url).read() - s = data.decode('utf-8', 'ignore') - segment_urls = [] + s = manifest.decode('utf-8', 'ignore') + fragment_urls = [] for line in s.splitlines(): line = line.strip() if line and not line.startswith('#'): segment_url = ( line if re.match(r'^https?://', line) - else compat_urlparse.urljoin(url, line)) - segment_urls.append(segment_url) - - is_test = self.params.get('test', False) - remaining_bytes = self._TEST_FILE_SIZE if is_test else None - byte_counter = 0 - with open(tmpfilename, 'wb') as outf: - for i, segurl in enumerate(segment_urls): - self.to_screen( - '[hlsnative] %s: Downloading segment %d / %d' % - (info_dict['id'], i + 1, len(segment_urls))) - seg_req = compat_urllib_request.Request(segurl) - if remaining_bytes is not None: - seg_req.add_header('Range', 'bytes=0-%d' % (remaining_bytes - 1)) - - segment = self.ydl.urlopen(seg_req).read() - if remaining_bytes is not None: - segment = segment[:remaining_bytes] - remaining_bytes -= len(segment) - outf.write(segment) - byte_counter += len(segment) - if remaining_bytes is not None and remaining_bytes <= 0: + else compat_urlparse.urljoin(man_url, line)) + fragment_urls.append(segment_url) + # We only download the first fragment during the test + if self.params.get('test', False): break - self._hook_progress({ - 'downloaded_bytes': byte_counter, - 'total_bytes': byte_counter, + ctx = { 'filename': filename, - 'status': 'finished', - }) - self.try_rename(tmpfilename, filename) + 'total_frags': len(fragment_urls), + } + + self._prepare_and_start_frag_download(ctx) + + frags_filenames = [] + for i, frag_url in enumerate(fragment_urls): + frag_filename = '%s-Frag%d' % (ctx['tmpfilename'], i) + success = ctx['dl'].download(frag_filename, {'url': frag_url}) + if not success: + return False + with open(frag_filename, 'rb') as down: + ctx['dest_stream'].write(down.read()) + frags_filenames.append(frag_filename) + + self._finish_frag_download(ctx) + + for frag_file in frags_filenames: + os.remove(frag_file) + return True diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py index b7f144af9..a29f5cf31 100644 --- a/youtube_dl/downloader/http.py +++ b/youtube_dl/downloader/http.py @@ -4,6 +4,7 @@ import errno import os import socket import time +import re from .common import FileDownloader from ..compat import ( @@ -57,6 +58,24 @@ class HttpFD(FileDownloader): # Establish connection try: data = self.ydl.urlopen(request) + # When trying to resume, Content-Range HTTP header of response has to be checked + # to match the value of requested Range HTTP header. This is due to a webservers + # that don't support resuming and serve a whole file with no Content-Range + # set in response despite of requested Range (see + # https://github.com/rg3/youtube-dl/issues/6057#issuecomment-126129799) + if resume_len > 0: + content_range = data.headers.get('Content-Range') + if content_range: + content_range_m = re.search(r'bytes (\d+)-', content_range) + # Content-Range is present and matches requested Range, resume is possible + if content_range_m and resume_len == int(content_range_m.group(1)): + break + # Content-Range is either not present or invalid. Assuming remote webserver is + # trying to send the whole file, resume is not possible, so wiping the local file + # and performing entire redownload + self.report_unable_to_resume() + resume_len = 0 + open_mode = 'wb' break except (compat_urllib_error.HTTPError, ) as err: if (err.code < 500 or err.code >= 600) and err.code != 416: diff --git a/youtube_dl/downloader/rtmp.py b/youtube_dl/downloader/rtmp.py index 6865b5e2f..7d19bb808 100644 --- a/youtube_dl/downloader/rtmp.py +++ b/youtube_dl/downloader/rtmp.py @@ -131,7 +131,7 @@ class RtmpFD(FileDownloader): if play_path is not None: basic_args += ['--playpath', play_path] if tc_url is not None: - basic_args += ['--tcUrl', url] + basic_args += ['--tcUrl', tc_url] if test: basic_args += ['--stop', '1'] if flash_version is not None: |