aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl/downloader
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/downloader')
-rw-r--r--youtube_dl/downloader/__init__.py11
-rw-r--r--youtube_dl/downloader/common.py118
-rw-r--r--youtube_dl/downloader/dash.py66
-rw-r--r--youtube_dl/downloader/external.py79
-rw-r--r--youtube_dl/downloader/f4m.py202
-rw-r--r--youtube_dl/downloader/fragment.py111
-rw-r--r--youtube_dl/downloader/hls.py107
-rw-r--r--youtube_dl/downloader/http.py57
-rw-r--r--youtube_dl/downloader/rtmp.py48
-rw-r--r--youtube_dl/downloader/rtsp.py (renamed from youtube_dl/downloader/mplayer.py)20
10 files changed, 546 insertions, 273 deletions
diff --git a/youtube_dl/downloader/__init__.py b/youtube_dl/downloader/__init__.py
index eff1122c5..dccc59212 100644
--- a/youtube_dl/downloader/__init__.py
+++ b/youtube_dl/downloader/__init__.py
@@ -6,8 +6,9 @@ from .f4m import F4mFD
from .hls import HlsFD
from .hls import NativeHlsFD
from .http import HttpFD
-from .mplayer import MplayerFD
+from .rtsp import RtspFD
from .rtmp import RtmpFD
+from .dash import DashSegmentsFD
from ..utils import (
determine_protocol,
@@ -17,9 +18,10 @@ PROTOCOL_MAP = {
'rtmp': RtmpFD,
'm3u8_native': NativeHlsFD,
'm3u8': HlsFD,
- 'mms': MplayerFD,
- 'rtsp': MplayerFD,
+ 'mms': RtspFD,
+ 'rtsp': RtspFD,
'f4m': F4mFD,
+ 'http_dash_segments': DashSegmentsFD,
}
@@ -34,6 +36,9 @@ def get_suitable_downloader(info_dict, params={}):
if ed.supports(info_dict):
return ed
+ if protocol == 'm3u8' and params.get('hls_prefer_native'):
+ return NativeHlsFD
+
return PROTOCOL_MAP.get(protocol, HttpFD)
diff --git a/youtube_dl/downloader/common.py b/youtube_dl/downloader/common.py
index 7bb3a948d..b8bf8daf8 100644
--- a/youtube_dl/downloader/common.py
+++ b/youtube_dl/downloader/common.py
@@ -1,4 +1,4 @@
-from __future__ import unicode_literals
+from __future__ import division, unicode_literals
import os
import re
@@ -8,6 +8,7 @@ import time
from ..compat import compat_str
from ..utils import (
encodeFilename,
+ decodeArgument,
format_bytes,
timeconvert,
)
@@ -41,7 +42,9 @@ class FileDownloader(object):
min_filesize: Skip files smaller than this size
max_filesize: Skip files larger than this size
xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
- (experimenatal)
+ (experimental)
+ external_downloader_args: A list of additional command-line arguments for the
+ external downloader.
Subclasses of this one must re-define the real_download method.
"""
@@ -54,6 +57,7 @@ class FileDownloader(object):
self.ydl = ydl
self._progress_hooks = []
self.params = params
+ self.add_progress_hook(self.report_progress)
@staticmethod
def format_seconds(seconds):
@@ -201,7 +205,7 @@ class FileDownloader(object):
return
try:
os.utime(filename, (time.time(), filetime))
- except:
+ except Exception:
pass
return filetime
@@ -226,42 +230,64 @@ class FileDownloader(object):
self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line)
self.to_console_title('youtube-dl ' + msg)
- def report_progress(self, percent, data_len_str, speed, eta):
- """Report download progress."""
- if self.params.get('noprogress', False):
+ def report_progress(self, s):
+ if s['status'] == 'finished':
+ if self.params.get('noprogress', False):
+ self.to_screen('[download] Download completed')
+ else:
+ s['_total_bytes_str'] = format_bytes(s['total_bytes'])
+ if s.get('elapsed') is not None:
+ s['_elapsed_str'] = self.format_seconds(s['elapsed'])
+ msg_template = '100%% of %(_total_bytes_str)s in %(_elapsed_str)s'
+ else:
+ msg_template = '100%% of %(_total_bytes_str)s'
+ self._report_progress_status(
+ msg_template % s, is_last_line=True)
+
+ if self.params.get('noprogress'):
return
- if eta is not None:
- eta_str = self.format_eta(eta)
- else:
- eta_str = 'Unknown ETA'
- if percent is not None:
- percent_str = self.format_percent(percent)
+
+ if s['status'] != 'downloading':
+ return
+
+ if s.get('eta') is not None:
+ s['_eta_str'] = self.format_eta(s['eta'])
else:
- percent_str = 'Unknown %'
- speed_str = self.format_speed(speed)
+ s['_eta_str'] = 'Unknown ETA'
- msg = ('%s of %s at %s ETA %s' %
- (percent_str, data_len_str, speed_str, eta_str))
- self._report_progress_status(msg)
+ if s.get('total_bytes') and s.get('downloaded_bytes') is not None:
+ s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes'])
+ elif s.get('total_bytes_estimate') and s.get('downloaded_bytes') is not None:
+ s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes_estimate'])
+ else:
+ if s.get('downloaded_bytes') == 0:
+ s['_percent_str'] = self.format_percent(0)
+ else:
+ s['_percent_str'] = 'Unknown %'
- def report_progress_live_stream(self, downloaded_data_len, speed, elapsed):
- if self.params.get('noprogress', False):
- return
- downloaded_str = format_bytes(downloaded_data_len)
- speed_str = self.format_speed(speed)
- elapsed_str = FileDownloader.format_seconds(elapsed)
- msg = '%s at %s (%s)' % (downloaded_str, speed_str, elapsed_str)
- self._report_progress_status(msg)
-
- def report_finish(self, data_len_str, tot_time):
- """Report download finished."""
- if self.params.get('noprogress', False):
- self.to_screen('[download] Download completed')
+ if s.get('speed') is not None:
+ s['_speed_str'] = self.format_speed(s['speed'])
+ else:
+ s['_speed_str'] = 'Unknown speed'
+
+ if s.get('total_bytes') is not None:
+ s['_total_bytes_str'] = format_bytes(s['total_bytes'])
+ msg_template = '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s'
+ elif s.get('total_bytes_estimate') is not None:
+ s['_total_bytes_estimate_str'] = format_bytes(s['total_bytes_estimate'])
+ msg_template = '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s'
else:
- self._report_progress_status(
- ('100%% of %s in %s' %
- (data_len_str, self.format_seconds(tot_time))),
- is_last_line=True)
+ if s.get('downloaded_bytes') is not None:
+ s['_downloaded_bytes_str'] = format_bytes(s['downloaded_bytes'])
+ if s.get('elapsed'):
+ s['_elapsed_str'] = self.format_seconds(s['elapsed'])
+ msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)'
+ else:
+ msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s'
+ else:
+ msg_template = '%(_percent_str)s % at %(_speed_str)s ETA %(_eta_str)s'
+
+ self._report_progress_status(msg_template % s)
def report_resuming_byte(self, resume_len):
"""Report attempt to resume at given byte."""
@@ -288,18 +314,18 @@ class FileDownloader(object):
"""
nooverwrites_and_exists = (
- self.params.get('nooverwrites', False)
- and os.path.exists(encodeFilename(filename))
+ self.params.get('nooverwrites', False) and
+ os.path.exists(encodeFilename(filename))
)
continuedl_and_exists = (
- self.params.get('continuedl', False)
- and os.path.isfile(encodeFilename(filename))
- and not self.params.get('nopart', False)
+ self.params.get('continuedl', True) and
+ os.path.isfile(encodeFilename(filename)) and
+ not self.params.get('nopart', False)
)
# Check file already present
- if filename != '-' and nooverwrites_and_exists or continuedl_and_exists:
+ if filename != '-' and (nooverwrites_and_exists or continuedl_and_exists):
self.report_file_already_downloaded(filename)
self._hook_progress({
'filename': filename,
@@ -328,19 +354,15 @@ class FileDownloader(object):
# this interface
self._progress_hooks.append(ph)
- def _debug_cmd(self, args, subprocess_encoding, exe=None):
+ def _debug_cmd(self, args, exe=None):
if not self.params.get('verbose', False):
return
+ str_args = [decodeArgument(a) for a in args]
+
if exe is None:
- exe = os.path.basename(args[0])
+ exe = os.path.basename(str_args[0])
- if subprocess_encoding:
- str_args = [
- a.decode(subprocess_encoding) if isinstance(a, bytes) else a
- for a in args]
- else:
- str_args = args
try:
import pipes
shell_quote = lambda args: ' '.join(map(pipes.quote, str_args))
diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py
new file mode 100644
index 000000000..535f2a7fc
--- /dev/null
+++ b/youtube_dl/downloader/dash.py
@@ -0,0 +1,66 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import FileDownloader
+from ..utils import sanitized_Request
+
+
+class DashSegmentsFD(FileDownloader):
+ """
+ Download segments in a DASH manifest
+ """
+ def real_download(self, filename, info_dict):
+ self.report_destination(filename)
+ tmpfilename = self.temp_name(filename)
+ base_url = info_dict['url']
+ segment_urls = info_dict['segment_urls']
+
+ is_test = self.params.get('test', False)
+ remaining_bytes = self._TEST_FILE_SIZE if is_test else None
+ byte_counter = 0
+
+ def append_url_to_file(outf, target_url, target_name, remaining_bytes=None):
+ self.to_screen('[DashSegments] %s: Downloading %s' % (info_dict['id'], target_name))
+ req = sanitized_Request(target_url)
+ if remaining_bytes is not None:
+ req.add_header('Range', 'bytes=0-%d' % (remaining_bytes - 1))
+
+ data = self.ydl.urlopen(req).read()
+
+ if remaining_bytes is not None:
+ data = data[:remaining_bytes]
+
+ outf.write(data)
+ return len(data)
+
+ def combine_url(base_url, target_url):
+ if re.match(r'^https?://', target_url):
+ return target_url
+ return '%s%s%s' % (base_url, '' if base_url.endswith('/') else '/', target_url)
+
+ with open(tmpfilename, 'wb') as outf:
+ append_url_to_file(
+ outf, combine_url(base_url, info_dict['initialization_url']),
+ 'initialization segment')
+ for i, segment_url in enumerate(segment_urls):
+ segment_len = append_url_to_file(
+ outf, combine_url(base_url, segment_url),
+ 'segment %d / %d' % (i + 1, len(segment_urls)),
+ remaining_bytes)
+ byte_counter += segment_len
+ if remaining_bytes is not None:
+ remaining_bytes -= segment_len
+ if remaining_bytes <= 0:
+ break
+
+ self.try_rename(tmpfilename, filename)
+
+ self._hook_progress({
+ 'downloaded_bytes': byte_counter,
+ 'total_bytes': byte_counter,
+ 'filename': filename,
+ 'status': 'finished',
+ })
+
+ return True
diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py
index ff031d2e0..2bc011266 100644
--- a/youtube_dl/downloader/external.py
+++ b/youtube_dl/downloader/external.py
@@ -2,11 +2,15 @@ from __future__ import unicode_literals
import os.path
import subprocess
-import sys
from .common import FileDownloader
from ..utils import (
+ cli_option,
+ cli_valueless_option,
+ cli_bool_option,
+ cli_configuration_args,
encodeFilename,
+ encodeArgument,
)
@@ -45,25 +49,23 @@ class ExternalFD(FileDownloader):
def supports(cls, info_dict):
return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps')
- def _source_address(self, command_option):
- source_address = self.params.get('source_address')
- if source_address is None:
- return []
- return [command_option, source_address]
+ def _option(self, command_option, param):
+ return cli_option(self.params, command_option, param)
+
+ def _bool_option(self, command_option, param, true_value='true', false_value='false', separator=None):
+ return cli_bool_option(self.params, command_option, param, true_value, false_value, separator)
+
+ def _valueless_option(self, command_option, param, expected_value=True):
+ return cli_valueless_option(self.params, command_option, param, expected_value)
+
+ def _configuration_args(self, default=[]):
+ return cli_configuration_args(self.params, 'external_downloader_args', default)
def _call_downloader(self, tmpfilename, info_dict):
""" Either overwrite this or implement _make_cmd """
- cmd = self._make_cmd(tmpfilename, info_dict)
-
- if sys.platform == 'win32' and sys.version_info < (3, 0):
- # Windows subprocess module does not actually support Unicode
- # on Python 2.x
- # See http://stackoverflow.com/a/9951851/35070
- subprocess_encoding = sys.getfilesystemencoding()
- cmd = [a.encode(subprocess_encoding, 'ignore') for a in cmd]
- else:
- subprocess_encoding = None
- self._debug_cmd(cmd, subprocess_encoding)
+ cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)]
+
+ self._debug_cmd(cmd)
p = subprocess.Popen(
cmd, stderr=subprocess.PIPE)
@@ -75,10 +77,23 @@ class ExternalFD(FileDownloader):
class CurlFD(ExternalFD):
def _make_cmd(self, tmpfilename, info_dict):
- cmd = [self.exe, '-o', tmpfilename]
+ cmd = [self.exe, '--location', '-o', tmpfilename]
for key, val in info_dict['http_headers'].items():
cmd += ['--header', '%s: %s' % (key, val)]
- cmd += self._source_address('--interface')
+ cmd += self._option('--interface', 'source_address')
+ cmd += self._option('--proxy', 'proxy')
+ cmd += self._valueless_option('--insecure', 'nocheckcertificate')
+ cmd += self._configuration_args()
+ cmd += ['--', info_dict['url']]
+ return cmd
+
+
+class AxelFD(ExternalFD):
+ def _make_cmd(self, tmpfilename, info_dict):
+ cmd = [self.exe, '-o', tmpfilename]
+ for key, val in info_dict['http_headers'].items():
+ cmd += ['-H', '%s: %s' % (key, val)]
+ cmd += self._configuration_args()
cmd += ['--', info_dict['url']]
return cmd
@@ -88,26 +103,39 @@ class WgetFD(ExternalFD):
cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies']
for key, val in info_dict['http_headers'].items():
cmd += ['--header', '%s: %s' % (key, val)]
- cmd += self._source_address('--bind-address')
+ cmd += self._option('--bind-address', 'source_address')
+ cmd += self._option('--proxy', 'proxy')
+ cmd += self._valueless_option('--no-check-certificate', 'nocheckcertificate')
+ cmd += self._configuration_args()
cmd += ['--', info_dict['url']]
return cmd
class Aria2cFD(ExternalFD):
def _make_cmd(self, tmpfilename, info_dict):
- cmd = [
- self.exe, '-c',
- '--min-split-size', '1M', '--max-connection-per-server', '4']
+ cmd = [self.exe, '-c']
+ cmd += self._configuration_args([
+ '--min-split-size', '1M', '--max-connection-per-server', '4'])
dn = os.path.dirname(tmpfilename)
if dn:
cmd += ['--dir', dn]
cmd += ['--out', os.path.basename(tmpfilename)]
for key, val in info_dict['http_headers'].items():
cmd += ['--header', '%s: %s' % (key, val)]
- cmd += self._source_address('--interface')
+ cmd += self._option('--interface', 'source_address')
+ cmd += self._option('--all-proxy', 'proxy')
+ cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=')
cmd += ['--', info_dict['url']]
return cmd
+
+class HttpieFD(ExternalFD):
+ def _make_cmd(self, tmpfilename, info_dict):
+ cmd = ['http', '--download', '--output', tmpfilename, info_dict['url']]
+ for key, val in info_dict['http_headers'].items():
+ cmd += ['%s:%s' % (key, val)]
+ return cmd
+
_BY_NAME = dict(
(klass.get_basename(), klass)
for name, klass in globals().items()
@@ -122,5 +150,6 @@ def list_external_downloaders():
def get_external_downloader(external_downloader):
""" Given the name of the executable, see whether we support the given
downloader . """
- bn = os.path.basename(external_downloader)
+ # Drop .exe extension on Windows
+ bn = os.path.splitext(os.path.basename(external_downloader))[0]
return _BY_NAME[bn]
diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py
index 0e7a1c200..6170cc155 100644
--- a/youtube_dl/downloader/f4m.py
+++ b/youtube_dl/downloader/f4m.py
@@ -1,23 +1,23 @@
-from __future__ import unicode_literals
+from __future__ import division, unicode_literals
import base64
import io
import itertools
import os
import time
-import xml.etree.ElementTree as etree
-from .common import FileDownloader
-from .http import HttpFD
+from .fragment import FragmentFD
from ..compat import (
+ compat_etree_fromstring,
compat_urlparse,
+ compat_urllib_error,
+ compat_urllib_parse_urlparse,
)
from ..utils import (
- struct_pack,
- struct_unpack,
- format_bytes,
encodeFilename,
sanitize_open,
+ struct_pack,
+ struct_unpack,
xpath_text,
)
@@ -122,7 +122,8 @@ class FlvReader(io.BytesIO):
self.read_unsigned_int() # BootstrapinfoVersion
# Profile,Live,Update,Reserved
- self.read(1)
+ flags = self.read_unsigned_char()
+ live = flags & 0x20 != 0
# time scale
self.read_unsigned_int()
# CurrentMediaTime
@@ -161,6 +162,7 @@ class FlvReader(io.BytesIO):
return {
'segments': segments,
'fragments': fragments,
+ 'live': live,
}
def read_bootstrap_info(self):
@@ -183,6 +185,10 @@ def build_fragments_list(boot_info):
for segment, fragments_count in segment_run_table['segment_run']:
for _ in range(fragments_count):
res.append((segment, next(fragments_counter)))
+
+ if boot_info['live']:
+ res = res[-2:]
+
return res
@@ -220,16 +226,13 @@ def _add_ns(prop):
return '{http://ns.adobe.com/f4m/1.0}%s' % prop
-class HttpQuietDownloader(HttpFD):
- def to_screen(self, *args, **kargs):
- pass
-
-
-class F4mFD(FileDownloader):
+class F4mFD(FragmentFD):
"""
A downloader for f4m manifests or AdobeHDS.
"""
+ FD_NAME = 'f4m'
+
def _get_unencrypted_media(self, doc):
media = doc.findall(_add_ns('media'))
if not media:
@@ -247,24 +250,47 @@ class F4mFD(FileDownloader):
self.report_error('Unsupported DRM')
return media
+ def _get_bootstrap_from_url(self, bootstrap_url):
+ bootstrap = self.ydl.urlopen(bootstrap_url).read()
+ return read_bootstrap_info(bootstrap)
+
+ def _update_live_fragments(self, bootstrap_url, latest_fragment):
+ fragments_list = []
+ retries = 30
+ while (not fragments_list) and (retries > 0):
+ boot_info = self._get_bootstrap_from_url(bootstrap_url)
+ fragments_list = build_fragments_list(boot_info)
+ fragments_list = [f for f in fragments_list if f[1] > latest_fragment]
+ if not fragments_list:
+ # Retry after a while
+ time.sleep(5.0)
+ retries -= 1
+
+ if not fragments_list:
+ self.report_error('Failed to update fragments')
+
+ return fragments_list
+
+ def _parse_bootstrap_node(self, node, base_url):
+ if node.text is None:
+ bootstrap_url = compat_urlparse.urljoin(
+ base_url, node.attrib['url'])
+ boot_info = self._get_bootstrap_from_url(bootstrap_url)
+ else:
+ bootstrap_url = None
+ bootstrap = base64.b64decode(node.text.encode('ascii'))
+ boot_info = read_bootstrap_info(bootstrap)
+ return (boot_info, bootstrap_url)
+
def real_download(self, filename, info_dict):
man_url = info_dict['url']
requested_bitrate = info_dict.get('tbr')
- self.to_screen('[download] Downloading f4m manifest')
- manifest = self.ydl.urlopen(man_url).read()
- self.report_destination(filename)
- http_dl = HttpQuietDownloader(
- self.ydl,
- {
- 'continuedl': True,
- 'quiet': True,
- 'noprogress': True,
- 'ratelimit': self.params.get('ratelimit', None),
- 'test': self.params.get('test', False),
- }
- )
-
- doc = etree.fromstring(manifest)
+ self.to_screen('[%s] Downloading f4m manifest' % self.FD_NAME)
+ urlh = self.ydl.urlopen(man_url)
+ man_url = urlh.geturl()
+ manifest = urlh.read()
+
+ doc = compat_etree_fromstring(manifest)
formats = [(int(f.attrib.get('bitrate', -1)), f)
for f in self._get_unencrypted_media(doc)]
if requested_bitrate is None:
@@ -277,18 +303,13 @@ class F4mFD(FileDownloader):
base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
bootstrap_node = doc.find(_add_ns('bootstrapInfo'))
- if bootstrap_node.text is None:
- bootstrap_url = compat_urlparse.urljoin(
- base_url, bootstrap_node.attrib['url'])
- bootstrap = self.ydl.urlopen(bootstrap_url).read()
- else:
- bootstrap = base64.b64decode(bootstrap_node.text)
+ boot_info, bootstrap_url = self._parse_bootstrap_node(bootstrap_node, base_url)
+ live = boot_info['live']
metadata_node = media.find(_add_ns('metadata'))
if metadata_node is not None:
- metadata = base64.b64decode(metadata_node.text)
+ metadata = base64.b64decode(metadata_node.text.encode('ascii'))
else:
metadata = None
- boot_info = read_bootstrap_info(bootstrap)
fragments_list = build_fragments_list(boot_info)
if self.params.get('test', False):
@@ -298,74 +319,73 @@ class F4mFD(FileDownloader):
# For some akamai manifests we'll need to add a query to the fragment url
akamai_pv = xpath_text(doc, _add_ns('pv-2.0'))
- tmpfilename = self.temp_name(filename)
- (dest_stream, tmpfilename) = sanitize_open(tmpfilename, 'wb')
+ ctx = {
+ 'filename': filename,
+ 'total_frags': total_frags,
+ }
+
+ self._prepare_frag_download(ctx)
+
+ dest_stream = ctx['dest_stream']
+
write_flv_header(dest_stream)
- write_metadata_tag(dest_stream, metadata)
+ if not live:
+ write_metadata_tag(dest_stream, metadata)
- # This dict stores the download progress, it's updated by the progress
- # hook
- state = {
- 'downloaded_bytes': 0,
- 'frag_counter': 0,
- }
- start = time.time()
-
- def frag_progress_hook(status):
- frag_total_bytes = status.get('total_bytes', 0)
- estimated_size = (state['downloaded_bytes'] +
- (total_frags - state['frag_counter']) * frag_total_bytes)
- if status['status'] == 'finished':
- state['downloaded_bytes'] += frag_total_bytes
- state['frag_counter'] += 1
- progress = self.calc_percent(state['frag_counter'], total_frags)
- byte_counter = state['downloaded_bytes']
- else:
- frag_downloaded_bytes = status['downloaded_bytes']
- byte_counter = state['downloaded_bytes'] + frag_downloaded_bytes
- frag_progress = self.calc_percent(frag_downloaded_bytes,
- frag_total_bytes)
- progress = self.calc_percent(state['frag_counter'], total_frags)
- progress += frag_progress / float(total_frags)
-
- eta = self.calc_eta(start, time.time(), estimated_size, byte_counter)
- self.report_progress(progress, format_bytes(estimated_size),
- status.get('speed'), eta)
- http_dl.add_progress_hook(frag_progress_hook)
+ base_url_parsed = compat_urllib_parse_urlparse(base_url)
+
+ self._start_frag_download(ctx)
frags_filenames = []
- for (seg_i, frag_i) in fragments_list:
+ while fragments_list:
+ seg_i, frag_i = fragments_list.pop(0)
name = 'Seg%d-Frag%d' % (seg_i, frag_i)
- url = base_url + name
+ query = []
+ if base_url_parsed.query:
+ query.append(base_url_parsed.query)
if akamai_pv:
- url += '?' + akamai_pv.strip(';')
- frag_filename = '%s-%s' % (tmpfilename, name)
- success = http_dl.download(frag_filename, {'url': url})
- if not success:
- return False
- with open(frag_filename, 'rb') as down:
+ query.append(akamai_pv.strip(';'))
+ if info_dict.get('extra_param_to_segment_url'):
+ query.append(info_dict['extra_param_to_segment_url'])
+ url_parsed = base_url_parsed._replace(path=base_url_parsed.path + name, query='&'.join(query))
+ frag_filename = '%s-%s' % (ctx['tmpfilename'], name)
+ try:
+ success = ctx['dl'].download(frag_filename, {'url': url_parsed.geturl()})
+ if not success:
+ return False
+ (down, frag_sanitized) = sanitize_open(frag_filename, 'rb')
down_data = down.read()
+ down.close()
reader = FlvReader(down_data)
while True:
_, box_type, box_data = reader.read_box_info()
if box_type == b'mdat':
dest_stream.write(box_data)
break
- frags_filenames.append(frag_filename)
+ if live:
+ os.remove(encodeFilename(frag_sanitized))
+ else:
+ frags_filenames.append(frag_sanitized)
+ except (compat_urllib_error.HTTPError, ) as err:
+ if live and (err.code == 404 or err.code == 410):
+ # We didn't keep up with the live window. Continue
+ # with the next available fragment.
+ msg = 'Fragment %d unavailable' % frag_i
+ self.report_warning(msg)
+ fragments_list = []
+ else:
+ raise
+
+ if not fragments_list and live and bootstrap_url:
+ fragments_list = self._update_live_fragments(bootstrap_url, frag_i)
+ total_frags += len(fragments_list)
+ if fragments_list and (fragments_list[0][1] > frag_i + 1):
+ msg = 'Missed %d fragments' % (fragments_list[0][1] - (frag_i + 1))
+ self.report_warning(msg)
+
+ self._finish_frag_download(ctx)
- dest_stream.close()
- self.report_finish(format_bytes(state['downloaded_bytes']), time.time() - start)
-
- self.try_rename(tmpfilename, filename)
for frag_file in frags_filenames:
- os.remove(frag_file)
-
- fsize = os.path.getsize(encodeFilename(filename))
- self._hook_progress({
- 'downloaded_bytes': fsize,
- 'total_bytes': fsize,
- 'filename': filename,
- 'status': 'finished',
- })
+ os.remove(encodeFilename(frag_file))
return True
diff --git a/youtube_dl/downloader/fragment.py b/youtube_dl/downloader/fragment.py
new file mode 100644
index 000000000..5a64b29ee
--- /dev/null
+++ b/youtube_dl/downloader/fragment.py
@@ -0,0 +1,111 @@
+from __future__ import division, unicode_literals
+
+import os
+import time
+
+from .common import FileDownloader
+from .http import HttpFD
+from ..utils import (
+ encodeFilename,
+ sanitize_open,
+)
+
+
+class HttpQuietDownloader(HttpFD):
+ def to_screen(self, *args, **kargs):
+ pass
+
+
+class FragmentFD(FileDownloader):
+ """
+ A base file downloader class for fragmented media (e.g. f4m/m3u8 manifests).
+ """
+
+ def _prepare_and_start_frag_download(self, ctx):
+ self._prepare_frag_download(ctx)
+ self._start_frag_download(ctx)
+
+ def _prepare_frag_download(self, ctx):
+ self.to_screen('[%s] Total fragments: %d' % (self.FD_NAME, ctx['total_frags']))
+ self.report_destination(ctx['filename'])
+ dl = HttpQuietDownloader(
+ self.ydl,
+ {
+ 'continuedl': True,
+ 'quiet': True,
+ 'noprogress': True,
+ 'ratelimit': self.params.get('ratelimit', None),
+ 'retries': self.params.get('retries', 0),
+ 'test': self.params.get('test', False),
+ }
+ )
+ tmpfilename = self.temp_name(ctx['filename'])
+ dest_stream, tmpfilename = sanitize_open(tmpfilename, 'wb')
+ ctx.update({
+ 'dl': dl,
+ 'dest_stream': dest_stream,
+ 'tmpfilename': tmpfilename,
+ })
+
+ def _start_frag_download(self, ctx):
+ total_frags = ctx['total_frags']
+ # This dict stores the download progress, it's updated by the progress
+ # hook
+ state = {
+ 'status': 'downloading',
+ 'downloaded_bytes': 0,
+ 'frag_index': 0,
+ 'frag_count': total_frags,
+ 'filename': ctx['filename'],
+ 'tmpfilename': ctx['tmpfilename'],
+ }
+ start = time.time()
+ ctx['started'] = start
+
+ def frag_progress_hook(s):
+ if s['status'] not in ('downloading', 'finished'):
+ return
+
+ frag_total_bytes = s.get('total_bytes', 0)
+ if s['status'] == 'finished':
+ state['downloaded_bytes'] += frag_total_bytes
+ state['frag_index'] += 1
+
+ estimated_size = (
+ (state['downloaded_bytes'] + frag_total_bytes) /
+ (state['frag_index'] + 1) * total_frags)
+ time_now = time.time()
+ state['total_bytes_estimate'] = estimated_size
+ state['elapsed'] = time_now - start
+
+ if s['status'] == 'finished':
+ progress = self.calc_percent(state['frag_index'], total_frags)
+ else:
+ frag_downloaded_bytes = s['downloaded_bytes']
+ frag_progress = self.calc_percent(frag_downloaded_bytes,
+ frag_total_bytes)
+ progress = self.calc_percent(state['frag_index'], total_frags)
+ progress += frag_progress / float(total_frags)
+
+ state['eta'] = self.calc_eta(
+ start, time_now, estimated_size, state['downloaded_bytes'] + frag_downloaded_bytes)
+ state['speed'] = s.get('speed')
+ self._hook_progress(state)
+
+ ctx['dl'].add_progress_hook(frag_progress_hook)
+
+ return start
+
+ def _finish_frag_download(self, ctx):
+ ctx['dest_stream'].close()
+ elapsed = time.time() - ctx['started']
+ self.try_rename(ctx['tmpfilename'], ctx['filename'])
+ fsize = os.path.getsize(encodeFilename(ctx['filename']))
+
+ self._hook_progress({
+ 'downloaded_bytes': fsize,
+ 'total_bytes': fsize,
+ 'filename': ctx['filename'],
+ 'status': 'finished',
+ 'elapsed': elapsed,
+ })
diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py
index e527ee425..b5a3e1167 100644
--- a/youtube_dl/downloader/hls.py
+++ b/youtube_dl/downloader/hls.py
@@ -4,15 +4,16 @@ import os
import re
import subprocess
-from ..postprocessor.ffmpeg import FFmpegPostProcessor
from .common import FileDownloader
-from ..compat import (
- compat_urlparse,
- compat_urllib_request,
-)
+from .fragment import FragmentFD
+
+from ..compat import compat_urlparse
+from ..postprocessor.ffmpeg import FFmpegPostProcessor
from ..utils import (
encodeArgument,
encodeFilename,
+ sanitize_open,
+ handle_youtubedl_headers,
)
@@ -23,16 +24,27 @@ class HlsFD(FileDownloader):
tmpfilename = self.temp_name(filename)
ffpp = FFmpegPostProcessor(downloader=self)
- program = ffpp._executable
- if program is None:
+ if not ffpp.available:
self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.')
return False
ffpp.check_version()
- args = [
- encodeArgument(opt)
- for opt in (program, '-y', '-i', url, '-f', 'mp4', '-c', 'copy', '-bsf:a', 'aac_adtstoasc')]
- args.append(encodeFilename(tmpfilename, True))
+ args = [ffpp.executable, '-y']
+
+ if info_dict['http_headers'] and re.match(r'^https?://', url):
+ # Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
+ # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
+ headers = handle_youtubedl_headers(info_dict['http_headers'])
+ args += [
+ '-headers',
+ ''.join('%s: %s\r\n' % (key, val) for key, val in headers.items())]
+
+ args += ['-i', url, '-f', 'mp4', '-c', 'copy', '-bsf:a', 'aac_adtstoasc']
+
+ args = [encodeArgument(opt) for opt in args]
+ args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True))
+
+ self._debug_cmd(args)
retval = subprocess.call(args)
if retval == 0:
@@ -48,58 +60,55 @@ class HlsFD(FileDownloader):
return True
else:
self.to_stderr('\n')
- self.report_error('%s exited with code %d' % (program, retval))
+ self.report_error('%s exited with code %d' % (ffpp.basename, retval))
return False
-class NativeHlsFD(FileDownloader):
+class NativeHlsFD(FragmentFD):
""" A more limited implementation that does not require ffmpeg """
+ FD_NAME = 'hlsnative'
+
def real_download(self, filename, info_dict):
- url = info_dict['url']
- self.report_destination(filename)
- tmpfilename = self.temp_name(filename)
+ man_url = info_dict['url']
+ self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME)
+ manifest = self.ydl.urlopen(man_url).read()
- self.to_screen(
- '[hlsnative] %s: Downloading m3u8 manifest' % info_dict['id'])
- data = self.ydl.urlopen(url).read()
- s = data.decode('utf-8', 'ignore')
- segment_urls = []
+ s = manifest.decode('utf-8', 'ignore')
+ fragment_urls = []
for line in s.splitlines():
line = line.strip()
if line and not line.startswith('#'):
segment_url = (
line
if re.match(r'^https?://', line)
- else compat_urlparse.urljoin(url, line))
- segment_urls.append(segment_url)
-
- is_test = self.params.get('test', False)
- remaining_bytes = self._TEST_FILE_SIZE if is_test else None
- byte_counter = 0
- with open(tmpfilename, 'wb') as outf:
- for i, segurl in enumerate(segment_urls):
- self.to_screen(
- '[hlsnative] %s: Downloading segment %d / %d' %
- (info_dict['id'], i + 1, len(segment_urls)))
- seg_req = compat_urllib_request.Request(segurl)
- if remaining_bytes is not None:
- seg_req.add_header('Range', 'bytes=0-%d' % (remaining_bytes - 1))
-
- segment = self.ydl.urlopen(seg_req).read()
- if remaining_bytes is not None:
- segment = segment[:remaining_bytes]
- remaining_bytes -= len(segment)
- outf.write(segment)
- byte_counter += len(segment)
- if remaining_bytes is not None and remaining_bytes <= 0:
+ else compat_urlparse.urljoin(man_url, line))
+ fragment_urls.append(segment_url)
+ # We only download the first fragment during the test
+ if self.params.get('test', False):
break
- self._hook_progress({
- 'downloaded_bytes': byte_counter,
- 'total_bytes': byte_counter,
+ ctx = {
'filename': filename,
- 'status': 'finished',
- })
- self.try_rename(tmpfilename, filename)
+ 'total_frags': len(fragment_urls),
+ }
+
+ self._prepare_and_start_frag_download(ctx)
+
+ frags_filenames = []
+ for i, frag_url in enumerate(fragment_urls):
+ frag_filename = '%s-Frag%d' % (ctx['tmpfilename'], i)
+ success = ctx['dl'].download(frag_filename, {'url': frag_url})
+ if not success:
+ return False
+ down, frag_sanitized = sanitize_open(frag_filename, 'rb')
+ ctx['dest_stream'].write(down.read())
+ down.close()
+ frags_filenames.append(frag_sanitized)
+
+ self._finish_frag_download(ctx)
+
+ for frag_file in frags_filenames:
+ os.remove(encodeFilename(frag_file))
+
return True
diff --git a/youtube_dl/downloader/http.py b/youtube_dl/downloader/http.py
index 49170cf9d..56840e026 100644
--- a/youtube_dl/downloader/http.py
+++ b/youtube_dl/downloader/http.py
@@ -1,21 +1,18 @@
from __future__ import unicode_literals
+import errno
import os
+import socket
import time
-
-from socket import error as SocketError
-import errno
+import re
from .common import FileDownloader
-from ..compat import (
- compat_urllib_request,
- compat_urllib_error,
-)
+from ..compat import compat_urllib_error
from ..utils import (
ContentTooShortError,
encodeFilename,
sanitize_open,
- format_bytes,
+ sanitized_Request,
)
@@ -30,13 +27,8 @@ class HttpFD(FileDownloader):
add_headers = info_dict.get('http_headers')
if add_headers:
headers.update(add_headers)
- data = info_dict.get('http_post_data')
- http_method = info_dict.get('http_method')
- basic_request = compat_urllib_request.Request(url, data, headers)
- request = compat_urllib_request.Request(url, data, headers)
- if http_method is not None:
- basic_request.get_method = lambda: http_method
- request.get_method = lambda: http_method
+ basic_request = sanitized_Request(url, None, headers)
+ request = sanitized_Request(url, None, headers)
is_test = self.params.get('test', False)
@@ -51,7 +43,7 @@ class HttpFD(FileDownloader):
open_mode = 'wb'
if resume_len != 0:
- if self.params.get('continuedl', False):
+ if self.params.get('continuedl', True):
self.report_resuming_byte(resume_len)
request.add_header('Range', 'bytes=%d-' % resume_len)
open_mode = 'ab'
@@ -64,6 +56,24 @@ class HttpFD(FileDownloader):
# Establish connection
try:
data = self.ydl.urlopen(request)
+ # When trying to resume, Content-Range HTTP header of response has to be checked
+ # to match the value of requested Range HTTP header. This is due to a webservers
+ # that don't support resuming and serve a whole file with no Content-Range
+ # set in response despite of requested Range (see
+ # https://github.com/rg3/youtube-dl/issues/6057#issuecomment-126129799)
+ if resume_len > 0:
+ content_range = data.headers.get('Content-Range')
+ if content_range:
+ content_range_m = re.search(r'bytes (\d+)-', content_range)
+ # Content-Range is present and matches requested Range, resume is possible
+ if content_range_m and resume_len == int(content_range_m.group(1)):
+ break
+ # Content-Range is either not present or invalid. Assuming remote webserver is
+ # trying to send the whole file, resume is not possible, so wiping the local file
+ # and performing entire redownload
+ self.report_unable_to_resume()
+ resume_len = 0
+ open_mode = 'wb'
break
except (compat_urllib_error.HTTPError, ) as err:
if (err.code < 500 or err.code >= 600) and err.code != 416:
@@ -94,6 +104,8 @@ class HttpFD(FileDownloader):
self._hook_progress({
'filename': filename,
'status': 'finished',
+ 'downloaded_bytes': resume_len,
+ 'total_bytes': resume_len,
})
return True
else:
@@ -102,7 +114,7 @@ class HttpFD(FileDownloader):
resume_len = 0
open_mode = 'wb'
break
- except SocketError as e:
+ except socket.error as e:
if e.errno != errno.ECONNRESET:
# Connection reset is no problem, just retry
raise
@@ -137,7 +149,6 @@ class HttpFD(FileDownloader):
self.to_screen('\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
return False
- data_len_str = format_bytes(data_len)
byte_counter = 0 + resume_len
block_size = self.params.get('buffersize', 1024)
start = time.time()
@@ -196,20 +207,19 @@ class HttpFD(FileDownloader):
# Progress message
speed = self.calc_speed(start, now, byte_counter - resume_len)
if data_len is None:
- eta = percent = None
+ eta = None
else:
- percent = self.calc_percent(byte_counter, data_len)
eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
- self.report_progress(percent, data_len_str, speed, eta)
self._hook_progress({
+ 'status': 'downloading',
'downloaded_bytes': byte_counter,
'total_bytes': data_len,
'tmpfilename': tmpfilename,
'filename': filename,
- 'status': 'downloading',
'eta': eta,
'speed': speed,
+ 'elapsed': now - start,
})
if is_test and byte_counter == data_len:
@@ -221,7 +231,7 @@ class HttpFD(FileDownloader):
return False
if tmpfilename != '-':
stream.close()
- self.report_finish(data_len_str, (time.time() - start))
+
if data_len is not None and byte_counter != data_len:
raise ContentTooShortError(byte_counter, int(data_len))
self.try_rename(tmpfilename, filename)
@@ -235,6 +245,7 @@ class HttpFD(FileDownloader):
'total_bytes': byte_counter,
'filename': filename,
'status': 'finished',
+ 'elapsed': time.time() - start,
})
return True
diff --git a/youtube_dl/downloader/rtmp.py b/youtube_dl/downloader/rtmp.py
index f7eeb6f43..14d56db47 100644
--- a/youtube_dl/downloader/rtmp.py
+++ b/youtube_dl/downloader/rtmp.py
@@ -3,7 +3,6 @@ from __future__ import unicode_literals
import os
import re
import subprocess
-import sys
import time
from .common import FileDownloader
@@ -11,7 +10,7 @@ from ..compat import compat_str
from ..utils import (
check_executable,
encodeFilename,
- format_bytes,
+ encodeArgument,
get_exe_version,
)
@@ -51,23 +50,23 @@ class RtmpFD(FileDownloader):
if not resume_percent:
resume_percent = percent
resume_downloaded_data_len = downloaded_data_len
- eta = self.calc_eta(start, time.time(), 100 - resume_percent, percent - resume_percent)
- speed = self.calc_speed(start, time.time(), downloaded_data_len - resume_downloaded_data_len)
+ time_now = time.time()
+ eta = self.calc_eta(start, time_now, 100 - resume_percent, percent - resume_percent)
+ speed = self.calc_speed(start, time_now, downloaded_data_len - resume_downloaded_data_len)
data_len = None
if percent > 0:
data_len = int(downloaded_data_len * 100 / percent)
- data_len_str = '~' + format_bytes(data_len)
- self.report_progress(percent, data_len_str, speed, eta)
- cursor_in_new_line = False
self._hook_progress({
+ 'status': 'downloading',
'downloaded_bytes': downloaded_data_len,
- 'total_bytes': data_len,
+ 'total_bytes_estimate': data_len,
'tmpfilename': tmpfilename,
'filename': filename,
- 'status': 'downloading',
'eta': eta,
+ 'elapsed': time_now - start,
'speed': speed,
})
+ cursor_in_new_line = False
else:
# no percent for live streams
mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line)
@@ -75,15 +74,15 @@ class RtmpFD(FileDownloader):
downloaded_data_len = int(float(mobj.group(1)) * 1024)
time_now = time.time()
speed = self.calc_speed(start, time_now, downloaded_data_len)
- self.report_progress_live_stream(downloaded_data_len, speed, time_now - start)
- cursor_in_new_line = False
self._hook_progress({
'downloaded_bytes': downloaded_data_len,
'tmpfilename': tmpfilename,
'filename': filename,
'status': 'downloading',
+ 'elapsed': time_now - start,
'speed': speed,
})
+ cursor_in_new_line = False
elif self.params.get('verbose', False):
if not cursor_in_new_line:
self.to_screen('')
@@ -106,7 +105,7 @@ class RtmpFD(FileDownloader):
protocol = info_dict.get('rtmp_protocol', None)
real_time = info_dict.get('rtmp_real_time', False)
no_resume = info_dict.get('no_resume', False)
- continue_dl = info_dict.get('continuedl', False)
+ continue_dl = self.params.get('continuedl', True)
self.report_destination(filename)
tmpfilename = self.temp_name(filename)
@@ -118,9 +117,11 @@ class RtmpFD(FileDownloader):
return False
# Download using rtmpdump. rtmpdump returns exit code 2 when
- # the connection was interrumpted and resuming appears to be
+ # the connection was interrupted and resuming appears to be
# possible. This is part of rtmpdump's normal usage, AFAIK.
- basic_args = ['rtmpdump', '--verbose', '-r', url, '-o', tmpfilename]
+ basic_args = [
+ 'rtmpdump', '--verbose', '-r', url,
+ '-o', tmpfilename]
if player_url is not None:
basic_args += ['--swfVfy', player_url]
if page_url is not None:
@@ -130,7 +131,7 @@ class RtmpFD(FileDownloader):
if play_path is not None:
basic_args += ['--playpath', play_path]
if tc_url is not None:
- basic_args += ['--tcUrl', url]
+ basic_args += ['--tcUrl', tc_url]
if test:
basic_args += ['--stop', '1']
if flash_version is not None:
@@ -153,16 +154,9 @@ class RtmpFD(FileDownloader):
if not live and continue_dl:
args += ['--skip', '1']
- if sys.platform == 'win32' and sys.version_info < (3, 0):
- # Windows subprocess module does not actually support Unicode
- # on Python 2.x
- # See http://stackoverflow.com/a/9951851/35070
- subprocess_encoding = sys.getfilesystemencoding()
- args = [a.encode(subprocess_encoding, 'ignore') for a in args]
- else:
- subprocess_encoding = None
+ args = [encodeArgument(a) for a in args]
- self._debug_cmd(args, subprocess_encoding, exe='rtmpdump')
+ self._debug_cmd(args, exe='rtmpdump')
RD_SUCCESS = 0
RD_FAILED = 1
@@ -179,7 +173,11 @@ class RtmpFD(FileDownloader):
prevsize = os.path.getsize(encodeFilename(tmpfilename))
self.to_screen('[rtmpdump] %s bytes' % prevsize)
time.sleep(5.0) # This seems to be needed
- retval = run_rtmpdump(basic_args + ['-e'] + [[], ['-k', '1']][retval == RD_FAILED])
+ args = basic_args + ['--resume']
+ if retval == RD_FAILED:
+ args += ['--skip', '1']
+ args = [encodeArgument(a) for a in args]
+ retval = run_rtmpdump(args)
cursize = os.path.getsize(encodeFilename(tmpfilename))
if prevsize == cursize and retval == RD_FAILED:
break
diff --git a/youtube_dl/downloader/mplayer.py b/youtube_dl/downloader/rtsp.py
index 72cef30ea..3eb29526c 100644
--- a/youtube_dl/downloader/mplayer.py
+++ b/youtube_dl/downloader/rtsp.py
@@ -10,21 +10,23 @@ from ..utils import (
)
-class MplayerFD(FileDownloader):
+class RtspFD(FileDownloader):
def real_download(self, filename, info_dict):
url = info_dict['url']
self.report_destination(filename)
tmpfilename = self.temp_name(filename)
- args = [
- 'mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy',
- '-dumpstream', '-dumpfile', tmpfilename, url]
- # Check for mplayer first
- if not check_executable('mplayer', ['-h']):
- self.report_error('MMS or RTSP download detected but "%s" could not be run' % args[0])
+ if check_executable('mplayer', ['-h']):
+ args = [
+ 'mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy',
+ '-dumpstream', '-dumpfile', tmpfilename, url]
+ elif check_executable('mpv', ['-h']):
+ args = [
+ 'mpv', '-really-quiet', '--vo=null', '--stream-dump=' + tmpfilename, url]
+ else:
+ self.report_error('MMS or RTSP download detected but neither "mplayer" nor "mpv" could be run. Please install any.')
return False
- # Download using mplayer.
retval = subprocess.call(args)
if retval == 0:
fsize = os.path.getsize(encodeFilename(tmpfilename))
@@ -39,5 +41,5 @@ class MplayerFD(FileDownloader):
return True
else:
self.to_stderr('\n')
- self.report_error('mplayer exited with code %d' % retval)
+ self.report_error('%s exited with code %d' % (args[0], retval))
return False