about summary refs log tree commit diff
path: root/youtube_dl/downloader
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/downloader')
-rw-r--r-- youtube_dl/downloader/external.py 20
-rw-r--r-- youtube_dl/downloader/f4m.py 63
-rw-r--r-- youtube_dl/downloader/hls.py 110
3 files changed, 154 insertions, 39 deletions
diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py
index 8d642fc3e..fae245024 100644
--- a/youtube_dl/downloader/external.py
+++ b/youtube_dl/downloader/external.py
@@ -6,6 +6,7 @@ import sys
import re
from .common import FileDownloader
+from ..compat import compat_setenv
from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS
from ..utils import (
cli_option,
@@ -84,7 +85,7 @@ class ExternalFD(FileDownloader):
cmd, stderr=subprocess.PIPE)
_, stderr = p.communicate()
if p.returncode != 0:
- self.to_stderr(stderr)
+ self.to_stderr(stderr.decode('utf-8', 'replace'))
return p.returncode
@@ -198,6 +199,19 @@ class FFmpegFD(ExternalFD):
'-headers',
''.join('%s: %s\r\n' % (key, val) for key, val in headers.items())]
+ env = None
+ proxy = self.params.get('proxy')
+ if proxy:
+ if not re.match(r'^[\da-zA-Z]+://', proxy):
+ proxy = 'http://%s' % proxy
+ # Since December 2015, ffmpeg has supported the -http_proxy option (see
+ # http://git.videolan.org/?p=ffmpeg.git;a=commit;h=b4eb1f29ebddd60c41a2eb39f5af701e38e0d3fd)
+ # We could switch to the following code once we can reliably detect the ffmpeg version
+ # args += ['-http_proxy', proxy]
+ env = os.environ.copy()
+ compat_setenv('HTTP_PROXY', proxy, env=env)
+ compat_setenv('http_proxy', proxy, env=env)
+
protocol = info_dict.get('protocol')
if protocol == 'rtmp':
@@ -224,7 +238,7 @@ class FFmpegFD(ExternalFD):
args += ['-rtmp_live', 'live']
args += ['-i', url, '-c', 'copy']
- if protocol == 'm3u8':
+ if protocol in ('m3u8', 'm3u8_native'):
if self.params.get('hls_use_mpegts', False) or tmpfilename == '-':
args += ['-f', 'mpegts']
else:
@@ -239,7 +253,7 @@ class FFmpegFD(ExternalFD):
self._debug_cmd(args)
- proc = subprocess.Popen(args, stdin=subprocess.PIPE)
+ proc = subprocess.Popen(args, stdin=subprocess.PIPE, env=env)
try:
retval = proc.wait()
except KeyboardInterrupt:
diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py
index 664d87543..80c21d40b 100644
--- a/youtube_dl/downloader/f4m.py
+++ b/youtube_dl/downloader/f4m.py
@@ -12,37 +12,49 @@ from ..compat import (
compat_urlparse,
compat_urllib_error,
compat_urllib_parse_urlparse,
+ compat_struct_pack,
+ compat_struct_unpack,
)
from ..utils import (
encodeFilename,
fix_xml_ampersands,
sanitize_open,
- struct_pack,
- struct_unpack,
xpath_text,
)
+class DataTruncatedError(Exception):
+ pass
+
+
class FlvReader(io.BytesIO):
"""
Reader for Flv files
The file format is documented in https://www.adobe.com/devnet/f4v.html
"""
+ def read_bytes(self, n):
+ data = self.read(n)
+ if len(data) < n:
+ raise DataTruncatedError(
+ 'FlvReader error: need %d bytes while only %d bytes got' % (
+ n, len(data)))
+ return data
+
# Utility functions for reading numbers and strings
def read_unsigned_long_long(self):
- return struct_unpack('!Q', self.read(8))[0]
+ return compat_struct_unpack('!Q', self.read_bytes(8))[0]
def read_unsigned_int(self):
- return struct_unpack('!I', self.read(4))[0]
+ return compat_struct_unpack('!I', self.read_bytes(4))[0]
def read_unsigned_char(self):
- return struct_unpack('!B', self.read(1))[0]
+ return compat_struct_unpack('!B', self.read_bytes(1))[0]
def read_string(self):
res = b''
while True:
- char = self.read(1)
+ char = self.read_bytes(1)
if char == b'\x00':
break
res += char
@@ -53,18 +65,18 @@ class FlvReader(io.BytesIO):
Read a box and return the info as a tuple: (box_size, box_type, box_data)
"""
real_size = size = self.read_unsigned_int()
- box_type = self.read(4)
+ box_type = self.read_bytes(4)
header_end = 8
if size == 1:
real_size = self.read_unsigned_long_long()
header_end = 16
- return real_size, box_type, self.read(real_size - header_end)
+ return real_size, box_type, self.read_bytes(real_size - header_end)
def read_asrt(self):
# version
self.read_unsigned_char()
# flags
- self.read(3)
+ self.read_bytes(3)
quality_entry_count = self.read_unsigned_char()
# QualityEntryCount
for i in range(quality_entry_count):
@@ -85,7 +97,7 @@ class FlvReader(io.BytesIO):
# version
self.read_unsigned_char()
# flags
- self.read(3)
+ self.read_bytes(3)
# time scale
self.read_unsigned_int()
@@ -119,7 +131,7 @@ class FlvReader(io.BytesIO):
# version
self.read_unsigned_char()
# flags
- self.read(3)
+ self.read_bytes(3)
self.read_unsigned_int() # BootstrapinfoVersion
# Profile,Live,Update,Reserved
@@ -184,6 +196,11 @@ def build_fragments_list(boot_info):
first_frag_number = fragment_run_entry_table[0]['first']
fragments_counter = itertools.count(first_frag_number)
for segment, fragments_count in segment_run_table['segment_run']:
+ # In some live HDS streams (for example Rai), `fragments_count` is
+ # abnormally large (4294967295), causing out-of-memory errors. It's OK to cap
+ # the number of fragments for live streams, as they are updated periodically.
+ if fragments_count == 4294967295 and boot_info['live']:
+ fragments_count = 2
for _ in range(fragments_count):
res.append((segment, next(fragments_counter)))
@@ -194,11 +211,11 @@ def build_fragments_list(boot_info):
def write_unsigned_int(stream, val):
- stream.write(struct_pack('!I', val))
+ stream.write(compat_struct_pack('!I', val))
def write_unsigned_int_24(stream, val):
- stream.write(struct_pack('!I', val)[1:])
+ stream.write(compat_struct_pack('!I', val)[1:])
def write_flv_header(stream):
@@ -307,7 +324,7 @@ class F4mFD(FragmentFD):
doc = compat_etree_fromstring(manifest)
formats = [(int(f.attrib.get('bitrate', -1)), f)
for f in self._get_unencrypted_media(doc)]
- if requested_bitrate is None:
+ if requested_bitrate is None or len(formats) == 1:
# get the best format
formats = sorted(formats, key=lambda f: f[0])
rate, media = formats[-1]
@@ -317,7 +334,11 @@ class F4mFD(FragmentFD):
base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
bootstrap_node = doc.find(_add_ns('bootstrapInfo'))
- boot_info, bootstrap_url = self._parse_bootstrap_node(bootstrap_node, base_url)
+ # From Adobe F4M 3.0 spec:
+ # The <baseURL> element SHALL be the base URL for all relative
+ # (HTTP-based) URLs in the manifest. If <baseURL> is not present, said
+ # URLs should be relative to the location of the containing document.
+ boot_info, bootstrap_url = self._parse_bootstrap_node(bootstrap_node, man_url)
live = boot_info['live']
metadata_node = media.find(_add_ns('metadata'))
if metadata_node is not None:
@@ -374,7 +395,17 @@ class F4mFD(FragmentFD):
down.close()
reader = FlvReader(down_data)
while True:
- _, box_type, box_data = reader.read_box_info()
+ try:
+ _, box_type, box_data = reader.read_box_info()
+ except DataTruncatedError:
+ if test:
+ # In tests, segments may be truncated, and thus
+ # FlvReader may not be able to parse the whole
+ # chunk. If so, write the segment as is
+ # See https://github.com/rg3/youtube-dl/issues/9214
+ dest_stream.write(down_data)
+ break
+ raise
if box_type == b'mdat':
dest_stream.write(box_data)
break
diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py
index a01dac031..3b7bb3508 100644
--- a/youtube_dl/downloader/hls.py
+++ b/youtube_dl/downloader/hls.py
@@ -2,13 +2,24 @@ from __future__ import unicode_literals
import os.path
import re
+import binascii
+try:
+ from Crypto.Cipher import AES
+ can_decrypt_frag = True
+except ImportError:
+ can_decrypt_frag = False
from .fragment import FragmentFD
+from .external import FFmpegFD
-from ..compat import compat_urlparse
+from ..compat import (
+ compat_urlparse,
+ compat_struct_pack,
+)
from ..utils import (
encodeFilename,
sanitize_open,
+ parse_m3u8_attributes,
)
@@ -17,42 +28,101 @@ class HlsFD(FragmentFD):
FD_NAME = 'hlsnative'
+ @staticmethod
+ def can_download(manifest):
+ UNSUPPORTED_FEATURES = (
+ r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)', # encrypted streams [1]
+ r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2]
+
+ # Live streams heuristic does not always work (e.g. geo restricted to Germany
+ # http://hls-geo.daserste.de/i/videoportal/Film/c_620000/622873/format,716451,716457,716450,716458,716459,.mp4.csmil/index_4_av.m3u8?null=0)
+ # r'#EXT-X-MEDIA-SEQUENCE:(?!0$)', # live streams [3]
+
+ # This heuristic is not correct either, since segments are not necessarily appended.
+ # Twitch VODs of finished streams have EXT-X-PLAYLIST-TYPE:EVENT even though
+ # no more segments will ever be appended to the end of the playlist.
+ # r'#EXT-X-PLAYLIST-TYPE:EVENT', # media segments may be appended to the end of
+ # # event media playlists [4]
+
+ # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4
+ # 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2
+ # 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2
+ # 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5
+ )
+ check_results = [not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES]
+ check_results.append(can_decrypt_frag or '#EXT-X-KEY:METHOD=AES-128' not in manifest)
+ return all(check_results)
+
def real_download(self, filename, info_dict):
man_url = info_dict['url']
self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME)
manifest = self.ydl.urlopen(man_url).read()
s = manifest.decode('utf-8', 'ignore')
- fragment_urls = []
+
+ if not self.can_download(s):
+ self.report_warning(
+ 'hlsnative has detected features it does not support, '
+ 'extraction will be delegated to ffmpeg')
+ fd = FFmpegFD(self.ydl, self.params)
+ for ph in self._progress_hooks:
+ fd.add_progress_hook(ph)
+ return fd.real_download(filename, info_dict)
+
+ total_frags = 0
for line in s.splitlines():
line = line.strip()
if line and not line.startswith('#'):
- segment_url = (
- line
- if re.match(r'^https?://', line)
- else compat_urlparse.urljoin(man_url, line))
- fragment_urls.append(segment_url)
- # We only download the first fragment during the test
- if self.params.get('test', False):
- break
+ total_frags += 1
ctx = {
'filename': filename,
- 'total_frags': len(fragment_urls),
+ 'total_frags': total_frags,
}
self._prepare_and_start_frag_download(ctx)
+ i = 0
+ media_sequence = 0
+ decrypt_info = {'METHOD': 'NONE'}
frags_filenames = []
- for i, frag_url in enumerate(fragment_urls):
- frag_filename = '%s-Frag%d' % (ctx['tmpfilename'], i)
- success = ctx['dl'].download(frag_filename, {'url': frag_url})
- if not success:
- return False
- down, frag_sanitized = sanitize_open(frag_filename, 'rb')
- ctx['dest_stream'].write(down.read())
- down.close()
- frags_filenames.append(frag_sanitized)
+ for line in s.splitlines():
+ line = line.strip()
+ if line:
+ if not line.startswith('#'):
+ frag_url = (
+ line
+ if re.match(r'^https?://', line)
+ else compat_urlparse.urljoin(man_url, line))
+ frag_filename = '%s-Frag%d' % (ctx['tmpfilename'], i)
+ success = ctx['dl'].download(frag_filename, {'url': frag_url})
+ if not success:
+ return False
+ down, frag_sanitized = sanitize_open(frag_filename, 'rb')
+ frag_content = down.read()
+ down.close()
+ if decrypt_info['METHOD'] == 'AES-128':
+ iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence)
+ frag_content = AES.new(
+ decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content)
+ ctx['dest_stream'].write(frag_content)
+ frags_filenames.append(frag_sanitized)
+ # We only download the first fragment during the test
+ if self.params.get('test', False):
+ break
+ i += 1
+ media_sequence += 1
+ elif line.startswith('#EXT-X-KEY'):
+ decrypt_info = parse_m3u8_attributes(line[11:])
+ if decrypt_info['METHOD'] == 'AES-128':
+ if 'IV' in decrypt_info:
+ decrypt_info['IV'] = binascii.unhexlify(decrypt_info['IV'][2:])
+ if not re.match(r'^https?://', decrypt_info['URI']):
+ decrypt_info['URI'] = compat_urlparse.urljoin(
+ man_url, decrypt_info['URI'])
+ decrypt_info['KEY'] = self.ydl.urlopen(decrypt_info['URI']).read()
+ elif line.startswith('#EXT-X-MEDIA-SEQUENCE'):
+ media_sequence = int(line[22:])
self._finish_frag_download(ctx)