diff options
Diffstat (limited to 'youtube_dl/downloader')
-rw-r--r-- | youtube_dl/downloader/external.py | 18 | ||||
-rw-r--r-- | youtube_dl/downloader/f4m.py | 52 | ||||
-rw-r--r-- | youtube_dl/downloader/hls.py | 34 |
3 files changed, 87 insertions, 17 deletions
diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index 8d642fc3e..3ff1f9ed4 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -6,6 +6,7 @@ import sys import re from .common import FileDownloader +from ..compat import compat_setenv from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS from ..utils import ( cli_option, @@ -198,6 +199,19 @@ class FFmpegFD(ExternalFD): '-headers', ''.join('%s: %s\r\n' % (key, val) for key, val in headers.items())] + env = None + proxy = self.params.get('proxy') + if proxy: + if not re.match(r'^[\da-zA-Z]+://', proxy): + proxy = 'http://%s' % proxy + # Since December 2015 ffmpeg supports -http_proxy option (see + # http://git.videolan.org/?p=ffmpeg.git;a=commit;h=b4eb1f29ebddd60c41a2eb39f5af701e38e0d3fd) + # We could switch to the following code if we are able to detect version properly + # args += ['-http_proxy', proxy] + env = os.environ.copy() + compat_setenv('HTTP_PROXY', proxy, env=env) + compat_setenv('http_proxy', proxy, env=env) + protocol = info_dict.get('protocol') if protocol == 'rtmp': @@ -224,7 +238,7 @@ class FFmpegFD(ExternalFD): args += ['-rtmp_live', 'live'] args += ['-i', url, '-c', 'copy'] - if protocol == 'm3u8': + if protocol in ('m3u8', 'm3u8_native'): if self.params.get('hls_use_mpegts', False) or tmpfilename == '-': args += ['-f', 'mpegts'] else: @@ -239,7 +253,7 @@ class FFmpegFD(ExternalFD): self._debug_cmd(args) - proc = subprocess.Popen(args, stdin=subprocess.PIPE) + proc = subprocess.Popen(args, stdin=subprocess.PIPE, env=env) try: retval = proc.wait() except KeyboardInterrupt: diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py index 664d87543..8f88b0241 100644 --- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -12,37 +12,49 @@ from ..compat import ( compat_urlparse, compat_urllib_error, compat_urllib_parse_urlparse, + compat_struct_pack, + compat_struct_unpack, ) from ..utils import ( encodeFilename, fix_xml_ampersands, sanitize_open, - struct_pack, - struct_unpack, xpath_text, ) +class DataTruncatedError(Exception): + pass + + class FlvReader(io.BytesIO): """ Reader for Flv files The file format is documented in https://www.adobe.com/devnet/f4v.html """ + def read_bytes(self, n): + data = self.read(n) + if len(data) < n: + raise DataTruncatedError( + 'FlvReader error: need %d bytes while only %d bytes got' % ( + n, len(data))) + return data + # Utility functions for reading numbers and strings def read_unsigned_long_long(self): - return struct_unpack('!Q', self.read(8))[0] + return compat_struct_unpack('!Q', self.read_bytes(8))[0] def read_unsigned_int(self): - return struct_unpack('!I', self.read(4))[0] + return compat_struct_unpack('!I', self.read_bytes(4))[0] def read_unsigned_char(self): - return struct_unpack('!B', self.read(1))[0] + return compat_struct_unpack('!B', self.read_bytes(1))[0] def read_string(self): res = b'' while True: - char = self.read(1) + char = self.read_bytes(1) if char == b'\x00': break res += char @@ -53,18 +65,18 @@ class FlvReader(io.BytesIO): Read a box and return the info as a tuple: (box_size, box_type, box_data) """ real_size = size = self.read_unsigned_int() - box_type = self.read(4) + box_type = self.read_bytes(4) header_end = 8 if size == 1: real_size = self.read_unsigned_long_long() header_end = 16 - return real_size, box_type, self.read(real_size - header_end) + return real_size, box_type, self.read_bytes(real_size - header_end) def read_asrt(self): # version self.read_unsigned_char() # flags - self.read(3) + self.read_bytes(3) quality_entry_count = self.read_unsigned_char() # QualityEntryCount for i in range(quality_entry_count): @@ -85,7 +97,7 @@ class FlvReader(io.BytesIO): # version self.read_unsigned_char() # flags - self.read(3) + self.read_bytes(3) # time scale self.read_unsigned_int() @@ -119,7 +131,7 @@ class FlvReader(io.BytesIO): # version self.read_unsigned_char() # flags - self.read(3) + self.read_bytes(3) self.read_unsigned_int() # BootstrapinfoVersion # Profile,Live,Update,Reserved @@ -194,11 +206,11 @@ def build_fragments_list(boot_info): def write_unsigned_int(stream, val): - stream.write(struct_pack('!I', val)) + stream.write(compat_struct_pack('!I', val)) def write_unsigned_int_24(stream, val): - stream.write(struct_pack('!I', val)[1:]) + stream.write(compat_struct_pack('!I', val)[1:]) def write_flv_header(stream): @@ -307,7 +319,7 @@ class F4mFD(FragmentFD): doc = compat_etree_fromstring(manifest) formats = [(int(f.attrib.get('bitrate', -1)), f) for f in self._get_unencrypted_media(doc)] - if requested_bitrate is None: + if requested_bitrate is None or len(formats) == 1: # get the best format formats = sorted(formats, key=lambda f: f[0]) rate, media = formats[-1] @@ -374,7 +386,17 @@ class F4mFD(FragmentFD): down.close() reader = FlvReader(down_data) while True: - _, box_type, box_data = reader.read_box_info() + try: + _, box_type, box_data = reader.read_box_info() + except DataTruncatedError: + if test: + # In tests, segments may be truncated, and thus + # FlvReader may not be able to parse the whole + # chunk. If so, write the segment as is + # See https://github.com/rg3/youtube-dl/issues/9214 + dest_stream.write(down_data) + break + raise if box_type == b'mdat': dest_stream.write(box_data) break diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index a01dac031..54f2108e9 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -4,6 +4,7 @@ import os.path import re from .fragment import FragmentFD +from .external import FFmpegFD from ..compat import compat_urlparse from ..utils import ( @@ -17,12 +18,45 @@ class HlsFD(FragmentFD): FD_NAME = 'hlsnative' + @staticmethod + def can_download(manifest): + UNSUPPORTED_FEATURES = ( + r'#EXT-X-KEY:METHOD=(?!NONE)', # encrypted streams [1] + r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2] + + # Live streams heuristic does not always work (e.g. geo restricted to Germany + # http://hls-geo.daserste.de/i/videoportal/Film/c_620000/622873/format,716451,716457,716450,716458,716459,.mp4.csmil/index_4_av.m3u8?null=0) + # r'#EXT-X-MEDIA-SEQUENCE:(?!0$)', # live streams [3] + + # This heuristic also is not correct since segments may not be appended as well. + # Twitch vods of finished streams have EXT-X-PLAYLIST-TYPE:EVENT despite + # no segments will definitely be appended to the end of the playlist. + # r'#EXT-X-PLAYLIST-TYPE:EVENT', # media segments may be appended to the end of + # # event media playlists [4] + + # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4 + # 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2 + # 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2 + # 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5 + ) + return all(not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES) + def real_download(self, filename, info_dict): man_url = info_dict['url'] self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME) manifest = self.ydl.urlopen(man_url).read() s = manifest.decode('utf-8', 'ignore') + + if not self.can_download(s): + self.report_warning( + 'hlsnative has detected features it does not support, ' + 'extraction will be delegated to ffmpeg') + fd = FFmpegFD(self.ydl, self.params) + for ph in self._progress_hooks: + fd.add_progress_hook(ph) + return fd.real_download(filename, info_dict) + fragment_urls = [] for line in s.splitlines(): line = line.strip() |