diff options
Diffstat (limited to 'youtube_dl/downloader')
| -rw-r--r-- | youtube_dl/downloader/external.py | 18 | ||||
| -rw-r--r-- | youtube_dl/downloader/f4m.py | 52 | ||||
| -rw-r--r-- | youtube_dl/downloader/hls.py | 34 | 
3 files changed, 87 insertions, 17 deletions
diff --git a/youtube_dl/downloader/external.py b/youtube_dl/downloader/external.py index 8d642fc3e..3ff1f9ed4 100644 --- a/youtube_dl/downloader/external.py +++ b/youtube_dl/downloader/external.py @@ -6,6 +6,7 @@ import sys  import re  from .common import FileDownloader +from ..compat import compat_setenv  from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS  from ..utils import (      cli_option, @@ -198,6 +199,19 @@ class FFmpegFD(ExternalFD):                  '-headers',                  ''.join('%s: %s\r\n' % (key, val) for key, val in headers.items())] +        env = None +        proxy = self.params.get('proxy') +        if proxy: +            if not re.match(r'^[\da-zA-Z]+://', proxy): +                proxy = 'http://%s' % proxy +            # Since December 2015 ffmpeg supports -http_proxy option (see +            # http://git.videolan.org/?p=ffmpeg.git;a=commit;h=b4eb1f29ebddd60c41a2eb39f5af701e38e0d3fd) +            # We could switch to the following code if we are able to detect version properly +            # args += ['-http_proxy', proxy] +            env = os.environ.copy() +            compat_setenv('HTTP_PROXY', proxy, env=env) +            compat_setenv('http_proxy', proxy, env=env) +          protocol = info_dict.get('protocol')          if protocol == 'rtmp': @@ -224,7 +238,7 @@ class FFmpegFD(ExternalFD):                  args += ['-rtmp_live', 'live']          args += ['-i', url, '-c', 'copy'] -        if protocol == 'm3u8': +        if protocol in ('m3u8', 'm3u8_native'):              if self.params.get('hls_use_mpegts', False) or tmpfilename == '-':                  args += ['-f', 'mpegts']              else: @@ -239,7 +253,7 @@ class FFmpegFD(ExternalFD):          self._debug_cmd(args) -        proc = subprocess.Popen(args, stdin=subprocess.PIPE) +        proc = subprocess.Popen(args, stdin=subprocess.PIPE, env=env)          try:              retval = proc.wait()          except KeyboardInterrupt: diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py index 664d87543..8f88b0241 100644 --- a/youtube_dl/downloader/f4m.py +++ b/youtube_dl/downloader/f4m.py @@ -12,37 +12,49 @@ from ..compat import (      compat_urlparse,      compat_urllib_error,      compat_urllib_parse_urlparse, +    compat_struct_pack, +    compat_struct_unpack,  )  from ..utils import (      encodeFilename,      fix_xml_ampersands,      sanitize_open, -    struct_pack, -    struct_unpack,      xpath_text,  ) +class DataTruncatedError(Exception): +    pass + +  class FlvReader(io.BytesIO):      """      Reader for Flv files      The file format is documented in https://www.adobe.com/devnet/f4v.html      """ +    def read_bytes(self, n): +        data = self.read(n) +        if len(data) < n: +            raise DataTruncatedError( +                'FlvReader error: need %d bytes while only %d bytes got' % ( +                    n, len(data))) +        return data +      # Utility functions for reading numbers and strings      def read_unsigned_long_long(self): -        return struct_unpack('!Q', self.read(8))[0] +        return compat_struct_unpack('!Q', self.read_bytes(8))[0]      def read_unsigned_int(self): -        return struct_unpack('!I', self.read(4))[0] +        return compat_struct_unpack('!I', self.read_bytes(4))[0]      def read_unsigned_char(self): -        return struct_unpack('!B', self.read(1))[0] +        return compat_struct_unpack('!B', self.read_bytes(1))[0]      def read_string(self):          res = b''          while True: -            char = self.read(1) +            char = self.read_bytes(1)              if char == b'\x00':                  break              res += char @@ -53,18 +65,18 @@ class FlvReader(io.BytesIO):          Read a box and return the info as a tuple: (box_size, box_type, box_data)          """          real_size = size = self.read_unsigned_int() -        box_type = self.read(4) +        box_type = self.read_bytes(4)          header_end = 8          if size == 1:              real_size = self.read_unsigned_long_long()              header_end = 16 -        return real_size, box_type, self.read(real_size - header_end) +        return real_size, box_type, self.read_bytes(real_size - header_end)      def read_asrt(self):          # version          self.read_unsigned_char()          # flags -        self.read(3) +        self.read_bytes(3)          quality_entry_count = self.read_unsigned_char()          # QualityEntryCount          for i in range(quality_entry_count): @@ -85,7 +97,7 @@ class FlvReader(io.BytesIO):          # version          self.read_unsigned_char()          # flags -        self.read(3) +        self.read_bytes(3)          # time scale          self.read_unsigned_int() @@ -119,7 +131,7 @@ class FlvReader(io.BytesIO):          # version          self.read_unsigned_char()          # flags -        self.read(3) +        self.read_bytes(3)          self.read_unsigned_int()  # BootstrapinfoVersion          # Profile,Live,Update,Reserved @@ -194,11 +206,11 @@ def build_fragments_list(boot_info):  def write_unsigned_int(stream, val): -    stream.write(struct_pack('!I', val)) +    stream.write(compat_struct_pack('!I', val))  def write_unsigned_int_24(stream, val): -    stream.write(struct_pack('!I', val)[1:]) +    stream.write(compat_struct_pack('!I', val)[1:])  def write_flv_header(stream): @@ -307,7 +319,7 @@ class F4mFD(FragmentFD):          doc = compat_etree_fromstring(manifest)          formats = [(int(f.attrib.get('bitrate', -1)), f)                     for f in self._get_unencrypted_media(doc)] -        if requested_bitrate is None: +        if requested_bitrate is None or len(formats) == 1:              # get the best format              formats = sorted(formats, key=lambda f: f[0])              rate, media = formats[-1] @@ -374,7 +386,17 @@ class F4mFD(FragmentFD):                  down.close()                  reader = FlvReader(down_data)                  while True: -                    _, box_type, box_data = reader.read_box_info() +                    try: +                        _, box_type, box_data = reader.read_box_info() +                    except DataTruncatedError: +                        if test: +                            # In tests, segments may be truncated, and thus +                            # FlvReader may not be able to parse the whole +                            # chunk. If so, write the segment as is +                            # See https://github.com/rg3/youtube-dl/issues/9214 +                            dest_stream.write(down_data) +                            break +                        raise                      if box_type == b'mdat':                          dest_stream.write(box_data)                          break diff --git a/youtube_dl/downloader/hls.py b/youtube_dl/downloader/hls.py index a01dac031..54f2108e9 100644 --- a/youtube_dl/downloader/hls.py +++ b/youtube_dl/downloader/hls.py @@ -4,6 +4,7 @@ import os.path  import re  from .fragment import FragmentFD +from .external import FFmpegFD  from ..compat import compat_urlparse  from ..utils import ( @@ -17,12 +18,45 @@ class HlsFD(FragmentFD):      FD_NAME = 'hlsnative' +    @staticmethod +    def can_download(manifest): +        UNSUPPORTED_FEATURES = ( +            r'#EXT-X-KEY:METHOD=(?!NONE)',  # encrypted streams [1] +            r'#EXT-X-BYTERANGE',  # playlists composed of byte ranges of media files [2] + +            # Live streams heuristic does not always work (e.g. geo restricted to Germany +            # http://hls-geo.daserste.de/i/videoportal/Film/c_620000/622873/format,716451,716457,716450,716458,716459,.mp4.csmil/index_4_av.m3u8?null=0) +            # r'#EXT-X-MEDIA-SEQUENCE:(?!0$)',  # live streams [3] + +            # This heuristic also is not correct since segments may not be appended as well. +            # Twitch vods of finished streams have EXT-X-PLAYLIST-TYPE:EVENT despite +            # no segments will definitely be appended to the end of the playlist. +            # r'#EXT-X-PLAYLIST-TYPE:EVENT',  # media segments may be appended to the end of +            #                                 # event media playlists [4] + +            # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4 +            # 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2 +            # 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2 +            # 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5 +        ) +        return all(not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES) +      def real_download(self, filename, info_dict):          man_url = info_dict['url']          self.to_screen('[%s] Downloading m3u8 manifest' % self.FD_NAME)          manifest = self.ydl.urlopen(man_url).read()          s = manifest.decode('utf-8', 'ignore') + +        if not self.can_download(s): +            self.report_warning( +                'hlsnative has detected features it does not support, ' +                'extraction will be delegated to ffmpeg') +            fd = FFmpegFD(self.ydl, self.params) +            for ph in self._progress_hooks: +                fd.add_progress_hook(ph) +            return fd.real_download(filename, info_dict) +          fragment_urls = []          for line in s.splitlines():              line = line.strip()  | 
