diff options
| -rw-r--r-- | youtube_dl/downloader/dash.py | 35 | ||||
| -rw-r--r-- | youtube_dl/extractor/common.py | 31 | ||||
| -rw-r--r-- | youtube_dl/extractor/generic.py | 4 | 
3 files changed, 26 insertions, 44 deletions
| diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py index 41fc9cfc2..8437dde30 100644 --- a/youtube_dl/downloader/dash.py +++ b/youtube_dl/downloader/dash.py @@ -1,7 +1,6 @@  from __future__ import unicode_literals  import os -import re  from .fragment import FragmentFD  from ..compat import compat_urllib_error @@ -19,34 +18,32 @@ class DashSegmentsFD(FragmentFD):      FD_NAME = 'dashsegments'      def real_download(self, filename, info_dict): -        base_url = info_dict['url'] -        segment_urls = [info_dict['segment_urls'][0]] if self.params.get('test', False) else info_dict['segment_urls'] -        initialization_url = info_dict.get('initialization_url') +        segments = info_dict['fragments'][:1] if self.params.get( +            'test', False) else info_dict['fragments']          ctx = {              'filename': filename, -            'total_frags': len(segment_urls) + (1 if initialization_url else 0), +            'total_frags': len(segments),          }          self._prepare_and_start_frag_download(ctx) -        def combine_url(base_url, target_url): -            if re.match(r'^https?://', target_url): -                return target_url -            return '%s%s%s' % (base_url, '' if base_url.endswith('/') else '/', target_url) -          segments_filenames = []          fragment_retries = self.params.get('fragment_retries', 0)          skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) -        def process_segment(segment, tmp_filename, fatal): -            target_url, segment_name = segment +        def process_segment(segment, tmp_filename, num): +            segment_url = segment['url'] +            segment_name = 'Frag%d' % num              target_filename = '%s-%s' % (tmp_filename, segment_name) +            # In DASH, the first segment contains necessary headers to +            # generate a valid MP4 file, so always abort for the first segment +            fatal = num == 0 or not skip_unavailable_fragments              count = 0              while count <= fragment_retries:                  try: -                    success = ctx['dl'].download(target_filename, {'url': combine_url(base_url, target_url)}) +                    success = ctx['dl'].download(target_filename, {'url': segment_url})                      if not success:                          return False                      down, target_sanitized = sanitize_open(target_filename, 'rb') @@ -72,16 +69,8 @@ class DashSegmentsFD(FragmentFD):                  return False              return True -        segments_to_download = [(initialization_url, 'Init')] if initialization_url else [] -        segments_to_download.extend([ -            (segment_url, 'Seg%d' % i) -            for i, segment_url in enumerate(segment_urls)]) - -        for i, segment in enumerate(segments_to_download): -            # In DASH, the first segment contains necessary headers to -            # generate a valid MP4 file, so always abort for the first segment -            fatal = i == 0 or not skip_unavailable_fragments -            if not process_segment(segment, ctx['tmpfilename'], fatal): +        for i, segment in enumerate(segments): +            if not process_segment(segment, ctx['tmpfilename'], i):                  return False          self._finish_frag_download(ctx) diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index e637b33d5..f35311e7a 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -86,9 +86,10 @@ class InfoExtractor(object):                      from worst to best quality.                      Potential fields: -                    * url        Mandatory. The URL of the video file or URL of -                                 the manifest file in case of fragmented media -                                 (DASH, hls, hds). +                    * url        Mandatory. The URL of the video file +                    * manifest_url +                                 The URL of the manifest file in case of +                                 fragmented media (DASH, hls, hds)                      * ext        Will be calculated from URL if missing                      * format     A human-readable description of the format                                   ("mp4 container with h264/opus"). @@ -1528,9 +1529,10 @@ class InfoExtractor(object):          mpd_base_url = re.match(r'https?://.+/', urlh.geturl()).group()          return self._parse_mpd_formats( -            compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url, formats_dict=formats_dict) +            compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url, +            formats_dict=formats_dict, mpd_url=mpd_url) -    def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}): +    def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}, mpd_url=None):          """          Parse formats from MPD manifest.          References: @@ -1654,6 +1656,7 @@ class InfoExtractor(object):                          f = {                              'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id,                              'url': base_url, +                            'manifest_url': mpd_url,                              'ext': mimetype2ext(mime_type),                              'width': int_or_none(representation_attrib.get('width')),                              'height': int_or_none(representation_attrib.get('height')), @@ -1682,14 +1685,6 @@ class InfoExtractor(object):                                  if 'total_number' not in representation_ms_info and 'segment_duration':                                      segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale'])                                      representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration)) -                                representation_ms_info['segment_urls'] = [ -                                    media_template % { -                                        'Number': segment_number, -                                        'Bandwidth': representation_attrib.get('bandwidth'), -                                    } -                                    for segment_number in range( -                                        representation_ms_info['start_number'], -                                        representation_ms_info['total_number'] + representation_ms_info['start_number'])]                                  representation_ms_info['fragments'] = [{                                      'url': media_template % {                                          'Number': segment_number, @@ -1703,7 +1698,6 @@ class InfoExtractor(object):                                  # $Number*$ or $Time$ in media template with S list available                                  # Example $Number*$: http://www.svtplay.se/klipp/9023742/stopptid-om-bjorn-borg                                  # Example $Time$: https://play.arkena.com/embed/avp/v2/player/media/b41dda37-d8e7-4d3f-b1b5-9a9db578bdfe/1/129411 -                                representation_ms_info['segment_urls'] = []                                  representation_ms_info['fragments'] = []                                  segment_time = 0                                  segment_d = None @@ -1715,7 +1709,6 @@ class InfoExtractor(object):                                          'Bandwidth': representation_attrib.get('bandwidth'),                                          'Number': segment_number,                                      } -                                    representation_ms_info['segment_urls'].append(segment_url)                                      representation_ms_info['fragments'].append({                                          'url': segment_url,                                          'duration': float_or_none(segment_d, representation_ms_info['timescale']), @@ -1745,17 +1738,15 @@ class InfoExtractor(object):                                          'duration': float_or_none(s['d'], representation_ms_info['timescale']),                                      })                              representation_ms_info['fragments'] = fragments -                        if 'segment_urls' in representation_ms_info: +                        # NB: MPD manifest may contain direct URLs to unfragmented media. +                        # No fragments key is present in this case. +                        if 'fragments' in representation_ms_info:                              f.update({ -                                'segment_urls': representation_ms_info['segment_urls'],                                  'fragments': [],                                  'protocol': 'http_dash_segments',                              })                              if 'initialization_url' in representation_ms_info:                                  initialization_url = representation_ms_info['initialization_url'].replace('$RepresentationID$', representation_id) -                                f.update({ -                                    'initialization_url': initialization_url, -                                })                                  if not f.get('url'):                                      f['url'] = initialization_url                                  f['fragments'].append({'url': initialization_url}) diff --git a/youtube_dl/extractor/generic.py b/youtube_dl/extractor/generic.py index 92a6e5146..c1792c534 100644 --- a/youtube_dl/extractor/generic.py +++ b/youtube_dl/extractor/generic.py @@ -1657,7 +1657,9 @@ class GenericIE(InfoExtractor):                  return self.playlist_result(self._parse_xspf(doc, video_id), video_id)              elif re.match(r'(?i)^(?:{[^}]+})?MPD$', doc.tag):                  info_dict['formats'] = self._parse_mpd_formats( -                    doc, video_id, mpd_base_url=url.rpartition('/')[0]) +                    doc, video_id, +                    mpd_base_url=full_response.geturl().rpartition('/')[0], +                    mpd_url=url)                  self._sort_formats(info_dict['formats'])                  return info_dict              elif re.match(r'^{http://ns\.adobe\.com/f4m/[12]\.0}manifest$', doc.tag): | 
