diff options
| author | Sergey M․ <dstftw@gmail.com> | 2016-07-24 10:27:16 +0700 | 
|---|---|---|
| committer | Sergey M․ <dstftw@gmail.com> | 2016-07-24 10:27:16 +0700 | 
| commit | f09483485728871286f2670c8b8d62f56a89b1e1 (patch) | |
| tree | 8327d10e3106999bf8c194ec7ed45d0034566585 /youtube_dl/extractor/common.py | |
| parent | 111de00289d8c019764e79247568248f8a4b11f6 (diff) | |
[extractor/common] Add support for $ in SegmentTemplate in MPD manifests
Diffstat (limited to 'youtube_dl/extractor/common.py')
| -rw-r--r-- | youtube_dl/extractor/common.py | 61 | 
1 files changed, 50 insertions, 11 deletions
| diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index 29544c1a8..b8a76e3cb 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -1481,6 +1481,13 @@ class InfoExtractor(object):              compat_etree_fromstring(mpd.encode('utf-8')), mpd_id, mpd_base_url, formats_dict=formats_dict)      def _parse_mpd_formats(self, mpd_doc, mpd_id=None, mpd_base_url='', formats_dict={}): +        """ +        Parse formats from MPD manifest. +        References: +         1. MPEG-DASH Standard, ISO/IEC 23009-1:2014(E), +            http://standards.iso.org/ittf/PubliclyAvailableStandards/c065274_ISO_IEC_23009-1_2014.zip +         2. https://en.wikipedia.org/wiki/Dynamic_Adaptive_Streaming_over_HTTP +        """          if mpd_doc.get('type') == 'dynamic':              return [] @@ -1513,8 +1520,16 @@ class InfoExtractor(object):                          s_e = segment_timeline.findall(_add_ns('S'))                          if s_e:                              ms_info['total_number'] = 0 +                            ms_info['s'] = []                              for s in s_e: -                                ms_info['total_number'] += 1 + int(s.get('r', '0')) +                                r = int(s.get('r', 0)) +                                ms_info['total_number'] += 1 + r +                                ms_info['s'].append({ +                                    't': int(s.get('t', 0)), +                                    # @d is mandatory (see [1, 5.3.9.6.2, Table 17, page 60]) +                                    'd': int(s.attrib['d']), +                                    'r': r, +                                })                      else:                          timescale = segment_template.get('timescale')                          if timescale: @@ -1551,7 +1566,7 @@ class InfoExtractor(object):                          continue                      representation_attrib = adaptation_set.attrib.copy()                      representation_attrib.update(representation.attrib) -                    # According to page 41 of ISO/IEC 29001-1:2014, @mimeType is mandatory +                    # According to [1, 5.3.7.2, Table 9, page 41], @mimeType is mandatory                      mime_type = representation_attrib['mimeType']                      content_type = mime_type.split('/')[0]                      if content_type == 'text': @@ -1595,16 +1610,40 @@ class InfoExtractor(object):                                  representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))                              media_template = representation_ms_info['media_template']                              media_template = media_template.replace('$RepresentationID$', representation_id) -                            media_template = re.sub(r'\$(Number|Bandwidth)\$', r'%(\1)d', media_template) -                            media_template = re.sub(r'\$(Number|Bandwidth)%([^$]+)\$', r'%(\1)\2', media_template) +                            media_template = re.sub(r'\$(Number|Bandwidth|Time)\$', r'%(\1)d', media_template) +                            media_template = re.sub(r'\$(Number|Bandwidth|Time)%([^$]+)\$', r'%(\1)\2', media_template)                              media_template.replace('$$', '$') -                            representation_ms_info['segment_urls'] = [ -                                media_template % { -                                    'Number': segment_number, -                                    'Bandwidth': representation_attrib.get('bandwidth')} -                                for segment_number in range( -                                    representation_ms_info['start_number'], -                                    representation_ms_info['total_number'] + representation_ms_info['start_number'])] + +                            # As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$ +                            # can't be used at the same time +                            if '%(Number' in media_template: +                                representation_ms_info['segment_urls'] = [ +                                    media_template % { +                                        'Number': segment_number, +                                        'Bandwidth': representation_attrib.get('bandwidth'), +                                    } +                                    for segment_number in range( +                                        representation_ms_info['start_number'], +                                        representation_ms_info['total_number'] + representation_ms_info['start_number'])] +                            else: +                                representation_ms_info['segment_urls'] = [] +                                segment_time = 0 + +                                def add_segment_url(): +                                    representation_ms_info['segment_urls'].append( +                                        media_template % { +                                            'Time': segment_time, +                                            'Bandwidth': representation_attrib.get('bandwidth'), +                                        } +                                    ) + +                                for num, s in enumerate(representation_ms_info['s']): +                                    segment_time = s.get('t') or segment_time +                                    add_segment_url() +                                    for r in range(s.get('r', 0)): +                                        segment_time += s['d'] +                                        add_segment_url() +                                    segment_time += s['d']                          if 'segment_urls' in representation_ms_info:                              f.update({                                  'segment_urls': representation_ms_info['segment_urls'], | 
