diff options
| -rw-r--r-- | youtube_dl/extractor/common.py | 25 | 
1 files changed, 17 insertions, 8 deletions
| diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py index dfd0584d3..c291bc1df 100644 --- a/youtube_dl/extractor/common.py +++ b/youtube_dl/extractor/common.py @@ -108,10 +108,13 @@ class InfoExtractor(object):                                     for RTMP - RTMP URL,                                     for HLS - URL of the M3U8 media playlist,                                     for HDS - URL of the F4M manifest, -                                   for DASH - URL of the MPD manifest or -                                              base URL representing the media -                                              if MPD manifest is parsed from -                                              a string, +                                   for DASH +                                     - HTTP URL to plain file media (in case of +                                       unfragmented media) +                                     - URL of the MPD manifest or base URL +                                       representing the media if MPD manifest +                                       is parsed from a string (in case of +                                       fragmented media)                                     for MSS - URL of the ISM manifest.                      
* manifest_url                                   The URL of the manifest file in case of @@ -2137,8 +2140,6 @@ class InfoExtractor(object):                          bandwidth = int_or_none(representation_attrib.get('bandwidth'))                          f = {                              'format_id': '%s-%s' % (mpd_id, representation_id) if mpd_id else representation_id, -                            # NB: mpd_url may be empty when MPD manifest is parsed from a string -                            'url': mpd_url or base_url,                              'manifest_url': mpd_url,                              'ext': mimetype2ext(mime_type),                              'width': int_or_none(representation_attrib.get('width')), @@ -2277,10 +2278,14 @@ class InfoExtractor(object):                                      fragment['duration'] = segment_duration                                  fragments.append(fragment)                              representation_ms_info['fragments'] = fragments -                        # NB: MPD manifest may contain direct URLs to unfragmented media. -                        # No fragments key is present in this case. +                        # If there is a fragments key available then we correctly recognized fragmented media. +                        # Otherwise we will assume unfragmented media with direct access. Technically, such +                        # assumption is not necessarily correct since we may simply have no support for +                        # some forms of fragmented media renditions yet, but for now we'll use this fallback.                          
if 'fragments' in representation_ms_info:                              f.update({ +                                # NB: mpd_url may be empty when MPD manifest is parsed from a string +                                'url': mpd_url or base_url,                                  'fragment_base_url': base_url,                                  'fragments': [],                                  'protocol': 'http_dash_segments', @@ -2291,6 +2296,10 @@ class InfoExtractor(object):                                      f['url'] = initialization_url                                  f['fragments'].append({location_key(initialization_url): initialization_url})                              f['fragments'].extend(representation_ms_info['fragments']) +                        else: +                            # Assuming direct URL to unfragmented media. +                            f['url'] = base_url +                          # According to [1, 5.3.5.2, Table 7, page 35] @id of Representation                          # is not necessarily unique within a Period thus formats with                          # the same `format_id` are quite possible. There are numerous examples | 
