about summary refs log tree commit diff
diff options
context:
space:
mode:
author Sergey M․ <dstftw@gmail.com> 2017-08-05 06:57:19 +0700
committer Sergey M․ <dstftw@gmail.com> 2017-08-05 07:40:29 +0700
commit 1141e9104bc0f8d577f18cf28a1af58adea1248e (patch)
tree dcabe19442a08b0a2f11bb2766c8bc0409394a99
parent 8519b88f67de9c0c11cd2edd8dc55b9a4f13d110 (diff)
download youtube-dl-1141e9104bc0f8d577f18cf28a1af58adea1248e.tar.xz
Use relative paths for DASH fragments (closes #12990)
10x reduced JSON size refs #13810
-rw-r--r-- youtube_dl/downloader/dash.py 14
-rw-r--r-- youtube_dl/extractor/common.py 16
2 files changed, 20 insertions, 10 deletions
diff --git a/youtube_dl/downloader/dash.py b/youtube_dl/downloader/dash.py
index 7491fdad8..576ece6db 100644
--- a/youtube_dl/downloader/dash.py
+++ b/youtube_dl/downloader/dash.py
@@ -2,6 +2,7 @@ from __future__ import unicode_literals
from .fragment import FragmentFD
from ..compat import compat_urllib_error
+from ..utils import urljoin
class DashSegmentsFD(FragmentFD):
@@ -12,12 +13,13 @@ class DashSegmentsFD(FragmentFD):
FD_NAME = 'dashsegments'
def real_download(self, filename, info_dict):
- segments = info_dict['fragments'][:1] if self.params.get(
+ fragment_base_url = info_dict.get('fragment_base_url')
+ fragments = info_dict['fragments'][:1] if self.params.get(
'test', False) else info_dict['fragments']
ctx = {
'filename': filename,
- 'total_frags': len(segments),
+ 'total_frags': len(fragments),
}
self._prepare_and_start_frag_download(ctx)
@@ -26,7 +28,7 @@ class DashSegmentsFD(FragmentFD):
skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True)
frag_index = 0
- for i, segment in enumerate(segments):
+ for i, fragment in enumerate(fragments):
frag_index += 1
if frag_index <= ctx['fragment_index']:
continue
@@ -36,7 +38,11 @@ class DashSegmentsFD(FragmentFD):
count = 0
while count <= fragment_retries:
try:
- success, frag_content = self._download_fragment(ctx, segment['url'], info_dict)
+ fragment_url = fragment.get('url')
+ if not fragment_url:
+ assert fragment_base_url
+ fragment_url = urljoin(fragment_base_url, fragment['path'])
+ success, frag_content = self._download_fragment(ctx, fragment_url, info_dict)
if not success:
return False
self._append_fragment(ctx, frag_content)
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 748b4d59f..459e7ffd6 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -1892,9 +1892,13 @@ class InfoExtractor(object):
'Bandwidth': bandwidth,
}
+ def location_key(location):
+ return 'url' if re.match(r'^https?://', location) else 'path'
+
if 'segment_urls' not in representation_ms_info and 'media' in representation_ms_info:
media_template = prepare_template('media', ('Number', 'Bandwidth', 'Time'))
+ media_location_key = location_key(media_template)
# As per [1, 5.3.9.4.4, Table 16, page 55] $Number$ and $Time$
# can't be used at the same time
@@ -1904,7 +1908,7 @@ class InfoExtractor(object):
segment_duration = float_or_none(representation_ms_info['segment_duration'], representation_ms_info['timescale'])
representation_ms_info['total_number'] = int(math.ceil(float(period_duration) / segment_duration))
representation_ms_info['fragments'] = [{
- 'url': media_template % {
+ media_location_key: media_template % {
'Number': segment_number,
'Bandwidth': bandwidth,
},
@@ -1928,7 +1932,7 @@ class InfoExtractor(object):
'Number': segment_number,
}
representation_ms_info['fragments'].append({
- 'url': segment_url,
+ media_location_key: segment_url,
'duration': float_or_none(segment_d, representation_ms_info['timescale']),
})
@@ -1952,8 +1956,9 @@ class InfoExtractor(object):
for s in representation_ms_info['s']:
duration = float_or_none(s['d'], timescale)
for r in range(s.get('r', 0) + 1):
+ segment_uri = representation_ms_info['segment_urls'][segment_index]
fragments.append({
- 'url': representation_ms_info['segment_urls'][segment_index],
+ location_key(segment_uri): segment_uri,
'duration': duration,
})
segment_index += 1
@@ -1962,6 +1967,7 @@ class InfoExtractor(object):
# No fragments key is present in this case.
if 'fragments' in representation_ms_info:
f.update({
+ 'fragment_base_url': base_url,
'fragments': [],
'protocol': 'http_dash_segments',
})
@@ -1969,10 +1975,8 @@ class InfoExtractor(object):
initialization_url = representation_ms_info['initialization_url']
if not f.get('url'):
f['url'] = initialization_url
- f['fragments'].append({'url': initialization_url})
+ f['fragments'].append({location_key(initialization_url): initialization_url})
f['fragments'].extend(representation_ms_info['fragments'])
- for fragment in f['fragments']:
- fragment['url'] = urljoin(base_url, fragment['url'])
try:
existing_format = next(
fo for fo in formats