aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSergey M․ <dstftw@gmail.com>2017-11-04 22:10:55 +0700
committerSergey M․ <dstftw@gmail.com>2017-11-04 22:10:55 +0700
commit48107c198bd76e611e3d4c2486cdc5403829a05a (patch)
treed9f68f3661d24e7a77996a5ba1f7b3792276b079
parentcd670befc4c823a38a88fffbaa6c493e539dd79d (diff)
downloadyoutube-dl-48107c198bd76e611e3d4c2486cdc5403829a05a.tar.xz
[f4m] Prefer baseURL for relative URLs (closes #14660)
-rw-r--r--youtube_dl/downloader/f4m.py25
-rw-r--r--youtube_dl/extractor/common.py14
2 files changed, 24 insertions, 15 deletions
diff --git a/youtube_dl/downloader/f4m.py b/youtube_dl/downloader/f4m.py
index c8fde9a89..fdb80f42a 100644
--- a/youtube_dl/downloader/f4m.py
+++ b/youtube_dl/downloader/f4m.py
@@ -243,8 +243,17 @@ def remove_encrypted_media(media):
media))
-def _add_ns(prop):
- return '{http://ns.adobe.com/f4m/1.0}%s' % prop
+def _add_ns(prop, ver=1):
+ return '{http://ns.adobe.com/f4m/%d.0}%s' % (ver, prop)
+
+
+def get_base_url(manifest):
+ base_url = xpath_text(
+ manifest, [_add_ns('baseURL'), _add_ns('baseURL', 2)],
+ 'base URL', default=None)
+ if base_url:
+ base_url = base_url.strip()
+ return base_url
class F4mFD(FragmentFD):
@@ -330,13 +339,13 @@ class F4mFD(FragmentFD):
rate, media = list(filter(
lambda f: int(f[0]) == requested_bitrate, formats))[0]
- base_url = compat_urlparse.urljoin(man_url, media.attrib['url'])
+ # Prefer baseURL for relative URLs as per 11.2 of F4M 3.0 spec.
+ man_base_url = get_base_url(doc) or man_url
+
+ base_url = compat_urlparse.urljoin(man_base_url, media.attrib['url'])
bootstrap_node = doc.find(_add_ns('bootstrapInfo'))
- # From Adobe F4M 3.0 spec:
- # The <baseURL> element SHALL be the base URL for all relative
- # (HTTP-based) URLs in the manifest. If <baseURL> is not present, said
- # URLs should be relative to the location of the containing document.
- boot_info, bootstrap_url = self._parse_bootstrap_node(bootstrap_node, man_url)
+ boot_info, bootstrap_url = self._parse_bootstrap_node(
+ bootstrap_node, man_base_url)
live = boot_info['live']
metadata_node = media.find(_add_ns('metadata'))
if metadata_node is not None:
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index a67ac4411..64fb869aa 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -29,7 +29,10 @@ from ..compat import (
compat_urlparse,
compat_xml_parse_error,
)
-from ..downloader.f4m import remove_encrypted_media
+from ..downloader.f4m import (
+ get_base_url,
+ remove_encrypted_media,
+)
from ..utils import (
NO_DEFAULT,
age_restricted,
@@ -1239,11 +1242,8 @@ class InfoExtractor(object):
media_nodes = remove_encrypted_media(media_nodes)
if not media_nodes:
return formats
- base_url = xpath_text(
- manifest, ['{http://ns.adobe.com/f4m/1.0}baseURL', '{http://ns.adobe.com/f4m/2.0}baseURL'],
- 'base URL', default=None)
- if base_url:
- base_url = base_url.strip()
+
+ manifest_base_url = get_base_url(manifest)
bootstrap_info = xpath_element(
manifest, ['{http://ns.adobe.com/f4m/1.0}bootstrapInfo', '{http://ns.adobe.com/f4m/2.0}bootstrapInfo'],
@@ -1275,7 +1275,7 @@ class InfoExtractor(object):
continue
manifest_url = (
media_url if media_url.startswith('http://') or media_url.startswith('https://')
- else ((base_url or '/'.join(manifest_url.split('/')[:-1])) + '/' + media_url))
+ else ((manifest_base_url or '/'.join(manifest_url.split('/')[:-1])) + '/' + media_url))
# If media_url is itself a f4m manifest do the recursive extraction
# since bitrates in parent manifest (this one) and media_url manifest
# may differ leading to inability to resolve the format by requested