aboutsummaryrefslogtreecommitdiff
path: root/youtube_dl/extractor/common.py
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/extractor/common.py')
-rw-r--r--youtube_dl/extractor/common.py32
1 files changed, 24 insertions, 8 deletions
diff --git a/youtube_dl/extractor/common.py b/youtube_dl/extractor/common.py
index 8b3f04c61..fec39da8b 100644
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -245,6 +245,10 @@ class InfoExtractor(object):
specified in the URL.
end_time: Time in seconds where the reproduction should end, as
specified in the URL.
+ chapters: A list of dictionaries, with the following entries:
+ * "start_time" - The start time of the chapter in seconds
+ * "end_time" - The end time of the chapter in seconds
+ * "title" (optional, string)
The following fields should only be used when the video belongs to some logical
chapter or section:
@@ -990,6 +994,7 @@ class InfoExtractor(object):
'tbr': int_or_none(e.get('bitrate')),
'width': int_or_none(e.get('width')),
'height': int_or_none(e.get('height')),
+ 'view_count': int_or_none(e.get('interactionCount')),
})
for e in json_ld:
@@ -1334,7 +1339,7 @@ class InfoExtractor(object):
if '#EXT-X-FAXS-CM:' in m3u8_doc: # Adobe Flash Access
return []
- formats = [self._m3u8_meta_format(m3u8_url, ext, preference, m3u8_id)]
+ formats = []
format_url = lambda u: (
u
@@ -1386,6 +1391,7 @@ class InfoExtractor(object):
f = {
'format_id': '-'.join(format_id),
'url': format_url(media_url),
+ 'manifest_url': m3u8_url,
'language': media.get('LANGUAGE'),
'ext': ext,
'protocol': entry_protocol,
@@ -1438,7 +1444,7 @@ class InfoExtractor(object):
f = {
'format_id': '-'.join(format_id),
'url': manifest_url,
- 'manifest_url': manifest_url,
+ 'manifest_url': m3u8_url,
'tbr': tbr,
'ext': ext,
'fps': float_or_none(last_stream_inf.get('FRAME-RATE')),
@@ -1995,6 +2001,12 @@ class InfoExtractor(object):
compat_etree_fromstring(ism.encode('utf-8')), urlh.geturl(), ism_id)
def _parse_ism_formats(self, ism_doc, ism_url, ism_id=None):
+ """
+ Parse formats from ISM manifest.
+ References:
+ 1. [MS-SSTR]: Smooth Streaming Protocol,
+ https://msdn.microsoft.com/en-us/library/ff469518.aspx
+ """
if ism_doc.get('IsLive') == 'TRUE' or ism_doc.find('Protection') is not None:
return []
@@ -2016,8 +2028,11 @@ class InfoExtractor(object):
self.report_warning('%s is not a supported codec' % fourcc)
continue
tbr = int(track.attrib['Bitrate']) // 1000
- width = int_or_none(track.get('MaxWidth'))
- height = int_or_none(track.get('MaxHeight'))
+ # [1] does not mention Width and Height attributes. However,
+ # they're often present while MaxWidth and MaxHeight are
+ # missing, so should be used as fallbacks
+ width = int_or_none(track.get('MaxWidth') or track.get('Width'))
+ height = int_or_none(track.get('MaxHeight') or track.get('Height'))
sampling_rate = int_or_none(track.get('SamplingRate'))
track_url_pattern = re.sub(r'{[Bb]itrate}', track.attrib['Bitrate'], url_pattern)
@@ -2168,7 +2183,7 @@ class InfoExtractor(object):
def _extract_akamai_formats(self, manifest_url, video_id, hosts={}):
formats = []
hdcore_sign = 'hdcore=3.7.0'
- f4m_url = re.sub(r'(https?://[^/+])/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m')
+ f4m_url = re.sub(r'(https?://[^/]+)/i/', r'\1/z/', manifest_url).replace('/master.m3u8', '/manifest.f4m')
hds_host = hosts.get('hds')
if hds_host:
f4m_url = re.sub(r'(https?://)[^/]+', r'\1' + hds_host, f4m_url)
@@ -2190,8 +2205,9 @@ class InfoExtractor(object):
def _extract_wowza_formats(self, url, video_id, m3u8_entry_protocol='m3u8_native', skip_protocols=[]):
url = re.sub(r'/(?:manifest|playlist|jwplayer)\.(?:m3u8|f4m|mpd|smil)', '', url)
- url_base = self._search_regex(r'(?:https?|rtmp|rtsp)(://[^?]+)', url, 'format url')
- http_base_url = 'http' + url_base
+ url_base = self._search_regex(
+ r'(?:(?:https?|rtmp|rtsp):)?(//[^?]+)', url, 'format url')
+ http_base_url = '%s:%s' % ('http', url_base)
formats = []
if 'm3u8' not in skip_protocols:
formats.extend(self._extract_m3u8_formats(
@@ -2225,7 +2241,7 @@ class InfoExtractor(object):
for protocol in ('rtmp', 'rtsp'):
if protocol not in skip_protocols:
formats.append({
- 'url': protocol + url_base,
+ 'url': '%s:%s' % (protocol, url_base),
'format_id': protocol,
'protocol': protocol,
})