about summary refs log tree commit diff
path: root/youtube_dl/extractor/youtube.py
diff options
context:
space:
mode:
Diffstat (limited to 'youtube_dl/extractor/youtube.py')
-rw-r--r-- youtube_dl/extractor/youtube.py 50
1 files changed, 35 insertions, 15 deletions
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py
index 64386f34a..92b9f3ae4 100644
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -32,6 +32,7 @@ from ..utils import (
get_element_by_attribute,
get_element_by_id,
int_or_none,
+ mimetype2ext,
orderedSet,
parse_duration,
remove_quotes,
@@ -613,7 +614,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
},
'params': {
'skip_download': 'requires avconv',
- }
+ },
+ 'skip': 'This live event has ended.',
},
# Extraction from multiple DASH manifests (https://github.com/rg3/youtube-dl/pull/6097)
{
@@ -706,6 +708,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
},
{
# Title with JS-like syntax "};" (see https://github.com/rg3/youtube-dl/issues/7468)
+ # Also tests cut-off URL expansion in video description (see
+ # https://github.com/rg3/youtube-dl/issues/1892,
+ # https://github.com/rg3/youtube-dl/issues/8164)
'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',
'info_dict': {
'id': 'lsguqyKfVQg',
@@ -960,6 +965,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
try:
args = player_config['args']
caption_url = args['ttsurl']
+ if not caption_url:
+ self._downloader.report_warning(err_msg)
+ return {}
timestamp = args['timestamp']
# We get the available subtitles
list_params = compat_urllib_parse.urlencode({
@@ -1083,9 +1091,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
full_info.update(f)
codecs = r.attrib.get('codecs')
if codecs:
- if full_info.get('acodec') == 'none' and 'vcodec' not in full_info:
+ if full_info.get('acodec') == 'none':
full_info['vcodec'] = codecs
- elif full_info.get('vcodec') == 'none' and 'acodec' not in full_info:
+ elif full_info.get('vcodec') == 'none':
full_info['acodec'] = codecs
formats.append(full_info)
else:
@@ -1235,10 +1243,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
video_description = re.sub(r'''(?x)
<a\s+
(?:[a-zA-Z-]+="[^"]+"\s+)*?
- title="([^"]+)"\s+
+ (?:title|href)="([^"]+)"\s+
(?:[a-zA-Z-]+="[^"]+"\s+)*?
- class="yt-uix-redirect-link"\s*>
- [^<]+
+ class="(?:yt-uix-redirect-link|yt-uix-sessionlink[^"]*)"[^>]*>
+ [^<]+\.{3}\s*
</a>
''', r'\1', video_description)
video_description = clean_html(video_description)
@@ -1454,15 +1462,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if 'ratebypass' not in url:
url += '&ratebypass=yes'
+ dct = {
+ 'format_id': format_id,
+ 'url': url,
+ 'player_url': player_url,
+ }
+ if format_id in self._formats:
+ dct.update(self._formats[format_id])
+
# Some itags are not included in DASH manifest thus corresponding formats will
# lack metadata (see https://github.com/rg3/youtube-dl/pull/5993).
# Trying to extract metadata from url_encoded_fmt_stream_map entry.
mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
- dct = {
- 'format_id': format_id,
- 'url': url,
- 'player_url': player_url,
+
+ more_fields = {
'filesize': int_or_none(url_data.get('clen', [None])[0]),
'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
'width': width,
@@ -1470,13 +1484,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'fps': int_or_none(url_data.get('fps', [None])[0]),
'format_note': url_data.get('quality_label', [None])[0] or url_data.get('quality', [None])[0],
}
+ for key, value in more_fields.items():
+ if value:
+ dct[key] = value
type_ = url_data.get('type', [None])[0]
if type_:
type_split = type_.split(';')
kind_ext = type_split[0].split('/')
if len(kind_ext) == 2:
- kind, ext = kind_ext
- dct['ext'] = ext
+ kind, _ = kind_ext
+ dct['ext'] = mimetype2ext(type_split[0])
if kind in ('audio', 'video'):
codecs = None
for mobj in re.finditer(
@@ -1487,15 +1504,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if codecs:
codecs = codecs.split(',')
if len(codecs) == 2:
- acodec, vcodec = codecs[0], codecs[1]
+ acodec, vcodec = codecs[1], codecs[0]
else:
acodec, vcodec = (codecs[0], 'none') if kind == 'audio' else ('none', codecs[0])
dct.update({
'acodec': acodec,
'vcodec': vcodec,
})
- if format_id in self._formats:
- dct.update(self._formats[format_id])
formats.append(dct)
elif video_info.get('hlsvp'):
manifest_url = video_info['hlsvp'][0]
@@ -1505,6 +1520,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
for a_format in formats:
a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'
else:
+ unavailable_message = self._html_search_regex(
+ r'(?s)<h1[^>]+id="unavailable-message"[^>]*>(.+?)</h1>',
+ video_webpage, 'unavailable message', default=None)
+ if unavailable_message:
+ raise ExtractorError(unavailable_message, expected=True)
raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')
# Look for the DASH manifest