diff options
Diffstat (limited to 'youtube_dl/extractor/youtube.py')
-rw-r--r-- | youtube_dl/extractor/youtube.py | 50 |
1 files changed, 35 insertions, 15 deletions
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 64386f34a..92b9f3ae4 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -32,6 +32,7 @@ from ..utils import ( get_element_by_attribute, get_element_by_id, int_or_none, + mimetype2ext, orderedSet, parse_duration, remove_quotes, @@ -613,7 +614,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): }, 'params': { 'skip_download': 'requires avconv', - } + }, + 'skip': 'This live event has ended.', }, # Extraction from multiple DASH manifests (https://github.com/rg3/youtube-dl/pull/6097) { @@ -706,6 +708,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): }, { # Title with JS-like syntax "};" (see https://github.com/rg3/youtube-dl/issues/7468) + # Also tests cut-off URL expansion in video description (see + # https://github.com/rg3/youtube-dl/issues/1892, + # https://github.com/rg3/youtube-dl/issues/8164) 'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg', 'info_dict': { 'id': 'lsguqyKfVQg', @@ -960,6 +965,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): try: args = player_config['args'] caption_url = args['ttsurl'] + if not caption_url: + self._downloader.report_warning(err_msg) + return {} timestamp = args['timestamp'] # We get the available subtitles list_params = compat_urllib_parse.urlencode({ @@ -1083,9 +1091,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): full_info.update(f) codecs = r.attrib.get('codecs') if codecs: - if full_info.get('acodec') == 'none' and 'vcodec' not in full_info: + if full_info.get('acodec') == 'none': full_info['vcodec'] = codecs - elif full_info.get('vcodec') == 'none' and 'acodec' not in full_info: + elif full_info.get('vcodec') == 'none': full_info['acodec'] = codecs formats.append(full_info) else: @@ -1235,10 +1243,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor): video_description = re.sub(r'''(?x) <a\s+ (?:[a-zA-Z-]+="[^"]+"\s+)*? - title="([^"]+)"\s+ + (?:title|href)="([^"]+)"\s+ (?:[a-zA-Z-]+="[^"]+"\s+)*? - class="yt-uix-redirect-link"\s*> - [^<]+ + class="(?:yt-uix-redirect-link|yt-uix-sessionlink[^"]*)"[^>]*> + [^<]+\.{3}\s* </a> ''', r'\1', video_description) video_description = clean_html(video_description) @@ -1454,15 +1462,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if 'ratebypass' not in url: url += '&ratebypass=yes' + dct = { + 'format_id': format_id, + 'url': url, + 'player_url': player_url, + } + if format_id in self._formats: + dct.update(self._formats[format_id]) + # Some itags are not included in DASH manifest thus corresponding formats will # lack metadata (see https://github.com/rg3/youtube-dl/pull/5993). # Trying to extract metadata from url_encoded_fmt_stream_map entry. mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0]) width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None) - dct = { - 'format_id': format_id, - 'url': url, - 'player_url': player_url, + + more_fields = { 'filesize': int_or_none(url_data.get('clen', [None])[0]), 'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000), 'width': width, @@ -1470,13 +1484,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'fps': int_or_none(url_data.get('fps', [None])[0]), 'format_note': url_data.get('quality_label', [None])[0] or url_data.get('quality', [None])[0], } + for key, value in more_fields.items(): + if value: + dct[key] = value type_ = url_data.get('type', [None])[0] if type_: type_split = type_.split(';') kind_ext = type_split[0].split('/') if len(kind_ext) == 2: - kind, ext = kind_ext - dct['ext'] = ext + kind, _ = kind_ext + dct['ext'] = mimetype2ext(type_split[0]) if kind in ('audio', 'video'): codecs = None for mobj in re.finditer( @@ -1487,15 +1504,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor): if codecs: codecs = codecs.split(',') if len(codecs) == 2: - acodec, vcodec = codecs[0], codecs[1] + acodec, vcodec = codecs[1], codecs[0] else: acodec, vcodec = (codecs[0], 'none') if kind == 'audio' else ('none', codecs[0]) dct.update({ 'acodec': acodec, 'vcodec': vcodec, }) - if format_id in self._formats: - dct.update(self._formats[format_id]) formats.append(dct) elif video_info.get('hlsvp'): manifest_url = video_info['hlsvp'][0] @@ -1505,6 +1520,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor): for a_format in formats: a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True' else: + unavailable_message = self._html_search_regex( + r'(?s)<h1[^>]+id="unavailable-message"[^>]*>(.+?)</h1>', + video_webpage, 'unavailable message', default=None) + if unavailable_message: + raise ExtractorError(unavailable_message, expected=True) raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info') # Look for the DASH manifest |