diff options
Diffstat (limited to 'youtube_dl/extractor/youtube.py')
| -rw-r--r-- | youtube_dl/extractor/youtube.py | 50 | 
1 files changed, 35 insertions, 15 deletions
| diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 64386f34a..92b9f3ae4 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -32,6 +32,7 @@ from ..utils import (      get_element_by_attribute,      get_element_by_id,      int_or_none, +    mimetype2ext,      orderedSet,      parse_duration,      remove_quotes, @@ -613,7 +614,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):              },              'params': {                  'skip_download': 'requires avconv', -            } +            }, +            'skip': 'This live event has ended.',          },          # Extraction from multiple DASH manifests (https://github.com/rg3/youtube-dl/pull/6097)          { @@ -706,6 +708,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):          },          {              # Title with JS-like syntax "};" (see https://github.com/rg3/youtube-dl/issues/7468) +            # Also tests cut-off URL expansion in video description (see +            # https://github.com/rg3/youtube-dl/issues/1892, +            # https://github.com/rg3/youtube-dl/issues/8164)              'url': 'https://www.youtube.com/watch?v=lsguqyKfVQg',              'info_dict': {                  'id': 'lsguqyKfVQg', @@ -960,6 +965,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):          try:              args = player_config['args']              caption_url = args['ttsurl'] +            if not caption_url: +                self._downloader.report_warning(err_msg) +                return {}              timestamp = args['timestamp']              # We get the available subtitles              list_params = compat_urllib_parse.urlencode({ @@ -1083,9 +1091,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):                          full_info.update(f)                          codecs = r.attrib.get('codecs')                          if codecs: -                            if full_info.get('acodec') == 'none' and 'vcodec' not in full_info: +                            if full_info.get('acodec') == 'none':                                  full_info['vcodec'] = codecs -                            elif full_info.get('vcodec') == 'none' and 'acodec' not in full_info: +                            elif full_info.get('vcodec') == 'none':                                  full_info['acodec'] = codecs                          formats.append(full_info)                      else: @@ -1235,10 +1243,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):              video_description = re.sub(r'''(?x)                  <a\s+                      (?:[a-zA-Z-]+="[^"]+"\s+)*? -                    title="([^"]+)"\s+ +                    (?:title|href)="([^"]+)"\s+                      (?:[a-zA-Z-]+="[^"]+"\s+)*? -                    class="yt-uix-redirect-link"\s*> -                [^<]+ +                    class="(?:yt-uix-redirect-link|yt-uix-sessionlink[^"]*)"[^>]*> +                [^<]+\.{3}\s*                  </a>              ''', r'\1', video_description)              video_description = clean_html(video_description) @@ -1454,15 +1462,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor):                  if 'ratebypass' not in url:                      url += '&ratebypass=yes' +                dct = { +                    'format_id': format_id, +                    'url': url, +                    'player_url': player_url, +                } +                if format_id in self._formats: +                    dct.update(self._formats[format_id]) +                  # Some itags are not included in DASH manifest thus corresponding formats will                  # lack metadata (see https://github.com/rg3/youtube-dl/pull/5993).                  # Trying to extract metadata from url_encoded_fmt_stream_map entry.                  mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])                  width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None) -                dct = { -                    'format_id': format_id, -                    'url': url, -                    'player_url': player_url, + +                more_fields = {                      'filesize': int_or_none(url_data.get('clen', [None])[0]),                      'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),                      'width': width, @@ -1470,13 +1484,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor):                      'fps': int_or_none(url_data.get('fps', [None])[0]),                      'format_note': url_data.get('quality_label', [None])[0] or url_data.get('quality', [None])[0],                  } +                for key, value in more_fields.items(): +                    if value: +                        dct[key] = value                  type_ = url_data.get('type', [None])[0]                  if type_:                      type_split = type_.split(';')                      kind_ext = type_split[0].split('/')                      if len(kind_ext) == 2: -                        kind, ext = kind_ext -                        dct['ext'] = ext +                        kind, _ = kind_ext +                        dct['ext'] = mimetype2ext(type_split[0])                          if kind in ('audio', 'video'):                              codecs = None                              for mobj in re.finditer( @@ -1487,15 +1504,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):                              if codecs:                                  codecs = codecs.split(',')                                  if len(codecs) == 2: -                                    acodec, vcodec = codecs[0], codecs[1] +                                    acodec, vcodec = codecs[1], codecs[0]                                  else:                                      acodec, vcodec = (codecs[0], 'none') if kind == 'audio' else ('none', codecs[0])                                  dct.update({                                      'acodec': acodec,                                      'vcodec': vcodec,                                  }) -                if format_id in self._formats: -                    dct.update(self._formats[format_id])                  formats.append(dct)          elif video_info.get('hlsvp'):              manifest_url = video_info['hlsvp'][0] @@ -1505,6 +1520,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):              for a_format in formats:                  a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True'          else: +            unavailable_message = self._html_search_regex( +                r'(?s)<h1[^>]+id="unavailable-message"[^>]*>(.+?)</h1>', +                video_webpage, 'unavailable message', default=None) +            if unavailable_message: +                raise ExtractorError(unavailable_message, expected=True)              raise ExtractorError('no conn, hlsvp or url_encoded_fmt_stream_map information found in video info')          # Look for the DASH manifest | 
