diff options
Diffstat (limited to 'youtube_dl/extractor/youtube.py')
| -rw-r--r-- | youtube_dl/extractor/youtube.py | 59 | 
1 file changed, 11 insertions, 48 deletions
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 8c7842ee8..f0efaf0d9 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -256,7 +256,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):          '135': {'ext': 'mp4', 'height': 480, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},          '136': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},          '137': {'ext': 'mp4', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, -        '138': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40}, +        '138': {'ext': 'mp4', 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},  # Height can vary (https://github.com/rg3/youtube-dl/issues/4559)          '160': {'ext': 'mp4', 'height': 144, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},          '264': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},          '298': {'ext': 'mp4', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'h264'}, @@ -287,7 +287,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):          '272': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40},          '302': {'ext': 'webm', 'height': 720, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},          '303': {'ext': 'webm', 'height': 1080, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'}, +        '308': {'ext': 'webm', 'height': 1440, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},          '313': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 
'preference': -40, 'vcodec': 'VP9'}, +        '315': {'ext': 'webm', 'height': 2160, 'format_note': 'DASH video', 'acodec': 'none', 'preference': -40, 'fps': 60, 'vcodec': 'VP9'},          # Dash webm audio          '171': {'ext': 'webm', 'vcodec': 'none', 'format_note': 'DASH audio', 'abr': 128, 'preference': -50}, @@ -736,6 +738,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):                  'format_id': format_id,                  'url': video_url,                  'width': int_or_none(r.attrib.get('width')), +                'height': int_or_none(r.attrib.get('height')),                  'tbr': int_or_none(r.attrib.get('bandwidth'), 1000),                  'asr': int_or_none(r.attrib.get('audioSamplingRate')),                  'filesize': filesize, @@ -746,7 +749,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):                      fo for fo in formats                      if fo['format_id'] == format_id)              except StopIteration: -                f.update(self._formats.get(format_id, {})) +                f.update(self._formats.get(format_id, {}).items())                  formats.append(f)              else:                  existing_format.update(f) @@ -1040,6 +1043,12 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):                      self.report_warning(                          'Skipping DASH manifest: %r' % e, video_id)                  else: +                    # Hide the formats we found through non-DASH +                    dash_keys = set(df['format_id'] for df in dash_formats) +                    for f in formats: +                        if f['format_id'] in dash_keys: +                            f['format_id'] = 'nondash-%s' % f['format_id'] +                            f['preference'] = f.get('preference', 0) - 10000                      formats.extend(dash_formats)          self._sort_formats(formats) @@ -1199,9 +1208,6 @@ class 
YoutubePlaylistIE(YoutubeBaseInfoExtractor):          if playlist_id.startswith('RD'):              # Mixes require a custom extraction process              return self._extract_mix(playlist_id) -        if playlist_id.startswith('TL'): -            raise ExtractorError('For downloading YouTube.com top lists, use ' -                                 'the "yttoplist" keyword, for example "youtube-dl \'yttoplist:music:Top Tracks\'"', expected=True)          url = self._TEMPLATE_URL % playlist_id          page = self._download_webpage(url, playlist_id) @@ -1247,49 +1253,6 @@ class YoutubePlaylistIE(YoutubeBaseInfoExtractor):          return self.playlist_result(url_results, playlist_id, playlist_title) -class YoutubeTopListIE(YoutubePlaylistIE): -    IE_NAME = 'youtube:toplist' -    IE_DESC = ('YouTube.com top lists, "yttoplist:{channel}:{list title}"' -               ' (Example: "yttoplist:music:Top Tracks")') -    _VALID_URL = r'yttoplist:(?P<chann>.*?):(?P<title>.*?)$' -    _TESTS = [{ -        'url': 'yttoplist:music:Trending', -        'playlist_mincount': 5, -        'skip': 'Only works for logged-in users', -    }] - -    def _real_extract(self, url): -        mobj = re.match(self._VALID_URL, url) -        channel = mobj.group('chann') -        title = mobj.group('title') -        query = compat_urllib_parse.urlencode({'title': title}) -        channel_page = self._download_webpage( -            'https://www.youtube.com/%s' % channel, title) -        link = self._html_search_regex( -            r'''(?x) -                <a\s+href="([^"]+)".*?>\s* -                <span\s+class="branded-page-module-title-text">\s* -                <span[^>]*>.*?%s.*?</span>''' % re.escape(query), -            channel_page, 'list') -        url = compat_urlparse.urljoin('https://www.youtube.com/', link) - -        video_re = r'data-index="\d+".*?data-video-id="([0-9A-Za-z_-]{11})"' -        ids = [] -        # sometimes the webpage doesn't contain the videos -        # retry until 
we get them -        for i in itertools.count(0): -            msg = 'Downloading Youtube mix' -            if i > 0: -                msg += ', retry #%d' % i - -            webpage = self._download_webpage(url, title, msg) -            ids = orderedSet(re.findall(video_re, webpage)) -            if ids: -                break -        url_results = self._ids_to_results(ids) -        return self.playlist_result(url_results, playlist_title=title) - -  class YoutubeChannelIE(InfoExtractor):      IE_DESC = 'YouTube.com channels'      _VALID_URL = r'https?://(?:youtu\.be|(?:\w+\.)?youtube(?:-nocookie)?\.com)/channel/(?P<id>[0-9A-Za-z_-]+)'  | 
