diff options
| author | Sergey M․ <dstftw@gmail.com> | 2021-04-17 00:07:32 +0700 | 
|---|---|---|
| committer | Sergey M․ <dstftw@gmail.com> | 2021-04-17 00:07:32 +0700 | 
| commit | 06159135ef148a6ddc632d0c89b90c937d5bb021 (patch) | |
| tree | 24623830102bafb3f6cf8c211d6a02e5a98d0e17 | |
| parent | 4fb25ff5a3be5206bb72e5c4046715b1529fb2c7 (diff) | |
[youtube] Improve URL to extractor routing (closes #27572, closes #28335, closes #28742)
| -rw-r--r-- | youtube_dl/extractor/youtube.py | 35 | 
1 file changed, 23 insertions, 12 deletions
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 79e47c919..4d7f3f837 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -46,6 +46,10 @@ from ..utils import (  ) +def parse_qs(url): +    return compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) + +  class YoutubeBaseInfoExtractor(InfoExtractor):      """Provide base functions for Youtube extractors"""      _LOGIN_URL = 'https://accounts.google.com/ServiceLogin' @@ -413,16 +417,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):                           |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId=                           )                       )?                                                       # all until now is optional -> you can pass the naked ID -                     (?P<id>[0-9A-Za-z_-]{11})                                      # here is it! the YouTube video ID -                     (?!.*?\blist= -                        (?: -                            %(playlist_id)s|                                  # combined list/video URLs are handled by the playlist IE -                            WL                                                # WL are handled by the watch later IE -                        ) -                     ) +                     (?P<id>[0-9A-Za-z_-]{11})                                # here is it! the YouTube video ID                       (?(1).+)?                                                
# if we found the ID, everything can follow                       $""" % { -        'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE,          'invidious': '|'.join(_INVIDIOUS_SITES),      }      _PLAYER_INFO_RE = ( @@ -1208,6 +1205,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor):          '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},      } +    @classmethod +    def suitable(cls, url): +        qs = parse_qs(url) +        if qs.get('list', [None])[0]: +            return False +        return super(YoutubeIE, cls).suitable(url) +      def __init__(self, *args, **kwargs):          super(YoutubeIE, self).__init__(*args, **kwargs)          self._code_cache = {} @@ -2275,6 +2279,9 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):              'title': '#cctv9',          },          'playlist_mincount': 350, +    }, { +        'url': 'https://www.youtube.com/watch?list=PLW4dVinRY435CBE_JD3t-0SRXKfnZHS1P&feature=youtu.be&v=M9cJMXmQ_ZU', +        'only_matching': True,      }]      @classmethod @@ -2764,7 +2771,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):          url = compat_urlparse.urlunparse(              compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))          # Handle both video/playlist URLs -        qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) +        qs = parse_qs(url)          video_id = qs.get('v', [None])[0]          playlist_id = qs.get('list', [None])[0]          if video_id and playlist_id: @@ -2860,12 +2867,16 @@ class YoutubePlaylistIE(InfoExtractor):      @classmethod      def suitable(cls, url): -        return False if YoutubeTabIE.suitable(url) else super( -            YoutubePlaylistIE, cls).suitable(url) +        if YoutubeTabIE.suitable(url): +            return False +        qs = parse_qs(url) +        if qs.get('v', [None])[0]: +            return False +        return super(YoutubePlaylistIE, cls).suitable(url)      def _real_extract(self, url):          playlist_id = 
self._match_id(url) -        qs = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) +        qs = parse_qs(url)          if not qs:              qs = {'list': playlist_id}          return self.url_result(  | 
