diff options
Diffstat (limited to 'youtube_dl/extractor/youtube.py')
| -rw-r--r-- | youtube_dl/extractor/youtube.py | 61 | 
1 files changed, 23 insertions, 38 deletions
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 6c1cfe7f2..6c70a98d1 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -14,12 +14,11 @@ from ..compat import (      compat_chr,      compat_HTTPError,      compat_map as map, -    compat_parse_qs,      compat_str, +    compat_urllib_parse, +    compat_urllib_parse_parse_qs as compat_parse_qs,      compat_urllib_parse_unquote_plus, -    compat_urllib_parse_urlencode,      compat_urllib_parse_urlparse, -    compat_urlparse,  )  from ..jsinterp import JSInterpreter  from ..utils import ( @@ -33,6 +32,7 @@ from ..utils import (      mimetype2ext,      parse_codecs,      parse_duration, +    parse_qs,      qualities,      remove_start,      smuggle_url, @@ -50,10 +50,6 @@ from ..utils import (  ) -def parse_qs(url): -    return compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query) - -  class YoutubeBaseInfoExtractor(InfoExtractor):      """Provide base functions for Youtube extractors"""      _LOGIN_URL = 'https://accounts.google.com/ServiceLogin' @@ -636,6 +632,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):                  'description': r're:(?s).{100,}About the Game\n.*?The Witcher 3: Wild Hunt.{100,}',                  'duration': 142,                  'uploader': 'The Witcher', +                'uploader_id': 'WitcherGame', +                'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/WitcherGame',                  'upload_date': '20140605',                  'thumbnail': 'https://i.ytimg.com/vi/HtVdAasjOgU/maxresdefault.jpg',                  'age_limit': 18, @@ -671,7 +669,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):              },          },          { -            'note': 'Age-gated video embedable only with clientScreen=EMBED', +            'note': 'Age-gated video embeddable only with clientScreen=EMBED',              'url': 'https://youtube.com/watch?v=Tq92D6wQ1mg',              'info_dict': {                  'id': 'Tq92D6wQ1mg', @@ -1392,11 +1390,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):      @classmethod      def suitable(cls, url): -        # Hack for lazy extractors until more generic solution is implemented -        # (see #28780) -        from .youtube import parse_qs -        qs = parse_qs(url) -        if qs.get('list', [None])[0]: +        if parse_qs(url).get('list', [None])[0]:              return False          return super(YoutubeIE, cls).suitable(url) @@ -1546,7 +1540,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):          if player_url.startswith('//'):              player_url = 'https:' + player_url          elif not re.match(r'https?://', player_url): -            player_url = compat_urlparse.urljoin( +            player_url = compat_urllib_parse.urljoin(                  'https://www.youtube.com', player_url)          return player_url @@ -1628,9 +1622,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):      def _unthrottle_format_urls(self, video_id, player_url, formats):          for fmt in formats: -            parsed_fmt_url = compat_urlparse.urlparse(fmt['url']) -            qs = compat_urlparse.parse_qs(parsed_fmt_url.query) -            n_param = qs.get('n') +            parsed_fmt_url = compat_urllib_parse.urlparse(fmt['url']) +            n_param = compat_parse_qs(parsed_fmt_url.query).get('n')              if not n_param:                  continue              n_param = n_param[-1] @@ -1638,9 +1631,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):              if n_response is None:                  # give up if descrambling failed                  break -            qs['n'] = [n_response] -            fmt['url'] = compat_urlparse.urlunparse( -                parsed_fmt_url._replace(query=compat_urllib_parse_urlencode(qs, True))) +            fmt['url'] = update_url( +                parsed_fmt_url, query_update={'n': [n_response]})      # from yt-dlp, with tweaks      def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False): @@ -1669,20 +1661,17 @@ class YoutubeIE(YoutubeBaseInfoExtractor):              lambda x: x['playbackTracking']['videostatsPlaybackUrl']['baseUrl']))          if not playback_url:              return -        parsed_playback_url = compat_urlparse.urlparse(playback_url) -        qs = compat_urlparse.parse_qs(parsed_playback_url.query)          # cpn generation algorithm is reverse engineered from base.js.          # In fact it works even with dummy cpn.          CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'          cpn = ''.join((CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16))) -        qs.update({ -            'ver': ['2'], -            'cpn': [cpn], -        }) -        playback_url = compat_urlparse.urlunparse( -            parsed_playback_url._replace(query=compat_urllib_parse_urlencode(qs, True))) +        playback_url = update_url( +            playback_url, query_update={ +                'ver': ['2'], +                'cpn': [cpn], +            })          self._download_webpage(              playback_url, video_id, 'Marking watched', @@ -2075,9 +2064,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):          thumbnails = []          for container in (video_details, microformat): -            for thumbnail in (try_get( +            for thumbnail in try_get(                      container, -                    lambda x: x['thumbnail']['thumbnails'], list) or []): +                    lambda x: x['thumbnail']['thumbnails'], list) or []:                  thumbnail_url = url_or_none(thumbnail.get('url'))                  if not thumbnail_url:                      continue @@ -3287,11 +3276,7 @@ class YoutubePlaylistIE(InfoExtractor):      def suitable(cls, url):          if YoutubeTabIE.suitable(url):              return False -        # Hack for lazy extractors until more generic solution is implemented -        # (see #28780) -        from .youtube import parse_qs -        qs = parse_qs(url) -        if qs.get('v', [None])[0]: +        if parse_qs(url).get('v', [None])[0]:              return False          return super(YoutubePlaylistIE, cls).suitable(url) @@ -3430,9 +3415,9 @@ class YoutubeSearchURLIE(YoutubeBaseInfoExtractor):      }]      def _real_extract(self, url): -        qs = compat_parse_qs(compat_urllib_parse_urlparse(url).query) -        query = (qs.get('search_query') or qs.get('q'))[0] -        params = qs.get('sp', ('',))[0] +        qs = parse_qs(url) +        query = (qs.get('search_query') or qs.get('q'))[-1] +        params = qs.get('sp', ('',))[-1]          return self.playlist_result(self._search_results(query, params), query, query)  | 
