diff options
| author | dirkf <fieldhouse@gmx.net> | 2023-04-23 22:58:35 +0100 | 
|---|---|---|
| committer | dirkf <fieldhouse@gmx.net> | 2023-04-23 22:58:35 +0100 | 
| commit | 64d6dd64c8b7a35a87655d27fc83f2e98ef6ce13 (patch) | |
| tree | 032609a4726680751d5e90040c093a319fa11d46 | |
| parent | 211cbfd5d46025a8e4d8f9f3d424aaada4698974 (diff) | |
[YouTube] Support Releases tab
| -rw-r--r-- | youtube_dl/extractor/youtube.py | 114 | ||||
| -rw-r--r-- | youtube_dl/utils.py | 9 | 
2 files changed, 74 insertions, 49 deletions
| diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 80fff7ada..0411c49f1 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -31,6 +31,7 @@ from ..utils import (      extract_attributes,      get_element_by_attribute,      int_or_none, +    join_nonempty,      js_to_json,      LazyList,      merge_dicts, @@ -45,6 +46,7 @@ from ..utils import (      str_to_int,      traverse_obj,      try_get, +    txt_or_none,      unescapeHTML,      unified_strdate,      unsmuggle_url, @@ -2609,6 +2611,17 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):          },          'playlist_mincount': 75,      }, { +        # Releases tab +        'url': 'https://www.youtube.com/@daftpunk/releases', +        'info_dict': { +            'id': 'UC_kRDKYrUlrbtrSiyu5Tflg', +            'title': 'Daft Punk - Releases', +            'description': 'Daft Punk (1993 - 2021) - Official YouTube Channel', +            'uploader_id': '@daftpunk', +            'uploader': 'Daft Punk', +        }, +        'playlist_mincount': 36, +    }, {          'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',          'only_matching': True,      }, { @@ -2822,6 +2835,12 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):                  continue              return renderer +    @staticmethod +    def _get_text(r, k): +        return traverse_obj( +            r, (k, 'runs', 0, 'text'), (k, 'simpleText'), +            expected_type=txt_or_none) +      def _grid_entries(self, grid_renderer):          for item in grid_renderer['items']:              if not isinstance(item, dict): @@ -2829,9 +2848,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):              renderer = self._extract_grid_item_renderer(item)              if not isinstance(renderer, dict):                  continue -            title = try_get( -                renderer, (lambda x: x['title']['runs'][0]['text'], -                           lambda x: x['title']['simpleText']), compat_str) +            title = self._get_text(renderer, 'title')              # playlist              playlist_id = renderer.get('playlistId')              if playlist_id: @@ -2848,8 +2865,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):              # channel              channel_id = renderer.get('channelId')              if channel_id: -                title = try_get( -                    renderer, lambda x: x['title']['simpleText'], compat_str) +                title = self._get_text(renderer, 'title')                  yield self.url_result(                      'https://www.youtube.com/channel/%s' % channel_id,                      ie=YoutubeTabIE.ie_key(), video_title=title) @@ -2958,15 +2974,26 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):      def _rich_grid_entries(self, contents):          for content in contents: -            video_renderer = try_get( -                content, -                (lambda x: x['richItemRenderer']['content']['videoRenderer'], -                 lambda x: x['richItemRenderer']['content']['reelItemRenderer']), -                dict) +            content = traverse_obj( +                content, ('richItemRenderer', 'content'), +                expected_type=dict) or {} +            video_renderer = traverse_obj( +                content, 'videoRenderer', 'reelItemRenderer', +                expected_type=dict)              if video_renderer:                  entry = self._video_entry(video_renderer)                  if entry:                      yield entry +            # playlist +            renderer = traverse_obj( +                content, 'playlistRenderer', expected_type=dict) or {} +            title = self._get_text(renderer, 'title') +            playlist_id = renderer.get('playlistId') +            if playlist_id: +                yield self.url_result( +                    'https://www.youtube.com/playlist?list=%s' % playlist_id, +                    ie=YoutubeTabIE.ie_key(), video_id=playlist_id, +                    video_title=title)      @staticmethod      def _build_continuation_query(continuation, ctp=None): @@ -3071,6 +3098,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):                  return              for entry in self._rich_grid_entries(rich_grid_renderer.get('contents') or []):                  yield entry +              continuation = self._extract_continuation(rich_grid_renderer)          ytcfg = self._extract_ytcfg(item_id, webpage) @@ -3213,50 +3241,41 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):          uploader['channel'] = uploader['uploader']          return uploader -    @staticmethod -    def _extract_alert(data): +    @classmethod +    def _extract_alert(cls, data):          alerts = [] -        for alert in try_get(data, lambda x: x['alerts'], list) or []: -            if not isinstance(alert, dict): -                continue -            alert_text = try_get( -                alert, lambda x: x['alertRenderer']['text'], dict) +        for alert in traverse_obj(data, ('alerts', Ellipsis), expected_type=dict): +            alert_text = traverse_obj( +                alert, (None, lambda x: x['alertRenderer']['text']), get_all=False)              if not alert_text:                  continue -            text = try_get( -                alert_text, -                (lambda x: x['simpleText'], lambda x: x['runs'][0]['text']), -                compat_str) +            text = cls._get_text(alert_text, 'text')              if text:                  alerts.append(text)          return '\n'.join(alerts)      def _extract_from_tabs(self, item_id, webpage, data, tabs):          selected_tab = self._extract_selected_tab(tabs) -        renderer = try_get( -            data, lambda x: x['metadata']['channelMetadataRenderer'], dict) +        renderer = traverse_obj(data, ('metadata', 'channelMetadataRenderer'), +                                expected_type=dict) or {}          playlist_id = item_id          title = description = None          if renderer: -            channel_title = renderer.get('title') or item_id -            tab_title = selected_tab.get('title') -            title = channel_title or item_id -            if tab_title: -                title += ' - %s' % tab_title -            if selected_tab.get('expandedText'): -                title += ' - %s' % selected_tab['expandedText'] -            description = renderer.get('description') -            playlist_id = renderer.get('externalId') +            channel_title = txt_or_none(renderer.get('title')) or item_id +            tab_title = txt_or_none(selected_tab.get('title')) +            title = join_nonempty( +                channel_title or item_id, tab_title, +                txt_or_none(selected_tab.get('expandedText')), +                delim=' - ') +            description = txt_or_none(renderer.get('description')) +            playlist_id = txt_or_none(renderer.get('externalId')) or playlist_id          else: -            renderer = try_get( -                data, lambda x: x['metadata']['playlistMetadataRenderer'], dict) -            if renderer: -                title = renderer.get('title') -            else: -                renderer = try_get( -                    data, lambda x: x['header']['hashtagHeaderRenderer'], dict) -                if renderer: -                    title = try_get(renderer, lambda x: x['hashtag']['simpleText']) +            renderer = traverse_obj(data, +                                    ('metadata', 'playlistMetadataRenderer'), +                                    ('header', 'hashtagHeaderRenderer'), +                                    expected_type=dict) or {} +            title = traverse_obj(renderer, 'title', ('hashtag', 'simpleText'), +                                 expected_type=txt_or_none)          playlist = self.playlist_result(              self._entries(selected_tab, item_id, webpage),              playlist_id=playlist_id, playlist_title=title, @@ -3264,15 +3283,16 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):          return merge_dicts(playlist, self._extract_uploader(renderer, data))      def _extract_from_playlist(self, item_id, url, data, playlist): -        title = playlist.get('title') or try_get( -            data, lambda x: x['titleText']['simpleText'], compat_str) -        playlist_id = playlist.get('playlistId') or item_id +        title = traverse_obj((playlist, data), +                             (0, 'title'), (1, 'titleText', 'simpleText'), +                             expected_type=txt_or_none) +        playlist_id = txt_or_none(playlist.get('playlistId')) or item_id          # Inline playlist rendition continuation does not always work          # at Youtube side, so delegating regular tab-based playlist URL          # processing whenever possible. -        playlist_url = urljoin(url, try_get( -            playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'], -            compat_str)) +        playlist_url = urljoin(url, traverse_obj( +            playlist, ('endpoint', 'commandMetadata', 'webCommandMetadata', 'url'), +            expected_type=url_or_none))          if playlist_url and playlist_url != url:              return self.url_result(                  playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id, diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py index d80ceb007..65ddb3b0f 100644 --- a/youtube_dl/utils.py +++ b/youtube_dl/utils.py @@ -3753,6 +3753,11 @@ def strip_or_none(v, default=None):      return v.strip() if isinstance(v, compat_str) else default +def txt_or_none(v, default=None): +    """ Combine str/strip_or_none, disallow blank value (for traverse_obj) """ +    return default if v is None else (compat_str(v).strip() or default) + +  def url_or_none(url):      if not url or not isinstance(url, compat_str):          return None @@ -4096,8 +4101,8 @@ def escape_url(url):      ).geturl() -def parse_qs(url): -    return compat_parse_qs(compat_urllib_parse.urlparse(url).query) +def parse_qs(url, **kwargs): +    return compat_parse_qs(compat_urllib_parse.urlparse(url).query, **kwargs)  def read_batch_urls(batch_fd): | 
