diff options
| -rw-r--r-- | youtube_dl/extractor/youtube.py | 117 | 
1 files changed, 50 insertions, 67 deletions
diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 87bdc1677..f57099f8c 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -308,6 +308,36 @@ class YoutubeBaseInfoExtractor(InfoExtractor):                  r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',                  default='{}'), video_id, fatal=False) +    def _extract_video(self, renderer): +        video_id = renderer['videoId'] +        title = try_get( +            renderer, +            (lambda x: x['title']['runs'][0]['text'], +             lambda x: x['title']['simpleText']), compat_str) +        description = try_get( +            renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'], +            compat_str) +        duration = parse_duration(try_get( +            renderer, lambda x: x['lengthText']['simpleText'], compat_str)) +        view_count_text = try_get( +            renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or '' +        view_count = str_to_int(self._search_regex( +            r'^([\d,]+)', re.sub(r'\s', '', view_count_text), +            'view count', default=None)) +        uploader = try_get( +            renderer, lambda x: x['ownerText']['runs'][0]['text'], compat_str) +        return { +            '_type': 'url_transparent', +            'ie_key': YoutubeIE.ie_key(), +            'id': video_id, +            'url': video_id, +            'title': title, +            'description': description, +            'duration': duration, +            'view_count': view_count, +            'uploader': uploader, +        } +  class YoutubeIE(YoutubeBaseInfoExtractor):      IE_DESC = 'YouTube.com' @@ -2765,36 +2795,6 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):              if renderer:                  return renderer -    def _extract_video(self, renderer): -        video_id = renderer.get('videoId') -        title = try_get( -            renderer, -            (lambda x: x['title']['runs'][0]['text'], -             lambda x: x['title']['simpleText']), compat_str) -        description = try_get( -            renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'], -            compat_str) -        duration = parse_duration(try_get( -            renderer, lambda x: x['lengthText']['simpleText'], compat_str)) -        view_count_text = try_get( -            renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or '' -        view_count = str_to_int(self._search_regex( -            r'^([\d,]+)', re.sub(r'\s', '', view_count_text), -            'view count', default=None)) -        uploader = try_get( -            renderer, lambda x: x['ownerText']['runs'][0]['text'], compat_str) -        return { -            '_type': 'url_transparent', -            'ie_key': YoutubeIE.ie_key(), -            'id': video_id, -            'url': video_id, -            'title': title, -            'description': description, -            'duration': duration, -            'view_count': view_count, -            'uploader': uploader, -        } -      def _grid_entries(self, grid_renderer):          for item in grid_renderer['items']:              if not isinstance(item, dict): @@ -3417,46 +3417,29 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):                  list)              if not slr_contents:                  break -            isr_contents = try_get( -                slr_contents, -                lambda x: x[0]['itemSectionRenderer']['contents'], -                list) -            if not isr_contents: -                break -            for content in isr_contents: -                if not isinstance(content, dict): -                    continue -                video = content.get('videoRenderer') -                if not isinstance(video, dict): -                    continue -                video_id = video.get('videoId') -                if not video_id: +            for slr_content in slr_contents: +                isr_contents = try_get( +                    slr_content, +                    lambda x: x['itemSectionRenderer']['contents'], +                    list) +                if not isr_contents:                      continue -                title = try_get(video, lambda x: x['title']['runs'][0]['text'], compat_str) -                description = try_get(video, lambda x: x['descriptionSnippet']['runs'][0]['text'], compat_str) -                duration = parse_duration(try_get(video, lambda x: x['lengthText']['simpleText'], compat_str)) -                view_count_text = try_get(video, lambda x: x['viewCountText']['simpleText'], compat_str) or '' -                view_count = int_or_none(self._search_regex( -                    r'^(\d+)', re.sub(r'\s', '', view_count_text), -                    'view count', default=None)) -                uploader = try_get(video, lambda x: x['ownerText']['runs'][0]['text'], compat_str) -                total += 1 -                yield { -                    '_type': 'url_transparent', -                    'ie_key': YoutubeIE.ie_key(), -                    'id': video_id, -                    'url': video_id, -                    'title': title, -                    'description': description, -                    'duration': duration, -                    'view_count': view_count, -                    'uploader': uploader, -                } -                if total == n: -                    return +                for content in isr_contents: +                    if not isinstance(content, dict): +                        continue +                    video = content.get('videoRenderer') +                    if not isinstance(video, dict): +                        continue +                    video_id = video.get('videoId') +                    if not video_id: +                        continue +                    yield self._extract_video(video) +                    total += 1 +                    if total == n: +                        return              token = try_get(                  slr_contents, -                lambda x: x[1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'], +                lambda x: x[-1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],                  compat_str)              if not token:                  break  | 
