diff options
Diffstat (limited to 'yt_dlp/extractor/tiktok.py')
-rw-r--r-- | yt_dlp/extractor/tiktok.py | 77 |
1 file changed, 43 insertions, 34 deletions
diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py index aa9daa2e8..aa8356796 100644 --- a/yt_dlp/extractor/tiktok.py +++ b/yt_dlp/extractor/tiktok.py @@ -320,7 +320,7 @@ class TikTokBaseIE(InfoExtractor): if is_generic_og_trackname: music_track, music_author = contained_music_track or 'original sound', contained_music_author else: - music_track, music_author = music_info.get('title'), music_info.get('author') + music_track, music_author = music_info.get('title'), traverse_obj(music_info, ('author', {str})) return { 'id': aweme_id, @@ -336,15 +336,16 @@ class TikTokBaseIE(InfoExtractor): 'comment_count': 'comment_count', }, expected_type=int_or_none), **traverse_obj(author_info, { - 'uploader': 'unique_id', - 'uploader_id': 'uid', - 'creator': 'nickname', - 'channel_id': 'sec_uid', - }, expected_type=str_or_none), + 'uploader': ('unique_id', {str}), + 'uploader_id': ('uid', {str_or_none}), + 'creators': ('nickname', {str}, {lambda x: [x] if x else None}), # for compat + 'channel': ('nickname', {str}), + 'channel_id': ('sec_uid', {str}), + }), 'uploader_url': user_url, 'track': music_track, 'album': str_or_none(music_info.get('album')) or None, - 'artist': music_author or None, + 'artists': re.split(r'(?:, | & )', music_author) if music_author else None, 'formats': formats, 'subtitles': self.extract_subtitles(aweme_detail, aweme_id), 'thumbnails': thumbnails, @@ -405,7 +406,8 @@ class TikTokBaseIE(InfoExtractor): 'timestamp': ('createTime', {int_or_none}), }), **traverse_obj(author_info or aweme_detail, { - 'creator': ('nickname', {str}), + 'creators': ('nickname', {str}, {lambda x: [x] if x else None}), # for compat + 'channel': ('nickname', {str}), 'uploader': (('uniqueId', 'author'), {str}), 'uploader_id': (('authorId', 'uid', 'id'), {str_or_none}), }, get_all=False), @@ -416,10 +418,10 @@ class TikTokBaseIE(InfoExtractor): 'comment_count': 'commentCount', }, expected_type=int_or_none), **traverse_obj(music_info, { - 'track': 'title', - 'album': 
('album', {lambda x: x or None}), - 'artist': 'authorName', - }, expected_type=str), + 'track': ('title', {str}), + 'album': ('album', {str}, {lambda x: x or None}), + 'artists': ('authorName', {str}, {lambda x: [x] if x else None}), + }), 'channel_id': channel_id, 'uploader_url': user_url, 'formats': formats, @@ -476,7 +478,8 @@ class TikTokIE(TikTokBaseIE): 'uploader_id': '18702747', 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAiFnldaILebi5heDoVU6bn4jBWWycX6-9U3xuNPqZ8Ws', 'channel_id': 'MS4wLjABAAAAiFnldaILebi5heDoVU6bn4jBWWycX6-9U3xuNPqZ8Ws', - 'creator': 'patroX', + 'channel': 'patroX', + 'creators': ['patroX'], 'thumbnail': r're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?', 'upload_date': '20190930', 'timestamp': 1569860870, @@ -484,7 +487,7 @@ class TikTokIE(TikTokBaseIE): 'like_count': int, 'repost_count': int, 'comment_count': int, - 'artist': 'Evan Todd, Jessica Keenan Wynn, Alice Lee, Barrett Wilbert Weed & Jon Eidson', + 'artists': ['Evan Todd', 'Jessica Keenan Wynn', 'Alice Lee', 'Barrett Wilbert Weed', 'Jon Eidson'], 'track': 'Big Fun', }, }, { @@ -496,12 +499,13 @@ class TikTokIE(TikTokBaseIE): 'title': 'Balas @yolaaftwsr hayu yu ? #SquadRandom_ 🔥', 'description': 'Balas @yolaaftwsr hayu yu ? 
#SquadRandom_ 🔥', 'uploader': 'barudakhb_', - 'creator': 'md5:29f238c49bc0c176cb3cef1a9cea9fa6', + 'channel': 'md5:29f238c49bc0c176cb3cef1a9cea9fa6', + 'creators': ['md5:29f238c49bc0c176cb3cef1a9cea9fa6'], 'uploader_id': '6974687867511718913', 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAbhBwQC-R1iKoix6jDFsF-vBdfx2ABoDjaZrM9fX6arU3w71q3cOWgWuTXn1soZ7d', 'channel_id': 'MS4wLjABAAAAbhBwQC-R1iKoix6jDFsF-vBdfx2ABoDjaZrM9fX6arU3w71q3cOWgWuTXn1soZ7d', 'track': 'Boka Dance', - 'artist': 'md5:29f238c49bc0c176cb3cef1a9cea9fa6', + 'artists': ['md5:29f238c49bc0c176cb3cef1a9cea9fa6'], 'timestamp': 1626121503, 'duration': 18, 'thumbnail': r're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?', @@ -520,7 +524,8 @@ class TikTokIE(TikTokBaseIE): 'title': 'Slap and Run!', 'description': 'Slap and Run!', 'uploader': 'user440922249', - 'creator': 'Slap And Run', + 'channel': 'Slap And Run', + 'creators': ['Slap And Run'], 'uploader_id': '7036055384943690754', 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAATh8Vewkn0LYM7Fo03iec3qKdeCUOcBIouRk1mkiag6h3o_pQu_dUXvZ2EZlGST7_', 'channel_id': 'MS4wLjABAAAATh8Vewkn0LYM7Fo03iec3qKdeCUOcBIouRk1mkiag6h3o_pQu_dUXvZ2EZlGST7_', @@ -544,7 +549,8 @@ class TikTokIE(TikTokBaseIE): 'title': 'TikTok video #7059698374567611694', 'description': '', 'uploader': 'pokemonlife22', - 'creator': 'Pokemon', + 'channel': 'Pokemon', + 'creators': ['Pokemon'], 'uploader_id': '6820838815978423302', 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAA0tF1nBwQVVMyrGu3CqttkNgM68Do1OXUFuCY0CRQk8fEtSVDj89HqoqvbSTmUP2W', 'channel_id': 'MS4wLjABAAAA0tF1nBwQVVMyrGu3CqttkNgM68Do1OXUFuCY0CRQk8fEtSVDj89HqoqvbSTmUP2W', @@ -553,7 +559,7 @@ class TikTokIE(TikTokBaseIE): 'duration': 6, 'thumbnail': r're:^https?://[\w\/\.\-]+(~[\w\-]+\.image)?', 'upload_date': '20220201', - 'artist': 'Pokemon', + 'artists': ['Pokemon'], 'view_count': int, 'like_count': int, 'repost_count': int, @@ -590,12 +596,13 @@ class TikTokIE(TikTokBaseIE): 'ext': 'mp3', 'title': 'TikTok video 
#7139980461132074283', 'description': '', - 'creator': 'Antaura', + 'channel': 'Antaura', + 'creators': ['Antaura'], 'uploader': '_le_cannibale_', 'uploader_id': '6604511138619654149', 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAoShJqaw_5gvy48y3azFeFcT4jeyKWbB0VVYasOCt2tTLwjNFIaDcHAM4D-QGXFOP', 'channel_id': 'MS4wLjABAAAAoShJqaw_5gvy48y3azFeFcT4jeyKWbB0VVYasOCt2tTLwjNFIaDcHAM4D-QGXFOP', - 'artist': 'nathan !', + 'artists': ['nathan !'], 'track': 'grahamscott canon', 'upload_date': '20220905', 'timestamp': 1662406249, @@ -603,18 +610,18 @@ class TikTokIE(TikTokBaseIE): 'like_count': int, 'repost_count': int, 'comment_count': int, - 'thumbnail': r're:^https://.+\.webp', + 'thumbnail': r're:^https://.+\.(?:webp|jpe?g)', }, }, { # only available via web - 'url': 'https://www.tiktok.com/@moxypatch/video/7206382937372134662', + 'url': 'https://www.tiktok.com/@moxypatch/video/7206382937372134662', # FIXME 'md5': '6aba7fad816e8709ff2c149679ace165', 'info_dict': { 'id': '7206382937372134662', 'ext': 'mp4', 'title': 'md5:1d95c0b96560ca0e8a231af4172b2c0a', 'description': 'md5:1d95c0b96560ca0e8a231af4172b2c0a', - 'creator': 'MoxyPatch', + 'channel': 'MoxyPatch', 'creators': ['MoxyPatch'], 'uploader': 'moxypatch', 'uploader_id': '7039142049363379205', @@ -635,7 +642,7 @@ class TikTokIE(TikTokBaseIE): 'expected_warnings': ['Unable to find video in feed'], }, { # 1080p format - 'url': 'https://www.tiktok.com/@tatemcrae/video/7107337212743830830', + 'url': 'https://www.tiktok.com/@tatemcrae/video/7107337212743830830', # FIXME 'md5': '982512017a8a917124d5a08c8ae79621', 'info_dict': { 'id': '7107337212743830830', @@ -646,8 +653,9 @@ class TikTokIE(TikTokBaseIE): 'uploader_id': '86328792343818240', 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAA-0bQT0CqebTRr6I4IkYvMDMKSRSJHLNPBo5HrSklJwyA2psXLSZG5FP-LMNpHnJd', 'channel_id': 'MS4wLjABAAAA-0bQT0CqebTRr6I4IkYvMDMKSRSJHLNPBo5HrSklJwyA2psXLSZG5FP-LMNpHnJd', - 'creator': 'tate mcrae', - 'artist': 'tate mcrae', + 'channel': 
'tate mcrae', + 'creators': ['tate mcrae'], + 'artists': ['tate mcrae'], 'track': 'original sound', 'upload_date': '20220609', 'timestamp': 1654805899, @@ -672,8 +680,9 @@ class TikTokIE(TikTokBaseIE): 'uploader_id': '6582536342634676230', 'uploader_url': 'https://www.tiktok.com/@MS4wLjABAAAAIAlDxriiPWLE-p8p1R_0Bx8qWKfi-7zwmGhzU8Mv25W8sNxjfIKrol31qTczzuLB', 'channel_id': 'MS4wLjABAAAAIAlDxriiPWLE-p8p1R_0Bx8qWKfi-7zwmGhzU8Mv25W8sNxjfIKrol31qTczzuLB', - 'creator': 'лампочка', - 'artist': 'Øneheart', + 'channel': 'лампочка', + 'creators': ['лампочка'], + 'artists': ['Øneheart'], 'album': 'watching the stars', 'track': 'watching the stars', 'upload_date': '20230708', @@ -682,7 +691,7 @@ class TikTokIE(TikTokBaseIE): 'like_count': int, 'comment_count': int, 'repost_count': int, - 'thumbnail': r're:^https://.+\.webp', + 'thumbnail': r're:^https://.+\.(?:webp|jpe?g)', }, }, { # Auto-captions available @@ -949,7 +958,7 @@ class DouyinIE(TikTokBaseIE): 'uploader_id': '110403406559', 'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98', 'channel_id': 'MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98', - 'creator': '杨超越', + 'channel': '杨超越', 'creators': ['杨超越'], 'duration': 19, 'timestamp': 1620905839, @@ -974,7 +983,7 @@ class DouyinIE(TikTokBaseIE): 'uploader_id': '408654318141572', 'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAZJpnglcjW2f_CMVcnqA_6oVBXKWMpH0F8LIHuUu8-lA', 'channel_id': 'MS4wLjABAAAAZJpnglcjW2f_CMVcnqA_6oVBXKWMpH0F8LIHuUu8-lA', - 'creator': '杨超越工作室', + 'channel': '杨超越工作室', 'creators': ['杨超越工作室'], 'duration': 42, 'timestamp': 1625739481, @@ -999,7 +1008,7 @@ class DouyinIE(TikTokBaseIE): 'uploader_id': '110403406559', 'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98', 'channel_id': 'MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98', - 'creator': '杨超越', + 'channel': '杨超越', 'creators': ['杨超越'], 'duration': 17, 'timestamp': 1619098692, 
@@ -1041,7 +1050,7 @@ class DouyinIE(TikTokBaseIE): 'uploader_id': '110403406559', 'uploader_url': 'https://www.douyin.com/user/MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98', 'channel_id': 'MS4wLjABAAAAEKnfa654JAJ_N5lgZDQluwsxmY0lhfmEYNQBBkwGG98', - 'creator': '杨超越', + 'channel': '杨超越', 'creators': ['杨超越'], 'duration': 15, 'timestamp': 1621261163, |