diff options
author | bashonly <88596187+bashonly@users.noreply.github.com> | 2024-09-24 17:10:42 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-09-24 22:10:42 +0000 |
commit | 3ad0b7f422d547204df687b6d0b2d9110fff3990 (patch) | |
tree | a8dd3098dca7812e4a4a685abb75c5c86288eed1 | |
parent | 4a9bc8c3630378bc29f0266126b503f6190c0430 (diff) |
[ie/tiktok] Fix web formats extraction (#11074)
Closes #11034
Authored by: bashonly
-rw-r--r-- | yt_dlp/extractor/tiktok.py | 11 |
1 file changed, 4 insertions, 7 deletions
diff --git a/yt_dlp/extractor/tiktok.py b/yt_dlp/extractor/tiktok.py
index 9d823a315..f7e103fe9 100644
--- a/yt_dlp/extractor/tiktok.py
+++ b/yt_dlp/extractor/tiktok.py
@@ -542,16 +542,12 @@ class TikTokBaseIE(InfoExtractor):
                 **COMMON_FORMAT_INFO,
                 'format_id': 'download',
                 'url': self._proto_relative_url(download_url),
+                'format_note': 'watermarked',
+                'preference': -2,
             })
 
         self._remove_duplicate_formats(formats)
 
-        for f in traverse_obj(formats, lambda _, v: 'unwatermarked' not in v['url']):
-            f.update({
-                'format_note': join_nonempty(f.get('format_note'), 'watermarked', delim=', '),
-                'preference': f.get('preference') or -2,
-            })
-
         # Is it a slideshow with only audio for download?
         if not formats and traverse_obj(aweme_detail, ('music', 'playUrl', {url_or_none})):
             audio_url = aweme_detail['music']['playUrl']
@@ -565,7 +561,8 @@ class TikTokBaseIE(InfoExtractor):
                 'vcodec': 'none',
             })
 
-        return formats
+        # Filter out broken formats, see https://github.com/yt-dlp/yt-dlp/issues/11034
+        return [f for f in formats if urllib.parse.urlparse(f['url']).hostname != 'www.tiktok.com']
 
     def _parse_aweme_video_web(self, aweme_detail, webpage_url, video_id, extract_flat=False):
         author_info = traverse_obj(aweme_detail, (('authorInfo', 'author', None), {