diff options
| -rw-r--r-- | test/test_traversal.py | 101 | ||||
| -rw-r--r-- | youtube_dl/traversal.py | 1 | ||||
| -rw-r--r-- | youtube_dl/utils.py | 45 |
3 files changed, 147 insertions, 0 deletions
diff --git a/test/test_traversal.py b/test/test_traversal.py
index 5d08b8dbb..504cdee37 100644
--- a/test/test_traversal.py
+++ b/test/test_traversal.py
@@ -16,6 +16,7 @@ from youtube_dl.traversal import (
     dict_get,
     get_first,
     require,
+    subs_list_to_dict,
     T,
     traverse_obj,
     unpack,
@@ -30,6 +31,7 @@ from youtube_dl.compat import (
     compat_zip as zip,
 )
 from youtube_dl.utils import (
+    determine_ext,
     ExtractorError,
     int_or_none,
     join_nonempty,
@@ -495,6 +497,105 @@ class TestTraversalHelpers(_TestCase):
             traverse_obj(_TEST_DATA, ('str', T(require('value')))), 'str',
             '`require` should pass through non-`None` values')
 
+    def test_subs_list_to_dict(self):
+        self.assertEqual(traverse_obj([
+            {'name': 'de', 'url': 'https://example.com/subs/de.vtt'},
+            {'name': 'en', 'url': 'https://example.com/subs/en1.ass'},
+            {'name': 'en', 'url': 'https://example.com/subs/en2.ass'},
+        ], [Ellipsis, {
+            'id': 'name',
+            'url': 'url',
+        }, all, T(subs_list_to_dict)]), {
+            'de': [{'url': 'https://example.com/subs/de.vtt'}],
+            'en': [
+                {'url': 'https://example.com/subs/en1.ass'},
+                {'url': 'https://example.com/subs/en2.ass'},
+            ],
+        }, 'function should build subtitle dict from list of subtitles')
+        self.assertEqual(traverse_obj([
+            {'name': 'de', 'url': 'https://example.com/subs/de.ass'},
+            {'name': 'de'},
+            {'name': 'en', 'content': 'content'},
+            {'url': 'https://example.com/subs/en'},
+        ], [Ellipsis, {
+            'id': 'name',
+            'data': 'content',
+            'url': 'url',
+        }, all, T(subs_list_to_dict(lang=None))]), {
+            'de': [{'url': 'https://example.com/subs/de.ass'}],
+            'en': [{'data': 'content'}],
+        }, 'subs with mandatory items missing should be filtered')
+        self.assertEqual(traverse_obj([
+            {'url': 'https://example.com/subs/de.ass', 'name': 'de'},
+            {'url': 'https://example.com/subs/en', 'name': 'en'},
+        ], [Ellipsis, {
+            'id': 'name',
+            'ext': ['url', T(determine_ext(default_ext=None))],
+            'url': 'url',
+        }, all, T(subs_list_to_dict(ext='ext'))]), {
+            'de': [{'url': 'https://example.com/subs/de.ass', 'ext': 'ass'}],
+            'en': [{'url': 'https://example.com/subs/en', 'ext': 'ext'}],
+        }, '`ext` should set default ext but leave existing value untouched')
+        self.assertEqual(traverse_obj([
+            {'name': 'en', 'url': 'https://example.com/subs/en2', 'prio': True},
+            {'name': 'en', 'url': 'https://example.com/subs/en1', 'prio': False},
+        ], [Ellipsis, {
+            'id': 'name',
+            'quality': ['prio', T(int)],
+            'url': 'url',
+        }, all, T(subs_list_to_dict(ext='ext'))]), {'en': [
+            {'url': 'https://example.com/subs/en1', 'ext': 'ext'},
+            {'url': 'https://example.com/subs/en2', 'ext': 'ext'},
+        ]}, '`quality` key should sort subtitle list accordingly')
+        self.assertEqual(traverse_obj([
+            {'name': 'de', 'url': 'https://example.com/subs/de.ass'},
+            {'name': 'de'},
+            {'name': 'en', 'content': 'content'},
+            {'url': 'https://example.com/subs/en'},
+        ], [Ellipsis, {
+            'id': 'name',
+            'url': 'url',
+            'data': 'content',
+        }, all, T(subs_list_to_dict(lang='en'))]), {
+            'de': [{'url': 'https://example.com/subs/de.ass'}],
+            'en': [
+                {'data': 'content'},
+                {'url': 'https://example.com/subs/en'},
+            ],
+        }, 'optionally provided lang should be used if no id available')
+        self.assertEqual(traverse_obj([
+            {'name': 1, 'url': 'https://example.com/subs/de1'},
+            {'name': {}, 'url': 'https://example.com/subs/de2'},
+            {'name': 'de', 'ext': 1, 'url': 'https://example.com/subs/de3'},
+            {'name': 'de', 'ext': {}, 'url': 'https://example.com/subs/de4'},
+        ], [Ellipsis, {
+            'id': 'name',
+            'url': 'url',
+            'ext': 'ext',
+        }, all, T(subs_list_to_dict(lang=None))]), {
+            'de': [
+                {'url': 'https://example.com/subs/de3'},
+                {'url': 'https://example.com/subs/de4'},
+            ],
+        }, 'non str types should be ignored for id and ext')
+        self.assertEqual(traverse_obj([
+            {'name': 1, 'url': 'https://example.com/subs/de1'},
+            {'name': {}, 'url': 'https://example.com/subs/de2'},
+            {'name': 'de', 'ext': 1, 'url': 'https://example.com/subs/de3'},
+            {'name': 'de', 'ext': {}, 'url': 'https://example.com/subs/de4'},
+        ], [Ellipsis, {
+            'id': 'name',
+            'url': 'url',
+            'ext': 'ext',
+        }, all, T(subs_list_to_dict(lang='de'))]), {
+            'de': [
+                {'url': 'https://example.com/subs/de1'},
+                {'url': 'https://example.com/subs/de2'},
+                {'url': 'https://example.com/subs/de3'},
+                {'url': 'https://example.com/subs/de4'},
+            ],
+        }, 'non str types should be replaced by default id')
+
     def test_unpack(self):
         self.assertEqual(
             unpack(lambda *x: ''.join(map(compat_str, x)))([1, 2, 3]), '123')
diff --git a/youtube_dl/traversal.py b/youtube_dl/traversal.py
index e4e8758c6..1de48b145 100644
--- a/youtube_dl/traversal.py
+++ b/youtube_dl/traversal.py
@@ -6,6 +6,7 @@ from .utils import (
     dict_get,
     get_first,
     require,
+    subs_list_to_dict,
     T,
     traverse_obj,
     unpack,
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index c88d02d35..bd8d62572 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -6599,6 +6599,51 @@ class require(ExtractorError):
         return value
 
 
+@partial_application
+# typing: (subs: list[dict], /, *, lang='und', ext=None) -> dict[str, list[dict]]
+def subs_list_to_dict(subs, lang='und', ext=None):
+    """
+    Convert subtitles from a traversal into a subtitle dict.
+    The path should have an `all` immediately before this function.
+
+    Arguments:
+    `lang`     The default language tag for subtitle dicts with no
+               string-valued `id` (`und`: undefined)
+    `ext`      The default value for `ext` in the subtitle dicts
+
+    In the dict you can set the following additional items:
+    `id`       The language tag to which the subtitle dict should be added
+    `quality`  The sort order for each subtitle dict
+    """
+
+    result = collections.defaultdict(list)
+
+    for sub in subs:
+        tn_url = url_or_none(sub.pop('url', None))
+        if tn_url:
+            sub['url'] = tn_url
+        elif not sub.get('data'):
+            continue
+        sub_lang = sub.pop('id', None)
+        if not isinstance(sub_lang, compat_str):
+            if not lang:
+                continue
+            sub_lang = lang
+        sub_ext = sub.get('ext')
+        if not isinstance(sub_ext, compat_str):
+            if not ext:
+                sub.pop('ext', None)
+            else:
+                sub['ext'] = ext
+        result[sub_lang].append(sub)
+    result = dict(result)
+
+    for subs in result.values():
+        subs.sort(key=lambda x: x.pop('quality', 0) or 0)
+
+    return result
+
+
 def unpack(func, **kwargs):
     """Make a function that applies `partial(func, **kwargs)` to its argument as *args"""
     @functools.wraps(func)
