aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Jakub Wilk <jwilk@jwilk.net> 2019-05-10 20:42:32 +0200
committer Sergey M <dstftw@gmail.com> 2019-05-11 01:42:31 +0700
commit fd35d8cdfdc77ca6ec6d87677fe0d00df0cbb22a (patch)
tree 266fb842f011010adb2d0b0a3a55810cde82f95c
parent 4eec112740910621a7fd9c50158fb2388649d8b7 (diff)
[utils] Transliterate "þ" as "th" (#20897)
Despite visual similarity "þ" is unrelated to "p".
It is normally transliterated as "th":

    $ echo þ-Þ | iconv -t ASCII//TRANSLIT
    th-TH
-rw-r--r-- test/test_utils.py 2
-rw-r--r-- youtube_dl/utils.py 4
2 files changed, 3 insertions, 3 deletions
diff --git a/test/test_utils.py b/test/test_utils.py
index ca6d832a4..9ef0e422b 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -183,7 +183,7 @@ class TestUtil(unittest.TestCase):
self.assertEqual(sanitize_filename(
'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ', restricted=True),
- 'AAAAAAAECEEEEIIIIDNOOOOOOOOEUUUUUYPssaaaaaaaeceeeeiiiionooooooooeuuuuuypy')
+ 'AAAAAAAECEEEEIIIIDNOOOOOOOOEUUUUUYTHssaaaaaaaeceeeeiiiionooooooooeuuuuuythy')
def test_sanitize_ids(self):
self.assertEqual(sanitize_filename('_n_cd26wFpw', is_id=True), '_n_cd26wFpw')
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 71713f63a..99ee54942 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -125,8 +125,8 @@ KNOWN_EXTENSIONS = (
# needed for sanitizing filenames in restricted mode
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
- itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUYP', ['ss'],
- 'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuypy')))
+ itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
+ 'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
DATE_FORMATS = (
'%d %B %Y',