aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Adam Thalhammer <s3544305@student.rmit.edu.au> 2016-05-02 13:21:39 +1000
committer Adam Thalhammer <s3544305@student.rmit.edu.au> 2016-05-02 13:21:39 +1000
commit 79a2e94e79e65cdf4898bc2dedb6a1bb4ca9af3c (patch)
tree 97cd368a2089b073d2f0c1638bc66bd75fe11443
parent 686cc8963441c37105c0447f31c5ea21405be05a (diff)
download youtube-dl-79a2e94e79e65cdf4898bc2dedb6a1bb4ca9af3c.tar.xz
Instead of replacing accented characters with an underscore when sanitizing file names in restricted mode, replace them with their non-accented equivalents. Fixes #9347.
-rw-r--r-- test/test_utils.py 9
-rw-r--r-- youtube_dl/utils.py 9
2 files changed, 14 insertions, 4 deletions
diff --git a/test/test_utils.py b/test/test_utils.py
index e16a6761b..0072ba241 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -139,8 +139,8 @@ class TestUtil(unittest.TestCase):
self.assertEqual('yes_no', sanitize_filename('yes? no', restricted=True))
self.assertEqual('this_-_that', sanitize_filename('this: that', restricted=True))
- tests = 'a\xe4b\u4e2d\u56fd\u7684c'
- self.assertEqual(sanitize_filename(tests, restricted=True), 'a_b_c')
+ tests = 'aäb\u4e2d\u56fd\u7684c'
+ self.assertEqual(sanitize_filename(tests, restricted=True), 'aab_c')
self.assertTrue(sanitize_filename('\xf6', restricted=True) != '') # No empty filename
forbidden = '"\0\\/&!: \'\t\n()[]{}$;`^,#'
@@ -155,6 +155,11 @@ class TestUtil(unittest.TestCase):
self.assertTrue(sanitize_filename('-', restricted=True) != '')
self.assertTrue(sanitize_filename(':', restricted=True) != '')
+ self.assertEqual(sanitize_filename(
+ 'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ', restricted=True),
+ 'AAAAAAAECEEEEIIIIDNOOOOOOUUUUYPssaaaaaaaeceeeeiiiionoooooouuuuypy')
+ pass
+
def test_sanitize_ids(self):
self.assertEqual(sanitize_filename('_n_cd26wFpw', is_id=True), '_n_cd26wFpw')
self.assertEqual(sanitize_filename('_BD_eEpuzXw', is_id=True), '_BD_eEpuzXw')
diff --git a/youtube_dl/utils.py b/youtube_dl/utils.py
index 7bcc85e2b..f74f62268 100644
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -14,8 +14,8 @@ import email.utils
import errno
import functools
import gzip
-import itertools
import io
+import itertools
import json
import locale
import math
@@ -24,8 +24,8 @@ import os
import pipes
import platform
import re
-import ssl
import socket
+import ssl
import struct
import subprocess
import sys
@@ -365,6 +365,11 @@ def sanitize_filename(s, restricted=False, is_id=False):
Set is_id if this is not an arbitrary string, but an ID that should be kept if possible
"""
def replace_insane(char):
+ accents = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ',
+ itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOUUUUYP', ['ss'],
+ 'aaaaaa', ['ae'], 'ceeeeiiiionoooooouuuuypy')))
+ if restricted and char in accents:
+ return accents[char]
if char == '?' or ord(char) < 32 or ord(char) == 127:
return ''
elif char == '"':