diff options
Diffstat (limited to 'test/lint/lint-locale-dependence.py')
-rwxr-xr-x | test/lint/lint-locale-dependence.py | 259 |
1 files changed, 259 insertions, 0 deletions
diff --git a/test/lint/lint-locale-dependence.py b/test/lint/lint-locale-dependence.py new file mode 100755 index 0000000000..2abf1be6b3 --- /dev/null +++ b/test/lint/lint-locale-dependence.py @@ -0,0 +1,259 @@ +#!/usr/bin/env python3 +# Copyright (c) 2018-2022 The Bitcoin Core developers +# Distributed under the MIT software license, see the accompanying +# file COPYING or http://www.opensource.org/licenses/mit-license.php. +# +# Be aware that bitcoind and bitcoin-qt differ in terms of localization: Qt +# opts in to POSIX localization by running setlocale(LC_ALL, "") on startup, +# whereas no such call is made in bitcoind. +# +# Qt runs setlocale(LC_ALL, "") on initialization. This installs the locale +# specified by the user's LC_ALL (or LC_*) environment variable as the new +# C locale. +# +# In contrast, bitcoind does not opt in to localization -- no call to +# setlocale(LC_ALL, "") is made and the environment variables LC_* are +# thus ignored. +# +# This results in situations where bitcoind is guaranteed to be running +# with the classic locale ("C") whereas the locale of bitcoin-qt will vary +# depending on the user's environment variables. +# +# An example: Assuming the environment variable LC_ALL=de_DE then the +# call std::to_string(1.23) will return "1.230000" in bitcoind but +# "1,230000" in bitcoin-qt. +# +# From the Qt documentation: +# "On Unix/Linux Qt is configured to use the system locale settings by default. +# This can cause a conflict when using POSIX functions, for instance, when +# converting between data types such as floats and strings, since the notation +# may differ between locales. To get around this problem, call the POSIX function +# setlocale(LC_NUMERIC,"C") right after initializing QApplication, QGuiApplication +# or QCoreApplication to reset the locale that is used for number formatting to +# "C"-locale." +# +# See https://doc.qt.io/qt-5/qcoreapplication.html#locale-settings and +# https://stackoverflow.com/a/34878283 for more details. +# +# TODO: Reduce KNOWN_VIOLATIONS by replacing uses of locale dependent snprintf with strprintf. + +import re +import sys + +from subprocess import check_output, CalledProcessError + + +KNOWN_VIOLATIONS = [ + "src/dbwrapper.cpp:.*vsnprintf", + "src/test/dbwrapper_tests.cpp:.*snprintf", + "src/test/fuzz/locale.cpp:.*setlocale", + "src/test/fuzz/string.cpp:.*strtol", + "src/test/fuzz/string.cpp:.*strtoul", + "src/test/util_tests.cpp:.*strtoll" +] + +REGEXP_EXTERNAL_DEPENDENCIES_EXCLUSIONS = [ + "src/crypto/ctaes/", + "src/leveldb/", + "src/secp256k1/", + "src/minisketch/", + "src/tinyformat.h", + "src/univalue/" +] + +LOCALE_DEPENDENT_FUNCTIONS = [ + "alphasort", # LC_COLLATE (via strcoll) + "asctime", # LC_TIME (directly) + "asprintf", # (via vasprintf) + "atof", # LC_NUMERIC (via strtod) + "atoi", # LC_NUMERIC (via strtol) + "atol", # LC_NUMERIC (via strtol) + "atoll", # (via strtoll) + "atoq", + "btowc", # LC_CTYPE (directly) + "ctime", # (via asctime or localtime) + "dprintf", # (via vdprintf) + "fgetwc", + "fgetws", + "fold_case", # boost::locale::fold_case + "fprintf", # (via vfprintf) + "fputwc", + "fputws", + "fscanf", # (via __vfscanf) + "fwprintf", # (via __vfwprintf) + "getdate", # via __getdate_r => isspace // __localtime_r + "getwc", + "getwchar", + "is_digit", # boost::algorithm::is_digit + "is_space", # boost::algorithm::is_space + "isalnum", # LC_CTYPE + "isalpha", # LC_CTYPE + "isblank", # LC_CTYPE + "iscntrl", # LC_CTYPE + "isctype", # LC_CTYPE + "isdigit", # LC_CTYPE + "isgraph", # LC_CTYPE + "islower", # LC_CTYPE + "isprint", # LC_CTYPE + "ispunct", # LC_CTYPE + "isspace", # LC_CTYPE + "isupper", # LC_CTYPE + "iswalnum", # LC_CTYPE + "iswalpha", # LC_CTYPE + "iswblank", # LC_CTYPE + "iswcntrl", # LC_CTYPE + "iswctype", # LC_CTYPE + "iswdigit", # LC_CTYPE + "iswgraph", # LC_CTYPE + "iswlower", # LC_CTYPE + "iswprint", # LC_CTYPE + "iswpunct", # LC_CTYPE + "iswspace", # LC_CTYPE + "iswupper", # LC_CTYPE + "iswxdigit", # LC_CTYPE + "isxdigit", # LC_CTYPE + "localeconv", # LC_NUMERIC + LC_MONETARY + "mblen", # LC_CTYPE + "mbrlen", + "mbrtowc", + "mbsinit", + "mbsnrtowcs", + "mbsrtowcs", + "mbstowcs", # LC_CTYPE + "mbtowc", # LC_CTYPE + "mktime", + "normalize", # boost::locale::normalize + "printf", # LC_NUMERIC + "putwc", + "putwchar", + "scanf", # LC_NUMERIC + "setlocale", + "snprintf", + "sprintf", + "sscanf", + "std::locale::global", + "std::to_string", + "stod", + "stof", + "stoi", + "stol", + "stold", + "stoll", + "stoul", + "stoull", + "strcasecmp", + "strcasestr", + "strcoll", # LC_COLLATE + #"strerror", + "strfmon", + "strftime", # LC_TIME + "strncasecmp", + "strptime", + "strtod", # LC_NUMERIC + "strtof", + "strtoimax", + "strtol", # LC_NUMERIC + "strtold", + "strtoll", + "strtoq", + "strtoul", # LC_NUMERIC + "strtoull", + "strtoumax", + "strtouq", + "strxfrm", # LC_COLLATE + "swprintf", + "to_lower", # boost::locale::to_lower + "to_title", # boost::locale::to_title + "to_upper", # boost::locale::to_upper + "tolower", # LC_CTYPE + "toupper", # LC_CTYPE + "towctrans", + "towlower", # LC_CTYPE + "towupper", # LC_CTYPE + "trim", # boost::algorithm::trim + "trim_left", # boost::algorithm::trim_left + "trim_right", # boost::algorithm::trim_right + "ungetwc", + "vasprintf", + "vdprintf", + "versionsort", + "vfprintf", + "vfscanf", + "vfwprintf", + "vprintf", + "vscanf", + "vsnprintf", + "vsprintf", + "vsscanf", + "vswprintf", + "vwprintf", + "wcrtomb", + "wcscasecmp", + "wcscoll", # LC_COLLATE + "wcsftime", # LC_TIME + "wcsncasecmp", + "wcsnrtombs", + "wcsrtombs", + "wcstod", # LC_NUMERIC + "wcstof", + "wcstoimax", + "wcstol", # LC_NUMERIC + "wcstold", + "wcstoll", + "wcstombs", # LC_CTYPE + "wcstoul", # LC_NUMERIC + "wcstoull", + "wcstoumax", + "wcswidth", + "wcsxfrm", # LC_COLLATE + "wctob", + "wctomb", # LC_CTYPE + "wctrans", + "wctype", + "wcwidth", + "wprintf" +] + + +def find_locale_dependent_function_uses(): + regexp_locale_dependent_functions = "|".join(LOCALE_DEPENDENT_FUNCTIONS) + exclude_args = [":(exclude)" + excl for excl in REGEXP_EXTERNAL_DEPENDENCIES_EXCLUSIONS] + git_grep_command = ["git", "grep", "-E", "[^a-zA-Z0-9_\\`'\"<>](" + regexp_locale_dependent_functions + "(_r|_s)?)[^a-zA-Z0-9_\\`'\"<>]", "--", "*.cpp", "*.h"] + exclude_args + git_grep_output = list() + + try: + git_grep_output = check_output(git_grep_command, universal_newlines=True, encoding="utf8").splitlines() + except CalledProcessError as e: + if e.returncode > 1: + raise e + + return git_grep_output + + +def main(): + exit_code = 0 + + regexp_ignore_known_violations = "|".join(KNOWN_VIOLATIONS) + git_grep_output = find_locale_dependent_function_uses() + + for locale_dependent_function in LOCALE_DEPENDENT_FUNCTIONS: + matches = [line for line in git_grep_output + if re.search("[^a-zA-Z0-9_\\`'\"<>]" + locale_dependent_function + "(_r|_s)?[^a-zA-Z0-9_\\`'\"<>]", line) + and not re.search("\\.(c|cpp|h):\\s*(//|\\*|/\\*|\").*" + locale_dependent_function, line) + and not re.search(regexp_ignore_known_violations, line)] + if matches: + print(f"The locale dependent function {locale_dependent_function}(...) appears to be used:") + for match in matches: + print(match) + print("") + exit_code = 1 + + if exit_code == 1: + print("Unnecessary locale depedence can cause bugs that are very tricky to isolate and fix. Please avoid using locale dependent functions if possible.\n") + print(f"Advice not applicable in this specific case? Add an exception by updating the ignore list in {sys.argv[0]}") + + sys.exit(exit_code) + + +if __name__ == "__main__": + main() |