aboutsummaryrefslogtreecommitdiff
path: root/test/lint/lint-locale-dependence.py
diff options
context:
space:
mode:
Diffstat (limited to 'test/lint/lint-locale-dependence.py')
-rwxr-xr-xtest/lint/lint-locale-dependence.py259
1 files changed, 259 insertions, 0 deletions
diff --git a/test/lint/lint-locale-dependence.py b/test/lint/lint-locale-dependence.py
new file mode 100755
index 0000000000..2abf1be6b3
--- /dev/null
+++ b/test/lint/lint-locale-dependence.py
@@ -0,0 +1,259 @@
+#!/usr/bin/env python3
+# Copyright (c) 2018-2022 The Bitcoin Core developers
+# Distributed under the MIT software license, see the accompanying
+# file COPYING or http://www.opensource.org/licenses/mit-license.php.
+#
+# Be aware that bitcoind and bitcoin-qt differ in terms of localization: Qt
+# opts in to POSIX localization by running setlocale(LC_ALL, "") on startup,
+# whereas no such call is made in bitcoind.
+#
+# Qt runs setlocale(LC_ALL, "") on initialization. This installs the locale
+# specified by the user's LC_ALL (or LC_*) environment variable as the new
+# C locale.
+#
+# In contrast, bitcoind does not opt in to localization -- no call to
+# setlocale(LC_ALL, "") is made and the environment variables LC_* are
+# thus ignored.
+#
+# This results in situations where bitcoind is guaranteed to be running
+# with the classic locale ("C") whereas the locale of bitcoin-qt will vary
+# depending on the user's environment variables.
+#
+# An example: Assuming the environment variable LC_ALL=de_DE then the
+# call std::to_string(1.23) will return "1.230000" in bitcoind but
+# "1,230000" in bitcoin-qt.
+#
+# From the Qt documentation:
+# "On Unix/Linux Qt is configured to use the system locale settings by default.
+# This can cause a conflict when using POSIX functions, for instance, when
+# converting between data types such as floats and strings, since the notation
+# may differ between locales. To get around this problem, call the POSIX function
+# setlocale(LC_NUMERIC,"C") right after initializing QApplication, QGuiApplication
+# or QCoreApplication to reset the locale that is used for number formatting to
+# "C"-locale."
+#
+# See https://doc.qt.io/qt-5/qcoreapplication.html#locale-settings and
+# https://stackoverflow.com/a/34878283 for more details.
+#
+# TODO: Reduce KNOWN_VIOLATIONS by replacing uses of locale dependent snprintf with strprintf.
+
+import re
+import sys
+
+from subprocess import check_output, CalledProcessError
+
+
+# Regular expression fragments describing accepted uses of locale dependent
+# functions. main() joins the entries with "|" and discards every git grep
+# output line that the combined pattern matches (via re.search), so each
+# entry is written as "<file path>:.*<function name>".
+KNOWN_VIOLATIONS = [
+ "src/dbwrapper.cpp:.*vsnprintf",
+ "src/test/dbwrapper_tests.cpp:.*snprintf",
+ "src/test/fuzz/locale.cpp:.*setlocale",
+ "src/test/fuzz/string.cpp:.*strtol",
+ "src/test/fuzz/string.cpp:.*strtoul",
+ "src/test/util_tests.cpp:.*strtoll"
+]
+
+# Paths left out of the search. Each entry is prefixed with ":(exclude)" and
+# appended to the git grep command line as a pathspec. Despite the REGEXP_
+# prefix in the name, this script uses the entries as plain strings, not as
+# regular expressions.
+REGEXP_EXTERNAL_DEPENDENCIES_EXCLUSIONS = [
+ "src/crypto/ctaes/",
+ "src/leveldb/",
+ "src/secp256k1/",
+ "src/minisketch/",
+ "src/tinyformat.h",
+ "src/univalue/"
+]
+
+# Names of the functions this lint treats as locale dependent. The entries are
+# joined with "|" to build the git grep pattern, and main() walks the list in
+# order to report findings one function at a time; both patterns also accept
+# an optional "_r" or "_s" suffix after the name. Where present, the trailing
+# comment on an entry names the locale category involved or the function it
+# is reached through.
+LOCALE_DEPENDENT_FUNCTIONS = [
+ "alphasort", # LC_COLLATE (via strcoll)
+ "asctime", # LC_TIME (directly)
+ "asprintf", # (via vasprintf)
+ "atof", # LC_NUMERIC (via strtod)
+ "atoi", # LC_NUMERIC (via strtol)
+ "atol", # LC_NUMERIC (via strtol)
+ "atoll", # (via strtoll)
+ "atoq",
+ "btowc", # LC_CTYPE (directly)
+ "ctime", # (via asctime or localtime)
+ "dprintf", # (via vdprintf)
+ "fgetwc",
+ "fgetws",
+ "fold_case", # boost::locale::fold_case
+ "fprintf", # (via vfprintf)
+ "fputwc",
+ "fputws",
+ "fscanf", # (via __vfscanf)
+ "fwprintf", # (via __vfwprintf)
+ "getdate", # via __getdate_r => isspace // __localtime_r
+ "getwc",
+ "getwchar",
+ "is_digit", # boost::algorithm::is_digit
+ "is_space", # boost::algorithm::is_space
+ "isalnum", # LC_CTYPE
+ "isalpha", # LC_CTYPE
+ "isblank", # LC_CTYPE
+ "iscntrl", # LC_CTYPE
+ "isctype", # LC_CTYPE
+ "isdigit", # LC_CTYPE
+ "isgraph", # LC_CTYPE
+ "islower", # LC_CTYPE
+ "isprint", # LC_CTYPE
+ "ispunct", # LC_CTYPE
+ "isspace", # LC_CTYPE
+ "isupper", # LC_CTYPE
+ "iswalnum", # LC_CTYPE
+ "iswalpha", # LC_CTYPE
+ "iswblank", # LC_CTYPE
+ "iswcntrl", # LC_CTYPE
+ "iswctype", # LC_CTYPE
+ "iswdigit", # LC_CTYPE
+ "iswgraph", # LC_CTYPE
+ "iswlower", # LC_CTYPE
+ "iswprint", # LC_CTYPE
+ "iswpunct", # LC_CTYPE
+ "iswspace", # LC_CTYPE
+ "iswupper", # LC_CTYPE
+ "iswxdigit", # LC_CTYPE
+ "isxdigit", # LC_CTYPE
+ "localeconv", # LC_NUMERIC + LC_MONETARY
+ "mblen", # LC_CTYPE
+ "mbrlen",
+ "mbrtowc",
+ "mbsinit",
+ "mbsnrtowcs",
+ "mbsrtowcs",
+ "mbstowcs", # LC_CTYPE
+ "mbtowc", # LC_CTYPE
+ "mktime",
+ "normalize", # boost::locale::normalize
+ "printf", # LC_NUMERIC
+ "putwc",
+ "putwchar",
+ "scanf", # LC_NUMERIC
+ "setlocale",
+ "snprintf",
+ "sprintf",
+ "sscanf",
+ "std::locale::global",
+ "std::to_string",
+ "stod",
+ "stof",
+ "stoi",
+ "stol",
+ "stold",
+ "stoll",
+ "stoul",
+ "stoull",
+ "strcasecmp",
+ "strcasestr",
+ "strcoll", # LC_COLLATE
+ #"strerror",
+ "strfmon",
+ "strftime", # LC_TIME
+ "strncasecmp",
+ "strptime",
+ "strtod", # LC_NUMERIC
+ "strtof",
+ "strtoimax",
+ "strtol", # LC_NUMERIC
+ "strtold",
+ "strtoll",
+ "strtoq",
+ "strtoul", # LC_NUMERIC
+ "strtoull",
+ "strtoumax",
+ "strtouq",
+ "strxfrm", # LC_COLLATE
+ "swprintf",
+ "to_lower", # boost::locale::to_lower
+ "to_title", # boost::locale::to_title
+ "to_upper", # boost::locale::to_upper
+ "tolower", # LC_CTYPE
+ "toupper", # LC_CTYPE
+ "towctrans",
+ "towlower", # LC_CTYPE
+ "towupper", # LC_CTYPE
+ "trim", # boost::algorithm::trim
+ "trim_left", # boost::algorithm::trim_left
+ "trim_right", # boost::algorithm::trim_right
+ "ungetwc",
+ "vasprintf",
+ "vdprintf",
+ "versionsort",
+ "vfprintf",
+ "vfscanf",
+ "vfwprintf",
+ "vprintf",
+ "vscanf",
+ "vsnprintf",
+ "vsprintf",
+ "vsscanf",
+ "vswprintf",
+ "vwprintf",
+ "wcrtomb",
+ "wcscasecmp",
+ "wcscoll", # LC_COLLATE
+ "wcsftime", # LC_TIME
+ "wcsncasecmp",
+ "wcsnrtombs",
+ "wcsrtombs",
+ "wcstod", # LC_NUMERIC
+ "wcstof",
+ "wcstoimax",
+ "wcstol", # LC_NUMERIC
+ "wcstold",
+ "wcstoll",
+ "wcstombs", # LC_CTYPE
+ "wcstoul", # LC_NUMERIC
+ "wcstoull",
+ "wcstoumax",
+ "wcswidth",
+ "wcsxfrm", # LC_COLLATE
+ "wctob",
+ "wctomb", # LC_CTYPE
+ "wctrans",
+ "wctype",
+ "wcwidth",
+ "wprintf"
+]
+
+
+def find_locale_dependent_function_uses():
+    """Return the git grep output lines that mention a listed function.
+
+    Runs ``git grep -E`` over ``*.cpp`` and ``*.h`` files, skipping the paths
+    in REGEXP_EXTERNAL_DEPENDENCIES_EXCLUSIONS, with a pattern that matches
+    any name from LOCALE_DEPENDENT_FUNCTIONS (optionally followed by ``_r``
+    or ``_s``) surrounded by characters that cannot be part of an identifier.
+
+    Returns:
+        The output of git grep split into lines, or an empty list when the
+        command fails with a return code of 1 or lower.
+
+    Raises:
+        CalledProcessError: if git grep fails with a return code above 1.
+    """
+    # Character class required on both sides of the function name.
+    boundary = "[^a-zA-Z0-9_\\`'\"<>]"
+    alternation = "|".join(LOCALE_DEPENDENT_FUNCTIONS)
+    pattern = boundary + "(" + alternation + "(_r|_s)?)" + boundary
+
+    command = ["git", "grep", "-E", pattern, "--", "*.cpp", "*.h"]
+    command.extend(":(exclude)" + path for path in REGEXP_EXTERNAL_DEPENDENCIES_EXCLUSIONS)
+
+    try:
+        output = check_output(command, universal_newlines=True, encoding="utf8")
+    except CalledProcessError as e:
+        if e.returncode > 1:
+            raise e
+        # Return codes of 1 or lower are treated as "nothing found".
+        return []
+
+    return output.splitlines()
+
+
+def main():
+    """Report uses of locale dependent functions and exit accordingly.
+
+    For every name in LOCALE_DEPENDENT_FUNCTIONS, the git grep output lines
+    are kept when they use that name, do not look like a comment or string
+    at the start of the source line, and are not covered by
+    KNOWN_VIOLATIONS. Each non-empty group is printed under a heading.
+
+    The process always ends through sys.exit(): with status 1 if at least
+    one use was reported, otherwise with status 0.
+    """
+    exit_code = 0
+
+    # Joining an empty list yields "", a pattern that matches every line and
+    # would therefore hide all findings. Only filter when there is something
+    # to filter on.
+    known_violations = re.compile("|".join(KNOWN_VIOLATIONS)) if KNOWN_VIOLATIONS else None
+    git_grep_output = find_locale_dependent_function_uses()
+
+    for locale_dependent_function in LOCALE_DEPENDENT_FUNCTIONS:
+        # Compile once per function instead of once per output line.
+        use_regexp = re.compile("[^a-zA-Z0-9_\\`'\"<>]" + locale_dependent_function + "(_r|_s)?[^a-zA-Z0-9_\\`'\"<>]")
+        # Source lines that start with //, *, /* or a double quote are
+        # skipped: the name appears in a comment or string there.
+        comment_regexp = re.compile("\\.(c|cpp|h):\\s*(//|\\*|/\\*|\").*" + locale_dependent_function)
+
+        matches = [line for line in git_grep_output
+                   if use_regexp.search(line)
+                   and not comment_regexp.search(line)
+                   and not (known_violations is not None and known_violations.search(line))]
+        if matches:
+            print(f"The locale dependent function {locale_dependent_function}(...) appears to be used:")
+            for match in matches:
+                print(match)
+            print("")
+            exit_code = 1
+
+    if exit_code == 1:
+        print("Unnecessary locale dependence can cause bugs that are very tricky to isolate and fix. Please avoid using locale dependent functions if possible.\n")
+        print(f"Advice not applicable in this specific case? Add an exception by updating the ignore list in {sys.argv[0]}")
+
+    sys.exit(exit_code)
+
+
+# Run the lint only when this file is executed as a script.
+if __name__ == "__main__":
+ main()