aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorWladimir J. van der Laan <laanwj@gmail.com>2018-06-07 08:55:26 +0200
committerWladimir J. van der Laan <laanwj@gmail.com>2018-06-07 08:56:46 +0200
commit5779dc4f76ad8a79d5e1f229a64beb5d7b8d7c84 (patch)
tree23c0aa9847f943d23ef9d04ad519ff87f50fe079
parente4082d59f53d25ccafb96fc829cadbdd13b25b16 (diff)
parent698cfd081144845f6246171b8a2a0cfa8daaecdb (diff)
Merge #13041: build: Add linter checking for accidental introduction of locale dependence
698cfd081144845f6246171b8a2a0cfa8daaecdb docs: Mention lint-locale-dependence.sh in developer-notes.md (practicalswift) 0a4ea2f4589961868ab4d25e0277485c31938e20 build: Add linter for checking accidental locale dependence (practicalswift) Pull request description: This linter will check for code accidentally introducing locale dependencies. Unnecessary locale dependence can cause bugs that are very tricky to isolate and fix. We should avoid using locale dependent functions if possible. Context: https://github.com/bitcoin/bitcoin/pull/12881#issuecomment-378564722 Example output: ``` $ contrib/devtools/lint-locale-dependence.sh The locale dependent function tolower(...) appears to be used: src/init.cpp: if (s[0] == '0' && std::tolower(s[1]) == 'x') { Unnecessary locale dependence can cause bugs that are very tricky to isolate and fix. Please avoid using locale dependent functions if possible. Advice not applicable in this specific case? Add an exception by updating the ignore list in contrib/devtools/lint-locale-dependence.sh ``` **Note to reviewers:** What is the most appropriate `LOCALE_DEPENDENT_FUNCTIONS` function list? What should be added or removed? Tree-SHA512: 14e448828804bb02bf59070647e38b52fce120c700c903a4a8472769a2cee5dd529bd3fc182386993cb8720482cf4250b63a0a477db61b941ae4babe5c65025f
-rw-r--r--doc/developer-notes.md30
-rwxr-xr-xtest/lint/lint-locale-dependence.sh229
2 files changed, 258 insertions, 1 deletions
diff --git a/doc/developer-notes.md b/doc/developer-notes.md
index 9081cab911..8f6c662f19 100644
--- a/doc/developer-notes.md
+++ b/doc/developer-notes.md
@@ -499,7 +499,35 @@ Strings and formatting
- Use `ParseInt32`, `ParseInt64`, `ParseUInt32`, `ParseUInt64`, `ParseDouble` from `utilstrencodings.h` for number parsing
- - *Rationale*: These functions do overflow checking, and avoid pesky locale issues
+ - *Rationale*: These functions do overflow checking, and avoid pesky locale issues.
+
+- Avoid using locale dependent functions if possible. You can use the provided
+ [`lint-locale-dependence.sh`](/contrib/devtools/lint-locale-dependence.sh)
+ to check for accidental use of locale dependent functions.
+
+ - *Rationale*: Unnecessary locale dependence can cause bugs that are very tricky to isolate and fix.
+
+ - These functions are known to be locale dependent:
+ `alphasort`, `asctime`, `asprintf`, `atof`, `atoi`, `atol`, `atoll`, `atoq`,
+ `btowc`, `ctime`, `dprintf`, `fgetwc`, `fgetws`, `fprintf`, `fputwc`,
+ `fputws`, `fscanf`, `fwprintf`, `getdate`, `getwc`, `getwchar`, `isalnum`,
+ `isalpha`, `isblank`, `iscntrl`, `isdigit`, `isgraph`, `islower`, `isprint`,
+ `ispunct`, `isspace`, `isupper`, `iswalnum`, `iswalpha`, `iswblank`,
+ `iswcntrl`, `iswctype`, `iswdigit`, `iswgraph`, `iswlower`, `iswprint`,
+ `iswpunct`, `iswspace`, `iswupper`, `iswxdigit`, `isxdigit`, `mblen`,
+ `mbrlen`, `mbrtowc`, `mbsinit`, `mbsnrtowcs`, `mbsrtowcs`, `mbstowcs`,
+ `mbtowc`, `mktime`, `putwc`, `putwchar`, `scanf`, `snprintf`, `sprintf`,
+ `sscanf`, `stoi`, `stol`, `stoll`, `strcasecmp`, `strcasestr`, `strcoll`,
+ `strfmon`, `strftime`, `strncasecmp`, `strptime`, `strtod`, `strtof`,
+ `strtoimax`, `strtol`, `strtold`, `strtoll`, `strtoq`, `strtoul`,
+ `strtoull`, `strtoumax`, `strtouq`, `strxfrm`, `swprintf`, `tolower`,
+ `toupper`, `towctrans`, `towlower`, `towupper`, `ungetwc`, `vasprintf`,
+ `vdprintf`, `versionsort`, `vfprintf`, `vfscanf`, `vfwprintf`, `vprintf`,
+ `vscanf`, `vsnprintf`, `vsprintf`, `vsscanf`, `vswprintf`, `vwprintf`,
+ `wcrtomb`, `wcscasecmp`, `wcscoll`, `wcsftime`, `wcsncasecmp`, `wcsnrtombs`,
+ `wcsrtombs`, `wcstod`, `wcstof`, `wcstoimax`, `wcstol`, `wcstold`,
+ `wcstoll`, `wcstombs`, `wcstoul`, `wcstoull`, `wcstoumax`, `wcswidth`,
+ `wcsxfrm`, `wctob`, `wctomb`, `wctrans`, `wctype`, `wcwidth`, `wprintf`
- For `strprintf`, `LogPrint`, `LogPrintf` formatting characters don't need size specifiers
diff --git a/test/lint/lint-locale-dependence.sh b/test/lint/lint-locale-dependence.sh
new file mode 100755
index 0000000000..3144f2c841
--- /dev/null
+++ b/test/lint/lint-locale-dependence.sh
@@ -0,0 +1,229 @@
+#!/bin/bash
+
+KNOWN_VIOLATIONS=(
+ "src/base58.cpp:.*isspace"
+ "src/bitcoin-tx.cpp.*stoul"
+ "src/bitcoin-tx.cpp.*trim_right"
+ "src/bitcoin-tx.cpp:.*atoi"
+ "src/core_read.cpp.*is_digit"
+ "src/dbwrapper.cpp.*stoul"
+ "src/dbwrapper.cpp:.*vsnprintf"
+ "src/httprpc.cpp.*trim"
+ "src/init.cpp:.*atoi"
+ "src/netbase.cpp.*to_lower"
+ "src/qt/rpcconsole.cpp:.*atoi"
+ "src/qt/rpcconsole.cpp:.*isdigit"
+ "src/rest.cpp:.*strtol"
+ "src/rpc/server.cpp.*to_upper"
+ "src/test/dbwrapper_tests.cpp:.*snprintf"
+ "src/test/getarg_tests.cpp.*split"
+ "src/torcontrol.cpp:.*atoi"
+ "src/torcontrol.cpp:.*strtol"
+ "src/uint256.cpp:.*isspace"
+ "src/uint256.cpp:.*tolower"
+ "src/util.cpp:.*atoi"
+ "src/util.cpp:.*fprintf"
+ "src/util.cpp:.*tolower"
+ "src/utilmoneystr.cpp:.*isdigit"
+ "src/utilmoneystr.cpp:.*isspace"
+ "src/utilstrencodings.cpp:.*atoi"
+ "src/utilstrencodings.cpp:.*isspace"
+ "src/utilstrencodings.cpp:.*strtol"
+ "src/utilstrencodings.cpp:.*strtoll"
+ "src/utilstrencodings.cpp:.*strtoul"
+ "src/utilstrencodings.cpp:.*strtoull"
+ "src/utilstrencodings.h:.*atoi"
+)
+
+REGEXP_IGNORE_EXTERNAL_DEPENDENCIES="^src/(crypto/ctaes/|leveldb/|secp256k1/|tinyformat.h|univalue/)"
+
+LOCALE_DEPENDENT_FUNCTIONS=(
+ alphasort # LC_COLLATE (via strcoll)
+ asctime # LC_TIME (directly)
+ asprintf # (via vasprintf)
+ atof # LC_NUMERIC (via strtod)
+ atoi # LC_NUMERIC (via strtol)
+ atol # LC_NUMERIC (via strtol)
+ atoll # (via strtoll)
+ atoq
+ btowc # LC_CTYPE (directly)
+ ctime # (via asctime or localtime)
+ dprintf # (via vdprintf)
+ fgetwc
+ fgetws
+ fold_case # boost::locale::fold_case
+ fprintf # (via vfprintf)
+ fputwc
+ fputws
+ fscanf # (via __vfscanf)
+ fwprintf # (via __vfwprintf)
+ getdate # via __getdate_r => isspace // __localtime_r
+ getwc
+ getwchar
+ is_digit # boost::algorithm::is_digit
+ is_space # boost::algorithm::is_space
+ isalnum # LC_CTYPE
+ isalpha # LC_CTYPE
+ isblank # LC_CTYPE
+ iscntrl # LC_CTYPE
+ isctype # LC_CTYPE
+ isdigit # LC_CTYPE
+ isgraph # LC_CTYPE
+ islower # LC_CTYPE
+ isprint # LC_CTYPE
+ ispunct # LC_CTYPE
+ isspace # LC_CTYPE
+ isupper # LC_CTYPE
+ iswalnum # LC_CTYPE
+ iswalpha # LC_CTYPE
+ iswblank # LC_CTYPE
+ iswcntrl # LC_CTYPE
+ iswctype # LC_CTYPE
+ iswdigit # LC_CTYPE
+ iswgraph # LC_CTYPE
+ iswlower # LC_CTYPE
+ iswprint # LC_CTYPE
+ iswpunct # LC_CTYPE
+ iswspace # LC_CTYPE
+ iswupper # LC_CTYPE
+ iswxdigit # LC_CTYPE
+ isxdigit # LC_CTYPE
+ localeconv # LC_NUMERIC + LC_MONETARY
+ mblen # LC_CTYPE
+ mbrlen
+ mbrtowc
+ mbsinit
+ mbsnrtowcs
+ mbsrtowcs
+ mbstowcs # LC_CTYPE
+ mbtowc # LC_CTYPE
+ mktime
+ normalize # boost::locale::normalize
+# printf # LC_NUMERIC
+ putwc
+ putwchar
+ scanf # LC_NUMERIC
+ setlocale
+ snprintf
+ sprintf
+ sscanf
+ stod
+ stof
+ stoi
+ stol
+ stold
+ stoll
+ stoul
+ stoull
+ strcasecmp
+ strcasestr
+ strcoll # LC_COLLATE
+# strerror
+ strfmon
+ strftime # LC_TIME
+ strncasecmp
+ strptime
+ strtod # LC_NUMERIC
+ strtof
+ strtoimax
+ strtol # LC_NUMERIC
+ strtold
+ strtoll
+ strtoq
+ strtoul # LC_NUMERIC
+ strtoull
+ strtoumax
+ strtouq
+ strxfrm # LC_COLLATE
+ swprintf
+ to_lower # boost::locale::to_lower
+ to_title # boost::locale::to_title
+ to_upper # boost::locale::to_upper
+ tolower # LC_CTYPE
+ toupper # LC_CTYPE
+ towctrans
+ towlower # LC_CTYPE
+ towupper # LC_CTYPE
+ trim # boost::algorithm::trim
+ trim_left # boost::algorithm::trim_left
+ trim_right # boost::algorithm::trim_right
+ ungetwc
+ vasprintf
+ vdprintf
+ versionsort
+ vfprintf
+ vfscanf
+ vfwprintf
+ vprintf
+ vscanf
+ vsnprintf
+ vsprintf
+ vsscanf
+ vswprintf
+ vwprintf
+ wcrtomb
+ wcscasecmp
+ wcscoll # LC_COLLATE
+ wcsftime # LC_TIME
+ wcsncasecmp
+ wcsnrtombs
+ wcsrtombs
+ wcstod # LC_NUMERIC
+ wcstof
+ wcstoimax
+ wcstol # LC_NUMERIC
+ wcstold
+ wcstoll
+ wcstombs # LC_CTYPE
+ wcstoul # LC_NUMERIC
+ wcstoull
+ wcstoumax
+ wcswidth
+ wcsxfrm # LC_COLLATE
+ wctob
+ wctomb # LC_CTYPE
+ wctrans
+ wctype
+ wcwidth
+ wprintf
+)
+
+function join_array {
+ local IFS="$1"
+ shift
+ echo "$*"
+}
+
+REGEXP_IGNORE_KNOWN_VIOLATIONS=$(join_array "|" "${KNOWN_VIOLATIONS[@]}")
+
+# Invoke "git grep" only once in order to minimize run-time
+REGEXP_LOCALE_DEPENDENT_FUNCTIONS=$(join_array "|" "${LOCALE_DEPENDENT_FUNCTIONS[@]}")
+GIT_GREP_OUTPUT=$(git grep -E "[^a-zA-Z0-9_\`'\"<>](${REGEXP_LOCALE_DEPENDENT_FUNCTIONS}(|_r|_s))[^a-zA-Z0-9_\`'\"<>]" -- "*.cpp" "*.h")
+
+EXIT_CODE=0
+for LOCALE_DEPENDENT_FUNCTION in "${LOCALE_DEPENDENT_FUNCTIONS[@]}"; do
+ MATCHES=$(grep -E "[^a-zA-Z0-9_\`'\"<>]${LOCALE_DEPENDENT_FUNCTION}(|_r|_s)[^a-zA-Z0-9_\`'\"<>]" <<< "${GIT_GREP_OUTPUT}" | \
+ grep -vE "\.(c|cpp|h):\s*(//|\*|/\*|\").*${LOCALE_DEPENDENT_FUNCTION}" | \
+ grep -vE 'fprintf\(.*(stdout|stderr)')
+ if [[ ${REGEXP_IGNORE_EXTERNAL_DEPENDENCIES} != "" ]]; then
+ MATCHES=$(grep -vE "${REGEXP_IGNORE_EXTERNAL_DEPENDENCIES}" <<< "${MATCHES}")
+ fi
+ if [[ ${REGEXP_IGNORE_KNOWN_VIOLATIONS} != "" ]]; then
+ MATCHES=$(grep -vE "${REGEXP_IGNORE_KNOWN_VIOLATIONS}" <<< "${MATCHES}")
+ fi
+ if [[ ${MATCHES} != "" ]]; then
+ echo "The locale dependent function ${LOCALE_DEPENDENT_FUNCTION}(...) appears to be used:"
+ echo "${MATCHES}"
+ echo
+ EXIT_CODE=1
+ fi
+done
+if [[ ${EXIT_CODE} != 0 ]]; then
+ echo "Unnecessary locale dependence can cause bugs that are very"
+ echo "tricky to isolate and fix. Please avoid using locale dependent"
+ echo "functions if possible."
+ echo
+ echo "Advice not applicable in this specific case? Add an exception"
+ echo "by updating the ignore list in $0"
+fi
+exit ${EXIT_CODE}