aboutsummaryrefslogtreecommitdiff
path: root/test/lint/lint-locale-dependence.sh
blob: 0b839824d55e69fb24e513623260b00f358e5d13 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
#!/usr/bin/env bash
# Copyright (c) 2018-2020 The Bitcoin Core developers
# Distributed under the MIT software license, see the accompanying
# file COPYING or http://www.opensource.org/licenses/mit-license.php.

export LC_ALL=C

# Be aware that bitcoind and bitcoin-qt differ in terms of localization: Qt
# opts in to POSIX localization by running setlocale(LC_ALL, "") on startup,
# whereas no such call is made in bitcoind.
#
# Qt runs setlocale(LC_ALL, "") on initialization. This installs the locale
# specified by the user's LC_ALL (or LC_*) environment variable as the new
# C locale.
#
# In contrast, bitcoind does not opt in to localization -- no call to
# setlocale(LC_ALL, "") is made and the environment variables LC_* are
# thus ignored.
#
# This results in situations where bitcoind is guaranteed to be running
# with the classic locale ("C") whereas the locale of bitcoin-qt will vary
# depending on the user's environment variables.
#
# An example: Assuming the environment variable LC_ALL=de_DE then the
# call std::to_string(1.23) will return "1.230000" in bitcoind but
# "1,230000" in bitcoin-qt.
#
# From the Qt documentation:
# "On Unix/Linux Qt is configured to use the system locale settings by default.
#  This can cause a conflict when using POSIX functions, for instance, when
#  converting between data types such as floats and strings, since the notation
#  may differ between locales. To get around this problem, call the POSIX function
#  setlocale(LC_NUMERIC,"C") right after initializing QApplication, QGuiApplication
#  or QCoreApplication to reset the locale that is used for number formatting to
#  "C"-locale."
#
# See https://doc.qt.io/qt-5/qcoreapplication.html#locale-settings and
# https://stackoverflow.com/a/34878283 for more details.

KNOWN_VIOLATIONS=(
    "src/bitcoin-tx.cpp.*stoul"
    "src/bitcoin-tx.cpp.*trim_right"
    "src/dbwrapper.cpp.*stoul"
    "src/dbwrapper.cpp:.*vsnprintf"
    "src/httprpc.cpp.*trim"
    "src/init.cpp:.*atoi"
    "src/qt/rpcconsole.cpp:.*atoi"
    "src/rest.cpp:.*strtol"
    "src/test/dbwrapper_tests.cpp:.*snprintf"
    "src/test/fuzz/locale.cpp"
    "src/test/fuzz/parse_numbers.cpp:.*atoi"
    "src/torcontrol.cpp:.*atoi"
    "src/torcontrol.cpp:.*strtol"
    "src/util/strencodings.cpp:.*atoi"
    "src/util/strencodings.cpp:.*strtol"
    "src/util/strencodings.cpp:.*strtoul"
    "src/util/strencodings.h:.*atoi"
    "src/util/system.cpp:.*atoi"
)

REGEXP_IGNORE_EXTERNAL_DEPENDENCIES="^src/(crypto/ctaes/|leveldb/|secp256k1/|tinyformat.h|univalue/)"

LOCALE_DEPENDENT_FUNCTIONS=(
    alphasort    # LC_COLLATE (via strcoll)
    asctime      # LC_TIME (directly)
    asprintf     # (via vasprintf)
    atof         # LC_NUMERIC (via strtod)
    atoi         # LC_NUMERIC (via strtol)
    atol         # LC_NUMERIC (via strtol)
    atoll        # (via strtoll)
    atoq
    btowc        # LC_CTYPE (directly)
    ctime        # (via asctime or localtime)
    dprintf      # (via vdprintf)
    fgetwc
    fgetws
    fold_case    # boost::locale::fold_case
    fprintf      # (via vfprintf)
    fputwc
    fputws
    fscanf       # (via __vfscanf)
    fwprintf     # (via __vfwprintf)
    getdate      # via __getdate_r => isspace // __localtime_r
    getwc
    getwchar
    is_digit     # boost::algorithm::is_digit
    is_space     # boost::algorithm::is_space
    isalnum      # LC_CTYPE
    isalpha      # LC_CTYPE
    isblank      # LC_CTYPE
    iscntrl      # LC_CTYPE
    isctype      # LC_CTYPE
    isdigit      # LC_CTYPE
    isgraph      # LC_CTYPE
    islower      # LC_CTYPE
    isprint      # LC_CTYPE
    ispunct      # LC_CTYPE
    isspace      # LC_CTYPE
    isupper      # LC_CTYPE
    iswalnum     # LC_CTYPE
    iswalpha     # LC_CTYPE
    iswblank     # LC_CTYPE
    iswcntrl     # LC_CTYPE
    iswctype     # LC_CTYPE
    iswdigit     # LC_CTYPE
    iswgraph     # LC_CTYPE
    iswlower     # LC_CTYPE
    iswprint     # LC_CTYPE
    iswpunct     # LC_CTYPE
    iswspace     # LC_CTYPE
    iswupper     # LC_CTYPE
    iswxdigit    # LC_CTYPE
    isxdigit     # LC_CTYPE
    localeconv   # LC_NUMERIC + LC_MONETARY
    mblen        # LC_CTYPE
    mbrlen
    mbrtowc
    mbsinit
    mbsnrtowcs
    mbsrtowcs
    mbstowcs     # LC_CTYPE
    mbtowc       # LC_CTYPE
    mktime
    normalize    # boost::locale::normalize
    printf       # LC_NUMERIC
    putwc
    putwchar
    scanf        # LC_NUMERIC
    setlocale
    snprintf
    sprintf
    sscanf
    std::to_string
    stod
    stof
    stoi
    stol
    stold
    stoll
    stoul
    stoull
    strcasecmp
    strcasestr
    strcoll      # LC_COLLATE
#   strerror
    strfmon
    strftime     # LC_TIME
    strncasecmp
    strptime
    strtod       # LC_NUMERIC
    strtof
    strtoimax
    strtol       # LC_NUMERIC
    strtold
    strtoll
    strtoq
    strtoul      # LC_NUMERIC
    strtoull
    strtoumax
    strtouq
    strxfrm      # LC_COLLATE
    swprintf
    to_lower     # boost::locale::to_lower
    to_title     # boost::locale::to_title
    to_upper     # boost::locale::to_upper
    tolower      # LC_CTYPE
    toupper      # LC_CTYPE
    towctrans
    towlower     # LC_CTYPE
    towupper     # LC_CTYPE
    trim         # boost::algorithm::trim
    trim_left    # boost::algorithm::trim_left
    trim_right   # boost::algorithm::trim_right
    ungetwc
    vasprintf
    vdprintf
    versionsort
    vfprintf
    vfscanf
    vfwprintf
    vprintf
    vscanf
    vsnprintf
    vsprintf
    vsscanf
    vswprintf
    vwprintf
    wcrtomb
    wcscasecmp
    wcscoll      # LC_COLLATE
    wcsftime     # LC_TIME
    wcsncasecmp
    wcsnrtombs
    wcsrtombs
    wcstod       # LC_NUMERIC
    wcstof
    wcstoimax
    wcstol       # LC_NUMERIC
    wcstold
    wcstoll
    wcstombs     # LC_CTYPE
    wcstoul      # LC_NUMERIC
    wcstoull
    wcstoumax
    wcswidth
    wcsxfrm      # LC_COLLATE
    wctob
    wctomb       # LC_CTYPE
    wctrans
    wctype
    wcwidth
    wprintf
)

function join_array {
    local IFS="$1"
    shift
    echo "$*"
}

REGEXP_IGNORE_KNOWN_VIOLATIONS=$(join_array "|" "${KNOWN_VIOLATIONS[@]}")

# Invoke "git grep" only once in order to minimize run-time
REGEXP_LOCALE_DEPENDENT_FUNCTIONS=$(join_array "|" "${LOCALE_DEPENDENT_FUNCTIONS[@]}")
GIT_GREP_OUTPUT=$(git grep -E "[^a-zA-Z0-9_\`'\"<>](${REGEXP_LOCALE_DEPENDENT_FUNCTIONS}(_r|_s)?)[^a-zA-Z0-9_\`'\"<>]" -- "*.cpp" "*.h")

EXIT_CODE=0
for LOCALE_DEPENDENT_FUNCTION in "${LOCALE_DEPENDENT_FUNCTIONS[@]}"; do
    MATCHES=$(grep -E "[^a-zA-Z0-9_\`'\"<>]${LOCALE_DEPENDENT_FUNCTION}(_r|_s)?[^a-zA-Z0-9_\`'\"<>]" <<< "${GIT_GREP_OUTPUT}" | \
        grep -vE "\.(c|cpp|h):\s*(//|\*|/\*|\").*${LOCALE_DEPENDENT_FUNCTION}")
    if [[ ${REGEXP_IGNORE_EXTERNAL_DEPENDENCIES} != "" ]]; then
        MATCHES=$(grep -vE "${REGEXP_IGNORE_EXTERNAL_DEPENDENCIES}" <<< "${MATCHES}")
    fi
    if [[ ${REGEXP_IGNORE_KNOWN_VIOLATIONS} != "" ]]; then
        MATCHES=$(grep -vE "${REGEXP_IGNORE_KNOWN_VIOLATIONS}" <<< "${MATCHES}")
    fi
    if [[ ${MATCHES} != "" ]]; then
        echo "The locale dependent function ${LOCALE_DEPENDENT_FUNCTION}(...) appears to be used:"
        echo "${MATCHES}"
        echo
        EXIT_CODE=1
    fi
done
if [[ ${EXIT_CODE} != 0 ]]; then
    echo "Unnecessary locale dependence can cause bugs that are very"
    echo "tricky to isolate and fix. Please avoid using locale dependent"
    echo "functions if possible."
    echo
    echo "Advice not applicable in this specific case? Add an exception"
    echo "by updating the ignore list in $0"
fi
exit ${EXIT_CODE}