aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorpracticalswift <practicalswift@users.noreply.github.com>2021-09-30 14:18:50 +0000
committerpracticalswift <practicalswift@users.noreply.github.com>2021-09-30 14:21:17 +0000
commit4343f114cc661cf031ec915538c11b9b030e2e15 (patch)
tree0b0cc9ec8fbea9cc65490dd2ae437478b78c3a72
parent2d8e0c0c3c0d3c4cee7bb52d1edf501f40c53463 (diff)
Replace use of locale dependent atoi(…) with locale-independent std::from_chars(…) (C++17)
test: Add test cases for LocaleIndependentAtoi fuzz: Assert legacy atoi(s) == LocaleIndependentAtoi<int>(s) fuzz: Assert legacy atoi64(s) == LocaleIndependentAtoi<int64_t>(s)
-rw-r--r--src/core_read.cpp2
-rw-r--r--src/node/blockstorage.cpp2
-rw-r--r--src/qt/rpcconsole.cpp2
-rw-r--r--src/rpc/mining.cpp2
-rw-r--r--src/test/fuzz/locale.cpp6
-rw-r--r--src/test/fuzz/parse_numbers.cpp4
-rw-r--r--src/test/fuzz/string.cpp24
-rw-r--r--src/test/util_tests.cpp71
-rw-r--r--src/torcontrol.cpp2
-rw-r--r--src/util/moneystr.cpp3
-rw-r--r--src/util/strencodings.cpp14
-rw-r--r--src/util/strencodings.h30
-rw-r--r--src/util/system.cpp20
-rw-r--r--src/wallet/transaction.h4
-rwxr-xr-xtest/lint/lint-locale-dependence.sh11
15 files changed, 145 insertions, 52 deletions
diff --git a/src/core_read.cpp b/src/core_read.cpp
index 6108961010..320811b9e9 100644
--- a/src/core_read.cpp
+++ b/src/core_read.cpp
@@ -69,7 +69,7 @@ CScript ParseScript(const std::string& s)
(w->front() == '-' && w->size() > 1 && std::all_of(w->begin()+1, w->end(), ::IsDigit)))
{
// Number
- int64_t n = atoi64(*w);
+ int64_t n = LocaleIndependentAtoi<int64_t>(*w);
//limit the range of numbers ParseScript accepts in decimal
//since numbers outside -0xFFFFFFFF...0xFFFFFFFF are illegal in scripts
diff --git a/src/node/blockstorage.cpp b/src/node/blockstorage.cpp
index 5ddcf95c84..104fe8bf01 100644
--- a/src/node/blockstorage.cpp
+++ b/src/node/blockstorage.cpp
@@ -85,7 +85,7 @@ void CleanupBlockRevFiles()
// start removing block files.
int nContigCounter = 0;
for (const std::pair<const std::string, fs::path>& item : mapBlockFiles) {
- if (atoi(item.first) == nContigCounter) {
+ if (LocaleIndependentAtoi<int>(item.first) == nContigCounter) {
nContigCounter++;
continue;
}
diff --git a/src/qt/rpcconsole.cpp b/src/qt/rpcconsole.cpp
index 1c8ed22ada..3c0dc5aa40 100644
--- a/src/qt/rpcconsole.cpp
+++ b/src/qt/rpcconsole.cpp
@@ -250,7 +250,7 @@ bool RPCConsole::RPCParseCommandLine(interfaces::Node* node, std::string &strRes
for(char argch: curarg)
if (!IsDigit(argch))
throw std::runtime_error("Invalid result query");
- subelement = lastResult[atoi(curarg.c_str())];
+ subelement = lastResult[LocaleIndependentAtoi<int>(curarg)];
}
else if (lastResult.isObject())
subelement = find_value(lastResult, curarg);
diff --git a/src/rpc/mining.cpp b/src/rpc/mining.cpp
index 066a60b71b..8cad51a710 100644
--- a/src/rpc/mining.cpp
+++ b/src/rpc/mining.cpp
@@ -702,7 +702,7 @@ static RPCHelpMan getblocktemplate()
std::string lpstr = lpval.get_str();
hashWatchedChain = ParseHashV(lpstr.substr(0, 64), "longpollid");
- nTransactionsUpdatedLastLP = atoi64(lpstr.substr(64));
+ nTransactionsUpdatedLastLP = LocaleIndependentAtoi<int64_t>(lpstr.substr(64));
}
else
{
diff --git a/src/test/fuzz/locale.cpp b/src/test/fuzz/locale.cpp
index 5b1acae57b..4ad8123554 100644
--- a/src/test/fuzz/locale.cpp
+++ b/src/test/fuzz/locale.cpp
@@ -50,8 +50,6 @@ FUZZ_TARGET(locale)
const bool parseint32_without_locale = ParseInt32(random_string, &parseint32_out_without_locale);
int64_t parseint64_out_without_locale;
const bool parseint64_without_locale = ParseInt64(random_string, &parseint64_out_without_locale);
- const int64_t atoi64_without_locale = atoi64(random_string);
- const int atoi_without_locale = atoi(random_string);
const int64_t random_int64 = fuzzed_data_provider.ConsumeIntegral<int64_t>();
const std::string tostring_without_locale = ToString(random_int64);
// The variable `random_int32` is no longer used, but the harness still needs to
@@ -77,10 +75,6 @@ FUZZ_TARGET(locale)
if (parseint64_without_locale) {
assert(parseint64_out_without_locale == parseint64_out_with_locale);
}
- const int64_t atoi64_with_locale = atoi64(random_string);
- assert(atoi64_without_locale == atoi64_with_locale);
- const int atoi_with_locale = atoi(random_string);
- assert(atoi_without_locale == atoi_with_locale);
const std::string tostring_with_locale = ToString(random_int64);
assert(tostring_without_locale == tostring_with_locale);
const std::string strprintf_int_with_locale = strprintf("%d", random_int64);
diff --git a/src/test/fuzz/parse_numbers.cpp b/src/test/fuzz/parse_numbers.cpp
index 69e58c3f63..6a302e1e06 100644
--- a/src/test/fuzz/parse_numbers.cpp
+++ b/src/test/fuzz/parse_numbers.cpp
@@ -25,13 +25,13 @@ FUZZ_TARGET(parse_numbers)
int32_t i32;
(void)ParseInt32(random_string, &i32);
- (void)atoi(random_string);
+ (void)LocaleIndependentAtoi<int>(random_string);
uint32_t u32;
(void)ParseUInt32(random_string, &u32);
int64_t i64;
- (void)atoi64(random_string);
+ (void)LocaleIndependentAtoi<int64_t>(random_string);
(void)ParseFixedPoint(random_string, 3, &i64);
(void)ParseInt64(random_string, &i64);
diff --git a/src/test/fuzz/string.cpp b/src/test/fuzz/string.cpp
index dc2bf7c860..ab646c68fc 100644
--- a/src/test/fuzz/string.cpp
+++ b/src/test/fuzz/string.cpp
@@ -122,6 +122,12 @@ bool LegacyParseUInt64(const std::string& str, uint64_t* out)
return endp && *endp == 0 && !errno &&
n <= std::numeric_limits<uint64_t>::max();
}
+
+// For backwards compatibility checking.
+int64_t atoi64_legacy(const std::string& str)
+{
+ return strtoll(str.c_str(), nullptr, 10);
+}
}; // namespace
FUZZ_TARGET(string)
@@ -268,4 +274,22 @@ FUZZ_TARGET(string)
assert(u8 == u8_legacy);
}
}
+
+ {
+ const int atoi_result = atoi(random_string_1.c_str());
+ const int locale_independent_atoi_result = LocaleIndependentAtoi<int>(random_string_1);
+ const int64_t atoi64_result = atoi64_legacy(random_string_1);
+ const bool out_of_range = atoi64_result < std::numeric_limits<int>::min() || atoi64_result > std::numeric_limits<int>::max();
+ if (out_of_range) {
+ assert(locale_independent_atoi_result == 0);
+ } else {
+ assert(atoi_result == locale_independent_atoi_result);
+ }
+ }
+
+ {
+ const int64_t atoi64_result = atoi64_legacy(random_string_1);
+ const int64_t locale_independent_atoi_result = LocaleIndependentAtoi<int64_t>(random_string_1);
+ assert(atoi64_result == locale_independent_atoi_result || locale_independent_atoi_result == 0);
+ }
}
diff --git a/src/test/util_tests.cpp b/src/test/util_tests.cpp
index a13700d733..5d1ad2ebf6 100644
--- a/src/test/util_tests.cpp
+++ b/src/test/util_tests.cpp
@@ -1549,6 +1549,77 @@ BOOST_AUTO_TEST_CASE(test_ToIntegral)
BOOST_CHECK(!ToIntegral<uint8_t>("256"));
}
+BOOST_AUTO_TEST_CASE(test_LocaleIndependentAtoi)
+{
+ BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("1234"), 1'234);
+ BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("0"), 0);
+ BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("01234"), 1'234);
+ BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("-1234"), -1'234);
+ BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>(" 1"), 1);
+ BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("1 "), 1);
+ BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("1a"), 1);
+ BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("1.1"), 1);
+ BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("1.9"), 1);
+ BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("+01.9"), 1);
+ BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("-1"), -1);
+ BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>(" -1"), -1);
+ BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("-1 "), -1);
+ BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>(" -1 "), -1);
+ BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("+1"), 1);
+ BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>(" +1"), 1);
+ BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>(" +1 "), 1);
+
+ BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("+-1"), 0);
+ BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("-+1"), 0);
+ BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("++1"), 0);
+ BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("--1"), 0);
+ BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>(""), 0);
+ BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("aap"), 0);
+ BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("0x1"), 0);
+ BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("-32482348723847471234"), 0);
+ BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("32482348723847471234"), 0);
+
+ BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int64_t>("-9223372036854775809"), 0);
+ BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int64_t>("-9223372036854775808"), -9'223'372'036'854'775'807LL - 1LL);
+ BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int64_t>("9223372036854775807"), 9'223'372'036'854'775'807);
+ BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int64_t>("9223372036854775808"), 0);
+
+ BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint64_t>("-1"), 0U);
+ BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint64_t>("0"), 0U);
+ BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint64_t>("18446744073709551615"), 18'446'744'073'709'551'615ULL);
+ BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint64_t>("18446744073709551616"), 0U);
+
+ BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("-2147483649"), 0);
+ BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("-2147483648"), -2'147'483'648LL);
+ BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("2147483647"), 2'147'483'647);
+ BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("2147483648"), 0);
+
+ BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint32_t>("-1"), 0U);
+ BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint32_t>("0"), 0U);
+ BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint32_t>("4294967295"), 4'294'967'295U);
+ BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint32_t>("4294967296"), 0U);
+
+ BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int16_t>("-32769"), 0);
+ BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int16_t>("-32768"), -32'768);
+ BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int16_t>("32767"), 32'767);
+ BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int16_t>("32768"), 0);
+
+ BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint16_t>("-1"), 0U);
+ BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint16_t>("0"), 0U);
+ BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint16_t>("65535"), 65'535U);
+ BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint16_t>("65536"), 0U);
+
+ BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int8_t>("-129"), 0);
+ BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int8_t>("-128"), -128);
+ BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int8_t>("127"), 127);
+ BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int8_t>("128"), 0);
+
+ BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint8_t>("-1"), 0U);
+ BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint8_t>("0"), 0U);
+ BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint8_t>("255"), 255U);
+ BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint8_t>("256"), 0U);
+}
+
BOOST_AUTO_TEST_CASE(test_ParseInt64)
{
int64_t n;
diff --git a/src/torcontrol.cpp b/src/torcontrol.cpp
index bb296456ba..4bd19dcf45 100644
--- a/src/torcontrol.cpp
+++ b/src/torcontrol.cpp
@@ -83,7 +83,7 @@ void TorControlConnection::readcb(struct bufferevent *bev, void *ctx)
if (s.size() < 4) // Short line
continue;
// <status>(-|+| )<data><CRLF>
- self->message.code = atoi(s.substr(0,3));
+ self->message.code = LocaleIndependentAtoi<int>(s.substr(0,3));
self->message.lines.push_back(s.substr(4));
char ch = s[3]; // '-','+' or ' '
if (ch == ' ') {
diff --git a/src/util/moneystr.cpp b/src/util/moneystr.cpp
index d3f4029607..95a919ad12 100644
--- a/src/util/moneystr.cpp
+++ b/src/util/moneystr.cpp
@@ -77,8 +77,7 @@ std::optional<CAmount> ParseMoney(const std::string& money_string)
return std::nullopt;
if (nUnits < 0 || nUnits > COIN)
return std::nullopt;
- int64_t nWhole = atoi64(strWhole);
-
+ int64_t nWhole = LocaleIndependentAtoi<int64_t>(strWhole);
CAmount value = nWhole * COIN + nUnits;
if (!MoneyRange(value)) {
diff --git a/src/util/strencodings.cpp b/src/util/strencodings.cpp
index 0aa80ea0ae..88fec6740a 100644
--- a/src/util/strencodings.cpp
+++ b/src/util/strencodings.cpp
@@ -403,20 +403,6 @@ std::string FormatParagraph(const std::string& in, size_t width, size_t indent)
return out.str();
}
-int64_t atoi64(const std::string& str)
-{
-#ifdef _MSC_VER
- return _atoi64(str.c_str());
-#else
- return strtoll(str.c_str(), nullptr, 10);
-#endif
-}
-
-int atoi(const std::string& str)
-{
- return atoi(str.c_str());
-}
-
/** Upper bound for mantissa.
* 10^18-1 is the largest arbitrary decimal that will fit in a signed 64-bit integer.
* Larger integers cannot consist of arbitrary combinations of 0-9:
diff --git a/src/util/strencodings.h b/src/util/strencodings.h
index 1217572c45..166352c42f 100644
--- a/src/util/strencodings.h
+++ b/src/util/strencodings.h
@@ -11,6 +11,7 @@
#include <attributes.h>
#include <span.h>
+#include <util/string.h>
#include <charconv>
#include <cstdint>
@@ -68,8 +69,33 @@ std::string EncodeBase32(Span<const unsigned char> input, bool pad = true);
std::string EncodeBase32(const std::string& str, bool pad = true);
void SplitHostPort(std::string in, uint16_t& portOut, std::string& hostOut);
-int64_t atoi64(const std::string& str);
-int atoi(const std::string& str);
+
+// LocaleIndependentAtoi is provided for backwards compatibility reasons.
+//
+// New code should use the ParseInt64/ParseUInt64/ParseInt32/ParseUInt32 functions
+// which provide parse error feedback.
+//
+// The goal of LocaleIndependentAtoi is to replicate the exact defined behaviour
+// of atoi and atoi64 as they behave under the "C" locale.
+template <typename T>
+T LocaleIndependentAtoi(const std::string& str)
+{
+ static_assert(std::is_integral<T>::value);
+ T result;
+ // Emulate atoi(...) handling of white space and leading +/-.
+ std::string s = TrimString(str);
+ if (!s.empty() && s[0] == '+') {
+ if (s.length() >= 2 && s[1] == '-') {
+ return 0;
+ }
+ s = s.substr(1);
+ }
+ auto [_, error_condition] = std::from_chars(s.data(), s.data() + s.size(), result);
+ if (error_condition != std::errc{}) {
+ return 0;
+ }
+ return result;
+}
/**
* Tests if the given character is a decimal digit.
diff --git a/src/util/system.cpp b/src/util/system.cpp
index 4defeed4ce..79c08816fa 100644
--- a/src/util/system.cpp
+++ b/src/util/system.cpp
@@ -158,16 +158,14 @@ std::streampos GetFileSize(const char* path, std::streamsize max) {
/**
* Interpret a string argument as a boolean.
*
- * The definition of atoi() requires that non-numeric string values like "foo",
- * return 0. This means that if a user unintentionally supplies a non-integer
- * argument here, the return value is always false. This means that -foo=false
- * does what the user probably expects, but -foo=true is well defined but does
- * not do what they probably expected.
+ * The definition of LocaleIndependentAtoi<int>() requires that non-numeric string values
+ * like "foo", return 0. This means that if a user unintentionally supplies a
+ * non-integer argument here, the return value is always false. This means that
+ * -foo=false does what the user probably expects, but -foo=true is well defined
+ * but does not do what they probably expected.
*
- * The return value of atoi() is undefined when given input not representable as
- * an int. On most systems this means string value between "-2147483648" and
- * "2147483647" are well defined (this method will return true). Setting
- * -txindex=2147483648 on most systems, however, is probably undefined.
+ * The return value of LocaleIndependentAtoi<int>(...) is zero when given input not
+ * representable as an int.
*
* For a more extensive discussion of this topic (and a wide range of opinions
* on the Right Way to change this code), see PR12713.
@@ -176,7 +174,7 @@ static bool InterpretBool(const std::string& strValue)
{
if (strValue.empty())
return true;
- return (atoi(strValue) != 0);
+ return (LocaleIndependentAtoi<int>(strValue) != 0);
}
static std::string SettingName(const std::string& arg)
@@ -594,7 +592,7 @@ std::string ArgsManager::GetArg(const std::string& strArg, const std::string& st
int64_t ArgsManager::GetIntArg(const std::string& strArg, int64_t nDefault) const
{
const util::SettingsValue value = GetSetting(strArg);
- return value.isNull() ? nDefault : value.isFalse() ? 0 : value.isTrue() ? 1 : value.isNum() ? value.get_int64() : atoi64(value.get_str());
+ return value.isNull() ? nDefault : value.isFalse() ? 0 : value.isTrue() ? 1 : value.isNum() ? value.get_int64() : LocaleIndependentAtoi<int64_t>(value.get_str());
}
bool ArgsManager::GetBoolArg(const std::string& strArg, bool fDefault) const
diff --git a/src/wallet/transaction.h b/src/wallet/transaction.h
index 0cd91b9ebe..223901586e 100644
--- a/src/wallet/transaction.h
+++ b/src/wallet/transaction.h
@@ -216,9 +216,9 @@ public:
}
const auto it_op = mapValue.find("n");
- nOrderPos = (it_op != mapValue.end()) ? atoi64(it_op->second) : -1;
+ nOrderPos = (it_op != mapValue.end()) ? LocaleIndependentAtoi<int64_t>(it_op->second) : -1;
const auto it_ts = mapValue.find("timesmart");
- nTimeSmart = (it_ts != mapValue.end()) ? static_cast<unsigned int>(atoi64(it_ts->second)) : 0;
+ nTimeSmart = (it_ts != mapValue.end()) ? static_cast<unsigned int>(LocaleIndependentAtoi<int64_t>(it_ts->second)) : 0;
mapValue.erase("fromaccount");
mapValue.erase("spent");
diff --git a/test/lint/lint-locale-dependence.sh b/test/lint/lint-locale-dependence.sh
index fcc4883d0b..3015c4f9b9 100755
--- a/test/lint/lint-locale-dependence.sh
+++ b/test/lint/lint-locale-dependence.sh
@@ -37,23 +37,18 @@ export LC_ALL=C
# See https://doc.qt.io/qt-5/qcoreapplication.html#locale-settings and
# https://stackoverflow.com/a/34878283 for more details.
+# TODO: Reduce KNOWN_VIOLATIONS by replacing uses of locale dependent stoul/strtol with locale
+# independent ToIntegral<T>(...).
+# TODO: Reduce KNOWN_VIOLATIONS by replacing uses of locale dependent snprintf with strprintf.
KNOWN_VIOLATIONS=(
"src/bitcoin-tx.cpp.*stoul"
"src/dbwrapper.cpp.*stoul"
"src/dbwrapper.cpp:.*vsnprintf"
- "src/node/blockstorage.cpp:.*atoi"
- "src/qt/rpcconsole.cpp:.*atoi"
"src/rest.cpp:.*strtol"
"src/test/dbwrapper_tests.cpp:.*snprintf"
"src/test/fuzz/locale.cpp"
- "src/test/fuzz/parse_numbers.cpp:.*atoi"
"src/test/fuzz/string.cpp"
- "src/torcontrol.cpp:.*atoi"
"src/torcontrol.cpp:.*strtol"
- "src/util/strencodings.cpp:.*atoi"
- "src/util/strencodings.cpp:.*strtoll"
- "src/util/strencodings.h:.*atoi"
- "src/util/system.cpp:.*atoi"
)
REGEXP_IGNORE_EXTERNAL_DEPENDENCIES="^src/(crypto/ctaes/|leveldb/|secp256k1/|tinyformat.h|univalue/)"