diff options
author | W. J. van der Laan <laanwj@protonmail.com> | 2021-10-04 12:49:04 +0200 |
---|---|---|
committer | W. J. van der Laan <laanwj@protonmail.com> | 2021-10-04 12:59:28 +0200 |
commit | cdb4dfcbf1c82ef4c4eb7bb982b35219d5dbf827 (patch) | |
tree | a89fc72c12ef05a3c880e7268fd26962e5a50816 | |
parent | c6f710ec985518ccff6dd69426625f2ed0d102f8 (diff) | |
parent | 4343f114cc661cf031ec915538c11b9b030e2e15 (diff) |
Merge bitcoin/bitcoin#20452: util: Replace use of locale dependent atoi(…) with locale-independent std::from_chars(…) (C++17)
4343f114cc661cf031ec915538c11b9b030e2e15 Replace use of locale dependent atoi(…) with locale-independent std::from_chars(…) (C++17) (practicalswift)
Pull request description:
Replace use of locale dependent `atoi(…)` with locale-independent `std::from_chars(…)` (C++17).
ACKs for top commit:
laanwj:
Code review ACK 4343f114cc661cf031ec915538c11b9b030e2e15
jonatack:
Code review ACK 4343f114cc661cf031ec915538c11b9b030e2e15
Tree-SHA512: e4909da282b6cefc5ca34e13b02cc489af56cab339a77ae5c35ac9ef355d9b941b129a2bfddc1b37426b11c79a21c8b729fbb5255e6d9eaa344406b18b825494
-rw-r--r-- | src/core_read.cpp | 2 | ||||
-rw-r--r-- | src/node/blockstorage.cpp | 2 | ||||
-rw-r--r-- | src/qt/rpcconsole.cpp | 2 | ||||
-rw-r--r-- | src/rpc/mining.cpp | 2 | ||||
-rw-r--r-- | src/test/fuzz/locale.cpp | 6 | ||||
-rw-r--r-- | src/test/fuzz/parse_numbers.cpp | 4 | ||||
-rw-r--r-- | src/test/fuzz/string.cpp | 24 | ||||
-rw-r--r-- | src/test/util_tests.cpp | 71 | ||||
-rw-r--r-- | src/torcontrol.cpp | 2 | ||||
-rw-r--r-- | src/util/moneystr.cpp | 3 | ||||
-rw-r--r-- | src/util/strencodings.cpp | 14 | ||||
-rw-r--r-- | src/util/strencodings.h | 30 | ||||
-rw-r--r-- | src/util/system.cpp | 20 | ||||
-rw-r--r-- | src/wallet/transaction.h | 4 | ||||
-rwxr-xr-x | test/lint/lint-locale-dependence.sh | 11 |
15 files changed, 145 insertions, 52 deletions
diff --git a/src/core_read.cpp b/src/core_read.cpp index 6108961010..320811b9e9 100644 --- a/src/core_read.cpp +++ b/src/core_read.cpp @@ -69,7 +69,7 @@ CScript ParseScript(const std::string& s) (w->front() == '-' && w->size() > 1 && std::all_of(w->begin()+1, w->end(), ::IsDigit))) { // Number - int64_t n = atoi64(*w); + int64_t n = LocaleIndependentAtoi<int64_t>(*w); //limit the range of numbers ParseScript accepts in decimal //since numbers outside -0xFFFFFFFF...0xFFFFFFFF are illegal in scripts diff --git a/src/node/blockstorage.cpp b/src/node/blockstorage.cpp index 5ddcf95c84..104fe8bf01 100644 --- a/src/node/blockstorage.cpp +++ b/src/node/blockstorage.cpp @@ -85,7 +85,7 @@ void CleanupBlockRevFiles() // start removing block files. int nContigCounter = 0; for (const std::pair<const std::string, fs::path>& item : mapBlockFiles) { - if (atoi(item.first) == nContigCounter) { + if (LocaleIndependentAtoi<int>(item.first) == nContigCounter) { nContigCounter++; continue; } diff --git a/src/qt/rpcconsole.cpp b/src/qt/rpcconsole.cpp index 1c8ed22ada..3c0dc5aa40 100644 --- a/src/qt/rpcconsole.cpp +++ b/src/qt/rpcconsole.cpp @@ -250,7 +250,7 @@ bool RPCConsole::RPCParseCommandLine(interfaces::Node* node, std::string &strRes for(char argch: curarg) if (!IsDigit(argch)) throw std::runtime_error("Invalid result query"); - subelement = lastResult[atoi(curarg.c_str())]; + subelement = lastResult[LocaleIndependentAtoi<int>(curarg)]; } else if (lastResult.isObject()) subelement = find_value(lastResult, curarg); diff --git a/src/rpc/mining.cpp b/src/rpc/mining.cpp index 066a60b71b..8cad51a710 100644 --- a/src/rpc/mining.cpp +++ b/src/rpc/mining.cpp @@ -702,7 +702,7 @@ static RPCHelpMan getblocktemplate() std::string lpstr = lpval.get_str(); hashWatchedChain = ParseHashV(lpstr.substr(0, 64), "longpollid"); - nTransactionsUpdatedLastLP = atoi64(lpstr.substr(64)); + nTransactionsUpdatedLastLP = LocaleIndependentAtoi<int64_t>(lpstr.substr(64)); } else { diff --git a/src/test/fuzz/locale.cpp b/src/test/fuzz/locale.cpp index 5b1acae57b..4ad8123554 100644 --- a/src/test/fuzz/locale.cpp +++ b/src/test/fuzz/locale.cpp @@ -50,8 +50,6 @@ FUZZ_TARGET(locale) const bool parseint32_without_locale = ParseInt32(random_string, &parseint32_out_without_locale); int64_t parseint64_out_without_locale; const bool parseint64_without_locale = ParseInt64(random_string, &parseint64_out_without_locale); - const int64_t atoi64_without_locale = atoi64(random_string); - const int atoi_without_locale = atoi(random_string); const int64_t random_int64 = fuzzed_data_provider.ConsumeIntegral<int64_t>(); const std::string tostring_without_locale = ToString(random_int64); // The variable `random_int32` is no longer used, but the harness still needs to @@ -77,10 +75,6 @@ FUZZ_TARGET(locale) if (parseint64_without_locale) { assert(parseint64_out_without_locale == parseint64_out_with_locale); } - const int64_t atoi64_with_locale = atoi64(random_string); - assert(atoi64_without_locale == atoi64_with_locale); - const int atoi_with_locale = atoi(random_string); - assert(atoi_without_locale == atoi_with_locale); const std::string tostring_with_locale = ToString(random_int64); assert(tostring_without_locale == tostring_with_locale); const std::string strprintf_int_with_locale = strprintf("%d", random_int64); diff --git a/src/test/fuzz/parse_numbers.cpp b/src/test/fuzz/parse_numbers.cpp index 69e58c3f63..6a302e1e06 100644 --- a/src/test/fuzz/parse_numbers.cpp +++ b/src/test/fuzz/parse_numbers.cpp @@ -25,13 +25,13 @@ FUZZ_TARGET(parse_numbers) int32_t i32; (void)ParseInt32(random_string, &i32); - (void)atoi(random_string); + (void)LocaleIndependentAtoi<int>(random_string); uint32_t u32; (void)ParseUInt32(random_string, &u32); int64_t i64; - (void)atoi64(random_string); + (void)LocaleIndependentAtoi<int64_t>(random_string); (void)ParseFixedPoint(random_string, 3, &i64); (void)ParseInt64(random_string, &i64); diff --git a/src/test/fuzz/string.cpp b/src/test/fuzz/string.cpp index dc2bf7c860..ab646c68fc 100644 --- a/src/test/fuzz/string.cpp +++ b/src/test/fuzz/string.cpp @@ -122,6 +122,12 @@ bool LegacyParseUInt64(const std::string& str, uint64_t* out) return endp && *endp == 0 && !errno && n <= std::numeric_limits<uint64_t>::max(); } + +// For backwards compatibility checking. +int64_t atoi64_legacy(const std::string& str) +{ + return strtoll(str.c_str(), nullptr, 10); +} }; // namespace FUZZ_TARGET(string) @@ -268,4 +274,22 @@ FUZZ_TARGET(string) assert(u8 == u8_legacy); } } + + { + const int atoi_result = atoi(random_string_1.c_str()); + const int locale_independent_atoi_result = LocaleIndependentAtoi<int>(random_string_1); + const int64_t atoi64_result = atoi64_legacy(random_string_1); + const bool out_of_range = atoi64_result < std::numeric_limits<int>::min() || atoi64_result > std::numeric_limits<int>::max(); + if (out_of_range) { + assert(locale_independent_atoi_result == 0); + } else { + assert(atoi_result == locale_independent_atoi_result); + } + } + + { + const int64_t atoi64_result = atoi64_legacy(random_string_1); + const int64_t locale_independent_atoi_result = LocaleIndependentAtoi<int64_t>(random_string_1); + assert(atoi64_result == locale_independent_atoi_result || locale_independent_atoi_result == 0); + } } diff --git a/src/test/util_tests.cpp b/src/test/util_tests.cpp index a13700d733..5d1ad2ebf6 100644 --- a/src/test/util_tests.cpp +++ b/src/test/util_tests.cpp @@ -1549,6 +1549,77 @@ BOOST_AUTO_TEST_CASE(test_ToIntegral) BOOST_CHECK(!ToIntegral<uint8_t>("256")); } +BOOST_AUTO_TEST_CASE(test_LocaleIndependentAtoi) +{ + BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("1234"), 1'234); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("0"), 0); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("01234"), 1'234); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("-1234"), -1'234); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>(" 1"), 1); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("1 "), 1); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("1a"), 1); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("1.1"), 1); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("1.9"), 1); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("+01.9"), 1); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("-1"), -1); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>(" -1"), -1); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("-1 "), -1); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>(" -1 "), -1); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("+1"), 1); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>(" +1"), 1); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>(" +1 "), 1); + + BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("+-1"), 0); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("-+1"), 0); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("++1"), 0); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("--1"), 0); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>(""), 0); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("aap"), 0); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("0x1"), 0); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("-32482348723847471234"), 0); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("32482348723847471234"), 0); + + BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int64_t>("-9223372036854775809"), 0); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int64_t>("-9223372036854775808"), -9'223'372'036'854'775'807LL - 1LL); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int64_t>("9223372036854775807"), 9'223'372'036'854'775'807); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int64_t>("9223372036854775808"), 0); + + BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint64_t>("-1"), 0U); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint64_t>("0"), 0U); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint64_t>("18446744073709551615"), 18'446'744'073'709'551'615ULL); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint64_t>("18446744073709551616"), 0U); + + BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("-2147483649"), 0); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("-2147483648"), -2'147'483'648LL); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("2147483647"), 2'147'483'647); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int32_t>("2147483648"), 0); + + BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint32_t>("-1"), 0U); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint32_t>("0"), 0U); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint32_t>("4294967295"), 4'294'967'295U); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint32_t>("4294967296"), 0U); + + BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int16_t>("-32769"), 0); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int16_t>("-32768"), -32'768); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int16_t>("32767"), 32'767); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int16_t>("32768"), 0); + + BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint16_t>("-1"), 0U); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint16_t>("0"), 0U); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint16_t>("65535"), 65'535U); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint16_t>("65536"), 0U); + + BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int8_t>("-129"), 0); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int8_t>("-128"), -128); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int8_t>("127"), 127); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi<int8_t>("128"), 0); + + BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint8_t>("-1"), 0U); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint8_t>("0"), 0U); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint8_t>("255"), 255U); + BOOST_CHECK_EQUAL(LocaleIndependentAtoi<uint8_t>("256"), 0U); +} + BOOST_AUTO_TEST_CASE(test_ParseInt64) { int64_t n; diff --git a/src/torcontrol.cpp b/src/torcontrol.cpp index bb296456ba..4bd19dcf45 100644 --- a/src/torcontrol.cpp +++ b/src/torcontrol.cpp @@ -83,7 +83,7 @@ void TorControlConnection::readcb(struct bufferevent *bev, void *ctx) if (s.size() < 4) // Short line continue; // <status>(-|+| )<data><CRLF> - self->message.code = atoi(s.substr(0,3)); + self->message.code = LocaleIndependentAtoi<int>(s.substr(0,3)); self->message.lines.push_back(s.substr(4)); char ch = s[3]; // '-','+' or ' ' if (ch == ' ') { diff --git a/src/util/moneystr.cpp b/src/util/moneystr.cpp index d3f4029607..95a919ad12 100644 --- a/src/util/moneystr.cpp +++ b/src/util/moneystr.cpp @@ -77,8 +77,7 @@ std::optional<CAmount> ParseMoney(const std::string& money_string) return std::nullopt; if (nUnits < 0 || nUnits > COIN) return std::nullopt; - int64_t nWhole = atoi64(strWhole); - + int64_t nWhole = LocaleIndependentAtoi<int64_t>(strWhole); CAmount value = nWhole * COIN + nUnits; if (!MoneyRange(value)) { diff --git a/src/util/strencodings.cpp b/src/util/strencodings.cpp index 0aa80ea0ae..88fec6740a 100644 --- a/src/util/strencodings.cpp +++ b/src/util/strencodings.cpp @@ -403,20 +403,6 @@ std::string FormatParagraph(const std::string& in, size_t width, size_t indent) return out.str(); } -int64_t atoi64(const std::string& str) -{ -#ifdef _MSC_VER - return _atoi64(str.c_str()); -#else - return strtoll(str.c_str(), nullptr, 10); -#endif -} - -int atoi(const std::string& str) -{ - return atoi(str.c_str()); -} - /** Upper bound for mantissa. * 10^18-1 is the largest arbitrary decimal that will fit in a signed 64-bit integer. * Larger integers cannot consist of arbitrary combinations of 0-9: diff --git a/src/util/strencodings.h b/src/util/strencodings.h index 1217572c45..166352c42f 100644 --- a/src/util/strencodings.h +++ b/src/util/strencodings.h @@ -11,6 +11,7 @@ #include <attributes.h> #include <span.h> +#include <util/string.h> #include <charconv> #include <cstdint> @@ -68,8 +69,33 @@ std::string EncodeBase32(Span<const unsigned char> input, bool pad = true); std::string EncodeBase32(const std::string& str, bool pad = true); void SplitHostPort(std::string in, uint16_t& portOut, std::string& hostOut); -int64_t atoi64(const std::string& str); -int atoi(const std::string& str); + +// LocaleIndependentAtoi is provided for backwards compatibility reasons. +// +// New code should use the ParseInt64/ParseUInt64/ParseInt32/ParseUInt32 functions +// which provide parse error feedback. +// +// The goal of LocaleIndependentAtoi is to replicate the exact defined behaviour +// of atoi and atoi64 as they behave under the "C" locale. +template <typename T> +T LocaleIndependentAtoi(const std::string& str) +{ + static_assert(std::is_integral<T>::value); + T result; + // Emulate atoi(...) handling of white space and leading +/-. + std::string s = TrimString(str); + if (!s.empty() && s[0] == '+') { + if (s.length() >= 2 && s[1] == '-') { + return 0; + } + s = s.substr(1); + } + auto [_, error_condition] = std::from_chars(s.data(), s.data() + s.size(), result); + if (error_condition != std::errc{}) { + return 0; + } + return result; +} /** * Tests if the given character is a decimal digit. diff --git a/src/util/system.cpp b/src/util/system.cpp index 4defeed4ce..79c08816fa 100644 --- a/src/util/system.cpp +++ b/src/util/system.cpp @@ -158,16 +158,14 @@ std::streampos GetFileSize(const char* path, std::streamsize max) { /** * Interpret a string argument as a boolean. * - * The definition of atoi() requires that non-numeric string values like "foo", - * return 0. This means that if a user unintentionally supplies a non-integer - * argument here, the return value is always false. This means that -foo=false - * does what the user probably expects, but -foo=true is well defined but does - * not do what they probably expected. + * The definition of LocaleIndependentAtoi<int>() requires that non-numeric string values + * like "foo", return 0. This means that if a user unintentionally supplies a + * non-integer argument here, the return value is always false. This means that + * -foo=false does what the user probably expects, but -foo=true is well defined + * but does not do what they probably expected. * - * The return value of atoi() is undefined when given input not representable as - * an int. On most systems this means string value between "-2147483648" and - * "2147483647" are well defined (this method will return true). Setting - * -txindex=2147483648 on most systems, however, is probably undefined. + * The return value of LocaleIndependentAtoi<int>(...) is zero when given input not + * representable as an int. * * For a more extensive discussion of this topic (and a wide range of opinions * on the Right Way to change this code), see PR12713. @@ -176,7 +174,7 @@ static bool InterpretBool(const std::string& strValue) { if (strValue.empty()) return true; - return (atoi(strValue) != 0); + return (LocaleIndependentAtoi<int>(strValue) != 0); } static std::string SettingName(const std::string& arg) @@ -594,7 +592,7 @@ std::string ArgsManager::GetArg(const std::string& strArg, const std::string& st int64_t ArgsManager::GetIntArg(const std::string& strArg, int64_t nDefault) const { const util::SettingsValue value = GetSetting(strArg); - return value.isNull() ? nDefault : value.isFalse() ? 0 : value.isTrue() ? 1 : value.isNum() ? value.get_int64() : atoi64(value.get_str()); + return value.isNull() ? nDefault : value.isFalse() ? 0 : value.isTrue() ? 1 : value.isNum() ? value.get_int64() : LocaleIndependentAtoi<int64_t>(value.get_str()); } bool ArgsManager::GetBoolArg(const std::string& strArg, bool fDefault) const diff --git a/src/wallet/transaction.h b/src/wallet/transaction.h index 0cd91b9ebe..223901586e 100644 --- a/src/wallet/transaction.h +++ b/src/wallet/transaction.h @@ -216,9 +216,9 @@ public: } const auto it_op = mapValue.find("n"); - nOrderPos = (it_op != mapValue.end()) ? atoi64(it_op->second) : -1; + nOrderPos = (it_op != mapValue.end()) ? LocaleIndependentAtoi<int64_t>(it_op->second) : -1; const auto it_ts = mapValue.find("timesmart"); - nTimeSmart = (it_ts != mapValue.end()) ? static_cast<unsigned int>(atoi64(it_ts->second)) : 0; + nTimeSmart = (it_ts != mapValue.end()) ? static_cast<unsigned int>(LocaleIndependentAtoi<int64_t>(it_ts->second)) : 0; mapValue.erase("fromaccount"); mapValue.erase("spent"); diff --git a/test/lint/lint-locale-dependence.sh b/test/lint/lint-locale-dependence.sh index fcc4883d0b..3015c4f9b9 100755 --- a/test/lint/lint-locale-dependence.sh +++ b/test/lint/lint-locale-dependence.sh @@ -37,23 +37,18 @@ export LC_ALL=C # See https://doc.qt.io/qt-5/qcoreapplication.html#locale-settings and # https://stackoverflow.com/a/34878283 for more details. +# TODO: Reduce KNOWN_VIOLATIONS by replacing uses of locale dependent stoul/strtol with locale +# independent ToIntegral<T>(...). +# TODO: Reduce KNOWN_VIOLATIONS by replacing uses of locale dependent snprintf with strprintf. KNOWN_VIOLATIONS=( "src/bitcoin-tx.cpp.*stoul" "src/dbwrapper.cpp.*stoul" "src/dbwrapper.cpp:.*vsnprintf" - "src/node/blockstorage.cpp:.*atoi" - "src/qt/rpcconsole.cpp:.*atoi" "src/rest.cpp:.*strtol" "src/test/dbwrapper_tests.cpp:.*snprintf" "src/test/fuzz/locale.cpp" - "src/test/fuzz/parse_numbers.cpp:.*atoi" "src/test/fuzz/string.cpp" - "src/torcontrol.cpp:.*atoi" "src/torcontrol.cpp:.*strtol" - "src/util/strencodings.cpp:.*atoi" - "src/util/strencodings.cpp:.*strtoll" - "src/util/strencodings.h:.*atoi" - "src/util/system.cpp:.*atoi" ) REGEXP_IGNORE_EXTERNAL_DEPENDENCIES="^src/(crypto/ctaes/|leveldb/|secp256k1/|tinyformat.h|univalue/)" |