From 4747db876154ddd828c03d9eda10ecf8b25d8dc8 Mon Sep 17 00:00:00 2001 From: practicalswift Date: Sat, 18 Sep 2021 04:30:30 +0000 Subject: =?UTF-8?q?util:=20Introduce=20ToIntegral(const=20std::string&)?= =?UTF-8?q?=20for=20locale=20independent=20parsing=20using=20std::from=5Fc?= =?UTF-8?q?hars(=E2=80=A6)=20(C++17)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit util: Avoid locale dependent functions strtol/strtoll/strtoul/strtoull in ParseInt32/ParseInt64/ParseUInt32/ParseUInt64 fuzz: Assert equivalence between new and old Parse{Int,Uint}{8,32,64} functions test: Add unit tests for ToIntegral(const std::string&) --- src/test/fuzz/string.cpp | 135 ++++++++++++++++++++++++++++++++++++ src/test/util_tests.cpp | 75 ++++++++++++++++++++ src/util/strencodings.cpp | 110 ++++++++++------------------- src/util/strencodings.h | 20 ++++++ test/lint/lint-locale-dependence.sh | 4 +- 5 files changed, 270 insertions(+), 74 deletions(-) diff --git a/src/test/fuzz/string.cpp b/src/test/fuzz/string.cpp index 0c1b45b86c..dc2bf7c860 100644 --- a/src/test/fuzz/string.cpp +++ b/src/test/fuzz/string.cpp @@ -31,9 +31,99 @@ #include #include +#include #include #include +namespace { +bool LegacyParsePrechecks(const std::string& str) +{ + if (str.empty()) // No empty string allowed + return false; + if (str.size() >= 1 && (IsSpace(str[0]) || IsSpace(str[str.size() - 1]))) // No padding allowed + return false; + if (!ValidAsCString(str)) // No embedded NUL characters allowed + return false; + return true; +} + +bool LegacyParseInt32(const std::string& str, int32_t* out) +{ + if (!LegacyParsePrechecks(str)) + return false; + char* endp = nullptr; + errno = 0; // strtol will not set errno if valid + long int n = strtol(str.c_str(), &endp, 10); + if (out) *out = (int32_t)n; + // Note that strtol returns a *long int*, so even if strtol doesn't report an over/underflow + // we still have to check that the returned value is within the range of an *int32_t*. On 64-bit + // platforms the size of these types may be different. + return endp && *endp == 0 && !errno && + n >= std::numeric_limits::min() && + n <= std::numeric_limits::max(); +} + +bool LegacyParseInt64(const std::string& str, int64_t* out) +{ + if (!LegacyParsePrechecks(str)) + return false; + char* endp = nullptr; + errno = 0; // strtoll will not set errno if valid + long long int n = strtoll(str.c_str(), &endp, 10); + if (out) *out = (int64_t)n; + // Note that strtoll returns a *long long int*, so even if strtol doesn't report an over/underflow + // we still have to check that the returned value is within the range of an *int64_t*. + return endp && *endp == 0 && !errno && + n >= std::numeric_limits::min() && + n <= std::numeric_limits::max(); +} + +bool LegacyParseUInt32(const std::string& str, uint32_t* out) +{ + if (!LegacyParsePrechecks(str)) + return false; + if (str.size() >= 1 && str[0] == '-') // Reject negative values, unfortunately strtoul accepts these by default if they fit in the range + return false; + char* endp = nullptr; + errno = 0; // strtoul will not set errno if valid + unsigned long int n = strtoul(str.c_str(), &endp, 10); + if (out) *out = (uint32_t)n; + // Note that strtoul returns a *unsigned long int*, so even if it doesn't report an over/underflow + // we still have to check that the returned value is within the range of an *uint32_t*. On 64-bit + // platforms the size of these types may be different. + return endp && *endp == 0 && !errno && + n <= std::numeric_limits::max(); +} + +bool LegacyParseUInt8(const std::string& str, uint8_t* out) +{ + uint32_t u32; + if (!LegacyParseUInt32(str, &u32) || u32 > std::numeric_limits::max()) { + return false; + } + if (out != nullptr) { + *out = static_cast(u32); + } + return true; +} + +bool LegacyParseUInt64(const std::string& str, uint64_t* out) +{ + if (!LegacyParsePrechecks(str)) + return false; + if (str.size() >= 1 && str[0] == '-') // Reject negative values, unfortunately strtoull accepts these by default if they fit in the range + return false; + char* endp = nullptr; + errno = 0; // strtoull will not set errno if valid + unsigned long long int n = strtoull(str.c_str(), &endp, 10); + if (out) *out = (uint64_t)n; + // Note that strtoull returns a *unsigned long long int*, so even if it doesn't report an over/underflow + // we still have to check that the returned value is within the range of an *uint64_t*. + return endp && *endp == 0 && !errno && + n <= std::numeric_limits::max(); +} +}; // namespace + FUZZ_TARGET(string) { FuzzedDataProvider fuzzed_data_provider(buffer.data(), buffer.size()); @@ -133,4 +223,49 @@ FUZZ_TARGET(string) const bilingual_str bs2{random_string_2, random_string_1}; (void)(bs1 + bs2); } + { + int32_t i32; + int64_t i64; + uint32_t u32; + uint64_t u64; + uint8_t u8; + const bool ok_i32 = ParseInt32(random_string_1, &i32); + const bool ok_i64 = ParseInt64(random_string_1, &i64); + const bool ok_u32 = ParseUInt32(random_string_1, &u32); + const bool ok_u64 = ParseUInt64(random_string_1, &u64); + const bool ok_u8 = ParseUInt8(random_string_1, &u8); + + int32_t i32_legacy; + int64_t i64_legacy; + uint32_t u32_legacy; + uint64_t u64_legacy; + uint8_t u8_legacy; + const bool ok_i32_legacy = LegacyParseInt32(random_string_1, &i32_legacy); + const bool ok_i64_legacy = LegacyParseInt64(random_string_1, &i64_legacy); + const bool ok_u32_legacy = LegacyParseUInt32(random_string_1, &u32_legacy); + const bool ok_u64_legacy = LegacyParseUInt64(random_string_1, &u64_legacy); + const bool ok_u8_legacy = LegacyParseUInt8(random_string_1, &u8_legacy); + + assert(ok_i32 == ok_i32_legacy); + assert(ok_i64 == ok_i64_legacy); + assert(ok_u32 == ok_u32_legacy); + assert(ok_u64 == ok_u64_legacy); + assert(ok_u8 == ok_u8_legacy); + + if (ok_i32) { + assert(i32 == i32_legacy); + } + if (ok_i64) { + assert(i64 == i64_legacy); + } + if (ok_u32) { + assert(u32 == u32_legacy); + } + if (ok_u64) { + assert(u64 == u64_legacy); + } + if (ok_u8) { + assert(u8 == u8_legacy); + } + } } diff --git a/src/test/util_tests.cpp b/src/test/util_tests.cpp index a62abf9b9c..5ee522ea0a 100644 --- a/src/test/util_tests.cpp +++ b/src/test/util_tests.cpp @@ -1474,6 +1474,81 @@ BOOST_AUTO_TEST_CASE(test_ParseInt32) BOOST_CHECK(!ParseInt32("32482348723847471234", nullptr)); } +BOOST_AUTO_TEST_CASE(test_ToIntegral) +{ + BOOST_CHECK_EQUAL(ToIntegral("1234").value(), 1'234); + BOOST_CHECK_EQUAL(ToIntegral("0").value(), 0); + BOOST_CHECK_EQUAL(ToIntegral("01234").value(), 1'234); + BOOST_CHECK_EQUAL(ToIntegral("00000000000000001234").value(), 1'234); + BOOST_CHECK_EQUAL(ToIntegral("-00000000000000001234").value(), -1'234); + BOOST_CHECK_EQUAL(ToIntegral("00000000000000000000").value(), 0); + BOOST_CHECK_EQUAL(ToIntegral("-00000000000000000000").value(), 0); + BOOST_CHECK_EQUAL(ToIntegral("-1234").value(), -1'234); + BOOST_CHECK_EQUAL(ToIntegral("-1").value(), -1); + + BOOST_CHECK(!ToIntegral(" 1")); + BOOST_CHECK(!ToIntegral("1 ")); + BOOST_CHECK(!ToIntegral("1a")); + BOOST_CHECK(!ToIntegral("1.1")); + BOOST_CHECK(!ToIntegral("1.9")); + BOOST_CHECK(!ToIntegral("+01.9")); + BOOST_CHECK(!ToIntegral(" -1")); + BOOST_CHECK(!ToIntegral("-1 ")); + BOOST_CHECK(!ToIntegral(" -1 ")); + BOOST_CHECK(!ToIntegral("+1")); + BOOST_CHECK(!ToIntegral(" +1")); + BOOST_CHECK(!ToIntegral(" +1 ")); + BOOST_CHECK(!ToIntegral("+-1")); + BOOST_CHECK(!ToIntegral("-+1")); + BOOST_CHECK(!ToIntegral("++1")); + BOOST_CHECK(!ToIntegral("--1")); + BOOST_CHECK(!ToIntegral("")); + BOOST_CHECK(!ToIntegral("aap")); + BOOST_CHECK(!ToIntegral("0x1")); + BOOST_CHECK(!ToIntegral("-32482348723847471234")); + BOOST_CHECK(!ToIntegral("32482348723847471234")); + + BOOST_CHECK(!ToIntegral("-9223372036854775809")); + BOOST_CHECK_EQUAL(ToIntegral("-9223372036854775808").value(), -9'223'372'036'854'775'807LL - 1LL); + BOOST_CHECK_EQUAL(ToIntegral("9223372036854775807").value(), 9'223'372'036'854'775'807); + BOOST_CHECK(!ToIntegral("9223372036854775808")); + + BOOST_CHECK(!ToIntegral("-1")); + BOOST_CHECK_EQUAL(ToIntegral("0").value(), 0U); + BOOST_CHECK_EQUAL(ToIntegral("18446744073709551615").value(), 18'446'744'073'709'551'615ULL); + BOOST_CHECK(!ToIntegral("18446744073709551616")); + + BOOST_CHECK(!ToIntegral("-2147483649")); + BOOST_CHECK_EQUAL(ToIntegral("-2147483648").value(), -2'147'483'648LL); + BOOST_CHECK_EQUAL(ToIntegral("2147483647").value(), 2'147'483'647); + BOOST_CHECK(!ToIntegral("2147483648")); + + BOOST_CHECK(!ToIntegral("-1")); + BOOST_CHECK_EQUAL(ToIntegral("0").value(), 0U); + BOOST_CHECK_EQUAL(ToIntegral("4294967295").value(), 4'294'967'295U); + BOOST_CHECK(!ToIntegral("4294967296")); + + BOOST_CHECK(!ToIntegral("-32769")); + BOOST_CHECK_EQUAL(ToIntegral("-32768").value(), -32'768); + BOOST_CHECK_EQUAL(ToIntegral("32767").value(), 32'767); + BOOST_CHECK(!ToIntegral("32768")); + + BOOST_CHECK(!ToIntegral("-1")); + BOOST_CHECK_EQUAL(ToIntegral("0").value(), 0U); + BOOST_CHECK_EQUAL(ToIntegral("65535").value(), 65'535U); + BOOST_CHECK(!ToIntegral("65536")); + + BOOST_CHECK(!ToIntegral("-129")); + BOOST_CHECK_EQUAL(ToIntegral("-128").value(), -128); + BOOST_CHECK_EQUAL(ToIntegral("127").value(), 127); + BOOST_CHECK(!ToIntegral("128")); + + BOOST_CHECK(!ToIntegral("-1")); + BOOST_CHECK_EQUAL(ToIntegral("0").value(), 0U); + BOOST_CHECK_EQUAL(ToIntegral("255").value(), 255U); + BOOST_CHECK(!ToIntegral("256")); +} + BOOST_AUTO_TEST_CASE(test_ParseInt64) { int64_t n; diff --git a/src/util/strencodings.cpp b/src/util/strencodings.cpp index f514613f0d..0aa80ea0ae 100644 --- a/src/util/strencodings.cpp +++ b/src/util/strencodings.cpp @@ -11,8 +11,7 @@ #include #include #include -#include -#include +#include static const std::string CHARS_ALPHA_NUM = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; @@ -282,6 +281,32 @@ std::string DecodeBase32(const std::string& str, bool* pf_invalid) return std::string((const char*)vchRet.data(), vchRet.size()); } +[[nodiscard]] static bool ParsePrechecks(const std::string&); + +namespace { +template +bool ParseIntegral(const std::string& str, T* out) +{ + static_assert(std::is_integral::value); + if (!ParsePrechecks(str)) { + return false; + } + // Replicate the exact behavior of strtol/strtoll/strtoul/strtoull when + // handling leading +/- for backwards compatibility. + if (str.length() >= 2 && str[0] == '+' && str[1] == '-') { + return false; + } + const std::optional opt_int = ToIntegral((!str.empty() && str[0] == '+') ? str.substr(1) : str); + if (!opt_int) { + return false; + } + if (out != nullptr) { + *out = *opt_int; + } + return true; +} +}; // namespace + [[nodiscard]] static bool ParsePrechecks(const std::string& str) { if (str.empty()) // No empty string allowed @@ -293,95 +318,36 @@ std::string DecodeBase32(const std::string& str, bool* pf_invalid) return true; } -bool ParseInt32(const std::string& str, int32_t *out) +bool ParseInt32(const std::string& str, int32_t* out) { - if (!ParsePrechecks(str)) - return false; - char *endp = nullptr; - errno = 0; // strtol will not set errno if valid - long int n = strtol(str.c_str(), &endp, 10); - if(out) *out = (int32_t)n; - // Note that strtol returns a *long int*, so even if strtol doesn't report an over/underflow - // we still have to check that the returned value is within the range of an *int32_t*. On 64-bit - // platforms the size of these types may be different. - return endp && *endp == 0 && !errno && - n >= std::numeric_limits::min() && - n <= std::numeric_limits::max(); + return ParseIntegral(str, out); } -bool ParseInt64(const std::string& str, int64_t *out) +bool ParseInt64(const std::string& str, int64_t* out) { - if (!ParsePrechecks(str)) - return false; - char *endp = nullptr; - errno = 0; // strtoll will not set errno if valid - long long int n = strtoll(str.c_str(), &endp, 10); - if(out) *out = (int64_t)n; - // Note that strtoll returns a *long long int*, so even if strtol doesn't report an over/underflow - // we still have to check that the returned value is within the range of an *int64_t*. - return endp && *endp == 0 && !errno && - n >= std::numeric_limits::min() && - n <= std::numeric_limits::max(); + return ParseIntegral(str, out); } -bool ParseUInt8(const std::string& str, uint8_t *out) +bool ParseUInt8(const std::string& str, uint8_t* out) { - uint32_t u32; - if (!ParseUInt32(str, &u32) || u32 > std::numeric_limits::max()) { - return false; - } - if (out != nullptr) { - *out = static_cast(u32); - } - return true; + return ParseIntegral(str, out); } bool ParseUInt16(const std::string& str, uint16_t* out) { - uint32_t u32; - if (!ParseUInt32(str, &u32) || u32 > std::numeric_limits::max()) { - return false; - } - if (out != nullptr) { - *out = static_cast(u32); - } - return true; + return ParseIntegral(str, out); } -bool ParseUInt32(const std::string& str, uint32_t *out) +bool ParseUInt32(const std::string& str, uint32_t* out) { - if (!ParsePrechecks(str)) - return false; - if (str.size() >= 1 && str[0] == '-') // Reject negative values, unfortunately strtoul accepts these by default if they fit in the range - return false; - char *endp = nullptr; - errno = 0; // strtoul will not set errno if valid - unsigned long int n = strtoul(str.c_str(), &endp, 10); - if(out) *out = (uint32_t)n; - // Note that strtoul returns a *unsigned long int*, so even if it doesn't report an over/underflow - // we still have to check that the returned value is within the range of an *uint32_t*. On 64-bit - // platforms the size of these types may be different. - return endp && *endp == 0 && !errno && - n <= std::numeric_limits::max(); + return ParseIntegral(str, out); } -bool ParseUInt64(const std::string& str, uint64_t *out) +bool ParseUInt64(const std::string& str, uint64_t* out) { - if (!ParsePrechecks(str)) - return false; - if (str.size() >= 1 && str[0] == '-') // Reject negative values, unfortunately strtoull accepts these by default if they fit in the range - return false; - char *endp = nullptr; - errno = 0; // strtoull will not set errno if valid - unsigned long long int n = strtoull(str.c_str(), &endp, 10); - if(out) *out = (uint64_t)n; - // Note that strtoull returns a *unsigned long long int*, so even if it doesn't report an over/underflow - // we still have to check that the returned value is within the range of an *uint64_t*. - return endp && *endp == 0 && !errno && - n <= std::numeric_limits::max(); + return ParseIntegral(str, out); } - bool ParseDouble(const std::string& str, double *out) { if (!ParsePrechecks(str)) diff --git a/src/util/strencodings.h b/src/util/strencodings.h index 26dc0a0ce3..1217572c45 100644 --- a/src/util/strencodings.h +++ b/src/util/strencodings.h @@ -12,8 +12,10 @@ #include #include +#include #include #include +#include #include #include @@ -94,6 +96,24 @@ constexpr inline bool IsSpace(char c) noexcept { return c == ' ' || c == '\f' || c == '\n' || c == '\r' || c == '\t' || c == '\v'; } +/** + * Convert string to integral type T. + * + * @returns std::nullopt if the entire string could not be parsed, or if the + * parsed value is not in the range representable by the type T. + */ +template +std::optional ToIntegral(const std::string& str) +{ + static_assert(std::is_integral::value); + T result; + const auto [first_nonmatching, error_condition] = std::from_chars(str.data(), str.data() + str.size(), result); + if (first_nonmatching != str.data() + str.size() || error_condition != std::errc{}) { + return std::nullopt; + } + return {result}; +} + /** * Convert string to signed 32-bit integer with strict parse error feedback. * @returns true if the entire string could be parsed as valid integer, diff --git a/test/lint/lint-locale-dependence.sh b/test/lint/lint-locale-dependence.sh index d6312270e7..fcc4883d0b 100755 --- a/test/lint/lint-locale-dependence.sh +++ b/test/lint/lint-locale-dependence.sh @@ -47,11 +47,11 @@ KNOWN_VIOLATIONS=( "src/test/dbwrapper_tests.cpp:.*snprintf" "src/test/fuzz/locale.cpp" "src/test/fuzz/parse_numbers.cpp:.*atoi" + "src/test/fuzz/string.cpp" "src/torcontrol.cpp:.*atoi" "src/torcontrol.cpp:.*strtol" "src/util/strencodings.cpp:.*atoi" - "src/util/strencodings.cpp:.*strtol" - "src/util/strencodings.cpp:.*strtoul" + "src/util/strencodings.cpp:.*strtoll" "src/util/strencodings.h:.*atoi" "src/util/system.cpp:.*atoi" ) -- cgit v1.2.3