about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: fanquake <fanquake@gmail.com> 2023-02-27 14:21:58 +0000
committer: fanquake <fanquake@gmail.com> 2023-02-27 14:27:50 +0000
commit: a2877f7ad3a58327215f782003a85a6f46486e4c (patch)
tree: 813877349c25ac5871a272290a2d459b7dac0e56
parent: 873dcc19102f6017dac070fa83e2333f8bf6845b (diff)
parent: faab273e060d27e166b5fb7fe7692614ec9e5c76 (diff)
Merge bitcoin/bitcoin#25227: Handle invalid hex encoding in ParseHex
faab273e060d27e166b5fb7fe7692614ec9e5c76 util: Return empty vector on invalid hex encoding (MarcoFalke)
fa3549a77bf6a15d8309d36056237f3126baf721 test: Add hex parse unit tests (MarcoFalke)

Pull request description:

  Seems a bit confusing to happily accept random bytes and pretend they are hex encoded strings.

ACKs for top commit:
  stickies-v:
    re-ACK faab273e060d27e166b5fb7fe7692614ec9e5c76

Tree-SHA512: a808135f744f50aece03d4bf5a71481c7bdca1fcdd0d5b113abdb0c8b382bf81cafee6d17c239041fb49b59f4e19970f24a475378e7f711c3a47d6438de2bdab
-rw-r--r--  src/test/util_tests.cpp    38
-rw-r--r--  src/util/strencodings.cpp   7
-rw-r--r--  src/util/strencodings.h    10
3 files changed, 44 insertions, 11 deletions
diff --git a/src/test/util_tests.cpp b/src/test/util_tests.cpp
index f0dcee7a9b..a13552653e 100644
--- a/src/test/util_tests.cpp
+++ b/src/test/util_tests.cpp
@@ -141,26 +141,52 @@ BOOST_AUTO_TEST_CASE(parse_hex)
// Basic test vector
result = ParseHex("04678afdb0fe5548271967f1a67130b7105cd6a828e03909a67962e0ea1f61deb649f6bc3f4cef38c4f35504e51ec112de5c384df7ba0b8d578a4c702b6bf11d5f");
BOOST_CHECK_EQUAL_COLLECTIONS(result.begin(), result.end(), expected.begin(), expected.end());
+ result = TryParseHex<uint8_t>("04678afdb0fe5548271967f1a67130b7105cd6a828e03909a67962e0ea1f61deb649f6bc3f4cef38c4f35504e51ec112de5c384df7ba0b8d578a4c702b6bf11d5f").value();
+ BOOST_CHECK_EQUAL_COLLECTIONS(result.begin(), result.end(), expected.begin(), expected.end());
// Spaces between bytes must be supported
result = ParseHex("12 34 56 78");
BOOST_CHECK(result.size() == 4 && result[0] == 0x12 && result[1] == 0x34 && result[2] == 0x56 && result[3] == 0x78);
+ result = TryParseHex<uint8_t>("12 34 56 78").value();
+ BOOST_CHECK(result.size() == 4 && result[0] == 0x12 && result[1] == 0x34 && result[2] == 0x56 && result[3] == 0x78);
// Leading space must be supported (used in BerkeleyEnvironment::Salvage)
result = ParseHex(" 89 34 56 78");
BOOST_CHECK(result.size() == 4 && result[0] == 0x89 && result[1] == 0x34 && result[2] == 0x56 && result[3] == 0x78);
+ result = TryParseHex<uint8_t>(" 89 34 56 78").value();
+ BOOST_CHECK(result.size() == 4 && result[0] == 0x89 && result[1] == 0x34 && result[2] == 0x56 && result[3] == 0x78);
+
+ // Mixed case and spaces are supported
+ result = ParseHex(" Ff aA ");
+ BOOST_CHECK(result.size() == 2 && result[0] == 0xff && result[1] == 0xaa);
+ result = TryParseHex<uint8_t>(" Ff aA ").value();
+ BOOST_CHECK(result.size() == 2 && result[0] == 0xff && result[1] == 0xaa);
- // Embedded null is treated as end
+ // Empty string is supported
+ result = ParseHex("");
+ BOOST_CHECK(result.size() == 0);
+ result = TryParseHex<uint8_t>("").value();
+ BOOST_CHECK(result.size() == 0);
+
+ // Spaces between nibbles is treated as invalid
+ BOOST_CHECK_EQUAL(ParseHex("AAF F").size(), 0);
+ BOOST_CHECK(!TryParseHex("AAF F").has_value());
+
+ // Embedded null is treated as invalid
const std::string with_embedded_null{" 11 "s
" \0 "
" 22 "s};
BOOST_CHECK_EQUAL(with_embedded_null.size(), 11);
- result = ParseHex(with_embedded_null);
- BOOST_CHECK(result.size() == 1 && result[0] == 0x11);
+ BOOST_CHECK_EQUAL(ParseHex(with_embedded_null).size(), 0);
+ BOOST_CHECK(!TryParseHex(with_embedded_null).has_value());
+
+ // Non-hex is treated as invalid
+ BOOST_CHECK_EQUAL(ParseHex("1234 invalid 1234").size(), 0);
+ BOOST_CHECK(!TryParseHex("1234 invalid 1234").has_value());
- // Stop parsing at invalid value
- result = ParseHex("1234 invalid 1234");
- BOOST_CHECK(result.size() == 2 && result[0] == 0x12 && result[1] == 0x34);
+ // Truncated input is treated as invalid
+ BOOST_CHECK_EQUAL(ParseHex("12 3").size(), 0);
+ BOOST_CHECK(!TryParseHex("12 3").has_value());
}
BOOST_AUTO_TEST_CASE(util_HexStr)
diff --git a/src/util/strencodings.cpp b/src/util/strencodings.cpp
index 45a01429e1..03459dcf20 100644
--- a/src/util/strencodings.cpp
+++ b/src/util/strencodings.cpp
@@ -78,18 +78,19 @@ bool IsHexNumber(std::string_view str)
}
template <typename Byte>
-std::vector<Byte> ParseHex(std::string_view str)
+std::optional<std::vector<Byte>> TryParseHex(std::string_view str)
{
std::vector<Byte> vch;
auto it = str.begin();
- while (it != str.end() && it + 1 != str.end()) {
+ while (it != str.end()) {
if (IsSpace(*it)) {
++it;
continue;
}
auto c1 = HexDigit(*(it++));
+ if (it == str.end()) return std::nullopt;
auto c2 = HexDigit(*(it++));
- if (c1 < 0 || c2 < 0) break;
+ if (c1 < 0 || c2 < 0) return std::nullopt;
vch.push_back(Byte(c1 << 4) | Byte(c2));
}
return vch;
diff --git a/src/util/strencodings.h b/src/util/strencodings.h
index 626e48f499..05e7b957c4 100644
--- a/src/util/strencodings.h
+++ b/src/util/strencodings.h
@@ -57,9 +57,15 @@ enum class ByteUnit : uint64_t {
* @return A new string without unsafe chars
*/
std::string SanitizeString(std::string_view str, int rule = SAFE_CHARS_DEFAULT);
-/** Parse the hex string into bytes (uint8_t or std::byte). Ignores whitespace. */
+/** Parse the hex string into bytes (uint8_t or std::byte). Ignores whitespace. Returns nullopt on invalid input. */
+template <typename Byte = std::byte>
+std::optional<std::vector<Byte>> TryParseHex(std::string_view str);
+/** Like TryParseHex, but returns an empty vector on invalid input. */
template <typename Byte = uint8_t>
-std::vector<Byte> ParseHex(std::string_view str);
+std::vector<Byte> ParseHex(std::string_view hex_str)
+{
+ return TryParseHex<Byte>(hex_str).value_or(std::vector<Byte>{});
+}
signed char HexDigit(char c);
/* Returns true if each character in str is a hex character, and has an even
* number of hex digits.*/