diff options
Diffstat (limited to 'src/util/strencodings.cpp')
-rw-r--r-- | src/util/strencodings.cpp | 237 |
1 files changed, 102 insertions, 135 deletions
diff --git a/src/util/strencodings.cpp b/src/util/strencodings.cpp index 940fa90da2..b5ac151374 100644 --- a/src/util/strencodings.cpp +++ b/src/util/strencodings.cpp @@ -3,16 +3,17 @@ // Distributed under the MIT software license, see the accompanying // file COPYING or http://www.opensource.org/licenses/mit-license.php. +#include <span.h> #include <util/strencodings.h> -#include <util/string.h> -#include <tinyformat.h> - -#include <algorithm> -#include <cstdlib> +#include <array> +#include <cassert> #include <cstring> #include <limits> #include <optional> +#include <ostream> +#include <string> +#include <vector> static const std::string CHARS_ALPHA_NUM = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; @@ -24,15 +25,15 @@ static const std::string SAFE_CHARS[] = CHARS_ALPHA_NUM + "!*'();:@&=+$,/?#[]-_.~%", // SAFE_CHARS_URI }; -std::string SanitizeString(const std::string& str, int rule) +std::string SanitizeString(std::string_view str, int rule) { - std::string strResult; - for (std::string::size_type i = 0; i < str.size(); i++) - { - if (SAFE_CHARS[rule].find(str[i]) != std::string::npos) - strResult.push_back(str[i]); + std::string result; + for (char c : str) { + if (SAFE_CHARS[rule].find(c) != std::string::npos) { + result.push_back(c); + } } - return strResult; + return result; } const signed char p_util_hexdigit[256] = @@ -58,56 +59,45 @@ signed char HexDigit(char c) return p_util_hexdigit[(unsigned char)c]; } -bool IsHex(const std::string& str) +bool IsHex(std::string_view str) { - for(std::string::const_iterator it(str.begin()); it != str.end(); ++it) - { - if (HexDigit(*it) < 0) - return false; + for (char c : str) { + if (HexDigit(c) < 0) return false; } return (str.size() > 0) && (str.size()%2 == 0); } -bool IsHexNumber(const std::string& str) +bool IsHexNumber(std::string_view str) { - size_t starting_location = 0; - if (str.size() > 2 && *str.begin() == '0' && *(str.begin()+1) == 'x') { - starting_location = 2; - } - for (const char c : str.substr(starting_location)) { + if (str.substr(0, 2) == "0x") str.remove_prefix(2); + for (char c : str) { if (HexDigit(c) < 0) return false; } // Return false for empty string or "0x". - return (str.size() > starting_location); + return str.size() > 0; } -std::vector<unsigned char> ParseHex(const char* psz) +template <typename Byte> +std::vector<Byte> ParseHex(std::string_view str) { - // convert hex dump to vector - std::vector<unsigned char> vch; - while (true) - { - while (IsSpace(*psz)) - psz++; - signed char c = HexDigit(*psz++); - if (c == (signed char)-1) - break; - auto n{uint8_t(c << 4)}; - c = HexDigit(*psz++); - if (c == (signed char)-1) - break; - n |= c; - vch.push_back(n); + std::vector<Byte> vch; + auto it = str.begin(); + while (it != str.end() && it + 1 != str.end()) { + if (IsSpace(*it)) { + ++it; + continue; + } + auto c1 = HexDigit(*(it++)); + auto c2 = HexDigit(*(it++)); + if (c1 < 0 || c2 < 0) break; + vch.push_back(Byte(c1 << 4) | Byte(c2)); } return vch; } +template std::vector<std::byte> ParseHex(std::string_view); +template std::vector<uint8_t> ParseHex(std::string_view); -std::vector<unsigned char> ParseHex(const std::string& str) -{ - return ParseHex(str.c_str()); -} - -void SplitHostPort(std::string in, uint16_t& portOut, std::string& hostOut) +void SplitHostPort(std::string_view in, uint16_t& portOut, std::string& hostOut) { size_t colon = in.find_last_of(':'); // if a : is found, and it either follows a [...], or no other : is in the string, treat it as port separator @@ -139,7 +129,7 @@ std::string EncodeBase64(Span<const unsigned char> input) return str; } -std::vector<unsigned char> DecodeBase64(const char* p, bool* pf_invalid) +std::optional<std::vector<unsigned char>> DecodeBase64(std::string_view str) { static const int8_t decode64_table[256]{ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, @@ -157,46 +147,23 @@ std::vector<unsigned char> DecodeBase64(const char* p, bool* pf_invalid) -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }; - const char* e = p; - std::vector<uint8_t> val; - val.reserve(strlen(p)); - while (*p != 0) { - int x = decode64_table[(unsigned char)*p]; - if (x == -1) break; - val.push_back(uint8_t(x)); - ++p; - } + if (str.size() % 4 != 0) return {}; + /* One or two = characters at the end are permitted. */ + if (str.size() >= 1 && str.back() == '=') str.remove_suffix(1); + if (str.size() >= 1 && str.back() == '=') str.remove_suffix(1); std::vector<unsigned char> ret; - ret.reserve((val.size() * 3) / 4); - bool valid = ConvertBits<6, 8, false>([&](unsigned char c) { ret.push_back(c); }, val.begin(), val.end()); - - const char* q = p; - while (valid && *p != 0) { - if (*p != '=') { - valid = false; - break; - } - ++p; - } - valid = valid && (p - e) % 4 == 0 && p - q < 4; - if (pf_invalid) *pf_invalid = !valid; + ret.reserve((str.size() * 3) / 4); + bool valid = ConvertBits<6, 8, false>( + [&](unsigned char c) { ret.push_back(c); }, + str.begin(), str.end(), + [](char c) { return decode64_table[uint8_t(c)]; } + ); + if (!valid) return {}; return ret; } -std::string DecodeBase64(const std::string& str, bool* pf_invalid) -{ - if (!ValidAsCString(str)) { - if (pf_invalid) { - *pf_invalid = true; - } - return {}; - } - std::vector<unsigned char> vchRet = DecodeBase64(str.c_str(), pf_invalid); - return std::string((const char*)vchRet.data(), vchRet.size()); -} - std::string EncodeBase32(Span<const unsigned char> input, bool pad) { static const char *pbase32 = "abcdefghijklmnopqrstuvwxyz234567"; @@ -212,12 +179,12 @@ std::string EncodeBase32(Span<const unsigned char> input, bool pad) return str; } -std::string EncodeBase32(const std::string& str, bool pad) +std::string EncodeBase32(std::string_view str, bool pad) { return EncodeBase32(MakeUCharSpan(str), pad); } -std::vector<unsigned char> DecodeBase32(const char* p, bool* pf_invalid) +std::optional<std::vector<unsigned char>> DecodeBase32(std::string_view str) { static const int8_t decode32_table[256]{ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, @@ -235,49 +202,29 @@ std::vector<unsigned char> DecodeBase32(const char* p, bool* pf_invalid) -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }; - const char* e = p; - std::vector<uint8_t> val; - val.reserve(strlen(p)); - while (*p != 0) { - int x = decode32_table[(unsigned char)*p]; - if (x == -1) break; - val.push_back(uint8_t(x)); - ++p; - } + if (str.size() % 8 != 0) return {}; + /* 1, 3, 4, or 6 padding '=' suffix characters are permitted. */ + if (str.size() >= 1 && str.back() == '=') str.remove_suffix(1); + if (str.size() >= 2 && str.substr(str.size() - 2) == "==") str.remove_suffix(2); + if (str.size() >= 1 && str.back() == '=') str.remove_suffix(1); + if (str.size() >= 2 && str.substr(str.size() - 2) == "==") str.remove_suffix(2); std::vector<unsigned char> ret; - ret.reserve((val.size() * 5) / 8); - bool valid = ConvertBits<5, 8, false>([&](unsigned char c) { ret.push_back(c); }, val.begin(), val.end()); - - const char* q = p; - while (valid && *p != 0) { - if (*p != '=') { - valid = false; - break; - } - ++p; - } - valid = valid && (p - e) % 8 == 0 && p - q < 8; - if (pf_invalid) *pf_invalid = !valid; + ret.reserve((str.size() * 5) / 8); + bool valid = ConvertBits<5, 8, false>( + [&](unsigned char c) { ret.push_back(c); }, + str.begin(), str.end(), + [](char c) { return decode32_table[uint8_t(c)]; } + ); - return ret; -} + if (!valid) return {}; -std::string DecodeBase32(const std::string& str, bool* pf_invalid) -{ - if (!ValidAsCString(str)) { - if (pf_invalid) { - *pf_invalid = true; - } - return {}; - } - std::vector<unsigned char> vchRet = DecodeBase32(str.c_str(), pf_invalid); - return std::string((const char*)vchRet.data(), vchRet.size()); + return ret; } namespace { template <typename T> -bool ParseIntegral(const std::string& str, T* out) +bool ParseIntegral(std::string_view str, T* out) { static_assert(std::is_integral<T>::value); // Replicate the exact behavior of strtol/strtoll/strtoul/strtoull when @@ -296,37 +243,37 @@ bool ParseIntegral(const std::string& str, T* out) } }; // namespace -bool ParseInt32(const std::string& str, int32_t* out) +bool ParseInt32(std::string_view str, int32_t* out) { return ParseIntegral<int32_t>(str, out); } -bool ParseInt64(const std::string& str, int64_t* out) +bool ParseInt64(std::string_view str, int64_t* out) { return ParseIntegral<int64_t>(str, out); } -bool ParseUInt8(const std::string& str, uint8_t* out) +bool ParseUInt8(std::string_view str, uint8_t* out) { return ParseIntegral<uint8_t>(str, out); } -bool ParseUInt16(const std::string& str, uint16_t* out) +bool ParseUInt16(std::string_view str, uint16_t* out) { return ParseIntegral<uint16_t>(str, out); } -bool ParseUInt32(const std::string& str, uint32_t* out) +bool ParseUInt32(std::string_view str, uint32_t* out) { return ParseIntegral<uint32_t>(str, out); } -bool ParseUInt64(const std::string& str, uint64_t* out) +bool ParseUInt64(std::string_view str, uint64_t* out) { return ParseIntegral<uint64_t>(str, out); } -std::string FormatParagraph(const std::string& in, size_t width, size_t indent) +std::string FormatParagraph(std::string_view in, size_t width, size_t indent) { assert(width >= indent); std::stringstream out; @@ -395,7 +342,7 @@ static inline bool ProcessMantissaDigit(char ch, int64_t &mantissa, int &mantiss return true; } -bool ParseFixedPoint(const std::string &val, int decimals, int64_t *amount_out) +bool ParseFixedPoint(std::string_view val, int decimals, int64_t *amount_out) { int64_t mantissa = 0; int64_t exponent = 0; @@ -487,14 +434,14 @@ bool ParseFixedPoint(const std::string &val, int decimals, int64_t *amount_out) return true; } -std::string ToLower(const std::string& str) +std::string ToLower(std::string_view str) { std::string r; for (auto ch : str) r += ToLower(ch); return r; } -std::string ToUpper(const std::string& str) +std::string ToUpper(std::string_view str) { std::string r; for (auto ch : str) r += ToUpper(ch); @@ -508,21 +455,41 @@ std::string Capitalize(std::string str) return str; } +namespace { + +using ByteAsHex = std::array<char, 2>; + +constexpr std::array<ByteAsHex, 256> CreateByteToHexMap() +{ + constexpr char hexmap[16] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'}; + + std::array<ByteAsHex, 256> byte_to_hex{}; + for (size_t i = 0; i < byte_to_hex.size(); ++i) { + byte_to_hex[i][0] = hexmap[i >> 4]; + byte_to_hex[i][1] = hexmap[i & 15]; + } + return byte_to_hex; +} + +} // namespace + std::string HexStr(const Span<const uint8_t> s) { std::string rv(s.size() * 2, '\0'); - static constexpr char hexmap[16] = { '0', '1', '2', '3', '4', '5', '6', '7', - '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' }; - auto it = rv.begin(); + static constexpr auto byte_to_hex = CreateByteToHexMap(); + static_assert(sizeof(byte_to_hex) == 512); + + char* it = rv.data(); for (uint8_t v : s) { - *it++ = hexmap[v >> 4]; - *it++ = hexmap[v & 15]; + std::memcpy(it, byte_to_hex[v].data(), 2); + it += 2; } - assert(it == rv.end()); + + assert(it == rv.data() + rv.size()); return rv; } -std::optional<uint64_t> ParseByteUnits(const std::string& str, ByteUnit default_multiplier) +std::optional<uint64_t> ParseByteUnits(std::string_view str, ByteUnit default_multiplier) { if (str.empty()) { return std::nullopt; |