src/util/strencodings.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372

// Copyright (c) 2009-2010 Satoshi Nakamoto
// Copyright (c) 2009-2022 The Bitcoin Core developers
// Distributed under the MIT software license, see the accompanying
// file COPYING or http://www.opensource.org/licenses/mit-license.php.

/**
 * Utilities for converting data from/to strings.
 */
#ifndef BITCOIN_UTIL_STRENCODINGS_H
#define BITCOIN_UTIL_STRENCODINGS_H

#include <crypto/hex_base.h> // IWYU pragma: export
#include <span.h>
#include <util/string.h>

#include <charconv>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <optional>
#include <string>      // IWYU pragma: export
#include <string_view> // IWYU pragma: export
#include <system_error>
#include <type_traits>
#include <vector>

/** Used by SanitizeString() */
enum SafeChars
{
    SAFE_CHARS_DEFAULT, //!< The full set of allowed chars
    SAFE_CHARS_UA_COMMENT, //!< BIP-0014 subset
    SAFE_CHARS_FILENAME, //!< Chars allowed in filenames
    SAFE_CHARS_URI, //!< Chars allowed in URIs (RFC 3986)
};

/**
 * Used by ParseByteUnits()
 * Lowercase base 1000
 * Uppercase base 1024
*/
enum class ByteUnit : uint64_t {
    NOOP = 1ULL,
    k = 1000ULL,
    K = 1024ULL,
    m = 1'000'000ULL,
    M = 1ULL << 20,
    g = 1'000'000'000ULL,
    G = 1ULL << 30,
    t = 1'000'000'000'000ULL,
    T = 1ULL << 40,
};

/**
* Remove unsafe chars. Safe chars chosen to allow simple messages/URLs/email
* addresses, but avoid anything even possibly remotely dangerous like & or >
* @param[in] str    The string to sanitize
* @param[in] rule   The set of safe chars to choose (default: least restrictive)
* @return           A new string without unsafe chars
*/
std::string SanitizeString(std::string_view str, int rule = SAFE_CHARS_DEFAULT);
/** Parse the hex string into bytes (uint8_t or std::byte). Ignores whitespace. Returns nullopt on invalid input. */
template <typename Byte = std::byte>
std::optional<std::vector<Byte>> TryParseHex(std::string_view str);
/** Like TryParseHex, but returns an empty vector on invalid input. */
template <typename Byte = uint8_t>
std::vector<Byte> ParseHex(std::string_view hex_str)
{
    return TryParseHex<Byte>(hex_str).value_or(std::vector<Byte>{});
}
/* Returns true if each character in str is a hex character, and has an even
 * number of hex digits.*/
bool IsHex(std::string_view str);
/**
* Return true if the string is a hex number, optionally prefixed with "0x"
*/
bool IsHexNumber(std::string_view str);
std::optional<std::vector<unsigned char>> DecodeBase64(std::string_view str);
std::string EncodeBase64(Span<const unsigned char> input);
inline std::string EncodeBase64(Span<const std::byte> input) { return EncodeBase64(MakeUCharSpan(input)); }
inline std::string EncodeBase64(std::string_view str) { return EncodeBase64(MakeUCharSpan(str)); }
std::optional<std::vector<unsigned char>> DecodeBase32(std::string_view str);

/**
 * Base32 encode.
 * If `pad` is true, then the output will be padded with '=' so that its length
 * is a multiple of 8.
 */
std::string EncodeBase32(Span<const unsigned char> input, bool pad = true);

/**
 * Base32 encode.
 * If `pad` is true, then the output will be padded with '=' so that its length
 * is a multiple of 8.
 */
std::string EncodeBase32(std::string_view str, bool pad = true);

/**
 * Splits socket address string into host string and port value.
 * Validates port value.
 *
 * @param[in] in        The socket address string to split.
 * @param[out] portOut  Port-portion of the input, if found and parsable.
 * @param[out] hostOut  Host-portion of the input, if found.
 * @return              true if port-portion is absent or within its allowed range, otherwise false
 */
bool SplitHostPort(std::string_view in, uint16_t& portOut, std::string& hostOut);

// LocaleIndependentAtoi is provided for backwards compatibility reasons.
//
// New code should use ToIntegral or the ParseInt* functions
// which provide parse error feedback.
//
// The goal of LocaleIndependentAtoi is to replicate the defined behaviour of
// std::atoi as it behaves under the "C" locale, and remove some undefined
// behavior. If the parsed value is bigger than the integer type's maximum
// value, or smaller than the integer type's minimum value, std::atoi has
// undefined behavior, while this function returns the maximum or minimum
// values, respectively.
template <typename T>
T LocaleIndependentAtoi(std::string_view str)
{
    static_assert(std::is_integral<T>::value);
    T result;
    // Emulate atoi(...) handling of white space and leading +/-.
    std::string_view s = util::TrimStringView(str);
    if (!s.empty() && s[0] == '+') {
        if (s.length() >= 2 && s[1] == '-') {
            return 0;
        }
        s = s.substr(1);
    }
    auto [_, error_condition] = std::from_chars(s.data(), s.data() + s.size(), result);
    if (error_condition == std::errc::result_out_of_range) {
        if (s.length() >= 1 && s[0] == '-') {
            // Saturate underflow, per strtoll's behavior.
            return std::numeric_limits<T>::min();
        } else {
            // Saturate overflow, per strtoll's behavior.
            return std::numeric_limits<T>::max();
        }
    } else if (error_condition != std::errc{}) {
        return 0;
    }
    return result;
}

/**
 * Tests if the given character is a decimal digit.
 * @param[in] c     character to test
 * @return          true if the argument is a decimal digit; otherwise false.
 */
constexpr bool IsDigit(char c)
{
    return c >= '0' && c <= '9';
}

/**
 * Tests if the given character is a whitespace character. The whitespace characters
 * are: space, form-feed ('\f'), newline ('\n'), carriage return ('\r'), horizontal
 * tab ('\t'), and vertical tab ('\v').
 *
 * This function is locale independent. Under the C locale this function gives the
 * same result as std::isspace.
 *
 * @param[in] c     character to test
 * @return          true if the argument is a whitespace character; otherwise false
 */
constexpr inline bool IsSpace(char c) noexcept {
    return c == ' ' || c == '\f' || c == '\n' || c == '\r' || c == '\t' || c == '\v';
}

/**
 * Convert string to integral type T. Leading whitespace, a leading +, or any
 * trailing character fail the parsing. The required format expressed as regex
 * is `-?[0-9]+`. The minus sign is only permitted for signed integer types.
 *
 * @returns std::nullopt if the entire string could not be parsed, or if the
 *   parsed value is not in the range representable by the type T.
 */
template <typename T>
std::optional<T> ToIntegral(std::string_view str)
{
    static_assert(std::is_integral<T>::value);
    T result;
    const auto [first_nonmatching, error_condition] = std::from_chars(str.data(), str.data() + str.size(), result);
    if (first_nonmatching != str.data() + str.size() || error_condition != std::errc{}) {
        return std::nullopt;
    }
    return result;
}

/**
 * Convert string to signed 32-bit integer with strict parse error feedback.
 * @returns true if the entire string could be parsed as valid integer,
 *   false if not the entire string could be parsed or when overflow or underflow occurred.
 */
[[nodiscard]] bool ParseInt32(std::string_view str, int32_t *out);

/**
 * Convert string to signed 64-bit integer with strict parse error feedback.
 * @returns true if the entire string could be parsed as valid integer,
 *   false if not the entire string could be parsed or when overflow or underflow occurred.
 */
[[nodiscard]] bool ParseInt64(std::string_view str, int64_t *out);

/**
 * Convert decimal string to unsigned 8-bit integer with strict parse error feedback.
 * @returns true if the entire string could be parsed as valid integer,
 *   false if not the entire string could be parsed or when overflow or underflow occurred.
 */
[[nodiscard]] bool ParseUInt8(std::string_view str, uint8_t *out);

/**
 * Convert decimal string to unsigned 16-bit integer with strict parse error feedback.
 * @returns true if the entire string could be parsed as valid integer,
 *   false if the entire string could not be parsed or if overflow or underflow occurred.
 */
[[nodiscard]] bool ParseUInt16(std::string_view str, uint16_t* out);

/**
 * Convert decimal string to unsigned 32-bit integer with strict parse error feedback.
 * @returns true if the entire string could be parsed as valid integer,
 *   false if not the entire string could be parsed or when overflow or underflow occurred.
 */
[[nodiscard]] bool ParseUInt32(std::string_view str, uint32_t *out);

/**
 * Convert decimal string to unsigned 64-bit integer with strict parse error feedback.
 * @returns true if the entire string could be parsed as valid integer,
 *   false if not the entire string could be parsed or when overflow or underflow occurred.
 */
[[nodiscard]] bool ParseUInt64(std::string_view str, uint64_t *out);

/**
 * Format a paragraph of text to a fixed width, adding spaces for
 * indentation to any added line.
 */
std::string FormatParagraph(std::string_view in, size_t width = 79, size_t indent = 0);

/**
 * Timing-attack-resistant comparison.
 * Takes time proportional to length
 * of first argument.
 */
template <typename T>
bool TimingResistantEqual(const T& a, const T& b)
{
    if (b.size() == 0) return a.size() == 0;
    size_t accumulator = a.size() ^ b.size();
    for (size_t i = 0; i < a.size(); i++)
        accumulator |= size_t(a[i] ^ b[i%b.size()]);
    return accumulator == 0;
}

/** Parse number as fixed point according to JSON number syntax.
 * @returns true on success, false on error.
 * @note The result must be in the range (-10^18,10^18), otherwise an overflow error will trigger.
 */
[[nodiscard]] bool ParseFixedPoint(std::string_view, int decimals, int64_t *amount_out);

namespace {
/** Helper class for the default infn argument to ConvertBits (just returns the input). */
struct IntIdentity
{
    [[maybe_unused]] int operator()(int x) const { return x; }
};

} // namespace

/** Convert from one power-of-2 number base to another. */
template<int frombits, int tobits, bool pad, typename O, typename It, typename I = IntIdentity>
bool ConvertBits(O outfn, It it, It end, I infn = {}) {
    size_t acc = 0;
    size_t bits = 0;
    constexpr size_t maxv = (1 << tobits) - 1;
    constexpr size_t max_acc = (1 << (frombits + tobits - 1)) - 1;
    while (it != end) {
        int v = infn(*it);
        if (v < 0) return false;
        acc = ((acc << frombits) | v) & max_acc;
        bits += frombits;
        while (bits >= tobits) {
            bits -= tobits;
            outfn((acc >> bits) & maxv);
        }
        ++it;
    }
    if (pad) {
        if (bits) outfn((acc << (tobits - bits)) & maxv);
    } else if (bits >= frombits || ((acc << (tobits - bits)) & maxv)) {
        return false;
    }
    return true;
}

/**
 * Converts the given character to its lowercase equivalent.
 * This function is locale independent. It only converts uppercase
 * characters in the standard 7-bit ASCII range.
 * This is a feature, not a limitation.
 *
 * @param[in] c     the character to convert to lowercase.
 * @return          the lowercase equivalent of c; or the argument
 *                  if no conversion is possible.
 */
constexpr char ToLower(char c)
{
    return (c >= 'A' && c <= 'Z' ? (c - 'A') + 'a' : c);
}

/**
 * Returns the lowercase equivalent of the given string.
 * This function is locale independent. It only converts uppercase
 * characters in the standard 7-bit ASCII range.
 * This is a feature, not a limitation.
 *
 * @param[in] str   the string to convert to lowercase.
 * @returns         lowercased equivalent of str
 */
std::string ToLower(std::string_view str);

/**
 * Converts the given character to its uppercase equivalent.
 * This function is locale independent. It only converts lowercase
 * characters in the standard 7-bit ASCII range.
 * This is a feature, not a limitation.
 *
 * @param[in] c     the character to convert to uppercase.
 * @return          the uppercase equivalent of c; or the argument
 *                  if no conversion is possible.
 */
constexpr char ToUpper(char c)
{
    return (c >= 'a' && c <= 'z' ? (c - 'a') + 'A' : c);
}

/**
 * Returns the uppercase equivalent of the given string.
 * This function is locale independent. It only converts lowercase
 * characters in the standard 7-bit ASCII range.
 * This is a feature, not a limitation.
 *
 * @param[in] str   the string to convert to uppercase.
 * @returns         UPPERCASED EQUIVALENT OF str
 */
std::string ToUpper(std::string_view str);

/**
 * Capitalizes the first character of the given string.
 * This function is locale independent. It only converts lowercase
 * characters in the standard 7-bit ASCII range.
 * This is a feature, not a limitation.
 *
 * @param[in] str   the string to capitalize.
 * @returns         string with the first letter capitalized.
 */
std::string Capitalize(std::string str);

/**
 * Parse a string with suffix unit [k|K|m|M|g|G|t|T].
 * Must be a whole integer, fractions not allowed (0.5t), no whitespace or +-
 * Lowercase units are 1000 base. Uppercase units are 1024 base.
 * Examples: 2m,27M,19g,41T
 *
 * @param[in] str                  the string to convert into bytes
 * @param[in] default_multiplier   if no unit is found in str use this unit
 * @returns                        optional uint64_t bytes from str or nullopt
 *                                 if ToIntegral is false, str is empty, trailing whitespace or overflow
 */
std::optional<uint64_t> ParseByteUnits(std::string_view str, ByteUnit default_multiplier);

#endif // BITCOIN_UTIL_STRENCODINGS_H