Merge bitcoin/bitcoin#30047: refactor: Model the bech32 charlimit as an Enum

7f3f6c6dc80247e6dfb0d406dc53bc8198f029fd refactor: replace hardcoded numbers (Lőrinc)
5676aec1e1a6d2c6fd3099e120e263a0a7def089 refactor: Model the bech32 charlimit as an Enum (josibake)

Pull request description:

  Broken out from #28122

  ---

  Bech32(m) was defined with a 90 character limit so that certain guarantees for error detection could be made for segwit addresses (see https://github.com/bitcoin/bips/blob/master/bip-0173.mediawiki#checksum-design).

  However, there is nothing about the encoding scheme itself that requires a limit of 90 and in practice bech32(m) is being used without the 90 char limit (e.g. lightning invoices, silent payments). Further, increasing the character limit doesn't do away with error detection, it simply changes the guarantee.

  The primary motivation for this change is for being able to parse BIP352 v0 silent payment addresses (see https://github.com/bitcoin/bitcoin/pull/28122/commits/622c7a98b9f08177a3cfb601306daabb101af1fd), which require up to 118 characters. In addition to BIP352, modeling the character limit as an enum allows us to easily support new address types that use bech32m and specify their own character limit.

ACKs for top commit:
  paplorinc:
    re-ACK 7f3f6c6dc80247e6dfb0d406dc53bc8198f029fd
  achow101:
    ACK 7f3f6c6dc80247e6dfb0d406dc53bc8198f029fd
  theuni:
    utACK 7f3f6c6dc80247e6dfb0d406dc53bc8198f029fd

Tree-SHA512: 9c793d657448c1f795093b9f7d4d6dfa431598f48d54e1c899a69fb2f43aeb68b40ca2ff08864eefeeb6627d4171877234b5df0056ff2a2b84415bc3558bd280
author: Ava Chow <github@achow101.com> 2024-06-04 20:32:25 -0400
committer: Ava Chow <github@achow101.com> 2024-06-04 20:32:25 -0400
commit: 55cf34a5c30f1aee0737b48fa2814996359eca89 (patch)
tree: 13af2fb645850f449759b08f0ad28c1bc797272c /src
parent: 09fe1435d9cae5d32fa4e24042d7a862b7c7bf22 (diff)
parent: 7f3f6c6dc80247e6dfb0d406dc53bc8198f029fd (diff)
2 files changed, 41 insertions, 27 deletions
diff --git a/src/bech32.cpp b/src/bech32.cpp
index ba3c419d8b..c3c4ca8006 100644
--- a/src/bech32.cpp
+++ b/src/bech32.cpp
@@ -19,6 +19,9 @@ namespace
 
 typedef std::vector<uint8_t> data;
 
+/** The Bech32 and Bech32m checksum size */
+constexpr size_t CHECKSUM_SIZE = 6;
+
 /** The Bech32 and Bech32m character set for encoding. */
 const char* CHARSET = "qpzry9x8gf2tvdw0s3jn54khce6mua7l";
 
@@ -308,18 +311,18 @@ bool CheckCharacters(const std::string& str, std::vector<int>& errors)
     return errors.empty();
 }
 
-/** Expand a HRP for use in checksum computation. */
-data ExpandHRP(const std::string& hrp)
+std::vector<unsigned char> PreparePolynomialCoefficients(const std::string& hrp, const data& values)
 {
     data ret;
-    ret.reserve(hrp.size() + 90);
-    ret.resize(hrp.size() * 2 + 1);
-    for (size_t i = 0; i < hrp.size(); ++i) {
-        unsigned char c = hrp[i];
-        ret[i] = c >> 5;
-        ret[i + hrp.size() + 1] = c & 0x1f;
-    }
-    ret[hrp.size()] = 0;
+    ret.reserve(hrp.size() + 1 + hrp.size() + values.size() + CHECKSUM_SIZE);
+
+    /** Expand a HRP for use in checksum computation. */
+    for (size_t i = 0; i < hrp.size(); ++i) ret.push_back(hrp[i] >> 5);
+    ret.push_back(0);
+    for (size_t i = 0; i < hrp.size(); ++i) ret.push_back(hrp[i] & 0x1f);
+
+    ret.insert(ret.end(), values.begin(), values.end());
+
     return ret;
 }
 
@@ -331,7 +334,8 @@ Encoding VerifyChecksum(const std::string& hrp, const data& values)
     // list of values would result in a new valid list. For that reason, Bech32 requires the
     // resulting checksum to be 1 instead. In Bech32m, this constant was amended. See
     // https://gist.github.com/sipa/14c248c288c3880a3b191f978a34508e for details.
-    const uint32_t check = PolyMod(Cat(ExpandHRP(hrp), values));
+    auto enc = PreparePolynomialCoefficients(hrp, values);
+    const uint32_t check = PolyMod(enc);
     if (check == EncodingConstant(Encoding::BECH32)) return Encoding::BECH32;
     if (check == EncodingConstant(Encoding::BECH32M)) return Encoding::BECH32M;
     return Encoding::INVALID;
@@ -340,11 +344,11 @@ Encoding VerifyChecksum(const std::string& hrp, const data& values)
 /** Create a checksum. */
 data CreateChecksum(Encoding encoding, const std::string& hrp, const data& values)
 {
-    data enc = Cat(ExpandHRP(hrp), values);
-    enc.resize(enc.size() + 6); // Append 6 zeroes
+    auto enc = PreparePolynomialCoefficients(hrp, values);
+    enc.insert(enc.end(), CHECKSUM_SIZE, 0x00);
     uint32_t mod = PolyMod(enc) ^ EncodingConstant(encoding); // Determine what to XOR into those 6 zeroes.
-    data ret(6);
-    for (size_t i = 0; i < 6; ++i) {
+    data ret(CHECKSUM_SIZE);
+    for (size_t i = 0; i < CHECKSUM_SIZE; ++i) {
         // Convert the 5-bit groups in mod to checksum values.
         ret[i] = (mod >> (5 * (5 - i))) & 31;
     }
@@ -370,11 +374,12 @@ std::string Encode(Encoding encoding, const std::string& hrp, const data& values
 }
 
 /** Decode a Bech32 or Bech32m string. */
-DecodeResult Decode(const std::string& str) {
+DecodeResult Decode(const std::string& str, CharLimit limit) {
     std::vector<int> errors;
     if (!CheckCharacters(str, errors)) return {};
     size_t pos = str.rfind('1');
-    if (str.size() > 90 || pos == str.npos || pos == 0 || pos + 7 > str.size()) {
+    if (str.size() > limit) return {};
+    if (pos == str.npos || pos == 0 || pos + CHECKSUM_SIZE >= str.size()) {
         return {};
     }
     data values(str.size() - 1 - pos);
@@ -393,16 +398,16 @@ DecodeResult Decode(const std::string& str) {
     }
     Encoding result = VerifyChecksum(hrp, values);
     if (result == Encoding::INVALID) return {};
-    return {result, std::move(hrp), data(values.begin(), values.end() - 6)};
+    return {result, std::move(hrp), data(values.begin(), values.end() - CHECKSUM_SIZE)};
 }
 
 /** Find index of an incorrect character in a Bech32 string. */
-std::pair<std::string, std::vector<int>> LocateErrors(const std::string& str) {
+std::pair<std::string, std::vector<int>> LocateErrors(const std::string& str, CharLimit limit) {
     std::vector<int> error_locations{};
 
-    if (str.size() > 90) {
-        error_locations.resize(str.size() - 90);
-        std::iota(error_locations.begin(), error_locations.end(), 90);
+    if (str.size() > limit) {
+        error_locations.resize(str.size() - limit);
+        std::iota(error_locations.begin(), error_locations.end(), static_cast<int>(limit));
         return std::make_pair("Bech32 string too long", std::move(error_locations));
     }
 
@@ -414,7 +419,7 @@ std::pair<std::string, std::vector<int>> LocateErrors(const std::string& str) {
     if (pos == str.npos) {
         return std::make_pair("Missing separator", std::vector<int>{});
     }
-    if (pos == 0 || pos + 7 > str.size()) {
+    if (pos == 0 || pos + CHECKSUM_SIZE >= str.size()) {
         error_locations.push_back(pos);
         return std::make_pair("Invalid separator position", std::move(error_locations));
     }
@@ -441,9 +446,10 @@ std::pair<std::string, std::vector<int>> LocateErrors(const std::string& str) {
     std::optional<Encoding> error_encoding;
     for (Encoding encoding : {Encoding::BECH32, Encoding::BECH32M}) {
         std::vector<int> possible_errors;
-        // Recall that (ExpandHRP(hrp) ++ values) is interpreted as a list of coefficients of a polynomial
+        // Recall that (expanded hrp + values) is interpreted as a list of coefficients of a polynomial
         // over GF(32). PolyMod computes the "remainder" of this polynomial modulo the generator G(x).
-        uint32_t residue = PolyMod(Cat(ExpandHRP(hrp), values)) ^ EncodingConstant(encoding);
+        auto enc = PreparePolynomialCoefficients(hrp, values);
+        uint32_t residue = PolyMod(enc) ^ EncodingConstant(encoding);
 
         // All valid codewords should be multiples of G(x), so this remainder (after XORing with the encoding
         // constant) should be 0 - hence 0 indicates there are no errors present.
diff --git a/src/bech32.h b/src/bech32.h
index 5e89e6efda..fe2a276ae0 100644
--- a/src/bech32.h
+++ b/src/bech32.h
@@ -28,6 +28,14 @@ enum class Encoding {
     BECH32M, //!< Bech32m encoding as defined in BIP350
 };
 
+/** Character limits for Bech32(m) encoded strings. The character limit is what lets us provide error location guarantees.
+ *  These values must never exceed 2^31 - 1 (the maximum value of a 32-bit int), since there are places where we may need to
+ *  convert a CharLimit value to an int. In practice this should never happen, since a CharLimit applies to an address encoding
+ *  and we would never encode an address with such a massive length. */
+enum CharLimit : size_t {
+    BECH32 = 90,            //!< BIP173/350 imposed character limit for Bech32(m) encoded addresses. This guarantees finding up to 4 errors.
+};
+
 /** Encode a Bech32 or Bech32m string. If hrp contains uppercase characters, this will cause an
  *  assertion error. Encoding must be one of BECH32 or BECH32M. */
 std::string Encode(Encoding encoding, const std::string& hrp, const std::vector<uint8_t>& values);
@@ -43,10 +51,10 @@ struct DecodeResult
 };
 
 /** Decode a Bech32 or Bech32m string. */
-DecodeResult Decode(const std::string& str);
+DecodeResult Decode(const std::string& str, CharLimit limit = CharLimit::BECH32);
 
 /** Return the positions of errors in a Bech32 string. */
-std::pair<std::string, std::vector<int>> LocateErrors(const std::string& str);
+std::pair<std::string, std::vector<int>> LocateErrors(const std::string& str, CharLimit limit = CharLimit::BECH32);
 
 } // namespace bech32
author	Ava Chow <github@achow101.com>	2024-06-04 20:32:25 -0400
committer	Ava Chow <github@achow101.com>	2024-06-04 20:32:25 -0400
commit	55cf34a5c30f1aee0737b48fa2814996359eca89 (patch)
tree	13af2fb645850f449759b08f0ad28c1bc797272c /src
parent	09fe1435d9cae5d32fa4e24042d7a862b7c7bf22 (diff)
parent	7f3f6c6dc80247e6dfb0d406dc53bc8198f029fd (diff)