aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorPieter Wuille <pieter.wuille@gmail.com>2019-02-06 18:08:21 -0800
committerPieter Wuille <pieter.wuille@gmail.com>2019-02-15 22:36:05 -0800
commit3b40bff9880e9ae2817136b7d14989afccfc1937 (patch)
tree65325ad0dbdda14e32704dfff400196aa13d04b8 /src
parent743c2f461c0e7c58b1e5c6ddfeaf4f0ff6e6ca85 (diff)
Descriptor checksum
Diffstat (limited to 'src')
-rw-r--r--src/script/descriptor.cpp144
-rw-r--r--src/script/descriptor.h11
-rw-r--r--src/test/descriptor_tests.cpp34
3 files changed, 178 insertions, 11 deletions
diff --git a/src/script/descriptor.cpp b/src/script/descriptor.cpp
index 532a8028a2..43448d7222 100644
--- a/src/script/descriptor.cpp
+++ b/src/script/descriptor.cpp
@@ -21,6 +21,125 @@
namespace {
////////////////////////////////////////////////////////////////////////////
+// Checksum //
+////////////////////////////////////////////////////////////////////////////
+
+// This section implements a checksum algorithm for descriptors with the
+// following properties:
+// * Mistakes in a descriptor string are measured in "symbol errors". The higher
+// the number of symbol errors, the harder it is to detect:
+// * An error substituting a character from 0123456789()[],'/*abcdefgh@:$%{} for
+// another in that set always counts as 1 symbol error.
+// * Note that hex encoded keys are covered by these characters. Xprvs and
+// xpubs use other characters too, but already have their own checksum
+// mechanism.
+// * Function names like "multi()" use other characters, but mistakes in
+// these would generally result in an unparseable descriptor.
+// * A case error always counts as 1 symbol error.
+// * Any other 1 character substitution error counts as 1 or 2 symbol errors.
+// * Any 1 symbol error is always detected.
+// * Any 2 or 3 symbol error in a descriptor of up to 49154 characters is always detected.
+// * Any 4 symbol error in a descriptor of up to 507 characters is always detected.
+// * Any 5 symbol error in a descriptor of up to 77 characters is always detected.
+// * Is optimized to minimize the chance a 5 symbol error in a descriptor up to 387 characters is undetected
+// * Random errors have a chance of 1 in 2**40 of being undetected.
+//
+// These properties are achieved by expanding every group of 3 (non checksum) characters into
+// 4 GF(32) symbols, over which a cyclic code is defined.
+
+/*
+ * Interprets c as 8 groups of 5 bits which are the coefficients of a degree 8 polynomial over GF(32),
+ * multiplies that polynomial by x, computes its remainder modulo a generator, and adds the constant term val.
+ *
+ * This generator is G(x) = x^8 + {30}x^7 + {23}x^6 + {15}x^5 + {14}x^4 + {10}x^3 + {6}x^2 + {12}x + {9}.
+ * It is chosen to define an cyclic error detecting code which is selected by:
+ * - Starting from all BCH codes over GF(32) of degree 8 and below, which by construction guarantee detecting
+ * 3 errors in windows up to 19000 symbols.
+ * - Taking all those generators, and for degree 7 ones, extend them to degree 8 by adding all degree-1 factors.
+ * - Selecting just the set of generators that guarantee detecting 4 errors in a window of length 512.
+ * - Selecting one of those with best worst-case behavior for 5 errors in windows of length up to 512.
+ *
+ * The generator and the constants to implement it can be verified using this Sage code:
+ * B = GF(2) # Binary field
+ * BP.<b> = B[] # Polynomials over the binary field
+ * F_mod = b**5 + b**3 + 1
+ * F.<f> = GF(32, modulus=F_mod, repr='int') # GF(32) definition
+ * FP.<x> = F[] # Polynomials over GF(32)
+ * E_mod = x**3 + x + F.fetch_int(8)
+ * E.<e> = F.extension(E_mod) # Extension field definition
+ * alpha = e**2743 # Choice of an element in extension field
+ * for p in divisors(E.order() - 1): # Verify alpha has order 32767.
+ * assert((alpha**p == 1) == (p % 32767 == 0))
+ * G = lcm([(alpha**i).minpoly() for i in [1056,1057,1058]] + [x + 1])
+ * print(G) # Print out the generator
+ * for i in [1,2,4,8,16]: # Print out {1,2,4,8,16}*(G mod x^8), packed in hex integers.
+ * v = 0
+ * for coef in reversed((F.fetch_int(i)*(G % x**8)).coefficients(sparse=True)):
+ * v = v*32 + coef.integer_representation()
+ * print("0x%x" % v)
+ */
+uint64_t PolyMod(uint64_t c, int val)
+{
+ uint8_t c0 = c >> 35;
+ c = ((c & 0x7ffffffff) << 5) ^ val;
+ if (c0 & 1) c ^= 0xf5dee51989;
+ if (c0 & 2) c ^= 0xa9fdca3312;
+ if (c0 & 4) c ^= 0x1bab10e32d;
+ if (c0 & 8) c ^= 0x3706b1677a;
+ if (c0 & 16) c ^= 0x644d626ffd;
+ return c;
+}
+
+std::string DescriptorChecksum(const Span<const char>& span)
+{
+ /** A character set designed such that:
+ * - The most common 'unprotected' descriptor characters (hex, keypaths) are in the first group of 32.
+ * - Case errors cause an offset that's a multiple of 32.
+ * - As many alphabetic characters are in the same group (while following the above restrictions).
+ *
+ * If p(x) gives the position of a character c in this character set, every group of 3 characters
+ * (a,b,c) is encoded as the 4 symbols (p(a) & 31, p(b) & 31, p(c) & 31, (p(a) / 32) + 3 * (p(b) / 32) + 9 * (p(c) / 32).
+ * This means that changes that only affect the lower 5 bits of the position, or only the higher 2 bits, will just
+ * affect a single symbol.
+ *
+ * As a result, within-group-of-32 errors count as 1 symbol, as do cross-group errors that don't affect
+ * the position within the groups.
+ */
+ static std::string INPUT_CHARSET =
+ "0123456789()[],'/*abcdefgh@:$%{}"
+ "IJKLMNOPQRSTUVWXYZ&+-.;<=>?!^_|~"
+ "ijklmnopqrstuvwxyzABCDEFGH`#\"\\ ";
+
+ /** The character set for the checksum itself (same as bech32). */
+ static std::string CHECKSUM_CHARSET = "qpzry9x8gf2tvdw0s3jn54khce6mua7l";
+
+ uint64_t c = 1;
+ int cls = 0;
+ int clscount = 0;
+ for (auto ch : span) {
+ auto pos = INPUT_CHARSET.find(ch);
+ if (pos == std::string::npos) return "";
+ c = PolyMod(c, pos & 31); // Emit a symbol for the position inside the group, for every character.
+ cls = cls * 3 + (pos >> 5); // Accumulate the group numbers
+ if (++clscount == 3) {
+ // Emit an extra symbol representing the group numbers, for every 3 characters.
+ c = PolyMod(c, cls);
+ cls = 0;
+ clscount = 0;
+ }
+ }
+ if (clscount > 0) c = PolyMod(c, cls);
+ for (int j = 0; j < 8; ++j) c = PolyMod(c, 0); // Shift further to determine the checksum.
+ c ^= 1; // Prevent appending zeroes from not affecting the checksum.
+
+ std::string ret(8, ' ');
+ for (int j = 0; j < 8; ++j) ret[j] = CHECKSUM_CHARSET[(c >> (5 * (7 - j))) & 31];
+ return ret;
+}
+
+std::string AddChecksum(const std::string& str) { return str + "#" + DescriptorChecksum(MakeSpan(str)); }
+
+////////////////////////////////////////////////////////////////////////////
// Internal representation //
////////////////////////////////////////////////////////////////////////////
@@ -273,10 +392,15 @@ public:
{
std::string ret;
ToStringHelper(nullptr, ret, false);
- return ret;
+ return AddChecksum(ret);
}
- bool ToPrivateString(const SigningProvider& arg, std::string& out) const override final { return ToStringHelper(&arg, out, true); }
+ bool ToPrivateString(const SigningProvider& arg, std::string& out) const override final
+ {
+ bool ret = ToStringHelper(&arg, out, true);
+ out = AddChecksum(out);
+ return ret;
+ }
bool ExpandHelper(int pos, const SigningProvider& arg, Span<const unsigned char>* cache_read, std::vector<CScript>& output_scripts, FlatSigningProvider& out, std::vector<unsigned char>* cache_write) const
{
@@ -751,11 +875,25 @@ std::unique_ptr<DescriptorImpl> InferScript(const CScript& script, ParseScriptCo
return MakeUnique<RawDescriptor>(script);
}
+
} // namespace
-std::unique_ptr<Descriptor> Parse(const std::string& descriptor, FlatSigningProvider& out)
+std::unique_ptr<Descriptor> Parse(const std::string& descriptor, FlatSigningProvider& out, bool require_checksum)
{
Span<const char> sp(descriptor.data(), descriptor.size());
+
+ // Checksum checks
+ auto check_split = Split(sp, '#');
+ if (check_split.size() > 2) return nullptr; // Multiple '#' symbols
+ if (check_split.size() == 1 && require_checksum) return nullptr; // Missing checksum
+ if (check_split.size() == 2) {
+ if (check_split[1].size() != 8) return nullptr; // Unexpected length for checksum
+ auto checksum = DescriptorChecksum(check_split[0]);
+ if (checksum.empty()) return nullptr; // Invalid characters in payload
+ if (!std::equal(checksum.begin(), checksum.end(), check_split[1].begin())) return nullptr; // Checksum mismatch
+ }
+ sp = check_split[0];
+
auto ret = ParseScript(sp, ParseScriptContext::TOP, out);
if (sp.size() == 0 && ret) return std::unique_ptr<Descriptor>(std::move(ret));
return nullptr;
diff --git a/src/script/descriptor.h b/src/script/descriptor.h
index 44f0efca03..907a102284 100644
--- a/src/script/descriptor.h
+++ b/src/script/descriptor.h
@@ -62,8 +62,15 @@ struct Descriptor {
virtual bool ExpandFromCache(int pos, const std::vector<unsigned char>& cache, std::vector<CScript>& output_scripts, FlatSigningProvider& out) const = 0;
};
-/** Parse a descriptor string. Included private keys are put in out. Returns nullptr if parsing fails. */
-std::unique_ptr<Descriptor> Parse(const std::string& descriptor, FlatSigningProvider& out);
+/** Parse a descriptor string. Included private keys are put in out.
+ *
+ * If the descriptor has a checksum, it must be valid. If require_checksum
+ * is set, the checksum is mandatory - otherwise it is optional.
+ *
+ * If a parse error occurs, or the checksum is missing/invalid, or anything
+ * else is wrong, nullptr is returned.
+ */
+std::unique_ptr<Descriptor> Parse(const std::string& descriptor, FlatSigningProvider& out, bool require_checksum = false);
/** Find a descriptor for the specified script, using information from provider where possible.
*
diff --git a/src/test/descriptor_tests.cpp b/src/test/descriptor_tests.cpp
index 8da8cfc00c..ff2b8d4fc9 100644
--- a/src/test/descriptor_tests.cpp
+++ b/src/test/descriptor_tests.cpp
@@ -18,8 +18,8 @@ void CheckUnparsable(const std::string& prv, const std::string& pub)
FlatSigningProvider keys_priv, keys_pub;
auto parse_priv = Parse(prv, keys_priv);
auto parse_pub = Parse(pub, keys_pub);
- BOOST_CHECK(!parse_priv);
- BOOST_CHECK(!parse_pub);
+ BOOST_CHECK_MESSAGE(!parse_priv, prv);
+ BOOST_CHECK_MESSAGE(!parse_pub, pub);
}
constexpr int DEFAULT = 0;
@@ -28,6 +28,18 @@ constexpr int HARDENED = 2; // Derivation needs access to private keys
constexpr int UNSOLVABLE = 4; // This descriptor is not expected to be solvable
constexpr int SIGNABLE = 8; // We can sign with this descriptor (this is not true when actual BIP32 derivation is used, as that's not integrated in our signing code)
+/** Compare two descriptors. If only one of them has a checksum, the checksum is ignored. */
+bool EqualDescriptor(std::string a, std::string b)
+{
+ bool a_check = (a.size() > 9 && a[a.size() - 9] == '#');
+ bool b_check = (b.size() > 9 && b[b.size() - 9] == '#');
+ if (a_check != b_check) {
+ if (a_check) a = a.substr(0, a.size() - 9);
+ if (b_check) b = b.substr(0, b.size() - 9);
+ }
+ return a == b;
+}
+
std::string MaybeUseHInsteadOfApostrophy(std::string ret)
{
if (InsecureRandBool()) {
@@ -35,6 +47,7 @@ std::string MaybeUseHInsteadOfApostrophy(std::string ret)
auto it = ret.find("'");
if (it != std::string::npos) {
ret[it] = 'h';
+ if (ret.size() > 9 && ret[ret.size() - 9] == '#') ret = ret.substr(0, ret.size() - 9); // Changing apostrophe to h breaks the checksum
} else {
break;
}
@@ -63,16 +76,16 @@ void Check(const std::string& prv, const std::string& pub, int flags, const std:
// Check that both versions serialize back to the public version.
std::string pub1 = parse_priv->ToString();
std::string pub2 = parse_pub->ToString();
- BOOST_CHECK_EQUAL(pub, pub1);
- BOOST_CHECK_EQUAL(pub, pub2);
+ BOOST_CHECK(EqualDescriptor(pub, pub1));
+ BOOST_CHECK(EqualDescriptor(pub, pub2));
// Check that both can be serialized with private key back to the private version, but not without private key.
std::string prv1;
BOOST_CHECK(parse_priv->ToPrivateString(keys_priv, prv1));
- BOOST_CHECK_EQUAL(prv, prv1);
+ BOOST_CHECK(EqualDescriptor(prv, prv1));
BOOST_CHECK(!parse_priv->ToPrivateString(keys_pub, prv1));
BOOST_CHECK(parse_pub->ToPrivateString(keys_priv, prv1));
- BOOST_CHECK_EQUAL(prv, prv1);
+ BOOST_CHECK(EqualDescriptor(prv, prv1));
BOOST_CHECK(!parse_pub->ToPrivateString(keys_pub, prv1));
// Check whether IsRange on both returns the expected result
@@ -210,6 +223,15 @@ BOOST_AUTO_TEST_CASE(descriptor_test)
CheckUnparsable("wsh(sh(pk(L4rK1yDtCWekvXuE6oXD9jCYfFNV2cWRpVuPLBcCU2z8TrisoyY1)))", "wsh(sh(pk(03a34b99f22c790c4e36b2b3c2c35a36db06226e41c692fc82b8b56ac1c540c5bd)))"); // Cannot embed P2SH inside P2WSH
CheckUnparsable("sh(sh(pk(L4rK1yDtCWekvXuE6oXD9jCYfFNV2cWRpVuPLBcCU2z8TrisoyY1)))", "sh(sh(pk(03a34b99f22c790c4e36b2b3c2c35a36db06226e41c692fc82b8b56ac1c540c5bd)))"); // Cannot embed P2SH inside P2SH
CheckUnparsable("wsh(wsh(pk(L4rK1yDtCWekvXuE6oXD9jCYfFNV2cWRpVuPLBcCU2z8TrisoyY1)))", "wsh(wsh(pk(03a34b99f22c790c4e36b2b3c2c35a36db06226e41c692fc82b8b56ac1c540c5bd)))"); // Cannot embed P2WSH inside P2WSH
+
+ // Checksums
+ Check("sh(multi(2,[00000000/111'/222]xprvA1RpRA33e1JQ7ifknakTFpgNXPmW2YvmhqLQYMmrj4xJXXWYpDPS3xz7iAxn8L39njGVyuoseXzU6rcxFLJ8HFsTjSyQbLYnMpCqE2VbFWc,xprv9uPDJpEQgRQfDcW7BkF7eTya6RPxXeJCqCJGHuCJ4GiRVLzkTXBAJMu2qaMWPrS7AANYqdq6vcBcBUdJCVVFceUvJFjaPdGZ2y9WACViL4L/0))#ggrsrxfy", "sh(multi(2,[00000000/111'/222]xpub6ERApfZwUNrhLCkDtcHTcxd75RbzS1ed54G1LkBUHQVHQKqhMkhgbmJbZRkrgZw4koxb5JaHWkY4ALHY2grBGRjaDMzQLcgJvLJuZZvRcEL,xpub68NZiKmJWnxxS6aaHmn81bvJeTESw724CRDs6HbuccFQN9Ku14VQrADWgqbhhTHBaohPX4CjNLf9fq9MYo6oDaPPLPxSb7gwQN3ih19Zm4Y/0))#tjg09x5t", DEFAULT, {{"a91445a9a622a8b0a1269944be477640eedc447bbd8487"}}, {{0x8000006FUL,222},{0}});
+ Check("sh(multi(2,[00000000/111'/222]xprvA1RpRA33e1JQ7ifknakTFpgNXPmW2YvmhqLQYMmrj4xJXXWYpDPS3xz7iAxn8L39njGVyuoseXzU6rcxFLJ8HFsTjSyQbLYnMpCqE2VbFWc,xprv9uPDJpEQgRQfDcW7BkF7eTya6RPxXeJCqCJGHuCJ4GiRVLzkTXBAJMu2qaMWPrS7AANYqdq6vcBcBUdJCVVFceUvJFjaPdGZ2y9WACViL4L/0))", "sh(multi(2,[00000000/111'/222]xpub6ERApfZwUNrhLCkDtcHTcxd75RbzS1ed54G1LkBUHQVHQKqhMkhgbmJbZRkrgZw4koxb5JaHWkY4ALHY2grBGRjaDMzQLcgJvLJuZZvRcEL,xpub68NZiKmJWnxxS6aaHmn81bvJeTESw724CRDs6HbuccFQN9Ku14VQrADWgqbhhTHBaohPX4CjNLf9fq9MYo6oDaPPLPxSb7gwQN3ih19Zm4Y/0))", DEFAULT, {{"a91445a9a622a8b0a1269944be477640eedc447bbd8487"}}, {{0x8000006FUL,222},{0}});
+ CheckUnparsable("sh(multi(2,[00000000/111'/222]xprvA1RpRA33e1JQ7ifknakTFpgNXPmW2YvmhqLQYMmrj4xJXXWYpDPS3xz7iAxn8L39njGVyuoseXzU6rcxFLJ8HFsTjSyQbLYnMpCqE2VbFWc,xprv9uPDJpEQgRQfDcW7BkF7eTya6RPxXeJCqCJGHuCJ4GiRVLzkTXBAJMu2qaMWPrS7AANYqdq6vcBcBUdJCVVFceUvJFjaPdGZ2y9WACViL4L/0))#", "sh(multi(2,[00000000/111'/222]xpub6ERApfZwUNrhLCkDtcHTcxd75RbzS1ed54G1LkBUHQVHQKqhMkhgbmJbZRkrgZw4koxb5JaHWkY4ALHY2grBGRjaDMzQLcgJvLJuZZvRcEL,xpub68NZiKmJWnxxS6aaHmn81bvJeTESw724CRDs6HbuccFQN9Ku14VQrADWgqbhhTHBaohPX4CjNLf9fq9MYo6oDaPPLPxSb7gwQN3ih19Zm4Y/0))#"); // Empty checksum
+ CheckUnparsable("sh(multi(2,[00000000/111'/222]xprvA1RpRA33e1JQ7ifknakTFpgNXPmW2YvmhqLQYMmrj4xJXXWYpDPS3xz7iAxn8L39njGVyuoseXzU6rcxFLJ8HFsTjSyQbLYnMpCqE2VbFWc,xprv9uPDJpEQgRQfDcW7BkF7eTya6RPxXeJCqCJGHuCJ4GiRVLzkTXBAJMu2qaMWPrS7AANYqdq6vcBcBUdJCVVFceUvJFjaPdGZ2y9WACViL4L/0))#ggrsrxfyq", "sh(multi(2,[00000000/111'/222]xpub6ERApfZwUNrhLCkDtcHTcxd75RbzS1ed54G1LkBUHQVHQKqhMkhgbmJbZRkrgZw4koxb5JaHWkY4ALHY2grBGRjaDMzQLcgJvLJuZZvRcEL,xpub68NZiKmJWnxxS6aaHmn81bvJeTESw724CRDs6HbuccFQN9Ku14VQrADWgqbhhTHBaohPX4CjNLf9fq9MYo6oDaPPLPxSb7gwQN3ih19Zm4Y/0))#tjg09x5tq"); // Too long checksum
+ CheckUnparsable("sh(multi(2,[00000000/111'/222]xprvA1RpRA33e1JQ7ifknakTFpgNXPmW2YvmhqLQYMmrj4xJXXWYpDPS3xz7iAxn8L39njGVyuoseXzU6rcxFLJ8HFsTjSyQbLYnMpCqE2VbFWc,xprv9uPDJpEQgRQfDcW7BkF7eTya6RPxXeJCqCJGHuCJ4GiRVLzkTXBAJMu2qaMWPrS7AANYqdq6vcBcBUdJCVVFceUvJFjaPdGZ2y9WACViL4L/0))#ggrsrxf", "sh(multi(2,[00000000/111'/222]xpub6ERApfZwUNrhLCkDtcHTcxd75RbzS1ed54G1LkBUHQVHQKqhMkhgbmJbZRkrgZw4koxb5JaHWkY4ALHY2grBGRjaDMzQLcgJvLJuZZvRcEL,xpub68NZiKmJWnxxS6aaHmn81bvJeTESw724CRDs6HbuccFQN9Ku14VQrADWgqbhhTHBaohPX4CjNLf9fq9MYo6oDaPPLPxSb7gwQN3ih19Zm4Y/0))#tjg09x5"); // Too short checksum
+ CheckUnparsable("sh(multi(3,[00000000/111'/222]xprvA1RpRA33e1JQ7ifknakTFpgNXPmW2YvmhqLQYMmrj4xJXXWYpDPS3xz7iAxn8L39njGVyuoseXzU6rcxFLJ8HFsTjSyQbLYnMpCqE2VbFWc,xprv9uPDJpEQgRQfDcW7BkF7eTya6RPxXeJCqCJGHuCJ4GiRVLzkTXBAJMu2qaMWPrS7AANYqdq6vcBcBUdJCVVFceUvJFjaPdGZ2y9WACViL4L/0))#ggrsrxfy", "sh(multi(3,[00000000/111'/222]xpub6ERApfZwUNrhLCkDtcHTcxd75RbzS1ed54G1LkBUHQVHQKqhMkhgbmJbZRkrgZw4koxb5JaHWkY4ALHY2grBGRjaDMzQLcgJvLJuZZvRcEL,xpub68NZiKmJWnxxS6aaHmn81bvJeTESw724CRDs6HbuccFQN9Ku14VQrADWgqbhhTHBaohPX4CjNLf9fq9MYo6oDaPPLPxSb7gwQN3ih19Zm4Y/0))#tjg09x5t"); // Error in payload
+ CheckUnparsable("sh(multi(2,[00000000/111'/222]xprvA1RpRA33e1JQ7ifknakTFpgNXPmW2YvmhqLQYMmrj4xJXXWYpDPS3xz7iAxn8L39njGVyuoseXzU6rcxFLJ8HFsTjSyQbLYnMpCqE2VbFWc,xprv9uPDJpEQgRQfDcW7BkF7eTya6RPxXeJCqCJGHuCJ4GiRVLzkTXBAJMu2qaMWPrS7AANYqdq6vcBcBUdJCVVFceUvJFjaPdGZ2y9WACViL4L/0))#ggssrxfy", "sh(multi(2,[00000000/111'/222]xpub6ERApfZwUNrhLCkDtcHTcxd75RbzS1ed54G1LkBUHQVHQKqhMkhgbmJbZRkrgZw4koxb5JaHWkY4ALHY2grBGRjaDMzQLcgJvLJuZZvRcEL,xpub68NZiKmJWnxxS6aaHmn81bvJeTESw724CRDs6HbuccFQN9Ku14VQrADWgqbhhTHBaohPX4CjNLf9fq9MYo6oDaPPLPxSb7gwQN3ih19Zm4Y/0))#tjq09x4t"); // Error in checksum
}
BOOST_AUTO_TEST_SUITE_END()