aboutsummaryrefslogtreecommitdiff
path: root/src/script
diff options
context:
space:
mode:
authorWladimir J. van der Laan <laanwj@gmail.com>2019-02-16 21:05:38 +0100
committerWladimir J. van der Laan <laanwj@gmail.com>2019-02-16 21:39:32 +0100
commitf60d029a2a768b082e75ed98c92429e56f58974d (patch)
tree13ea371c289352c96f5f762aa4d50f0fcb6ced92 /src/script
parentd5b929c813ff3d7d93bf4e3164b34e10eeb63801 (diff)
parentfd637be8d21a606e98c037b40b268c4a1fae2244 (diff)
Merge #15368: Descriptor checksums
fd637be8d21a606e98c037b40b268c4a1fae2244 Add checksums to descriptors.md (Pieter Wuille) be62903c417293f6217e124669e62fd2172a18f1 Make descriptor checksums mandatory in deriveaddresses and importmulti (Pieter Wuille) b52cb6368869c9f6dd2cd8f309b3000de514d439 Add getdescriptorinfo to compute checksum (Pieter Wuille) 3b40bff9880e9ae2817136b7d14989afccfc1937 Descriptor checksum (Pieter Wuille) Pull request description: This adds support for a descriptor-specific 8-character checksum. Descriptors may optionally be suffixed with a `#` plus these 8 checksum characters. Any descriptor that contains a `#` at the end must be followed by a valid checksum. If the `#` is missing entirely, it is valid without checksum. All RPCs are updated to report descriptors that include the checksum. On input, they are optional except in `deriveaddress` and `importmulti`, which require descriptors which include a checksum. A new RPC is also added to analyse descriptors (`getdescriptorinfo`), which can be used to compute the checksum for a descriptor without. Tree-SHA512: a8294b09155eb6c67fbc178b5e2d3fbc0e9bec8b6de57a13f8835550d51c2cb32a428b3c9a188ded42b454d594e9305edbd4797906b755de77a8f33c79165f6b
Diffstat (limited to 'src/script')
-rw-r--r--src/script/descriptor.cpp144
-rw-r--r--src/script/descriptor.h11
2 files changed, 150 insertions, 5 deletions
diff --git a/src/script/descriptor.cpp b/src/script/descriptor.cpp
index 532a8028a2..43448d7222 100644
--- a/src/script/descriptor.cpp
+++ b/src/script/descriptor.cpp
@@ -21,6 +21,125 @@
namespace {
////////////////////////////////////////////////////////////////////////////
+// Checksum //
+////////////////////////////////////////////////////////////////////////////
+
+// This section implements a checksum algorithm for descriptors with the
+// following properties:
+// * Mistakes in a descriptor string are measured in "symbol errors". The higher
+// the number of symbol errors, the harder it is to detect:
+// * An error substituting a character from 0123456789()[],'/*abcdefgh@:$%{} for
+// another in that set always counts as 1 symbol error.
+// * Note that hex encoded keys are covered by these characters. Xprvs and
+// xpubs use other characters too, but already have their own checksum
+// mechanism.
+// * Function names like "multi()" use other characters, but mistakes in
+// these would generally result in an unparseable descriptor.
+// * A case error always counts as 1 symbol error.
+// * Any other 1 character substitution error counts as 1 or 2 symbol errors.
+// * Any 1 symbol error is always detected.
+// * Any 2 or 3 symbol error in a descriptor of up to 49154 characters is always detected.
+// * Any 4 symbol error in a descriptor of up to 507 characters is always detected.
+// * Any 5 symbol error in a descriptor of up to 77 characters is always detected.
+// * Is optimized to minimize the chance a 5 symbol error in a descriptor up to 387 characters is undetected
+// * Random errors have a chance of 1 in 2**40 of being undetected.
+//
+// These properties are achieved by expanding every group of 3 (non checksum) characters into
+// 4 GF(32) symbols, over which a cyclic code is defined.
+
+/*
+ * Interprets c as 8 groups of 5 bits which are the coefficients of a degree 8 polynomial over GF(32),
+ * multiplies that polynomial by x, computes its remainder modulo a generator, and adds the constant term val.
+ *
+ * This generator is G(x) = x^8 + {30}x^7 + {23}x^6 + {15}x^5 + {14}x^4 + {10}x^3 + {6}x^2 + {12}x + {9}.
+ * It is chosen to define an cyclic error detecting code which is selected by:
+ * - Starting from all BCH codes over GF(32) of degree 8 and below, which by construction guarantee detecting
+ * 3 errors in windows up to 19000 symbols.
+ * - Taking all those generators, and for degree 7 ones, extend them to degree 8 by adding all degree-1 factors.
+ * - Selecting just the set of generators that guarantee detecting 4 errors in a window of length 512.
+ * - Selecting one of those with best worst-case behavior for 5 errors in windows of length up to 512.
+ *
+ * The generator and the constants to implement it can be verified using this Sage code:
+ * B = GF(2) # Binary field
+ * BP.<b> = B[] # Polynomials over the binary field
+ * F_mod = b**5 + b**3 + 1
+ * F.<f> = GF(32, modulus=F_mod, repr='int') # GF(32) definition
+ * FP.<x> = F[] # Polynomials over GF(32)
+ * E_mod = x**3 + x + F.fetch_int(8)
+ * E.<e> = F.extension(E_mod) # Extension field definition
+ * alpha = e**2743 # Choice of an element in extension field
+ * for p in divisors(E.order() - 1): # Verify alpha has order 32767.
+ * assert((alpha**p == 1) == (p % 32767 == 0))
+ * G = lcm([(alpha**i).minpoly() for i in [1056,1057,1058]] + [x + 1])
+ * print(G) # Print out the generator
+ * for i in [1,2,4,8,16]: # Print out {1,2,4,8,16}*(G mod x^8), packed in hex integers.
+ * v = 0
+ * for coef in reversed((F.fetch_int(i)*(G % x**8)).coefficients(sparse=True)):
+ * v = v*32 + coef.integer_representation()
+ * print("0x%x" % v)
+ */
+uint64_t PolyMod(uint64_t c, int val)
+{
+ uint8_t c0 = c >> 35;
+ c = ((c & 0x7ffffffff) << 5) ^ val;
+ if (c0 & 1) c ^= 0xf5dee51989;
+ if (c0 & 2) c ^= 0xa9fdca3312;
+ if (c0 & 4) c ^= 0x1bab10e32d;
+ if (c0 & 8) c ^= 0x3706b1677a;
+ if (c0 & 16) c ^= 0x644d626ffd;
+ return c;
+}
+
+std::string DescriptorChecksum(const Span<const char>& span)
+{
+ /** A character set designed such that:
+ * - The most common 'unprotected' descriptor characters (hex, keypaths) are in the first group of 32.
+ * - Case errors cause an offset that's a multiple of 32.
+ * - As many alphabetic characters are in the same group (while following the above restrictions).
+ *
+ * If p(x) gives the position of a character c in this character set, every group of 3 characters
+ * (a,b,c) is encoded as the 4 symbols (p(a) & 31, p(b) & 31, p(c) & 31, (p(a) / 32) + 3 * (p(b) / 32) + 9 * (p(c) / 32).
+ * This means that changes that only affect the lower 5 bits of the position, or only the higher 2 bits, will just
+ * affect a single symbol.
+ *
+ * As a result, within-group-of-32 errors count as 1 symbol, as do cross-group errors that don't affect
+ * the position within the groups.
+ */
+ static std::string INPUT_CHARSET =
+ "0123456789()[],'/*abcdefgh@:$%{}"
+ "IJKLMNOPQRSTUVWXYZ&+-.;<=>?!^_|~"
+ "ijklmnopqrstuvwxyzABCDEFGH`#\"\\ ";
+
+ /** The character set for the checksum itself (same as bech32). */
+ static std::string CHECKSUM_CHARSET = "qpzry9x8gf2tvdw0s3jn54khce6mua7l";
+
+ uint64_t c = 1;
+ int cls = 0;
+ int clscount = 0;
+ for (auto ch : span) {
+ auto pos = INPUT_CHARSET.find(ch);
+ if (pos == std::string::npos) return "";
+ c = PolyMod(c, pos & 31); // Emit a symbol for the position inside the group, for every character.
+ cls = cls * 3 + (pos >> 5); // Accumulate the group numbers
+ if (++clscount == 3) {
+ // Emit an extra symbol representing the group numbers, for every 3 characters.
+ c = PolyMod(c, cls);
+ cls = 0;
+ clscount = 0;
+ }
+ }
+ if (clscount > 0) c = PolyMod(c, cls);
+ for (int j = 0; j < 8; ++j) c = PolyMod(c, 0); // Shift further to determine the checksum.
+ c ^= 1; // Prevent appending zeroes from not affecting the checksum.
+
+ std::string ret(8, ' ');
+ for (int j = 0; j < 8; ++j) ret[j] = CHECKSUM_CHARSET[(c >> (5 * (7 - j))) & 31];
+ return ret;
+}
+
+std::string AddChecksum(const std::string& str) { return str + "#" + DescriptorChecksum(MakeSpan(str)); }
+
+////////////////////////////////////////////////////////////////////////////
// Internal representation //
////////////////////////////////////////////////////////////////////////////
@@ -273,10 +392,15 @@ public:
{
std::string ret;
ToStringHelper(nullptr, ret, false);
- return ret;
+ return AddChecksum(ret);
}
- bool ToPrivateString(const SigningProvider& arg, std::string& out) const override final { return ToStringHelper(&arg, out, true); }
+ bool ToPrivateString(const SigningProvider& arg, std::string& out) const override final
+ {
+ bool ret = ToStringHelper(&arg, out, true);
+ out = AddChecksum(out);
+ return ret;
+ }
bool ExpandHelper(int pos, const SigningProvider& arg, Span<const unsigned char>* cache_read, std::vector<CScript>& output_scripts, FlatSigningProvider& out, std::vector<unsigned char>* cache_write) const
{
@@ -751,11 +875,25 @@ std::unique_ptr<DescriptorImpl> InferScript(const CScript& script, ParseScriptCo
return MakeUnique<RawDescriptor>(script);
}
+
} // namespace
-std::unique_ptr<Descriptor> Parse(const std::string& descriptor, FlatSigningProvider& out)
+std::unique_ptr<Descriptor> Parse(const std::string& descriptor, FlatSigningProvider& out, bool require_checksum)
{
Span<const char> sp(descriptor.data(), descriptor.size());
+
+ // Checksum checks
+ auto check_split = Split(sp, '#');
+ if (check_split.size() > 2) return nullptr; // Multiple '#' symbols
+ if (check_split.size() == 1 && require_checksum) return nullptr; // Missing checksum
+ if (check_split.size() == 2) {
+ if (check_split[1].size() != 8) return nullptr; // Unexpected length for checksum
+ auto checksum = DescriptorChecksum(check_split[0]);
+ if (checksum.empty()) return nullptr; // Invalid characters in payload
+ if (!std::equal(checksum.begin(), checksum.end(), check_split[1].begin())) return nullptr; // Checksum mismatch
+ }
+ sp = check_split[0];
+
auto ret = ParseScript(sp, ParseScriptContext::TOP, out);
if (sp.size() == 0 && ret) return std::unique_ptr<Descriptor>(std::move(ret));
return nullptr;
diff --git a/src/script/descriptor.h b/src/script/descriptor.h
index 44f0efca03..907a102284 100644
--- a/src/script/descriptor.h
+++ b/src/script/descriptor.h
@@ -62,8 +62,15 @@ struct Descriptor {
virtual bool ExpandFromCache(int pos, const std::vector<unsigned char>& cache, std::vector<CScript>& output_scripts, FlatSigningProvider& out) const = 0;
};
-/** Parse a descriptor string. Included private keys are put in out. Returns nullptr if parsing fails. */
-std::unique_ptr<Descriptor> Parse(const std::string& descriptor, FlatSigningProvider& out);
+/** Parse a descriptor string. Included private keys are put in out.
+ *
+ * If the descriptor has a checksum, it must be valid. If require_checksum
+ * is set, the checksum is mandatory - otherwise it is optional.
+ *
+ * If a parse error occurs, or the checksum is missing/invalid, or anything
+ * else is wrong, nullptr is returned.
+ */
+std::unique_ptr<Descriptor> Parse(const std::string& descriptor, FlatSigningProvider& out, bool require_checksum = false);
/** Find a descriptor for the specified script, using information from provider where possible.
*