diff options
author | W. J. van der Laan <laanwj@protonmail.com> | 2021-06-17 17:37:44 +0200 |
---|---|---|
committer | W. J. van der Laan <laanwj@protonmail.com> | 2021-06-17 17:43:16 +0200 |
commit | 7b45c5e875cab07509aaf3974f5ad6c3888dd3ff (patch) | |
tree | aea1cf8ed3ce17353db7372c9457ca6847a47b1d /src | |
parent | 922abe8ca3825a73fd2054d6bc8f557acefb5bab (diff) | |
parent | f8866e8c324be3322fa507c2ceb1de35d148d0f1 (diff) |
Merge bitcoin/bitcoin#20516: Well-defined CAddress disk serialization, and addrv2 anchors.dat
f8866e8c324be3322fa507c2ceb1de35d148d0f1 Add roundtrip fuzz tests for CAddress serialization (Pieter Wuille)
e2f0548b52a4b2ba3edf77e3f21365f1e8f270a4 Use addrv2 serialization in anchors.dat (Pieter Wuille)
8cd8f37dfe3ffb73a09f3ad773603d9d89452245 Introduce well-defined CAddress disk serialization (Pieter Wuille)
Pull request description:
Alternative to #20509.
This makes the `CAddress` disk serialization format well defined, and uses it to enable addrv2 support in anchors.dat (in a way that's compatible with older software). The new format is:
- The first 4 bytes store a format version number. Its low 19 bits are ignored (as those historically stored the `CLIENT_VERSION`), but its high 13 bits specify the actual serialization:
- 0x00000000: LE64 encoding for `nServices`, V1 encoding for `CService` (like pre-BIP155 network serialization).
- 0x20000000: CompactSize encoding for `nServices`, V2 encoding for `CService` (like BIP155 network serialization).
- Any other value triggers an unsupported format error on deserialization, and can be used for future format changes.
- The `ADDRV2_FORMAT` flag in the stream's version does not determine the actual serialization format; it only sets whether or not V2 encoding is permitted.
ACKs for top commit:
achow101:
ACK f8866e8c324be3322fa507c2ceb1de35d148d0f1
laanwj:
Code review ACK f8866e8c324be3322fa507c2ceb1de35d148d0f1
vasild:
ACK f8866e8c324be3322fa507c2ceb1de35d148d0f1
jonatack:
ACK f8866e8c324be3322fa507c2ceb1de35d148d0f1 tested rebased to master and built/run/restarted with DEBUG_ADDRMAN, peers.dat and anchors ser/deser seems fine
hebasto:
ACK f8866e8c324be3322fa507c2ceb1de35d148d0f1, tested on Linux Mint 20.1 (x86_64).
Tree-SHA512: 3898f8a8c51783a46dd0aae03fa10060521f5dd6e79315fe95ba807689e78f202388ffa28c40bf156c6f7b1fc2ce806b155dcbe56027df73d039a55331723796
Diffstat (limited to 'src')
-rw-r--r-- | src/addrdb.cpp | 22 | ||||
-rw-r--r-- | src/protocol.h | 83 | ||||
-rw-r--r-- | src/test/fuzz/deserialize.cpp | 44 |
3 files changed, 119 insertions, 30 deletions
diff --git a/src/addrdb.cpp b/src/addrdb.cpp index c376aced10..bf2f6c7614 100644 --- a/src/addrdb.cpp +++ b/src/addrdb.cpp @@ -23,7 +23,7 @@ bool SerializeDB(Stream& stream, const Data& data) { // Write and commit header, data try { - CHashWriter hasher(SER_DISK, CLIENT_VERSION); + CHashWriter hasher(stream.GetType(), stream.GetVersion()); stream << Params().MessageStart() << data; hasher << Params().MessageStart() << data; stream << hasher.GetHash(); @@ -35,7 +35,7 @@ bool SerializeDB(Stream& stream, const Data& data) } template <typename Data> -bool SerializeFileDB(const std::string& prefix, const fs::path& path, const Data& data) +bool SerializeFileDB(const std::string& prefix, const fs::path& path, const Data& data, int version) { // Generate random temporary filename uint16_t randv = 0; @@ -45,7 +45,7 @@ bool SerializeFileDB(const std::string& prefix, const fs::path& path, const Data // open temp output file, and associate with CAutoFile fs::path pathTmp = gArgs.GetDataDirNet() / tmpfn; FILE *file = fsbridge::fopen(pathTmp, "wb"); - CAutoFile fileout(file, SER_DISK, CLIENT_VERSION); + CAutoFile fileout(file, SER_DISK, version); if (fileout.IsNull()) { fileout.fclose(); remove(pathTmp); @@ -106,11 +106,11 @@ bool DeserializeDB(Stream& stream, Data& data, bool fCheckSum = true) } template <typename Data> -bool DeserializeFileDB(const fs::path& path, Data& data) +bool DeserializeFileDB(const fs::path& path, Data& data, int version) { // open input file, and associate with CAutoFile FILE* file = fsbridge::fopen(path, "rb"); - CAutoFile filein(file, SER_DISK, CLIENT_VERSION); + CAutoFile filein(file, SER_DISK, version); if (filein.IsNull()) { LogPrintf("Missing or invalid file %s\n", path.string()); return false; @@ -125,12 +125,12 @@ CBanDB::CBanDB(fs::path ban_list_path) : m_ban_list_path(std::move(ban_list_path bool CBanDB::Write(const banmap_t& banSet) { - return SerializeFileDB("banlist", m_ban_list_path, banSet); + return SerializeFileDB("banlist", m_ban_list_path, banSet, CLIENT_VERSION); } bool CBanDB::Read(banmap_t& banSet) { - return DeserializeFileDB(m_ban_list_path, banSet); + return DeserializeFileDB(m_ban_list_path, banSet, CLIENT_VERSION); } CAddrDB::CAddrDB() @@ -140,12 +140,12 @@ CAddrDB::CAddrDB() bool CAddrDB::Write(const CAddrMan& addr) { - return SerializeFileDB("peers", pathAddr, addr); + return SerializeFileDB("peers", pathAddr, addr, CLIENT_VERSION); } bool CAddrDB::Read(CAddrMan& addr) { - return DeserializeFileDB(pathAddr, addr); + return DeserializeFileDB(pathAddr, addr, CLIENT_VERSION); } bool CAddrDB::Read(CAddrMan& addr, CDataStream& ssPeers) @@ -161,13 +161,13 @@ bool CAddrDB::Read(CAddrMan& addr, CDataStream& ssPeers) void DumpAnchors(const fs::path& anchors_db_path, const std::vector<CAddress>& anchors) { LOG_TIME_SECONDS(strprintf("Flush %d outbound block-relay-only peer addresses to anchors.dat", anchors.size())); - SerializeFileDB("anchors", anchors_db_path, anchors); + SerializeFileDB("anchors", anchors_db_path, anchors, CLIENT_VERSION | ADDRV2_FORMAT); } std::vector<CAddress> ReadAnchors(const fs::path& anchors_db_path) { std::vector<CAddress> anchors; - if (DeserializeFileDB(anchors_db_path, anchors)) { + if (DeserializeFileDB(anchors_db_path, anchors, CLIENT_VERSION | ADDRV2_FORMAT)) { LogPrintf("Loaded %i addresses from %s\n", anchors.size(), anchors_db_path.filename()); } else { anchors.clear(); diff --git a/src/protocol.h b/src/protocol.h index aaa9f1df40..f9248899dc 100644 --- a/src/protocol.h +++ b/src/protocol.h @@ -13,6 +13,7 @@ #include <netaddress.h> #include <primitives/transaction.h> #include <serialize.h> +#include <streams.h> #include <uint256.h> #include <version.h> @@ -358,6 +359,31 @@ class CAddress : public CService { static constexpr uint32_t TIME_INIT{100000000}; + /** Historically, CAddress disk serialization stored the CLIENT_VERSION, optionally OR'ed with + * the ADDRV2_FORMAT flag to indicate V2 serialization. The first field has since been + * disentangled from client versioning, and now instead: + * - The low bits (masked by DISK_VERSION_IGNORE_MASK) store the fixed value DISK_VERSION_INIT, + * (in case any code exists that treats it as a client version) but are ignored on + * deserialization. + * - The high bits (masked by ~DISK_VERSION_IGNORE_MASK) store actual serialization information. + * Only 0 or DISK_VERSION_ADDRV2 (equal to the historical ADDRV2_FORMAT) are valid now, and + * any other value triggers a deserialization failure. Other values can be added later if + * needed. + * + * For disk deserialization, ADDRV2_FORMAT in the stream version signals that ADDRV2 + * deserialization is permitted, but the actual format is determined by the high bits in the + * stored version field. For network serialization, the stream version having ADDRV2_FORMAT or + * not determines the actual format used (as it has no embedded version number). + */ + static constexpr uint32_t DISK_VERSION_INIT{220000}; + static constexpr uint32_t DISK_VERSION_IGNORE_MASK{0b00000000'00000111'11111111'11111111}; + /** The version number written in disk serialized addresses to indicate V2 serializations. + * It must be exactly 1<<29, as that is the value that historical versions used for this + * (they used their internal ADDRV2_FORMAT flag here). */ + static constexpr uint32_t DISK_VERSION_ADDRV2{1 << 29}; + static_assert((DISK_VERSION_INIT & ~DISK_VERSION_IGNORE_MASK) == 0, "DISK_VERSION_INIT must be covered by DISK_VERSION_IGNORE_MASK"); + static_assert((DISK_VERSION_ADDRV2 & DISK_VERSION_IGNORE_MASK) == 0, "DISK_VERSION_ADDRV2 must not be covered by DISK_VERSION_IGNORE_MASK"); + public: CAddress() : CService{} {}; CAddress(CService ipIn, ServiceFlags nServicesIn) : CService{ipIn}, nServices{nServicesIn} {}; @@ -365,22 +391,48 @@ public: SERIALIZE_METHODS(CAddress, obj) { - SER_READ(obj, obj.nTime = TIME_INIT); - int nVersion = s.GetVersion(); + // CAddress has a distinct network serialization and a disk serialization, but it should never + // be hashed (except through CHashWriter in addrdb.cpp, which sets SER_DISK), and it's + // ambiguous what that would mean. Make sure no code relying on that is introduced: + assert(!(s.GetType() & SER_GETHASH)); + bool use_v2; + bool store_time; if (s.GetType() & SER_DISK) { - READWRITE(nVersion); - } - if ((s.GetType() & SER_DISK) || - (nVersion != INIT_PROTO_VERSION && !(s.GetType() & SER_GETHASH))) { + // In the disk serialization format, the encoding (v1 or v2) is determined by a flag version + // that's part of the serialization itself. ADDRV2_FORMAT in the stream version only determines + // whether V2 is chosen/permitted at all. + uint32_t stored_format_version = DISK_VERSION_INIT; + if (s.GetVersion() & ADDRV2_FORMAT) stored_format_version |= DISK_VERSION_ADDRV2; + READWRITE(stored_format_version); + stored_format_version &= ~DISK_VERSION_IGNORE_MASK; // ignore low bits + if (stored_format_version == 0) { + use_v2 = false; + } else if (stored_format_version == DISK_VERSION_ADDRV2 && (s.GetVersion() & ADDRV2_FORMAT)) { + // Only support v2 deserialization if ADDRV2_FORMAT is set. + use_v2 = true; + } else { + throw std::ios_base::failure("Unsupported CAddress disk format version"); + } + store_time = true; + } else { + // In the network serialization format, the encoding (v1 or v2) is determined directly by + // the value of ADDRV2_FORMAT in the stream version, as no explicitly encoded version + // exists in the stream. + assert(s.GetType() & SER_NETWORK); + use_v2 = s.GetVersion() & ADDRV2_FORMAT; // The only time we serialize a CAddress object without nTime is in // the initial VERSION messages which contain two CAddress records. // At that point, the serialization version is INIT_PROTO_VERSION. // After the version handshake, serialization version is >= // MIN_PEER_PROTO_VERSION and all ADDR messages are serialized with // nTime. - READWRITE(obj.nTime); + store_time = s.GetVersion() != INIT_PROTO_VERSION; } - if (nVersion & ADDRV2_FORMAT) { + + SER_READ(obj, obj.nTime = TIME_INIT); + if (store_time) READWRITE(obj.nTime); + // nServices is serialized as CompactSize in V2; as uint64_t in V1. + if (use_v2) { uint64_t services_tmp; SER_WRITE(obj, services_tmp = obj.nServices); READWRITE(Using<CompactSizeFormatter<false>>(services_tmp)); @@ -388,13 +440,22 @@ public: } else { READWRITE(Using<CustomUintFormatter<8>>(obj.nServices)); } - READWRITEAS(CService, obj); + // Invoke V1/V2 serializer for CService parent object. + OverrideStream<Stream> os(&s, s.GetType(), use_v2 ? ADDRV2_FORMAT : 0); + SerReadWriteMany(os, ser_action, ReadWriteAsHelper<CService>(obj)); } - // disk and network only + //! Always included in serialization, except in the network format on INIT_PROTO_VERSION. uint32_t nTime{TIME_INIT}; - + //! Serialized as uint64_t in V1, and as CompactSize in V2. ServiceFlags nServices{NODE_NONE}; + + friend bool operator==(const CAddress& a, const CAddress& b) + { + return a.nTime == b.nTime && + a.nServices == b.nServices && + static_cast<const CService&>(a) == static_cast<const CService&>(b); + } }; /** getdata message type flags */ diff --git a/src/test/fuzz/deserialize.cpp b/src/test/fuzz/deserialize.cpp index 1290c78712..decfc2610c 100644 --- a/src/test/fuzz/deserialize.cpp +++ b/src/test/fuzz/deserialize.cpp @@ -53,9 +53,9 @@ struct invalid_fuzzing_input_exception : public std::exception { }; template <typename T> -CDataStream Serialize(const T& obj, const int version = INIT_PROTO_VERSION) +CDataStream Serialize(const T& obj, const int version = INIT_PROTO_VERSION, const int ser_type = SER_NETWORK) { - CDataStream ds(SER_NETWORK, version); + CDataStream ds(ser_type, version); ds << obj; return ds; } @@ -69,9 +69,9 @@ T Deserialize(CDataStream ds) } template <typename T> -void DeserializeFromFuzzingInput(FuzzBufferType buffer, T& obj, const std::optional<int> protocol_version = std::nullopt) +void DeserializeFromFuzzingInput(FuzzBufferType buffer, T& obj, const std::optional<int> protocol_version = std::nullopt, const int ser_type = SER_NETWORK) { - CDataStream ds(buffer, SER_NETWORK, INIT_PROTO_VERSION); + CDataStream ds(buffer, ser_type, INIT_PROTO_VERSION); if (protocol_version) { ds.SetVersion(*protocol_version); } else { @@ -92,9 +92,9 @@ void DeserializeFromFuzzingInput(FuzzBufferType buffer, T& obj, const std::optio } template <typename T> -void AssertEqualAfterSerializeDeserialize(const T& obj, const int version = INIT_PROTO_VERSION) +void AssertEqualAfterSerializeDeserialize(const T& obj, const int version = INIT_PROTO_VERSION, const int ser_type = SER_NETWORK) { - assert(Deserialize<T>(Serialize(obj, version)) == obj); + assert(Deserialize<T>(Serialize(obj, version, ser_type)) == obj); } } // namespace @@ -251,9 +251,37 @@ FUZZ_TARGET_DESERIALIZE(messageheader_deserialize, { DeserializeFromFuzzingInput(buffer, mh); (void)mh.IsCommandValid(); }) -FUZZ_TARGET_DESERIALIZE(address_deserialize, { +FUZZ_TARGET_DESERIALIZE(address_deserialize_v1_notime, { CAddress a; - DeserializeFromFuzzingInput(buffer, a); + DeserializeFromFuzzingInput(buffer, a, INIT_PROTO_VERSION); + // A CAddress without nTime (as is expected under INIT_PROTO_VERSION) will roundtrip + // in all 5 formats (with/without nTime, v1/v2, network/disk) + AssertEqualAfterSerializeDeserialize(a, INIT_PROTO_VERSION); + AssertEqualAfterSerializeDeserialize(a, PROTOCOL_VERSION); + AssertEqualAfterSerializeDeserialize(a, 0, SER_DISK); + AssertEqualAfterSerializeDeserialize(a, PROTOCOL_VERSION | ADDRV2_FORMAT); + AssertEqualAfterSerializeDeserialize(a, ADDRV2_FORMAT, SER_DISK); +}) +FUZZ_TARGET_DESERIALIZE(address_deserialize_v1_withtime, { + CAddress a; + DeserializeFromFuzzingInput(buffer, a, PROTOCOL_VERSION); + // A CAddress in V1 mode will roundtrip in all 4 formats that have nTime. + AssertEqualAfterSerializeDeserialize(a, PROTOCOL_VERSION); + AssertEqualAfterSerializeDeserialize(a, 0, SER_DISK); + AssertEqualAfterSerializeDeserialize(a, PROTOCOL_VERSION | ADDRV2_FORMAT); + AssertEqualAfterSerializeDeserialize(a, ADDRV2_FORMAT, SER_DISK); +}) +FUZZ_TARGET_DESERIALIZE(address_deserialize_v2, { + CAddress a; + DeserializeFromFuzzingInput(buffer, a, PROTOCOL_VERSION | ADDRV2_FORMAT); + // A CAddress in V2 mode will roundtrip in both V2 formats, and also in the V1 formats + // with time if it's V1 compatible. + if (a.IsAddrV1Compatible()) { + AssertEqualAfterSerializeDeserialize(a, PROTOCOL_VERSION); + AssertEqualAfterSerializeDeserialize(a, 0, SER_DISK); + } + AssertEqualAfterSerializeDeserialize(a, PROTOCOL_VERSION | ADDRV2_FORMAT); + AssertEqualAfterSerializeDeserialize(a, ADDRV2_FORMAT, SER_DISK); }) FUZZ_TARGET_DESERIALIZE(inv_deserialize, { CInv i; |