aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorW. J. van der Laan <laanwj@protonmail.com>2021-06-17 17:37:44 +0200
committerW. J. van der Laan <laanwj@protonmail.com>2021-06-17 17:43:16 +0200
commit7b45c5e875cab07509aaf3974f5ad6c3888dd3ff (patch)
treeaea1cf8ed3ce17353db7372c9457ca6847a47b1d /src
parent922abe8ca3825a73fd2054d6bc8f557acefb5bab (diff)
parentf8866e8c324be3322fa507c2ceb1de35d148d0f1 (diff)
Merge bitcoin/bitcoin#20516: Well-defined CAddress disk serialization, and addrv2 anchors.dat
f8866e8c324be3322fa507c2ceb1de35d148d0f1 Add roundtrip fuzz tests for CAddress serialization (Pieter Wuille) e2f0548b52a4b2ba3edf77e3f21365f1e8f270a4 Use addrv2 serialization in anchors.dat (Pieter Wuille) 8cd8f37dfe3ffb73a09f3ad773603d9d89452245 Introduce well-defined CAddress disk serialization (Pieter Wuille) Pull request description: Alternative to #20509. This makes the `CAddress` disk serialization format well defined, and uses it to enable addrv2 support in anchors.dat (in a way that's compatible with older software). The new format is: - The first 4 bytes store a format version number. Its low 19 bits are ignored (as those historically stored the `CLIENT_VERSION`), but its high 13 bits specify the actual serialization: - 0x00000000: LE64 encoding for `nServices`, V1 encoding for `CService` (like pre-BIP155 network serialization). - 0x20000000: CompactSize encoding for `nServices`, V2 encoding for `CService` (like BIP155 network serialization). - Any other value triggers an unsupported format error on deserialization, and can be used for future format changes. - The `ADDRV2_FORMAT` flag in the stream's version does not determine the actual serialization format; it only sets whether or not V2 encoding is permitted. ACKs for top commit: achow101: ACK f8866e8c324be3322fa507c2ceb1de35d148d0f1 laanwj: Code review ACK f8866e8c324be3322fa507c2ceb1de35d148d0f1 vasild: ACK f8866e8c324be3322fa507c2ceb1de35d148d0f1 jonatack: ACK f8866e8c324be3322fa507c2ceb1de35d148d0f1 tested rebased to master and built/run/restarted with DEBUG_ADDRMAN, peers.dat and anchors ser/deser seems fine hebasto: ACK f8866e8c324be3322fa507c2ceb1de35d148d0f1, tested on Linux Mint 20.1 (x86_64). Tree-SHA512: 3898f8a8c51783a46dd0aae03fa10060521f5dd6e79315fe95ba807689e78f202388ffa28c40bf156c6f7b1fc2ce806b155dcbe56027df73d039a55331723796
Diffstat (limited to 'src')
-rw-r--r--src/addrdb.cpp22
-rw-r--r--src/protocol.h83
-rw-r--r--src/test/fuzz/deserialize.cpp44
3 files changed, 119 insertions, 30 deletions
diff --git a/src/addrdb.cpp b/src/addrdb.cpp
index c376aced10..bf2f6c7614 100644
--- a/src/addrdb.cpp
+++ b/src/addrdb.cpp
@@ -23,7 +23,7 @@ bool SerializeDB(Stream& stream, const Data& data)
{
// Write and commit header, data
try {
- CHashWriter hasher(SER_DISK, CLIENT_VERSION);
+ CHashWriter hasher(stream.GetType(), stream.GetVersion());
stream << Params().MessageStart() << data;
hasher << Params().MessageStart() << data;
stream << hasher.GetHash();
@@ -35,7 +35,7 @@ bool SerializeDB(Stream& stream, const Data& data)
}
template <typename Data>
-bool SerializeFileDB(const std::string& prefix, const fs::path& path, const Data& data)
+bool SerializeFileDB(const std::string& prefix, const fs::path& path, const Data& data, int version)
{
// Generate random temporary filename
uint16_t randv = 0;
@@ -45,7 +45,7 @@ bool SerializeFileDB(const std::string& prefix, const fs::path& path, const Data
// open temp output file, and associate with CAutoFile
fs::path pathTmp = gArgs.GetDataDirNet() / tmpfn;
FILE *file = fsbridge::fopen(pathTmp, "wb");
- CAutoFile fileout(file, SER_DISK, CLIENT_VERSION);
+ CAutoFile fileout(file, SER_DISK, version);
if (fileout.IsNull()) {
fileout.fclose();
remove(pathTmp);
@@ -106,11 +106,11 @@ bool DeserializeDB(Stream& stream, Data& data, bool fCheckSum = true)
}
template <typename Data>
-bool DeserializeFileDB(const fs::path& path, Data& data)
+bool DeserializeFileDB(const fs::path& path, Data& data, int version)
{
// open input file, and associate with CAutoFile
FILE* file = fsbridge::fopen(path, "rb");
- CAutoFile filein(file, SER_DISK, CLIENT_VERSION);
+ CAutoFile filein(file, SER_DISK, version);
if (filein.IsNull()) {
LogPrintf("Missing or invalid file %s\n", path.string());
return false;
@@ -125,12 +125,12 @@ CBanDB::CBanDB(fs::path ban_list_path) : m_ban_list_path(std::move(ban_list_path
bool CBanDB::Write(const banmap_t& banSet)
{
- return SerializeFileDB("banlist", m_ban_list_path, banSet);
+ return SerializeFileDB("banlist", m_ban_list_path, banSet, CLIENT_VERSION);
}
bool CBanDB::Read(banmap_t& banSet)
{
- return DeserializeFileDB(m_ban_list_path, banSet);
+ return DeserializeFileDB(m_ban_list_path, banSet, CLIENT_VERSION);
}
CAddrDB::CAddrDB()
@@ -140,12 +140,12 @@ CAddrDB::CAddrDB()
bool CAddrDB::Write(const CAddrMan& addr)
{
- return SerializeFileDB("peers", pathAddr, addr);
+ return SerializeFileDB("peers", pathAddr, addr, CLIENT_VERSION);
}
bool CAddrDB::Read(CAddrMan& addr)
{
- return DeserializeFileDB(pathAddr, addr);
+ return DeserializeFileDB(pathAddr, addr, CLIENT_VERSION);
}
bool CAddrDB::Read(CAddrMan& addr, CDataStream& ssPeers)
@@ -161,13 +161,13 @@ bool CAddrDB::Read(CAddrMan& addr, CDataStream& ssPeers)
void DumpAnchors(const fs::path& anchors_db_path, const std::vector<CAddress>& anchors)
{
LOG_TIME_SECONDS(strprintf("Flush %d outbound block-relay-only peer addresses to anchors.dat", anchors.size()));
- SerializeFileDB("anchors", anchors_db_path, anchors);
+ SerializeFileDB("anchors", anchors_db_path, anchors, CLIENT_VERSION | ADDRV2_FORMAT);
}
std::vector<CAddress> ReadAnchors(const fs::path& anchors_db_path)
{
std::vector<CAddress> anchors;
- if (DeserializeFileDB(anchors_db_path, anchors)) {
+ if (DeserializeFileDB(anchors_db_path, anchors, CLIENT_VERSION | ADDRV2_FORMAT)) {
LogPrintf("Loaded %i addresses from %s\n", anchors.size(), anchors_db_path.filename());
} else {
anchors.clear();
diff --git a/src/protocol.h b/src/protocol.h
index aaa9f1df40..f9248899dc 100644
--- a/src/protocol.h
+++ b/src/protocol.h
@@ -13,6 +13,7 @@
#include <netaddress.h>
#include <primitives/transaction.h>
#include <serialize.h>
+#include <streams.h>
#include <uint256.h>
#include <version.h>
@@ -358,6 +359,31 @@ class CAddress : public CService
{
static constexpr uint32_t TIME_INIT{100000000};
+ /** Historically, CAddress disk serialization stored the CLIENT_VERSION, optionally OR'ed with
+ * the ADDRV2_FORMAT flag to indicate V2 serialization. The first field has since been
+ * disentangled from client versioning, and now instead:
+ * - The low bits (masked by DISK_VERSION_IGNORE_MASK) store the fixed value DISK_VERSION_INIT,
+ * (in case any code exists that treats it as a client version) but are ignored on
+ * deserialization.
+ * - The high bits (masked by ~DISK_VERSION_IGNORE_MASK) store actual serialization information.
+ * Only 0 or DISK_VERSION_ADDRV2 (equal to the historical ADDRV2_FORMAT) are valid now, and
+ * any other value triggers a deserialization failure. Other values can be added later if
+ * needed.
+ *
+ * For disk deserialization, ADDRV2_FORMAT in the stream version signals that ADDRV2
+ * deserialization is permitted, but the actual format is determined by the high bits in the
+ * stored version field. For network serialization, the stream version having ADDRV2_FORMAT or
+ * not determines the actual format used (as it has no embedded version number).
+ */
+ static constexpr uint32_t DISK_VERSION_INIT{220000};
+ static constexpr uint32_t DISK_VERSION_IGNORE_MASK{0b00000000'00000111'11111111'11111111};
+ /** The version number written in disk serialized addresses to indicate V2 serializations.
+ * It must be exactly 1<<29, as that is the value that historical versions used for this
+ * (they used their internal ADDRV2_FORMAT flag here). */
+ static constexpr uint32_t DISK_VERSION_ADDRV2{1 << 29};
+ static_assert((DISK_VERSION_INIT & ~DISK_VERSION_IGNORE_MASK) == 0, "DISK_VERSION_INIT must be covered by DISK_VERSION_IGNORE_MASK");
+ static_assert((DISK_VERSION_ADDRV2 & DISK_VERSION_IGNORE_MASK) == 0, "DISK_VERSION_ADDRV2 must not be covered by DISK_VERSION_IGNORE_MASK");
+
public:
CAddress() : CService{} {};
CAddress(CService ipIn, ServiceFlags nServicesIn) : CService{ipIn}, nServices{nServicesIn} {};
@@ -365,22 +391,48 @@ public:
SERIALIZE_METHODS(CAddress, obj)
{
- SER_READ(obj, obj.nTime = TIME_INIT);
- int nVersion = s.GetVersion();
+ // CAddress has a distinct network serialization and a disk serialization, but it should never
+ // be hashed (except through CHashWriter in addrdb.cpp, which sets SER_DISK), and it's
+ // ambiguous what that would mean. Make sure no code relying on that is introduced:
+ assert(!(s.GetType() & SER_GETHASH));
+ bool use_v2;
+ bool store_time;
if (s.GetType() & SER_DISK) {
- READWRITE(nVersion);
- }
- if ((s.GetType() & SER_DISK) ||
- (nVersion != INIT_PROTO_VERSION && !(s.GetType() & SER_GETHASH))) {
+ // In the disk serialization format, the encoding (v1 or v2) is determined by a flag version
+ // that's part of the serialization itself. ADDRV2_FORMAT in the stream version only determines
+ // whether V2 is chosen/permitted at all.
+ uint32_t stored_format_version = DISK_VERSION_INIT;
+ if (s.GetVersion() & ADDRV2_FORMAT) stored_format_version |= DISK_VERSION_ADDRV2;
+ READWRITE(stored_format_version);
+ stored_format_version &= ~DISK_VERSION_IGNORE_MASK; // ignore low bits
+ if (stored_format_version == 0) {
+ use_v2 = false;
+ } else if (stored_format_version == DISK_VERSION_ADDRV2 && (s.GetVersion() & ADDRV2_FORMAT)) {
+ // Only support v2 deserialization if ADDRV2_FORMAT is set.
+ use_v2 = true;
+ } else {
+ throw std::ios_base::failure("Unsupported CAddress disk format version");
+ }
+ store_time = true;
+ } else {
+ // In the network serialization format, the encoding (v1 or v2) is determined directly by
+ // the value of ADDRV2_FORMAT in the stream version, as no explicitly encoded version
+ // exists in the stream.
+ assert(s.GetType() & SER_NETWORK);
+ use_v2 = s.GetVersion() & ADDRV2_FORMAT;
// The only time we serialize a CAddress object without nTime is in
// the initial VERSION messages which contain two CAddress records.
// At that point, the serialization version is INIT_PROTO_VERSION.
// After the version handshake, serialization version is >=
// MIN_PEER_PROTO_VERSION and all ADDR messages are serialized with
// nTime.
- READWRITE(obj.nTime);
+ store_time = s.GetVersion() != INIT_PROTO_VERSION;
}
- if (nVersion & ADDRV2_FORMAT) {
+
+ SER_READ(obj, obj.nTime = TIME_INIT);
+ if (store_time) READWRITE(obj.nTime);
+ // nServices is serialized as CompactSize in V2; as uint64_t in V1.
+ if (use_v2) {
uint64_t services_tmp;
SER_WRITE(obj, services_tmp = obj.nServices);
READWRITE(Using<CompactSizeFormatter<false>>(services_tmp));
@@ -388,13 +440,22 @@ public:
} else {
READWRITE(Using<CustomUintFormatter<8>>(obj.nServices));
}
- READWRITEAS(CService, obj);
+ // Invoke V1/V2 serializer for CService parent object.
+ OverrideStream<Stream> os(&s, s.GetType(), use_v2 ? ADDRV2_FORMAT : 0);
+ SerReadWriteMany(os, ser_action, ReadWriteAsHelper<CService>(obj));
}
- // disk and network only
+ //! Always included in serialization, except in the network format on INIT_PROTO_VERSION.
uint32_t nTime{TIME_INIT};
-
+ //! Serialized as uint64_t in V1, and as CompactSize in V2.
ServiceFlags nServices{NODE_NONE};
+
+ friend bool operator==(const CAddress& a, const CAddress& b)
+ {
+ return a.nTime == b.nTime &&
+ a.nServices == b.nServices &&
+ static_cast<const CService&>(a) == static_cast<const CService&>(b);
+ }
};
/** getdata message type flags */
diff --git a/src/test/fuzz/deserialize.cpp b/src/test/fuzz/deserialize.cpp
index 1290c78712..decfc2610c 100644
--- a/src/test/fuzz/deserialize.cpp
+++ b/src/test/fuzz/deserialize.cpp
@@ -53,9 +53,9 @@ struct invalid_fuzzing_input_exception : public std::exception {
};
template <typename T>
-CDataStream Serialize(const T& obj, const int version = INIT_PROTO_VERSION)
+CDataStream Serialize(const T& obj, const int version = INIT_PROTO_VERSION, const int ser_type = SER_NETWORK)
{
- CDataStream ds(SER_NETWORK, version);
+ CDataStream ds(ser_type, version);
ds << obj;
return ds;
}
@@ -69,9 +69,9 @@ T Deserialize(CDataStream ds)
}
template <typename T>
-void DeserializeFromFuzzingInput(FuzzBufferType buffer, T& obj, const std::optional<int> protocol_version = std::nullopt)
+void DeserializeFromFuzzingInput(FuzzBufferType buffer, T& obj, const std::optional<int> protocol_version = std::nullopt, const int ser_type = SER_NETWORK)
{
- CDataStream ds(buffer, SER_NETWORK, INIT_PROTO_VERSION);
+ CDataStream ds(buffer, ser_type, INIT_PROTO_VERSION);
if (protocol_version) {
ds.SetVersion(*protocol_version);
} else {
@@ -92,9 +92,9 @@ void DeserializeFromFuzzingInput(FuzzBufferType buffer, T& obj, const std::optio
}
template <typename T>
-void AssertEqualAfterSerializeDeserialize(const T& obj, const int version = INIT_PROTO_VERSION)
+void AssertEqualAfterSerializeDeserialize(const T& obj, const int version = INIT_PROTO_VERSION, const int ser_type = SER_NETWORK)
{
- assert(Deserialize<T>(Serialize(obj, version)) == obj);
+ assert(Deserialize<T>(Serialize(obj, version, ser_type)) == obj);
}
} // namespace
@@ -251,9 +251,37 @@ FUZZ_TARGET_DESERIALIZE(messageheader_deserialize, {
DeserializeFromFuzzingInput(buffer, mh);
(void)mh.IsCommandValid();
})
-FUZZ_TARGET_DESERIALIZE(address_deserialize, {
+FUZZ_TARGET_DESERIALIZE(address_deserialize_v1_notime, {
CAddress a;
- DeserializeFromFuzzingInput(buffer, a);
+ DeserializeFromFuzzingInput(buffer, a, INIT_PROTO_VERSION);
+ // A CAddress without nTime (as is expected under INIT_PROTO_VERSION) will roundtrip
+ // in all 5 formats (with/without nTime, v1/v2, network/disk)
+ AssertEqualAfterSerializeDeserialize(a, INIT_PROTO_VERSION);
+ AssertEqualAfterSerializeDeserialize(a, PROTOCOL_VERSION);
+ AssertEqualAfterSerializeDeserialize(a, 0, SER_DISK);
+ AssertEqualAfterSerializeDeserialize(a, PROTOCOL_VERSION | ADDRV2_FORMAT);
+ AssertEqualAfterSerializeDeserialize(a, ADDRV2_FORMAT, SER_DISK);
+})
+FUZZ_TARGET_DESERIALIZE(address_deserialize_v1_withtime, {
+ CAddress a;
+ DeserializeFromFuzzingInput(buffer, a, PROTOCOL_VERSION);
+ // A CAddress in V1 mode will roundtrip in all 4 formats that have nTime.
+ AssertEqualAfterSerializeDeserialize(a, PROTOCOL_VERSION);
+ AssertEqualAfterSerializeDeserialize(a, 0, SER_DISK);
+ AssertEqualAfterSerializeDeserialize(a, PROTOCOL_VERSION | ADDRV2_FORMAT);
+ AssertEqualAfterSerializeDeserialize(a, ADDRV2_FORMAT, SER_DISK);
+})
+FUZZ_TARGET_DESERIALIZE(address_deserialize_v2, {
+ CAddress a;
+ DeserializeFromFuzzingInput(buffer, a, PROTOCOL_VERSION | ADDRV2_FORMAT);
+ // A CAddress in V2 mode will roundtrip in both V2 formats, and also in the V1 formats
+ // with time if it's V1 compatible.
+ if (a.IsAddrV1Compatible()) {
+ AssertEqualAfterSerializeDeserialize(a, PROTOCOL_VERSION);
+ AssertEqualAfterSerializeDeserialize(a, 0, SER_DISK);
+ }
+ AssertEqualAfterSerializeDeserialize(a, PROTOCOL_VERSION | ADDRV2_FORMAT);
+ AssertEqualAfterSerializeDeserialize(a, ADDRV2_FORMAT, SER_DISK);
})
FUZZ_TARGET_DESERIALIZE(inv_deserialize, {
CInv i;