aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPieter Wuille <pieter.wuille@gmail.com>2012-06-15 14:19:11 +0200
committerPieter Wuille <pieter.wuille@gmail.com>2012-10-20 23:08:56 +0200
commit4d6144f97faf9d2a6c89f41d7d2360f21f0b71e2 (patch)
tree02f4f8d5cb22c9ca152e1cb55f9d62943ee5458e
parent43b7905e98d827df45e970dbaf5f08561ecc6cda (diff)
Compact serialization for variable-length integers
Variable-length integers: bytes are a MSB base-128 encoding of the number. The high bit in each byte signifies whether another digit follows. To make the encoding is one-to-one, one is subtracted from all but the last digit. Thus, the byte sequence a[] with length len, where all but the last byte has bit 128 set, encodes the number: (a[len-1] & 0x7F) + sum(i=1..len-1, 128^i*((a[len-i-1] & 0x7F)+1)) Properties: * Very small (0-127: 1 byte, 128-16511: 2 bytes, 16512-2113663: 3 bytes) * Every integer has exactly one encoding * Encoding does not depend on size of original integer type
-rw-r--r--src/serialize.h94
-rw-r--r--src/test/serialize_tests.cpp45
2 files changed, 138 insertions, 1 deletions
diff --git a/src/serialize.h b/src/serialize.h
index 63df3160a6..549e46cbc3 100644
--- a/src/serialize.h
+++ b/src/serialize.h
@@ -238,9 +238,75 @@ uint64 ReadCompactSize(Stream& is)
return nSizeRet;
}
+// Variable-length integers: bytes are a MSB base-128 encoding of the number.
+// The high bit in each byte signifies whether another digit follows. To make
+// the encoding is one-to-one, one is subtracted from all but the last digit.
+// Thus, the byte sequence a[] with length len, where all but the last byte
+// has bit 128 set, encodes the number:
+//
+// (a[len-1] & 0x7F) + sum(i=1..len-1, 128^i*((a[len-i-1] & 0x7F)+1))
+//
+// Properties:
+// * Very small (0-127: 1 byte, 128-16511: 2 bytes, 16512-2113663: 3 bytes)
+// * Every integer has exactly one encoding
+// * Encoding does not depend on size of original integer type
+// * No redundancy: every (infinite) byte sequence corresponds to a list
+// of encoded integers.
+//
+// 0: [0x00] 256: [0x81 0x00]
+// 1: [0x01] 16383: [0xFE 0x7F]
+// 127: [0x7F] 16384: [0xFF 0x00]
+// 128: [0x80 0x00] 16511: [0x80 0xFF 0x7F]
+// 255: [0x80 0x7F] 65535: [0x82 0xFD 0x7F]
+// 2^32: [0x8E 0xFE 0xFE 0xFF 0x00]
+
+template<typename I>
+inline unsigned int GetSizeOfVarInt(I n)
+{
+ int nRet = 0;
+ while(true) {
+ nRet++;
+ if (n <= 0x7F)
+ break;
+ n = (n >> 7) - 1;
+ }
+ return nRet;
+}
+
+template<typename Stream, typename I>
+void WriteVarInt(Stream& os, I n)
+{
+ unsigned char tmp[(sizeof(n)*8+6)/7];
+ int len=0;
+ while(true) {
+ tmp[len] = (n & 0x7F) | (len ? 0x80 : 0x00);
+ if (n <= 0x7F)
+ break;
+ n = (n >> 7) - 1;
+ len++;
+ }
+ do {
+ WRITEDATA(os, tmp[len]);
+ } while(len--);
+}
+template<typename Stream, typename I>
+I ReadVarInt(Stream& is)
+{
+ I n = 0;
+ while(true) {
+ unsigned char chData;
+ READDATA(is, chData);
+ n = (n << 7) | (chData & 0x7F);
+ if (chData & 0x80)
+ n++;
+ else
+ return n;
+ }
+}
-#define FLATDATA(obj) REF(CFlatData((char*)&(obj), (char*)&(obj) + sizeof(obj)))
+#define FLATDATA(obj) REF(CFlatData((char*)&(obj), (char*)&(obj) + sizeof(obj)))
+#define VARINT(obj) REF(WrapVarInt(REF(obj)))
/** Wrapper for serializing arrays and POD.
*/
@@ -274,6 +340,32 @@ public:
}
};
+template<typename I>
+class CVarInt
+{
+protected:
+ I &n;
+public:
+ CVarInt(I& nIn) : n(nIn) { }
+
+ unsigned int GetSerializeSize(int, int) const {
+ return GetSizeOfVarInt<I>(n);
+ }
+
+ template<typename Stream>
+ void Serialize(Stream &s, int, int) const {
+ WriteVarInt<Stream,I>(s, n);
+ }
+
+ template<typename Stream>
+ void Unserialize(Stream& s, int, int) {
+ n = ReadVarInt<Stream,I>(s);
+ }
+};
+
+template<typename I>
+CVarInt<I> WrapVarInt(I& n) { return CVarInt<I>(n); }
+
//
// Forward declarations
//
diff --git a/src/test/serialize_tests.cpp b/src/test/serialize_tests.cpp
new file mode 100644
index 0000000000..90ac89f8c5
--- /dev/null
+++ b/src/test/serialize_tests.cpp
@@ -0,0 +1,45 @@
+#include <boost/test/unit_test.hpp>
+
+#include <string>
+#include <vector>
+
+#include "serialize.h"
+
+using namespace std;
+
+BOOST_AUTO_TEST_SUITE(serialize_tests)
+
+BOOST_AUTO_TEST_CASE(varints)
+{
+ // encode
+
+ CDataStream ss(SER_DISK, 0);
+ CDataStream::size_type size = 0;
+ for (int i = 0; i < 100000; i++) {
+ ss << VARINT(i);
+ size += ::GetSerializeSize(VARINT(i), 0, 0);
+ BOOST_CHECK(size == ss.size());
+ }
+
+ for (uint64 i = 0; i < 100000000000ULL; i += 999999937) {
+ ss << VARINT(i);
+ size += ::GetSerializeSize(VARINT(i), 0, 0);
+ BOOST_CHECK(size == ss.size());
+ }
+
+ // decode
+ for (int i = 0; i < 100000; i++) {
+ int j;
+ ss >> VARINT(j);
+ BOOST_CHECK_MESSAGE(i == j, "decoded:" << j << " expected:" << i);
+ }
+
+ for (uint64 i = 0; i < 100000000000ULL; i += 999999937) {
+ uint64 j;
+ ss >> VARINT(j);
+ BOOST_CHECK_MESSAGE(i == j, "decoded:" << j << " expected:" << i);
+ }
+
+}
+
+BOOST_AUTO_TEST_SUITE_END()