diff options
author | Ava Chow <github@achow101.com> | 2024-08-12 15:03:35 -0400 |
---|---|---|
committer | Ava Chow <github@achow101.com> | 2024-08-12 15:03:35 -0400 |
commit | ba5fdd1a686fd0884db19d1bf841dca268228cbd (patch) | |
tree | f91ce6ff04babf41e5b77a003d3161c568d1362d /contrib | |
parent | 5c5a298f6da183ab2dda95e8318b3cbde11b172f (diff) | |
parent | 77ff0ec1f185b818b30877de2bedc1750319e6c4 (diff) |
Merge bitcoin/bitcoin#30607: contrib: support reading XORed blocks in linearize-data.py script
77ff0ec1f185b818b30877de2bedc1750319e6c4 contrib: support reading XORed blocks in linearize-data.py script (Sebastian Falbesoner)
Pull request description:
This PR is a small follow-up for #28052, adding support for the block linearization script to handle XORed blocksdir *.dat files. Note that if no xor.dat file exists, the XOR pattern is set to all-zeros, in order to still support blockdirs that have been created with versions earlier than 28.x.
Partly fixes issue #30599.
ACKs for top commit:
achow101:
ACK 77ff0ec1f185b818b30877de2bedc1750319e6c4
tdb3:
ACK 77ff0ec1f185b818b30877de2bedc1750319e6c4
hodlinator:
ACK 77ff0ec1f185b818b30877de2bedc1750319e6c4
Tree-SHA512: 011eb02e2411de373cbbf4b26db4640fc693a20be8c2430529fba6e36a3a3abfdfdc3b005d330f9ec2846bfad9bfbf34231c574ba99289ef37dd51a68e6e7f3d
Diffstat (limited to 'contrib')
-rwxr-xr-x | contrib/linearize/linearize-data.py | 28 |
1 files changed, 23 insertions, 5 deletions
diff --git a/contrib/linearize/linearize-data.py b/contrib/linearize/linearize-data.py index 24f6b29a26..74d9830705 100755 --- a/contrib/linearize/linearize-data.py +++ b/contrib/linearize/linearize-data.py @@ -76,6 +76,16 @@ def getFirstBlockFileId(block_dir_path): blkId = int(firstBlkFn[3:8]) return blkId +def read_xor_key(blocks_path): + NUM_XOR_BYTES = 8 # From InitBlocksdirXorKey::xor_key.size() + try: + xor_filename = os.path.join(blocks_path, "xor.dat") + with open(xor_filename, "rb") as xor_file: + return xor_file.read(NUM_XOR_BYTES) + # support also blockdirs created with pre-v28 versions, where no xor key exists yet + except FileNotFoundError: + return bytes([0] * NUM_XOR_BYTES) + # Block header and extent on disk BlockExtent = namedtuple('BlockExtent', ['fn', 'offset', 'inhdr', 'blkhdr', 'size']) @@ -95,6 +105,7 @@ class BlockDataCopier: self.outFname = None self.blkCountIn = 0 self.blkCountOut = 0 + self.xor_key = read_xor_key(self.settings['input']) self.lastDate = datetime.datetime(2000, 1, 1) self.highTS = 1408893517 - 315360000 @@ -113,6 +124,13 @@ class BlockDataCopier: self.outOfOrderData = {} self.outOfOrderSize = 0 # running total size for items in outOfOrderData + def read_xored(self, f, size): + offset = f.tell() + data = bytearray(f.read(size)) + for i in range(len(data)): + data[i] ^= self.xor_key[(i + offset) % len(self.xor_key)] + return bytes(data) + def writeBlock(self, inhdr, blk_hdr, rawblock): blockSizeOnDisk = len(inhdr) + len(blk_hdr) + len(rawblock) if not self.fileOutput and ((self.outsz + blockSizeOnDisk) > self.maxOutSz): @@ -165,7 +183,7 @@ class BlockDataCopier: '''Fetch block contents from disk given extents''' with open(self.inFileName(extent.fn), "rb") as f: f.seek(extent.offset) - return f.read(extent.size) + return self.read_xored(f, extent.size) def copyOneBlock(self): '''Find the next block to be written in the input, and copy it to the output.''' @@ -190,7 +208,7 @@ class BlockDataCopier: print("Premature end of block data") return - inhdr = self.inF.read(8) + inhdr = self.read_xored(self.inF, 8) if (not inhdr or (inhdr[0] == "\0")): self.inF.close() self.inF = None @@ -207,7 +225,7 @@ class BlockDataCopier: inLenLE = inhdr[4:] su = struct.unpack("<I", inLenLE) inLen = su[0] - 80 # length without header - blk_hdr = self.inF.read(80) + blk_hdr = self.read_xored(self.inF, 80) inExtent = BlockExtent(self.inFn, self.inF.tell(), inhdr, blk_hdr, inLen) self.hash_str = calc_hash_str(blk_hdr) @@ -224,7 +242,7 @@ class BlockDataCopier: if self.blkCountOut == blkHeight: # If in-order block, just copy - rawblock = self.inF.read(inLen) + rawblock = self.read_xored(self.inF, inLen) self.writeBlock(inhdr, blk_hdr, rawblock) # See if we can catch up to prior out-of-order blocks @@ -237,7 +255,7 @@ class BlockDataCopier: # If there is space in the cache, read the data # Reading the data in file sequence instead of seeking and fetching it later is preferred, # but we don't want to fill up memory - self.outOfOrderData[blkHeight] = self.inF.read(inLen) + self.outOfOrderData[blkHeight] = self.read_xored(self.inF, inLen) self.outOfOrderSize += inLen else: # If no space in cache, seek forward self.inF.seek(inLen, os.SEEK_CUR) |