aboutsummaryrefslogtreecommitdiff
path: root/contrib/linearize
diff options
context:
space:
mode:
authorDoug <joroark@vt.edu>2016-12-20 19:25:24 -0800
committerDouglas Roark <doug@bloq.com>2017-01-05 00:46:29 -0800
commitd5aa19813c32eff02d4b65ac9a4131f3eed04825 (patch)
treea7c97ce84513d941ab73733cb3b3afd3a9da1bb0 /contrib/linearize
parent7dac1e5e9e887f5f6ff146e812a05bd3bf281eae (diff)
Allow linearization scripts to support hash byte reversal
Currently, the linearization scripts require input hashes to be in one endian form. Add support for byte reversal.
Diffstat (limited to 'contrib/linearize')
-rw-r--r--contrib/linearize/README.md40
-rw-r--r--contrib/linearize/example-linearize.cfg4
-rwxr-xr-xcontrib/linearize/linearize-data.py19
-rwxr-xr-xcontrib/linearize/linearize-hashes.py14
4 files changed, 58 insertions, 19 deletions
diff --git a/contrib/linearize/README.md b/contrib/linearize/README.md
index 06f278f3b3..07597481e0 100644
--- a/contrib/linearize/README.md
+++ b/contrib/linearize/README.md
@@ -1,33 +1,43 @@
# Linearize
-Construct a linear, no-fork, best version of the blockchain.
+Construct a linear, no-fork, best version of the Bitcoin blockchain.
## Step 1: Download hash list
$ ./linearize-hashes.py linearize.cfg > hashlist.txt
Required configuration file settings for linearize-hashes:
-* RPC: rpcuser, rpcpassword
+* RPC: `rpcuser`, `rpcpassword`
Optional config file setting for linearize-hashes:
-* RPC: host, port
-* Block chain: min_height, max_height
+* RPC: `host`, `port` (Default: `127.0.0.1:8332`)
+* Blockchain: `min_height`, `max_height`
+* `rev_hash_bytes`: If true, the written block hash list will be
+byte-reversed. (In other words, the hash returned by getblockhash will have its
+bytes reversed.) False by default. Intended for generation of
+standalone hash lists but safe to use with linearize-data.py, which will output
+the same data no matter which byte format is chosen.
## Step 2: Copy local block data
$ ./linearize-data.py linearize.cfg
Required configuration file settings:
-* "input": bitcoind blocks/ directory containing blkNNNNN.dat
-* "hashlist": text file containing list of block hashes, linearized-hashes.py
-output.
-* "output_file": bootstrap.dat
+* `output_file`: The file that will contain the final blockchain.
or
-* "output": output directory for linearized blocks/blkNNNNN.dat output
+* `output`: Output directory for linearized blocks/blkNNNNN.dat output.
Optional config file setting for linearize-data:
-* "netmagic": network magic number
-* "max_out_sz": maximum output file size (default `1000*1000*1000`)
-* "split_timestamp": Split files when a new month is first seen, in addition to
-reaching a maximum file size.
-* "file_timestamp": Set each file's last-modified time to that of the
-most recent block in that file.
+* `file_timestamp`: Set each file's last-modified time to that of the most
+recent block in that file.
+* `genesis`: The hash of the genesis block in the blockchain.
+* `input: bitcoind blocks/ directory containing blkNNNNN.dat
+* `hashlist`: text file containing list of block hashes created by
+linearize-hashes.py.
+* `max_out_sz`: Maximum size for files created by the `output_file` option.
+(Default: `1000*1000*1000 bytes`)
+* `netmagic`: Network magic number.
+* `rev_hash_bytes`: If true, the block hash list written by linearize-hashes.py
+will be byte-reversed when read by linearize-data.py. See the linearize-hashes
+entry for more information.
+* `split_timestamp`: Split blockchain files when a new month is first seen, in
+addition to reaching a maximum file size (`max_out_sz`).
diff --git a/contrib/linearize/example-linearize.cfg b/contrib/linearize/example-linearize.cfg
index 38da02e66c..cccdd79213 100644
--- a/contrib/linearize/example-linearize.cfg
+++ b/contrib/linearize/example-linearize.cfg
@@ -23,7 +23,9 @@ input=/home/example/.bitcoin/blocks
output_file=/home/example/Downloads/bootstrap.dat
hashlist=hashlist.txt
-split_year=1
# Maxmimum size in bytes of out-of-order blocks cache in memory
out_of_order_cache_sz = 100000000
+
+# Do we want the reverse the hash bytes coming from getblockhash?
+rev_hash_bytes = False
diff --git a/contrib/linearize/linearize-data.py b/contrib/linearize/linearize-data.py
index d0417748fc..adea35dbd5 100755
--- a/contrib/linearize/linearize-data.py
+++ b/contrib/linearize/linearize-data.py
@@ -23,6 +23,12 @@ from collections import namedtuple
settings = {}
+##### Switch endian-ness #####
+def hex_switchEndian(s):
+ """ Switches the endianness of a hex string (in pairs of hex chars) """
+ pairList = [s[i]+s[i+1] for i in range(0,len(s),2)]
+ return ''.join(pairList[::-1])
+
def uint32(x):
return x & 0xffffffffL
@@ -69,17 +75,21 @@ def get_blk_dt(blk_hdr):
dt_ym = datetime.datetime(dt.year, dt.month, 1)
return (dt_ym, nTime)
+# When getting the list of block hashes, undo any byte reversals.
def get_block_hashes(settings):
blkindex = []
f = open(settings['hashlist'], "r")
for line in f:
line = line.rstrip()
+ if settings['rev_hash_bytes'] == 'true':
+ line = hex_switchEndian(line)
blkindex.append(line)
print("Read " + str(len(blkindex)) + " hashes")
return blkindex
+# The block map shouldn't give or receive byte-reversed hashes.
def mkblockmap(blkindex):
blkmap = {}
for height,hash in enumerate(blkindex):
@@ -265,6 +275,12 @@ if __name__ == '__main__':
settings[m.group(1)] = m.group(2)
f.close()
+ # Force hash byte format setting to be lowercase to make comparisons easier.
+ # Also place upfront in case any settings need to know about it.
+ if 'rev_hash_bytes' not in settings:
+ settings['rev_hash_bytes'] = 'false'
+ settings['rev_hash_bytes'] = settings['rev_hash_bytes'].lower()
+
if 'netmagic' not in settings:
settings['netmagic'] = 'f9beb4d9'
if 'genesis' not in settings:
@@ -295,9 +311,8 @@ if __name__ == '__main__':
blkindex = get_block_hashes(settings)
blkmap = mkblockmap(blkindex)
+ # Block hash map won't be byte-reversed. Neither should the genesis hash.
if not settings['genesis'] in blkmap:
print("Genesis block not found in hashlist")
else:
BlockDataCopier(settings, blkindex, blkmap).run()
-
-
diff --git a/contrib/linearize/linearize-hashes.py b/contrib/linearize/linearize-hashes.py
index ba8d6f8a04..9efdf13255 100755
--- a/contrib/linearize/linearize-hashes.py
+++ b/contrib/linearize/linearize-hashes.py
@@ -17,6 +17,12 @@ import sys
settings = {}
+##### Switch endian-ness #####
+def hex_switchEndian(s):
+ """ Switches the endianness of a hex string (in pairs of hex chars) """
+ pairList = [s[i]+s[i+1] for i in range(0,len(s),2)]
+ return ''.join(pairList[::-1])
+
class BitcoinRPC:
def __init__(self, host, port, username, password):
authpair = "%s:%s" % (username, password)
@@ -70,6 +76,8 @@ def get_block_hashes(settings, max_blocks_per_call=10000):
print('JSON-RPC: error at height', height+x, ': ', resp_obj['error'], file=sys.stderr)
exit(1)
assert(resp_obj['id'] == x) # assume replies are in-sequence
+ if settings['rev_hash_bytes'] == 'true':
+ resp_obj['result'] = hex_switchEndian(resp_obj['result'])
print(resp_obj['result'])
height += num_blocks
@@ -101,6 +109,8 @@ if __name__ == '__main__':
settings['min_height'] = 0
if 'max_height' not in settings:
settings['max_height'] = 313000
+ if 'rev_hash_bytes' not in settings:
+ settings['rev_hash_bytes'] = 'false'
if 'rpcuser' not in settings or 'rpcpassword' not in settings:
print("Missing username and/or password in cfg file", file=stderr)
sys.exit(1)
@@ -109,5 +119,7 @@ if __name__ == '__main__':
settings['min_height'] = int(settings['min_height'])
settings['max_height'] = int(settings['max_height'])
- get_block_hashes(settings)
+ # Force hash byte format setting to be lowercase to make comparisons easier.
+ settings['rev_hash_bytes'] = settings['rev_hash_bytes'].lower()
+ get_block_hashes(settings)