From 476eb7eb53f680494952865a823e5cf9459da2b9 Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Sat, 23 Aug 2014 21:46:12 -0400 Subject: Update linearize scripts. Break into two steps: * Generate hash list * Build data file(s) from local bitcoind blocks/ directory. This supports building one large bootstrap.dat, or multiple smaller blocks/blkNNNNN.dat files. --- contrib/linearize/README.md | 31 +++++- contrib/linearize/example-linearize.cfg | 12 ++- contrib/linearize/linearize-data.py | 182 ++++++++++++++++++++++++++++++++ contrib/linearize/linearize-hashes.py | 106 +++++++++++++++++++ contrib/linearize/linearize.py | 132 ----------------------- 5 files changed, 325 insertions(+), 138 deletions(-) create mode 100755 contrib/linearize/linearize-data.py create mode 100755 contrib/linearize/linearize-hashes.py delete mode 100644 contrib/linearize/linearize.py (limited to 'contrib/linearize') diff --git a/contrib/linearize/README.md b/contrib/linearize/README.md index 70b9f034cd..8d06d53b10 100644 --- a/contrib/linearize/README.md +++ b/contrib/linearize/README.md @@ -1,2 +1,29 @@ -### Linearize ### -Construct a linear, no-fork, best version of the blockchain. \ No newline at end of file +# Linearize +Construct a linear, no-fork, best version of the blockchain. + +## Step 1: Download hash list + + $ ./linearize-hashes.py linearize.cfg > hashlist.txt + +Required configuration file settings for linearize-hashes: +* RPC: rpcuser, rpcpassword + +Optional config file setting for linearize-hashes: +* RPC: host, port +* Block chain: min_height, max_height + +## Step 2: Copy local block data + + $ ./linearize-data.py linearize.cfg + +Required configuration file settings: +* "input": bitcoind blocks/ directory containing blkNNNNN.dat +* "hashlist": text file containing list of block hashes, linearize-hashes.py +output. 
+* "output_file": bootstrap.dat + or +* "output": output directory for linearized blocks/blkNNNNN.dat output + +Optional config file setting for linearize-data: +* "netmagic": network magic number + diff --git a/contrib/linearize/example-linearize.cfg b/contrib/linearize/example-linearize.cfg index f5cdab5325..9c3270d653 100644 --- a/contrib/linearize/example-linearize.cfg +++ b/contrib/linearize/example-linearize.cfg @@ -1,12 +1,16 @@ -# bitcoind RPC settings +# bitcoind RPC settings (linearize-hashes) rpcuser=someuser rpcpassword=somepassword host=127.0.0.1 port=8332 -# bootstrap.dat settings +# bootstrap.dat hashlist settings (linearize-hashes) +max_height=313000 + +# bootstrap.dat input/output settings (linearize-data) netmagic=f9beb4d9 -max_height=279000 -output=bootstrap.dat +input=/home/example/.bitcoin/blocks +output_file=/home/example/Downloads/bootstrap.dat +hashlist=hashlist.txt diff --git a/contrib/linearize/linearize-data.py b/contrib/linearize/linearize-data.py new file mode 100755 index 0000000000..77bae6e3c6 --- /dev/null +++ b/contrib/linearize/linearize-data.py @@ -0,0 +1,182 @@ +#!/usr/bin/python +# +# linearize-data.py: Construct a linear, no-fork version of the chain. +# +# Copyright (c) 2013 The Bitcoin developers +# Distributed under the MIT/X11 software license, see the accompanying +# file COPYING or http://www.opensource.org/licenses/mit-license.php. 
+# + +import json +import struct +import re +import base64 +import httplib +import sys +import hashlib + +MAX_OUT_SZ = 128 * 1024 * 1024 + +settings = {} + + +def uint32(x): + return x & 0xffffffffL + +def bytereverse(x): + return uint32(( ((x) << 24) | (((x) << 8) & 0x00ff0000) | + (((x) >> 8) & 0x0000ff00) | ((x) >> 24) )) + +def bufreverse(in_buf): + out_words = [] + for i in range(0, len(in_buf), 4): + word = struct.unpack('@I', in_buf[i:i+4])[0] + out_words.append(struct.pack('@I', bytereverse(word))) + return ''.join(out_words) + +def wordreverse(in_buf): + out_words = [] + for i in range(0, len(in_buf), 4): + out_words.append(in_buf[i:i+4]) + out_words.reverse() + return ''.join(out_words) + +def calc_hdr_hash(rawblock): + blk_hdr = rawblock[:80] + + hash1 = hashlib.sha256() + hash1.update(blk_hdr) + hash1_o = hash1.digest() + + hash2 = hashlib.sha256() + hash2.update(hash1_o) + hash2_o = hash2.digest() + + return hash2_o + +def calc_hash_str(rawblock): + hash = calc_hdr_hash(rawblock) + hash = bufreverse(hash) + hash = wordreverse(hash) + hash_str = hash.encode('hex') + return hash_str + +def get_block_hashes(settings): + blkindex = [] + f = open(settings['hashlist'], "r") + for line in f: + line = line.rstrip() + blkindex.append(line) + + print("Read " + str(len(blkindex)) + " hashes") + + return blkindex + +def mkblockset(blkindex): + blkmap = {} + for hash in blkindex: + blkmap[hash] = True + return blkmap + +def copydata(settings, blkindex, blkset): + inFn = 0 + inF = None + outFn = 0 + outsz = 0 + outF = None + blkCount = 0 + + fileOutput = True + if 'output' in settings: + fileOutput = False + + while True: + if not inF: + fname = "%s/blk%05d.dat" % (settings['input'], inFn) + print("Input file" + fname) + inF = open(fname, "rb") + + inhdr = inF.read(8) + if (not inhdr or (inhdr[0] == "\0")): + inF.close() + inF = None + inFn = inFn + 1 + continue + + inMagic = inhdr[:4] + if (inMagic != settings['netmagic']): + print("Invalid magic:" + inMagic) + 
return + inLenLE = inhdr[4:] + su = struct.unpack(" MAX_OUT_SZ): + outF.close() + outF = None + outFn = outFn + 1 + outsz = 0 + if not outF: + if fileOutput: + fname = settings['output_file'] + else: + fname = "%s/blk%05d.dat" % (settings['output'], outFn) + print("Output file" + fname) + outF = open(fname, "wb") + + outF.write(inhdr) + outF.write(rawblock) + outsz = outsz + inLen + 8 + + blkCount = blkCount + 1 + + if (blkCount % 1000) == 0: + print("Wrote " + str(blkCount) + " blocks") + +if __name__ == '__main__': + if len(sys.argv) != 2: + print "Usage: linearize-data.py CONFIG-FILE" + sys.exit(1) + + f = open(sys.argv[1]) + for line in f: + # skip comment lines + m = re.search('^\s*#', line) + if m: + continue + + # parse key=value lines + m = re.search('^(\w+)\s*=\s*(\S.*)$', line) + if m is None: + continue + settings[m.group(1)] = m.group(2) + f.close() + + if 'netmagic' not in settings: + settings['netmagic'] = 'f9beb4d9' + if 'input' not in settings: + settings['input'] = 'input' + if 'hashlist' not in settings: + settings['hashlist'] = 'hashlist.txt' + + settings['netmagic'] = settings['netmagic'].decode('hex') + + if 'output_file' not in settings and 'output' not in settings: + print("Missing output file / directory") + sys.exit(1) + + blkindex = get_block_hashes(settings) + blkset = mkblockset(blkindex) + + if not "000000000019d6689c085ae165831e934ff763ae46a2a6c172b3f1b60a8ce26f" in blkset: + print("not found") + else: + copydata(settings, blkindex, blkset) + + diff --git a/contrib/linearize/linearize-hashes.py b/contrib/linearize/linearize-hashes.py new file mode 100755 index 0000000000..791b71bc33 --- /dev/null +++ b/contrib/linearize/linearize-hashes.py @@ -0,0 +1,106 @@ +#!/usr/bin/python +# +# linearize-hashes.py: List blocks in a linear, no-fork version of the chain. 
+# +# Copyright (c) 2013 The Bitcoin developers +# Distributed under the MIT/X11 software license, see the accompanying +# file COPYING or http://www.opensource.org/licenses/mit-license.php. +# + +import json +import struct +import re +import base64 +import httplib +import sys + +settings = {} + +class BitcoinRPC: + OBJID = 1 + + def __init__(self, host, port, username, password): + authpair = "%s:%s" % (username, password) + self.authhdr = "Basic %s" % (base64.b64encode(authpair)) + self.conn = httplib.HTTPConnection(host, port, False, 30) + def rpc(self, method, params=None): + self.OBJID += 1 + obj = { 'version' : '1.1', + 'method' : method, + 'id' : self.OBJID } + if params is None: + obj['params'] = [] + else: + obj['params'] = params + self.conn.request('POST', '/', json.dumps(obj), + { 'Authorization' : self.authhdr, + 'Content-type' : 'application/json' }) + + resp = self.conn.getresponse() + if resp is None: + print "JSON-RPC: no response" + return None + + body = resp.read() + resp_obj = json.loads(body) + if resp_obj is None: + print "JSON-RPC: cannot JSON-decode body" + return None + if 'error' in resp_obj and resp_obj['error'] != None: + return resp_obj['error'] + if 'result' not in resp_obj: + print "JSON-RPC: no result in object" + return None + + return resp_obj['result'] + def getblock(self, hash, verbose=True): + return self.rpc('getblock', [hash, verbose]) + def getblockhash(self, index): + return self.rpc('getblockhash', [index]) + +def get_block_hashes(settings): + rpc = BitcoinRPC(settings['host'], settings['port'], + settings['rpcuser'], settings['rpcpassword']) + + for height in xrange(settings['min_height'], settings['max_height']+1): + hash = rpc.getblockhash(height) + + print(hash) + +if __name__ == '__main__': + if len(sys.argv) != 2: + print "Usage: linearize-hashes.py CONFIG-FILE" + sys.exit(1) + + f = open(sys.argv[1]) + for line in f: + # skip comment lines + m = re.search('^\s*#', line) + if m: + continue + + # parse key=value lines 
+ m = re.search('^(\w+)\s*=\s*(\S.*)$', line) + if m is None: + continue + settings[m.group(1)] = m.group(2) + f.close() + + if 'host' not in settings: + settings['host'] = '127.0.0.1' + if 'port' not in settings: + settings['port'] = 8332 + if 'min_height' not in settings: + settings['min_height'] = 0 + if 'max_height' not in settings: + settings['max_height'] = 313000 + if 'rpcuser' not in settings or 'rpcpassword' not in settings: + print "Missing username and/or password in cfg file" + sys.exit(1) + + settings['port'] = int(settings['port']) + settings['min_height'] = int(settings['min_height']) + settings['max_height'] = int(settings['max_height']) + + get_block_hashes(settings) + diff --git a/contrib/linearize/linearize.py b/contrib/linearize/linearize.py deleted file mode 100644 index 650f7d3684..0000000000 --- a/contrib/linearize/linearize.py +++ /dev/null @@ -1,132 +0,0 @@ -#!/usr/bin/python -# -# linearize.py: Construct a linear, no-fork, best version of the blockchain. -# -# -# Copyright (c) 2013 The Bitcoin developers -# Distributed under the MIT/X11 software license, see the accompanying -# file COPYING or http://www.opensource.org/licenses/mit-license.php. 
-# - -import json -import struct -import re -import base64 -import httplib -import sys - -ERR_SLEEP = 15 -MAX_NONCE = 1000000L - -settings = {} - -class BitcoinRPC: - OBJID = 1 - - def __init__(self, host, port, username, password): - authpair = "%s:%s" % (username, password) - self.authhdr = "Basic %s" % (base64.b64encode(authpair)) - self.conn = httplib.HTTPConnection(host, port, False, 30) - def rpc(self, method, params=None): - self.OBJID += 1 - obj = { 'version' : '1.1', - 'method' : method, - 'id' : self.OBJID } - if params is None: - obj['params'] = [] - else: - obj['params'] = params - self.conn.request('POST', '/', json.dumps(obj), - { 'Authorization' : self.authhdr, - 'Content-type' : 'application/json' }) - - resp = self.conn.getresponse() - if resp is None: - print "JSON-RPC: no response" - return None - - body = resp.read() - resp_obj = json.loads(body) - if resp_obj is None: - print "JSON-RPC: cannot JSON-decode body" - return None - if 'error' in resp_obj and resp_obj['error'] != None: - return resp_obj['error'] - if 'result' not in resp_obj: - print "JSON-RPC: no result in object" - return None - - return resp_obj['result'] - def getblock(self, hash, verbose=True): - return self.rpc('getblock', [hash, verbose]) - def getblockhash(self, index): - return self.rpc('getblockhash', [index]) - -def getblock(rpc, settings, n): - hash = rpc.getblockhash(n) - hexdata = rpc.getblock(hash, False) - data = hexdata.decode('hex') - - return data - -def get_blocks(settings): - rpc = BitcoinRPC(settings['host'], settings['port'], - settings['rpcuser'], settings['rpcpassword']) - - outf = open(settings['output'], 'ab') - - for height in xrange(settings['min_height'], settings['max_height']+1): - data = getblock(rpc, settings, height) - - outhdr = settings['netmagic'] - outhdr += struct.pack(" Date: Sat, 23 Aug 2014 22:59:16 -0400 Subject: contrib/linearize: split output files based on new-timestamp-year or max-file-size --- contrib/linearize/README.md | 3 +++ 
contrib/linearize/example-linearize.cfg | 1 + contrib/linearize/linearize-data.py | 44 ++++++++++++++++++++++++++------- 3 files changed, 39 insertions(+), 9 deletions(-) (limited to 'contrib/linearize') diff --git a/contrib/linearize/README.md b/contrib/linearize/README.md index 8d06d53b10..b5c6e7824e 100644 --- a/contrib/linearize/README.md +++ b/contrib/linearize/README.md @@ -26,4 +26,7 @@ output. Optional config file setting for linearize-data: * "netmagic": network magic number +* "max_out_sz": maximum output file size (default 1000*1000*1000) +* "split_year": Split files when a new year is first seen, in addition to +reaching a maximum file size. diff --git a/contrib/linearize/example-linearize.cfg b/contrib/linearize/example-linearize.cfg index 9c3270d653..071345f23a 100644 --- a/contrib/linearize/example-linearize.cfg +++ b/contrib/linearize/example-linearize.cfg @@ -13,4 +13,5 @@ netmagic=f9beb4d9 input=/home/example/.bitcoin/blocks output_file=/home/example/Downloads/bootstrap.dat hashlist=hashlist.txt +split_year=1 diff --git a/contrib/linearize/linearize-data.py b/contrib/linearize/linearize-data.py index 77bae6e3c6..ea94f25fae 100755 --- a/contrib/linearize/linearize-data.py +++ b/contrib/linearize/linearize-data.py @@ -14,8 +14,7 @@ import base64 import httplib import sys import hashlib - -MAX_OUT_SZ = 128 * 1024 * 1024 +import datetime settings = {} @@ -41,9 +40,7 @@ def wordreverse(in_buf): out_words.reverse() return ''.join(out_words) -def calc_hdr_hash(rawblock): - blk_hdr = rawblock[:80] - +def calc_hdr_hash(blk_hdr): hash1 = hashlib.sha256() hash1.update(blk_hdr) hash1_o = hash1.digest() @@ -54,13 +51,18 @@ def calc_hdr_hash(rawblock): return hash2_o -def calc_hash_str(rawblock): - hash = calc_hdr_hash(rawblock) +def calc_hash_str(blk_hdr): + hash = calc_hdr_hash(blk_hdr) hash = bufreverse(hash) hash = wordreverse(hash) hash_str = hash.encode('hex') return hash_str +def get_blk_year(blk_hdr): + members = struct.unpack(" MAX_OUT_SZ): + if not 
fileOutput and ((outsz + inLen) > maxOutSz): outF.close() outF = None outFn = outFn + 1 outsz = 0 + + if splitYear: + blkYear = get_blk_year(blk_hdr) + if blkYear > lastYear: + print("New year " + str(blkYear) + " @ " + hash_str) + lastYear = blkYear + if outF: + outF.close() + outF = None + outFn = outFn + 1 + outsz = 0 + if not outF: if fileOutput: fname = settings['output_file'] @@ -164,7 +184,13 @@ if __name__ == '__main__': settings['input'] = 'input' if 'hashlist' not in settings: settings['hashlist'] = 'hashlist.txt' + if 'split_year' not in settings: + settings['split_year'] = 0 + if 'max_out_sz' not in settings: + settings['max_out_sz'] = 1000L * 1000 * 1000 + settings['max_out_sz'] = long(settings['max_out_sz']) + settings['split_year'] = int(settings['split_year']) settings['netmagic'] = settings['netmagic'].decode('hex') if 'output_file' not in settings and 'output' not in settings: -- cgit v1.2.3