diff options
Diffstat (limited to 'contrib/linearize/linearize-data.py')
-rwxr-xr-x | contrib/linearize/linearize-data.py | 237 |
1 files changed, 237 insertions, 0 deletions
diff --git a/contrib/linearize/linearize-data.py b/contrib/linearize/linearize-data.py new file mode 100755 index 0000000000..3b5d198c14 --- /dev/null +++ b/contrib/linearize/linearize-data.py @@ -0,0 +1,237 @@ +#!/usr/bin/python +# +# linearize-data.py: Construct a linear, no-fork version of the chain. +# +# Copyright (c) 2013 The Bitcoin developers +# Distributed under the MIT/X11 software license, see the accompanying +# file COPYING or http://www.opensource.org/licenses/mit-license.php. +# + +import json +import struct +import re +import os +import base64 +import httplib +import sys +import hashlib +import datetime +import time + +settings = {} + + +def uint32(x): + return x & 0xffffffffL + +def bytereverse(x): + return uint32(( ((x) << 24) | (((x) << 8) & 0x00ff0000) | + (((x) >> 8) & 0x0000ff00) | ((x) >> 24) )) + +def bufreverse(in_buf): + out_words = [] + for i in range(0, len(in_buf), 4): + word = struct.unpack('@I', in_buf[i:i+4])[0] + out_words.append(struct.pack('@I', bytereverse(word))) + return ''.join(out_words) + +def wordreverse(in_buf): + out_words = [] + for i in range(0, len(in_buf), 4): + out_words.append(in_buf[i:i+4]) + out_words.reverse() + return ''.join(out_words) + +def calc_hdr_hash(blk_hdr): + hash1 = hashlib.sha256() + hash1.update(blk_hdr) + hash1_o = hash1.digest() + + hash2 = hashlib.sha256() + hash2.update(hash1_o) + hash2_o = hash2.digest() + + return hash2_o + +def calc_hash_str(blk_hdr): + hash = calc_hdr_hash(blk_hdr) + hash = bufreverse(hash) + hash = wordreverse(hash) + hash_str = hash.encode('hex') + return hash_str + +def get_blk_dt(blk_hdr): + members = struct.unpack("<I", blk_hdr[68:68+4]) + nTime = members[0] + dt = datetime.datetime.fromtimestamp(nTime) + dt_ym = datetime.datetime(dt.year, dt.month, 1) + return (dt_ym, nTime) + +def get_block_hashes(settings): + blkindex = [] + f = open(settings['hashlist'], "r") + for line in f: + line = line.rstrip() + blkindex.append(line) + + print("Read " + str(len(blkindex)) + " hashes") + + return blkindex + +def mkblockset(blkindex): + blkmap = {} + for hash in blkindex: + blkmap[hash] = True + return blkmap + +def copydata(settings, blkindex, blkset): + inFn = 0 + inF = None + outFn = 0 + outsz = 0 + outF = None + outFname = None + blkCount = 0 + + lastDate = datetime.datetime(2000, 1, 1) + highTS = 1408893517 - 315360000 + timestampSplit = False + fileOutput = True + setFileTime = False + maxOutSz = settings['max_out_sz'] + if 'output' in settings: + fileOutput = False + if settings['file_timestamp'] != 0: + setFileTime = True + if settings['split_timestamp'] != 0: + timestampSplit = True + + while True: + if not inF: + fname = "%s/blk%05d.dat" % (settings['input'], inFn) + print("Input file" + fname) + try: + inF = open(fname, "rb") + except IOError: + print "Done" + return + + inhdr = inF.read(8) + if (not inhdr or (inhdr[0] == "\0")): + inF.close() + inF = None + inFn = inFn + 1 + continue + + inMagic = inhdr[:4] + if (inMagic != settings['netmagic']): + print("Invalid magic:" + inMagic) + return + inLenLE = inhdr[4:] + su = struct.unpack("<I", inLenLE) + inLen = su[0] + rawblock = inF.read(inLen) + blk_hdr = rawblock[:80] + + hash_str = calc_hash_str(blk_hdr) + if not hash_str in blkset: + print("Skipping unknown block " + hash_str) + continue + + if blkindex[blkCount] != hash_str: + print("Out of order block.") + print("Expected " + blkindex[blkCount]) + print("Got " + hash_str) + sys.exit(1) + + if not fileOutput and ((outsz + inLen) > maxOutSz): + outF.close() + if setFileTime: + os.utime(outFname, (int(time.time()), highTS)) + outF = None + outFname = None + outFn = outFn + 1 + outsz = 0 + + (blkDate, blkTS) = get_blk_dt(blk_hdr) + if timestampSplit and (blkDate > lastDate): + print("New month " + blkDate.strftime("%Y-%m") + " @ " + hash_str) + lastDate = blkDate + if outF: + outF.close() + if setFileTime: + os.utime(outFname, (int(time.time()), highTS)) + outF = None + outFname = None + outFn = outFn + 1 + outsz = 0 + + if not outF: + if fileOutput: + outFname = settings['output_file'] + else: + outFname = "%s/blk%05d.dat" % (settings['output'], outFn) + print("Output file" + outFname) + outF = open(outFname, "wb") + + outF.write(inhdr) + outF.write(rawblock) + outsz = outsz + inLen + 8 + + blkCount = blkCount + 1 + if blkTS > highTS: + highTS = blkTS + + if (blkCount % 1000) == 0: + print("Wrote " + str(blkCount) + " blocks") + +if __name__ == '__main__': + if len(sys.argv) != 2: + print "Usage: linearize-data.py CONFIG-FILE" + sys.exit(1) + + f = open(sys.argv[1]) + for line in f: + # skip comment lines + m = re.search('^\s*#', line) + if m: + continue + + # parse key=value lines + m = re.search('^(\w+)\s*=\s*(\S.*)$', line) + if m is None: + continue + settings[m.group(1)] = m.group(2) + f.close() + + if 'netmagic' not in settings: + settings['netmagic'] = 'f9beb4d9' + if 'input' not in settings: + settings['input'] = 'input' + if 'hashlist' not in settings: + settings['hashlist'] = 'hashlist.txt' + if 'file_timestamp' not in settings: + settings['file_timestamp'] = 0 + if 'split_timestamp' not in settings: + settings['split_timestamp'] = 0 + if 'max_out_sz' not in settings: + settings['max_out_sz'] = 1000L * 1000 * 1000 + + settings['max_out_sz'] = long(settings['max_out_sz']) + settings['split_timestamp'] = int(settings['split_timestamp']) + settings['file_timestamp'] = int(settings['file_timestamp']) + settings['netmagic'] = settings['netmagic'].decode('hex') + + if 'output_file' not in settings and 'output' not in settings: + print("Missing output file / directory") + sys.exit(1) + + blkindex = get_block_hashes(settings) + blkset = mkblockset(blkindex) + + if not "000000000019d6689c085ae165831e934ff763ae46a2a6c172b3f1b60a8ce26f" in blkset: + print("not found") + else: + copydata(settings, blkindex, blkset) + + |