diff options
Diffstat (limited to 'contrib')
-rwxr-xr-x | contrib/devtools/update-translations.py | 148 | ||||
-rw-r--r-- | contrib/linearize/README.md | 35 | ||||
-rw-r--r-- | contrib/linearize/example-linearize.cfg | 13 | ||||
-rwxr-xr-x | contrib/linearize/linearize-data.py | 233 | ||||
-rwxr-xr-x[-rw-r--r--] | contrib/linearize/linearize-hashes.py (renamed from contrib/linearize/linearize.py) | 40 |
5 files changed, 416 insertions, 53 deletions
diff --git a/contrib/devtools/update-translations.py b/contrib/devtools/update-translations.py index 1950a42678..0be632069a 100755 --- a/contrib/devtools/update-translations.py +++ b/contrib/devtools/update-translations.py @@ -14,13 +14,14 @@ It will do the following automatically: TODO: - auto-add new translations to the build system according to the translation process -- remove 'unfinished' translation items ''' from __future__ import division, print_function import subprocess import re import sys import os +import io +import xml.etree.ElementTree as ET # Name of transifex tool TX = 'tx' @@ -40,24 +41,143 @@ def fetch_all_translations(): print('Error while fetching translations', file=sys.stderr) exit(1) -def postprocess_translations(): - print('Postprocessing...') +def find_format_specifiers(s): + '''Find all format specifiers in a string.''' + pos = 0 + specifiers = [] + while True: + percent = s.find('%', pos) + if percent < 0: + break + specifiers.append(s[percent+1]) + pos = percent+2 + return specifiers + +def split_format_specifiers(specifiers): + '''Split format specifiers between numeric (Qt) and others (strprintf)''' + numeric = [] + other = [] + for s in specifiers: + if s in {'1','2','3','4','5','6','7','8','9'}: + numeric.append(s) + else: + other.append(s) + + # numeric (Qt) can be present in any order, others (strprintf) must be in specified order + return set(numeric),other + +def sanitize_string(s): + '''Sanitize string for printing''' + return s.replace('\n',' ') + +def check_format_specifiers(source, translation, errors): + source_f = split_format_specifiers(find_format_specifiers(source)) + # assert that no source messages contain both Qt and strprintf format specifiers + # if this fails, go change the source as this is hacky and confusing! 
+ assert(not(source_f[0] and source_f[1])) + try: + translation_f = split_format_specifiers(find_format_specifiers(translation)) + except IndexError: + errors.append("Parse error in translation '%s'" % sanitize_string(translation)) + return False + else: + if source_f != translation_f: + errors.append("Mismatch between '%s' and '%s'" % (sanitize_string(source), sanitize_string(translation))) + return False + return True + +def all_ts_files(suffix=''): for filename in os.listdir(LOCALE_DIR): # process only language files, and do not process source language - if not filename.endswith('.ts') or filename == SOURCE_LANG: + if not filename.endswith('.ts'+suffix) or filename == SOURCE_LANG+suffix: continue + if suffix: # remove provided suffix + filename = filename[0:-len(suffix)] filepath = os.path.join(LOCALE_DIR, filename) - with open(filepath, 'rb') as f: + yield(filename, filepath) + +FIX_RE = re.compile(b'[\x00-\x09\x0b\x0c\x0e-\x1f]') +def remove_invalid_characters(s): + '''Remove invalid characters from translation string''' + return FIX_RE.sub(b'', s) + +# Override cdata escape function to make our output match Qt's (optional, just for cleaner diffs for +# comparison, disable by default) +_orig_escape_cdata = None +def escape_cdata(text): + text = _orig_escape_cdata(text) + text = text.replace("'", '&apos;') + text = text.replace('"', '&quot;') + return text + +def postprocess_translations(reduce_diff_hacks=False): + print('Checking and postprocessing...') + + if reduce_diff_hacks: + global _orig_escape_cdata + _orig_escape_cdata = ET._escape_cdata + ET._escape_cdata = escape_cdata + + for (filename,filepath) in all_ts_files(): + os.rename(filepath, filepath+'.orig') + + have_errors = False + for (filename,filepath) in all_ts_files('.orig'): + # pre-fixups to cope with transifex output + parser = ET.XMLParser(encoding='utf-8') # need to override encoding because 'utf8' is not understood only 'utf-8' + with open(filepath + '.orig', 'rb') as f: data = f.read() - # remove 
non-allowed control characters - data = re.sub('[\x00-\x09\x0b\x0c\x0e-\x1f]', '', data) - data = data.split('\n') - # strip locations from non-origin translation - # location tags are used to guide translators, they are not necessary for compilation - # TODO: actually process XML instead of relying on Transifex's one-tag-per-line output format - data = [line for line in data if not '<location' in line] - with open(filepath, 'wb') as f: - f.write('\n'.join(data)) + # remove control characters; this must be done over the entire file otherwise the XML parser will fail + data = remove_invalid_characters(data) + tree = ET.parse(io.BytesIO(data), parser=parser) + + # iterate over all messages in file + root = tree.getroot() + for context in root.findall('context'): + for message in context.findall('message'): + numerus = message.get('numerus') == 'yes' + source = message.find('source').text + translation_node = message.find('translation') + # pick all numerusforms + if numerus: + translations = [i.text for i in translation_node.findall('numerusform')] + else: + translations = [translation_node.text] + + for translation in translations: + if translation is None: + continue + errors = [] + valid = check_format_specifiers(source, translation, errors) + + for error in errors: + print('%s: %s' % (filename, error)) + + if not valid: # set type to unfinished and clear string if invalid + translation_node.clear() + translation_node.set('type', 'unfinished') + have_errors = True + + # Remove location tags + for location in message.findall('location'): + message.remove(location) + + # Remove entire message if it is an unfinished translation + if translation_node.get('type') == 'unfinished': + context.remove(message) + + # write fixed-up tree + # if diff reduction requested, replace some XML to 'sanitize' to qt formatting + if reduce_diff_hacks: + out = io.BytesIO() + tree.write(out, encoding='utf-8') + out = out.getvalue() + out = out.replace(b' />', b'/>') + with open(filepath, 
'wb') as f: + f.write(out) + else: + tree.write(filepath, encoding='utf-8') + return have_errors if __name__ == '__main__': check_at_repository_root() diff --git a/contrib/linearize/README.md b/contrib/linearize/README.md index 70b9f034cd..157586e4d4 100644 --- a/contrib/linearize/README.md +++ b/contrib/linearize/README.md @@ -1,2 +1,33 @@ -### Linearize ### -Construct a linear, no-fork, best version of the blockchain.
\ No newline at end of file +# Linearize +Construct a linear, no-fork, best version of the blockchain. + +## Step 1: Download hash list + + $ ./linearize-hashes.py linearize.cfg > hashlist.txt + +Required configuration file settings for linearize-hashes: +* RPC: rpcuser, rpcpassword + +Optional config file setting for linearize-hashes: +* RPC: host, port +* Block chain: min_height, max_height + +## Step 2: Copy local block data + + $ ./linearize-data.py linearize.cfg + +Required configuration file settings: +* "input": bitcoind blocks/ directory containing blkNNNNN.dat +* "hashlist": text file containing list of block hashes, linearize-hashes.py +output. +* "output_file": bootstrap.dat + or +* "output": output directory for linearized blocks/blkNNNNN.dat output + +Optional config file setting for linearize-data: +* "netmagic": network magic number +* "max_out_sz": maximum output file size (default 1000*1000*1000) +* "split_timestamp": Split files when a new month is first seen, in addition to +reaching a maximum file size. +* "file_timestamp": Set each file's last-modified time to that of the +most recent block in that file. 
diff --git a/contrib/linearize/example-linearize.cfg b/contrib/linearize/example-linearize.cfg index f5cdab5325..071345f23a 100644 --- a/contrib/linearize/example-linearize.cfg +++ b/contrib/linearize/example-linearize.cfg @@ -1,12 +1,17 @@ -# bitcoind RPC settings +# bitcoind RPC settings (linearize-hashes) rpcuser=someuser rpcpassword=somepassword host=127.0.0.1 port=8332 -# bootstrap.dat settings +# bootstrap.dat hashlist settings (linearize-hashes) +max_height=313000 + +# bootstrap.dat input/output settings (linearize-data) netmagic=f9beb4d9 -max_height=279000 -output=bootstrap.dat +input=/home/example/.bitcoin/blocks +output_file=/home/example/Downloads/bootstrap.dat +hashlist=hashlist.txt +split_year=1 diff --git a/contrib/linearize/linearize-data.py b/contrib/linearize/linearize-data.py new file mode 100755 index 0000000000..383bb38198 --- /dev/null +++ b/contrib/linearize/linearize-data.py @@ -0,0 +1,233 @@ +#!/usr/bin/python +# +# linearize-data.py: Construct a linear, no-fork version of the chain. +# +# Copyright (c) 2013 The Bitcoin developers +# Distributed under the MIT/X11 software license, see the accompanying +# file COPYING or http://www.opensource.org/licenses/mit-license.php. 
+# + +import json +import struct +import re +import os +import base64 +import httplib +import sys +import hashlib +import datetime +import time + +settings = {} + + +def uint32(x): + return x & 0xffffffffL + +def bytereverse(x): + return uint32(( ((x) << 24) | (((x) << 8) & 0x00ff0000) | + (((x) >> 8) & 0x0000ff00) | ((x) >> 24) )) + +def bufreverse(in_buf): + out_words = [] + for i in range(0, len(in_buf), 4): + word = struct.unpack('@I', in_buf[i:i+4])[0] + out_words.append(struct.pack('@I', bytereverse(word))) + return ''.join(out_words) + +def wordreverse(in_buf): + out_words = [] + for i in range(0, len(in_buf), 4): + out_words.append(in_buf[i:i+4]) + out_words.reverse() + return ''.join(out_words) + +def calc_hdr_hash(blk_hdr): + hash1 = hashlib.sha256() + hash1.update(blk_hdr) + hash1_o = hash1.digest() + + hash2 = hashlib.sha256() + hash2.update(hash1_o) + hash2_o = hash2.digest() + + return hash2_o + +def calc_hash_str(blk_hdr): + hash = calc_hdr_hash(blk_hdr) + hash = bufreverse(hash) + hash = wordreverse(hash) + hash_str = hash.encode('hex') + return hash_str + +def get_blk_dt(blk_hdr): + members = struct.unpack("<I", blk_hdr[68:68+4]) + nTime = members[0] + dt = datetime.datetime.fromtimestamp(nTime) + dt_ym = datetime.datetime(dt.year, dt.month, 1) + return (dt_ym, nTime) + +def get_block_hashes(settings): + blkindex = [] + f = open(settings['hashlist'], "r") + for line in f: + line = line.rstrip() + blkindex.append(line) + + print("Read " + str(len(blkindex)) + " hashes") + + return blkindex + +def mkblockset(blkindex): + blkmap = {} + for hash in blkindex: + blkmap[hash] = True + return blkmap + +def copydata(settings, blkindex, blkset): + inFn = 0 + inF = None + outFn = 0 + outsz = 0 + outF = None + outFname = None + blkCount = 0 + + lastDate = datetime.datetime(2000, 1, 1) + highTS = 1408893517 - 315360000 + timestampSplit = False + fileOutput = True + setFileTime = False + maxOutSz = settings['max_out_sz'] + if 'output' in settings: + fileOutput = 
False + if settings['file_timestamp'] != 0: + setFileTime = True + if settings['split_timestamp'] != 0: + timestampSplit = True + + while True: + if not inF: + fname = "%s/blk%05d.dat" % (settings['input'], inFn) + print("Input file" + fname) + inF = open(fname, "rb") + + inhdr = inF.read(8) + if (not inhdr or (inhdr[0] == "\0")): + inF.close() + inF = None + inFn = inFn + 1 + continue + + inMagic = inhdr[:4] + if (inMagic != settings['netmagic']): + print("Invalid magic:" + inMagic) + return + inLenLE = inhdr[4:] + su = struct.unpack("<I", inLenLE) + inLen = su[0] + rawblock = inF.read(inLen) + blk_hdr = rawblock[:80] + + hash_str = calc_hash_str(blk_hdr) + if not hash_str in blkset: + print("Skipping unknown block " + hash_str) + continue + + if blkindex[blkCount] != hash_str: + print("Out of order block.") + print("Expected " + blkindex[blkCount]) + print("Got " + hash_str) + sys.exit(1) + + if not fileOutput and ((outsz + inLen) > maxOutSz): + outF.close() + if setFileTime: + os.utime(outFname, (int(time.time()), highTS)) + outF = None + outFname = None + outFn = outFn + 1 + outsz = 0 + + (blkDate, blkTS) = get_blk_dt(blk_hdr) + if timestampSplit and (blkDate > lastDate): + print("New month " + blkDate.strftime("%Y-%m") + " @ " + hash_str) + lastDate = blkDate + if outF: + outF.close() + if setFileTime: + os.utime(outFname, (int(time.time()), highTS)) + outF = None + outFname = None + outFn = outFn + 1 + outsz = 0 + + if not outF: + if fileOutput: + outFname = settings['output_file'] + else: + outFname = "%s/blk%05d.dat" % (settings['output'], outFn) + print("Output file" + outFname) + outF = open(outFname, "wb") + + outF.write(inhdr) + outF.write(rawblock) + outsz = outsz + inLen + 8 + + blkCount = blkCount + 1 + if blkTS > highTS: + highTS = blkTS + + if (blkCount % 1000) == 0: + print("Wrote " + str(blkCount) + " blocks") + +if __name__ == '__main__': + if len(sys.argv) != 2: + print "Usage: linearize-data.py CONFIG-FILE" + sys.exit(1) + + f = 
open(sys.argv[1]) + for line in f: + # skip comment lines + m = re.search('^\s*#', line) + if m: + continue + + # parse key=value lines + m = re.search('^(\w+)\s*=\s*(\S.*)$', line) + if m is None: + continue + settings[m.group(1)] = m.group(2) + f.close() + + if 'netmagic' not in settings: + settings['netmagic'] = 'f9beb4d9' + if 'input' not in settings: + settings['input'] = 'input' + if 'hashlist' not in settings: + settings['hashlist'] = 'hashlist.txt' + if 'file_timestamp' not in settings: + settings['file_timestamp'] = 0 + if 'split_timestamp' not in settings: + settings['split_timestamp'] = 0 + if 'max_out_sz' not in settings: + settings['max_out_sz'] = 1000L * 1000 * 1000 + + settings['max_out_sz'] = long(settings['max_out_sz']) + settings['split_timestamp'] = int(settings['split_timestamp']) + settings['file_timestamp'] = int(settings['file_timestamp']) + settings['netmagic'] = settings['netmagic'].decode('hex') + + if 'output_file' not in settings and 'output' not in settings: + print("Missing output file / directory") + sys.exit(1) + + blkindex = get_block_hashes(settings) + blkset = mkblockset(blkindex) + + if not "000000000019d6689c085ae165831e934ff763ae46a2a6c172b3f1b60a8ce26f" in blkset: + print("not found") + else: + copydata(settings, blkindex, blkset) + + diff --git a/contrib/linearize/linearize.py b/contrib/linearize/linearize-hashes.py index 650f7d3684..791b71bc33 100644..100755 --- a/contrib/linearize/linearize.py +++ b/contrib/linearize/linearize-hashes.py @@ -1,7 +1,6 @@ #!/usr/bin/python # -# linearize.py: Construct a linear, no-fork, best version of the blockchain. -# +# linearize-hashes.py: List blocks in a linear, no-fork version of the chain. 
# # Copyright (c) 2013 The Bitcoin developers # Distributed under the MIT/X11 software license, see the accompanying @@ -15,9 +14,6 @@ import base64 import httplib import sys -ERR_SLEEP = 15 -MAX_NONCE = 1000000L - settings = {} class BitcoinRPC: @@ -62,34 +58,18 @@ class BitcoinRPC: def getblockhash(self, index): return self.rpc('getblockhash', [index]) -def getblock(rpc, settings, n): - hash = rpc.getblockhash(n) - hexdata = rpc.getblock(hash, False) - data = hexdata.decode('hex') - - return data - -def get_blocks(settings): +def get_block_hashes(settings): rpc = BitcoinRPC(settings['host'], settings['port'], settings['rpcuser'], settings['rpcpassword']) - outf = open(settings['output'], 'ab') - for height in xrange(settings['min_height'], settings['max_height']+1): - data = getblock(rpc, settings, height) - - outhdr = settings['netmagic'] - outhdr += struct.pack("<i", len(data)) + hash = rpc.getblockhash(height) - outf.write(outhdr) - outf.write(data) - - if (height % 1000) == 0: - sys.stdout.write("Wrote block " + str(height) + "\n") + print(hash) if __name__ == '__main__': if len(sys.argv) != 2: - print "Usage: linearize.py CONFIG-FILE" + print "Usage: linearize-hashes.py CONFIG-FILE" sys.exit(1) f = open(sys.argv[1]) @@ -106,10 +86,6 @@ if __name__ == '__main__': settings[m.group(1)] = m.group(2) f.close() - if 'netmagic' not in settings: - settings['netmagic'] = 'f9beb4d9' - if 'output' not in settings: - settings['output'] = 'bootstrap.dat' if 'host' not in settings: settings['host'] = '127.0.0.1' if 'port' not in settings: @@ -117,16 +93,14 @@ if __name__ == '__main__': if 'min_height' not in settings: settings['min_height'] = 0 if 'max_height' not in settings: - settings['max_height'] = 279000 + settings['max_height'] = 313000 if 'rpcuser' not in settings or 'rpcpassword' not in settings: print "Missing username and/or password in cfg file" sys.exit(1) - settings['netmagic'] = settings['netmagic'].decode('hex') settings['port'] = int(settings['port']) 
settings['min_height'] = int(settings['min_height']) settings['max_height'] = int(settings['max_height']) - get_blocks(settings) - + get_block_hashes(settings) |