Diffstat (limited to 'contrib')
-rwxr-xr-x  contrib/devtools/update-translations.py  148
-rw-r--r--  contrib/linearize/README.md  35
-rw-r--r--  contrib/linearize/example-linearize.cfg  13
-rwxr-xr-x  contrib/linearize/linearize-data.py  233
-rwxr-xr-x [-rw-r--r--]  contrib/linearize/linearize-hashes.py (renamed from contrib/linearize/linearize.py)  40
5 files changed, 416 insertions, 53 deletions
diff --git a/contrib/devtools/update-translations.py b/contrib/devtools/update-translations.py
index 1950a42678..0be632069a 100755
--- a/contrib/devtools/update-translations.py
+++ b/contrib/devtools/update-translations.py
@@ -14,13 +14,14 @@ It will do the following automatically:
TODO:
- auto-add new translations to the build system according to the translation process
-- remove 'unfinished' translation items
'''
from __future__ import division, print_function
import subprocess
import re
import sys
import os
+import io
+import xml.etree.ElementTree as ET
# Name of transifex tool
TX = 'tx'
@@ -40,24 +41,143 @@ def fetch_all_translations():
print('Error while fetching translations', file=sys.stderr)
exit(1)
-def postprocess_translations():
- print('Postprocessing...')
+def find_format_specifiers(s):
+ '''Find all format specifiers in a string.'''
+ pos = 0
+ specifiers = []
+ while True:
+ percent = s.find('%', pos)
+ if percent < 0:
+ break
+ specifiers.append(s[percent+1])
+ pos = percent+2
+ return specifiers
+
+def split_format_specifiers(specifiers):
+ '''Split format specifiers between numeric (Qt) and others (strprintf)'''
+ numeric = []
+ other = []
+ for s in specifiers:
+ if s in {'1','2','3','4','5','6','7','8','9'}:
+ numeric.append(s)
+ else:
+ other.append(s)
+
+ # numeric (Qt) can be present in any order, others (strprintf) must be in specified order
+ return set(numeric),other
+
+def sanitize_string(s):
+ '''Sanitize string for printing'''
+ return s.replace('\n',' ')
+
+def check_format_specifiers(source, translation, errors):
+ source_f = split_format_specifiers(find_format_specifiers(source))
+ # assert that no source messages contain both Qt and strprintf format specifiers
+ # if this fails, go change the source as this is hacky and confusing!
+ assert(not(source_f[0] and source_f[1]))
+ try:
+ translation_f = split_format_specifiers(find_format_specifiers(translation))
+ except IndexError:
+ errors.append("Parse error in translation '%s'" % sanitize_string(translation))
+ return False
+ else:
+ if source_f != translation_f:
+ errors.append("Mismatch between '%s' and '%s'" % (sanitize_string(source), sanitize_string(translation)))
+ return False
+ return True
+
+def all_ts_files(suffix=''):
for filename in os.listdir(LOCALE_DIR):
# process only language files, and do not process source language
- if not filename.endswith('.ts') or filename == SOURCE_LANG:
+ if not filename.endswith('.ts'+suffix) or filename == SOURCE_LANG+suffix:
continue
+ if suffix: # remove provided suffix
+ filename = filename[0:-len(suffix)]
filepath = os.path.join(LOCALE_DIR, filename)
- with open(filepath, 'rb') as f:
+ yield(filename, filepath)
+
+FIX_RE = re.compile(b'[\x00-\x09\x0b\x0c\x0e-\x1f]')
+def remove_invalid_characters(s):
+ '''Remove invalid characters from translation string'''
+ return FIX_RE.sub(b'', s)
+
+# Override cdata escape function to make our output match Qt's (optional, just for cleaner diffs for
+# comparison; disabled by default)
+_orig_escape_cdata = None
+def escape_cdata(text):
+ text = _orig_escape_cdata(text)
+ text = text.replace("'", '&apos;')
+ text = text.replace('"', '&quot;')
+ return text
+
+def postprocess_translations(reduce_diff_hacks=False):
+ print('Checking and postprocessing...')
+
+ if reduce_diff_hacks:
+ global _orig_escape_cdata
+ _orig_escape_cdata = ET._escape_cdata
+ ET._escape_cdata = escape_cdata
+
+ for (filename,filepath) in all_ts_files():
+ os.rename(filepath, filepath+'.orig')
+
+ have_errors = False
+ for (filename,filepath) in all_ts_files('.orig'):
+ # pre-fixups to cope with transifex output
+ parser = ET.XMLParser(encoding='utf-8') # need to override encoding because 'utf8' is not understood, only 'utf-8'
+ with open(filepath + '.orig', 'rb') as f:
data = f.read()
- # remove non-allowed control characters
- data = re.sub('[\x00-\x09\x0b\x0c\x0e-\x1f]', '', data)
- data = data.split('\n')
- # strip locations from non-origin translation
- # location tags are used to guide translators, they are not necessary for compilation
- # TODO: actually process XML instead of relying on Transifex's one-tag-per-line output format
- data = [line for line in data if not '<location' in line]
- with open(filepath, 'wb') as f:
- f.write('\n'.join(data))
+ # remove control characters; this must be done over the entire file otherwise the XML parser will fail
+ data = remove_invalid_characters(data)
+ tree = ET.parse(io.BytesIO(data), parser=parser)
+
+ # iterate over all messages in file
+ root = tree.getroot()
+ for context in root.findall('context'):
+ for message in context.findall('message'):
+ numerus = message.get('numerus') == 'yes'
+ source = message.find('source').text
+ translation_node = message.find('translation')
+ # pick all numerusforms
+ if numerus:
+ translations = [i.text for i in translation_node.findall('numerusform')]
+ else:
+ translations = [translation_node.text]
+
+ for translation in translations:
+ if translation is None:
+ continue
+ errors = []
+ valid = check_format_specifiers(source, translation, errors)
+
+ for error in errors:
+ print('%s: %s' % (filename, error))
+
+ if not valid: # set type to unfinished and clear string if invalid
+ translation_node.clear()
+ translation_node.set('type', 'unfinished')
+ have_errors = True
+
+ # Remove location tags
+ for location in message.findall('location'):
+ message.remove(location)
+
+ # Remove entire message if it is an unfinished translation
+ if translation_node.get('type') == 'unfinished':
+ context.remove(message)
+
+ # write fixed-up tree
+ # if diff reduction requested, rewrite parts of the XML output to match Qt's formatting
+ if reduce_diff_hacks:
+ out = io.BytesIO()
+ tree.write(out, encoding='utf-8')
+ out = out.getvalue()
+ out = out.replace(b' />', b'/>')
+ with open(filepath, 'wb') as f:
+ f.write(out)
+ else:
+ tree.write(filepath, encoding='utf-8')
+ return have_errors
if __name__ == '__main__':
check_at_repository_root()
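
The format-specifier check added above can be tried on its own. Below is a minimal standalone sketch; the sample source/translation strings are invented for illustration, and it runs under both Python 2 and 3:

def find_format_specifiers(s):
    '''Return the character following each '%' in s.'''
    pos, specifiers = 0, []
    while True:
        percent = s.find('%', pos)
        if percent < 0:
            break
        specifiers.append(s[percent+1])
        pos = percent + 2
    return specifiers

def split_format_specifiers(specifiers):
    '''Qt specifiers (%1..%9) may appear in any order; strprintf ones may not.'''
    numeric = [s for s in specifiers if s in '123456789']
    other = [s for s in specifiers if s not in '123456789']
    return set(numeric), other

source = 'Copied %1 to %2'           # hypothetical Qt-style source string
translation = 'Kopierte %2 nach %1'  # reordering numeric specifiers is fine
assert split_format_specifiers(find_format_specifiers(source)) == \
       split_format_specifiers(find_format_specifiers(translation))

bad = 'Kopierte %1'                  # dropping %2 would be reported as a mismatch
assert split_format_specifiers(find_format_specifiers(source)) != \
       split_format_specifiers(find_format_specifiers(bad))

check_format_specifiers() in the patch wraps exactly this comparison, recording an error and clearing the translation when the specifier sets differ.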
diff --git a/contrib/linearize/README.md b/contrib/linearize/README.md
index 70b9f034cd..157586e4d4 100644
--- a/contrib/linearize/README.md
+++ b/contrib/linearize/README.md
@@ -1,2 +1,33 @@
-### Linearize ###
-Construct a linear, no-fork, best version of the blockchain.
\ No newline at end of file
+# Linearize
+Construct a linear, no-fork, best version of the blockchain.
+
+## Step 1: Download hash list
+
+ $ ./linearize-hashes.py linearize.cfg > hashlist.txt
+
+Required configuration file settings for linearize-hashes:
+* RPC: rpcuser, rpcpassword
+
+Optional config file settings for linearize-hashes:
+* RPC: host, port
+* Block chain: min_height, max_height
+
+## Step 2: Copy local block data
+
+ $ ./linearize-data.py linearize.cfg
+
+Required configuration file settings:
+* "input": bitcoind blocks/ directory containing blkNNNNN.dat
+* "hashlist": text file containing list of block hashes, linearized-hashes.py
+output.
+* "output_file": bootstrap.dat
+ or
+* "output": output directory for linearized blocks/blkNNNNN.dat output
+
+Optional config file settings for linearize-data:
+* "netmagic": network magic number
+* "max_out_sz": maximum output file size (default 1000*1000*1000)
+* "split_timestamp": Split files when a new month is first seen, in addition to
+reaching a maximum file size.
+* "file_timestamp": Set each file's last-modified time to that of the
+most recent block in that file.
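
Both scripts read these settings from the same plain key=value file. A minimal sketch of that parsing, mirroring the regex loop used in linearize-data.py and linearize-hashes.py (the path is an assumed example):

import re

def read_linearize_cfg(path):
    '''Parse key=value lines, skipping comments, into a settings dict.'''
    settings = {}
    with open(path) as f:
        for line in f:
            if re.search(r'^\s*#', line):   # skip comment lines
                continue
            m = re.search(r'^(\w+)\s*=\s*(\S.*)$', line)
            if m is not None:
                settings[m.group(1)] = m.group(2)
    return settings

# settings = read_linearize_cfg('linearize.cfg')
# linearize-data.py then hex-decodes settings['netmagic'] and converts the numeric values.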
diff --git a/contrib/linearize/example-linearize.cfg b/contrib/linearize/example-linearize.cfg
index f5cdab5325..071345f23a 100644
--- a/contrib/linearize/example-linearize.cfg
+++ b/contrib/linearize/example-linearize.cfg
@@ -1,12 +1,17 @@
-# bitcoind RPC settings
+# bitcoind RPC settings (linearize-hashes)
rpcuser=someuser
rpcpassword=somepassword
host=127.0.0.1
port=8332
-# bootstrap.dat settings
+# bootstrap.dat hashlist settings (linearize-hashes)
+max_height=313000
+
+# bootstrap.dat input/output settings (linearize-data)
netmagic=f9beb4d9
-max_height=279000
-output=bootstrap.dat
+input=/home/example/.bitcoin/blocks
+output_file=/home/example/Downloads/bootstrap.dat
+hashlist=hashlist.txt
+split_timestamp=1
diff --git a/contrib/linearize/linearize-data.py b/contrib/linearize/linearize-data.py
new file mode 100755
index 0000000000..383bb38198
--- /dev/null
+++ b/contrib/linearize/linearize-data.py
@@ -0,0 +1,233 @@
+#!/usr/bin/python
+#
+# linearize-data.py: Construct a linear, no-fork version of the chain.
+#
+# Copyright (c) 2013 The Bitcoin developers
+# Distributed under the MIT/X11 software license, see the accompanying
+# file COPYING or http://www.opensource.org/licenses/mit-license.php.
+#
+
+import json
+import struct
+import re
+import os
+import base64
+import httplib
+import sys
+import hashlib
+import datetime
+import time
+
+settings = {}
+
+
+def uint32(x):
+ return x & 0xffffffffL
+
+def bytereverse(x):
+ return uint32(( ((x) << 24) | (((x) << 8) & 0x00ff0000) |
+ (((x) >> 8) & 0x0000ff00) | ((x) >> 24) ))
+
+def bufreverse(in_buf):
+ out_words = []
+ for i in range(0, len(in_buf), 4):
+ word = struct.unpack('@I', in_buf[i:i+4])[0]
+ out_words.append(struct.pack('@I', bytereverse(word)))
+ return ''.join(out_words)
+
+def wordreverse(in_buf):
+ out_words = []
+ for i in range(0, len(in_buf), 4):
+ out_words.append(in_buf[i:i+4])
+ out_words.reverse()
+ return ''.join(out_words)
+
+def calc_hdr_hash(blk_hdr):
+ hash1 = hashlib.sha256()
+ hash1.update(blk_hdr)
+ hash1_o = hash1.digest()
+
+ hash2 = hashlib.sha256()
+ hash2.update(hash1_o)
+ hash2_o = hash2.digest()
+
+ return hash2_o
+
+def calc_hash_str(blk_hdr):
+ hash = calc_hdr_hash(blk_hdr)
+ hash = bufreverse(hash)
+ hash = wordreverse(hash)
+ hash_str = hash.encode('hex')
+ return hash_str
+
+def get_blk_dt(blk_hdr):
+ members = struct.unpack("<I", blk_hdr[68:68+4])
+ nTime = members[0]
+ dt = datetime.datetime.fromtimestamp(nTime)
+ dt_ym = datetime.datetime(dt.year, dt.month, 1)
+ return (dt_ym, nTime)
+
+def get_block_hashes(settings):
+ blkindex = []
+ f = open(settings['hashlist'], "r")
+ for line in f:
+ line = line.rstrip()
+ blkindex.append(line)
+
+ print("Read " + str(len(blkindex)) + " hashes")
+
+ return blkindex
+
+def mkblockset(blkindex):
+ blkmap = {}
+ for hash in blkindex:
+ blkmap[hash] = True
+ return blkmap
+
+def copydata(settings, blkindex, blkset):
+ inFn = 0
+ inF = None
+ outFn = 0
+ outsz = 0
+ outF = None
+ outFname = None
+ blkCount = 0
+
+ lastDate = datetime.datetime(2000, 1, 1)
+ highTS = 1408893517 - 315360000
+ timestampSplit = False
+ fileOutput = True
+ setFileTime = False
+ maxOutSz = settings['max_out_sz']
+ if 'output' in settings:
+ fileOutput = False
+ if settings['file_timestamp'] != 0:
+ setFileTime = True
+ if settings['split_timestamp'] != 0:
+ timestampSplit = True
+
+ while True:
+ if not inF:
+ fname = "%s/blk%05d.dat" % (settings['input'], inFn)
+ print("Input file" + fname)
+ inF = open(fname, "rb")
+
+ inhdr = inF.read(8)
+ if (not inhdr or (inhdr[0] == "\0")):
+ inF.close()
+ inF = None
+ inFn = inFn + 1
+ continue
+
+ inMagic = inhdr[:4]
+ if (inMagic != settings['netmagic']):
+ print("Invalid magic:" + inMagic)
+ return
+ inLenLE = inhdr[4:]
+ su = struct.unpack("<I", inLenLE)
+ inLen = su[0]
+ rawblock = inF.read(inLen)
+ blk_hdr = rawblock[:80]
+
+ hash_str = calc_hash_str(blk_hdr)
+ if not hash_str in blkset:
+ print("Skipping unknown block " + hash_str)
+ continue
+
+ if blkindex[blkCount] != hash_str:
+ print("Out of order block.")
+ print("Expected " + blkindex[blkCount])
+ print("Got " + hash_str)
+ sys.exit(1)
+
+ if not fileOutput and ((outsz + inLen) > maxOutSz):
+ outF.close()
+ if setFileTime:
+ os.utime(outFname, (int(time.time()), highTS))
+ outF = None
+ outFname = None
+ outFn = outFn + 1
+ outsz = 0
+
+ (blkDate, blkTS) = get_blk_dt(blk_hdr)
+ if timestampSplit and (blkDate > lastDate):
+ print("New month " + blkDate.strftime("%Y-%m") + " @ " + hash_str)
+ lastDate = blkDate
+ if outF:
+ outF.close()
+ if setFileTime:
+ os.utime(outFname, (int(time.time()), highTS))
+ outF = None
+ outFname = None
+ outFn = outFn + 1
+ outsz = 0
+
+ if not outF:
+ if fileOutput:
+ outFname = settings['output_file']
+ else:
+ outFname = "%s/blk%05d.dat" % (settings['output'], outFn)
+ print("Output file" + outFname)
+ outF = open(outFname, "wb")
+
+ outF.write(inhdr)
+ outF.write(rawblock)
+ outsz = outsz + inLen + 8
+
+ blkCount = blkCount + 1
+ if blkTS > highTS:
+ highTS = blkTS
+
+ if (blkCount % 1000) == 0:
+ print("Wrote " + str(blkCount) + " blocks")
+
+if __name__ == '__main__':
+ if len(sys.argv) != 2:
+ print "Usage: linearize-data.py CONFIG-FILE"
+ sys.exit(1)
+
+ f = open(sys.argv[1])
+ for line in f:
+ # skip comment lines
+ m = re.search('^\s*#', line)
+ if m:
+ continue
+
+ # parse key=value lines
+ m = re.search('^(\w+)\s*=\s*(\S.*)$', line)
+ if m is None:
+ continue
+ settings[m.group(1)] = m.group(2)
+ f.close()
+
+ if 'netmagic' not in settings:
+ settings['netmagic'] = 'f9beb4d9'
+ if 'input' not in settings:
+ settings['input'] = 'input'
+ if 'hashlist' not in settings:
+ settings['hashlist'] = 'hashlist.txt'
+ if 'file_timestamp' not in settings:
+ settings['file_timestamp'] = 0
+ if 'split_timestamp' not in settings:
+ settings['split_timestamp'] = 0
+ if 'max_out_sz' not in settings:
+ settings['max_out_sz'] = 1000L * 1000 * 1000
+
+ settings['max_out_sz'] = long(settings['max_out_sz'])
+ settings['split_timestamp'] = int(settings['split_timestamp'])
+ settings['file_timestamp'] = int(settings['file_timestamp'])
+ settings['netmagic'] = settings['netmagic'].decode('hex')
+
+ if 'output_file' not in settings and 'output' not in settings:
+ print("Missing output file / directory")
+ sys.exit(1)
+
+ blkindex = get_block_hashes(settings)
+ blkset = mkblockset(blkindex)
+
+ if not "000000000019d6689c085ae165831e934ff763ae46a2a6c172b3f1b60a8ce26f" in blkset:
+ print("not found")
+ else:
+ copydata(settings, blkindex, blkset)
+
+
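
calc_hash_str() above is the standard double-SHA256 of the 80-byte block header, byte-reversed for display. A self-contained sanity check using the genesis block header, which hashes to the same value the script requires to be present in blkset:

import binascii
import hashlib

def block_hash(hdr):
    '''Double-SHA256 an 80-byte block header, returned as the usual
    big-endian hex string (equivalent to calc_hash_str above).'''
    digest = hashlib.sha256(hashlib.sha256(hdr).digest()).digest()
    return binascii.hexlify(digest[::-1]).decode('ascii')

# Genesis block header: version 1, null prev-hash, merkle root, time, bits, nonce
# (all little-endian), 80 bytes in total.
genesis_hdr = binascii.unhexlify(
    '01000000' + '00' * 32 +
    '3ba3edfd7a7b12b27ac72c3e67768f617fc81bc3888a51323a9fb8aa4b1e5e4a' +
    '29ab5f49' + 'ffff001d' + '1dac2b7c')
assert block_hash(genesis_hdr) == \
    '000000000019d6689c085ae165831e934ff763ae46a2a6c172b3f1b60a8ce26f'

The bufreverse()/wordreverse() pair in the script amounts to the same full byte reversal of the 32-byte digest.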
diff --git a/contrib/linearize/linearize.py b/contrib/linearize/linearize-hashes.py
index 650f7d3684..791b71bc33 100644..100755
--- a/contrib/linearize/linearize.py
+++ b/contrib/linearize/linearize-hashes.py
@@ -1,7 +1,6 @@
#!/usr/bin/python
#
-# linearize.py: Construct a linear, no-fork, best version of the blockchain.
-#
+# linearize-hashes.py: List blocks in a linear, no-fork version of the chain.
#
# Copyright (c) 2013 The Bitcoin developers
# Distributed under the MIT/X11 software license, see the accompanying
@@ -15,9 +14,6 @@ import base64
import httplib
import sys
-ERR_SLEEP = 15
-MAX_NONCE = 1000000L
-
settings = {}
class BitcoinRPC:
@@ -62,34 +58,18 @@ class BitcoinRPC:
def getblockhash(self, index):
return self.rpc('getblockhash', [index])
-def getblock(rpc, settings, n):
- hash = rpc.getblockhash(n)
- hexdata = rpc.getblock(hash, False)
- data = hexdata.decode('hex')
-
- return data
-
-def get_blocks(settings):
+def get_block_hashes(settings):
rpc = BitcoinRPC(settings['host'], settings['port'],
settings['rpcuser'], settings['rpcpassword'])
- outf = open(settings['output'], 'ab')
-
for height in xrange(settings['min_height'], settings['max_height']+1):
- data = getblock(rpc, settings, height)
-
- outhdr = settings['netmagic']
- outhdr += struct.pack("<i", len(data))
+ hash = rpc.getblockhash(height)
- outf.write(outhdr)
- outf.write(data)
-
- if (height % 1000) == 0:
- sys.stdout.write("Wrote block " + str(height) + "\n")
+ print(hash)
if __name__ == '__main__':
if len(sys.argv) != 2:
- print "Usage: linearize.py CONFIG-FILE"
+ print "Usage: linearize-hashes.py CONFIG-FILE"
sys.exit(1)
f = open(sys.argv[1])
@@ -106,10 +86,6 @@ if __name__ == '__main__':
settings[m.group(1)] = m.group(2)
f.close()
- if 'netmagic' not in settings:
- settings['netmagic'] = 'f9beb4d9'
- if 'output' not in settings:
- settings['output'] = 'bootstrap.dat'
if 'host' not in settings:
settings['host'] = '127.0.0.1'
if 'port' not in settings:
@@ -117,16 +93,14 @@ if __name__ == '__main__':
if 'min_height' not in settings:
settings['min_height'] = 0
if 'max_height' not in settings:
- settings['max_height'] = 279000
+ settings['max_height'] = 313000
if 'rpcuser' not in settings or 'rpcpassword' not in settings:
print "Missing username and/or password in cfg file"
sys.exit(1)
- settings['netmagic'] = settings['netmagic'].decode('hex')
settings['port'] = int(settings['port'])
settings['min_height'] = int(settings['min_height'])
settings['max_height'] = int(settings['max_height'])
- get_blocks(settings)
-
+ get_block_hashes(settings)
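
get_block_hashes() now simply prints one hash per line. For reference, a rough Python 3 sketch of a single getblockhash call over bitcoind's HTTP JSON-RPC interface, in the style of the BitcoinRPC helper the script keeps; host, port and credentials would come from the config file:

import base64
import http.client
import json

def rpc_getblockhash(host, port, user, password, height):
    '''One getblockhash call against bitcoind's JSON-RPC endpoint.'''
    auth = base64.b64encode(('%s:%s' % (user, password)).encode()).decode()
    body = json.dumps({'version': '1.1', 'method': 'getblockhash',
                       'params': [height], 'id': height})
    conn = http.client.HTTPConnection(host, port, timeout=30)
    conn.request('POST', '/', body,
                 {'Authorization': 'Basic ' + auth,
                  'Content-type': 'application/json'})
    reply = json.loads(conn.getresponse().read().decode('utf-8'))
    conn.close()
    if reply.get('error') is not None:
        raise RuntimeError(reply['error'])
    return reply['result']

# for height in range(min_height, max_height + 1):
#     print(rpc_getblockhash('127.0.0.1', 8332, 'someuser', 'somepassword', height))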