aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: fanquake <fanquake@gmail.com> 2019-08-25 10:51:27 +0800
committer: fanquake <fanquake@gmail.com> 2019-08-25 11:26:45 +0800
commit: db67101c748c208cced8e9b76a66d57cd48fbf6e (patch)
tree: 54937e223a6a03a5975b0bfd6788ac6a97be8d72
parent: 3ca514ddb77253042877d1a72dfd3021c3de2812 (diff)
parent: 3d50fe2c1fb566501257fc16d4606e06f518a0bb (diff)
download: bitcoin-db67101c748c208cced8e9b76a66d57cd48fbf6e.tar.xz
Merge #16665: scripts: move update-translations.py to maintainer-tools repo
3d50fe2c1fb566501257fc16d4606e06f518a0bb scripts: move update-translations.py to maintainer-tools repo (fanquake)

Pull request description:

  As mentioned in [#16644](https://github.com/bitcoin/bitcoin/pull/16644#issuecomment-522481111), move the `update-translations.py` script out of the `bitcoin/bitcoin` repo.

  This script is run infrequently by maintainers, and aside from #16644, doesn't see many changes. If it is going to be changed, it probably shouldn't consume review bandwidth in bitcoin/bitcoin anyways.

  PR adding the script to the maintainer-tools repo: https://github.com/bitcoin-core/bitcoin-maintainer-tools/pull/31

  Could also move some other scripts, such as [`gen-manpages.sh`](https://github.com/bitcoin/bitcoin/blob/master/contrib/devtools/gen-manpages.sh).

ACKs for top commit:
  laanwj:
    ACK 3d50fe2c1fb566501257fc16d4606e06f518a0bb

Tree-SHA512: b68da68e2df0c1b3275729890a02726af602eb398507f0a03387a313a73e7629a06db57f3472a1dbcf9e11506e88a8f5f725a397a9ce6afef7627188c88d3d48
-rw-r--r-- contrib/devtools/README.md | 12
-rwxr-xr-x contrib/devtools/update-translations.py | 215
-rw-r--r-- doc/translation_process.md | 4
3 files changed, 2 insertions, 229 deletions
diff --git a/contrib/devtools/README.md b/contrib/devtools/README.md
index 3d1024c7a5..04fa02484f 100644
--- a/contrib/devtools/README.md
+++ b/contrib/devtools/README.md
@@ -120,18 +120,6 @@ If there are 'unsupported' symbols, the return value will be 1 a list like this
.../64/test_bitcoin: symbol std::out_of_range::~out_of_range() from unsupported version GLIBCXX_3.4.15
.../64/test_bitcoin: symbol _ZNSt8__detail15_List_nod from unsupported version GLIBCXX_3.4.15
-update-translations.py
-======================
-
-Run this script from the root of the repository to update all translations from transifex.
-It will do the following automatically:
-
-- fetch all translations
-- post-process them into valid and committable format
-- add missing translations to the build system (TODO)
-
-See doc/translation-process.md for more information.
-
circular-dependencies.py
========================
diff --git a/contrib/devtools/update-translations.py b/contrib/devtools/update-translations.py
deleted file mode 100755
index 1b9d3a4c27..0000000000
--- a/contrib/devtools/update-translations.py
+++ /dev/null
@@ -1,215 +0,0 @@
-#!/usr/bin/env python3
-# Copyright (c) 2014 Wladimir J. van der Laan
-# Distributed under the MIT software license, see the accompanying
-# file COPYING or http://www.opensource.org/licenses/mit-license.php.
-'''
-Run this script from the root of the repository to update all translations from
-transifex.
-It will do the following automatically:
-
-- fetch all translations using the tx tool
-- post-process them into valid and committable format
- - remove invalid control characters
- - remove location tags (makes diffs less noisy)
-
-TODO:
-- auto-add new translations to the build system according to the translation process
-'''
-import subprocess
-import re
-import sys
-import os
-import io
-import xml.etree.ElementTree as ET
-
-# Name of transifex tool
-TX = 'tx'
-# Name of source language file
-SOURCE_LANG = 'bitcoin_en.ts'
-# Directory with locale files
-LOCALE_DIR = 'src/qt/locale'
-# Minimum number of messages for translation to be considered at all
-MIN_NUM_MESSAGES = 10
-# Regexp to check for Bitcoin addresses
-ADDRESS_REGEXP = re.compile('([13]|bc1)[a-zA-Z0-9]{30,}')
-
-def check_at_repository_root():
- if not os.path.exists('.git'):
- print('No .git directory found')
- print('Execute this script at the root of the repository', file=sys.stderr)
- sys.exit(1)
-
-def fetch_all_translations():
- if subprocess.call([TX, 'pull', '-f', '-a']):
- print('Error while fetching translations', file=sys.stderr)
- sys.exit(1)
-
-def find_format_specifiers(s):
- '''Find all format specifiers in a string.'''
- pos = 0
- specifiers = []
- while True:
- percent = s.find('%', pos)
- if percent < 0:
- break
- specifiers.append(s[percent+1])
- pos = percent+2
- return specifiers
-
-def split_format_specifiers(specifiers):
- '''Split format specifiers between numeric (Qt) and others (strprintf)'''
- numeric = []
- other = []
- for s in specifiers:
- if s in {'1','2','3','4','5','6','7','8','9'}:
- numeric.append(s)
- else:
- other.append(s)
-
- # If both numeric format specifiers and "others" are used, assume we're dealing
- # with a Qt-formatted message. In the case of Qt formatting (see https://doc.qt.io/qt-5/qstring.html#arg)
- # only numeric formats are replaced at all. This means "(percentage: %1%)" is valid, without needing
- # any kind of escaping that would be necessary for strprintf. Without this, this function
- # would wrongly detect '%)' as a printf format specifier.
- if numeric:
- other = []
-
- # numeric (Qt) can be present in any order, others (strprintf) must be in specified order
- return set(numeric),other
-
-def sanitize_string(s):
- '''Sanitize string for printing'''
- return s.replace('\n',' ')
-
-def check_format_specifiers(source, translation, errors, numerus):
- source_f = split_format_specifiers(find_format_specifiers(source))
- # assert that no source messages contain both Qt and strprintf format specifiers
- # if this fails, go change the source as this is hacky and confusing!
- assert(not(source_f[0] and source_f[1]))
- try:
- translation_f = split_format_specifiers(find_format_specifiers(translation))
- except IndexError:
- errors.append("Parse error in translation for '%s': '%s'" % (sanitize_string(source), sanitize_string(translation)))
- return False
- else:
- if source_f != translation_f:
- if numerus and source_f == (set(), ['n']) and translation_f == (set(), []) and translation.find('%') == -1:
- # Allow numerus translations to omit %n specifier (usually when it only has one possible value)
- return True
- errors.append("Mismatch between '%s' and '%s'" % (sanitize_string(source), sanitize_string(translation)))
- return False
- return True
-
-def all_ts_files(suffix=''):
- for filename in os.listdir(LOCALE_DIR):
- # process only language files, and do not process source language
- if not filename.endswith('.ts'+suffix) or filename == SOURCE_LANG+suffix:
- continue
- if suffix: # remove provided suffix
- filename = filename[0:-len(suffix)]
- filepath = os.path.join(LOCALE_DIR, filename)
- yield(filename, filepath)
-
-FIX_RE = re.compile(b'[\x00-\x09\x0b\x0c\x0e-\x1f]')
-def remove_invalid_characters(s):
- '''Remove invalid characters from translation string'''
- return FIX_RE.sub(b'', s)
-
-# Override cdata escape function to make our output match Qt's (optional, just for cleaner diffs for
-# comparison, disable by default)
-_orig_escape_cdata = None
-def escape_cdata(text):
- text = _orig_escape_cdata(text)
- text = text.replace("'", '&apos;')
- text = text.replace('"', '&quot;')
- return text
-
-def contains_bitcoin_addr(text, errors):
- if text is not None and ADDRESS_REGEXP.search(text) is not None:
- errors.append('Translation "%s" contains a bitcoin address. This will be removed.' % (text))
- return True
- return False
-
-def postprocess_translations(reduce_diff_hacks=False):
- print('Checking and postprocessing...')
-
- if reduce_diff_hacks:
- global _orig_escape_cdata
- _orig_escape_cdata = ET._escape_cdata
- ET._escape_cdata = escape_cdata
-
- for (filename,filepath) in all_ts_files():
- os.rename(filepath, filepath+'.orig')
-
- have_errors = False
- for (filename,filepath) in all_ts_files('.orig'):
- # pre-fixups to cope with transifex output
- parser = ET.XMLParser(encoding='utf-8') # need to override encoding because 'utf8' is not understood only 'utf-8'
- with open(filepath + '.orig', 'rb') as f:
- data = f.read()
- # remove control characters; this must be done over the entire file otherwise the XML parser will fail
- data = remove_invalid_characters(data)
- tree = ET.parse(io.BytesIO(data), parser=parser)
-
- # iterate over all messages in file
- root = tree.getroot()
- for context in root.findall('context'):
- for message in context.findall('message'):
- numerus = message.get('numerus') == 'yes'
- source = message.find('source').text
- translation_node = message.find('translation')
- # pick all numerusforms
- if numerus:
- translations = [i.text for i in translation_node.findall('numerusform')]
- else:
- translations = [translation_node.text]
-
- for translation in translations:
- if translation is None:
- continue
- errors = []
- valid = check_format_specifiers(source, translation, errors, numerus) and not contains_bitcoin_addr(translation, errors)
-
- for error in errors:
- print('%s: %s' % (filename, error))
-
- if not valid: # set type to unfinished and clear string if invalid
- translation_node.clear()
- translation_node.set('type', 'unfinished')
- have_errors = True
-
- # Remove location tags
- for location in message.findall('location'):
- message.remove(location)
-
- # Remove entire message if it is an unfinished translation
- if translation_node.get('type') == 'unfinished':
- context.remove(message)
-
- # check if document is (virtually) empty, and remove it if so
- num_messages = 0
- for context in root.findall('context'):
- for message in context.findall('message'):
- num_messages += 1
- if num_messages < MIN_NUM_MESSAGES:
- print('Removing %s, as it contains only %i messages' % (filepath, num_messages))
- continue
-
- # write fixed-up tree
- # if diff reduction requested, replace some XML to 'sanitize' to qt formatting
- if reduce_diff_hacks:
- out = io.BytesIO()
- tree.write(out, encoding='utf-8')
- out = out.getvalue()
- out = out.replace(b' />', b'/>')
- with open(filepath, 'wb') as f:
- f.write(out)
- else:
- tree.write(filepath, encoding='utf-8')
- return have_errors
-
-if __name__ == '__main__':
- check_at_repository_root()
- fetch_all_translations()
- postprocess_translations()
-
diff --git a/doc/translation_process.md b/doc/translation_process.md
index b9a10b6527..7212105f35 100644
--- a/doc/translation_process.md
+++ b/doc/translation_process.md
@@ -65,9 +65,9 @@ username = USERNAME
The Transifex Bitcoin project config file is included as part of the repo. It can be found at `.tx/config`, however you shouldn’t need to change anything.
### Synchronising translations
-To assist in updating translations, we have created a script to help.
+To assist in updating translations, a helper script is available in the [maintainer-tools repo](https://github.com/bitcoin-core/bitcoin-maintainer-tools).
-1. `python contrib/devtools/update-translations.py`
+1. `python3 ../bitcoin-maintainer-tools/update-translations.py`
2. `git add` new translations from `src/qt/locale/`
3. Update `src/qt/bitcoin_locale.qrc` manually or via
```bash