From 035eef4be6071a060260907001363a0d12f1f2d2 Mon Sep 17 00:00:00 2001 From: Dimitri Date: Tue, 19 Apr 2022 02:06:17 +0200 Subject: lint: Convert lint-python-utf8-encoding.sh to Python --- test/lint/lint-python-utf8-encoding.py | 73 ++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100755 test/lint/lint-python-utf8-encoding.py (limited to 'test/lint/lint-python-utf8-encoding.py') diff --git a/test/lint/lint-python-utf8-encoding.py b/test/lint/lint-python-utf8-encoding.py new file mode 100755 index 0000000000..62fdc34d50 --- /dev/null +++ b/test/lint/lint-python-utf8-encoding.py @@ -0,0 +1,73 @@ +#!/usr/bin/env python3 +# +# Copyright (c) 2018-2022 The Bitcoin Core developers +# Distributed under the MIT software license, see the accompanying +# file COPYING or http://www.opensource.org/licenses/mit-license.php. +# +# Make sure we explicitly open all text files using UTF-8 (or ASCII) encoding to +# avoid potential issues on the BSDs where the locale is not always set. + +import sys +import re + +from subprocess import check_output, CalledProcessError + +EXCLUDED_DIRS = ["src/crc32c/"] + + +def get_exclude_args(): + return [":(exclude)" + dir for dir in EXCLUDED_DIRS] + + +def check_fileopens(): + fileopens = list() + + try: + fileopens = check_output(["git", "grep", r" open(", "--", "*.py"] + get_exclude_args(), universal_newlines=True, encoding="utf8").splitlines() + except CalledProcessError as e: + if e.returncode > 1: + raise e + + filtered_fileopens = [fileopen for fileopen in fileopens if not re.search(r"encoding=.(ascii|utf8|utf-8).|open\([^,]*, ['\"][^'\"]*b[^'\"]*['\"]", fileopen)] + + return filtered_fileopens + + +def check_checked_outputs(): + checked_outputs = list() + + try: + checked_outputs = check_output(["git", "grep", "check_output(", "--", "*.py"] + get_exclude_args(), universal_newlines=True, encoding="utf8").splitlines() + except CalledProcessError as e: + if e.returncode > 1: + raise e + + filtered_checked_outputs = [checked_output for checked_output in checked_outputs if re.search(r"universal_newlines=True", checked_output) and not re.search(r"encoding=.(ascii|utf8|utf-8).", checked_output)] + + return filtered_checked_outputs + + +def main(): + exit_code = 0 + + nonexplicit_utf8_fileopens = check_fileopens() + if nonexplicit_utf8_fileopens: + print("Python's open(...) seems to be used to open text files without explicitly specifying encoding='utf8':\n") + for fileopen in nonexplicit_utf8_fileopens: + print(fileopen) + exit_code = 1 + + nonexplicit_utf8_checked_outputs = check_checked_outputs() + if nonexplicit_utf8_checked_outputs: + if nonexplicit_utf8_fileopens: + print("\n") + print("Python's check_output(...) seems to be used to get program outputs without explicitly specifying encoding='utf8':\n") + for checked_output in nonexplicit_utf8_checked_outputs: + print(checked_output) + exit_code = 1 + + sys.exit(exit_code) + + +if __name__ == "__main__": + main() -- cgit v1.2.3