#!/usr/bin/env python3
#
# Copyright (c) 2018-2022 The Bitcoin Core developers
# Distributed under the MIT software license, see the accompanying
# file COPYING or http://www.opensource.org/licenses/mit-license.php.
#
# Make sure we explicitly open all text files using UTF-8 (or ASCII) encoding to
# avoid potential issues on the BSDs where the locale is not always set.
#
# Python port of the former test/lint/lint-python-utf8-encoding.sh.
#
# NOTE: this script matches its own "*.py" pathspec, so it lints itself. Any
# line below that contains one of the grepped call patterns therefore also
# carries an explicit encoding on that same line.

import re
import sys

from subprocess import check_output, CalledProcessError

EXCLUDED_DIRS = ["src/crc32c/"]

# A grepped line is accepted when it names one of the allowed encodings.
EXPLICIT_ENCODING_RE = re.compile(r"encoding=.(ascii|utf8|utf-8).")
# Calls to `open(...)` whose mode string contains "b" are binary: no encoding needed.
BINARY_MODE_OPEN_RE = re.compile(r"open\([^,]*, ['\"][^'\"]*b[^'\"]*['\"]")


def get_exclude_args():
    """Return one git ":(exclude)" pathspec per entry of EXCLUDED_DIRS."""
    return [":(exclude)" + excluded_dir for excluded_dir in EXCLUDED_DIRS]


def grep_lines(command, encoding="utf8"):
    """Run a grep-style command and return its standard output as a list of lines.

    Args:
        command: Full argument vector to execute.
        encoding: Encoding used to decode the output; passing it also makes
            subprocess return text instead of bytes.

    Returns:
        The output lines, or an empty list when the command exits with
        status 1 (the grep convention for "nothing matched").

    Raises:
        CalledProcessError: For every other non-zero status. This includes
            negative values, which subprocess reports when the child was
            terminated by a signal; treating those as "no matches" would let
            the lint pass without having checked anything.
    """
    try:
        return check_output(command, encoding=encoding).splitlines()
    except CalledProcessError as e:
        if e.returncode != 1:
            raise
        return []


def filter_fileopens(lines):
    """Return the lines that neither name an accepted encoding nor open in binary mode."""
    return [
        line for line in lines
        if not (EXPLICIT_ENCODING_RE.search(line) or BINARY_MODE_OPEN_RE.search(line))
    ]


def filter_checked_outputs(lines):
    """Return the lines that request text mode without naming an accepted encoding.

    Only lines containing the text-mode keyword argument are considered,
    because output that stays as bytes is never decoded.
    """
    return [
        line for line in lines
        if "universal_newlines=True" in line and not EXPLICIT_ENCODING_RE.search(line)
    ]


def check_fileopens():
    """Return grep hits for text-mode `open(...)` calls lacking an explicit encoding."""
    matches = grep_lines(["git", "grep", r" open(", "--", "*.py"] + get_exclude_args(), encoding="utf8")
    return filter_fileopens(matches)


def check_checked_outputs():
    """Return grep hits for text-mode check_output calls lacking an explicit encoding."""
    matches = grep_lines(["git", "grep", "check_output(", "--", "*.py"] + get_exclude_args(), encoding="utf8")
    return filter_checked_outputs(matches)


def main():
    """Print every offending line and exit with status 1 if any was found, else 0."""
    exit_code = 0

    nonexplicit_utf8_fileopens = check_fileopens()
    if nonexplicit_utf8_fileopens:
        print("Python's open(...) seems to be used to open text files without explicitly specifying encoding='utf8':\n")
        for fileopen in nonexplicit_utf8_fileopens:
            print(fileopen)
        exit_code = 1

    nonexplicit_utf8_checked_outputs = check_checked_outputs()
    if nonexplicit_utf8_checked_outputs:
        # Visually separate the two reports when both are printed.
        if nonexplicit_utf8_fileopens:
            print("\n")
        print("Python's check_output(...) seems to be used to get program outputs without explicitly specifying encoding='utf8':\n")
        for checked_output in nonexplicit_utf8_checked_outputs:
            print(checked_output)
        exit_code = 1

    sys.exit(exit_code)


if __name__ == "__main__":
    main()