#!/usr/bin/env python3
#
# Copyright (c) 2018-2022 The Bitcoin Core developers
# Distributed under the MIT software license, see the accompanying
# file COPYING or http://www.opensource.org/licenses/mit-license.php.
#
# Make sure we explicitly open all text files using UTF-8 (or ASCII) encoding to
# avoid potential issues on the BSDs where the locale is not always set.

import sys
import re

from subprocess import check_output, CalledProcessError

# Directory prefixes that the git grep searches in this script leave out.
EXCLUDED_DIRS = ["src/crc32c/"]


def get_exclude_args():
    """Return one git ``:(exclude)`` pathspec string per entry in EXCLUDED_DIRS."""
    exclude_magic = ":(exclude)"
    return [exclude_magic + excluded_dir for excluded_dir in EXCLUDED_DIRS]


def check_fileopens():
    """Find ``open`` calls in tracked Python files that lack an explicit encoding.

    Runs ``git grep`` over ``*.py`` (minus the excluded directories) and
    returns the matching output lines, except those that either

    * name an ``ascii``, ``utf8`` or ``utf-8`` encoding, or
    * pass a quoted mode containing ``b`` as the second positional argument.

    Returns:
        A list of ``git grep`` output lines; empty when nothing is suspicious.

    Raises:
        CalledProcessError: if ``git grep`` exits with a status above 1.
    """
    try:
        # The whole call stays on one line on purpose: this script is itself a
        # *.py file, so the grep presumably picks up this very line, and it only
        # passes the filter because the encoding argument sits on the same line.
        grep_hits = check_output(["git", "grep", r" open(", "--", "*.py"] + get_exclude_args(), universal_newlines=True, encoding="utf8").splitlines()
    except CalledProcessError as err:
        # Status 1 is tolerated (presumably "no matches", as with grep);
        # anything larger is treated as a real failure and propagated.
        if err.returncode > 1:
            raise err
        grep_hits = []

    explicit_encoding = re.compile(r"encoding=.(ascii|utf8|utf-8).")
    binary_mode = re.compile(r"open\([^,]*, ['\"][^'\"]*b[^'\"]*['\"]")

    return [hit for hit in grep_hits if not (explicit_encoding.search(hit) or binary_mode.search(hit))]


def check_checked_outputs():
    """Find text-mode ``check_output`` calls that lack an explicit encoding.

    Runs ``git grep`` over ``*.py`` (minus the excluded directories) and
    returns the matching output lines that request universal newlines but do
    not name an ``ascii``, ``utf8`` or ``utf-8`` encoding on the same line.

    Returns:
        A list of ``git grep`` output lines; empty when nothing is suspicious.

    Raises:
        CalledProcessError: if ``git grep`` exits with a status above 1.
    """
    grep_cmd = ["git", "grep", "check_output(", "--", "*.py"] + get_exclude_args()

    try:
        grep_output = check_output(grep_cmd, universal_newlines=True, encoding="utf8")
    except CalledProcessError as err:
        # Status 1 is tolerated (presumably "no matches", as with grep);
        # anything larger is treated as a real failure and propagated.
        if err.returncode > 1:
            raise err
        return []

    offenders = []
    for line in grep_output.splitlines():
        if "universal_newlines=True" not in line:
            continue
        if re.search(r"encoding=.(ascii|utf8|utf-8).", line):
            continue
        offenders.append(line)

    return offenders


def main():
    """Run both checks, print whatever they report, and exit.

    The process exits with status 1 if either check returned any lines and
    with status 0 otherwise.
    """
    fileopen_hits = check_fileopens()
    if fileopen_hits:
        print("Python's open(...) seems to be used to open text files without explicitly specifying encoding='utf8':\n")
        print(*fileopen_hits, sep="\n")

    output_hits = check_checked_outputs()
    if output_hits:
        if fileopen_hits:
            # Visually separate the second report from the first one.
            print("\n")
        print("Python's check_output(...) seems to be used to get program outputs without explicitly specifying encoding='utf8':\n")
        print(*output_hits, sep="\n")

    sys.exit(1 if (fileopen_hits or output_hits) else 0)


# Run the lint only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()