aboutsummaryrefslogtreecommitdiff
path: root/test/lint/lint-whitespace.py
blob: 3fb5b80013363390043a92cc8042ff7a1dcdef93 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
#!/usr/bin/env python3
#
# Copyright (c) 2017-2022 The Bitcoin Core developers
# Distributed under the MIT software license, see the accompanying
# file COPYING or http://www.opensource.org/licenses/mit-license.php.
#
# Check for new lines in diff that introduce trailing whitespace or
# tab characters instead of spaces.

# We can't run this check unless we know the commit range for the PR.

import argparse
import os
import re
import sys

from subprocess import check_output

# Paths that are left out of both checks. get_diff() prefixes each entry with
# ":(exclude)" and passes it to "git diff" as a pathspec.
EXCLUDED_DIRS = [
    "depends/patches/",
    "contrib/guix/patches/",
    "src/leveldb/",
    "src/crc32c/",
    "src/secp256k1/",
    "src/minisketch/",
    "doc/release-notes/",
    "src/qt/locale",
]

def parse_args():
    """Build the command-line parser and return the parsed arguments.

    The only option is ``--prev-commits`` / ``-p``. Its value is kept as the
    string given on the command line and is ``None`` when the option is
    omitted.
    """
    summary = """
            Check for new lines in diff that introduce trailing whitespace
            or tab characters instead of spaces in unstaged changes, the
            previous n commits, or a commit-range.
        """
    # The epilog embeds the script name as it was invoked (sys.argv[0]).
    usage_note = f"""
            You can manually set the commit-range with the COMMIT_RANGE
            environment variable (e.g. "COMMIT_RANGE='47ba2c3...ee50c9e'
            {sys.argv[0]}"). Defaults to current merge base when neither
            prev-commits nor the environment variable is set.
        """

    cli = argparse.ArgumentParser(description=summary, epilog=usage_note)
    cli.add_argument(
        "--prev-commits",
        "-p",
        required=False,
        help="The previous n commits to check",
    )
    return cli.parse_args()


def report_diff(selection):
    """Print the flagged diff lines grouped by file and by hunk.

    ``selection`` is an iterable of strings taken from a diff:

    * a line starting with ``diff`` names the file the following lines
      belong to,
    * a line starting with ``@@`` is a hunk header,
    * any other line is a flagged line and is always printed.

    A file line (preceded by an empty line) and a hunk header are printed
    once each, right before the first flagged line that follows them. A
    flagged line that has no hunk header yet within its file is printed
    without one.
    """
    filename = ""
    hunk_header = None
    file_printed = False
    hunk_printed = False

    print("The following changes were suspected:")

    for line in selection:
        if line.startswith("diff"):
            filename = line
            file_printed = False
            # A hunk header only describes the file it appeared in; drop the
            # previous file's header so it is never printed under this one.
            hunk_header = None
            hunk_printed = False
        elif line.startswith("@@"):
            hunk_header = line
            hunk_printed = False
        else:
            if not file_printed:
                # The first time a file is seen with a flagged line, print
                # the filename (preceded by a newline).
                print("")
                print(filename)
                file_printed = True
            # hunk_header is None when the flagged line precedes every "@@"
            # line of its file; there is nothing meaningful to print then.
            if not hunk_printed and hunk_header is not None:
                print(hunk_header)
                hunk_printed = True
            print(line)


def get_diff(commit_range, check_only_code):
    """Return the output of ``git diff -U0`` for ``commit_range`` as text.

    When ``check_only_code`` is true the diff is limited to the ``*.cpp``,
    ``*.h``, ``*.md``, ``*.py`` and ``*.sh`` pathspecs; otherwise the whole
    tree (``.``) is diffed. Every entry of ``EXCLUDED_DIRS`` is appended as
    an ``:(exclude)`` pathspec in both cases.
    """
    if check_only_code:
        pathspecs = ["*.cpp", "*.h", "*.md", "*.py", "*.sh"]
    else:
        pathspecs = ["."]
    pathspecs += [":(exclude)" + excluded for excluded in EXCLUDED_DIRS]

    # "--" separates the revision range from the pathspecs that follow.
    command = ["git", "diff", "-U0", commit_range, "--", *pathspecs]
    return check_output(command, universal_newlines=True, encoding="utf8")


def _select_flagged_lines(diff_text, added_line_pattern):
    """Return the file headers, hunk headers and flagged added lines of a diff.

    ``added_line_pattern`` is a compiled regex that is matched (anchored at
    the start) against lines inside hunks only. The lines between a
    ``diff --git`` line and the first ``@@`` line of that file are never
    tested: git writes the ``+++`` path line there and appends a TAB to it
    when the path contains a space, which would otherwise be reported as an
    added line with a tab / trailing whitespace.
    """
    selection = []
    in_file_header = False
    for line in diff_text.splitlines():
        if line.startswith("diff --git"):
            in_file_header = True
            selection.append(line)
        elif line.startswith("@@"):
            in_file_header = False
            selection.append(line)
        elif not in_file_header and added_line_pattern.match(line):
            selection.append(line)
    return selection


def main():
    """Run the trailing-whitespace and tab checks and exit with the result.

    The commit range is taken from the COMMIT_RANGE environment variable
    when it is set and non-empty; otherwise from ``--prev-commits``
    (``HEAD~n...HEAD``); otherwise from the merge base of HEAD and master up
    to HEAD. Trailing whitespace is checked in all non-excluded files, tab
    characters only in the file types selected by
    ``get_diff(..., check_only_code=True)``.

    Exits with status 1 if either check finds an offending added line and
    with status 0 otherwise.
    """
    args = parse_args()

    commit_range = os.getenv("COMMIT_RANGE")
    if not commit_range:
        if args.prev_commits:
            commit_range = "HEAD~" + args.prev_commits + "...HEAD"
        else:
            # This assumes that the target branch of the pull request will be master.
            merge_base = check_output(["git", "merge-base", "HEAD", "master"], universal_newlines=True, encoding="utf8").rstrip("\n")
            commit_range = merge_base + "..HEAD"

    # Check if trailing whitespace was found in the diff.
    whitespace_selection = _select_flagged_lines(
        get_diff(commit_range, check_only_code=False),
        re.compile(r"^\+.*\s+$"),
    )
    whitespace_additions = [i for i in whitespace_selection if i.startswith("+")]

    # Check if tab characters were found in the diff.
    tab_selection = _select_flagged_lines(
        get_diff(commit_range, check_only_code=True),
        re.compile(r"^\+.*\t"),
    )
    tab_additions = [i for i in tab_selection if i.startswith("+")]

    ret = 0

    if whitespace_additions:
        print("This diff appears to have added new lines with trailing whitespace.")
        report_diff(whitespace_selection)
        ret = 1

    if tab_additions:
        print("This diff appears to have added new lines with tab characters instead of spaces.")
        report_diff(tab_selection)
        ret = 1

    sys.exit(ret)


if __name__ == "__main__":
    main()