Merge bitcoin/bitcoin#21873: test: minor fixes & improvements for files linter test

2227fc4e6203064b14e99bcf453601bd263a0196 test: minor fixes & improvements for files linter test (windsok) Pull request description: Couple of minor fixes & improvements for files linter test added in #21740 - Use a context manager when opening files, so that files are closed are we are done with them - Use the `-z` flag when shelling out to `git ls-files` so that we can catch newlines and other weird control characters in filenames. From the `git ls-files` manpage: ``` -z \0 line termination on output and do not quote filenames. See OUTPUT below for more information. Without the -z option, pathnames with "unusual" characters are quoted as explained for the configuration variable core.quotePath (see git-config(1)). Using -z the filename is output verbatim and the line is terminated by a NUL byte. ``` ACKs for top commit: MarcoFalke: cr ACK 2227fc4e6203064b14e99bcf453601bd263a0196 practicalswift: cr ACK 2227fc4e6203064b14e99bcf453601bd263a0196: patch looks correct Tree-SHA512: af059a805f4a7614162de85dea856052a45ab531895cb0431087e7fc9e037513fa7501bb5eb2fe43238adf5f09e77712ebdbb15b1486983359ad3661a3da0c60
author: MarcoFalke <falke.marco@gmail.com> 2021-05-07 10:23:36 +0200
committer: MarcoFalke <falke.marco@gmail.com> 2021-05-07 10:23:43 +0200
commit: a33f360fcdd29a05ecbe4ab606a6143f7de4d9dc (patch)
tree: 2bb36a18323b6da3b27a66459173a801b70e833c /test
parent: a0d1d487e93d238304f3e1e2df097b0fa190a54b (diff)
parent: 2227fc4e6203064b14e99bcf453601bd263a0196 (diff)
download: bitcoin-a33f360fcdd29a05ecbe4ab606a6143f7de4d9dc.tar.xz
1 files changed, 12 insertions, 16 deletions
diff --git a/test/lint/lint-files.py b/test/lint/lint-files.py
index 43ca882442..400921e5f3 100755
--- a/test/lint/lint-files.py
+++ b/test/lint/lint-files.py
@@ -13,8 +13,8 @@ import sys
 from subprocess import check_output
 from typing import Optional, NoReturn
 
-CMD_ALL_FILES = "git ls-files --full-name"
-CMD_SOURCE_FILES = 'git ls-files --full-name -- "*.[cC][pP][pP]" "*.[hH]" "*.[pP][yY]" "*.[sS][hH]"'
+CMD_ALL_FILES = "git ls-files -z --full-name"
+CMD_SOURCE_FILES = 'git ls-files -z --full-name -- "*.[cC][pP][pP]" "*.[hH]" "*.[pP][yY]" "*.[sS][hH]"'
 CMD_SHEBANG_FILES = "git grep --full-name --line-number -I '^#!'"
 ALLOWED_FILENAME_REGEXP = "^[a-zA-Z0-9/_.@][a-zA-Z0-9/_.@-]*$"
 ALLOWED_SOURCE_FILENAME_REGEXP = "^[a-z0-9_./-]+$"
@@ -72,16 +72,13 @@ def check_all_filenames() -> int:
     Checks every file in the repository against an allowed regexp to make sure only lowercase or uppercase
     alphanumerics (a-zA-Z0-9), underscores (_), hyphens (-), at (@) and dots (.) are used in repository filenames.
     """
-    # We avoid using rstrip() to ensure we catch filenames which accidentally include trailing whitespace
-    filenames = check_output(CMD_ALL_FILES, shell=True).decode("utf8").split("\n")
-    filenames = [filename for filename in filenames if filename != ""]  # removes the trailing empty list element
-
+    filenames = check_output(CMD_ALL_FILES, shell=True).decode("utf8").rstrip("\0").split("\0")
     filename_regex = re.compile(ALLOWED_FILENAME_REGEXP)
     failed_tests = 0
     for filename in filenames:
         if not filename_regex.match(filename):
             print(
-                f"""File "{filename}" does not not match the allowed filename regexp ('{ALLOWED_FILENAME_REGEXP}')."""
+                f"""File {repr(filename)} does not not match the allowed filename regexp ('{ALLOWED_FILENAME_REGEXP}')."""
             )
             failed_tests += 1
     return failed_tests
@@ -94,17 +91,14 @@ def check_source_filenames() -> int:
 
     Additionally there is an exception regexp for directories or files which are excepted from matching this regexp.
     """
-    # We avoid using rstrip() to ensure we catch filenames which accidentally include trailing whitespace
-    filenames = check_output(CMD_SOURCE_FILES, shell=True).decode("utf8").split("\n")
-    filenames = [filename for filename in filenames if filename != ""]  # removes the trailing empty list element
-
+    filenames = check_output(CMD_SOURCE_FILES, shell=True).decode("utf8").rstrip("\0").split("\0")
     filename_regex = re.compile(ALLOWED_SOURCE_FILENAME_REGEXP)
     filename_exception_regex = re.compile(ALLOWED_SOURCE_FILENAME_EXCEPTION_REGEXP)
     failed_tests = 0
     for filename in filenames:
         if not filename_regex.match(filename) and not filename_exception_regex.match(filename):
             print(
-                f"""File "{filename}" does not not match the allowed source filename regexp ('{ALLOWED_SOURCE_FILENAME_REGEXP}'), or the exception regexp ({ALLOWED_SOURCE_FILENAME_EXCEPTION_REGEXP})."""
+                f"""File {repr(filename)} does not not match the allowed source filename regexp ('{ALLOWED_SOURCE_FILENAME_REGEXP}'), or the exception regexp ({ALLOWED_SOURCE_FILENAME_EXCEPTION_REGEXP})."""
             )
             failed_tests += 1
     return failed_tests
@@ -116,15 +110,16 @@ def check_all_file_permissions() -> int:
 
     Additionally checks that for executable files, the file contains a shebang line
     """
-    filenames = check_output(CMD_ALL_FILES, shell=True).decode("utf8").strip().split("\n")
+    filenames = check_output(CMD_ALL_FILES, shell=True).decode("utf8").rstrip("\0").split("\0")
     failed_tests = 0
     for filename in filenames:
         file_meta = FileMeta(filename)
         if file_meta.permissions == ALLOWED_PERMISSION_EXECUTABLES:
-            shebang = open(filename, "rb").readline().rstrip(b"\n")
+            with open(filename, "rb") as f:
+                shebang = f.readline().rstrip(b"\n")
 
             # For any file with executable permissions the first line must contain a shebang
-            if shebang[:2] != b"#!":
+            if not shebang.startswith(b"#!"):
                 print(
                     f"""File "{filename}" has permission {ALLOWED_PERMISSION_EXECUTABLES} (executable) and is thus expected to contain a shebang '#!'. Add shebang or do "chmod {ALLOWED_PERMISSION_NON_EXECUTABLES} {filename}" to make it non-executable."""
                 )
@@ -176,7 +171,8 @@ def check_shebang_file_permissions() -> int:
 
             # *.py files which don't contain an `if __name__ == '__main__'` are not expected to be executed directly
             if file_meta.extension == "py":
-                file_data = open(filename, "r", encoding="utf8").read()
+                with open(filename, "r", encoding="utf8") as f:
+                    file_data = f.read()
                 if not re.search("""if __name__ == ['"]__main__['"]:""", file_data):
                     continue
author	MarcoFalke <falke.marco@gmail.com>	2021-05-07 10:23:36 +0200
committer	MarcoFalke <falke.marco@gmail.com>	2021-05-07 10:23:43 +0200
commit	a33f360fcdd29a05ecbe4ab606a6143f7de4d9dc (patch)
tree	2bb36a18323b6da3b27a66459173a801b70e833c /test
parent	a0d1d487e93d238304f3e1e2df097b0fa190a54b (diff)
parent	2227fc4e6203064b14e99bcf453601bd263a0196 (diff)
download	bitcoin-a33f360fcdd29a05ecbe4ab606a6143f7de4d9dc.tar.xz