scripts: New clean-header-guards.pl

The conventional way to ensure a header can be included multiple times is to bracket it like this: #ifndef HEADER_NAME_H #define HEADER_NAME_H ... #endif where HEADER_NAME_H is a symbol unique to this header. The endif may be optionally decorated like this: #endif /* HEADER_NAME_H */ Unconventional ways present in our code: * Identifiers reserved for any use: #define _FILEOP_H * Lowercase (bad idea for object-like macros): #define __linux_video_vga_h__ * Roundabout ways to say the same thing (and hide from grep): #if !defined(__PPC_MAC_H__) #endif /* !defined(__PPC_MAC_H__) */ * Redundant values: #define HW_ALPHA_H 1 * Funny redundant values: # define PXA_H "pxa.h" * Decorations with bangs: #endif /* !QEMU_ARM_GIC_INTERNAL_H */ The negation actually makes sense, but almost all our header guard #endif decorations don't negate. * Useless decorations: #endif /* audio.h */ Header guards are not the place to show off creativity. This script normalizes them to the conventional way, and cleans up whitespace while there. It warns when it renames guard symbols, and explains how to find occurrences of these symbols that may have to be updated manually. Another issue is use of the same guard symbol in multiple headers. That's okay only for headers that cannot be used together, such as the *-user/*/target_syscall.h. This script can't tell, so it warns when it sees a reuse. The script also warns when preprocessing a header with its guard symbol defined produces anything but whitespace. The next commits will put the script to use. Signed-off-by: Markus Armbruster <armbru@redhat.com> Reviewed-by: Richard Henderson <rth@twiddle.net>
author: Markus Armbruster <armbru@redhat.com> 2016-06-28 13:07:36 +0200
committer: Markus Armbruster <armbru@redhat.com> 2016-07-12 16:19:16 +0200
commit: 2dbc4ebc1712a5cf9e6a36327dce0b465abd5bbe (patch)
tree: 586a51555349dd751c6af5bb67e2119a3deaf651 /scripts
parent: a9c94277f07d19d3eb14f199c3e93491aa3eae0e (diff)
1 files changed, 213 insertions, 0 deletions
diff --git a/scripts/clean-header-guards.pl b/scripts/clean-header-guards.pl
new file mode 100755
index 0000000000..54ab99ae29
--- /dev/null
+++ b/scripts/clean-header-guards.pl
@@ -0,0 +1,213 @@
+#!/usr/bin/perl -w
+#
+# Clean up include guards in headers
+#
+# Copyright (C) 2016 Red Hat, Inc.
+#
+# Authors:
+#  Markus Armbruster <armbru@redhat.com>
+#
+# This work is licensed under the terms of the GNU GPL, version 2 or
+# (at your option) any later version. See the COPYING file in the
+# top-level directory.
+#
+# Usage: scripts/clean-header-guards.pl [OPTION]... [FILE]...
+#     -c CC     Use a compiler other than cc
+#     -n        Suppress actual cleanup
+#     -v        Show which files are cleaned up, and which are skipped
+#
+# Does the following:
+# - Header files without a recognizable header guard are skipped.
+# - Clean up any untidy header guards in-place.  Warn if the cleanup
+#   renames guard symbols, and explain how to find occurrences of these
+#   symbols that may have to be updated manually.
+# - Warn about duplicate header guard symbols.  To make full use of
+#   this warning, you should clean up *all* headers in one run.
+# - Warn when preprocessing a header with its guard symbol defined
+#   produces anything but whitespace.  The preprocessor is run like
+#   "cc -E -DGUARD_H -c -P -", and fed the test program on stdin.
+
+use strict;
+use Getopt::Std;
+
+# Stuff we don't want to clean because we import it into our tree.
+# The pattern is anchored at the start of the file name, so it matches
+# only names given relative to the top-level directory.
+my $exclude = qr,^(disas/libvixl/|include/standard-headers/
+    |linux-headers/|pc-bios/|tests/tcg/|tests/multiboot/),x;
+# Stuff that is expected to fail the preprocessing test.  The dot is
+# escaped so that it matches a literal '.' and nothing else.
+my $exclude_cpp = qr,^include/libdecnumber/decNumberLocal\.h,;
+
+# Maps each guard symbol seen so far to the first file found using it
+my %guarded = ();
+# Maps each guard symbol that differs from the one found in the file
+# to that original symbol
+my %old_guard = ();
+
+# Command line options; getopts() overwrites these defaults.
+our $opt_c = "cc";              # -c CC: compiler to run for the preprocessing test
+our $opt_n = 0;                 # -n: suppress actual cleanup
+our $opt_v = 0;                 # -v: show which files are cleaned up or skipped
+# Bail out on a bad option instead of carrying on: a mistyped -n must
+# not result in files being rewritten in place.
+getopts("c:nv")
+    or die "Usage: $0 [-c CC] [-n] [-v] [FILE]...\n";
+
+# Report that file FNAME is left alone, and why (MSG).  LINE1 and
+# LINE2 are optional lines of context; each one that is defined gets
+# printed indented, as is (they are expected to end in a newline).
+# Prints nothing unless -v is given, and nothing for excluded files.
+sub skipping {
+    my ($fname, $msg, $line1, $line2) = @_;
+
+    $opt_v or return;
+    $fname =~ $exclude and return;
+
+    print "$fname skipped: $msg\n";
+    for my $context ($line1, $line2) {
+        print "    $context" if defined $context;
+    }
+}
+
+# Print warning MSG about file FNAME to stderr, unless the file is
+# excluded from cleanup.
+sub gripe {
+    my ($fname, $msg) = @_;
+
+    $fname =~ $exclude and return;
+    print STDERR "$fname: warning: $msg\n";
+}
+
+# Return the complete contents of file FNAME.
+# Dies when the file can't be opened.
+sub slurp {
+    my ($fname) = @_;
+    my $in;
+
+    open($in, "<", $fname)
+        or die "can't open $fname for reading: $!";
+    local $/ = undef;           # no record separator: read it all
+    return readline($in);
+}
+
+# Replace the contents of file FNAME with the string CONTENTS.
+# Dies when the file can't be opened, written, or closed.
+sub unslurp {
+    my ($fname, $contents) = @_;
+    my $out;
+
+    open($out, ">", $fname)
+        or die "can't open $fname for writing: $!";
+    print {$out} $contents
+        or die "error writing $fname: $!";
+    # Buffered write errors may surface only here
+    close($out)
+        or die "error writing $fname: $!";
+}
+
+# Derive a guard symbol from file name FNAME: ASCII lowercase letters
+# become uppercase, and every run of characters that are neither
+# letters nor digits becomes a single underscore.
+sub fname2guard {
+    my ($fname) = @_;
+    my $upper = $fname =~ tr/a-z/A-Z/r;
+    return $upper =~ tr/A-Z0-9/_/csr;
+}
+
+# Check that file FNAME preprocesses to nothing but whitespace when
+# GUARD is defined.  Runs "$opt_c -E -DGUARD -c -P -" with FNAME on
+# stdin, and warns when the output contains a non-blank line, or when
+# the command fails.  $opt_c is interpolated into the shell command as
+# is, so -c may carry extra arguments.
+# Dies when the command can't be started.
+sub preprocess {
+    my ($fname, $guard) = @_;
+
+    # Quote the file name for the shell, so that blanks and shell
+    # metacharacters in it can't break or hijack the command
+    my $quoted = $fname =~ s/'/'\\''/gr;
+    open(my $pipe, "-|", "$opt_c -E -D$guard -c -P - <'$quoted'")
+        or die "can't run $opt_c: $!";
+    # Read all the way to EOF even after finding non-blank output:
+    # closing the pipe early can kill the compiler with SIGPIPE, which
+    # would then be reported as a bogus preprocessing failure
+    my $blank = 1;
+    while (my $line = <$pipe>) {
+        $blank = 0 if $line =~ /\S/;
+    }
+    gripe($fname, "not blank after preprocessing")
+        unless $blank;
+    # close() on a pipe waits for the command and sets $?
+    close $pipe
+        or gripe($fname, "preprocessing failed ($opt_c exit status $?)");
+}
+
+# Main loop: examine each file named on the command line, tidy up its
+# header guard in place unless -n is given or the file is excluded,
+# and run the sanity checks.
+for my $fname (@ARGV) {
+    my $text = slurp($fname);
+
+    # Split the file into leading blank lines and comments ($pre), the
+    # two lines that should make up the guard ($line1, $line2), and the
+    # rest ($body).  The empty alternative makes the first match always
+    # succeed, so that pos($text) is set for the \G matches below.
+    $text =~ m,\A(\s*\n|\s*//\N*\n|\s*/\*.*?\*/\s*\n)*|,msg;
+    my $pre = $&;
+    unless ($text =~ /\G(.*\n)/g) {
+        $text =~ /\G.*/;
+        skipping($fname, "no recognizable header guard", "$&\n");
+        next;
+    }
+    my $line1 = $1;
+    unless ($text =~ /\G(.*\n)/g) {
+        $text =~ /\G.*/;
+        skipping($fname, "no recognizable header guard", "$&\n");
+        next;
+    }
+    my $line2 = $1;
+    my $body = substr($text, pos($text));
+
+    # $line1 must test the guard symbol with #ifndef or #if !defined,
+    # and $line2 must define the same symbol
+    unless ($line1 =~ /^\s*\#\s*(if\s*\!\s*defined(\s*\()?|ifndef)\s*
+                       ([A-Za-z0-9_]+)/x) {
+        skipping($fname, "no recognizable header guard", $line1, $line2);
+        next;
+    }
+    my $guard = $3;
+    unless ($line2 =~ /^\s*\#\s*define\s+([A-Za-z0-9_]+)/) {
+        skipping($fname, "no recognizable header guard", $line1, $line2);
+        next;
+    }
+    my $guard2 = $1;
+    unless ($guard2 eq $guard) {
+        skipping($fname, "mismatched header guard ($guard vs. $guard2)",
+                 $line1, $line2);
+        next;
+    }
+
+    # The last thing in the file other than whitespace must be the
+    # #endif, optionally decorated with a /* ... */ comment
+    unless ($body =~ m,\A((.*\n)*)
+                       (\s*\#\s*endif\s*(/\*\s*.*\s*\*/\s*)?\n?)
+                       \s*\Z,x) {
+        skipping($fname, "can't find end of header guard");
+        next;
+    }
+    $body = $1;
+    my $line3 = $3;
+    my $endif_comment = $4;
+
+    my $oldg = $guard;
+
+    # Normalize the guard symbol, collecting what is wrong with it
+    unless ($fname =~ $exclude) {
+        my @issues = ();
+        # tr returns the number of characters it translated
+        $guard =~ tr/a-z/A-Z/
+            and push @issues, "contains lowercase letters";
+        $guard =~ s/^_+//
+            and push @issues, "is a reserved identifier";
+        # This substitution always succeeds; it is an issue only when
+        # it replaced something other than a plain _H suffix
+        $guard =~ s/(_H)?_*$/_H/
+            and $& ne "_H" and push @issues, "doesn't end with _H";
+        unless ($guard =~ /^[A-Z][A-Z0-9_]*_H/) {
+            skipping($fname, "can't clean up odd guard symbol $oldg",
+                     $line1, $line2);
+            next;
+        }
+
+        # The symbol must end with the one derived from the file's
+        # base name; if it doesn't, derive a new one from the path
+        # with any leading include/ dropped
+        my $exp = fname2guard($fname =~ s,.*/,,r);
+        unless ($guard =~ /\Q$exp\E\Z/) {
+            $guard = fname2guard($fname =~ s,^include/,,r);
+            push @issues, "doesn't match the file name";
+        }
+        if (@issues and $opt_v) {
+            print "$fname guard $oldg needs cleanup:\n    ",
+                join(", ", @issues), "\n";
+        }
+    }
+
+    # Remember renames for the report printed after the loop
+    $old_guard{$guard} = $oldg
+        if $guard ne $oldg;
+
+    if (exists $guarded{$guard}) {
+        gripe($fname, "guard $guard also used by $guarded{$guard}");
+    } else {
+        $guarded{$guard} = $fname;
+    }
+
+    # Rewrite the file if any of the three guard lines is not in
+    # conventional form
+    unless ($fname =~ $exclude) {
+        my $newl1 = "#ifndef $guard\n";
+        my $newl2 = "#define $guard\n";
+        my $newl3 = "#endif\n";
+        # Keep a decoration only if there was one, and normalize it;
+        # \Z matches before the final newline
+        $newl3 =~ s,\Z, /* $guard */, if defined $endif_comment;
+        if ($line1 ne $newl1 or $line2 ne $newl2 or $line3 ne $newl3) {
+            # Exactly one blank line between leading comments and the
+            # guard, and between the guard and the body
+            $pre =~ s/\n*\Z/\n\n/ if $pre =~ /\N/;
+            $body =~ s/\A\n*/\n/;
+            if ($opt_n) {
+                print "$fname would be cleaned up\n" if $opt_v;
+            } else {
+                unslurp($fname, "$pre$newl1$newl2$body$newl3");
+                print "$fname cleaned up\n" if $opt_v;
+            }
+        }
+    }
+
+    # With -n the file still has its old guard symbol
+    preprocess($fname, $opt_n ? $oldg : $guard)
+        unless $fname =~ $exclude or $fname =~ $exclude_cpp;
+}
+
+# Summarize guard symbol renames on stderr, along with a git grep
+# command that finds uses of the old symbols.
+if (%old_guard) {
+    my @new_symbols = sort keys %old_guard;
+    my $old_symbols = join("|", sort values %old_guard);
+
+    print STDERR "warning: guard symbol renaming may break things\n";
+    print STDERR "    $old_guard{$_} -> $_\n"
+        for @new_symbols;
+    print STDERR "To find uses that may have to be updated try:\n";
+    print STDERR "    git grep -Ew '", $old_symbols, "'\n";
+}
author	Markus Armbruster <armbru@redhat.com>	2016-06-28 13:07:36 +0200
committer	Markus Armbruster <armbru@redhat.com>	2016-07-12 16:19:16 +0200
commit	2dbc4ebc1712a5cf9e6a36327dce0b465abd5bbe (patch)
tree	586a51555349dd751c6af5bb67e2119a3deaf651 /scripts
parent	a9c94277f07d19d3eb14f199c3e93491aa3eae0e (diff)