Renew and extend the tool for checking adherence to C coding style rules

aims at checking most of https://www.openssl.org/policies/codingstyle.html and various requirements not yet explicitly stated there - see also #10725 add util/check-format.pl and its self-tests in util/check-format-test-{positives,negatives}.c remove util/openssl-format-source Reviewed-by: Richard Levitte <levitte@openssl.org> Reviewed-by: David von Oheimb <david.von.oheimb@siemens.com> (Merged from https://github.com/openssl/openssl/pull/10363)
author: Dr. David von Oheimb <David.von.Oheimb@siemens.com> 2020-03-09 11:03:21 +0100
committer: Dr. David von Oheimb <David.von.Oheimb@siemens.com> 2020-03-09 11:03:21 +0100
commit: 99a16e0459e5089c2cfb92ee775f1221a51b8d05 (patch)
tree: 4037504638169aad9004ad8850515d15f6c250e5 /util/check-format.pl
parent: c518117b99bc4aad62990e8a31b7bc1dae06d16c (diff)
1 files changed, 1118 insertions, 0 deletions
diff --git a/util/check-format.pl b/util/check-format.pl
new file mode 100644
index 0000000000..af77d20920
--- /dev/null
+++ b/util/check-format.pl
@@ -0,0 +1,1118 @@
+#!/usr/bin/perl
+#
+# Copyright 2020 The OpenSSL Project Authors. All Rights Reserved.
+# Copyright Siemens AG 2019-2020
+#
+# Licensed under the Apache License 2.0 (the "License").
+# You may not use this file except in compliance with the License.
+# You can obtain a copy in the file LICENSE in the source distribution
+# or at https://www.openssl.org/source/license.html
+#
+# check-format.pl
+# - check formatting of C source according to OpenSSL coding style
+#
+# usage:
+#   check-format.pl [-l|--sloppy-len] [-b|--sloppy-bodylen]
+#                   [-s|--sloppy-spc] [-c|--sloppy-cmt] [-m|--sloppy-macro]
+#                   [-h|--sloppy-hang] [-1|--1-stmt]
+#                   <files>
+#
+# checks adherence to the formatting rules of the OpenSSL coding guidelines
+# assuming that the input files contain syntactically correct C code.
+# This pragmatic tool is incomplete and yields some false positives.
+# Still it should be useful for detecting most typical glitches.
+#
+# options:
+#  -l | --sloppy-len   increase accepted max line length from 80 to 84
+#  -b | --sloppy-bodylen do not report function body length > 200
+#  -s | --sloppy-spc   do not report whitespace nits
+#  -c | --sloppy-cmt   do not report indentation of comments
+#                      Otherwise for each multi-line comment the indentation of
+#                      its lines is checked for consistency. For each comment
+#                      that does not begin to the right of normal code its
+#                      indentation must be as for normal code, while in case it
+#                      also has no normal code to its right it is considered to
+#                      refer to the following line and may be indented equally.
+#  -m | --sloppy-macro allow missing extra indentation of macro bodies
+#  -h | --sloppy-hang  when checking hanging indentation, do not report
+#                      * same indentation as on line before
+#                      * same indentation as non-hanging indent level
+#                      * indentation moved left (not beyond non-hanging indent)
+#                        just to fit contents within the line length limit
+#  -1 | --1-stmt       do more aggressive checks for { 1 stmt } - see below
+#
+# There are non-trivial false positives and negatives such as the following.
+#
+# * When a line contains several issues of the same kind only one is reported.
+#
+# * When a line contains more than one statement this is (correctly) reported
+#   but in some situations the indentation checks for subsequent lines go wrong.
+#
+# * There is the special OpenSSL rule not to unnecessarily use braces around
+#   single statements:
+#   {
+#       stmt;
+#   }
+#   except within if ... else constructs where some branch contains more than one
+#   statement. Since the exception is hard to recognize when such branches occur
+#   after the current position (such that false positives would be reported)
+#   the tool checks for this rule by default only for do/while/for bodies.
+#   Yet with the --1-stmt option false positives are preferred over negatives.
+#   False negatives occur if the braces are more than two non-empty lines apart.
+#
+# * Use of multiple consecutive spaces is regarded a coding style nit except
+#   when done in order to align certain columns over multiple lines, e.g.:
+#   # define AB  1
+#   # define CDE 22
+#   # define F   3333
+#   This pattern is recognized - and consequently double space not reported -
+#   for a given line if in the nonempty line before or after (if existing)
+#   for each occurrence of "  \S" (where \S means non-space) in the given line
+#   there is " \S" in the other line in the respective column position.
+#   This may lead to both false negatives (in case of coincidental " \S")
+#   and false positives (in case of more complex multi-column alignment).
+#
+# * When just part of control structures depend on #if(n)(def), which can be
+#   considered bad programming style, indentation false positives occur, e.g.:
+#   #if X
+#       if (1) /* bad style */
+#   #else
+#       if (2) /* bad style resulting in false positive */
+#   #endif
+#           c; /* resulting further false positive */
+
+use strict;
+# use List::Util qw[min max];
+use POSIX;
+
+use constant INDENT_LEVEL => 4;
+use constant MAX_LINE_LENGTH => 80;
+use constant MAX_BODY_LENGTH => 200;
+
+# global variables @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+# command-line options
+my $max_length = MAX_LINE_LENGTH;
+my $sloppy_bodylen = 0;
+my $sloppy_SPC = 0;
+my $sloppy_hang = 0;
+my $sloppy_cmt = 0;
+my $sloppy_macro = 0;
+my $extended_1_stmt = 0;
+
+# boolean option flags, keyed by option name (short or long form) without its leading '-'
+my %flag_of_option = (
+    "b" => \$sloppy_bodylen,  "-sloppy-bodylen" => \$sloppy_bodylen,
+    "s" => \$sloppy_SPC,      "-sloppy-spc"     => \$sloppy_SPC,
+    "c" => \$sloppy_cmt,      "-sloppy-cmt"     => \$sloppy_cmt,
+    "m" => \$sloppy_macro,    "-sloppy-macro"   => \$sloppy_macro,
+    "h" => \$sloppy_hang,     "-sloppy-hang"    => \$sloppy_hang,
+    "1" => \$extended_1_stmt, "-1-stmt"         => \$extended_1_stmt,
+);
+
+# consume the leading options from @ARGV, leaving the names of the input files
+while ($ARGV[0] =~ m/^-(\w|-[\w\-]+)$/) {
+    my $option = $1;
+    shift;
+    if ($option eq "l" || $option eq "-sloppy-len") {
+        $max_length += INDENT_LEVEL; # takes effect each time the option is given
+    } elsif (exists $flag_of_option{$option}) {
+        ${ $flag_of_option{$option} } = 1;
+    } else {
+        die("unknown option: -$option");
+    }
+}
+
+# status variables
+# These file-scoped variables hold the state shared by the submodules and the main loop below.
+# The per-file parsing state among them is (re-)initialized by reset_file_state().
+my $self_test;             # whether the current input file is regarded to contain (positive/negative) self-tests
+my $line;                  # current line number
+my $line_before;           # number of previous line that is not essentially empty (i.e., has more than whitespace and '\')
+my $line_before2;          # number of not essentially empty line before previous not essentially empty line
+my $contents;              # contents of current line
+my $contents_before;       # contents of $line_before, if $line_before > 0
+my $contents_before_;      # contents of $line_before after blinding comments etc., if $line_before > 0
+my $contents_before2;      # contents of $line_before2, if $line_before2 > 0
+my $contents_before_2;     # contents of $line_before2 after blinding comments etc., if $line_before2 > 0
+my $in_multiline_string;   # line starts within multi-line string literal
+my $count;                 # -1 or number of leading whitespace characters (except newline) in current line,
+                           # which should be $block_indent + $hanging_offset + $local_offset or $expr_indent
+my $count_before;          # number of leading whitespace characters (except line ending chars) in $contents_before
+my $has_label;             # current line contains label
+my $local_offset;          # current extra indent due to label, switch case/default, or leading closing brace(s)
+my $line_body_start;       # number of line where last function body started, or 0
+my $line_function_start;   # number of line where last function definition started, used if $line_body_start != 0
+my $last_function_header;  # header containing name of last function defined, used if $line_function_start != 0
+my $line_opening_brace;    # number of previous line with opening brace after do/while/for, optionally for if/else

+my $keyword_opening_brace; # name of previous keyword, used if $line_opening_brace != 0
+my $ifdef__cplusplus;      # line before contained '#ifdef __cplusplus' (used in header files)
+my $block_indent;          # currently required normal indentation at block/statement level
+my $hanging_offset;        # extra indent, which may be nested, for just one hanging statement or expr or typedef
+my @in_do_hanging_offsets; # stack of hanging offsets for nested 'do' ... 'while'
+my @in_if_hanging_offsets; # stack of hanging offsets for nested 'if' (but not its potential 'else' branch)
+my $if_maybe_terminated;   # 'if' ends and $hanging_offset should be reset unless the next line starts with 'else'
+my @nested_block_indents;  # stack of indentations at block/statement level, needed due to hanging statements
+my @nested_hanging_offsets;# stack of nested $hanging_offset values, in parallel to @nested_block_indents
+my @nested_in_typedecl;    # stack of nested $in_typedecl values, partly in parallel to @nested_block_indents
+my @nested_indents;        # stack of hanging indents due to parentheses, braces, brackets, or conditionals
+my @nested_symbols;        # stack of hanging symbols '(', '{', '[', or '?', in parallel to @nested_indents
+my @nested_conds_indents;  # stack of hanging indents due to conditionals ('?' ... ':')
+my $expr_indent;           # resulting hanging indent within (multi-line) expressions including type exprs, else 0
+my $hanging_symbol;        # character ('(', '{', '[', not: '?') responsible for $expr_indent, if $expr_indent != 0
+my $in_expr;               # in expression after if/while/for/switch/return/enum/LHS of assignment
+my $in_paren_expr;         # in parenthesized if/while/for condition and switch expression, if $expr_indent != 0
+my $in_typedecl;           # nesting level of typedef/struct/union/enum
+my $in_directive;          # number of lines so far within preprocessor directive, e.g., macro definition
+my $directive_nesting;     # currently required indentation of preprocessor directive according to #if(n)(def)
+my $directive_offset;      # indent offset within multi-line preprocessor directive, if $in_directive > 0
+my $in_macro_header;       # number of open parentheses + 1 in (multi-line) header of #define, if $in_directive > 0
+my $in_comment;            # number of lines so far within multi-line comment, or < 0 when end is on current line
+                           # (-1: ends on current line, -2: same, with $count reduced by 1 as indentation tweak)
+my $leading_comment;       # multi-line comment has no code before its beginning delimiter
+my $formatted_comment;     # multi-line comment beginning with "/*-", which indicates/allows special formatting
+my $comment_indent;        # comment indent, if $in_comment != 0
+# statistics, all incremented by report_flexibly()
+my $num_reports_line = 0;  # number of issues found on current line
+my $num_reports = 0;       # total number of issues found
+my $num_indent_reports = 0;# total number of indentation issues found
+my $num_nesting_issues = 0;# total number of directive nesting issues found
+my $num_syntax_issues = 0; # total number of syntax issues found during sanity checks
+my $num_SPC_reports = 0;   # total number of whitespace issues found
+my $num_length_reports = 0;# total number of line length issues found
+
+sub reset_file_state { # bring the per-file parsing state back to its initial value
+    # line numbering
+    $line = $line_before = $line_before2 = 0;
+
+    # all nesting stacks become empty
+    for my $stack (\@nested_block_indents, \@nested_hanging_offsets, \@nested_in_typedecl,
+                   \@nested_symbols, \@nested_indents, \@nested_conds_indents,
+                   \@in_do_hanging_offsets, \@in_if_hanging_offsets) {
+        @$stack = ();
+    }
+
+    # required indentation
+    $block_indent = $hanging_offset = $expr_indent = 0;
+
+    # flags, nesting levels, and remembered line numbers
+    $in_expr = $in_paren_expr = 0;
+    $if_maybe_terminated = $ifdef__cplusplus = $in_multiline_string = 0;
+    $line_body_start = $line_opening_brace = 0;
+    $in_typedecl = $in_directive = $directive_nesting = $in_comment = 0;
+}
+
+# auxiliary submodules @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+# Report an issue for a given line and update the issue statistics.
+# Arguments: line number, message text, and line contents (printed verbatim after the message).
+# Messages mentioning "SPC" are dropped entirely if $sloppy_SPC is set.
+# For self-test input the output is suppressed while the counting is still done.
+sub report_flexibly {
+    my ($report_line, $text, $line_contents) = @_;
+    my $is_SPC_report = $text =~ /SPC/;
+    return if $sloppy_SPC && $is_SPC_report;
+
+    print "$ARGV:$report_line:$text:$line_contents" unless $self_test;
+
+    # overall counters
+    $num_reports_line++;
+    $num_reports++;
+
+    # counters per category, selected by keywords in the message text
+    $num_SPC_reports++    if $is_SPC_report;
+    $num_indent_reports++ if $text =~ m/indent/;
+    $num_nesting_issues++ if $text =~ m/directive nesting/;
+    $num_syntax_issues++  if $text =~ m/unclosed|unexpected/;
+    $num_length_reports++ if $text =~ m/length/;
+}
+
+sub report { # report an issue with the given message for the current line
+    my ($text) = @_;
+    return report_flexibly($line, $text, $contents);
+}
+
+sub parens_balance { # number of '(' minus number of ')' in the given string
+    my ($text) = @_;
+    my $num_opening = ($text =~ tr/(//); # tr with empty replacement just counts
+    my $num_closing = ($text =~ tr/)//);
+    return $num_opening - $num_closing;
+}
+
+sub blind_nonspace { # return the given comment text with each non-space character replaced by '@'
+    # '@' serves as blinding character because it cannot occur in normal program code;
+    # spaces are kept, and thus the length, such that double SPC can be checked also within comments
+    my ($text) = @_;
+    # tolerate one extra space after a period '.': its first following whitespace gets blinded as well
+    (my $padded = $text) =~ s/\.\s\s/.. /g;
+    (my $blinded = $padded) =~ tr/ /@/c; # translate every character other than ' '
+    return $blinded;
+}
+
+# submodule for indentation checking/reporting @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+# Check the actual indentation $count of the current line against the indentation required by the
+# current nesting state and report any mismatch. The line is taken from $_, where comment text has
+# been blinded as '@', and from $contents, which holds the unmodified line.
+# Takes no arguments; the return value is not meaningful.
+# Side effects besides reporting: the first '@' of $_ may get replaced by '*' (to suppress the delayed
+# comment check done for the following line), and for lines after a trailing '{' the values of
+# $expr_indent and $nested_indents[-1] are fixed to the indentation variant actually being used.
+sub check_indent { # used for lines outside multi-line string literals
+    my $stmt_indent = $block_indent + $hanging_offset + $local_offset;
+    $stmt_indent = 0 if $stmt_indent < 0; # TODO maybe give warning/error
+    my $stmt_desc = $contents =~
+        m/^\s*\/\*/ ? "intra-line comment" :
+        $has_label ? "label" :
+        ($hanging_offset != 0 ? "hanging " : "").
+        ($hanging_offset != 0 ? "stmt/expr" : "stmt/decl"); # $in_typedecl is not fully to the point here
+    # the reference indentation is the one reported as expected; the alternative one is accepted as well
+    my ($ref_desc, $ref_indent) = $expr_indent == 0 ? ($stmt_desc, $stmt_indent)
+                                                    : ("hanging '$hanging_symbol'", $expr_indent);
+    my ($alt_desc, $alt_indent) = ("", $ref_indent);
+
+    # allow indent 1 for labels - this cannot happen for leading ':'
+    ($alt_desc, $alt_indent) = ("outermost position", 1) if $expr_indent == 0 && $has_label;
+
+    if (@nested_conds_indents != 0 && substr($_, $count, 1) eq ":") {
+        # leading ':' within stmt/expr/decl - this cannot happen for labels nor leading  '&&' or '||'
+        # allow special indent at level of corresponding "?"
+        ($alt_desc, $alt_indent) = ("leading ':'", $nested_conds_indents[-1]);
+    }
+    # allow extra indent offset leading '&&' or '||' - this cannot happen for leading ":"
+    ($alt_desc, $alt_indent) = ("leading '$1'", $ref_indent + INDENT_LEVEL) if $contents =~ m/^[\s@]*(\&\&|\|\|)/;
+
+    if ($expr_indent < 0) { # implies @nested_symbols != 0 && $nested_symbols[0] eq "{" && $nested_indents[-1] < 0
+        # allow normal stmt indentation level for hanging initializer/enum expressions after trailing '{'
+        # this cannot happen for labels and overrides special treatment of ':', '&&' and '||' for this line
+        ($alt_desc, $alt_indent) = ("lines after '{'", $stmt_indent);
+        # decide depending on current actual indentation, preventing forth and back
+        $nested_indents[-1] = $count == $stmt_indent ? $stmt_indent : -$nested_indents[-1]; # allow $stmt_indent
+        $ref_indent = $expr_indent = $nested_indents[-1];
+    }
+
+    # check consistency of indentation within multi-line comment (i.e., between its first, inner, and last lines)
+    if ($in_comment != 0 && $in_comment != 1) { # in multi-line comment but not on its first line
+        if (!$sloppy_cmt) {
+            if ($in_comment > 0) { # not at its end
+                report("indent = $count != $comment_indent within multi-line comment")
+                    if $count != $comment_indent;
+            } else {
+                # $in_comment == -2 indicates that $count has been decremented for the end of the comment
+                my $tweak = $in_comment == -2 ? 1 : 0;
+                report("indent = ".($count + $tweak)." != $comment_indent at end of multi-line comment")
+                    if $count + $tweak != $comment_indent;
+            }
+        }
+        # do not check indentation of last line of non-leading multi-line comment
+        if ($in_comment < 0 && !$leading_comment) {
+            s/^(\s*)@/$1*/; # blind first '@' as '*' to prevent below delayed check for the line before
+            return;
+        }
+        return if $in_comment > 0; # not on its last line
+        # $comment_indent will be checked by the below checks for end of multi-line comment
+    }
+
+    # else check indentation of entire-line comment or entire-line end of multi-line comment
+    # ... w.r.t. indent of the following line by delayed check for the line before
+    if (($in_comment == 0 || $in_comment == 1) # no comment, intra-line comment, or begin of multi-line comment
+        && $line_before > 0 # there is a line before
+        && $contents_before_ =~ m/^(\s*)@[\s@]*$/) { # line before begins with '@', no code follows (except '\')
+        report_flexibly($line_before, "entire-line comment indent = $count_before != $count (of following line)",
+            $contents_before) if !$sloppy_cmt && $count_before != $count;
+    }
+    # ... but allow normal indentation for the current line, else above check will be done for the line before
+    if (($in_comment == 0 || $in_comment < 0) # (no comment,) intra-line comment or end of multi-line comment
+        && m/^(\s*)@[\s@]*$/) { # line begins with '@', no code follows (except '\')
+        if ($count == $ref_indent) { # indentation is like for (normal) code in this line
+            s/^(\s*)@/$1*/; # blind first '@' as '*' to prevent above delayed check for the line before
+            return;
+        }
+        return if !eof; # defer check of entire-line comment to next line
+    }
+
+    # else check indentation of leading intra-line comment or end of multi-line comment
+    if (m/^(\s*)@/) { # line begins with '@', i.e., any (remaining type of) comment
+        if (!$sloppy_cmt && $count != $ref_indent) {
+            report("intra-line comment indent = $count != $ref_indent") if $in_comment == 0;
+            report("multi-line comment indent = $count != $ref_indent") if $in_comment < 0;
+        }
+        return;
+    }
+
+    if ($sloppy_hang && ($hanging_offset != 0 || $expr_indent != 0)) {
+        # do not report same indentation as on the line before (potentially due to same violations)
+        return if $line_before > 0 && $count == $count_before;
+
+        # do not report indentation at normal indentation level while hanging expression indent would be required
+        return if $expr_indent != 0 && $count == $stmt_indent;
+
+        # do not report if contents have been shifted left of nested expr indent (but not as far as stmt indent)
+        # apparently aligned to the right in order to fit within line length limit
+        return if $stmt_indent < $count && $count < $expr_indent &&
+            length($contents) == MAX_LINE_LENGTH + length("\n");
+    }
+
+    report("indent = $count != $ref_indent for $ref_desc".
+           ($alt_desc eq ""
+            || $alt_indent == $ref_indent # prevent showing alternative that happens to have equal value
+            ? "" : " or $alt_indent for $alt_desc"))
+        if $count != $ref_indent && $count != $alt_indent;
+}
+
+# submodules handling indentation within expressions @@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+# Scan a line for the symbols {}()?:;[] and maintain the nesting state accordingly, namely the block
+# stacks (@nested_block_indents, @nested_hanging_offsets, @nested_in_typedecl) and the expression
+# stacks (@nested_indents, @nested_symbols, @nested_conds_indents).
+# Arguments: the line contents to scan and optionally the position to start scanning at (default: 0).
+# Returns the position of the first ';' found that is not within the parentheses of 'for (..;..;..)',
+# where scanning stops, or -1 if the line has been scanned to its end.
+sub update_nested_indents { # may reset $in_paren_expr and in this case also resets $in_expr
+    my ($str, $start) = @_;
+    $start = 0 unless defined $start;
+    for (my $i = $start; $i < length($str); $i++) {
+        # match from position $i the first {}()?:;[] along with the text skipped before and the text after it
+        my ($skipped, $c, $tail) = substr($str, $i) =~ m/^(.*?)([{}()?:;\[\]])(.*)$/
+            or last;
+        my $head = substr($str, 0, $i).$skipped;
+        $i += length($skipped) + length($c) - 1; # $i is now the position of $c within $str
+
+        # stop at terminator outside 'for(..;..;..)', assuming that 'for' is followed by '('
+        return $i if $c eq ";" && (!$in_paren_expr || @nested_indents == 0);
+
+        my $in_stmt = $in_expr || @nested_symbols != 0; # not: || $in_typedecl != 0
+        if ($c =~ m/[{([?]/) { # $c is '{', '(', '[', or '?'
+            if ($c eq "{") { # '{' in any context
+                # cancel newly hanging_offset if opening brace '{' is after non-whitespace non-comment:
+                $hanging_offset -= INDENT_LEVEL if $hanging_offset > 0 && $head =~ m/[^\s\@]/;
+                push @nested_block_indents, $block_indent;
+                push @nested_hanging_offsets, $in_expr ? $hanging_offset : 0;
+                push @nested_in_typedecl, $in_typedecl if $in_typedecl != 0;
+                $block_indent += INDENT_LEVEL + $hanging_offset;
+                $hanging_offset = 0;
+            }
+            if ($c ne "{" || $in_stmt) { # for '{' inside stmt/expr (not: decl), for '(', '[', or '?' anywhere
+                # use named captures of this very match rather than $1 and $2, which are not reset on failure
+                my ($spaces, $next_char) = $tail =~ m/^([\s@]*)([^\s\@])/;
+                push @nested_indents, defined $next_char
+                    ? $i + 1 + length($spaces) # actual indentation of following non-space non-comment
+                    : $c ne "{" ? +($i + 1)  # just after '(' or '[' if only whitespace thereafter
+                                : -($i + 1); # allow also $stmt_indent if '{' with only whitespace thereafter
+                push @nested_symbols, $c; # done also for '?' to be able to check correct nesting
+                push @nested_conds_indents, $i if $c eq "?"; # remember special alternative indent for ':'
+            }
+        } elsif ($c =~ m/[})\]:]/) { # $c is '}', ')', ']', or ':'
+            # the replacement list must be as long as the search list, else ':' would be mapped to '['
+            my $opening_c = ($c =~ tr/})]:/{([?/r);
+            if (($c ne ":" || $in_stmt    # ignore ':' outside stmt/expr/decl
+                # in the presence of ':', one could add this sanity check:
+                # && !(# ':' after initial label/case/default
+                #      $head =~ m/^([\s@]*)(case\W.*$|\w+$)/ || # this matching would not work for
+                #                                               # multi-line expr after 'case'
+                #      # bitfield length within unsigned type decl
+                #      $tail =~ m/^[\s@]*\d+/                   # this matching would need improvement
+                #     )
+                )) {
+                if ($c ne "}" || $in_stmt) { # for '}' inside stmt/expr/decl, ')', ']', or ':'
+                    # symbols are strings and thus must be compared using 'eq'; '==' would always hold here
+                    if (@nested_symbols != 0 &&
+                        $nested_symbols[-1] eq $opening_c) { # for $c there was a corresponding $opening_c
+                        pop @nested_indents;
+                        pop @nested_symbols;
+                        pop @nested_conds_indents if $opening_c eq "?";
+                    } else {
+                        report("unexpected '$c' @ ".($in_paren_expr ? "(expr)" : "expr"));
+                        next;
+                    }
+                }
+                if ($c eq "}") { # '}' at block level but also inside stmt/expr/decl
+                    if (@nested_block_indents == 0) {
+                        report("unexpected '}'");
+                    } else {
+                        $block_indent = pop @nested_block_indents;
+                        $hanging_offset = pop @nested_hanging_offsets;
+                        $in_typedecl = pop @nested_in_typedecl if @nested_in_typedecl != 0;
+                    }
+                }
+                if ($in_paren_expr && !grep(/\(/, @nested_symbols)) { # end of (expr)
+                    check_nested_nonblock_indents("(expr)");
+                    $in_paren_expr = $in_expr = 0;
+                    report("code after (expr)")
+                        if $tail =~ m/^([^{]*)/ && $1 =~ m/[^\s\@;]/; # non-space non-';' before any '{'
+                }
+            }
+        }
+    }
+    return -1;
+}
+
+# Report each nesting symbol that is still open and clear the nesting state of expressions.
+# Argument: description of the construct just ended, used in the report messages, e.g., "(expr)".
+sub check_nested_nonblock_indents {
+    my ($where) = @_;
+    while (@nested_symbols) {
+        my $open_symbol = pop @nested_symbols;
+        report("unclosed '$open_symbol' in $where");
+        next unless $open_symbol eq "{";
+        # an unclosed '{' has also opened a block: restore the state saved on entering it
+        $block_indent = pop @nested_block_indents;
+        $hanging_offset = pop @nested_hanging_offsets;
+        $in_typedecl = pop @nested_in_typedecl if @nested_in_typedecl;
+    }
+    @nested_conds_indents = ();
+    @nested_indents = ();
+}
+
+# start of main program @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+reset_file_state();
+
+while (<>) { # loop over all lines of all input files
+    $self_test = $ARGV =~ m/check-format-test/;
+    $line++;
+    s/\r$//; # strip any trailing CR '\r' (which are typical on Windows systems)
+    $contents = $_;
+
+    # check for illegal characters
+    if (m/(.*?)([\x00-\x09\x0B-\x1F\x7F-\xFF])/) {
+        my $col = length($1);
+        report(($2 eq "\x09" ? "TAB" : $2 eq "\x0D" ? "CR " : $2 =~ m/[\x00-\x1F]/ ? "non-printable"
+                : "non-7bit char") . " at column $col") ;
+    }
+
+    # check for whitespace at EOL
+    report("trailing whitespace at EOL") if m/\s\n$/;
+
+    # assign to $count the actual indentation level of the current line
+    chomp; # remove trailing NL '\n'
+    m/^(\s*)/;
+    $count = length($1); # actual indentation
+    $has_label = 0;
+    $local_offset = 0;
+
+    # character/string literals @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+    s/\\["']/@@/g; # blind all '"' and "'" escaped by '\' (typically within character literals or string literals)
+
+    # handle multi-line string literals to avoid confusion on starting/ending '"' and trailing '\'
+    if ($in_multiline_string) {
+        if (s#^([^"]*)"#($1 =~ tr/"/@/cr).'@'#e) { # string literal terminated by '"'
+            # string contents and its terminating '"' have been blinded as '@'
+            $count = -1; # do not check indentation
+        } else {
+            report("multi-line string literal not terminated by '\"' and trailing '\' is missing")
+                unless s#^([^\\]*)\s*\\\s*$#$1#; # strip trailing '\' plus any whitespace around
+            goto LINE_FINISHED;
+        }
+    }
+
+    # blind contents of character and string literals as @, preserving length (but not spaces)
+    # this prevents confusing any of the matching below, e.g., of whitespace and comment delimiters
+    s#('[^']*')#$1 =~ tr/'/@/cr#eg; # handle all intra-line character literals
+    s#("[^"]*")#$1 =~ tr/"/@/cr#eg; # handle all intra-line string literals
+    $in_multiline_string =          # handle trailing string literal terminated by '\'
+        s#^(([^"]*"[^"]*")*[^"]*)("[^"]*)\\(\s*)$#$1.($3 =~ tr/"/@/cr).'"'.$4#e;
+        # its contents have been blinded and the trailing '\' replaced by '"'
+
+    # strip any other trailing '\' along with any whitespace around it such that it does not interfere with various
+    # matching below; the later handling of multi-line macro definitions uses $contents where it is not stripped
+    s#^(.*?)\s*\\\s*$#$1#; # trailing '\' possibly preceded and/or followed by whitespace
+
+    # comments @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+    # do/prepare checks within multi-line comments
+    my $self_test_exception = $self_test ? "@" : "";
+    if ($in_comment > 0) { # this still includes the last line of multi-line commment
+        my ($head, $any_symbol, $cmt_text) = m/^(\s*)(.?)(.*)$/;
+        if ($any_symbol eq "*") {
+            report("no SPC after leading '*' in multi-line comment") if $cmt_text =~ m|^[^/\s$self_test_exception]|;
+        } else {
+            report("no leading '*' in multi-line comment");
+        }
+        $in_comment++;
+    }
+
+    # detect end of comment, must be within multi-line comment, check if it is preceded by non-whitespace text
+    if ((my ($head, $tail) = m|^(.*?)\*/(.*)$|) && $1 ne '/') { # ending comment: '*/'
+        report("no SPC nor '*' before '*/'") if $head =~ m/[^*\s]$/;
+        report("no SPC after '*/'") if $tail =~ m/^[^\s,;)}\]]/; # no space or ,;)}] after '*/'
+        if (!($head =~ m|/\*|)) { # not begin of comment '/*', which is is handled below
+            if ($in_comment == 0) {
+                report("unexpected '*/' outside comment");
+                $_ = "$head@@".$tail; # blind the "*/"
+            } else {
+                report("text before '*/' in multi-line comment") if ($head =~ m/\S/); # non-SPC before '*/'
+                $in_comment = -1; # indicate that multi-line comment ends on current line
+                if ($count > 0) {
+                    # make indentation of end of multi-line comment appear like of leading intra-line comment
+                    $head =~ s/^(\s*)\s/$1@/; # replace the last leading space by '@'
+                    $count--;
+                    $in_comment = -2; # indicate that multi-line comment ends on current line, with tweak
+                }
+                my $cmt_text = $head;
+                $_ = blind_nonspace($cmt_text)."@@".$tail;
+            }
+        }
+    }
+
+    # detect begin of comment, check if it is followed by non-space text
+  MATCH_COMMENT:
+    if (my ($head, $opt_minus, $tail) = m|^(.*?)/\*(-?)(.*)$|) { # begin of comment: '/*'
+        report("no SPC before '/*'")
+            if $head =~ m/[^\s\*]$/; # no space (nor '*', needed to allow '*/' here) before comment delimiter
+        report("no SPC nor '*' after '/*' or '/*-'") if $tail =~ m/^[^\s*$self_test_exception]/;
+        my $cmt_text = $opt_minus.$tail; # preliminary
+        if ($in_comment > 0) {
+            report("unexpected '/*' inside multi-line comment");
+        } elsif ($tail =~ m|^(.*?)\*/(.*)$|) { # comment end: */ on same line
+            report("unexpected '/*' inside intra-line comment") if $1 =~ /\/\*/;
+            # blind comment text, preserving length and spaces
+            ($cmt_text, my $rest) = ($opt_minus.$1, $2);
+            $_ = "$head@@".blind_nonspace($cmt_text)."@@".$rest;
+            goto MATCH_COMMENT;
+        } else { # begin of multi-line comment
+            my $self_test_exception = $self_test ? "(@\d?)?" : "";
+            report("text after '/*' in multi-line comment")
+                unless $tail =~ m/^$self_test_exception.?\s*$/;
+            # tail not essentially empty, first char already checked
+            # adapt to actual indentation of first line
+            $comment_indent = length($head) + 1;
+            $_ = "$head@@".blind_nonspace($cmt_text);
+            $in_comment = 1;
+            $leading_comment = $head =~ m/^\s*$/; # there is code before beginning delimiter
+            $formatted_comment = $opt_minus eq "-";
+        }
+    }
+
+    if ($in_comment > 1) { # still inside multi-line comment (not at its begin or end)
+        m/^(\s*)\*?(\s*)(.*)$/;
+        $_ = $1."@".$2.blind_nonspace($3);
+    }
+
+    # handle special case of line after '#ifdef __cplusplus' (which typically appears in header files)
+    if ($ifdef__cplusplus) {
+        $ifdef__cplusplus = 0;
+        $_ = "$1 $2" if $contents =~ m/^(\s*extern\s*"C"\s*)\{(\s*)$/; # ignore opening brace in 'extern "C" {'
+        goto LINE_FINISHED if m/^\s*\}\s*$/; # ignore closing brace '}'
+    }
+
+    # Check for over-long lines.  A line longer than $max_length is tolerated if it ends with
+    # a string literal, optionally followed by closing )}] and one ',' or ';', provided that
+    # the text before the literal's opening '"' is shorter than $max_length.
+    my $len = length; # total line length (without trailing '\n')
+    if ($len > $max_length) {
+        my $ends_with_string = m/^(.*)"[^"]*"\s*[\)\}\]]*[,;]?\s*$/;
+        # $1 is the part of the line before the opening '"' of the trailing string literal
+        my $string_begins_early = $ends_with_string && length($1) < $max_length;
+        report("line length = $len > ".MAX_LINE_LENGTH) unless $string_begins_early;
+    }
+
+    # C++ / C99 - style end-of-line comments: report them (the '//' style is not allowed for C90)
+    # and blind their text, preserving length and spaces
+    if (m|^(.*?)//(.*$)|) {
+        my ($code, $eol_text) = ($1, $2); # save captures before calling any other code
+        report("'//' end-of-line comment");
+        $_ = $code."@@".blind_nonspace($eol_text);
+    }
+
+    # at this point all non-space portions of any types of comments have been blinded as @
+
+    if (m/^\s*$/) { # essentially empty line: just whitespace (and maybe a trailing '\')
+        goto LINE_FINISHED;
+    }
+
+    # intra-line whitespace nits @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+    # Intra-line whitespace checks.  By now the non-space parts of all comments have been blinded as '@'.
+    # The checks are skipped if $sloppy_SPC is set and inside multi-line comments flagged by $formatted_comment.
+    # NOTE(review): dbl_SPC, split_line_head, and column_alignments_only are named subs nested in blocks.
+    # Perl compiles each of them just once, so the ones reading $in_comment rely on it being one shared
+    # variable - presumably it is declared at file scope; confirm.
+    my $in_multiline_comment = ($in_comment > 1 || $in_comment < 0); # $in_multiline_comment refers to line before
+    if (!$sloppy_SPC && !($in_multiline_comment && $formatted_comment)) {
+        # dbl_SPC($intra_line) returns the report text for a double space finding.
+        # If $intra_line contains a blinded '@' directly followed by two whitespace characters,
+        # the text also states the kind of comment, which is selected by $in_comment.
+        sub dbl_SPC {
+            my $intra_line = shift;
+            return "double SPC".($intra_line =~ m/@\s\s/ ?
+                                 $in_comment != 0 ? " in multi-line comment"
+                                                  : " in intra-line comment" : "");
+        }
+        # split_line_head($line) returns the pair (head, intra-line part) of $line, where
+        # head is the leading whitespace plus an optional '#' (or '@' if $in_comment != 0) with following
+        # whitespace, and the intra-line part is the rest of the line without trailing whitespace.
+        sub split_line_head {
+            my $comment_symbol =
+                $in_comment != 0 ? "@" : ""; # '@' will match the blinded leading '*' in multi-line comment
+                                             # $in_comment may pertain to the following line due to delayed check
+            # do not check for double SPC in leading spaces including any '#' (or '*' within multi-line comment)
+            shift =~ m/^(\s*([#$comment_symbol]\s*)?)(.*?)\s*$/;
+            return ($1, $3);
+        }
+        my ($head , $intra_line ) = split_line_head($_);
+        # plain conditional expression, because 'my ... if COND' has undefined behaviour according to perlsyn
+        my ($head1, $intra_line1) = $line_before > 0 ? split_line_head($contents_before_) : ();
+        if ($line_before > 0) { # check with one line delay, such that at least $contents_before is available
+            # column_alignments_only($head, $intra, $contents) returns 1 if each double space followed by
+            # a non-space character in $intra has, one column further right in $contents (after skipping
+            # length($head) columns), a whitespace character followed by a non-space one; otherwise 0.
+            sub column_alignments_only {
+                my $head = shift;
+                my $intra = shift;
+                my $contents = shift;
+                # check if all double SPC in $intra is used only for multi-line column alignment with $contents
+                my $offset = length($head);
+                for (my $col = 0; $col < length($intra) - 2; $col++) {
+                    return 0 if substr($intra   , $col, 3) =~ m/\s\s\S/ # double space (after leading space)
+                           && !(substr($contents, $col + $offset + 1, 2) =~ m/\s\S/);
+                }
+                return 1;
+            }
+            # the line before is reported unless its double spaces align with the current line or the one before it
+            report_flexibly($line_before, dbl_SPC($intra_line1), $contents_before) if $intra_line1 =~ m/\s\s\S/ &&
+               !(    column_alignments_only($head1, $intra_line1, $_                )    # compare with $line
+                 || ($line_before2 > 0 &&
+                     column_alignments_only($head1, $intra_line1, $contents_before_2))); # compare w/ $line_before2
+            # the current line is checked here only at end of file, since no later line will trigger the delayed check
+            report(dbl_SPC($intra_line)) if $intra_line  =~ m/\s\s\S/ && eof
+                && ! column_alignments_only($head , $intra_line , $contents_before_ )  ; # compare w/ $line_before
+        } elsif (eof) { # special case: just one line exists
+            report(dbl_SPC($intra_line)) if $intra_line  =~ m/\s\s\S/;
+        }
+        # From here on, $intra_line is simplified step by step such that the patterns below can stay simple.
+        # ignore paths in #include
+        $intra_line =~ s/^(include\s*)(".*?"|<.*?>)/$1/e if $head =~ m/#/;
+        # treat op= and comparison operators as simple '=', simplifying matching below
+        $intra_line =~ s/([\+\-\*\/\/%\&\|\^\!<>=]|<<|>>)=/=/g;
+        # treat (type) variables within macro, indicated by trailing '\', as 'int' simplifying matching below
+        $intra_line =~ s/[A-Z_]+/int/g if $contents =~ m/^(.*?)\s*\\\s*$/;
+        # treat double &&, ||, <<, and >> as single ones, simplifying matching below
+        $intra_line =~ s/(&&|\|\||<<|>>)/substr($1, 0, 1)/eg;
+        # remove blinded comments etc. directly before ,;)}
+        while ($intra_line =~ s/\s*@+([,;)}\]])/$1/e) {} # /g does not work here
+        # treat remaining blinded comments and string literal contents as (single) space during matching below
+        $intra_line =~ s/@+/ /g;                     # note that double SPC has already been handled above
+        $intra_line =~ s/\s+$//;                     # strip any (resulting) space at EOL
+        $intra_line =~ s/(for\s*\();;(\))/"$1$2"/eg; # strip ';;' in for (;;)
+        $intra_line =~ s/(=\s*)\{ /"$1@ "/eg;        # do not report {SPC in initializers such as ' = { 0, };'
+        $intra_line =~ s/, \};/, @;/g;               # do not report SPC} in initializers such as ' = { 0, };'
+        report("SPC before '$1'") if $intra_line =~ m/[\w)\]]\s+(\+\+|--)/;  # postfix ++/-- with preceding space
+        report("SPC after '$1'")  if $intra_line =~ m/(\+\+|--)\s+[a-zA-Z_(]/; # prefix ++/-- with following space
+        $intra_line =~ s/\.\.\./@/g;                 # blind '...'
+        report("SPC before '$1'") if $intra_line =~ m/\s(\.|->)/;            # '.' or '->' with preceding space
+        report("SPC after '$1'")  if $intra_line =~ m/(\.|->)\s/;            # '.' or '->' with following space
+        $intra_line =~ s/\-\>|\+\+|\-\-/@/g;         # blind '->', '++', and '--'
+        # the pattern has a single capture group, so the reported symbol is $1
+        report("SPC before '$1'")     if $intra_line =~ m/[^:]\s+(;)/;       # space before ';' but not after ':'
+        report("SPC before '$1'")     if $intra_line =~ m/\s([,)\]])/;       # space before ,)]
+        report("SPC after '$1'")      if $intra_line =~ m/([(\[~!])\s/;      # space after ([~!
+        report("SPC after '$1'")      if $intra_line =~ m/(defined)\s/;      # space after 'defined'
+        report("no SPC before '=' or '<op>='") if $intra_line =~ m/\S(=)/;   # '=' etc. without preceding space
+        report("no SPC before '$1'")  if $intra_line =~ m/\S([|\/%<>^\?])/;  # |/%<>^? without preceding space
+        # TODO ternary ':' without preceding SPC, while allowing no SPC before ':' after 'case'
+        report("no SPC before '$1'")  if $intra_line =~ m/[^\s{()\[]([+\-])/;# +/- without preceding space or {()[
+                                                                             # or ')' (which is used for type casts)
+        report("no SPC before '$1'")  if $intra_line =~ m/[^\s{()\[*]([*])/; # '*' without preceding space or {()[*
+        report("no SPC before '$1'")  if $intra_line =~ m/[^\s{()\[]([&])/;  # '&' without preceding space or {()[
+        report("no SPC after ternary '$1'") if $intra_line =~ m/(:)[^\s\d]/; # ':' without following space or digit
+        report("no SPC after '$1'")   if $intra_line =~ m/([,;=|\/%<>^\?])\S/; # ,;=|/%<>^? without following space
+        report("no SPC after binary '$1'") if $intra_line=~m/([*])[^\sa-zA-Z_(),*]/;# '*' w/o space or \w(),* after
+        # TODO unary '*' must not be followed by SPC
+        report("no SPC after binary '$1'") if $intra_line=~m/([&])[^\sa-zA-Z_(]/;  # '&' w/o following space or \w(
+        # TODO unary '&' must not be followed by SPC
+        report("no SPC after binary '$1'") if $intra_line=~m/([+\-])[^\s\d(]/;  # +/- w/o following space or \d(
+        # TODO unary '+' and '-' must not be followed by SPC
+        report("no SPC after '$2'")   if $intra_line =~ m/(^|\W)(if|while|for|switch|case)[^\w\s]/; # kw w/o SPC
+        report("no SPC after '$2'")   if $intra_line =~ m/(^|\W)(return)[^\w\s;]/;  # return w/o SPC or ';'
+        report("SPC after function/macro name")
+                                      if $intra_line =~ m/(\w+)\s+\(/        # fn/macro name with space before '('
+       && !($1 =~ m/^(if|while|for|switch|return|typedef|void|char|unsigned|int|long|float|double)$/) # not keyword
+                                    && !(m/^\s*#\s*define\s/); # we skip macro definitions here because macros
+                                    # without parameters but with body beginning with '(', e.g., '#define X (1)',
+                                    # would lead to false positives - TODO also check for macros with parameters
+        report("no SPC before '{'")   if $intra_line =~ m/[^\s{(\[]\{/;      # '{' without preceding space or {([
+        report("no SPC after '}'")    if $intra_line =~ m/\}[^\s,;\])}]/;    # '}' without following space or ,;])}
+    }
+
+    # preprocessor directives @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+    # handle preprocessor directives
+    if (m/^\s*#(\s*)(\w+)/) { # line beginning with '#'
+        my $space_count = length($1); # maybe could also use indentation before '#'
+        my $directive = $2;
+        report("indent = $count != 0 for '#'") if $count != 0;
+        $directive_nesting-- if $directive =~ m/^(else|elif|endif)$/;
+        if ($directive_nesting < 0) {
+            $directive_nesting = 0;
+            report("unexpected '#$directive'");
+        }
+        report("'#' directive nesting = $space_count != $directive_nesting") if $space_count != $directive_nesting;
+        $directive_nesting++ if $directive =~ m/^if|ifdef|ifndef|else|elif$/;
+        $ifdef__cplusplus = m/^\s*#\s*ifdef\s+__cplusplus\s*$/;
+        goto POSTPROCESS_DIRECTIVE unless $directive =~ m/^define$/; # skip normal code handling except for #define
author	Dr. David von Oheimb <David.von.Oheimb@siemens.com>	2020-03-09 11:03:21 +0100
committer	Dr. David von Oheimb <David.von.Oheimb@siemens.com>	2020-03-09 11:03:21 +0100
commit	99a16e0459e5089c2cfb92ee775f1221a51b8d05 (patch)
tree	4037504638169aad9004ad8850515d15f6c250e5 /util/check-format.pl
parent	c518117b99bc4aad62990e8a31b7bc1dae06d16c (diff)