util/kconfig_lint: Turn handle_expressions() into a parser

I wished there was a way to do this in smaller steps, but with
every line fixed an error somewhere else became visible. Here
is a (probably incomplete) list of the issues:

* Only one set of parentheses was supported. This is a hard
  to solve problem without a real parser (one solution is to
  use an recursive RE, see below).

* The precedence order was wrong. Might have been adapted just
  to give a positive result for the arbitrary state of the tree.

* Numbered match variables (e.g. $1, $2, etc.) are not local.
  Calling handle_expressions() recursively once with $1, then
  with $2, resulted in using the final $2 after the first
  recursive call (garbage, practically).

Also, symbol and expression parsing was mixed, making things
harder to follow.

To remedy the issues:

* Split handle_symbol() out. It is called with whitespace
  stripped, to keep the uglier REs in handle_expressions().

* Match balanced parentheses and quotes when splitting
  expressions. In this recursive RE

    /(\((?:[^\(\)]++|(?-1))*\))/

  the `(?-1)` references the outer-most group, thus the whole
  expression itself. So it matches a pair of parentheses with
  a mix of non-parentheses and the recursive rule itself inside.
  This allows us to:

* Order the expression matches according to their precedence
  rules. Now we can match `<expr> '||' <expr>` first as we should
  and everything else falls into its place.

* Remove the bail-out that silenced the undefined behavior.

Change-Id: Ibc1be79adc07792f0721f0dc08b50422b6da88a9
Signed-off-by: Nico Huber <nico.h@gmx.de>
Reviewed-on: https://review.coreboot.org/c/coreboot/+/52067
Tested-by: build bot (Jenkins) <no-reply@coreboot.org>
Reviewed-by: Angel Pons <th3fanbus@gmail.com>
diff --git a/util/lint/kconfig_lint b/util/lint/kconfig_lint
index 43a3ca6..7f042f6 100755
--- a/util/lint/kconfig_lint
+++ b/util/lint/kconfig_lint
@@ -947,57 +947,74 @@
 }
 
 #-------------------------------------------------------------------------------
-# handle_expressions - log which symbols are being used
+# handle_symbol - log which symbols are being used
+#-------------------------------------------------------------------------------
+sub handle_symbol {
+    my ( $symbol, $filename, $line_no ) = @_;
+
+    #filter constant symbols first
+    if ( $symbol =~ /^[yn]$/ ) {                    # constant y/n
+        return;
+    }
+    if ( $symbol =~ /^-?(?:0x)?\p{XDigit}+$/ ) {    # int/hex values
+        return;
+    }
+    if ( $symbol =~ /^"[^"]*"$/ ) {                 # string values
+        return;
+    }
+
+    if ( $symbol =~ /^([A-Za-z0-9_]+)$/ ) {         # actual symbol
+        add_referenced_symbol( $1, $filename, $line_no, 'expression' );
+    }
+    else {
+        show_error("Unrecognized expression: expected symbol, "
+                    . "found '$symbol' in $filename line $line_no.");
+    }
+}
+
+#-------------------------------------------------------------------------------
+# handle_expressions - find symbols in expressions
 #-------------------------------------------------------------------------------
 sub handle_expressions {
     my ( $exprline, $inside_config, $filename, $line_no ) = @_;
 
-    return unless ($exprline);
+    my $strip = qr/\s*(.*[^\s]+)\s*/;
 
-    #filter constant symbols first
-    if ( $exprline =~ /^\s*"?([yn])"?\s*$/ ) {    # constant y/n
-        return;
-    }
-    elsif ( $exprline =~ /^\s*"?((?:-)\d+)"?\s*$/ ) {    # int values
-        return;
-    }
-    elsif ( $exprline =~ /^\s*"?((?:-)?(?:0x)?\p{XDigit})+"?\s*$/ ) {    # hex values
-        return;
-    }
-    elsif ( $exprline =~ /^\s*("[^"]*")\s*$/ ) {                         # String values
-        return;
-    }
-    elsif ( $exprline =~ /^\s*([A-Za-z0-9_]+)\s*$/ ) {                   # <symbol>                             (1)
-        add_referenced_symbol( $1, $filename, $line_no, 'expression' );
-    }
-    elsif ( $exprline =~ /^\s*!(.+)$/ ) {                                # '!' <expr>                           (5)
+    my $parens = qr/(\((?:[^\(\)]++|(?-1))*\))/;
+    my $quotes = qr/"[^"]*"/;
+    my $balanced = qr/((?:$parens|$quotes|[^\(\)"])+)/;
 
+    if ( $exprline =~ /^\s*$balanced\s*(?:\|\||&&)\s*(.+)$/ ) {
+        # <expr> '||' <expr>, <expr> '&&' <expr>                  (7)(6)
+        my ( $lhs, $rhs ) = ( $1, $3 );
+        handle_expressions( $lhs, $inside_config, $filename, $line_no );
+        handle_expressions( $rhs, $inside_config, $filename, $line_no );
+    }
+    elsif ( $exprline =~ /^\s*!(.+)$/ ) {
+        # '!' <expr>                                                 (5)
         handle_expressions( $1, $inside_config, $filename, $line_no );
     }
-    elsif ( $exprline =~ /^\s*\(([^)]+)\)\s*$/ ) {                       # '(' <expr> ')'                       (4)
+    elsif ( $exprline =~ /^\s*$parens\s*$/ ) {
+        # '(' <expr> ')'                                             (4)
+        $exprline =~ /^\s*\((.*)\)\s*$/;
         handle_expressions( $1, $inside_config, $filename, $line_no );
     }
-    elsif ( $exprline =~ /^\s*(.+)\s*!=\s*(.+)\s*$/ ) {                  # <symbol> '!=' <symbol>               (3)
-        handle_expressions( $1, $inside_config, $filename, $line_no );
-        handle_expressions( $2, $inside_config, $filename, $line_no );
+    elsif ( $exprline =~ /^\s*($quotes|[^"\s]+)\s*!=$strip$/ ) {
+        # <symbol> '!=' <symbol>                                     (3)
+        my ( $lhs, $rhs ) = ( $1, $2 );
+        handle_symbol( $lhs, $filename, $line_no );
+        handle_symbol( $rhs, $filename, $line_no );
     }
-    elsif ( $exprline =~ /^\s*(.+)\s*=\s*(.+)\s*$/ ) {                   # <symbol> '=' <symbol>                (2)
-        handle_expressions( $1, $inside_config, $filename, $line_no );
-        handle_expressions( $2, $inside_config, $filename, $line_no );
+    elsif ( $exprline =~ /^\s*($quotes|[^"\s]+)\s*=$strip$/ ) {
+        # <symbol> '=' <symbol>                                      (2)
+        my ( $lhs, $rhs ) = ( $1, $2 );
+        handle_symbol( $lhs, $filename, $line_no );
+        handle_symbol( $rhs, $filename, $line_no );
     }
-    elsif ( $exprline =~ /^\s*([^(]+|\(.+\))\s*&&\s*(.+)\s*$/ ) {        # <expr> '&&' <expr>                   (6)
-        handle_expressions( $1, $inside_config, $filename, $line_no );
-        handle_expressions( $2, $inside_config, $filename, $line_no );
+    elsif ( $exprline =~ /^$strip$/ ) {
+        # <symbol>                                                   (1)
+        handle_symbol( $1, $filename, $line_no );
     }
-    elsif ( $exprline =~ /^\s*([^(]+|\(.+\))\s*\|\|\s*(.+)\s*$/ ) {      # <expr> '||' <expr>                   (7)
-        handle_expressions( $1, $inside_config, $filename, $line_no );
-        handle_expressions( $2, $inside_config, $filename, $line_no );
-    }
-    else {
-        show_error("Unrecognized expression '$exprline' in $filename line $line_no.");
-    }
-
-    return;
 }
 
 #-------------------------------------------------------------------------------