#! /usr/bin/perl
#   Since abinit ignores any variable that it cannot identify in the input
# file, and uses default values that exist for many variables, mistyped
# variable names go undetected and can yield wrong results.
#   Developed from an idea of J Zwanziger, this script checks the variable
# names in an abinit input file against the complete list from the file
# Infos/keyhr.html, displays all the names that cannot be identified and
# returns their count.
#
# Copyright (C) 2005 ABINIT group (LSi)
# This file is distributed under the terms of the
# GNU General Public License, see ~ABINIT/Infos/copyright
# or http://www.gnu.org/copyleft/gpl.txt .
# For the initials of contributors, see ~ABINIT/Infos/contributors .
#
# NOTES : 1) this script depends on the format of the file keyhr.html;
# 2) under Unix, a chkinabi  script will be automatically generated by 
# the command  make perl  in the ~ABINIT directory.
#
# USAGE :
# unix shell: chkinabi [<-m abinit> | -m MODULE ] [-i] inputfile
# Windows DOS box: [perl] chkinabi.pl [<-m abinit> | -m MODULE] [-i] inputfile
#   switch -i will include some internal variables (with % sign)
# By default, this script assumes that the input file is to be used by the
# "abinit" module. A module other than "abinit" can be specified using the
# -m MODULE option. The file Infos/MODULE_help.html must exist and should
# contain the complete list of variables for MODULE in the same format as
# the file Infos/keyhr.html . At the time of the first version of the present
# script, only "aim" and "anaddb" comply with this requirement.
#
#   WARNING ! Some input variables are not defined in input files for series
# v2 tests #83,90-97, series v4 tests #06,52,54,61-64,71,...
#
# Output formatting: every print ends with a newline and list items are
# separated by one blank.
$\ = "\n";
$, = ' ';

# Verbosity: 0 = quiet, 2 = verbose, 3 = intensive debugging.
$debug = 0;
# $debug = 2;

# Defaults: check against the abinit variables, ignore %internal variables.
($Module, $internal, $CurArg) = ('abinit', 0, 0);

# Optional "-m MODULE" must come first (aim, anaddb ...).
if ($ARGV[0] eq '-m') {
  $Module = $ARGV[1];
  $CurArg = 2;
  }
# Base name of the help file that lists the variables of the chosen module.
if ($Module eq 'abinit') {
  $ModHelp = 'keyhr';
  }
else {
  $ModHelp = "${Module}_help";
  }
# Optional "-i" comes next and turns on the %internal variables.
if ($ARGV[$CurArg] eq '-i') {
  $internal = 1;
  $CurArg = $CurArg + 1;
  }
# What remains must be the name of an existing input file.
$inputFile = $ARGV[$CurArg];
if ($inputFile eq '') {
  print "Missing $Module input file name";
  exit 28;
  }
unless (-f $inputFile) {
  print "Input file $inputFile for $Module does not exist";
  exit 28;
  }
# Path of the help file, relative to the ABINIT top directory.
$InVarsFile = 'Infos/' . $ModHelp . '.html';

# Locate the Infos/<help>.html file. It is searched from the current
# directory first, then from the parent and grandparent directories in case
# the current one is a tests directory. The first hit replaces $InVarsFile;
# when none exists the script stops with exit code 24.
{
  my $located = '';
  foreach my $prefix ('', '../', '../../') {
    if (-f "$prefix$InVarsFile") {
      $located = "$prefix$InVarsFile";
      last;
      }
    }
  if ($located eq '') {
    print "Invalid module $Module, file $InVarsFile does not exist";
    exit 24;
    }
  $InVarsFile = $located;
}
# Open the list of input variables for reading. The three-argument form is
# used so that the file name (built from the -m argument) is never parsed
# for mode characters or stripped of surrounding blanks.
$rc = open(VARS,'<',$InVarsFile);
if (! $rc) {
  print "Error opening file $InVarsFile";
  exit 24;
  }
# State used while reading the reference list of input variables.
$linect = 0;            # line counter
$initVars = 0;          # count of variables with same initial letter
$initial = ' ';         # no initial letter found yet, search title
$globVars = 0;          # complete list variables count
@Input_vars = ();       # complete list of variables

# How the input file is checked:
#   A variable name normally begins with a letter, a variable can have several
# values and several variables can be defined on one line. The input file is
# therefore scanned for words beginning with a letter, and each of them is
# looked up in the complete list.
#
#   Alphabetic keywords and non-atomic units found in input files could be
# mistaken for variables (see abinis_help.html, e.g. eV in test v3#31).
# They must be listed here:
@keywds = qw(eV ev Ry ry K Angstrom angstrom Hartree hartree Bohr bohr au);

#   A variable can appear in the input file with a 1-2 digit numeric suffix
# (e.g. densty in test v1#68) that is absent from the complete list. The
# following sublist collects the variables whose full name itself ends with a
# number (e.g. exchn2n3 defined in test v4#92):
@numNdVars = ();

#   A variable can also carry one of the suffixes : + or * giving the starting
# value, the increment of an arithmetic series or the ratio of a geometric one
# (e.g. v1#64,74).
#
#   Some variables take a string value that may begin with a letter (e.g.
# cmlfile defined in test v3#68) and would erroneously be reported as an
# undefined variable. Until these uncommon variables can be detected
# automatically, they have to be listed EXPLICITLY here:
@StringValVar = qw(cmlfile xcname xcname3);

#   Finally, there are special cases such as the use of the sqrt(...)
# function (e.g. test v3#42).
#
# Read the help file and build the complete list of variables.
# $initial drives a small state machine:
#   ' '   the "Alphabetical list" title has not been seen yet;
#   '/a'  title seen, waiting for the line that opens letter A;
#   other inside the list, $initial holds the current letter.
# Inside the list each line holding a link contributes one variable, or
# several when the link text is a comma-separated list; a % in front of the
# link marks an internal variable and is kept as prefix of the stored name.
while ($_ = <VARS>) {
  $linect ++;
# Remove the line terminator only (chop would eat a real character on a last
# line without newline); also drop the CR of a DOS-formatted file so that the
# exact comparisons below still work.
  chomp $_;
  s/\r$//;
  if ($initial eq ' ') {
    next if (! /Alphabetical list of .*input variables/);
    print "Alphabetical list found at line $linect" if ($debug >= 2);
    $initial = '/a';	# magic string found, search A
    }
  elsif ($initial eq '/a') {
    if ($_ eq ' <p>A.' || $_ eq ' <br>A.') {
      $initial = 'A';
      print "Letter $initial found at line $linect, previous count= $initVars" if ($debug >= 2);
      }
    next;
    }
  else {
    if (/(^ <)(br|p)(>[B-Z]\.)/) {	# next letter
      $initial = substr($3,1,1);
      print "Letter $initial found at line $linect, previous count= $initVars" if ($debug >= 3);
      $initVars = 0;
      }
    elsif (/^ (%?)(<a href=")(.*\.html#)(.*">)(.*)(<\/a>)(.*)/) {
# save the captures before any other pattern match overwrites them
      $IntVar = $1;	# '%' or ''
      $varlist = $5;
# multiple variables sometimes form a comma-separated list (e.g. cpus, cpum,...)
      foreach my $name (split(/,/,$varlist)) {
        $name =~ s/^\s+//;
        $name =~ s/\s+$//;
        next if ($name eq '');	# nothing between two commas
        $varname = "$IntVar$name";
        &NewVar($varname);
        }
      }
    }
  }
close (VARS);
if ($debug >= 2) {
  print "Total variables found: $globVars";
  print @Input_vars if ($debug >= 3);
  print "Numeric-ending variables: @numNdVars";
  print "Non-atomic units: @keywds";
  }
# An empty list means the help file does not have the expected format.
if ($globVars == 0) {
  print "Error, no variables found for $Module in $InVarsFile";
  exit 24;
  }
#
# Open the input file to be checked. The three-argument form keeps the
# user-supplied name from being parsed for mode characters.
$rc = open(INPUT,'<',$inputFile);
if (! $rc) {
  print "Error opening file $inputFile";
  exit 28;
  }
# read the input file
$linect = 0;		# line counter
@UnknVars = ();		# unrecognized variables list (an array, not a scalar)
$UnknCnt = 0;		# unrecognized variables count
# Scan the input file word by word and report every word that looks like a
# variable name but is neither in the reference list, nor a unit keyword, nor
# a known special form. The script exits with the number of such words.
#
# Lookup tables are built once, so that each word costs a hash probe instead
# of a scan of the complete list. Names are copied before the leading % of an
# internal variable is removed, leaving @Input_vars and @numNdVars untouched.
# Internal variables are entered (without %) only when -i was given; this
# applies to the digit-ending sublist as well, so that an internal variable
# whose name ends with a digit is recognized under -i.
my %isStringVar = ();
foreach my $name (@StringValVar) { $isStringVar{$name} = 1; }
my %isKeyword = ();
foreach my $name (@keywds) { $isKeyword{$name} = 1; }
my %isKnownVar = ();
my %isNumNdVar = ();
foreach my $pair ([\@Input_vars,\%isKnownVar], [\@numNdVars,\%isNumNdVar]) {
  my ($list,$table) = @$pair;
  foreach my $listed (@$list) {
    my $name = $listed;
    if ($name =~ /^%/) {		# internal variable
      next if ($internal != 1);		# skipped unless -i
      $name = substr($name,1);		# drop % to compare
      }
    $$table{$name} = 1;
    }
  }
while ($_ = <INPUT>) {
  $linect ++;
# remove the line terminator only: chop would truncate the last word of a
# file that does not end with a newline
  chomp $_;
# handle in-line comments
  $iUXcom = index($_,'#');	# unix comment
  $iFTcom = index($_,'!');	# Fortran comment
# if both # and ! are present, consider the first one as comment delimiter
# e.g. in Test_v2/t05.in and Tutorial/t31.in
  if ($iFTcom < 0 || ($iUXcom >= 0 && $iFTcom > $iUXcom)) {
    $ixCom = $iUXcom;
    }
  else {
    $ixCom = $iFTcom;
    }
  $_ = substr ($_,0,$ixCom) if ($ixCom >= 0);	# drop in-line comment
  @Token_List = split(' ',$_);
  $stringvalue = 0;
  TOKEN:
  foreach $token (@Token_List) {
    if ($stringvalue != 0) {	# possible string value of previous variable
      $stringvalue = 0;
      next;			# skip string
      }
    next if ($token !~ /^[a-z]/);	# only words starting with a lowercase letter are checked
    if ($isStringVar{$token}) {		# variable with a string value
      $stringvalue = 1;
      print "Variable $token with string value found at line $linect" if ($debug >= 2);
      next TOKEN;
      }
    $radix = $token;
# The following pattern matching test will detect double indices (loop)
# with metacharacter (i.e. 1-9) used for the first one and both arithmetic/
# geometric series values or 1-9 sequence for the second one
# Examples can be found in tests v1 #83, 84, 89 input files.
    if ($token =~ /(.*)(\?[:+*1-9]$)/) {
      $radix = $1;
      print "Double indices with 1st metachar suffix dropped for $token found at line $linect" if ($debug >= 2);
      }
# The following pattern matching test will detect double indices (loop)
# with metacharacter (i.e. 1-9) used for the second one and both arithmetic/
# geometric series values or 1-9 sequence for the first one
    elsif ($token =~ /(.*)([:+*1-9]\?$)/) {
      $radix = $1;
      print "Double indices with 2nd metachar suffix dropped for $token found at line $linect" if ($debug >= 2);
      }
# The following pattern matching test includes both arrays of single indices
# defined by jdtset with values 1-99 or arrays with double indices (loop)
# defined by udtset(2) with values 1-9. The coherence between ndtset and jdtset
# (or udtset) is NOT CHECKED here (e.g. "token10" is a valid variable name
# when jdtset is defined but invalid with udtset), e.g. test v1 #68.
    elsif ($token =~ /(.*\D)(\d{1,2}$)/ ) {	# ending with 1 or 2 digit(s) ?
      my $stem = $1;		# token without its numeric suffix
      if ($isNumNdVar{$token}) {	# the full name itself ends with digits
        print "Digit-ending variable $token found at line $linect" if ($debug >= 2);
        next TOKEN;
        }
      $radix = $stem;		# drop numeric suffix (for multiple data sets)
      print "Numeric suffix dropped for $token found at line $linect" if ($debug >= 2);
      }
# The following pattern matching test will detect arithmetic/geometric series
# starting value or increment/ratio, e.g. tests v1 #64,74,78
    elsif ($token =~ /(.*)([:+*]$)/) {	# special suffixes : + *
      $radix = $1;
      print "Serie suffix dropped for $token found at line $linect" if ($debug >= 2);
      }
# The three checks below are applied in this order and a later hit replaces
# an earlier one; only the wording of the debugging message depends on it.
    $found = '';
# check against complete variables list
    $found = "Normal variable $radix" if ($isKnownVar{$radix});
# check against keywords, non-atomic units, ...
    $found = "Keyword/unit $token" if ($isKeyword{$token});
# handle special cases; sqrt
    $found = "Special $token" if ($token =~ /sqrt\(.*\)/);
#
    if ($found eq '') {
      print "Error, unable to identify variable $token at line $linect";
      push(@UnknVars,$token);
      $UnknCnt ++;
      }
    elsif ($debug >= 3) {
      print "$found at line $linect found in list";
      }
    }
  }
close (INPUT);
#
print "Processing completed for $inputFile, $UnknCnt error(s) detected";
print "Unidentified variables:",@UnknVars if ($UnknCnt > 0);
# The exit status holds 8 bits only: without the cap, 256 errors would be
# reported to the caller as 0, i.e. as a clean input file.
exit ($UnknCnt > 255 ? 255 : $UnknCnt);
#
# subroutine NewVar
#   Record one variable name read from the reference list.
#   Argument: the variable name, as it has to be stored.
#   Effects on the global data:
#     - the name is appended to @Input_vars, the complete list;
#     - it is also appended to @numNdVars when it ends with a digit;
#     - $initVars and $globVars are both incremented.
#   When $debug >= 3, a trace line quoting the current $linect is printed.
#   Returns nothing.
sub NewVar {
  my ($variable) = @_;
  print "Variable $variable at line $linect added to list" if ($debug >= 3);
# add to list of variables whose name ends with a number if this applies
  push(@numNdVars,$variable) if ($variable =~ /\d$/);
# add to complete list of variables
  push(@Input_vars,$variable);
# bump counters
  $initVars ++;
  $globVars ++;
  return;
  }
