MayaChemTools

   1 #!/usr/bin/perl -w
   2 #
   3 # File: AnalyzeTextFilesData.pl
   4 # Author: Manish Sud <msud@san.rr.com>
   5 #
   6 # Copyright (C) 2024 Manish Sud. All rights reserved.
   7 #
   8 # This file is part of MayaChemTools.
   9 #
  10 # MayaChemTools is free software; you can redistribute it and/or modify it under
  11 # the terms of the GNU Lesser General Public License as published by the Free
  12 # Software Foundation; either version 3 of the License, or (at your option) any
  13 # later version.
  14 #
  15 # MayaChemTools is distributed in the hope that it will be useful, but without
  16 # any warranty; without even the implied warranty of merchantability or fitness
  17 # for a particular purpose.  See the GNU Lesser General Public License for more
  18 # details.
  19 #
  20 # You should have received a copy of the GNU Lesser General Public License
  21 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  22 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  23 # Boston, MA, 02111-1307, USA.
  24 #
  25 
  26 use strict;
  27 use FindBin; use lib "$FindBin::Bin/../lib";
  28 use Getopt::Long;
  29 use File::Basename;
  30 use Text::ParseWords;
  31 use Benchmark;
  32 use FileUtil;
  33 use TextUtil;
  34 use StatisticsUtil;
  35 
  36 my($ScriptName, %Options, $StartTime, $EndTime, $TotalTime);
  37 
  38 # Autoflush STDOUT
  39 $| = 1;
  40 
  41 # Starting message...
  42 $ScriptName = basename($0);
  43 print "\n$ScriptName: Starting...\n\n";
  44 $StartTime = new Benchmark;
  45 
  46 # Get the options and setup script...
  47 SetupScriptUsage();
  48 if ($Options{help} || @ARGV < 1) {
  49   die GetUsageFromPod("$FindBin::Bin/$ScriptName");
  50 }
  51 
  52 my(@TextFilesList);
  53 @TextFilesList = ExpandFileNames(\@ARGV, "csv tsv");
  54 
  55 print "Processing options...\n";
  56 my(%OptionsInfo);
  57 ProcessOptions();
  58 
  59 # Collect column information for all the text files...
  60 print "Checking input text file(s)...\n";
  61 my(%TextFilesInfo);
  62 RetrieveTextFilesInfo();
  63 ProcessColumnsInfo();
  64 
  65 # Generate output files...
  66 my($FileIndex);
  67 if (@TextFilesList > 1) {
  68   print "\nProcessing text files...\n";
  69 }
  70 for $FileIndex (0 .. $#TextFilesList) {
  71   if ($TextFilesInfo{FileOkay}[$FileIndex]) {
  72     print "\nProcessing file $TextFilesList[$FileIndex]...\n";
  73     AnalyzeTextFile($FileIndex);
  74   }
  75 }
  76 print "\n$ScriptName:Done...\n\n";
  77 
  78 $EndTime = new Benchmark;
  79 $TotalTime = timediff ($EndTime, $StartTime);
  80 print "Total time: ", timestr($TotalTime), "\n";
  81 
  82 ###############################################################################
  83 
  84 # Analyze data...
  85 sub AnalyzeTextFile {
  86   my($Index) = @_;
  87   my($TextFile, $Line, $InDelim, $ColNum, $Value, @LineWords, @ColNumsToAnalyze, %ColValuesToAnalyzeMap);
  88 
  89   $TextFile = $TextFilesList[$Index];
  90   $InDelim = $TextFilesInfo{InDelim}[$Index];
  91   @ColNumsToAnalyze = @{$TextFilesInfo{UniqueColNumsToAnalyze}[$Index]};
  92   %ColValuesToAnalyzeMap = ();
  93   for $ColNum (@ColNumsToAnalyze) {
  94     @{$ColValuesToAnalyzeMap{$ColNum}} = ();
  95   }
  96 
  97   my($LineCount, $InvalidLineCount, @InvalidColLabels);
  98 
  99   open TEXTFILE, "$TextFile" or die "Error: Can't open $TextFile: $! \n";
 100   # Skip over column labels line in text file and collect appropriate column data
 101   # for analysis...
 102   $Line = GetTextLine(\*TEXTFILE);
 103   $LineCount = 1;
 104   $InvalidLineCount = 0;
 105   while ($Line = GetTextLine(\*TEXTFILE)) {
 106     $LineCount++;
 107     @LineWords = quotewords($InDelim, 0, $Line);
 108     @InvalidColLabels = ();
 109     COLNUM: for $ColNum (@ColNumsToAnalyze) {
 110       $Value = $LineWords[$ColNum];
 111       if ($OptionsInfo{CheckData}) {
 112         if (!IsNumerical($Value)) {
 113           push @InvalidColLabels, $TextFilesInfo{ColLabels}[$Index][$ColNum];
 114           next COLNUM;
 115         }
 116       }
 117       push @{$ColValuesToAnalyzeMap{$ColNum}}, $Value;
 118     }
 119     if (@InvalidColLabels) {
 120       $InvalidLineCount++;
 121       if ($OptionsInfo{DetailLevel} >=4 ) {
 122         print "Line number $LineCount contains ", scalar(@InvalidColLabels)," non-numerical or empty value(s) for column(s) - ", JoinWords(\@InvalidColLabels, ", ", 0)," - to be analyzed: $Line \n";
 123       }
 124       elsif ($OptionsInfo{DetailLevel} >= 3) {
 125         print "Line number $LineCount contains ", scalar(@InvalidColLabels)," non-numerical or empty value(s) for column(s) - ", JoinWords(\@InvalidColLabels, ", ", 0)," - to be analyzed...\n";
 126       }
 127       elsif ($OptionsInfo{DetailLevel} >= 2) {
 128         print "Line number $LineCount contains ", scalar(@InvalidColLabels)," non-numerical or empty value(s) for columns to be analyzed...\n";
 129       }
 130     }
 131   }
 132   if ($InvalidLineCount && ($OptionsInfo{DetailLevel} >= 1)) {
 133     print "Non-numerical or empty data present in $InvalidLineCount line(s)...\n";
 134   }
 135   close TEXTFILE;
 136 
 137   # Perform the analysis...
 138   my(@SpecifiedFunctionNames, $SpecifiedFunction);
 139   @SpecifiedFunctionNames = ();
 140 
 141   for $SpecifiedFunction (@{$OptionsInfo{SpecifiedStatisticalFunctions}}) {
 142     if ($SpecifiedFunction !~ /^(Covariance|Correlation|Frequency|Rsquare|StandardScores|StandardScoresN)$/i) {
 143       push @SpecifiedFunctionNames, $OptionsInfo{SpecifiedStatisticalFunctionsMap}{lc($SpecifiedFunction)};
 144     }
 145   }
 146   if (@SpecifiedFunctionNames) {
 147     PerformAnalysis($Index, \@SpecifiedFunctionNames, \%ColValuesToAnalyzeMap)
 148   }
 149   if (exists($OptionsInfo{SpecifiedStatisticalFunctionsMap}{covariance}) || exists($OptionsInfo{SpecifiedStatisticalFunctionsMap}{correlation}) || exists($OptionsInfo{SpecifiedStatisticalFunctionsMap}{rsquare})) {
 150     if ($OptionsInfo{AllColumnPairs}) {
 151       PerformMatrixAnalysis($Index, \%ColValuesToAnalyzeMap);
 152     }
 153     else {
 154       # Perform pairwise analysis for specified columns and write out calculated values - correlation
 155       # rsquare, or covariance - in the same file.
 156       PerformColumnPairAnalysis($Index, \%ColValuesToAnalyzeMap);
 157     }
 158   }
 159   if (exists($OptionsInfo{SpecifiedStatisticalFunctionsMap}{standardscores}) || exists($OptionsInfo{SpecifiedStatisticalFunctionsMap}{standardscoresn}) ) {
 160     PerformStandardScoresAnalysis($Index, \%ColValuesToAnalyzeMap);
 161   }
 162   if (exists($OptionsInfo{SpecifiedStatisticalFunctionsMap}{frequency})) {
 163     PerformFrequencyAnalysis($Index, \%ColValuesToAnalyzeMap);
 164   }
 165 }
 166 
 167 # Calculate values for various statistical functions...
 168 sub PerformAnalysis {
 169   my($Index, $SpecifiedFunctionNamesRef, $ColValuesToAnalyzeMapRef) = @_;
 170   my($NewTextFile, $Line, $SpecifiedFunction, $Label, @ColLabels, @ColNumsToAnalyze);
 171 
 172   $NewTextFile = $TextFilesInfo{OutFileRoot}[$Index] . $OptionsInfo{FileNameMode} . "." . $TextFilesInfo{OutFileExt}[$Index];
 173 
 174   print "Generating new text file $NewTextFile...\n";
 175   open NEWTEXTFILE, ">$NewTextFile" or die "Error: Can't open $NewTextFile: $! \n";
 176 
 177   # Write out column labels...
 178   @ColLabels = ();
 179   push @ColLabels, "ColumnID";
 180   for $SpecifiedFunction (@{$SpecifiedFunctionNamesRef}) {
 181     $Label = $SpecifiedFunction;
 182     if ($SpecifiedFunction =~ /^(KLargest|KSmallest)$/i) {
 183       my($KthValue);
 184       $KthValue = ($SpecifiedFunction =~ /^KLargest$/i) ? $OptionsInfo{KLargest} : $OptionsInfo{KSmallest};
 185       $Label = AddNumberSuffix($KthValue) . "$SpecifiedFunction";
 186       $Label =~ s/K//g;
 187     }
 188     elsif ($SpecifiedFunction =~ /^TrimMean$/i) {
 189       $Label = "${SpecifiedFunction}($OptionsInfo{TrimFraction})";
 190     }
 191     push @ColLabels, $Label;
 192   }
 193   $Line = JoinWords(\@ColLabels, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});
 194   print NEWTEXTFILE "$Line\n";
 195 
 196   # Go over each column to be analyzed...
 197   @ColNumsToAnalyze = @{$TextFilesInfo{ColNumsToAnalyze}[$Index]};
 198 
 199   # Turn off "strict"; otherwise, invoking statistical functions using function name string
 200   # is problematic.
 201   no strict;
 202 
 203   my($ColValuesRef, $ColNum, $Value, @RowValues, %CalculatedValues);
 204   %CalculatedValues = ();
 205   for $ColNum (@ColNumsToAnalyze) {
 206     @RowValues = ();
 207     # Setup column id...
 208     push @RowValues, $TextFilesInfo{ColLabels}[$Index][$ColNum];
 209     $ColValuesRef =  \@{$ColValuesToAnalyzeMapRef->{$ColNum}};
 210     FUNCTIONNAME: for $SpecifiedFunction (@{$SpecifiedFunctionNamesRef}) {
 211       $Value = "";
 212       if (!@{$ColValuesToAnalyzeMapRef->{$ColNum}}) {
 213         # Invalid column values...
 214         push @RowValues, $Value;
 215         next FUNCTIONNAME;
 216       }
 217       if ($SpecifiedFunction =~ /^Count$/i) {
 218         $Value = @{$ColValuesToAnalyzeMapRef->{$ColNum}};
 219       }
 220       elsif ($SpecifiedFunction =~ /^KLargest$/i) {
 221         $Value = &$SpecifiedFunction($ColValuesRef, $OptionsInfo{KLargest});
 222       }
 223       elsif ($SpecifiedFunction =~ /^KSmallest$/i) {
 224         $Value = &$SpecifiedFunction($ColValuesRef, $OptionsInfo{KSmallest});
 225       }
 226       elsif ($SpecifiedFunction =~ /^StandardDeviation$/i) {
 227         if (exists($CalculatedValues{$ColNum}{StandardDeviation})) {
 228           $Value = $CalculatedValues{$ColNum}{StandardDeviation};
 229         }
 230         else {
 231           $Value = &$SpecifiedFunction($ColValuesRef);
 232           $CalculatedValues{$ColNum}{StandardDeviation} = $Value;
 233         }
 234       }
 235       elsif ($SpecifiedFunction =~ /^StandardError$/i) {
 236         if (!exists($CalculatedValues{$ColNum}{StandardDeviation})) {
 237           $Value = StandardDeviation($ColValuesRef);
 238           $CalculatedValues{$ColNum}{StandardDeviation} = $Value;
 239         }
 240         if (defined $CalculatedValues{$ColNum}{StandardDeviation}) {
 241           $Value = &$SpecifiedFunction($CalculatedValues{$ColNum}{StandardDeviation}, @{$ColValuesToAnalyzeMapRef->{$ColNum}});
 242         }
 243       }
 244       elsif ($SpecifiedFunction =~ /^TrimMean$/i) {
 245         $Value = &$SpecifiedFunction($ColValuesRef, $OptionsInfo{TrimFraction});
 246       }
 247       else {
 248         $Value = &$SpecifiedFunction($ColValuesRef);
 249       }
 250       # Format the output value. And add zero to get rid of tariling zeros...
 251       $Value = (defined($Value) && length($Value)) ? (sprintf("%.$OptionsInfo{Precision}f", $Value) + 0) : "";
 252       push @RowValues, $Value;
 253     }
 254     $Line = JoinWords(\@RowValues, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});
 255     print NEWTEXTFILE "$Line\n";
 256   }
 257   close NEWTEXTFILE;
 258 }
 259 
 260 # Calculate covariance, correlation, rsquare for specified column pairs....
 261 sub PerformColumnPairAnalysis {
 262   my($Index, $ColValuesToAnalyzeMapRef) = @_;
 263   my($NewTextFile, @ColLabels, $Line, $CalculateCorrelation, $CalculateRSquare, $CalculateCovariance);
 264   $CalculateCorrelation = exists($OptionsInfo{SpecifiedStatisticalFunctionsMap}{correlation}) ? 1 : 0;
 265   $CalculateRSquare = exists($OptionsInfo{SpecifiedStatisticalFunctionsMap}{rsquare}) ? 1 : 0;
 266   $CalculateCovariance = exists($OptionsInfo{SpecifiedStatisticalFunctionsMap}{covariance}) ? 1 : 0;
 267 
 268   $NewTextFile = $TextFilesInfo{OutFileRoot}[$Index] . "ColumnPairsAnalysis." .  $TextFilesInfo{OutFileExt}[$Index];
 269   print "Generating new text file $NewTextFile...\n";
 270   open NEWTEXTFILE, ">$NewTextFile" or die "Error: Can't open $NewTextFile: $! \n";
 271 
 272   # Write out the column labels...
 273   @ColLabels = ();
 274   push @ColLabels, ("ColumnID1", "ColumnID2");
 275   if ($CalculateCorrelation || $CalculateRSquare) {
 276     push @ColLabels, "Correlation";
 277     if ($CalculateRSquare) {
 278       push @ColLabels, "RSquare";
 279     }
 280   }
 281   if ($CalculateCovariance) {
 282     push @ColLabels, "Covariance";
 283   }
 284   $Line = JoinWords(\@ColLabels, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});
 285   print NEWTEXTFILE "$Line\n";
 286 
 287   # Go over each column pair...
 288   my($CorrelationValue, $RSquareValue, $CovarianceValue,  $ColIndex, $ColNum1, $ColNum2, $ColValuesRef1, $ColValuesRef2, @ColPairs1ToAnalyze, @ColPairs2ToAnalyze, @RowValues, $Value);
 289 
 290   @ColPairs1ToAnalyze = @{$TextFilesInfo{ColPairs1ToAnalyze}[$Index]};
 291   @ColPairs2ToAnalyze = @{$TextFilesInfo{ColPairs2ToAnalyze}[$Index]};
 292   for $ColIndex (0 .. $#ColPairs1ToAnalyze) {
 293     @RowValues = ();
 294     $ColNum1 = $ColPairs1ToAnalyze[$ColIndex];
 295     $ColNum2 = $ColPairs2ToAnalyze[$ColIndex];
 296     $ColValuesRef1 =  \@{$ColValuesToAnalyzeMapRef->{$ColNum1}};
 297     $ColValuesRef2 =  \@{$ColValuesToAnalyzeMapRef->{$ColNum2}};
 298 
 299     # Setup column ids...
 300     push @RowValues, $TextFilesInfo{ColLabels}[$Index][$ColNum1];
 301     push @RowValues, $TextFilesInfo{ColLabels}[$Index][$ColNum2];
 302 
 303     if (@$ColValuesRef1 != @$ColValuesRef2) {
 304       # Print a warning...
 305       warn "Warning: Skipping analysis for column pair $TextFilesInfo{ColLabels}[$Index][$ColNum1], $TextFilesInfo{ColLabels}[$Index][$ColNum2]: Number of valid data values must be same.\n";
 306       if ($CalculateCorrelation || $CalculateRSquare) {
 307         push @RowValues, "";
 308         if ($CalculateRSquare) {
 309           push @RowValues, "";
 310         }
 311       }
 312       if ($CalculateCovariance) {
 313         push @RowValues, "";
 314       }
 315     }
 316     else {
 317       # Calculate appropriate value...
 318       if ($CalculateCorrelation || $CalculateRSquare) {
 319         $CorrelationValue = Correlation($ColValuesRef1, $ColValuesRef2);
 320         $Value = (defined($CorrelationValue) && length($CorrelationValue)) ? (sprintf("%.$OptionsInfo{Precision}f", $CorrelationValue) + 0) : "";
 321         push @RowValues, $Value;
 322         if ($CalculateRSquare) {
 323           $RSquareValue = (defined($CorrelationValue) && length($CorrelationValue)) ? ($CorrelationValue ** 2) : "";
 324           $Value = (length($RSquareValue)) ? (sprintf("%.$OptionsInfo{Precision}f", $RSquareValue) + 0) : "";
 325           push @RowValues, $Value;
 326         }
 327       }
 328       if ($CalculateCovariance) {
 329         $CovarianceValue = Covariance($ColValuesRef1, $ColValuesRef2);
 330         $Value = (defined($CovarianceValue) && length($CovarianceValue)) ? (sprintf("%.$OptionsInfo{Precision}f", $CovarianceValue) + 0) : "";
 331         push @RowValues, $Value;
 332       }
 333     }
 334     $Line = JoinWords(\@RowValues, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});
 335     print NEWTEXTFILE "$Line\n";
 336   }
 337   close NEWTEXTFILE;
 338 }
 339 
 340 # Generate histogram numbers...
 341 sub PerformFrequencyAnalysis {
 342   my($Index, $ColValuesToAnalyzeMapRef) = @_;
 343   my($NewTextFile, $ColLabel, @ColLabels, @RowValues, $Line, $ColNum, @ColNumsToAnalyze, $ColValuesRef, $BinValue, $FrequencyValue, $Value, %FrequencyMap);
 344 
 345   @ColNumsToAnalyze = @{$TextFilesInfo{ColNumsToAnalyze}[$Index]};
 346   for $ColNum (@ColNumsToAnalyze) {
 347     $NewTextFile = $TextFilesInfo{OutFileRoot}[$Index] . $TextFilesInfo{ColLabels}[$Index][$ColNum] . "FrequencyAnalysis." .  $TextFilesInfo{OutFileExt}[$Index];
 348     print "Generating new text file $NewTextFile...\n";
 349     open NEWTEXTFILE, ">$NewTextFile" or die "Error: Can't open $NewTextFile: $! \n";
 350 
 351     # Write out the column labels...
 352     @ColLabels = ();
 353     push @ColLabels , ("Bins", "Frequency");
 354     $Line = JoinWords(\@ColLabels, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});
 355     print NEWTEXTFILE "$Line\n";
 356 
 357     #Calculate and write out frequency values...
 358     %FrequencyMap = ();
 359     $ColValuesRef =  \@{$ColValuesToAnalyzeMapRef->{$ColNum}};
 360     if (@$ColValuesRef) {
 361       if (@{$OptionsInfo{BinRange}}) {
 362         %FrequencyMap = Frequency($ColValuesRef, \@{$OptionsInfo{BinRange}});
 363       }
 364       else {
 365         %FrequencyMap = Frequency($ColValuesRef, $OptionsInfo{NumOfBins});
 366       }
 367     }
 368     for $BinValue (sort { $a <=> $b }  keys %FrequencyMap) {
 369       $FrequencyValue = $FrequencyMap{$BinValue};
 370 
 371       @RowValues = ();
 372       $Value = (length($BinValue)) ? (sprintf("%.$OptionsInfo{Precision}f", $BinValue) + 0) : "";
 373       push @RowValues, $Value;
 374       $Value = (length($FrequencyValue)) ? (sprintf("%.$OptionsInfo{Precision}f", $FrequencyValue) + 0) : "";
 375       push @RowValues, $Value;
 376 
 377       $Line = JoinWords(\@RowValues, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});
 378       print NEWTEXTFILE "$Line\n";
 379     }
 380     close NEWTEXTFILE;
 381   }
 382 }
 383 
 384 # Calculate covariance, correlation/rsquare matrices....
 385 sub PerformMatrixAnalysis {
 386   my($Index, $ColValuesToAnalyzeMapRef) = @_;
 387   my($CorrelationTextFile, $CovarianceTextFile, $RSquareTextFile, $CalculateCorrelation, $CalculateRSquare, $CalculateCovariance);
 388 
 389   $CalculateCorrelation = exists($OptionsInfo{SpecifiedStatisticalFunctionsMap}{correlation}) ? 1 : 0;
 390   $CalculateRSquare = exists($OptionsInfo{SpecifiedStatisticalFunctionsMap}{rsquare}) ? 1 : 0;
 391   $CalculateCovariance = exists($OptionsInfo{SpecifiedStatisticalFunctionsMap}{covariance}) ? 1 : 0;
 392 
 393   $CorrelationTextFile = $TextFilesInfo{OutFileRoot}[$Index] . "CorrelationMatrix." .  $TextFilesInfo{OutFileExt}[$Index];
 394   $RSquareTextFile = $TextFilesInfo{OutFileRoot}[$Index] . "RSquareMatrix." .  $TextFilesInfo{OutFileExt}[$Index];
 395   $CovarianceTextFile = $TextFilesInfo{OutFileRoot}[$Index] . "CovarianceMatrix." .  $TextFilesInfo{OutFileExt}[$Index];
 396 
 397   my($TextFilesList, $Delimiter);
 398   $TextFilesList =  "";
 399   if ($CalculateCorrelation || $CalculateRSquare) {
 400     $TextFilesList = $CorrelationTextFile;
 401     if ($CalculateRSquare) {
 402       $TextFilesList .= ", $CorrelationTextFile";
 403     }
 404   }
 405   $Delimiter = length($TextFilesList) ? "," : "";
 406   if ($CalculateCovariance) {
 407     $TextFilesList .= "${Delimiter} ${CorrelationTextFile}";
 408   }
 409   if ($TextFilesList =~ /\,/) {
 410     print "Generating new text files $TextFilesList...\n"
 411   }
 412   else {
 413     print "Generating new text file $TextFilesList...\n"
 414   }
 415   if ($CalculateCorrelation || $CalculateRSquare) {
 416     open CORRELATIONTEXTFILE, ">$CorrelationTextFile" or die "Error: Can't open $CorrelationTextFile: $! \n";
 417     if ($CalculateRSquare) {
 418       open RSQUARETEXTFILE, ">$RSquareTextFile" or die "Error: Can't open $RSquareTextFile: $! \n";
 419     }
 420   }
 421   if ($CalculateCovariance) {
 422     open COVARIANCETEXTFILE, ">$CovarianceTextFile" or die "Error: Can't open $CovarianceTextFile: $! \n";
 423   }
 424 
 425   my($Line, $Value, $CorrelationValue, $RSquareValue, $CovarianceValue, $ColNum, $ColNum1, $ColNum2, $ColValuesRef1, $ColValuesRef2, @ColLabels, @CovarianceRowValues, @CorrelationRowValues, @RSquareRowValues);
 426 
 427   # Write out the column labels...
 428   @ColLabels = ();
 429   push @ColLabels, "";
 430   for $ColNum (0 .. ($TextFilesInfo{ColCount}[$Index] - 1)) {
 431     push @ColLabels, $TextFilesInfo{ColLabels}[$Index][$ColNum];
 432   }
 433   $Line = JoinWords(\@ColLabels, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});
 434   if ($CalculateCorrelation || $CalculateRSquare) {
 435     print CORRELATIONTEXTFILE "$Line\n";
 436     if ($CalculateRSquare) {
 437       print RSQUARETEXTFILE "$Line\n";
 438     }
 439   }
 440   if ($CalculateCovariance) {
 441     print COVARIANCETEXTFILE "$Line\n";
 442   }
 443 
 444   # Due to symmetric nature of these matrices, only one half needs to be
 445   # calculated. So, just calculate the lower half and copy it to upper half...
 446   my(%CorrelationMatrixMap, %RSquareMatrixMap, %CovarianceMatrixMap);
 447 
 448   %CorrelationMatrixMap = (); %RSquareMatrixMap = (); %CovarianceMatrixMap = ();
 449   for $ColNum1 (0 .. ($TextFilesInfo{ColCount}[$Index] - 1)) {
 450     for $ColNum2 (0 .. $ColNum1) {
 451       $ColValuesRef1 =  \@{$ColValuesToAnalyzeMapRef->{$ColNum1}};
 452       $ColValuesRef2 =  \@{$ColValuesToAnalyzeMapRef->{$ColNum2}};
 453       if ($CalculateCorrelation || $CalculateRSquare) {
 454         $CorrelationValue = Correlation($ColValuesRef1, $ColValuesRef2);
 455         $CorrelationValue = (defined($CorrelationValue) && length($CorrelationValue)) ? (sprintf("%.$OptionsInfo{Precision}f", $CorrelationValue) + 0) : "";
 456         $CorrelationMatrixMap{$ColNum1}{$ColNum2} = $CorrelationValue;
 457         if ($ColNum1 != $ColNum2) {
 458           $CorrelationMatrixMap{$ColNum2}{$ColNum1} = $CorrelationValue;
 459         }
 460         if ($CalculateRSquare) {
 461           $RSquareValue = (defined($CorrelationValue) && length($CorrelationValue)) ? ($CorrelationValue ** 2) : "";
 462           $RSquareValue = (length($RSquareValue)) ? (sprintf("%.$OptionsInfo{Precision}f", $RSquareValue) + 0) : "";
 463           $RSquareMatrixMap{$ColNum1}{$ColNum2} = $RSquareValue;
 464           if ($ColNum1 != $ColNum2) {
 465             $RSquareMatrixMap{$ColNum2}{$ColNum1} = $RSquareValue;
 466           }
 467         }
 468       }
 469       if ($CalculateCovariance) {
 470         $CovarianceValue = Covariance($ColValuesRef1, $ColValuesRef2);
 471         $CovarianceValue = (defined($CovarianceValue) && length($CovarianceValue)) ? (sprintf("%.$OptionsInfo{Precision}f", $CovarianceValue) + 0) : "";
 472         $CovarianceMatrixMap{$ColNum1}{$ColNum2} = $CovarianceValue;
 473         if ($ColNum1 != $ColNum2) {
 474           $CovarianceMatrixMap{$ColNum2}{$ColNum1} = $CovarianceValue;
 475         }
 476       }
 477     }
 478   }
 479 
 480   # Write out the matrices...
 481   for $ColNum1 (0 .. ($TextFilesInfo{ColCount}[$Index] - 1)) {
 482     @CorrelationRowValues = ();
 483     @RSquareRowValues = ();
 484     @CovarianceRowValues = ();
 485     if ($CalculateCorrelation || $CalculateRSquare) {
 486       push @CorrelationRowValues, $TextFilesInfo{ColLabels}[$Index][$ColNum1];
 487       if ($CalculateRSquare) {
 488         push @RSquareRowValues, $TextFilesInfo{ColLabels}[$Index][$ColNum1];
 489       }
 490     }
 491     if ($CalculateCovariance) {
 492       push @CovarianceRowValues, $TextFilesInfo{ColLabels}[$Index][$ColNum1];
 493     }
 494     for $ColNum2 (0 .. ($TextFilesInfo{ColCount}[$Index] - 1)) {
 495       if ($CalculateCorrelation || $CalculateRSquare) {
 496         push @CorrelationRowValues, $CorrelationMatrixMap{$ColNum1}{$ColNum2};
 497         if ($CalculateRSquare) {
 498           push @RSquareRowValues, $RSquareMatrixMap{$ColNum1}{$ColNum2};
 499         }
 500       }
 501       if ($CalculateCovariance) {
 502         push @CovarianceRowValues, $CovarianceMatrixMap{$ColNum1}{$ColNum2};
 503       }
 504     }
 505     if ($CalculateCorrelation || $CalculateRSquare) {
 506       $Line = JoinWords(\@CorrelationRowValues, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});
 507       print CORRELATIONTEXTFILE "$Line\n";
 508       if ($CalculateRSquare) {
 509         $Line = JoinWords(\@RSquareRowValues, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});
 510         print RSQUARETEXTFILE "$Line\n";
 511       }
 512     }
 513     if ($CalculateCovariance) {
 514       $Line = JoinWords(\@CovarianceRowValues, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});
 515       print COVARIANCETEXTFILE "$Line\n";
 516     }
 517   }
 518   if ($CalculateCorrelation || $CalculateRSquare) {
 519     close CORRELATIONTEXTFILE;
 520     if ($CalculateRSquare) {
 521       close RSQUARETEXTFILE;
 522     }
 523   }
 524   if ($CalculateCovariance) {
 525     close COVARIANCETEXTFILE;
 526   }
 527 }
 528 
 529 # Calculate standard scores...
 530 sub PerformStandardScoresAnalysis {
 531   my($Index, $ColValuesToAnalyzeMapRef) = @_;
 532   my($StandardScores, $StandardScoresN, $NewTextFile, @ColLabels, $Label, $NewLine);
 533 
 534   $StandardScores = exists($OptionsInfo{SpecifiedStatisticalFunctionsMap}{standardscores}) ? 1 : 0;
 535   $StandardScoresN = exists($OptionsInfo{SpecifiedStatisticalFunctionsMap}{standardscoresn}) ? 1 : 0;
 536 
 537   $NewTextFile = $TextFilesInfo{OutFileRoot}[$Index] . "StandardScores." .  $TextFilesInfo{OutFileExt}[$Index];
 538   print "Generating new text file $NewTextFile...\n";
 539   open NEWTEXTFILE, ">$NewTextFile" or die "Error: Can't open $NewTextFile: $! \n";
 540 
 541   my($ColValuesRef, $ColNum, @ColNumsToAnalyze);
 542   # Write out column labels...
 543   @ColLabels = ();
 544   @ColNumsToAnalyze = @{$TextFilesInfo{ColNumsToAnalyze}[$Index]};
 545   for $ColNum (@ColNumsToAnalyze) {
 546     $Label = $TextFilesInfo{ColLabels}[$Index][$ColNum];
 547     if ($StandardScores) {
 548       push @ColLabels, "${Label}\(StandardScores)";
 549     }
 550     if ($StandardScoresN) {
 551       push @ColLabels, "${Label}\(StandardScoresN)";
 552     }
 553   }
 554   $NewLine = JoinWords(\@ColLabels, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});
 555   print NEWTEXTFILE "$NewLine\n";
 556 
 557   # Go over each column to be analyzed and calculate standard deviation
 558   # and mean values...
 559   my(%StandardDeviationMap, %StandardDeviationNMap, %MeanMap);
 560   %StandardDeviationMap = ();
 561   %StandardDeviationNMap = ();
 562   %MeanMap = ();
 563   for $ColNum (@ColNumsToAnalyze) {
 564     $ColValuesRef =  \@{$ColValuesToAnalyzeMapRef->{$ColNum}};
 565     if (!exists($MeanMap{$ColNum})) {
 566       $MeanMap{$ColNum} = Mean($ColValuesRef);
 567     }
 568     if ($StandardScores) {
 569       if (!exists($StandardDeviationMap{$ColNum})) {
 570         $StandardDeviationMap{$ColNum} = StandardDeviation($ColValuesRef);
 571       }
 572     }
 573     if ($StandardScoresN) {
 574       if (!exists($StandardDeviationNMap{$ColNum})) {
 575         $StandardDeviationNMap{$ColNum} = StandardDeviationN($ColValuesRef);
 576       }
 577     }
 578   }
 579   #
 580   # Go over each row and calculate standard scores for each column
 581   # using (x[i] - mean) / (n - 1) for StandardScores and (x[i] - mean) / n
 582   # for StandardScoresN; write out the calculated values as well...
 583 
 584   my($TextFile, $InDelim, $Line, $Value, $ValueOkay, $ScoreValue, @RowValues, @LineWords);
 585   $TextFile = $TextFilesList[$Index];
 586   $InDelim = $TextFilesInfo{InDelim}[$Index];
 587 
 588   open TEXTFILE, "$TextFile" or die "Error: Can't open $TextFile: $! \n";
 589   $Line = GetTextLine(\*TEXTFILE);
 590   while ($Line = GetTextLine(\*TEXTFILE)) {
 591     @LineWords = quotewords($InDelim, 0, $Line);
 592     @RowValues = ();
 593     COLNUM: for $ColNum (@ColNumsToAnalyze) {
 594       $Value = $LineWords[$ColNum];
 595       $ValueOkay = ($OptionsInfo{CheckData} && !IsNumerical($Value)) ? 0 : 1;
 596       if ($StandardScores) {
 597         $ScoreValue = $ValueOkay ? (($Value - $MeanMap{$ColNum})/$StandardDeviationMap{$ColNum}) : "";
 598         $ScoreValue = (defined($ScoreValue) && length($ScoreValue)) ? (sprintf("%.$OptionsInfo{Precision}f", $ScoreValue) + 0) : "";
 599         push @RowValues, $ScoreValue;
 600       }
 601       if ($StandardScoresN) {
 602         $ScoreValue = $ValueOkay ? (($Value - $MeanMap{$ColNum})/$StandardDeviationNMap{$ColNum}) : "";
 603         $ScoreValue = (defined($ScoreValue) && length($ScoreValue)) ? (sprintf("%.$OptionsInfo{Precision}f", $ScoreValue) + 0) : "";
 604         push @RowValues, $ScoreValue;
 605       }
 606     }
 607     $NewLine = JoinWords(\@RowValues, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});
 608     print NEWTEXTFILE "$NewLine\n";
 609   }
 610   close TEXTFILE;
 611   close NEWTEXTFILE;
 612 }
 613 
 614 # Make sure the specified columns exists in text files...
 615 sub ProcessColumnsInfo {
 616   my($Index, $TextFile, $ColNum, $NewColNum, $ColIndex, @ColNumsToAnalyze, %UniqueColNumsToAnalyzeMap);
 617 
 618   @{$TextFilesInfo{ColNumsToAnalyze}} = ();
 619   @{$TextFilesInfo{ColPairs1ToAnalyze}} = ();
 620   @{$TextFilesInfo{ColPairs2ToAnalyze}} = ();
 621   @{$TextFilesInfo{UniqueColNumsToAnalyze}} = ();
 622 
 623   FILELIST: for $Index (0 .. $#TextFilesList) {
 624     $TextFile = $TextFilesList[$Index];
 625 
 626     @{$TextFilesInfo{ColNumsToAnalyze}[$Index]} = ();
 627     @{$TextFilesInfo{ColPairs1ToAnalyze}[$Index]} = ();
 628     @{$TextFilesInfo{ColPairs2ToAnalyze}[$Index]} = ();
 629     @{$TextFilesInfo{UniqueColNumsToAnalyze}[$Index]} = ();
 630 
 631     %UniqueColNumsToAnalyzeMap = ();
 632 
 633     if ($TextFilesInfo{FileOkay}[$Index]) {
 634       @ColNumsToAnalyze = ();
 635       if (@{$OptionsInfo{SpecifiedColumns}}) {
 636         if ($OptionsInfo{ColMode} =~ /^colnum$/i) {
 637           for $ColNum (@{$OptionsInfo{SpecifiedColumns}}) {
 638             if ($ColNum >=1 && $ColNum <= $TextFilesInfo{ColCount}[$Index]) {
 639               $NewColNum = $ColNum -1;
 640               push @ColNumsToAnalyze, $NewColNum;
 641             }
 642           }
 643         }
 644         else {
 645           my($ColLabel);
 646           for $ColLabel (@{$OptionsInfo{SpecifiedColumns}}) {
 647             if (exists($TextFilesInfo{ColLabelToNumMap}[$Index]{$ColLabel})) {
 648               push @ColNumsToAnalyze, $TextFilesInfo{ColLabelToNumMap}[$Index]{$ColLabel};
 649             }
 650           }
 651         }
 652       }
 653       elsif (defined  $OptionsInfo{Columns} && $OptionsInfo{Columns} =~ /^All$/i) {
 654         for $ColNum (0 .. ($TextFilesInfo{ColCount}[$Index] - 1)) {
 655           push @ColNumsToAnalyze, $ColNum;
 656         }
 657       }
 658       else {
 659         push @ColNumsToAnalyze, 0;
 660       }
 661       if (@ColNumsToAnalyze) {
 662         push @{$TextFilesInfo{ColNumsToAnalyze}[$Index]}, @ColNumsToAnalyze;
 663         # Set up unique columns map as well...
 664         for $ColNum (@ColNumsToAnalyze) {
 665           if (!exists $UniqueColNumsToAnalyzeMap{$ColNum}) {
 666             $UniqueColNumsToAnalyzeMap{$ColNum} = $ColNum;
 667           }
 668         }
 669       }
 670       else {
 671         warn "Warning: Ignoring file $TextFile: None of the columns specified, @{$OptionsInfo{SpecifiedColumns}}, using \"--columns\" option exist.\n";
 672         $TextFilesInfo{FileOkay}[$Index] = 0;
 673         next FILELIST;
 674       }
 675       if (!$OptionsInfo{Overwrite} && exists($OptionsInfo{SpecifiedStatisticalFunctionsMap}{frequency})) {
 676         # Make sure specific frequency files don't exist...
 677         my($FrequencyFile);
 678         for $ColNum (@ColNumsToAnalyze) {
 679           $FrequencyFile = $TextFilesInfo{OutFileRoot}[$Index] . $TextFilesInfo{ColLabels}[$Index][$ColNum] . "FrequencyAnalysis." .  $TextFilesInfo{OutFileExt}[$Index];
 680           if (-e $FrequencyFile) {
 681             warn "Warning: Ignoring file $TextFile: The file $FrequencyFile already exists.\n";
 682             $TextFilesInfo{FileOkay}[$Index] = 0;
 683             next FILELIST;
 684           }
 685         }
 686       }
 687       # Setup specified column pairs...
 688       if (exists $OptionsInfo{SpecifiedStatisticalFunctionsMap}{correlation} || exists $OptionsInfo{SpecifiedStatisticalFunctionsMap}{covariance} || exists $OptionsInfo{SpecifiedStatisticalFunctionsMap}{rsquare}) {
 689         my(@ColPairsToAnalyze, $ColNum1, $ColNum2);
 690         if (@{$OptionsInfo{SpecifiedColumnPairs}}) {
 691           # Make sure both columns exist...
 692           if ($OptionsInfo{ColMode} =~ /^colnum$/i) {
 693             for ($ColIndex = 0; (($ColIndex + 1) < @{$OptionsInfo{SpecifiedColumnPairs}}); $ColIndex += 2 ) {
 694               $ColNum1 = $OptionsInfo{SpecifiedColumnPairs}[$ColIndex];
 695               $ColNum2 = $OptionsInfo{SpecifiedColumnPairs}[$ColIndex + 1];
 696               if ($ColNum1 >=1 && $ColNum1 <= $TextFilesInfo{ColCount}[$Index] && $ColNum2 >=1 && $ColNum2 <= $TextFilesInfo{ColCount}[$Index]) {
 697                 $ColNum1 -= 1;
 698                 $ColNum2 -= 1;
 699                 push @ColPairsToAnalyze, ($ColNum1, $ColNum2);
 700               }
 701             }
 702           }
 703           else {
 704             my($ColLabel1, $ColLabel2);
 705             for ($ColIndex = 0; (($ColIndex + 1) < @{$OptionsInfo{SpecifiedColumnPairs}}); $ColIndex += 2 ) {
 706               $ColLabel1 = $OptionsInfo{SpecifiedColumnPairs}[$ColIndex];
 707               $ColLabel2 = $OptionsInfo{SpecifiedColumnPairs}[$ColIndex + 1];
 708               if (exists($TextFilesInfo{ColLabelToNumMap}[$Index]{$ColLabel1}) && exists($TextFilesInfo{ColLabelToNumMap}[$Index]{$ColLabel2})) {
 709                 $ColNum1 = $TextFilesInfo{ColLabelToNumMap}[$Index]{$ColLabel1};
 710                 $ColNum2 = $TextFilesInfo{ColLabelToNumMap}[$Index]{$ColLabel2};
 711                 push @ColPairsToAnalyze, ($ColNum1, $ColNum2);
 712               }
 713             }
 714           }
 715         }
 716         elsif ($OptionsInfo{AllColumnPairs}) {
 717           for $ColNum1 (0 .. ($TextFilesInfo{ColCount}[$Index] - 1)) {
 718             for $ColNum2 (0 .. ($TextFilesInfo{ColCount}[$Index] - 1)) {
 719               push @ColPairsToAnalyze, ($ColNum1, $ColNum2);
 720             }
 721           }
 722         }
 723         else {
 724           if ($TextFilesInfo{ColCount}[$Index] >= 2) {
 725             push @ColPairsToAnalyze, (0,1);
 726           }
 727         }
 728         if (@ColPairsToAnalyze) {
 729           if (@ColPairsToAnalyze % 2) {
 730             warn "Warning: Ignoring file $TextFile: Invalid number of values specified using \"--columnpairs\" option: It must contain even number of valid values.\n";
 731             $TextFilesInfo{FileOkay}[$Index] = 0;
 732             next FILELIST;
 733           }
 734           else {
 735             for ($ColIndex = 0; $ColIndex < @ColPairsToAnalyze; $ColIndex += 2) {
 736               push @{$TextFilesInfo{ColPairs1ToAnalyze}[$Index]}, $ColPairsToAnalyze[$ColIndex];
 737               push @{$TextFilesInfo{ColPairs2ToAnalyze}[$Index]}, $ColPairsToAnalyze[$ColIndex + 1];
 738             }
 739             # Set up unique columns map as well...
 740             for $ColNum (@ColPairsToAnalyze) {
 741               if (!exists $UniqueColNumsToAnalyzeMap{$ColNum}) {
 742                 $UniqueColNumsToAnalyzeMap{$ColNum} = $ColNum;
 743               }
 744             }
 745           }
 746         }
 747       }
 748       # Setup uniques columns array...
 749       push @{$TextFilesInfo{UniqueColNumsToAnalyze}[$Index]}, (sort keys %UniqueColNumsToAnalyzeMap);
 750     }
 751   }
 752 }
 753 
 754 # Retrieve information about input text files...
 755 sub RetrieveTextFilesInfo {
 756   my($Index, $TextFile, $FileDir, $FileName, $FileExt, $InDelim, $Line, @ColLabels, $OutFileRoot,  $OutFile, $OutFileExt, $ColNum, $ColLabel);
 757 
 758   %TextFilesInfo = ();
 759 
 760   @{$TextFilesInfo{FileOkay}} = ();
 761   @{$TextFilesInfo{ColCount}} = ();
 762   @{$TextFilesInfo{ColLabels}} = ();
 763   @{$TextFilesInfo{ColLabelToNumMap}} = ();
 764   @{$TextFilesInfo{InDelim}} = ();
 765   @{$TextFilesInfo{OutFileRoot}} = ();
 766   @{$TextFilesInfo{OutFileExt}} = ();
 767 
 768   FILELIST: for $Index (0 .. $#TextFilesList) {
 769     $TextFile = $TextFilesList[$Index];
 770 
 771     $TextFilesInfo{FileOkay}[$Index] = 0;
 772     $TextFilesInfo{ColCount}[$Index] = 0;
 773     $TextFilesInfo{InDelim}[$Index] = "";
 774     $TextFilesInfo{OutFileRoot}[$Index] = "";
 775     $TextFilesInfo{OutFileExt}[$Index] = "";
 776 
 777     @{$TextFilesInfo{ColLabels}[$Index]} = ();
 778     %{$TextFilesInfo{ColLabelToNumMap}[$Index]} = ();
 779 
 780     if (!(-e $TextFile)) {
 781       warn "Warning: Ignoring file $TextFile: It doesn't exist\n";
 782       next FILELIST;
 783     }
 784     if (!CheckFileType($TextFile, "csv tsv")) {
 785       warn "Warning: Ignoring file $TextFile: It's not a csv or tsv file\n";
 786       next FILELIST;
 787     }
 788     ($FileDir, $FileName, $FileExt) = ParseFileName($TextFile);
 789     if ($FileExt =~ /^tsv$/i) {
 790       $InDelim = "\t";
 791     }
 792     else {
 793       $InDelim = "\,";
 794       if ($Options{indelim} !~ /^(comma|semicolon)$/i) {
 795         warn "Warning: Ignoring file $TextFile: The value specified, $Options{indelim}, for option \"--indelim\" is not valid for csv files\n";
 796         next FILELIST;
 797       }
 798       if ($Options{indelim} =~ /^semicolon$/i) {
 799         $InDelim = "\;";
 800       }
 801     }
 802 
 803     if (!open TEXTFILE, "$TextFile") {
 804       warn "Warning: Ignoring file $TextFile: Couldn't open it: $! \n";
 805       next FILELIST;
 806     }
 807 
 808     $Line = GetTextLine(\*TEXTFILE);
 809     @ColLabels = quotewords($InDelim, 0, $Line);
 810     close TEXTFILE;
 811 
 812     $FileDir = ""; $FileName = ""; $FileExt = "";
 813     ($FileDir, $FileName, $FileExt) = ParseFileName($TextFile);
 814     $FileExt = "csv";
 815     if ($Options{outdelim} =~ /^tab$/i) {
 816       $FileExt = "tsv";
 817     }
 818     $OutFileExt = $FileExt;
 819     if ($Options{root} && (@TextFilesList == 1)) {
 820       my ($RootFileDir, $RootFileName, $RootFileExt) = ParseFileName($Options{root});
 821       if ($RootFileName && $RootFileExt) {
 822         $FileName = $RootFileName;
 823       }
 824       else {
 825         $FileName = $Options{root};
 826       }
 827       $OutFileRoot = $FileName;
 828     }
 829     else {
 830       $OutFileRoot = $FileName;
 831     }
 832     $OutFile = $OutFileRoot . $OptionsInfo{FileNameMode} . ".$OutFileExt";
 833 
 834     if (lc($OutFile) eq lc($TextFile)) {
 835       warn "Warning: Ignoring file $TextFile:Output file name, $OutFile, is same as input text file name, $TextFile\n";
 836       next FILELIST;
 837     }
 838     if (!$Options{overwrite}) {
 839       if (-e $OutFile) {
 840         warn "Warning: Ignoring file $TextFile: The file $OutFile already exists\n";
 841         next FILELIST;
 842       }
 843       if (exists($OptionsInfo{SpecifiedStatisticalFunctionsMap}{covariance}) || exists($OptionsInfo{SpecifiedStatisticalFunctionsMap}{correlation}) || exists($OptionsInfo{SpecifiedStatisticalFunctionsMap}{rsquare})) {
 844         if ($OptionsInfo{AllColumnPairs}) {
 845           if (exists($OptionsInfo{SpecifiedStatisticalFunctionsMap}{covariance}) && (-e "${OutFileRoot}CovarianceMatrix.${FileExt}")) {
 846             warn "Warning: Ignoring file $TextFile: The file ${OutFileRoot}Covariance.${FileExt} already exists.\n";
 847             next FILELIST;
 848           }
 849           if (exists($OptionsInfo{SpecifiedStatisticalFunctionsMap}{correlation}) && (-e "${OutFileRoot}CorrelationMatrix.${FileExt}")) {
 850             warn "Warning: Ignoring file $TextFile: The file ${OutFileRoot}CorrelationMatrix.${FileExt} already exists.\n";
 851             next FILELIST;
 852           }
 853           if (exists($OptionsInfo{SpecifiedStatisticalFunctionsMap}{rsquare}) && (-e "${OutFileRoot}RSquareMatrix.${FileExt}")) {
 854             warn "Warning: Ignoring file $TextFile: The file ${OutFileRoot}RSquareMatrix.${FileExt} already exists.\n";
 855             next FILELIST;
 856           }
 857         }
 858         else {
 859           if (-e "${OutFileRoot}ColumnPairsAnalysis.${FileExt}") {
 860             warn "Warning: Ignoring file $TextFile: The file ${OutFileRoot}ColumnPairsAnalysis.${FileExt} already exists.\n";
 861             next FILELIST;
 862           }
 863         }
 864       }
 865       if (exists($OptionsInfo{SpecifiedStatisticalFunctionsMap}{standardscores}) && (-e "${OutFileRoot}StandardScores.${FileExt}")) {
 866         warn "Warning: Ignoring file $TextFile: The file ${OutFileRoot}StandardScores.${FileExt} already exists.\n";
 867         next FILELIST;
 868       }
 869     }
 870 
 871     $TextFilesInfo{FileOkay}[$Index] = 1;
 872     $TextFilesInfo{InDelim}[$Index] = $InDelim;
 873     $TextFilesInfo{OutFileRoot}[$Index] = "$OutFileRoot";
 874     $TextFilesInfo{OutFileExt}[$Index] = "$OutFileExt";
 875 
 876     $TextFilesInfo{ColCount}[$Index] = @ColLabels;
 877     push @{$TextFilesInfo{ColLabels}[$Index]}, @ColLabels;
 878     for $ColNum (0 .. $#ColLabels) {
 879       $ColLabel = $ColLabels[$ColNum];
 880       $TextFilesInfo{ColLabelToNumMap}[$Index]{$ColLabel} = $ColNum;
 881     }
 882   }
 883 }
 884 
 885 # Process option values...
 886 sub ProcessOptions {
 887   %OptionsInfo = ();
 888 
 889   $OptionsInfo{Mode} = $Options{mode};
 890 
 891   $OptionsInfo{DetailLevel} = $Options{detail};
 892 
 893   # Setup supported statistical functions...
 894   my($SupportedFunction, @SupportedStatisticaFunctions, %SupportedStatisticaFunctionsMap);
 895   %SupportedStatisticaFunctionsMap = ();
 896   @SupportedStatisticaFunctions = qw(Average AverageDeviation Correlation Count Covariance GeometricMean Frequency HarmonicMean KLargest KSmallest Kurtosis Maximum Minimum Mean Median Mode RSquare Skewness Sum SumOfSquares StandardDeviation StandardDeviationN StandardError StandardScores StandardScoresN TrimMean Variance VarianceN);
 897 
 898   for $SupportedFunction (@SupportedStatisticaFunctions) {
 899     $SupportedStatisticaFunctionsMap{lc($SupportedFunction)} = $SupportedFunction;
 900   }
 901 
 902   # Setup a list of functions to use for analysis...
 903   my($SpecifiedFunction);
 904   %{$OptionsInfo{SpecifiedStatisticalFunctionsMap}} = ();
 905   @{$OptionsInfo{SpecifiedStatisticalFunctions}} = ();
 906   # Check mode values...
 907   if ($Options{mode} =~ /^DescriptiveStatisticsBasic$/i ) {
 908     $OptionsInfo{FileNameMode} = "DescriptiveStatisticsBasic";
 909     @{$OptionsInfo{SpecifiedStatisticalFunctions}} = qw(Count Maximum Minimum Mean Median StandardDeviation StandardError Variance Sum);
 910   }
 911   elsif ($Options{mode} =~ /^DescriptiveStatisticsAll$/i ) {
 912     $OptionsInfo{FileNameMode} = "DescriptiveStatisticsAll";
 913     @{$OptionsInfo{SpecifiedStatisticalFunctions}} = qw(Count Maximum Minimum Mean GeometricMean HarmonicMean TrimMean Median Mode StandardDeviation Kurtosis Skewness StandardError Variance  RSquare Frequency  KLargest KSmallest Sum);
 914   }
 915   elsif ($Options{mode} =~ /^All$/i ) {
 916     $OptionsInfo{FileNameMode} = "AllStatistics";
 917     @{$OptionsInfo{SpecifiedStatisticalFunctions}} = @SupportedStatisticaFunctions;
 918   }
 919   else {
 920     $OptionsInfo{FileNameMode} = "SpecifiedStatistics";
 921     # Comma delimited list of functions...
 922     my($Mode, @SpecifiedFunctions, @UnsupportedSpecifiedFunctions);
 923     $Mode = $Options{mode};
 924     $Mode =~ s/ //g;
 925     @SpecifiedFunctions = split ",", $Mode;
 926     @UnsupportedSpecifiedFunctions = ();
 927     for $SpecifiedFunction (@SpecifiedFunctions) {
 928       if (exists($SupportedStatisticaFunctionsMap{lc($SpecifiedFunction)})) {
 929         push @{$OptionsInfo{SpecifiedStatisticalFunctions}}, $SpecifiedFunction;
 930       }
 931       else {
 932         push @UnsupportedSpecifiedFunctions, $SpecifiedFunction;
 933       }
 934     }
 935     if (@UnsupportedSpecifiedFunctions) {
 936       if (@UnsupportedSpecifiedFunctions > 1) {
 937         warn "Error: The values specified - ", JoinWords(\@UnsupportedSpecifiedFunctions, ", ", 0)," - for option \"-m --mode\" are not valid.\n";
 938       }
 939       else {
 940         warn "Error: The value specified, @UnsupportedSpecifiedFunctions , for option \"-m --mode\" is not valid.\n";
 941       }
 942       die "Allowed values:", JoinWords(\@SupportedStatisticaFunctions, ", ", 0), "\n";
 943     }
 944   }
 945   FUNCTION: for $SpecifiedFunction (@{$OptionsInfo{SpecifiedStatisticalFunctions}}) {
 946     if (exists $OptionsInfo{SpecifiedStatisticalFunctionsMap}{lc($SpecifiedFunction)} ) {
 947       next FUNCTION;
 948     }
 949     $OptionsInfo{SpecifiedStatisticalFunctionsMap}{lc($SpecifiedFunction)} = $SupportedStatisticaFunctionsMap{lc($SpecifiedFunction)};
 950   }
 951 
 952   $OptionsInfo{OutDelim} = ($Options{outdelim} =~ /tab/i ) ? "\t" : (($Options{outdelim} =~ /semicolon/i) ? "\;" : "\,");
 953   $OptionsInfo{OutQuote} = ($Options{quote} =~ /yes/i ) ? 1 : 0;
 954 
 955   $OptionsInfo{Overwrite} = defined $Options{overwrite} ? $Options{overwrite} : undef;
 956   $OptionsInfo{Root} = defined $Options{root} ? $Options{root} : undef;
 957 
 958   $OptionsInfo{CheckData} = $Options{fast} ? 0 : 1;
 959   $OptionsInfo{Precision} = $Options{precision};
 960 
 961   $OptionsInfo{KLargest} = $Options{klargest};
 962   $OptionsInfo{KSmallest} = $Options{ksmallest};
 963 
 964   $OptionsInfo{TrimFraction} = $Options{trimfraction};
 965 
 966   # Setup frequency bin values...
 967   $OptionsInfo{NumOfBins} = 10;
 968   @{$OptionsInfo{BinRange}} = ();
 969   if ($Options{frequencybins} =~ /\,/) {
 970     my($BinValue, @SpecifiedBinRange);
 971     @SpecifiedBinRange = split /\,/,  $Options{frequencybins};
 972     if (@SpecifiedBinRange < 2) {
 973       die "Error: The value specified, $Options{frequencybins}, for option \"--frequencybins\" is not valid: Must contain at least two values. \n";
 974     }
 975     for $BinValue (@SpecifiedBinRange) {
 976       if (!IsNumerical($BinValue)) {
 977         die "Error: The value specified, $Options{frequencybins}, for option \"--frequencybins\" is not valid: Contains non numeric values. \n";
 978       }
 979     }
 980     my($Index1, $Index2);
 981     for $Index1 (0 .. $#SpecifiedBinRange) {
 982       for $Index2 (($Index1 + 1) .. $#SpecifiedBinRange) {
 983         if ($SpecifiedBinRange[$Index1] >= $SpecifiedBinRange[$Index2]) {
 984           die "Error: The value specified, $Options{frequencybins}, for option \"--frequencybins\" is not valid: Must contain values in ascending order. \n";
 985         }
 986       }
 987     }
 988     push @{$OptionsInfo{BinRange}}, @SpecifiedBinRange;
 989   }
 990   else {
 991     $OptionsInfo{NumOfBins} = $Options{frequencybins};
 992     if (!IsPositiveInteger($OptionsInfo{NumOfBins})) {
 993       die "Error: The value specified, $Options{frequencybins}, for option \"--frequencybins\" is not valid. Allowed values: positive integer or \"number,number,[number]...\". \n";
 994     }
 995   }
 996 
 997   # Setup specified columns...
 998   $OptionsInfo{ColMode} = $Options{colmode};
 999   $OptionsInfo{Columns} = defined $Options{columns} ? $Options{columns} : undef;
1000 
1001   @{$OptionsInfo{SpecifiedColumns}} = ();
1002   if (defined $Options{columns} && $Options{columns} !~ /^All$/i) {
1003     my(@SpecifiedValues) = split ",", $Options{columns};
1004     if ($Options{colmode} =~ /^colnum$/i) {
1005       my($ColValue);
1006       for $ColValue (@SpecifiedValues) {
1007         if (!IsPositiveInteger($ColValue)) {
1008           die "Error: Column value, $ColValue, specified using \"--columns\" is not valid: Allowed integer values: > 0.\n";
1009         }
1010       }
1011     }
1012     push @{$OptionsInfo{SpecifiedColumns}}, @SpecifiedValues;
1013   }
1014   @{$OptionsInfo{SpecifiedColumnPairs}} = ();
1015   $OptionsInfo{AllColumnPairs} = (defined($Options{columnpairs}) && $Options{columnpairs} =~ /^AllPairs$/i) ? 1 : 0;
1016   if (defined($Options{columnpairs}) && !$OptionsInfo{AllColumnPairs}) {
1017     my(@SpecifiedValues) = split ",", $Options{columnpairs};
1018     if (@SpecifiedValues % 2) {
1019       die "Error: Invalid number of values specified using \"--columnpairs\" option: It must contain even number of values.\n";
1020     }
1021     if ($Options{colmode} =~ /^colnum$/i) {
1022       my($ColValue);
1023       for $ColValue (@SpecifiedValues) {
1024         if (!IsPositiveInteger($ColValue)) {
1025           die "Error: Column value, $ColValue, specified using \"--columnpairs\" is not valid: Allowed integer values: > 0.\n";
1026         }
1027       }
1028     }
1029     push @{$OptionsInfo{SpecifiedColumnPairs}}, @SpecifiedValues;
1030   }
1031 
1032 }
1033 
# Setup script usage and retrieve command line arguments specified using various options...
#
# Initializes the file-level %Options hash with default values, lets GetOptions
# override them using command line arguments, changes to the working directory
# when one is specified, and validates option values. The script is terminated
# with an error message for the first invalid value found.
#
sub SetupScriptUsage {

  # Default values for options...
  %Options = (
    colmode => "colnum",
    detail => 1,
    indelim => "comma",
    frequencybins => 10,
    klargest => 2,
    ksmallest => 2,
    mode => "DescriptiveStatisticsBasic",
    outdelim => "comma",
    precision => 2,
    quote => "yes",
    trimfraction => 0.1,
  );

  # Retrieve all the options...
  my @OptionSpecs = ("colmode|c=s", "columns=s", "columnpairs=s", "detail|d=i", "frequencybins=s", "fast|f", "help|h", "indelim=s", "klargest=i", "ksmallest=i", "mode|m=s", "outdelim=s", "overwrite|o", "precision|p=i", "quote|q=s", "root|r=s", "trimfraction=f", "workingdir|w=s");
  GetOptions(\%Options, @OptionSpecs)
    or die "\nTo get a list of valid options and their values, use \"$ScriptName -h\" or\n\"perl -S $ScriptName -h\" command and try again...\n";

  # Switch to the working directory before any file names are used...
  if ($Options{workingdir}) {
    unless (-d $Options{workingdir}) {
      die "Error: The value specified, $Options{workingdir}, for option \"-w --workingdir\" is not a directory name.\n";
    }
    chdir $Options{workingdir} or die "Error: Couldn't chdir $Options{workingdir}: $! \n";
  }

  # Validate option values...
  unless ($Options{colmode} =~ /^(colnum|collabel)$/i) {
    die "Error: The value specified, $Options{colmode}, for option \"-c --colmode\" is not valid. Allowed values: colnum or collabel\n";
  }
  unless (IsPositiveInteger($Options{detail})) {
    die "Error: The value specified, $Options{detail}, for option \"-d --detail\" is not valid. Allowed values: > 0\n";
  }
  unless ($Options{indelim} =~ /^(comma|semicolon)$/i) {
    die "Error: The value specified, $Options{indelim}, for option \"--indelim\" is not valid. Allowed values: comma or semicolon\n";
  }
  unless ($Options{outdelim} =~ /^(comma|semicolon|tab)$/i) {
    die "Error: The value specified, $Options{outdelim}, for option \"--outdelim\" is not valid. Allowed values: comma, tab, or semicolon\n";
  }
  unless ($Options{quote} =~ /^(yes|no)$/i) {
    die "Error: The value specified, $Options{quote}, for option \"-q --quote\" is not valid. Allowed values: yes or no\n";
  }

  # Options sharing the same positive integer check and error message format...
  for my $CountOption (["precision", "-p --precision"], ["klargest", "--klargest"], ["ksmallest", "--ksmallest"]) {
    my($OptionKey, $OptionLabel) = @{$CountOption};
    unless (IsPositiveInteger($Options{$OptionKey})) {
      die "Error: The value specified, $Options{$OptionKey}, for option \"$OptionLabel\" is not valid. Allowed values: > 0 \n";
    }
  }

  # Trim fraction must be a float between 0 and 1.0, both excluded...
  if (!IsFloat($Options{trimfraction}) || $Options{trimfraction} <= 0 || $Options{trimfraction} >= 1.0) {
    die "Error: The value specified, $Options{trimfraction}, for option \"--trimfraction\" is not valid. Allowed values: > 0 and < 1.0\n";
  }
}
1093