MayaChemTools

   1 package StatisticsUtil;
   2 #
   3 # File: StatisticsUtil.pm
   4 # Author: Manish Sud <msud@san.rr.com>
   5 #
   6 # Copyright (C) 2024 Manish Sud. All rights reserved.
   7 #
   8 # This file is part of MayaChemTools.
   9 #
  10 # MayaChemTools is free software; you can redistribute it and/or modify it under
  11 # the terms of the GNU Lesser General Public License as published by the Free
  12 # Software Foundation; either version 3 of the License, or (at your option) any
  13 # later version.
  14 #
  15 # MayaChemTools is distributed in the hope that it will be useful, but without
  16 # any warranty; without even the implied warranty of merchantability of fitness
  17 # for a particular purpose.  See the GNU Lesser General Public License for more
  18 # details.
  19 #
  20 # You should have received a copy of the GNU Lesser General Public License
  21 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  22 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  23 # Boston, MA, 02111-1307, USA.
  24 #
  25 
  26 use strict;
  27 use Exporter;
  28 
  29 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
  30 
  31 @ISA = qw(Exporter);
  32 @EXPORT = qw(Average AverageDeviation Covariance Correlation Euclidean Factorial FactorialDivison GeometricMean Frequency HarmonicMean KLargest KSmallest Kurtosis Maximum Minimum Mean Median Mode PearsonCorrelation Permutations Product Range RSquare Skewness Sum SumOfSquares StandardDeviation StandardDeviationN  StandardError Standardize StandardScores StandardScoresN TrimMean Variance VarianceN);
  33 @EXPORT_OK = qw();
  34 %EXPORT_TAGS = (all  => [@EXPORT, @EXPORT_OK]);
  35 
  36 # Compute the mean of an array of numbers
  37 sub Average {
  38   my($XArrayRef) = @_;
  39   return Mean($XArrayRef);
  40 }
  41 
  42 # Compute the average of the absolute deviation of an array of numbers: SUM( ABS(x[i] - Xmean) ) / n
  43 sub AverageDeviation {
  44   my($XArrayRef) = @_;
  45 
  46   if (!@$XArrayRef) {
  47     return undef;
  48   }
  49   my($AverageDeviation, $Mean, $Value, $SumOfDeviations);
  50 
  51   $AverageDeviation = 0;
  52   $Mean = Mean($XArrayRef);
  53   foreach $Value (@$XArrayRef) {
  54     $SumOfDeviations += abs($Value - $Mean);
  55   }
  56   $AverageDeviation = $SumOfDeviations / @$XArrayRef;
  57 
  58   return $AverageDeviation;
  59 }
  60 
  61 # Compute correlation coefficient between two arrays of numbers
  62 sub Correlation {
  63   my($XArrayRef, $YArrayRef) = @_;
  64   return PearsonCorrelation($XArrayRef, $YArrayRef);
  65 }
  66 
  67 # Compute the covariance between two arrays of numbers: SUM( (x[i] - Xmean) (y[i] - Ymean) ) / n
  68 sub Covariance {
  69   my($XArrayRef, $YArrayRef) = @_;
  70 
  71   if (!(@$XArrayRef && @$YArrayRef && (@$XArrayRef == @$YArrayRef))) {
  72     return undef;
  73   }
  74   my($Covariance, $XMean, $YMean, $Index, $ProductOfDeviations);
  75 
  76   $Covariance = 0;
  77   $XMean = Mean($XArrayRef);
  78   $YMean = Mean($YArrayRef);
  79   $ProductOfDeviations = 0;
  80   for $Index (0 .. $#{@$XArrayRef}) {
  81     $ProductOfDeviations += (($XArrayRef->[$Index] - $XMean) * ($YArrayRef->[$Index] - $YMean));
  82   }
  83   $Covariance = $ProductOfDeviations / @$XArrayRef;
  84   return $Covariance;
  85 }
  86 
  87 # Compute the euclidean distance of an array of numbers: SQRT( SUM( x[i] ** 2) )
  88 sub Euclidean {
  89   my($XArrayRef) = @_;
  90 
  91   if (!@$XArrayRef) {
  92     return undef;
  93   }
  94   my($SumOfSquares);
  95 
  96   $SumOfSquares = SumOfSquares($XArrayRef);
  97 
  98   return sqrt $SumOfSquares;
  99 }
 100 
 101 # Compute factorial of a number...
 102 sub Factorial {
 103   my($Num) = @_;
 104 
 105   return _Factorial($Num, 1);
 106 }
 107 
 108 # Perform factorial division of two numbers...
 109 sub FactorialDivison {
 110   my($Numerator, $Denominator) = @_;
 111 
 112   # Only works for integer numbers...
 113   if ($Numerator <= 0 || ($Numerator != int($Numerator)) ||
 114      $Denominator <= 0 || ($Denominator != int($Denominator)) ) {
 115     return undef;
 116   }
 117   my($LargerNum, $SmallerNum, $Result);
 118   $LargerNum = ($Numerator > $Denominator) ? $Numerator : $Denominator;
 119   $SmallerNum = ($Numerator < $Denominator) ? $Numerator : $Denominator;
 120 
 121   $Result = _Factorial($LargerNum, $SmallerNum);
 122   if ($Numerator < $Denominator) {
 123     $Result = 1/$Result;
 124   }
 125   return $Result;
 126 }
 127 
 128 # Calculate factorial of a number upto a specific limit...
 129 sub _Factorial {
 130   my($Num, $Limit) = @_;
 131 
 132   # Only works for integer numbers...
 133   if ($Num <= 0 || ($Num != int($Num)) || $Limit < 1) {
 134     return undef;
 135   }
 136 
 137   my($Result) = 1;
 138 
 139   while ($Num > $Limit) {
 140     $Result *= $Num;
 141     $Num--;
 142   }
 143   return $Result;
 144 }
 145 
 146 # Generate all possible permuations or a specific permutations of items in an array
 147 # and return a reference to an array containing array references to generated permuations...
 148 #
 149 # This alogrithm is based on the example provided by Mark Jason-Dominus, and is available
 150 # at CPAN as mjd_permute standalone script.
 151 #
 152 sub Permutations {
 153   my(@DataToPermute) = @_;
 154   my($PermutationNum, $NumOfPermutations, @Permutations);
 155 
 156   if (!@DataToPermute) {
 157     return undef;
 158   }
 159 
 160   @Permutations = ();
 161   $NumOfPermutations = Factorial(scalar @DataToPermute);
 162 
 163   for ($PermutationNum = 0; $PermutationNum < $NumOfPermutations; $PermutationNum++) {
 164     my @Permutation = @DataToPermute[_PermutationNumToPermutation($PermutationNum, $#DataToPermute)];
 165     push @Permutations, \@Permutation;
 166   }
 167 
 168   return \@Permutations;
 169 }
 170 
 171 # Generte Nth permutation for a collection of specific size...
 172 #
 173 sub _PermutationNumToPermutation {
 174   my($Num, $Size) = @_;
 175 
 176   return _PatternToPermutation(_PermutationNumToPattern($Num, $Size));
 177 }
 178 
 179 # Generate Nth pattern for a collection of specific size...
 180 #
 181 sub _PermutationNumToPattern {
 182   my($Num, $Size) = @_;
 183   my($Index, @Pattern);
 184 
 185   $Index = 1;
 186 
 187   while ($Index <= $Size + 1) {
 188     push @Pattern, $Num % $Index;
 189     $Num = int($Num/$Index);
 190     $Index++;
 191   }
 192 
 193   return @Pattern;
 194 }
 195 
 196 # Generate permutation of integers from pattern...
 197 #
 198 sub _PatternToPermutation {
 199   my(@Pattern) = @_;
 200   my(@Source, @Permutation);
 201 
 202   @Source = (0 .. $#Pattern);
 203 
 204   while (@Pattern) {
 205     push @Permutation, splice(@Source, (pop @Pattern), 1);
 206   }
 207 
 208   return @Permutation;
 209 }
 210 
 211 # Compute the frequency of occurance of values in an array of numbers. Three different
 212 # invocation methods are supported:
 213 #
 214 # Frequency(\@ArrayRef) : Using the smallest and largest values, group the numbers into
 215 # 10 bins.
 216 #
 217 # Frequency(\@ArrayRef, $NumOfBins) : Using the smallest and largest values, group the
 218 # numbers into specified bins.
 219 #
 220 # Frequency(\@ArrayRef, \@BinRange): Use bin range to goup the values into different bins.
 221 #
 222 # A hash array is returned with keys and values representing range and frequency values respectively.
 223 # The frequency value for a specific key corresponds to all the values which are greater than
 224 # the previous key and less than or equal to the current key. A key value representing maximum value is
 225 # added for generating frequency distribution for specific number of bins, and whenever the maximum
 226 # array value is greater than the maximum specified in bin range, it is also added to bin range.
 227 #
 228 sub Frequency {
 229   my($XArrayRef) = @_;
 230 
 231   if (!@$XArrayRef) {
 232     return undef;
 233   }
 234 
 235   my($BinRange, $NumOfBins, $BinRangeSpecified);
 236 
 237   $BinRangeSpecified = 0;
 238   $NumOfBins = 10;
 239   if (@_ == 2) {
 240     if (ref($_[1]) eq 'ARRAY') {
 241       $BinRange = $_[1];
 242       if (!(@$BinRange && (@$BinRange > 1))) {
 243         return undef;
 244       }
 245       # Make sure the bin range contains values in increasing order...
 246       my($Index1, $Index2);
 247       for $Index1 (0 .. $#{@$BinRange}) {
 248         for $Index2 (($Index1 + 1) .. $#{@$BinRange}) {
 249           if ($BinRange->[$Index1] >= $BinRange->[$Index2]) {
 250             return undef;
 251           }
 252         }
 253       }
 254       $BinRangeSpecified = 1;
 255     }
 256     else {
 257       $NumOfBins = $_[1];
 258       if ($NumOfBins <= 1) {
 259         return undef;
 260       }
 261     }
 262   }
 263 
 264   # Setup range keys...
 265   my(@RangeKeys);
 266   @RangeKeys = ();
 267 
 268   my($MinValue, $MaxValue) = Range($XArrayRef);
 269   if ($BinRangeSpecified) {
 270     push @RangeKeys, @$BinRange;
 271     if ($MaxValue > $RangeKeys[$#RangeKeys]) {
 272       push @RangeKeys, $MaxValue;
 273     }
 274   }
 275   else {
 276     my($MinValue, $MaxValue) = Range($XArrayRef);
 277     my($Interval) = ($MaxValue - $MinValue)/$NumOfBins;
 278     my($KeyValue) = $MinValue + $Interval;
 279     while ($KeyValue  < $MaxValue) {
 280       push @RangeKeys, $KeyValue;
 281       $KeyValue += $Interval;
 282     }
 283     push @RangeKeys, $MaxValue;
 284   }
 285 
 286   #Setup frequency hash array...
 287   my(%FrequencyMap);
 288   %FrequencyMap = ();
 289 
 290   %FrequencyMap = map { $_ => 0 } @RangeKeys;
 291 
 292   # Count values...
 293   my($Key, $Value);
 294 
 295   VALUE: for $Value (@$XArrayRef) {
 296       for $Key (@RangeKeys) {
 297         if ($Value <= $Key) {
 298           $FrequencyMap{$Key} += 1;
 299           next VALUE;
 300         }
 301       }
 302   }
 303   return (%FrequencyMap);
 304 }
 305 
 306 # Compute the geometric mean of an array of numbers: NthROOT( PRODUCT(x[i]) )
 307 sub GeometricMean {
 308   my($XArrayRef) = @_;
 309 
 310   if (!@$XArrayRef) {
 311     return undef;
 312   }
 313   my($Mean, $Product, $Value);
 314   $Product = 1;
 315   foreach $Value (@$XArrayRef) {
 316     if ($Value <= 0 ) {
 317       return undef;
 318     }
 319     $Product *= $Value;
 320   }
 321   $Mean = $Product ** (1 / @$XArrayRef);
 322   return $Mean;
 323 }
 324 
 325 # Compute the harmonic mean of an array of numbers: 1 / ( SUM(1/x[i]) / n )
 326 sub HarmonicMean {
 327   my($XArrayRef) = @_;
 328 
 329   if (!@$XArrayRef) {
 330     return undef;
 331   }
 332   my($Mean, $Sum, $Value);
 333   $Sum = 0;
 334   foreach $Value (@$XArrayRef) {
 335     if ($Value <= 0 ) {
 336       return undef;
 337     }
 338     $Sum += 1/$Value;
 339   }
 340   $Mean = 1/($Sum/@$XArrayRef);
 341   return $Mean;
 342 }
 343 
 344 # Return the k-largest value from an array of numbers
 345 sub KLargest {
 346   my($XArrayRef, $K) = @_;
 347 
 348   if (!(@$XArrayRef && ($K > 0) && ($K <= @$XArrayRef))) {
 349     return undef;
 350   }
 351   my($KLargest, @SortedXArray);
 352   @SortedXArray = sort { $b <=> $a } @$XArrayRef;
 353   $KLargest = $SortedXArray[$K - 1];
 354   return $KLargest;
 355 }
 356 
 357 # Return the k-smallest value from an array of numbers
 358 sub KSmallest {
 359   my($XArrayRef, $K) = @_;
 360 
 361   if (!(@$XArrayRef && ($K > 0) && ($K <= @$XArrayRef))) {
 362     return undef;
 363   }
 364   my($KSmallest, @SortedXArray);
 365   @SortedXArray = sort { $a <=> $b } @$XArrayRef;
 366   $KSmallest = $SortedXArray[$K - 1];
 367   return $KSmallest;
 368 }
 369 
 370 # Compute the kurtosis of an array of numbers:
 371 # [ {n(n + 1)/(n - 1)(n - 2)(n - 3)}  SUM{ ((x[i] - Xmean)/STDDEV)^4 } ] - {3((n - 1)^2)}/{(n - 2)(n-3)}
 372 #
 373 sub Kurtosis {
 374   my($XArrayRef) = @_;
 375 
 376   if (!@$XArrayRef || ((@$XArrayRef - 3) <= 0)) {
 377     return undef;
 378   }
 379   my($Kurtosis, $Mean, $StandardDeviation, $Value);
 380   $Mean = Mean($XArrayRef);
 381   if (!defined $Mean) {
 382     return undef;
 383   }
 384   $StandardDeviation = StandardDeviation($XArrayRef);
 385   if (!(defined $StandardDeviation &&  $StandardDeviation != 0)) {
 386     return undef;
 387   }
 388 
 389   my($SumOfScores, $SampleSize);
 390   $SumOfScores = 0;
 391   for $Value (@$XArrayRef) {
 392     $SumOfScores += (($Value - $Mean)/$StandardDeviation) ** 4;
 393   }
 394   $SampleSize = @$XArrayRef;
 395   $Kurtosis = ((($SampleSize * ($SampleSize + 1))/(($SampleSize - 1) * ($SampleSize - 2) * ($SampleSize - 3))) * $SumOfScores) - ((3 * (($SampleSize - 1) ** 2))/(($SampleSize - 2) * ($SampleSize - 3)));
 396   return $Kurtosis;
 397 }
 398 
 399 # Return the smallest value from an array of numbers
 400 sub Minimum {
 401   my($XArrayRef) = @_;
 402   return KSmallest($XArrayRef, 1);
 403 }
 404 
 405 # Return the largest value from an array of numbers
 406 sub Maximum {
 407   my($XArrayRef) = @_;
 408   return KLargest($XArrayRef, 1);
 409 }
 410 
 411 # Compute the mean of an array of numbers: SUM( x[i] ) / n
 412 sub Mean {
 413   my($XArrayRef) = @_;
 414 
 415   if (!@$XArrayRef) {
 416     return undef;
 417   }
 418   my($Mean, $Sum, $Value);
 419   $Sum = 0;
 420   foreach $Value (@$XArrayRef) {
 421     $Sum += $Value;
 422   }
 423   $Mean = $Sum / @$XArrayRef;
 424   return $Mean;
 425 }
 426 
 427 # Compute the median value of an array of numbers. For an even number array, it's
 428 # the average of two middle values.
 429 #
 430 # For even values of n: Xsorted[(n - 1)/2 + 1]
 431 # For odd values of n: (Xsorted[n/2] + Xsorted[n/2 + 1])/2
 432 #
 433 sub Median {
 434   my($XArrayRef) = @_;
 435 
 436   if (!@$XArrayRef) {
 437     return undef;
 438   }
 439   my($Median, @SortedXArray);
 440   $Median = 0;
 441   @SortedXArray = sort { $a <=> $b } @$XArrayRef;
 442   if (@$XArrayRef % 2) {
 443     my($MidIndex);
 444     $MidIndex = int(@SortedXArray - 1)/2;
 445     $Median = $SortedXArray[$MidIndex];
 446   }
 447   else {
 448     # Even number array...
 449     my($MidPosition);
 450     $MidPosition = int(@SortedXArray / 2);
 451     $Median = ($SortedXArray[$MidPosition - 1] + $SortedXArray[$MidPosition]) / 2;
 452   }
 453   return $Median;
 454 }
 455 
 456 # Return the most frequently occuring value in an array of numbers
 457 sub Mode {
 458   my($XArrayRef) = @_;
 459 
 460   if (!@$XArrayRef) {
 461     return undef;
 462   }
 463   my($Value, %ValueToCountMap, @CountList, @SortedCountList);
 464   %ValueToCountMap = ();
 465   @CountList = ();
 466   @SortedCountList = ();
 467   for $Value (@$XArrayRef) {
 468     if (exists $ValueToCountMap{$Value}) {
 469       $ValueToCountMap{$Value} += 1;
 470     }
 471     else {
 472       $ValueToCountMap{$Value} = 1;
 473     }
 474   }
 475   for $Value (keys %ValueToCountMap) {
 476     push @CountList, $ValueToCountMap{$Value};
 477   }
 478   @SortedCountList = sort { $b <=> $a } @CountList;
 479 
 480   # Make sure the frequency of mode value is greater than one and check for
 481   # multiple modes as well...
 482   #
 483   my($ModeCount, $ModeValue);
 484   $ModeCount = $SortedCountList[0];
 485   if ($ModeCount <= 1) {
 486     return undef;
 487   }
 488   # Get the first mode value...
 489   VALUE: for $Value (keys %ValueToCountMap) {
 490     if ($ValueToCountMap{$Value} == $ModeCount) {
 491       $ModeValue = $Value;
 492       # Set it to zero to skip it next time...
 493       $ValueToCountMap{$Value} = 0;
 494       last VALUE;
 495     }
 496   }
 497 
 498   if (wantarray) {
 499     # Retrieve all the modes...
 500     my(@Modes, $Count);
 501     @Modes = ();
 502     push @Modes, $ModeValue;
 503     for $Count (@SortedCountList) {
 504       if ($Count == $ModeCount) {
 505       VALUE: for $Value (keys %ValueToCountMap) {
 506           if ($ValueToCountMap{$Value} == $ModeCount) {
 507             push @Modes, $Value;
 508             # Set it to zero to skip it next time...
 509             $ValueToCountMap{$Value} = 0;
 510             last VALUE;
 511           }
 512         }
 513       }
 514     }
 515     return sort {$b <=> $a} @Modes;
 516   }
 517   else {
 518     return $ModeValue;
 519   }
 520 }
 521 
 522 
 523 # Compute the Pearson correlation coefficient between two arrays of numbers:
 524 #
 525 #  SUM( (x[i] - Xmean)(y[i] - Ymean) ) / SQRT( SUM( (x[i] - Xmean)^2 )(SUM( (y[i] - Ymean)^2 ))   )
 526 #
 527 # It returns values in the range from -1.0 to 1.0
 528 sub PearsonCorrelation {
 529   my($XArrayRef, $YArrayRef) = @_;
 530 
 531   if (!(@$XArrayRef && @$YArrayRef && (@$XArrayRef == @$YArrayRef))) {
 532     return undef;
 533   }
 534   my($Correlation, $XMean, $YMean, $Index, $XValueDeviation, $YValueDeviation, $SquareOfXDeviations, $SquareOfYDeviations, $ProductOfDeviations);
 535 
 536   $Correlation = 0;
 537   $XMean = Mean($XArrayRef);
 538   $YMean = Mean($YArrayRef);
 539   $ProductOfDeviations = 0; $SquareOfXDeviations = 0; $SquareOfYDeviations = 0;
 540   for $Index (0 .. $#{@$XArrayRef}) {
 541     $XValueDeviation = $XArrayRef->[$Index] - $XMean;
 542     $YValueDeviation = $YArrayRef->[$Index] - $YMean;
 543     $ProductOfDeviations += ($XValueDeviation * $YValueDeviation);
 544     $SquareOfXDeviations += $XValueDeviation ** 2;
 545     $SquareOfYDeviations += $YValueDeviation ** 2;
 546   }
 547   $Correlation = $ProductOfDeviations / sqrt($SquareOfXDeviations *  $SquareOfYDeviations);
 548   return $Correlation;
 549 }
 550 
 551 # Return the smallest and largest values from an array of numbers
 552 sub Range {
 553   my($XArrayRef) = @_;
 554 
 555   if (!@$XArrayRef) {
 556     return (undef, undef);
 557   }
 558   my($Smallest, $Largest, @SortedXArray);
 559   @SortedXArray = sort { $a <=> $b } @$XArrayRef;
 560   $Smallest = $SortedXArray[0];
 561   $Largest = $SortedXArray[$#SortedXArray];
 562   return ($Smallest, $Largest);
 563 }
 564 
 565 # Compute square of the Pearson correlation coefficient between two arrays of numbers.
 566 #
 567 sub RSquare {
 568   my($XArrayRef, $YArrayRef) = @_;
 569   my($RSquare, $Correlation);
 570 
 571   $RSquare = undef;
 572   $Correlation = PearsonCorrelation($XArrayRef, $YArrayRef);
 573   if (defined $Correlation) {
 574     $RSquare = $Correlation ** 2;
 575   }
 576   return $RSquare;
 577 }
 578 
 579 # Compute the skewness of an array of numbers:
 580 #  {n/(n - 1)(n - 2)} SUM{ ((x[i] - Xmean)/STDDEV)^3 }
 581 #
 582 sub Skewness {
 583   my($XArrayRef) = @_;
 584 
 585   if (!@$XArrayRef || ((@$XArrayRef - 2) <= 0)) {
 586     return undef;
 587   }
 588   my($Skewness, $Mean, $StandardDeviation, $Value);
 589   $Mean = Mean($XArrayRef);
 590   if (!defined $Mean) {
 591     return undef;
 592   }
 593   $StandardDeviation = StandardDeviation($XArrayRef);
 594   if (!(defined $StandardDeviation &&  $StandardDeviation != 0)) {
 595     return undef;
 596   }
 597 
 598   my($SumOfScores, $SampleSize);
 599   $SumOfScores = 0;
 600   for $Value (@$XArrayRef) {
 601     $SumOfScores += (($Value - $Mean)/$StandardDeviation) ** 3;
 602   }
 603   $SampleSize = @$XArrayRef;
 604   $Skewness = ($SampleSize/(($SampleSize - 1) * ($SampleSize - 2) )) * $SumOfScores;
 605   return $Skewness;
 606 }
 607 
 608 # Compute the standard deviation of an array of numbers
 609 sub StandardDeviation {
 610   my($XArrayRef) = @_;
 611   return _CalculateStandardDeviation($XArrayRef, 2);
 612 }
 613 
 614 # Compute the standard deviation of an array of numbers representing entire population
 615 sub StandardDeviationN {
 616   my($XArrayRef) = @_;
 617   return _CalculateStandardDeviation($XArrayRef, 1);
 618 }
 619 
 620 # Compute the standard deviation of an array of numbers.
 621 # Mode 1: SQRT ( SUM( (x[i] - mean)^2 ) / n )
 622 # Mode 2: SQRT ( SUM( (x[i] - mean)^2 ) / (n - 1) )
 623 #
 624 sub _CalculateStandardDeviation {
 625   my($XArrayRef, $Mode) = @_;
 626 
 627   if (!@$XArrayRef) {
 628     return undef;
 629   }
 630   my($StandardDeviation, $Value, $SquareOfDeviations, $Mean, $N);
 631 
 632   $StandardDeviation = 0;
 633   $Mean = Mean($XArrayRef);
 634   $SquareOfDeviations = 0;
 635   foreach $Value (@$XArrayRef) {
 636     $SquareOfDeviations += ($Value - $Mean) ** 2;
 637   }
 638   $N = ($Mode == 1) ? @$XArrayRef : (@$XArrayRef - 1);
 639   $StandardDeviation =  sqrt($SquareOfDeviations / $N);
 640 
 641   return $StandardDeviation;
 642 }
 643 
 644 # Compute the standard error using standard deviation and sample size
 645 sub StandardError {
 646   my($StandardDeviation, $Count) = @_;
 647 
 648   if ($Count <= 0) {
 649     return undef;
 650   }
 651   my($StandardError);
 652   $StandardError = $StandardDeviation / sqrt($Count);
 653 
 654   return $StandardError;
 655 }
 656 
 657 # Standardize the value using mean and standard deviation
 658 sub Standardize {
 659   my($Value, $Mean, $StandardDeviation) = @_;
 660 
 661   if ($StandardDeviation <= 0) {
 662     return undef;
 663   }
 664   my($StandardizedValue);
 665   $StandardizedValue = ($Value - $Mean)/$StandardDeviation;
 666 
 667   return $StandardizedValue;
 668 }
 669 
 670 # Compute the standard deviation above the mean for an array of numbers.
 671 sub StandardScores {
 672   my($XArrayRef) = @_;
 673   return _CalculateStandardScores($XArrayRef, 2);
 674 }
 675 
 676 # Compute the standard deviation above the mean for an array of numbers representing entire population
 677 sub StandardScoresN {
 678   my($XArrayRef) = @_;
 679   return _CalculateStandardScores($XArrayRef, 1);
 680 }
 681 
 682 # Compute the standard deviation above the mean for an array of numbers.
 683 # Mode 1:  (x[i] - mean) / n
 684 # Mode 2:  (x[i] - mean) / (n - 1)
 685 #
 686 sub _CalculateStandardScores {
 687   my($XArrayRef, $Mode) = @_;
 688 
 689   if (!@$XArrayRef) {
 690     return undef;
 691   }
 692   my(@StandardScores, $Mean, $StandardDeviation, $Value);
 693 
 694   $Mean = Mean($XArrayRef);
 695   $StandardDeviation = _CalculateStandardDeviation($XArrayRef, $Mode);
 696   if (!(defined($StandardDeviation) && $StandardDeviation > 0)) {
 697     return undef;
 698   }
 699   @StandardScores = ();
 700   for $Value (@$XArrayRef) {
 701     push @StandardScores, ($Value - $Mean)/$StandardDeviation;
 702   }
 703 
 704   return @StandardScores;
 705 }
 706 
 707 # Compute the product of an array of numbers
 708 sub Product {
 709   my($XArrayRef) = @_;
 710 
 711   if (!@$XArrayRef) {
 712     return undef;
 713   }
 714   my($Product, $Value);
 715   $Product = 1;
 716   foreach $Value (@$XArrayRef) {
 717     $Product *= $Value;
 718   }
 719   return $Product;
 720 }
 721 
 722 # Compute the sum of an array of numbers
 723 sub Sum {
 724   my($XArrayRef) = @_;
 725 
 726   if (!@$XArrayRef) {
 727     return undef;
 728   }
 729   my($Sum, $Value);
 730   $Sum = 0;
 731   foreach $Value (@$XArrayRef) {
 732     $Sum += $Value;
 733   }
 734   return $Sum;
 735 }
 736 
 737 # Compute the sum of squares of an array of numbers
 738 sub SumOfSquares {
 739   my($XArrayRef) = @_;
 740 
 741   if (!@$XArrayRef) {
 742     return undef;
 743   }
 744   my($SumOfSquares, $Value);
 745   $SumOfSquares = 0;
 746   foreach $Value (@$XArrayRef) {
 747     $SumOfSquares += $Value ** 2;
 748   }
 749   return $SumOfSquares;
 750 }
 751 
 752 # Compute the mean of an array of numbers by excluding a fraction of
 753 # numbers from the top and bottom of the data set.
 754 sub TrimMean {
 755   my($XArrayRef, $FractionToExclude) = @_;
 756 
 757   if (!(@$XArrayRef && $FractionToExclude > 0 && $FractionToExclude <= 1)) {
 758     return undef;
 759   }
 760   my($NumberToExclude);
 761   $NumberToExclude = int(@$XArrayRef * $FractionToExclude);
 762   $NumberToExclude = ($NumberToExclude % 2) ? ($NumberToExclude - 1) : $NumberToExclude;
 763   if ($NumberToExclude == @$XArrayRef) {
 764     return undef;
 765   }
 766   my($Mean, $Sum, $Index, $FirstIndex, $LastIndex);
 767   $FirstIndex = $NumberToExclude/2;
 768   $LastIndex = @$XArrayRef - ($NumberToExclude/2) - 1;
 769   $Sum = 0;
 770   my(@SortedXArray);
 771   @SortedXArray = sort { $a <=> $b } @$XArrayRef;
 772   for $Index ($FirstIndex .. $LastIndex) {
 773     $Sum += $SortedXArray[$Index];
 774   }
 775   $Mean = $Sum/(@SortedXArray - $NumberToExclude);
 776   return $Mean;
 777 }
 778 
 779 # Compute the variance of an array of numbers
 780 sub Variance {
 781   my($XArrayRef) = @_;
 782   return _CalculateVariance($XArrayRef, 2);
 783 }
 784 
 785 # Compute the variance of an array of numbers representing entire population
 786 sub VarianceN {
 787   my($XArrayRef) = @_;
 788   return _CalculateVariance($XArrayRef, 1);
 789 }
 790 
 791 # Compute the variance of an array of numbers:
 792 # Mode 1: SUM( (x[i] - Xmean)^2  / n )
 793 # Mode 2: SUM( (x[i] - Xmean)^2  / (n - 1) )
 794 #
 795 sub _CalculateVariance {
 796   my($XArrayRef, $Mode) = @_;
 797 
 798   if (!@$XArrayRef) {
 799     return undef;
 800   }
 801   my($Variance, $Value, $SquareOfDeviations, $Mean, $N);
 802 
 803   $Variance = 0;
 804   $Mean = Mean($XArrayRef);
 805   $SquareOfDeviations = 0;
 806   foreach $Value (@$XArrayRef) {
 807     $SquareOfDeviations += ($Value - $Mean) ** 2;
 808   }
 809   $N = ($Mode == 1) ? @$XArrayRef : (@$XArrayRef - 1);
 810   $Variance =  $SquareOfDeviations / $N;
 811 
 812   return $Variance;
 813 }
 814