1 package StatisticsUtil; 2 # 3 # File: StatisticsUtil.pm 4 # Author: Manish Sud <msud@san.rr.com> 5 # 6 # Copyright (C) 2025 Manish Sud. All rights reserved. 7 # 8 # This file is part of MayaChemTools. 9 # 10 # MayaChemTools is free software; you can redistribute it and/or modify it under 11 # the terms of the GNU Lesser General Public License as published by the Free 12 # Software Foundation; either version 3 of the License, or (at your option) any 13 # later version. 14 # 15 # MayaChemTools is distributed in the hope that it will be useful, but without 16 # any warranty; without even the implied warranty of merchantability of fitness 17 # for a particular purpose. See the GNU Lesser General Public License for more 18 # details. 19 # 20 # You should have received a copy of the GNU Lesser General Public License 21 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or 22 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, 23 # Boston, MA, 02111-1307, USA. 24 # 25 26 use strict; 27 use Exporter; 28 29 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); 30 31 @ISA = qw(Exporter); 32 @EXPORT = qw(Average AverageDeviation Covariance Correlation Euclidean Factorial FactorialDivison GeometricMean Frequency HarmonicMean KLargest KSmallest Kurtosis Maximum Minimum Mean Median Mode PearsonCorrelation Permutations Product Range RSquare Skewness Sum SumOfSquares StandardDeviation StandardDeviationN StandardError Standardize StandardScores StandardScoresN TrimMean Variance VarianceN); 33 @EXPORT_OK = qw(); 34 %EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]); 35 36 # Compute the mean of an array of numbers 37 sub Average { 38 my($XArrayRef) = @_; 39 return Mean($XArrayRef); 40 } 41 42 # Compute the average of the absolute deviation of an array of numbers: SUM( ABS(x[i] - Xmean) ) / n 43 sub AverageDeviation { 44 my($XArrayRef) = @_; 45 46 if (!@$XArrayRef) { 47 return undef; 48 } 49 my($AverageDeviation, $Mean, $Value, $SumOfDeviations); 50 51 $AverageDeviation = 0; 52 $Mean = Mean($XArrayRef); 53 foreach $Value (@$XArrayRef) { 54 $SumOfDeviations += abs($Value - $Mean); 55 } 56 $AverageDeviation = $SumOfDeviations / @$XArrayRef; 57 58 return $AverageDeviation; 59 } 60 61 # Compute correlation coefficient between two arrays of numbers 62 sub Correlation { 63 my($XArrayRef, $YArrayRef) = @_; 64 return PearsonCorrelation($XArrayRef, $YArrayRef); 65 } 66 67 # Compute the covariance between two arrays of numbers: SUM( (x[i] - Xmean) (y[i] - Ymean) ) / n 68 sub Covariance { 69 my($XArrayRef, $YArrayRef) = @_; 70 71 if (!(@$XArrayRef && @$YArrayRef && (@$XArrayRef == @$YArrayRef))) { 72 return undef; 73 } 74 my($Covariance, $XMean, $YMean, $Index, $ProductOfDeviations); 75 76 $Covariance = 0; 77 $XMean = Mean($XArrayRef); 78 $YMean = Mean($YArrayRef); 79 $ProductOfDeviations = 0; 80 for $Index (0 .. $#{@$XArrayRef}) { 81 $ProductOfDeviations += (($XArrayRef->[$Index] - $XMean) * ($YArrayRef->[$Index] - $YMean)); 82 } 83 $Covariance = $ProductOfDeviations / @$XArrayRef; 84 return $Covariance; 85 } 86 87 # Compute the euclidean distance of an array of numbers: SQRT( SUM( x[i] ** 2) ) 88 sub Euclidean { 89 my($XArrayRef) = @_; 90 91 if (!@$XArrayRef) { 92 return undef; 93 } 94 my($SumOfSquares); 95 96 $SumOfSquares = SumOfSquares($XArrayRef); 97 98 return sqrt $SumOfSquares; 99 } 100 101 # Compute factorial of a number... 102 sub Factorial { 103 my($Num) = @_; 104 105 return _Factorial($Num, 1); 106 } 107 108 # Perform factorial division of two numbers... 109 sub FactorialDivison { 110 my($Numerator, $Denominator) = @_; 111 112 # Only works for integer numbers... 113 if ($Numerator <= 0 || ($Numerator != int($Numerator)) || 114 $Denominator <= 0 || ($Denominator != int($Denominator)) ) { 115 return undef; 116 } 117 my($LargerNum, $SmallerNum, $Result); 118 $LargerNum = ($Numerator > $Denominator) ? $Numerator : $Denominator; 119 $SmallerNum = ($Numerator < $Denominator) ? $Numerator : $Denominator; 120 121 $Result = _Factorial($LargerNum, $SmallerNum); 122 if ($Numerator < $Denominator) { 123 $Result = 1/$Result; 124 } 125 return $Result; 126 } 127 128 # Calculate factorial of a number upto a specific limit... 129 sub _Factorial { 130 my($Num, $Limit) = @_; 131 132 # Only works for integer numbers... 133 if ($Num <= 0 || ($Num != int($Num)) || $Limit < 1) { 134 return undef; 135 } 136 137 my($Result) = 1; 138 139 while ($Num > $Limit) { 140 $Result *= $Num; 141 $Num--; 142 } 143 return $Result; 144 } 145 146 # Generate all possible permuations or a specific permutations of items in an array 147 # and return a reference to an array containing array references to generated permuations... 148 # 149 # This alogrithm is based on the example provided by Mark Jason-Dominus, and is available 150 # at CPAN as mjd_permute standalone script. 151 # 152 sub Permutations { 153 my(@DataToPermute) = @_; 154 my($PermutationNum, $NumOfPermutations, @Permutations); 155 156 if (!@DataToPermute) { 157 return undef; 158 } 159 160 @Permutations = (); 161 $NumOfPermutations = Factorial(scalar @DataToPermute); 162 163 for ($PermutationNum = 0; $PermutationNum < $NumOfPermutations; $PermutationNum++) { 164 my @Permutation = @DataToPermute[_PermutationNumToPermutation($PermutationNum, $#DataToPermute)]; 165 push @Permutations, \@Permutation; 166 } 167 168 return \@Permutations; 169 } 170 171 # Generte Nth permutation for a collection of specific size... 172 # 173 sub _PermutationNumToPermutation { 174 my($Num, $Size) = @_; 175 176 return _PatternToPermutation(_PermutationNumToPattern($Num, $Size)); 177 } 178 179 # Generate Nth pattern for a collection of specific size... 180 # 181 sub _PermutationNumToPattern { 182 my($Num, $Size) = @_; 183 my($Index, @Pattern); 184 185 $Index = 1; 186 187 while ($Index <= $Size + 1) { 188 push @Pattern, $Num % $Index; 189 $Num = int($Num/$Index); 190 $Index++; 191 } 192 193 return @Pattern; 194 } 195 196 # Generate permutation of integers from pattern... 197 # 198 sub _PatternToPermutation { 199 my(@Pattern) = @_; 200 my(@Source, @Permutation); 201 202 @Source = (0 .. $#Pattern); 203 204 while (@Pattern) { 205 push @Permutation, splice(@Source, (pop @Pattern), 1); 206 } 207 208 return @Permutation; 209 } 210 211 # Compute the frequency of occurance of values in an array of numbers. Three different 212 # invocation methods are supported: 213 # 214 # Frequency(\@ArrayRef) : Using the smallest and largest values, group the numbers into 215 # 10 bins. 216 # 217 # Frequency(\@ArrayRef, $NumOfBins) : Using the smallest and largest values, group the 218 # numbers into specified bins. 219 # 220 # Frequency(\@ArrayRef, \@BinRange): Use bin range to goup the values into different bins. 221 # 222 # A hash array is returned with keys and values representing range and frequency values respectively. 223 # The frequency value for a specific key corresponds to all the values which are greater than 224 # the previous key and less than or equal to the current key. A key value representing maximum value is 225 # added for generating frequency distribution for specific number of bins, and whenever the maximum 226 # array value is greater than the maximum specified in bin range, it is also added to bin range. 227 # 228 sub Frequency { 229 my($XArrayRef) = @_; 230 231 if (!@$XArrayRef) { 232 return undef; 233 } 234 235 my($BinRange, $NumOfBins, $BinRangeSpecified); 236 237 $BinRangeSpecified = 0; 238 $NumOfBins = 10; 239 if (@_ == 2) { 240 if (ref($_[1]) eq 'ARRAY') { 241 $BinRange = $_[1]; 242 if (!(@$BinRange && (@$BinRange > 1))) { 243 return undef; 244 } 245 # Make sure the bin range contains values in increasing order... 246 my($Index1, $Index2); 247 for $Index1 (0 .. $#{@$BinRange}) { 248 for $Index2 (($Index1 + 1) .. $#{@$BinRange}) { 249 if ($BinRange->[$Index1] >= $BinRange->[$Index2]) { 250 return undef; 251 } 252 } 253 } 254 $BinRangeSpecified = 1; 255 } 256 else { 257 $NumOfBins = $_[1]; 258 if ($NumOfBins <= 1) { 259 return undef; 260 } 261 } 262 } 263 264 # Setup range keys... 265 my(@RangeKeys); 266 @RangeKeys = (); 267 268 my($MinValue, $MaxValue) = Range($XArrayRef); 269 if ($BinRangeSpecified) { 270 push @RangeKeys, @$BinRange; 271 if ($MaxValue > $RangeKeys[$#RangeKeys]) { 272 push @RangeKeys, $MaxValue; 273 } 274 } 275 else { 276 my($MinValue, $MaxValue) = Range($XArrayRef); 277 my($Interval) = ($MaxValue - $MinValue)/$NumOfBins; 278 my($KeyValue) = $MinValue + $Interval; 279 while ($KeyValue < $MaxValue) { 280 push @RangeKeys, $KeyValue; 281 $KeyValue += $Interval; 282 } 283 push @RangeKeys, $MaxValue; 284 } 285 286 #Setup frequency hash array... 287 my(%FrequencyMap); 288 %FrequencyMap = (); 289 290 %FrequencyMap = map { $_ => 0 } @RangeKeys; 291 292 # Count values... 293 my($Key, $Value); 294 295 VALUE: for $Value (@$XArrayRef) { 296 for $Key (@RangeKeys) { 297 if ($Value <= $Key) { 298 $FrequencyMap{$Key} += 1; 299 next VALUE; 300 } 301 } 302 } 303 return (%FrequencyMap); 304 } 305 306 # Compute the geometric mean of an array of numbers: NthROOT( PRODUCT(x[i]) ) 307 sub GeometricMean { 308 my($XArrayRef) = @_; 309 310 if (!@$XArrayRef) { 311 return undef; 312 } 313 my($Mean, $Product, $Value); 314 $Product = 1; 315 foreach $Value (@$XArrayRef) { 316 if ($Value <= 0 ) { 317 return undef; 318 } 319 $Product *= $Value; 320 } 321 $Mean = $Product ** (1 / @$XArrayRef); 322 return $Mean; 323 } 324 325 # Compute the harmonic mean of an array of numbers: 1 / ( SUM(1/x[i]) / n ) 326 sub HarmonicMean { 327 my($XArrayRef) = @_; 328 329 if (!@$XArrayRef) { 330 return undef; 331 } 332 my($Mean, $Sum, $Value); 333 $Sum = 0; 334 foreach $Value (@$XArrayRef) { 335 if ($Value <= 0 ) { 336 return undef; 337 } 338 $Sum += 1/$Value; 339 } 340 $Mean = 1/($Sum/@$XArrayRef); 341 return $Mean; 342 } 343 344 # Return the k-largest value from an array of numbers 345 sub KLargest { 346 my($XArrayRef, $K) = @_; 347 348 if (!(@$XArrayRef && ($K > 0) && ($K <= @$XArrayRef))) { 349 return undef; 350 } 351 my($KLargest, @SortedXArray); 352 @SortedXArray = sort { $b <=> $a } @$XArrayRef; 353 $KLargest = $SortedXArray[$K - 1]; 354 return $KLargest; 355 } 356 357 # Return the k-smallest value from an array of numbers 358 sub KSmallest { 359 my($XArrayRef, $K) = @_; 360 361 if (!(@$XArrayRef && ($K > 0) && ($K <= @$XArrayRef))) { 362 return undef; 363 } 364 my($KSmallest, @SortedXArray); 365 @SortedXArray = sort { $a <=> $b } @$XArrayRef; 366 $KSmallest = $SortedXArray[$K - 1]; 367 return $KSmallest; 368 } 369 370 # Compute the kurtosis of an array of numbers: 371 # [ {n(n + 1)/(n - 1)(n - 2)(n - 3)} SUM{ ((x[i] - Xmean)/STDDEV)^4 } ] - {3((n - 1)^2)}/{(n - 2)(n-3)} 372 # 373 sub Kurtosis { 374 my($XArrayRef) = @_; 375 376 if (!@$XArrayRef || ((@$XArrayRef - 3) <= 0)) { 377 return undef; 378 } 379 my($Kurtosis, $Mean, $StandardDeviation, $Value); 380 $Mean = Mean($XArrayRef); 381 if (!defined $Mean) { 382 return undef; 383 } 384 $StandardDeviation = StandardDeviation($XArrayRef); 385 if (!(defined $StandardDeviation && $StandardDeviation != 0)) { 386 return undef; 387 } 388 389 my($SumOfScores, $SampleSize); 390 $SumOfScores = 0; 391 for $Value (@$XArrayRef) { 392 $SumOfScores += (($Value - $Mean)/$StandardDeviation) ** 4; 393 } 394 $SampleSize = @$XArrayRef; 395 $Kurtosis = ((($SampleSize * ($SampleSize + 1))/(($SampleSize - 1) * ($SampleSize - 2) * ($SampleSize - 3))) * $SumOfScores) - ((3 * (($SampleSize - 1) ** 2))/(($SampleSize - 2) * ($SampleSize - 3))); 396 return $Kurtosis; 397 } 398 399 # Return the smallest value from an array of numbers 400 sub Minimum { 401 my($XArrayRef) = @_; 402 return KSmallest($XArrayRef, 1); 403 } 404 405 # Return the largest value from an array of numbers 406 sub Maximum { 407 my($XArrayRef) = @_; 408 return KLargest($XArrayRef, 1); 409 } 410 411 # Compute the mean of an array of numbers: SUM( x[i] ) / n 412 sub Mean { 413 my($XArrayRef) = @_; 414 415 if (!@$XArrayRef) { 416 return undef; 417 } 418 my($Mean, $Sum, $Value); 419 $Sum = 0; 420 foreach $Value (@$XArrayRef) { 421 $Sum += $Value; 422 } 423 $Mean = $Sum / @$XArrayRef; 424 return $Mean; 425 } 426 427 # Compute the median value of an array of numbers. For an even number array, it's 428 # the average of two middle values. 429 # 430 # For even values of n: Xsorted[(n - 1)/2 + 1] 431 # For odd values of n: (Xsorted[n/2] + Xsorted[n/2 + 1])/2 432 # 433 sub Median { 434 my($XArrayRef) = @_; 435 436 if (!@$XArrayRef) { 437 return undef; 438 } 439 my($Median, @SortedXArray); 440 $Median = 0; 441 @SortedXArray = sort { $a <=> $b } @$XArrayRef; 442 if (@$XArrayRef % 2) { 443 my($MidIndex); 444 $MidIndex = int(@SortedXArray - 1)/2; 445 $Median = $SortedXArray[$MidIndex]; 446 } 447 else { 448 # Even number array... 449 my($MidPosition); 450 $MidPosition = int(@SortedXArray / 2); 451 $Median = ($SortedXArray[$MidPosition - 1] + $SortedXArray[$MidPosition]) / 2; 452 } 453 return $Median; 454 } 455 456 # Return the most frequently occuring value in an array of numbers 457 sub Mode { 458 my($XArrayRef) = @_; 459 460 if (!@$XArrayRef) { 461 return undef; 462 } 463 my($Value, %ValueToCountMap, @CountList, @SortedCountList); 464 %ValueToCountMap = (); 465 @CountList = (); 466 @SortedCountList = (); 467 for $Value (@$XArrayRef) { 468 if (exists $ValueToCountMap{$Value}) { 469 $ValueToCountMap{$Value} += 1; 470 } 471 else { 472 $ValueToCountMap{$Value} = 1; 473 } 474 } 475 for $Value (keys %ValueToCountMap) { 476 push @CountList, $ValueToCountMap{$Value}; 477 } 478 @SortedCountList = sort { $b <=> $a } @CountList; 479 480 # Make sure the frequency of mode value is greater than one and check for 481 # multiple modes as well... 482 # 483 my($ModeCount, $ModeValue); 484 $ModeCount = $SortedCountList[0]; 485 if ($ModeCount <= 1) { 486 return undef; 487 } 488 # Get the first mode value... 489 VALUE: for $Value (keys %ValueToCountMap) { 490 if ($ValueToCountMap{$Value} == $ModeCount) { 491 $ModeValue = $Value; 492 # Set it to zero to skip it next time... 493 $ValueToCountMap{$Value} = 0; 494 last VALUE; 495 } 496 } 497 498 if (wantarray) { 499 # Retrieve all the modes... 500 my(@Modes, $Count); 501 @Modes = (); 502 push @Modes, $ModeValue; 503 for $Count (@SortedCountList) { 504 if ($Count == $ModeCount) { 505 VALUE: for $Value (keys %ValueToCountMap) { 506 if ($ValueToCountMap{$Value} == $ModeCount) { 507 push @Modes, $Value; 508 # Set it to zero to skip it next time... 509 $ValueToCountMap{$Value} = 0; 510 last VALUE; 511 } 512 } 513 } 514 } 515 return sort {$b <=> $a} @Modes; 516 } 517 else { 518 return $ModeValue; 519 } 520 } 521 522 523 # Compute the Pearson correlation coefficient between two arrays of numbers: 524 # 525 # SUM( (x[i] - Xmean)(y[i] - Ymean) ) / SQRT( SUM( (x[i] - Xmean)^2 )(SUM( (y[i] - Ymean)^2 )) ) 526 # 527 # It returns values in the range from -1.0 to 1.0 528 sub PearsonCorrelation { 529 my($XArrayRef, $YArrayRef) = @_; 530 531 if (!(@$XArrayRef && @$YArrayRef && (@$XArrayRef == @$YArrayRef))) { 532 return undef; 533 } 534 my($Correlation, $XMean, $YMean, $Index, $XValueDeviation, $YValueDeviation, $SquareOfXDeviations, $SquareOfYDeviations, $ProductOfDeviations); 535 536 $Correlation = 0; 537 $XMean = Mean($XArrayRef); 538 $YMean = Mean($YArrayRef); 539 $ProductOfDeviations = 0; $SquareOfXDeviations = 0; $SquareOfYDeviations = 0; 540 for $Index (0 .. $#{@$XArrayRef}) { 541 $XValueDeviation = $XArrayRef->[$Index] - $XMean; 542 $YValueDeviation = $YArrayRef->[$Index] - $YMean; 543 $ProductOfDeviations += ($XValueDeviation * $YValueDeviation); 544 $SquareOfXDeviations += $XValueDeviation ** 2; 545 $SquareOfYDeviations += $YValueDeviation ** 2; 546 } 547 $Correlation = $ProductOfDeviations / sqrt($SquareOfXDeviations * $SquareOfYDeviations); 548 return $Correlation; 549 } 550 551 # Return the smallest and largest values from an array of numbers 552 sub Range { 553 my($XArrayRef) = @_; 554 555 if (!@$XArrayRef) { 556 return (undef, undef); 557 } 558 my($Smallest, $Largest, @SortedXArray); 559 @SortedXArray = sort { $a <=> $b } @$XArrayRef; 560 $Smallest = $SortedXArray[0]; 561 $Largest = $SortedXArray[$#SortedXArray]; 562 return ($Smallest, $Largest); 563 } 564 565 # Compute square of the Pearson correlation coefficient between two arrays of numbers. 566 # 567 sub RSquare { 568 my($XArrayRef, $YArrayRef) = @_; 569 my($RSquare, $Correlation); 570 571 $RSquare = undef; 572 $Correlation = PearsonCorrelation($XArrayRef, $YArrayRef); 573 if (defined $Correlation) { 574 $RSquare = $Correlation ** 2; 575 } 576 return $RSquare; 577 } 578 579 # Compute the skewness of an array of numbers: 580 # {n/(n - 1)(n - 2)} SUM{ ((x[i] - Xmean)/STDDEV)^3 } 581 # 582 sub Skewness { 583 my($XArrayRef) = @_; 584 585 if (!@$XArrayRef || ((@$XArrayRef - 2) <= 0)) { 586 return undef; 587 } 588 my($Skewness, $Mean, $StandardDeviation, $Value); 589 $Mean = Mean($XArrayRef); 590 if (!defined $Mean) { 591 return undef; 592 } 593 $StandardDeviation = StandardDeviation($XArrayRef); 594 if (!(defined $StandardDeviation && $StandardDeviation != 0)) { 595 return undef; 596 } 597 598 my($SumOfScores, $SampleSize); 599 $SumOfScores = 0; 600 for $Value (@$XArrayRef) { 601 $SumOfScores += (($Value - $Mean)/$StandardDeviation) ** 3; 602 } 603 $SampleSize = @$XArrayRef; 604 $Skewness = ($SampleSize/(($SampleSize - 1) * ($SampleSize - 2) )) * $SumOfScores; 605 return $Skewness; 606 } 607 608 # Compute the standard deviation of an array of numbers 609 sub StandardDeviation { 610 my($XArrayRef) = @_; 611 return _CalculateStandardDeviation($XArrayRef, 2); 612 } 613 614 # Compute the standard deviation of an array of numbers representing entire population 615 sub StandardDeviationN { 616 my($XArrayRef) = @_; 617 return _CalculateStandardDeviation($XArrayRef, 1); 618 } 619 620 # Compute the standard deviation of an array of numbers. 621 # Mode 1: SQRT ( SUM( (x[i] - mean)^2 ) / n ) 622 # Mode 2: SQRT ( SUM( (x[i] - mean)^2 ) / (n - 1) ) 623 # 624 sub _CalculateStandardDeviation { 625 my($XArrayRef, $Mode) = @_; 626 627 if (!@$XArrayRef) { 628 return undef; 629 } 630 my($StandardDeviation, $Value, $SquareOfDeviations, $Mean, $N); 631 632 $StandardDeviation = 0; 633 $Mean = Mean($XArrayRef); 634 $SquareOfDeviations = 0; 635 foreach $Value (@$XArrayRef) { 636 $SquareOfDeviations += ($Value - $Mean) ** 2; 637 } 638 $N = ($Mode == 1) ? @$XArrayRef : (@$XArrayRef - 1); 639 $StandardDeviation = sqrt($SquareOfDeviations / $N); 640 641 return $StandardDeviation; 642 } 643 644 # Compute the standard error using standard deviation and sample size 645 sub StandardError { 646 my($StandardDeviation, $Count) = @_; 647 648 if ($Count <= 0) { 649 return undef; 650 } 651 my($StandardError); 652 $StandardError = $StandardDeviation / sqrt($Count); 653 654 return $StandardError; 655 } 656 657 # Standardize the value using mean and standard deviation 658 sub Standardize { 659 my($Value, $Mean, $StandardDeviation) = @_; 660 661 if ($StandardDeviation <= 0) { 662 return undef; 663 } 664 my($StandardizedValue); 665 $StandardizedValue = ($Value - $Mean)/$StandardDeviation; 666 667 return $StandardizedValue; 668 } 669 670 # Compute the standard deviation above the mean for an array of numbers. 671 sub StandardScores { 672 my($XArrayRef) = @_; 673 return _CalculateStandardScores($XArrayRef, 2); 674 } 675 676 # Compute the standard deviation above the mean for an array of numbers representing entire population 677 sub StandardScoresN { 678 my($XArrayRef) = @_; 679 return _CalculateStandardScores($XArrayRef, 1); 680 } 681 682 # Compute the standard deviation above the mean for an array of numbers. 683 # Mode 1: (x[i] - mean) / n 684 # Mode 2: (x[i] - mean) / (n - 1) 685 # 686 sub _CalculateStandardScores { 687 my($XArrayRef, $Mode) = @_; 688 689 if (!@$XArrayRef) { 690 return undef; 691 } 692 my(@StandardScores, $Mean, $StandardDeviation, $Value); 693 694 $Mean = Mean($XArrayRef); 695 $StandardDeviation = _CalculateStandardDeviation($XArrayRef, $Mode); 696 if (!(defined($StandardDeviation) && $StandardDeviation > 0)) { 697 return undef; 698 } 699 @StandardScores = (); 700 for $Value (@$XArrayRef) { 701 push @StandardScores, ($Value - $Mean)/$StandardDeviation; 702 } 703 704 return @StandardScores; 705 } 706 707 # Compute the product of an array of numbers 708 sub Product { 709 my($XArrayRef) = @_; 710 711 if (!@$XArrayRef) { 712 return undef; 713 } 714 my($Product, $Value); 715 $Product = 1; 716 foreach $Value (@$XArrayRef) { 717 $Product *= $Value; 718 } 719 return $Product; 720 } 721 722 # Compute the sum of an array of numbers 723 sub Sum { 724 my($XArrayRef) = @_; 725 726 if (!@$XArrayRef) { 727 return undef; 728 } 729 my($Sum, $Value); 730 $Sum = 0; 731 foreach $Value (@$XArrayRef) { 732 $Sum += $Value; 733 } 734 return $Sum; 735 } 736 737 # Compute the sum of squares of an array of numbers 738 sub SumOfSquares { 739 my($XArrayRef) = @_; 740 741 if (!@$XArrayRef) { 742 return undef; 743 } 744 my($SumOfSquares, $Value); 745 $SumOfSquares = 0; 746 foreach $Value (@$XArrayRef) { 747 $SumOfSquares += $Value ** 2; 748 } 749 return $SumOfSquares; 750 } 751 752 # Compute the mean of an array of numbers by excluding a fraction of 753 # numbers from the top and bottom of the data set. 754 sub TrimMean { 755 my($XArrayRef, $FractionToExclude) = @_; 756 757 if (!(@$XArrayRef && $FractionToExclude > 0 && $FractionToExclude <= 1)) { 758 return undef; 759 } 760 my($NumberToExclude); 761 $NumberToExclude = int(@$XArrayRef * $FractionToExclude); 762 $NumberToExclude = ($NumberToExclude % 2) ? ($NumberToExclude - 1) : $NumberToExclude; 763 if ($NumberToExclude == @$XArrayRef) { 764 return undef; 765 } 766 my($Mean, $Sum, $Index, $FirstIndex, $LastIndex); 767 $FirstIndex = $NumberToExclude/2; 768 $LastIndex = @$XArrayRef - ($NumberToExclude/2) - 1; 769 $Sum = 0; 770 my(@SortedXArray); 771 @SortedXArray = sort { $a <=> $b } @$XArrayRef; 772 for $Index ($FirstIndex .. $LastIndex) { 773 $Sum += $SortedXArray[$Index]; 774 } 775 $Mean = $Sum/(@SortedXArray - $NumberToExclude); 776 return $Mean; 777 } 778 779 # Compute the variance of an array of numbers 780 sub Variance { 781 my($XArrayRef) = @_; 782 return _CalculateVariance($XArrayRef, 2); 783 } 784 785 # Compute the variance of an array of numbers representing entire population 786 sub VarianceN { 787 my($XArrayRef) = @_; 788 return _CalculateVariance($XArrayRef, 1); 789 } 790 791 # Compute the variance of an array of numbers: 792 # Mode 1: SUM( (x[i] - Xmean)^2 / n ) 793 # Mode 2: SUM( (x[i] - Xmean)^2 / (n - 1) ) 794 # 795 sub _CalculateVariance { 796 my($XArrayRef, $Mode) = @_; 797 798 if (!@$XArrayRef) { 799 return undef; 800 } 801 my($Variance, $Value, $SquareOfDeviations, $Mean, $N); 802 803 $Variance = 0; 804 $Mean = Mean($XArrayRef); 805 $SquareOfDeviations = 0; 806 foreach $Value (@$XArrayRef) { 807 $SquareOfDeviations += ($Value - $Mean) ** 2; 808 } 809 $N = ($Mode == 1) ? @$XArrayRef : (@$XArrayRef - 1); 810 $Variance = $SquareOfDeviations / $N; 811 812 return $Variance; 813 } 814