MayaChemTools

   1 package Fingerprints::FingerprintsVector;
   2 #
   3 # File: FingerprintsVector.pm
   4 # Author: Manish Sud <msud@san.rr.com>
   5 #
   6 # Copyright (C) 2024 Manish Sud. All rights reserved.
   7 #
   8 # This file is part of MayaChemTools.
   9 #
  10 # MayaChemTools is free software; you can redistribute it and/or modify it under
  11 # the terms of the GNU Lesser General Public License as published by the Free
  12 # Software Foundation; either version 3 of the License, or (at your option) any
  13 # later version.
  14 #
  15 # MayaChemTools is distributed in the hope that it will be useful, but without
  16 # any warranty; without even the implied warranty of merchantability of fitness
  17 # for a particular purpose.  See the GNU Lesser General Public License for more
  18 # details.
  19 #
  20 # You should have received a copy of the GNU Lesser General Public License
  21 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  22 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  23 # Boston, MA, 02111-1307, USA.
  24 #
  25 
  26 use strict;
  27 use Carp;
  28 use Exporter;
  29 use Scalar::Util ();
  30 use MathUtil ();
  31 use TextUtil ();
  32 use StatisticsUtil ();
  33 use BitVector;
  34 use Vector;
  35 
  36 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
  37 
  38 @ISA = qw(Exporter);
  39 
  # Names of distance coefficient functions; presumably implemented later in this file.
  # The list is used to set up export lists and tags...
  my(@DistanceCoefficients) = qw(CityBlockDistanceCoefficient EuclideanDistanceCoefficient HammingDistanceCoefficient ManhattanDistanceCoefficient SoergelDistanceCoefficient);

  # Names of similarity coefficient functions; presumably implemented later in this file...
  my(@SimilarityCoefficients) = qw(CosineSimilarityCoefficient CzekanowskiSimilarityCoefficient DiceSimilarityCoefficient OchiaiSimilarityCoefficient JaccardSimilarityCoefficient SorensonSimilarityCoefficient TanimotoSimilarityCoefficient);

  # Names of functions creating a fingerprints vector from a string...
  my(@NewFromString) = qw(NewFromValuesString NewFromValuesAndIDsString NewFromIDsAndValuesString NewFromValuesAndIDsPairsString NewFromIDsAndValuesPairsString);

  # Exported by default...
  @EXPORT = qw(IsFingerprintsVector);

  # Exported on request.
  #
  # Exporter accepts a name from %EXPORT_TAGS only when that name is also present in
  # @EXPORT or @EXPORT_OK. The function names collected above are therefore added
  # here explicitly; without them, importing the new, distancecoefficients or
  # similaritycoefficients tag is rejected by Exporter.
  #
  # qw() doesn't interpolate arrays, so the '@DistanceCoefficients' and
  # '@SimilarityCoefficients' entries are literal symbol names. These are retained
  # only to keep any existing import list mentioning them working...
  #
  @EXPORT_OK = (
    qw(GetSupportedDistanceCoefficients GetSupportedSimilarityCoefficients GetSupportedDistanceAndSimilarityCoefficients @DistanceCoefficients @SimilarityCoefficients),
    @NewFromString,
    @DistanceCoefficients,
    @SimilarityCoefficients
  );

  %EXPORT_TAGS = (
                  new => [@NewFromString],
                  distancecoefficients => [@DistanceCoefficients],
                  similaritycoefficients => [@SimilarityCoefficients],
                  all  => [@EXPORT, @EXPORT_OK]
                 );

  # Setup class variables; ClassName is used in error and warning messages...
  my($ClassName);
  _InitializeClass();

  # Overload Perl functions: stringification is delegated to StringifyFingerprintsVector...
  use overload '""' => 'StringifyFingerprintsVector';
  65 
  # Class constructor: creates an empty fingerprints vector object and then applies
  # the specified name and value pairs to it. It may be invoked on a class name or
  # on an existing object...
  #
  sub new {
    my($Class, %NamesAndValues) = @_;

    # Use class of the invocant during invocation on an existing object...
    my $This = bless {}, ref($Class) || $Class;

    $This->_InitializeFingerprintsVector();
    $This->_InitializeFingerprintsVectorProperties(%NamesAndValues);

    return $This;
  }
  80 
  # Initialize object data to an untyped vector without any values or value IDs.
  #
  # Returns the object itself...
  #
  sub _InitializeFingerprintsVector {
    my($This) = @_;

    # Type of fingerprint vector; empty until it's explicitly set...
    $This->{Type} = '';

    # Empty out lists of fingerprint vector values and value IDs in place...
    my($ListName);
    for $ListName ('Values', 'ValueIDs') {
      @{$This->{$ListName}} = ();
    }

    return $This;
  }
  97 
  # Initialize class variables...
  #
  sub _InitializeClass {
    # Name of this package; it's used as a prefix in error and warning messages...
    $ClassName = __PACKAGE__;

    return $ClassName;
  }
 103 
 # Initialize object properties by invoking Set<Name> method for each specified name
 # and value pair. An error is raised after all properties have been set if Type is
 # not one of the specified names.
 #
 # Returns the object itself...
 #
 sub _InitializeFingerprintsVectorProperties {
   my($This, %NamesAndValues) = @_;

   my($PropertyName, $SetterName);
   for $PropertyName (keys %NamesAndValues) {
     $SetterName = "Set${PropertyName}";
     $This->$SetterName($NamesAndValues{$PropertyName});
   }

   # Type is the only required property...
   if (exists $NamesAndValues{Type}) {
     return $This;
   }

   croak "Error: ${ClassName}->New: Object can't be instantiated without specifying type...";
 }
 119 
 # Create a new fingerprints vector of a specified type using a string containing
 # space delimited values. An empty, undefined or 'None' values string leads to a
 # vector without any values.
 #
 # This functionality can be either invoked as a class function or an object method:
 #
 #    NewFromValuesString($Type, $ValuesString)
 #    $Object->NewFromValuesString($Type, $ValuesString)
 #
 sub NewFromValuesString ($$;$) {
   my(@Arguments) = @_;

   # Three arguments imply invocation as a method; the invocant is not used...
   if (@Arguments == 3) {
     shift @Arguments;
   }
   my($Type, $ValuesString) = @Arguments;

   my $ValuesSpecified = defined($ValuesString) && length($ValuesString) && $ValuesString !~ /^None$/i;
   my @Values = $ValuesSpecified ? split(' ', $ValuesString) : ();

   my $FingerprintsVector = Fingerprints::FingerprintsVector->new('Type' => $Type, 'Values' => \@Values);

   return $FingerprintsVector;
 }
 144 
 # Create a new fingerprints vector of a specified type using a string of the form
 # 'Values;ValueIDs', where values and value IDs on each side of the semicolon are
 # delimited by spaces. An empty, undefined or 'None' part corresponds to an empty list.
 #
 # A warning is issued and undef is returned when number of values is different from
 # number of value IDs.
 #
 # This functionality can be either invoked as a class function or an object method.
 #
 sub NewFromValuesAndIDsString ($$;$) {
   my(@Arguments) = @_;

   # Three arguments imply invocation as a method; the invocant is not used...
   if (@Arguments == 3) {
     shift @Arguments;
   }
   my($Type, $ValuesAndIDsString) = @Arguments;

   # Values come first followed by value IDs...
   my($ValuesString, $ValueIDsString) = split(';', $ValuesAndIDsString);

   my $ValuesSpecified = defined($ValuesString) && length($ValuesString) && $ValuesString !~ /^None$/i;
   my @Values = $ValuesSpecified ? split(' ', $ValuesString) : ();

   my $ValueIDsSpecified = defined($ValueIDsString) && length($ValueIDsString) && $ValueIDsString !~ /^None$/i;
   my @ValueIDs = $ValueIDsSpecified ? split(' ', $ValueIDsString) : ();

   if (@Values != @ValueIDs ) {
     carp "Warning: ${ClassName}->NewFromValuesAndIDsString: Object can't be instantiated: Number specified values, " . scalar @Values . ", must be equal to number of specified value IDs, " . scalar @ValueIDs .  "...";
     return undef;
   }

   my $FingerprintsVector = Fingerprints::FingerprintsVector->new('Type' => $Type, 'Values' => \@Values, 'ValueIDs' => \@ValueIDs);

   return $FingerprintsVector;
 }
 183 
 # Create a new fingerprints vector of a specified type using a string of the form
 # 'ValueIDs;Values', where value IDs and values on each side of the semicolon are
 # delimited by spaces. An empty, undefined or 'None' part corresponds to an empty list.
 #
 # A warning is issued and undef is returned when number of values is different from
 # number of value IDs.
 #
 # This functionality can be either invoked as a class function or an object method.
 #
 sub NewFromIDsAndValuesString ($$;$) {
   my(@Arguments) = @_;

   # Three arguments imply invocation as a method; the invocant is not used...
   if (@Arguments == 3) {
     shift @Arguments;
   }
   my($Type, $IDsAndValuesString) = @Arguments;

   # Value IDs come first followed by values...
   my($ValueIDsString, $ValuesString) = split(';', $IDsAndValuesString);

   my $ValuesSpecified = defined($ValuesString) && length($ValuesString) && $ValuesString !~ /^None$/i;
   my @Values = $ValuesSpecified ? split(' ', $ValuesString) : ();

   my $ValueIDsSpecified = defined($ValueIDsString) && length($ValueIDsString) && $ValueIDsString !~ /^None$/i;
   my @ValueIDs = $ValueIDsSpecified ? split(' ', $ValueIDsString) : ();

   if (@Values != @ValueIDs ) {
     carp "Warning: ${ClassName}->NewFromIDsAndValuesString: Object can't be instantiated: Number specified values, " . scalar @Values . ", must be equal to number of specified value IDs, " . scalar @ValueIDs .  "...";
     return undef;
   }

   my $FingerprintsVector = Fingerprints::FingerprintsVector->new('Type' => $Type, 'Values' => \@Values, 'ValueIDs' => \@ValueIDs);

   return $FingerprintsVector;
 }
 222 
 # Create a new fingerprints vector of a specified type using a string containing
 # space delimited pairs of values and value IDs: 'Value1 ID1 Value2 ID2 ...'. A
 # single 'None None' pair corresponds to a vector without any values and value IDs.
 #
 # A warning is issued and undef is returned when the string contains an odd number
 # of entries.
 #
 # This functionality can be either invoked as a class function or an object method.
 #
 sub NewFromValuesAndIDsPairsString ($$;$) {
   my(@Arguments) = @_;

   # Three arguments imply invocation as a method; the invocant is not used...
   if (@Arguments == 3) {
     shift @Arguments;
   }
   my($Type, $ValuesAndIDsPairsString) = @Arguments;

   my @ValuesAndIDsPairs = split(' ', $ValuesAndIDsPairsString);
   if (@ValuesAndIDsPairs % 2) {
     carp "Warning: ${ClassName}->NewFromValuesAndIDsPairsString: No fingerprint vector created: Invalid values and IDs pairs data: Input list must contain even number of values and IDs pairs...";
     return undef;
   }

   my $NoDataSpecified = (@ValuesAndIDsPairs == 2 && $ValuesAndIDsPairs[0] =~ /^None$/i && $ValuesAndIDsPairs[1] =~ /^None$/i);

   my(@Values, @ValueIDs);
   if (!$NoDataSpecified) {
     # Take entries off the list two at a time: value followed by its ID...
     my @Remaining = @ValuesAndIDsPairs;
     while (@Remaining) {
       my($Value, $ValueID) = splice(@Remaining, 0, 2);
       push @Values, $Value;
       push @ValueIDs, $ValueID;
     }
   }

   my $FingerprintsVector = Fingerprints::FingerprintsVector->new('Type' => $Type, 'Values' => \@Values, 'ValueIDs' => \@ValueIDs);

   return $FingerprintsVector;
 }
 257 
 # Create a new fingerprints vector of a specified type using a string containing
 # space delimited pairs of value IDs and values: 'ID1 Value1 ID2 Value2 ...'. A
 # single 'None None' pair corresponds to a vector without any values and value IDs.
 #
 # An error is raised using croak when the string contains an odd number of entries.
 # Note that NewFromValuesAndIDsPairsString handles the same condition by issuing a
 # warning and returning undef; the croak here is retained for callers relying on it.
 # No value is returned on that path: the statement which used to follow croak could
 # never be reached and has been dropped.
 #
 # This functionality can be either invoked as a class function or an object method.
 #
 sub NewFromIDsAndValuesPairsString ($$;$) {
   my($FirstParameter, $SecondParameter, $ThirdParamater) = @_;
   my($Type, $IDsAndValuesPairsString);

   # Three arguments imply invocation as a method; the invocant is not used...
   if (@_ == 3) {
     ($Type, $IDsAndValuesPairsString) = ($SecondParameter, $ThirdParamater);
   }
   else {
     ($Type, $IDsAndValuesPairsString) = ($FirstParameter, $SecondParameter);
   }
   my($FingerprintsVector, $Index, @Values, @ValueIDs, @IDsAndValuesPairs);

   @IDsAndValuesPairs = split(' ', $IDsAndValuesPairsString);
   if (@IDsAndValuesPairs % 2) {
     croak "Error: ${ClassName}->NewFromIDsAndValuesPairsString: No fingerprint vector created: Invalid values and IDs pairs data: Input list must contain even number of values and IDs pairs...";
   }

   @Values = (); @ValueIDs = ();
   if (!(@IDsAndValuesPairs == 2 && $IDsAndValuesPairs[0] =~ /^None$/i && $IDsAndValuesPairs[1] =~ /^None$/i)) {
     # Each pair contains value ID followed by its value...
     for ($Index = 0; $Index < $#IDsAndValuesPairs; $Index += 2) {
       push @ValueIDs, $IDsAndValuesPairs[$Index];
       push @Values, $IDsAndValuesPairs[$Index + 1];
     }
   }
   $FingerprintsVector = Fingerprints::FingerprintsVector->new('Type' => $Type, 'Values' => \@Values, 'ValueIDs' => \@ValueIDs);

   return $FingerprintsVector;
 }
 292 
 # Set type of fingerprint vector. Supported types are: OrderedNumericalValues, NumericalValues, and
 # AlphaNumericalValues. Type names are matched without regard to case.
 #
 #  .  For OrderedNumericalValues type, both vectors must be of the same size and contain similar
 #     types of numerical values in the same order.
 #
 #  .  For NumericalValues type, vector value IDs for both vectors must be specified; however, their
 #     size and order of IDs and numerical values may be different. For each vector, value IDs must
 #     correspond to vector values.
 #
 #  .  For AlphaNumericalValues type, vectors may contain both numerical and alphanumerical values
 #     and their sizes may be different.
 #
 # An error is raised for an unsupported type or an attempt to change type which has already
 # been set. Returns the object itself...
 #
 sub SetType {
   my($This, $Type) = @_;

   unless ($Type =~ /^(OrderedNumericalValues|NumericalValues|AlphaNumericalValues)$/i) {
     croak "Error: ${ClassName}->SetType: Specified value, $Type, for Type is not vaild. Supported types in current release of MayaChemTools: OrderedNumericalValues, NumericalValues or AlphaNumericalValues";
   }

   # Type may be set only once...
   croak "Error: ${ClassName}->SetType: Can't change intial fingerprints vector type:  It's already set..." if $This->{Type};

   $This->{Type} = $Type;

   return $This;
 }
 320 
 # Get fingerprints vector type...
 #
 sub GetType {
   my $This = shift;
   my $Type = $This->{Type};

   return $Type;
 }
 328 
 # Set ID of fingerprints vector and return the object itself...
 sub SetID {
   my $This = shift;
   my($NewID) = @_;

   $This->{ID} = $NewID;

   return $This;
 }
 337 
 # Get ID of fingerprints vector; 'None' is returned for a vector without any ID...
 sub GetID {
   my $This = shift;

   if (exists $This->{ID}) {
     return $This->{ID};
   }
   return 'None';
 }
 344 
 # Set description of fingerprints vector and return the object itself...
 sub SetDescription {
   my $This = shift;
   my($NewDescription) = @_;

   $This->{Description} = $NewDescription;

   return $This;
 }
 353 
 # Get description of fingerprints vector; a default text is returned for a vector
 # without any description...
 sub GetDescription {
   my $This = shift;

   if (exists $This->{Description}) {
     return $This->{Description};
   }
   return 'No description available';
 }
 360 
 # Set vector type and return the object itself. The value is stored as is
 # without any validation...
 sub SetVectorType {
   my $This = shift;
   my($NewVectorType) = @_;

   $This->{VectorType} = $NewVectorType;

   return $This;
 }
 369 
 # Get vector type; 'FingerprintsVector' is returned for a vector without an
 # explicitly set vector type...
 sub GetVectorType {
   my $This = shift;

   if (exists $This->{VectorType}) {
     return $This->{VectorType};
   }
   return 'FingerprintsVector';
 }
 376 
 # Replace values of a fingerprint vector using a vector, reference to an array or
 # a list of values. Returns the object itself...
 #
 sub SetValues {
   my $This = shift;

   $This->_SetOrAddValuesOrValueIDs("SetValues", @_);

   return $This;
 }
 386 
 # Replace value IDs of a fingerprint vector using a vector, reference to an array
 # or a list of value IDs. Returns the object itself...
 #
 sub SetValueIDs {
   my $This = shift;

   $This->_SetOrAddValuesOrValueIDs("SetValueIDs", @_);

   return $This;
 }
 396 
 # Append values to a fingerprint vector using a vector, reference to an array or
 # a list of values. Returns the object itself...
 #
 sub AddValues {
   my $This = shift;

   $This->_SetOrAddValuesOrValueIDs("AddValues", @_);

   return $This;
 }
 406 
 # Append value IDs to a fingerprint vector using a vector, reference to an array
 # or a list of value IDs. Returns the object itself...
 #
 sub AddValueIDs {
   my $This = shift;

   $This->_SetOrAddValuesOrValueIDs("AddValueIDs", @_);

   return $This;
 }
 416 
 # Set or add values or value IDs using:
 #
 #    o List of values or ValueIDs
 #    o Reference to a list of values or ValueIDs
 #    o A vector containing values or ValueIDs
 #
 # Mode must be one of: SetValues, SetValueIDs, AddValues or AddValueIDs. Only the
 # first specified value is examined to figure out the format of the input.
 #
 # Nothing is changed and nothing is returned for an empty input list. An error is
 # raised for a reference to an unsupported format or an unknown mode. Otherwise,
 # the object itself is returned...
 #
 sub _SetOrAddValuesOrValueIDs {
   my($This, $Mode, @Values) = @_;

   if (!@Values) {
     return;
   }

   # Collect specified values or valueIDs...
   my($FirstValue, $TypeOfFirstValue, $ValuesRef, @NewValues);

   $FirstValue = $Values[0];
   $TypeOfFirstValue = ref $FirstValue;
   if ($TypeOfFirstValue =~ /^(SCALAR|HASH|CODE|REF|GLOB)/) {
     croak "Error: ${ClassName}-> _SetOrAddValuesOrValueIDs: Trying to add values to vector object with a reference to unsupported value format...";
   }

   if (Vector::IsVector($FirstValue)) {
     # It's a vector...
     $ValuesRef = $FirstValue->GetValues();
   }
   elsif ($TypeOfFirstValue =~ /^ARRAY/) {
     # It's an array reference...
     $ValuesRef = $FirstValue;
   }
   else {
     # It's a list of values...
     $ValuesRef = \@Values;
   }

   # Take a snapshot of the input before touching the object: the specified reference
   # may point to the very list being replaced, for example a reference returned by
   # GetValues or GetValueIDs of this object. Emptying that list first would lose
   # all of the data...
   @NewValues = @{$ValuesRef};

   # Set or add values or value IDs...
   MODE: {
     if ($Mode =~ /^SetValues$/i) { @{$This->{Values}} = @NewValues; last MODE; }
     if ($Mode =~ /^SetValueIDs$/i) { @{$This->{ValueIDs}} = @NewValues; last MODE; }
     if ($Mode =~ /^AddValues$/i) { push @{$This->{Values}}, @NewValues; last MODE; }
     if ($Mode =~ /^AddValueIDs$/i) { push @{$This->{ValueIDs}}, @NewValues; last MODE; }
     croak "Error: ${ClassName}-> _SetOrAddValuesOrValueIDs: Unknown mode $Mode...";
   }
   return $This;
 }
 462 
 # Set a specific value in fingerprint vector with indices starting from 0.
 #
 # Index is validated against the current number of values unless a true SkipCheck
 # is specified. Returns the object itself...
 #
 sub SetValue {
   my($This, $Index, $Value, $SkipCheck) = @_;

   if (!$SkipCheck) {
     # Check index...
     if ($Index < 0) {
       croak "Error: ${ClassName}->SetValue: Index value must be a positive number...";
     }
     if ($Index >= $This->GetNumOfValues()) {
       croak "Error: ${ClassName}->SetValue: Index vaue must be less than number of values...";
     }
   }

   return $This->_SetValue($Index, $Value);
 }
 483 
 # Set a fingerprint vector value at a specified index without performing any
 # checks and return the object itself...
 #
 sub _SetValue {
   my $This = shift;
   my($Position, $NewValue) = @_;

   $This->{Values}->[$Position] = $NewValue;

   return $This;
 }
 493 
 # Get a specific value from fingerprint vector with indices starting from 0. An
 # error is raised for an index which is negative or not less than number of values...
 #
 sub GetValue {
   my($This, $Index) = @_;

   croak "Error: ${ClassName}->GetValue: Index value must be a positive number..." if $Index < 0;
   croak "Error: ${ClassName}->GetValue: Index value must be less than number of values..." if $Index >= $This->GetNumOfValues();

   return $This->_GetValue($Index);
 }
 507 
 # Get a fingerprint vector value at a specified index without performing any checks...
 sub _GetValue {
   my $This = shift;
   my($Position) = @_;

   return $This->{Values}->[$Position];
 }
 514 
 # Return vector values as a list in list context or as a reference to the list of
 # values held by the object in scalar context...
 #
 sub GetValues {
   my($This) = @_;

   if (wantarray) {
     return @{$This->{Values}};
   }
   return \@{$This->{Values}};
 }
 522 
 # Set a specific value ID in fingerprint vector with indices starting from 0.
 #
 # Index is validated against the current number of value IDs unless a true SkipCheck
 # is specified. Returns the object itself...
 #
 sub SetValueID {
   my($This, $Index, $Value, $SkipCheck) = @_;

   if (!$SkipCheck) {
     # Check index...
     if ($Index < 0) {
       croak "Error: ${ClassName}->SetValueID: Index value must be a positive number...";
     }
     if ($Index >= $This->GetNumOfValueIDs()) {
       croak "Error: ${ClassName}->SetValueID: Index vaue must be less than number of value IDs...";
     }
   }

   return $This->_SetValueID($Index, $Value);
 }
 543 
 # Set a fingerprint vector value ID at a specified index without performing any
 # checks and return the object itself...
 #
 sub _SetValueID {
   my $This = shift;
   my($Position, $NewValueID) = @_;

   $This->{ValueIDs}->[$Position] = $NewValueID;

   return $This;
 }
 553 
 # Get a specific value ID from fingerprint vector with indices starting from 0. An
 # error is raised for an index which is negative or not less than number of value IDs...
 #
 sub GetValueID {
   my($This, $Index) = @_;

   croak "Error: ${ClassName}->GetValueID: Index value must be a positive number..." if $Index < 0;
   croak "Error: ${ClassName}->GetValueID: Index value must be less than number of value IDs..." if $Index >= $This->GetNumOfValueIDs();

   return $This->_GetValueID($Index);
 }
 567 
 # Get a fingerprint vector value ID at a specified index without performing any checks...
 #
 sub _GetValueID {
   my $This = shift;
   my($Position) = @_;

   return $This->{ValueIDs}->[$Position];
 }
 575 
 # Return vector value IDs as a list in list context or as a reference to the list
 # of value IDs held by the object in scalar context...
 #
 sub GetValueIDs {
   my($This) = @_;

   if (wantarray) {
     return @{$This->{ValueIDs}};
   }
   return \@{$This->{ValueIDs}};
 }
 583 
 # Get fingerprints vector string containing values and/or IDs in a specified format.
 #
 # Format names are matched without regard to case and the 'String' suffix is optional:
 # IDsAndValuesString, IDsAndValuesPairsString, ValuesAndIDsString, ValuesAndIDsPairsString,
 # ValueIDsString or ValuesString. An error is raised for any other format...
 #
 sub GetFingerprintsVectorString {
   my($This, $Format) = @_;

   if ($Format =~ /^(IDsAndValuesString|IDsAndValues)$/i) {
     return $This->GetIDsAndValuesString();
   }
   elsif ($Format =~ /^(IDsAndValuesPairsString|IDsAndValuesPairs)$/i) {
     return $This->GetIDsAndValuesPairsString();
   }
   elsif ($Format =~ /^(ValuesAndIDsString|ValuesAndIDs)$/i) {
     return $This->GetValuesAndIDsString();
   }
   elsif ($Format =~ /^(ValuesAndIDsPairsString|ValuesAndIDsPairs)$/i) {
     return $This->GetValuesAndIDsPairsString();
   }
   elsif ($Format =~ /^(ValueIDsString|ValueIDs)$/i) {
     return $This->GetValueIDsString();
   }
   elsif ($Format =~ /^(ValuesString|Values)$/i) {
     return $This->GetValuesString();
   }

   croak "Error: ${ClassName}->GetFingerprintsVectorString: Specified vector string format, $Format, is not supported. Value values: IDsAndValuesString, IDsAndValues, IDsAndValuesPairsString, IDsAndValuesPairs, ValuesAndIDsString, ValuesAndIDs, ValuesAndIDsPairsString, ValuesAndIDsPairs, ValueIDsString, ValueIDs, ValuesString, Values...";
 }
 # Get vector value IDs and values as a string of the form 'ValueIDs;Values', where
 # value IDs and values are delimited by spaces. 'None' is used in place of missing
 # value IDs; 'None;None' is returned for a vector without any values...
 #
 sub GetIDsAndValuesString {
   my($This) = @_;

   my $NumOfValueIDs = scalar @{$This->{ValueIDs}};
   my $NumOfValues = scalar @{$This->{Values}};

   # Values are not available...
   if (!$NumOfValues) {
     return "None;None";
   }

   my $ValuesString = join(' ', @{$This->{Values}});

   # Only values are available...
   if (!$NumOfValueIDs) {
     return "None;" . $ValuesString;
   }

   # Both IDs and values are available...
   return join(' ', @{$This->{ValueIDs}}) . ";" . $ValuesString;
 }
 619 
 # Get vector value IDs and values as a string containing space delimited pairs:
 # 'ID1 Value1 ID2 Value2 ...'. 'None' is used as ID in each pair for a vector without
 # any value IDs; 'None None' is returned for a vector without any values...
 #
 sub GetIDsAndValuesPairsString {
   my($This) = @_;

   # Values are unavailable...
   return "None None" if !@{$This->{Values}};

   my $ValueIDsPresent = @{$This->{ValueIDs}} ? 1 : 0;

   my($Index, @IDsAndValuesPairs);
   for $Index (0 .. $#{$This->{Values}}) {
     push @IDsAndValuesPairs, $ValueIDsPresent ? $This->{ValueIDs}[$Index] : 'None';
     push @IDsAndValuesPairs, $This->{Values}[$Index];
   }

   return join(' ', @IDsAndValuesPairs);
 }
 644 
 # Get vector values and value IDs as a string of the form 'Values;ValueIDs', where
 # values and value IDs are delimited by spaces. 'None' is used in place of missing
 # value IDs; 'None;None' is returned for a vector without any values...
 #
 sub GetValuesAndIDsString {
   my($This) = @_;

   my $NumOfValueIDs = scalar @{$This->{ValueIDs}};
   my $NumOfValues = scalar @{$This->{Values}};

   # Values are not available...
   if (!$NumOfValues) {
     return "None;None";
   }

   my $ValuesString = join(' ', @{$This->{Values}});

   # Only values are available...
   if (!$NumOfValueIDs) {
     return $ValuesString . ";None";
   }

   # Both IDs and values are available...
   return $ValuesString . ";" . join(' ', @{$This->{ValueIDs}});
 }
 664 
 # Get vector values and value IDs as a string containing space delimited pairs:
 # 'Value1 ID1 Value2 ID2 ...'. 'None' is used as ID in each pair for a vector without
 # any value IDs; 'None None' is returned for a vector without any values...
 #
 sub GetValuesAndIDsPairsString {
   my($This) = @_;

   # Values are unavailable...
   return "None None" if !@{$This->{Values}};

   my $ValueIDsPresent = @{$This->{ValueIDs}} ? 1 : 0;

   my($Index, @ValuesAndIDsPairs);
   for $Index (0 .. $#{$This->{Values}}) {
     push @ValuesAndIDsPairs, $This->{Values}[$Index];
     push @ValuesAndIDsPairs, $ValueIDsPresent ? $This->{ValueIDs}[$Index] : 'None';
   }

   return join(' ', @ValuesAndIDsPairs);
 }
 689 
 # Get vector value IDs as a space delimited string; 'None' is returned for a vector
 # without any value IDs...
 #
 sub GetValueIDsString {
   my($This) = @_;

   if (@{$This->{ValueIDs}}) {
     return join(' ', @{$This->{ValueIDs}});
   }
   return 'None';
 }
 697 
 # Get vector values as a space delimited string; 'None' is returned for a vector
 # without any values...
 #
 sub GetValuesString {
   my($This) = @_;

   if (@{$This->{Values}}) {
     return join(' ', @{$This->{Values}});
   }
   return 'None';
 }
 705 
 # Get number of values in fingerprint vector...
 sub GetNumOfValues {
   my $This = shift;
   my $NumOfValues = @{$This->{Values}};

   return $NumOfValues;
 }
 712 
 # Get number of values which are not numerically equal to zero...
 sub GetNumOfNonZeroValues {
   my($This) = @_;

   my $LastIndex = $This->GetNumOfValues() - 1;

   # grep returns number of matching indices during assignment to a scalar...
   my $Count = grep { $This->{Values}[$_] != 0 } (0 .. $LastIndex);

   return $Count;
 }
 728 
 # Get number of value IDs in fingerprint vector...
 sub GetNumOfValueIDs {
   my $This = shift;
   my $NumOfValueIDs = @{$This->{ValueIDs}};

   return $NumOfValueIDs;
 }
 735 
 736 # FingerprintsVector class provides methods to calculate similarity between vectors
 737 # containing three different types of values:
 738 #
 739 # Type I: OrderedNumericalValues
 740 #
 741 #   . Size of two vectors are same
 742 #   . Vectors contain real values in a specific order. For example: MACCS keys count, Topological
 743 #     pharmacophore atom pairs and so on.
 744 #   . Option to calculate similarity value using continuous values or binary values
 745 #
 746 # Type II: UnorderedNumericalValues (specified as NumericalValues type in SetType)
 747 #
 748 #   . Size of two vectors might not be same
 749 #   . Vectors contain unordered real values identified by value IDs. For example: Topological atom pairs,
 750 #     Topological atom torsions and so on
 751 #   . Option to calculate similarity value using continuous values or binary values
 752 #
 753 # Type III: AlphaNumericalValues
 754 #
 755 #   . Size of two vectors might not be same
 756 #   . Vectors contain unordered alphanumerical values. For example: Extended connectivity fingerprints,
 757 #     atom neighborhood fingerprints.
 758 #   . The vector values are treated as keys or bit indices and similarity value is calculated accordingly.
 759 #
 760 # Before performing similarity or distance calculations between vectors containing UnorderedNumericalValues
 761 # or AlphaNumericalValues, the vectors are transformed into vectors containing unique OrderedNumericalValues
 762 # using value IDs for UnorderedNumericalValues and values itself for AlphaNumericalValues.
 763 #
 764 # Three forms of similarity or distance calculation between two vectors are supported: AlgebraicForm, BinaryForm or
 765 # SetTheoreticForm.
 766 #
 767 # The value of an extra parameter, CalculationMode, passed to each similarity or distance function
 768 # controls the calculation. Supported values for CalculationMode: AlgebraicForm, BinaryForm and
 769 # SetTheoreticForm. Default: AlgebraicForm.
 770 #
 771 # For BinaryForm CalculationMode, the ordered list of processed final vector values containing the value or
 772 # count of each unique value type is simply converted into a binary vector containing 1s and 0s
 773 # corresponding to presence or absence of values before calculating similarity or distance between
 774 # two vectors.
 775 #
 776 # For two fingerprint vectors A and B of same size containing OrderedNumericalValues, let:
 777 #
 778 #  N = Number of values in A or B
 779 #
 780 #  Xa = Values of vector A
 781 #  Xb = Values of vector B
 782 #
 783 #  Xai = Value of ith element in A
 784 #  Xbi = Value of ith element in B
 785 #
 786 #  SUM = Sum of i over N values
 787 #
 788 # For SetTheoreticForm of calculation between two vectors, let:
 789 #
 790 #  SetIntersectionXaXb = SUM ( MIN ( Xai, Xbi ) )
 791 #  SetDifferenceXaXb = SUM ( Xai ) + SUM ( Xbi ) - SUM ( MIN ( Xai, Xbi ) )
 792 #
 793 # For BinaryForm of calculation between two vectors, let:
 794 #
 795 #  Na = Number of bits set to "1" in A = SUM ( Xai )
 796 #  Nb = Number of bits set to "1" in B = SUM ( Xbi )
 797 #  Nc = Number of bits set to "1" in both A and B = SUM ( Xai * Xbi )
 798 #  Nd = Number of bits set to "0" in both A and B = SUM ( 1 - Xai - Xbi + Xai * Xbi)
 799 #
 800 #  N = Number of bits set to "1" or "0" in A or B = Size of A or B = Na + Nb - Nc + Nd
 801 #
 802 # Additionally, for BinaryForm various values also correspond to:
 803 #
 804 #  Na = | Xa |
 805 #  Nb = | Xb |
 806 #  Nc = | SetIntersectionXaXb |
 807 #  Nd = N - | SetDifferenceXaXb |
 808 #
 809 #  | SetDifferenceXaXb | = N - Nd = Na + Nb - Nc + Nd - Nd = Na + Nb - Nc
 810 #                        =  | Xa | + | Xb | - | SetIntersectionXaXb |
 811 #
 812 # Various distance coefficients and similarity coefficients [ Ref 40, Ref 62, Ref 64 ] for a pair of vectors A and B
 813 # in AlgebraicForm and BinaryForm are defined as follows:
 814 #
 815 # . CityBlockDistanceCoefficient: ( same as HammingDistanceCoefficient and ManhattanDistanceCoefficient)
 816 #
 817 #     . AlgebraicForm: SUM ( ABS ( Xai - Xbi ) )
 818 #
 819 #     . BinaryForm: ( Na - Nc ) + ( Nb - Nc ) = Na + Nb - 2 * Nc
 820 #
 821 #     . SetTheoreticForm: | SetDifferenceXaXb | - | SetIntersectionXaXb |
 822 #                        = SUM ( Xai ) + SUM ( Xbi ) - 2 * ( SUM ( MIN ( Xai, Xbi ) ) )
 823 #
 824 # . CosineSimilarityCoefficient:  ( same as OchiaiSimilarityCoefficient)
 825 #
 826 #     . AlgebraicForm: SUM ( Xai * Xbi ) / SQRT ( SUM ( Xai ** 2) * SUM ( Xbi ** 2) )
 827 #
 828 #     . BinaryForm: Nc / SQRT ( Na * Nb)
 829 #
 830 #     . SetTheoreticForm: | SetIntersectionXaXb | / SQRT ( |Xa| * |Xb| )
 831 #                        = SUM ( MIN ( Xai, Xbi ) ) / SQRT ( SUM ( Xai ) * SUM ( Xbi ) )
 832 #
 833 # . CzekanowskiSimilarityCoefficient: ( same as DiceSimilarityCoefficient and SorensonSimilarityCoefficient)
 834 #
 835 #     . AlgebraicForm: ( 2 * ( SUM ( Xai * Xbi ) )  ) / ( SUM ( Xai ** 2) + SUM ( Xbi **2 ) )
 836 #
 837 #     . BinaryForm: 2 * Nc / ( Na + Nb )
 838 #
 839 #     . SetTheoreticForm: 2 * | SetIntersectionXaXb | / ( |Xa| + |Xb| )
 840 #                        = 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) / ( SUM ( Xai ) + SUM ( Xbi ) )
 841 #
 842 # . DiceSimilarityCoefficient: ( same as CzekanowskiSimilarityCoefficient and SorensonSimilarityCoefficient)
 843 #
 844 #     . AlgebraicForm: ( 2 * ( SUM ( Xai * Xbi ) )  ) / ( SUM ( Xai ** 2) + SUM ( Xbi **2 ) )
 845 #
 846 #     . BinaryForm: 2 * Nc / ( Na + Nb )
 847 #
 848 #     . SetTheoreticForm: 2 * | SetIntersectionXaXb | / ( |Xa| + |Xb| )
 849 #                        = 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) / ( SUM ( Xai ) + SUM ( Xbi ) )
 850 #
 851 # . EuclideanDistanceCoefficient:
 852 #
 853 #     . AlgebraicForm: SQRT ( SUM ( ( ( Xai - Xbi ) ** 2 ) ) )
 854 #
 855 #     . BinaryForm: SQRT ( ( Na - Nc ) + ( Nb - Nc ) ) = SQRT ( Na + Nb - 2 * Nc )
 856 #
 857 #     . SetTheoreticForm: SQRT ( | SetDifferenceXaXb | - | SetIntersectionXaXb | )
 858 #                        = SQRT (  SUM ( Xai ) + SUM ( Xbi ) - 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) )
 859 #
 860 # . HammingDistanceCoefficient:  ( same as CityBlockDistanceCoefficient and ManhattanDistanceCoefficient)
 861 #
 862 #     . AlgebraicForm: SUM ( ABS ( Xai - Xbi ) )
 863 #
 864 #     . BinaryForm: ( Na - Nc ) + ( Nb - Nc ) = Na + Nb - 2 * Nc
 865 #
 866 #     . SetTheoreticForm: | SetDifferenceXaXb | - | SetIntersectionXaXb |
 867 #                        = SUM ( Xai ) + SUM ( Xbi ) - 2 * ( SUM ( MIN ( Xai, Xbi ) ) )
 868 #
 869 # . JaccardSimilarityCoefficient: ( same as TanimotoSimilarityCoefficient)
 870 #
 871 #     . AlgebraicForm:  SUM ( Xai * Xbi ) / ( SUM ( Xai ** 2 ) + SUM ( Xbi ** 2 ) - SUM ( Xai * Xbi ) )
 872 #
 873 #     . BinaryForm:  Nc / ( ( Na - Nc ) + ( Nb - Nc ) + Nc ) = Nc / ( Na + Nb - Nc )
 874 #
 875 #     . SetTheoreticForm: | SetIntersectionXaXb | / | SetDifferenceXaXb |
 876 #                        = SUM ( MIN ( Xai, Xbi ) ) / (  SUM ( Xai ) + SUM ( Xbi ) - SUM ( MIN ( Xai, Xbi ) ) )
 877 #
 878 # . ManhattanDistanceCoefficient:  ( same as CityBlockDistanceCoefficient and HammingDistanceCoefficient)
 879 #
 880 #     . AlgebraicForm: SUM ( ABS ( Xai - Xbi ) )
 881 #
 882 #     . BinaryForm: ( Na - Nc ) + ( Nb - Nc ) = Na + Nb - 2 * Nc
 883 #
 884 #     . SetTheoreticForm: | SetDifferenceXaXb | - | SetIntersectionXaXb |
 885 #                        = SUM ( Xai ) + SUM ( Xbi ) - 2 * ( SUM ( MIN ( Xai, Xbi ) ) )
 886 #
 887 # . OchiaiSimilarityCoefficient:  ( same as CosineSimilarityCoefficient)
 888 #
 889 #     . AlgebraicForm: SUM ( Xai * Xbi ) / SQRT ( SUM ( Xai ** 2) * SUM ( Xbi ** 2) )
 890 #
 891 #     . BinaryForm: Nc / SQRT ( Na * Nb)
 892 #
 893 #     . SetTheoreticForm: | SetIntersectionXaXb | / SQRT ( |Xa| * |Xb| )
 894 #                        = SUM ( MIN ( Xai, Xbi ) ) / SQRT ( SUM ( Xai ) * SUM ( Xbi ) )
 895 #
 896 # . SorensonSimilarityCoefficient: ( same as CzekanowskiSimilarityCoefficient and DiceSimilarityCoefficient)
 897 #
 898 #     . AlgebraicForm: ( 2 * ( SUM ( Xai * Xbi ) )  ) / ( SUM ( Xai ** 2) + SUM ( Xbi **2 ) )
 899 #
 900 #     . BinaryForm: 2 * Nc / ( Na + Nb )
 901 #
 902 #     . SetTheoreticForm: 2 * | SetIntersectionXaXb | / ( |Xa| + |Xb| )
 903 #                        = 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) / ( SUM ( Xai ) + SUM ( Xbi ) )
 904 #
 905 # . SoergelDistanceCoefficient:
 906 #
 907 #     . AlgebraicForm:  SUM ( ABS ( Xai - Xbi ) ) / SUM ( MAX ( Xai, Xbi ) )
 908 #
 909 #     . BinaryForm: 1 - Nc / ( Na + Nb - Nc ) = ( Na + Nb - 2 * Nc ) / ( Na + Nb - Nc )
 910 #
 911 #     . SetTheoreticForm: ( | SetDifferenceXaXb | - | SetIntersectionXaXb | ) / | SetDifferenceXaXb |
 912 #                        = ( SUM ( Xai ) + SUM ( Xbi ) - 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) ) / ( SUM ( Xai ) + SUM ( Xbi ) - SUM ( MIN ( Xai, Xbi ) ) )
 913 #
 914 # . TanimotoSimilarityCoefficient:  ( same as JaccardSimilarityCoefficient)
 915 #
 916 #     . AlgebraicForm:  SUM ( Xai * Xbi ) / ( SUM ( Xai ** 2 ) + SUM ( Xbi ** 2 ) - SUM ( Xai * Xbi ) )
 917 #
 918 #     . BinaryForm:  Nc / ( ( Na - Nc ) + ( Nb - Nc ) + Nc ) = Nc / ( Na + Nb - Nc )
 919 #
 920 #     . SetTheoreticForm: | SetIntersectionXaXb | / | SetDifferenceXaXb |
 921 #                        = SUM ( MIN ( Xai, Xbi ) ) / (  SUM ( Xai ) + SUM ( Xbi ) - SUM ( MIN ( Xai, Xbi ) ) )
 922 #
 923 #
 924 
 925 # Calculate Hamming distance coefficient between two fingerprint vectors.
 926 #
 927 # This functionality can be either invoked as a class function or an object method.
 928 #
 929 sub HammingDistanceCoefficient ($$;$$) {
 930   my($FingerprintsVectorA, $FingerprintsVectorB, $CalculationMode, $SkipValuesCheck) = @_;
 931 
 932   return CityBlockDistanceCoefficient($FingerprintsVectorA, $FingerprintsVectorB, $CalculationMode, $SkipValuesCheck);
 933 }
 934 
 935 # Calculate Hamming distance coefficient between two fingerprint vectors.
 936 #
 937 # This functionality can be either invoked as a class function or an object method.
 938 #
 939 sub ManhattanDistanceCoefficient ($$;$$) {
 940   my($FingerprintsVectorA, $FingerprintsVectorB, $CalculationMode, $SkipValuesCheck) = @_;
 941 
 942   return CityBlockDistanceCoefficient($FingerprintsVectorA, $FingerprintsVectorB, $CalculationMode, $SkipValuesCheck);
 943 }
 944 
 945 # Calculate CityBlock distance coefficient between two fingerprint vectors.
 946 #
 947 # This functionality can be either invoked as a class function or an object method.
 948 #
 949 sub CityBlockDistanceCoefficient ($$;$$) {
 950   my($FingerprintsVectorA, $FingerprintsVectorB, $CalculationMode, $SkipValuesCheck) = @_;
 951 
 952   $CalculationMode = defined $CalculationMode ? $CalculationMode : 'AlgebraicForm';
 953   $SkipValuesCheck = defined $SkipValuesCheck ? $SkipValuesCheck : 0;
 954 
 955   # Validate and process fingerprints vectors for similarity calculations...
 956   #
 957   _ValidateAndProcessFingerprintsVectorsForSimilarityCalculation("CityBlockDistanceCoefficient: Calculation failed", $FingerprintsVectorA, $FingerprintsVectorB, $CalculationMode, $SkipValuesCheck);
 958 
 959   # Perform the calculation...
 960   if ($CalculationMode =~ /^AlgebraicForm$/i) {
 961     return _CityBlockDistanceCoefficientUsingAlgebraicForm($FingerprintsVectorA, $FingerprintsVectorB);
 962   }
 963   elsif ($CalculationMode =~ /^BinaryForm$/i) {
 964     return _CityBlockDistanceCoefficientUsingBinaryForm($FingerprintsVectorA, $FingerprintsVectorB);
 965   }
 966   elsif ($CalculationMode =~ /^SetTheoreticForm$/i) {
 967     return _CityBlockDistanceCoefficientUsingSetTheoreticForm($FingerprintsVectorA, $FingerprintsVectorB);
 968   }
 969   else {
 970     return undef;
 971   }
 972 }
 973 
 974 # Calculate CityBlock distance coefficient using algebraic form...
 975 #
 976 sub _CityBlockDistanceCoefficientUsingAlgebraicForm {
 977   my($FingerprintsVectorA, $FingerprintsVectorB) = @_;
 978   my($SumAbsSubtractionXaiXbi);
 979 
 980   $SumAbsSubtractionXaiXbi = _GetSumOfAbsoluteValueOfSubtractionOfFingerprintsOrderedValues($FingerprintsVectorA, $FingerprintsVectorB);
 981 
 982   return $SumAbsSubtractionXaiXbi;
 983 }
 984 
 985 # Calculate CityBlock distance coefficient using binary form...
 986 #
 987 sub _CityBlockDistanceCoefficientUsingBinaryForm {
 988   my($FingerprintsVectorA, $FingerprintsVectorB) = @_;
 989   my($Na, $Nb, $Nc);
 990 
 991   ($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($FingerprintsVectorA, $FingerprintsVectorB);
 992 
 993   return  ($Na + $Nb - 2 * $Nc);
 994 }
 995 
 996 # Calculate  CityBlock distance coefficient using set theoretic form...
 997 #
 998 sub _CityBlockDistanceCoefficientUsingSetTheoreticForm {
 999   my($FingerprintsVectorA, $FingerprintsVectorB) = @_;
1000   my($SumMinXaiXbi, $SumXai, $SumXbi);
1001 
1002   $SumXai = _GetSumOfFingerprintsOrderedValues($FingerprintsVectorA);
1003   $SumXbi = _GetSumOfFingerprintsOrderedValues($FingerprintsVectorB);
1004   $SumMinXaiXbi = _GetSumOfMinimumOfFingerprintsOrderdedValues($FingerprintsVectorA, $FingerprintsVectorB);
1005 
1006   return  ($SumXai + $SumXbi - 2 * $SumMinXaiXbi);
1007 }
1008 
# OchiaiSimilarityCoefficient: alias for CosineSimilarityCoefficient.
#
# Arguments:
#   . FingerprintsVectorA, FingerprintsVectorB: the two vectors to compare
#   . CalculationMode: optional; passed through unchanged
#   . SkipValuesCheck: optional; passed through unchanged
#
# Returns whatever CosineSimilarityCoefficient returns for the same arguments.
#
# This functionality can be either invoked as a class function or an object method.
#
sub OchiaiSimilarityCoefficient ($$;$$) {
  my($VectorA, $VectorB, $Mode, $SkipCheck) = @_;

  return CosineSimilarityCoefficient($VectorA, $VectorB, $Mode, $SkipCheck);
}
1018 
# Calculate Cosine similarity coefficient between two fingerprint vectors.
#
# Arguments:
#   . FingerprintsVectorA, FingerprintsVectorB: the two vectors to compare
#   . CalculationMode: AlgebraicForm, BinaryForm or SetTheoreticForm (matched
#     case-insensitively); defaults to AlgebraicForm when not defined
#   . SkipValuesCheck: defaults to 0 when not defined; forwarded to the
#     validate-and-process step
#
# Returns the value computed by the form specific helper, or undef when the
# calculation mode matches none of the three supported forms.
#
# This functionality can be either invoked as a class function or an object method.
#
sub CosineSimilarityCoefficient ($$;$$) {
  my($VectorA, $VectorB, $Mode, $SkipCheck) = @_;

  # Fill in defaults for the optional arguments...
  $Mode = 'AlgebraicForm' unless defined $Mode;
  $SkipCheck = 0 unless defined $SkipCheck;

  # Validate and process fingerprints vectors before any calculation...
  _ValidateAndProcessFingerprintsVectorsForSimilarityCalculation("CosineSimilarityCoefficient: Calculation failed", $VectorA, $VectorB, $Mode, $SkipCheck);

  # Hand off to the helper matching the requested form...
  return _CosineSimilarityCoefficientUsingAlgebraicForm($VectorA, $VectorB) if ($Mode =~ /^AlgebraicForm$/i);
  return _CosineSimilarityCoefficientUsingBinaryForm($VectorA, $VectorB) if ($Mode =~ /^BinaryForm$/i);
  return _CosineSimilarityCoefficientUsingSetTheoreticForm($VectorA, $VectorB) if ($Mode =~ /^SetTheoreticForm$/i);

  return undef;
}
1047 
# Cosine similarity coefficient, algebraic form:
#
#   SUM ( Xai * Xbi ) / SQRT ( SUM ( Xai ** 2) * SUM ( Xbi ** 2) )
#
# Returns 0 when the denominator evaluates to zero.
#
sub _CosineSimilarityCoefficientUsingAlgebraicForm {
  my($VectorA, $VectorB) = @_;

  my $SumSquaresA = _GetSumOfSquaresOfFingerprintsOrderedValues($VectorA);
  my $SumSquaresB = _GetSumOfSquaresOfFingerprintsOrderedValues($VectorB);
  my $SumProducts = _GetSumOfProductOfFingerprintsOrderedValues($VectorA, $VectorB);

  my $Divisor = sqrt($SumSquaresA * $SumSquaresB);

  # Avoid dividing by zero...
  return 0 unless $Divisor;

  return $SumProducts / $Divisor;
}
1063 
# Cosine similarity coefficient, binary form:
#
#   Nc / SQRT ( Na * Nb )
#
# Na, Nb and Nc are the three values returned, in that order, by
# _GetNumOfIndividualAndCommonSetBits. Returns 0 when the denominator
# evaluates to zero.
#
sub _CosineSimilarityCoefficientUsingBinaryForm {
  my($VectorA, $VectorB) = @_;

  my($CountA, $CountB, $CountCommon) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

  my $Divisor = sqrt($CountA * $CountB);

  # Avoid dividing by zero...
  return 0 unless $Divisor;

  return $CountCommon / $Divisor;
}
1077 
# Cosine similarity coefficient, set theoretic form:
#
#   SUM ( MIN ( Xai, Xbi ) ) / SQRT ( SUM ( Xai ) * SUM ( Xbi ) )
#
# Returns 0 when the denominator evaluates to zero.
#
sub _CosineSimilarityCoefficientUsingSetTheoreticForm {
  my($VectorA, $VectorB) = @_;

  my $TotalA = _GetSumOfFingerprintsOrderedValues($VectorA);
  my $TotalB = _GetSumOfFingerprintsOrderedValues($VectorB);
  my $TotalMin = _GetSumOfMinimumOfFingerprintsOrderdedValues($VectorA, $VectorB);

  my $Divisor = sqrt($TotalA * $TotalB);

  # Avoid dividing by zero...
  return 0 unless $Divisor;

  return $TotalMin / $Divisor;
}
1093 
# CzekanowskiSimilarityCoefficient: alias for DiceSimilarityCoefficient.
#
# Arguments:
#   . FingerprintsVectorA, FingerprintsVectorB: the two vectors to compare
#   . CalculationMode: optional; passed through unchanged
#   . SkipValuesCheck: optional; passed through unchanged
#
# Returns whatever DiceSimilarityCoefficient returns for the same arguments.
#
# This functionality can be either invoked as a class function or an object method.
#
sub CzekanowskiSimilarityCoefficient ($$;$$) {
  my($VectorA, $VectorB, $Mode, $SkipCheck) = @_;

  return DiceSimilarityCoefficient($VectorA, $VectorB, $Mode, $SkipCheck);
}
1103 
# SorensonSimilarityCoefficient: alias for DiceSimilarityCoefficient.
#
# Arguments:
#   . FingerprintsVectorA, FingerprintsVectorB: the two vectors to compare
#   . CalculationMode: optional; passed through unchanged
#   . SkipValuesCheck: optional; passed through unchanged
#
# Returns whatever DiceSimilarityCoefficient returns for the same arguments.
#
# This functionality can be either invoked as a class function or an object method.
#
sub SorensonSimilarityCoefficient ($$;$$) {
  my($VectorA, $VectorB, $Mode, $SkipCheck) = @_;

  return DiceSimilarityCoefficient($VectorA, $VectorB, $Mode, $SkipCheck);
}
1113 
# Calculate Dice similarity coefficient between two fingerprint vectors.
#
# Arguments:
#   . FingerprintsVectorA, FingerprintsVectorB: the two vectors to compare
#   . CalculationMode: AlgebraicForm, BinaryForm or SetTheoreticForm (matched
#     case-insensitively); defaults to AlgebraicForm when not defined
#   . SkipValuesCheck: defaults to 0 when not defined; forwarded to the
#     validate-and-process step
#
# Returns the value computed by the form specific helper, or undef when the
# calculation mode matches none of the three supported forms.
#
# This functionality can be either invoked as a class function or an object method.
#
sub DiceSimilarityCoefficient ($$;$$) {
  my($VectorA, $VectorB, $Mode, $SkipCheck) = @_;

  # Fill in defaults for the optional arguments...
  $Mode = 'AlgebraicForm' unless defined $Mode;
  $SkipCheck = 0 unless defined $SkipCheck;

  # Validate and process fingerprints vectors before any calculation...
  _ValidateAndProcessFingerprintsVectorsForSimilarityCalculation("DiceSimilarityCoefficient: Calculation failed", $VectorA, $VectorB, $Mode, $SkipCheck);

  # Hand off to the helper matching the requested form...
  return _DiceSimilarityCoefficientUsingAlgebraicForm($VectorA, $VectorB) if ($Mode =~ /^AlgebraicForm$/i);
  return _DiceSimilarityCoefficientUsingBinaryForm($VectorA, $VectorB) if ($Mode =~ /^BinaryForm$/i);
  return _DiceSimilarityCoefficientUsingSetTheoreticForm($VectorA, $VectorB) if ($Mode =~ /^SetTheoreticForm$/i);

  return undef;
}
1142 
# Dice similarity coefficient, algebraic form:
#
#   ( 2 * SUM ( Xai * Xbi ) ) / ( SUM ( Xai ** 2 ) + SUM ( Xbi ** 2 ) )
#
# Returns 0 when the denominator evaluates to zero.
#
sub _DiceSimilarityCoefficientUsingAlgebraicForm {
  my($VectorA, $VectorB) = @_;

  my $SumSquaresA = _GetSumOfSquaresOfFingerprintsOrderedValues($VectorA);
  my $SumSquaresB = _GetSumOfSquaresOfFingerprintsOrderedValues($VectorB);
  my $SumProducts = _GetSumOfProductOfFingerprintsOrderedValues($VectorA, $VectorB);

  my $Divisor = $SumSquaresA + $SumSquaresB;

  # Avoid dividing by zero...
  return 0 unless $Divisor;

  return (2 * $SumProducts) / $Divisor;
}
1158 
# Dice similarity coefficient, binary form:
#
#   2 * Nc / ( Na + Nb )
#
# Na, Nb and Nc are the three values returned, in that order, by
# _GetNumOfIndividualAndCommonSetBits. Returns 0 when the denominator
# evaluates to zero.
#
sub _DiceSimilarityCoefficientUsingBinaryForm {
  my($VectorA, $VectorB) = @_;

  my($CountA, $CountB, $CountCommon) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

  my $Divisor = $CountA + $CountB;

  # Avoid dividing by zero...
  return 0 unless $Divisor;

  return (2 * $CountCommon) / $Divisor;
}
1172 
# Dice similarity coefficient, set theoretic form:
#
#   2 * ( SUM ( MIN ( Xai, Xbi ) ) ) / ( SUM ( Xai ) + SUM ( Xbi ) )
#
# Returns 0 when the denominator evaluates to zero.
#
sub _DiceSimilarityCoefficientUsingSetTheoreticForm {
  my($VectorA, $VectorB) = @_;

  my $TotalA = _GetSumOfFingerprintsOrderedValues($VectorA);
  my $TotalB = _GetSumOfFingerprintsOrderedValues($VectorB);
  my $TotalMin = _GetSumOfMinimumOfFingerprintsOrderdedValues($VectorA, $VectorB);

  my $Divisor = $TotalA + $TotalB;

  # Avoid dividing by zero...
  return 0 unless $Divisor;

  return (2 * $TotalMin) / $Divisor;
}
1188 
1189 
# Calculate Euclidean distance coefficient between two fingerprint vectors.
#
# Arguments:
#   . FingerprintsVectorA, FingerprintsVectorB: the two vectors to compare
#   . CalculationMode: AlgebraicForm, BinaryForm or SetTheoreticForm (matched
#     case-insensitively); defaults to AlgebraicForm when not defined
#   . SkipValuesCheck: defaults to 0 when not defined; forwarded to the
#     validate-and-process step
#
# Returns the value computed by the form specific helper, or undef when the
# calculation mode matches none of the three supported forms.
#
# This functionality can be either invoked as a class function or an object method.
#
sub EuclideanDistanceCoefficient ($$;$$) {
  my($VectorA, $VectorB, $Mode, $SkipCheck) = @_;

  # Fill in defaults for the optional arguments...
  $Mode = 'AlgebraicForm' unless defined $Mode;
  $SkipCheck = 0 unless defined $SkipCheck;

  # Validate and process fingerprints vectors before any calculation...
  _ValidateAndProcessFingerprintsVectorsForSimilarityCalculation("EuclideanDistanceCoefficient: Calculation failed", $VectorA, $VectorB, $Mode, $SkipCheck);

  # Hand off to the helper matching the requested form...
  return _EuclideanDistanceCoefficientUsingAlgebraicForm($VectorA, $VectorB) if ($Mode =~ /^AlgebraicForm$/i);
  return _EuclideanDistanceCoefficientUsingBinaryForm($VectorA, $VectorB) if ($Mode =~ /^BinaryForm$/i);
  return _EuclideanDistanceCoefficientUsingSetTheoreticForm($VectorA, $VectorB) if ($Mode =~ /^SetTheoreticForm$/i);

  return undef;
}
1218 
# Euclidean distance coefficient, algebraic form:
#
#   SQRT ( SUM ( ( Xai - Xbi ) ** 2 ) )
#
sub _EuclideanDistanceCoefficientUsingAlgebraicForm {
  my($VectorA, $VectorB) = @_;

  my $SumSquaredDifferences = _GetSumOfSquaresOfSubtractionOfFingerprintsOrderedValues($VectorA, $VectorB);

  return sqrt($SumSquaredDifferences);
}
1229 
# Euclidean distance coefficient, binary form:
#
#   SQRT ( Na + Nb - 2 * Nc )
#
# Na, Nb and Nc are the three values returned, in that order, by
# _GetNumOfIndividualAndCommonSetBits for the two vectors.
#
sub _EuclideanDistanceCoefficientUsingBinaryForm {
  my($VectorA, $VectorB) = @_;

  my($CountA, $CountB, $CountCommon) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

  return sqrt($CountA + $CountB - 2 * $CountCommon);
}
1240 
# Euclidean distance coefficient, set theoretic form:
#
#   SQRT ( SUM ( Xai ) + SUM ( Xbi ) - 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) )
#
sub _EuclideanDistanceCoefficientUsingSetTheoreticForm {
  my($VectorA, $VectorB) = @_;

  my $TotalA = _GetSumOfFingerprintsOrderedValues($VectorA);
  my $TotalB = _GetSumOfFingerprintsOrderedValues($VectorB);
  my $TotalMin = _GetSumOfMinimumOfFingerprintsOrderdedValues($VectorA, $VectorB);

  return sqrt($TotalA + $TotalB - 2 * $TotalMin);
}
1253 
# JaccardSimilarityCoefficient: alias for TanimotoSimilarityCoefficient.
#
# Arguments:
#   . FingerprintsVectorA, FingerprintsVectorB: the two vectors to compare
#   . CalculationMode: optional; passed through unchanged
#   . SkipValuesCheck: optional; passed through unchanged
#
# Returns whatever TanimotoSimilarityCoefficient returns for the same arguments.
#
# This functionality can be either invoked as a class function or an object method.
#
sub JaccardSimilarityCoefficient ($$;$$) {
  my($VectorA, $VectorB, $Mode, $SkipCheck) = @_;

  return TanimotoSimilarityCoefficient($VectorA, $VectorB, $Mode, $SkipCheck);
}
1263 
# Calculate Tanimoto similarity coefficient between two fingerprint vectors.
#
# Arguments:
#   . FingerprintsVectorA, FingerprintsVectorB: the two vectors to compare
#   . CalculationMode: AlgebraicForm, BinaryForm or SetTheoreticForm (matched
#     case-insensitively); defaults to AlgebraicForm when not defined
#   . SkipValuesCheck: defaults to 0 when not defined; forwarded to the
#     validate-and-process step
#
# Returns the value computed by the form specific helper, or undef when the
# calculation mode matches none of the three supported forms.
#
# This functionality can be either invoked as a class function or an object method.
#
sub TanimotoSimilarityCoefficient ($$;$$) {
  my($VectorA, $VectorB, $Mode, $SkipCheck) = @_;

  # Fill in defaults for the optional arguments...
  $Mode = 'AlgebraicForm' unless defined $Mode;
  $SkipCheck = 0 unless defined $SkipCheck;

  # Validate and process fingerprints vectors before any calculation...
  _ValidateAndProcessFingerprintsVectorsForSimilarityCalculation("TanimotoSimilarityCoefficient: Calculation failed", $VectorA, $VectorB, $Mode, $SkipCheck);

  # Hand off to the helper matching the requested form...
  return _TanimotoSimilarityCoefficientUsingAlgebraicForm($VectorA, $VectorB) if ($Mode =~ /^AlgebraicForm$/i);
  return _TanimotoSimilarityCoefficientUsingBinaryForm($VectorA, $VectorB) if ($Mode =~ /^BinaryForm$/i);
  return _TanimotoSimilarityCoefficientUsingSetTheoreticForm($VectorA, $VectorB) if ($Mode =~ /^SetTheoreticForm$/i);

  return undef;
}
1292 
# Tanimoto similarity coefficient, algebraic form:
#
#   SUM ( Xai * Xbi ) / ( SUM ( Xai ** 2 ) + SUM ( Xbi ** 2 ) - SUM ( Xai * Xbi ) )
#
# Returns 0 when the denominator evaluates to zero.
#
sub _TanimotoSimilarityCoefficientUsingAlgebraicForm {
  my($VectorA, $VectorB) = @_;

  my $SumSquaresA = _GetSumOfSquaresOfFingerprintsOrderedValues($VectorA);
  my $SumSquaresB = _GetSumOfSquaresOfFingerprintsOrderedValues($VectorB);
  my $SumProducts = _GetSumOfProductOfFingerprintsOrderedValues($VectorA, $VectorB);

  my $Divisor = $SumSquaresA + $SumSquaresB - $SumProducts;

  # Avoid dividing by zero...
  return 0 unless $Divisor;

  return $SumProducts / $Divisor;
}
1308 
# Tanimoto similarity coefficient, binary form:
#
#   Nc / ( Na + Nb - Nc )
#
# Na, Nb and Nc are the three values returned, in that order, by
# _GetNumOfIndividualAndCommonSetBits. Returns 0 when the denominator
# evaluates to zero.
#
sub _TanimotoSimilarityCoefficientUsingBinaryForm {
  my($VectorA, $VectorB) = @_;

  my($CountA, $CountB, $CountCommon) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

  my $Divisor = $CountA + $CountB - $CountCommon;

  # Avoid dividing by zero...
  return 0 unless $Divisor;

  return $CountCommon / $Divisor;
}
1322 
# Tanimoto similarity coefficient, set theoretic form:
#
#   SUM ( MIN ( Xai, Xbi ) ) / ( SUM ( Xai ) + SUM ( Xbi ) - SUM ( MIN ( Xai, Xbi ) ) )
#
# Returns 0 when the denominator evaluates to zero.
#
sub _TanimotoSimilarityCoefficientUsingSetTheoreticForm {
  my($VectorA, $VectorB) = @_;

  my $TotalA = _GetSumOfFingerprintsOrderedValues($VectorA);
  my $TotalB = _GetSumOfFingerprintsOrderedValues($VectorB);
  my $TotalMin = _GetSumOfMinimumOfFingerprintsOrderdedValues($VectorA, $VectorB);

  my $Divisor = $TotalA + $TotalB - $TotalMin;

  # Avoid dividing by zero...
  return 0 unless $Divisor;

  return $TotalMin / $Divisor;
}
1338 
1339 
# Calculate Soergel distance coefficient between two fingerprint vectors.
#
# Arguments:
#   . FingerprintsVectorA, FingerprintsVectorB: the two vectors to compare
#   . CalculationMode: AlgebraicForm, BinaryForm or SetTheoreticForm (matched
#     case-insensitively); defaults to AlgebraicForm when not defined
#   . SkipValuesCheck: defaults to 0 when not defined; forwarded to the
#     validate-and-process step
#
# Returns the value computed by the form specific helper, or undef when the
# calculation mode matches none of the three supported forms.
#
# This functionality can be either invoked as a class function or an object method.
#
sub SoergelDistanceCoefficient ($$;$$) {
  my($VectorA, $VectorB, $Mode, $SkipCheck) = @_;

  # Fill in defaults for the optional arguments...
  $Mode = 'AlgebraicForm' unless defined $Mode;
  $SkipCheck = 0 unless defined $SkipCheck;

  # Validate and process fingerprints vectors before any calculation...
  _ValidateAndProcessFingerprintsVectorsForSimilarityCalculation("SoergelDistanceCoefficient: Calculation failed", $VectorA, $VectorB, $Mode, $SkipCheck);

  # Hand off to the helper matching the requested form...
  return _SoergelDistanceCoefficientUsingAlgebraicForm($VectorA, $VectorB) if ($Mode =~ /^AlgebraicForm$/i);
  return _SoergelDistanceCoefficientUsingBinaryForm($VectorA, $VectorB) if ($Mode =~ /^BinaryForm$/i);
  return _SoergelDistanceCoefficientUsingSetTheoreticForm($VectorA, $VectorB) if ($Mode =~ /^SetTheoreticForm$/i);

  return undef;
}
1368 
# Soergel distance coefficient, algebraic form:
#
#   SUM ( ABS ( Xai - Xbi ) ) / SUM ( MAX ( Xai, Xbi ) )
#
# Returns 0 when the denominator evaluates to zero.
#
sub _SoergelDistanceCoefficientUsingAlgebraicForm {
  my($VectorA, $VectorB) = @_;

  my $SumAbsDifferences = _GetSumOfAbsoluteValueOfSubtractionOfFingerprintsOrderedValues($VectorA, $VectorB);
  my $SumMaximums = _GetSumOfMaximumOfFingerprintsOrderdedValues($VectorA, $VectorB);

  # Avoid dividing by zero...
  return 0 unless $SumMaximums;

  return $SumAbsDifferences / $SumMaximums;
}
1383 
# Soergel distance coefficient, binary form:
#
#   ( Na + Nb - 2 * Nc ) / ( Na + Nb - Nc )
#
# Na, Nb and Nc are the three values returned, in that order, by
# _GetNumOfIndividualAndCommonSetBits. Returns 0 when the denominator
# evaluates to zero.
#
sub _SoergelDistanceCoefficientUsingBinaryForm {
  my($VectorA, $VectorB) = @_;

  my($CountA, $CountB, $CountCommon) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

  my $Dividend = $CountA + $CountB - 2 * $CountCommon;
  my $Divisor = $CountA + $CountB - $CountCommon;

  # Avoid dividing by zero...
  return 0 unless $Divisor;

  return $Dividend / $Divisor;
}
1397 
# Soergel distance coefficient, set theoretic form:
#
#   ( SUM ( Xai ) + SUM ( Xbi ) - 2 * ( SUM ( MIN ( Xai, Xbi ) ) ) )
#       / ( SUM ( Xai ) + SUM ( Xbi ) - SUM ( MIN ( Xai, Xbi ) ) )
#
# Returns 0 when the denominator evaluates to zero.
#
sub _SoergelDistanceCoefficientUsingSetTheoreticForm {
  my($VectorA, $VectorB) = @_;

  my $TotalA = _GetSumOfFingerprintsOrderedValues($VectorA);
  my $TotalB = _GetSumOfFingerprintsOrderedValues($VectorB);
  my $TotalMin = _GetSumOfMinimumOfFingerprintsOrderdedValues($VectorA, $VectorB);

  my $Dividend = $TotalA + $TotalB - 2 * $TotalMin;
  my $Divisor = $TotalA + $TotalB - $TotalMin;

  # Avoid dividing by zero...
  return 0 unless $Divisor;

  return $Dividend / $Divisor;
}
1413 
# Common entry step used by the coefficient functions: optionally validate the
# two fingerprints vectors and then process them for the calculation.
#
# Arguments:
#   . ErrorMsg: message prefix handed to the validation and processing steps
#   . FingerprintsVectorA, FingerprintsVectorB: the two vectors
#   . CalculationMode: defaults to AlgebraicForm when not defined
#   . SkipValuesCheck: defaults to 0 when not defined; a true value bypasses
#     the validation step entirely
#
sub _ValidateAndProcessFingerprintsVectorsForSimilarityCalculation {
  my($ErrorMsg, $VectorA, $VectorB, $Mode, $SkipCheck) = @_;

  $Mode = 'AlgebraicForm' unless defined $Mode;
  $SkipCheck = 0 unless defined $SkipCheck;

  # Validation is performed unless explicitly skipped by the caller...
  _ValidateFingerprintsVectorsForSimilarityCalculation($ErrorMsg, $VectorA, $VectorB, $Mode) unless $SkipCheck;

  # Processing is always performed...
  _ProcessFingerprintsVectorsForSimilarityCalculation($ErrorMsg, $VectorA, $VectorB, $Mode);
}
1427 
# Make sure fingerprint vectors are good for performing similarity/distance calculation...
#
# Arguments:
#   . ErrorMsg: prefix identifying the calling coefficient; embedded in every
#     error message
#   . FingerprintsVectorA, FingerprintsVectorB: the two vectors to check
#   . CalculationMode: must match AlgebraicForm, BinaryForm or SetTheoreticForm
#     (case-insensitively)
#
# Croaks when any of the following holds:
#   . either argument fails IsFingerprintsVector
#   . the Type of the two vectors differ
#   . the calculation mode is not one of the supported values
#   . either vector has zero values
#   . OrderedNumericalValues: the number of values differ between the vectors
#   . NumericalValues: a vector has no value IDs, or its number of value IDs
#     differs from its number of values
#   . AlphaNumericalValues: either vector has any value IDs
#   . Type is none of the three types listed above
#
sub _ValidateFingerprintsVectorsForSimilarityCalculation {
  my($ErrorMsg, $FingerprintsVectorA, $FingerprintsVectorB, $CalculationMode) = @_;

  # Make sure both are fingerprint vectors..
  if (!(IsFingerprintsVector($FingerprintsVectorA) && IsFingerprintsVector($FingerprintsVectorB))) {
    croak "Error: ${ClassName}->${ErrorMsg}: Both objects must be fingerprint vectors...";
  }

  # Check types...
  if ($FingerprintsVectorA->{Type} ne $FingerprintsVectorB->{Type}) {
    croak "Error: ${ClassName}->${ErrorMsg}: Type of first fingerprint vector, $FingerprintsVectorA->{Type}, must be same as type of second fingerprint vector, $FingerprintsVectorB->{Type}...";
  }

  # Check calculation mode...
  if ($CalculationMode !~ /^(AlgebraicForm|BinaryForm|SetTheoreticForm)$/i) {
    croak "Error: ${ClassName}->${ErrorMsg}: Specified similarity calculation mode, $CalculationMode, is not valid. Supported values: AlgebraicForm, BinaryForm, and SetTheoreticForm...";
  }

  # Check values and value IDs...
  my($Na, $Nb, $NIDa, $NIDb);
  $Na = $FingerprintsVectorA->GetNumOfValues(); $Nb = $FingerprintsVectorB->GetNumOfValues();
  $NIDa = $FingerprintsVectorA->GetNumOfValueIDs(); $NIDb = $FingerprintsVectorB->GetNumOfValueIDs();

  if ($Na == 0) {
    croak "Error: ${ClassName}->${ErrorMsg}: Number of values in first fingerprint vector, $Na, must be > 0 for fingerprint vector type $FingerprintsVectorA->{Type} ...";
  }
  if ($Nb == 0) {
    croak "Error: ${ClassName}->${ErrorMsg}: Number of values in second fingerprint vector, $Nb, must be > 0 for fingerprint vector type $FingerprintsVectorB->{Type} ...";
  }

  # Both vectors have the same Type at this point, so the type specific checks
  # are selected using the first vector only...
  if ($FingerprintsVectorA->{Type} =~ /^OrderedNumericalValues$/i) {
    if ($Na != $Nb) {
      croak "Error: ${ClassName}->${ErrorMsg}: Number of values in first fingerprint vector, $Na, must be equal to number of values, $Nb, in second fingerprint vector for fingerprint vector types $FingerprintsVectorA->{Type} ...";
    }
  }
  elsif ($FingerprintsVectorA->{Type} =~ /^NumericalValues$/i) {
    if ($NIDa == 0) {
      croak "Error: ${ClassName}->${ErrorMsg}: Number of value IDs in first fingerprint vector, $NIDa, must be > 0 for fingerprint vector type $FingerprintsVectorA->{Type} ...";
    }
    if ($NIDb == 0) {
      # This check is about the second vector; the message names it accordingly.
      croak "Error: ${ClassName}->${ErrorMsg}: Number of value IDs in second fingerprint vector, $NIDb, must be > 0 for fingerprint vector type $FingerprintsVectorB->{Type} ...";
    }

    if ($NIDa != $Na) {
      croak "Error: ${ClassName}->${ErrorMsg}: Number of value IDs in first fingerprint vector, $NIDa, must be equal to its number of values, $Na, for fingerprint vector type $FingerprintsVectorA->{Type} ...";
    }
    if ($NIDb != $Nb) {
      croak "Error: ${ClassName}->${ErrorMsg}: Number of value IDs in second fingerprint vector, $NIDb, must be equal to its number of values, $Nb, for fingerprint vector type $FingerprintsVectorB->{Type} ...";
    }
  }
  elsif ($FingerprintsVectorA->{Type} =~ /^AlphaNumericalValues$/i) {
    if ($NIDa || $NIDb) {
      croak "Error: ${ClassName}->${ErrorMsg}: ValueIDs cann't be specified for fingerprint vector types $FingerprintsVectorA->{Type} ...";
    }
  }
  else {
    croak "Error: ${ClassName}->${ErrorMsg}: Fingerprint vector types $FingerprintsVectorA->{Type} is not valid...";
  }
}
1489 
# Prepare two fingerprints vectors for a similarity/distance calculation.
#
# The OrderedValuesRef and BitVector entries of both vectors are first reset to
# undef. The type specific processing sub is then selected using Type of the
# first vector; an unsupported Type croaks. For BinaryForm calculation mode,
# _TransformFinalOrderedValuesIntoBitVectorsForSimilarityCalculation is
# invoked afterwards.
#
# Arguments:
#   . ErrorMsg: prefix embedded in the error message
#   . FingerprintsVectorA, FingerprintsVectorB: the two vectors
#   . CalculationMode: only checked against BinaryForm here
#
sub _ProcessFingerprintsVectorsForSimilarityCalculation {
  my($ErrorMsg, $FingerprintsVectorA, $FingerprintsVectorB, $CalculationMode) = @_;

  # Clear out anything left behind from an earlier calculation...
  for my $Key ('OrderedValuesRef', 'BitVector') {
    for my $Vector ($FingerprintsVectorA, $FingerprintsVectorB) {
      $Vector->{$Key} = undef;
    }
  }

  # Pick the type specific processing sub...
  my $Type = $FingerprintsVectorA->{Type};
  my $ProcessingSubRef =
      $Type =~ /^OrderedNumericalValues$/i ? \&_ProcessOrderedNumericalValuesFingerprintsVectorsForSimilarityCalculation
    : $Type =~ /^NumericalValues$/i        ? \&_ProcessNumericalValuesFingerprintsVectorsForSimilarityCalculation
    : $Type =~ /^AlphaNumericalValues$/i   ? \&_ProcessAlphaNumericalValuesFingerprintsVectorsForSimilarityCalculation
    :                                        undef;

  if (!defined $ProcessingSubRef) {
    croak "Error: ${ClassName}->${ErrorMsg}: Fingerprint vector types $FingerprintsVectorA->{Type} is not valid...";
  }
  $ProcessingSubRef->($FingerprintsVectorA, $FingerprintsVectorB);

  # Binary form additionally needs bit vectors...
  if ($CalculationMode =~ /^BinaryForm$/i) {
    _TransformFinalOrderedValuesIntoBitVectorsForSimilarityCalculation($FingerprintsVectorA, $FingerprintsVectorB);
  }
}
1515 
# Process fingerprints vectors with ordered numerical values for similarity calculations...
#
# For each vector, OrderedValuesRef is set to a reference to the vector's own
# Values array; no copy of the values is made.
#
sub _ProcessOrderedNumericalValuesFingerprintsVectorsForSimilarityCalculation {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;

  for my $Vector ($FingerprintsVectorA, $FingerprintsVectorB) {
    $Vector->{OrderedValuesRef} = \@{$Vector->{Values}};
  }
}
1524 
# Process fingerprints vectors with numerical values for similarity calculations...
#
# Values sharing the same value ID within a vector are added together. The
# value IDs found in either vector are then sorted using the default sort, and
# each vector gets an OrderedValuesRef array laid out in that order, with 0 at
# positions whose value ID is absent from that vector.
#
sub _ProcessNumericalValuesFingerprintsVectorsForSimilarityCalculation {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;

  # Value IDs seen in at least one of the two vectors...
  my %SeenValueIDs = ();

  # Build value ID to summed value map for one vector...
  my $CollectValuesByID = sub {
    my($Vector) = @_;
    my %ValueByID = ();

    for my $Index (0 .. $#{$Vector->{ValueIDs}}) {
      my $ValueID = $Vector->{ValueIDs}[$Index];
      my $Value = $Vector->{Values}[$Index];

      # First occurrence stores the value as is; later ones are accumulated...
      if (exists $ValueByID{$ValueID}) {
        $ValueByID{$ValueID} += $Value;
      }
      else {
        $ValueByID{$ValueID} = $Value;
      }
      $SeenValueIDs{$ValueID} = 1;
    }
    return \%ValueByID;
  };

  my $ValuesByIDARef = $CollectValuesByID->($FingerprintsVectorA);
  my $ValuesByIDBRef = $CollectValuesByID->($FingerprintsVectorB);

  # Setup ordered values using a common value IDs order for both vectors...
  my @CommonOrderedValueIDs = sort keys %SeenValueIDs;

  for my $VectorAndMap ([$FingerprintsVectorA, $ValuesByIDARef], [$FingerprintsVectorB, $ValuesByIDBRef]) {
    my($Vector, $ValueByIDRef) = @{$VectorAndMap};

    my @OrderedValues = map { exists $ValueByIDRef->{$_} ? $ValueByIDRef->{$_} : 0 } @CommonOrderedValueIDs;
    $Vector->{OrderedValuesRef} = \@OrderedValues;
  }
}
1582 
# Process fingerprints vectors with alphanumerical values for similarity calculations...
#
# Each distinct value is counted within each vector. The distinct values seen in either
# vector are then sorted as strings, and OrderedValuesRef of each vector is set to a new
# array holding that vector's count for every value in sorted order, with 0 standing in
# for values the vector does not contain.
#
sub _ProcessAlphaNumericalValuesFingerprintsVectorsForSimilarityCalculation {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;
  my(%ValueCountsA, %ValueCountsB, %AllValues);

  # Count occurrences of values in first vector...
  for my $Value (@{$FingerprintsVectorA->{Values}}) {
    $ValueCountsA{$Value}++;
    $AllValues{$Value} = 1;
  }

  # Count occurrences of values in second vector...
  for my $Value (@{$FingerprintsVectorB->{Values}}) {
    $ValueCountsB{$Value}++;
    $AllValues{$Value} = 1;
  }

  # Line up counts of both vectors against the same sorted list of values...
  my(@OrderedValuesA, @OrderedValuesB);
  for my $Value (sort keys %AllValues) {
    push @OrderedValuesA, exists $ValueCountsA{$Value} ? $ValueCountsA{$Value} : 0;
    push @OrderedValuesB, exists $ValueCountsB{$Value} ? $ValueCountsB{$Value} : 0;
  }

  $FingerprintsVectorA->{OrderedValuesRef} = \@OrderedValuesA;
  $FingerprintsVectorB->{OrderedValuesRef} = \@OrderedValuesB;
}
1637 
# Transform final ordered values array into a BitVector for similarity calculation...
#
# A BitVector sized to the number of ordered values in the first vector is created for
# each vector and stored under its BitVector key. A bit is set at every position where
# the vector's ordered value is numerically different from zero.
#
# The comparison is numeric on purpose: a plain truth test treats zero values written
# as strings such as "0.0" or "0.00" as true, which would set bits for zero values.
#
sub _TransformFinalOrderedValuesIntoBitVectorsForSimilarityCalculation {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;
  my($Index, $Size, $SkipCheck, $FingerprintsVector, $Value);

  # Create bit vectors...
  $Size = scalar @{$FingerprintsVectorA->{OrderedValuesRef}};

  $FingerprintsVectorA->{BitVector} = BitVector->new($Size);
  $FingerprintsVectorB->{BitVector} = BitVector->new($Size);

  # Set bits...
  #
  # NOTE(review): $SkipCheck is handed to SetBit as its second argument; presumably it
  # turns off SetBit's own index validation - confirm against BitVector.pm.
  $SkipCheck = 1;
  for $Index (0 .. ($Size - 1)) {
    for $FingerprintsVector ($FingerprintsVectorA, $FingerprintsVectorB) {
      $Value = $FingerprintsVector->{OrderedValuesRef}[$Index];

      # Missing entries, e.g. past the end of a shorter second vector, leave the bit unset...
      if (defined($Value) && $Value != 0) {
        $FingerprintsVector->{BitVector}->SetBit($Index, $SkipCheck);
      }
    }
  }
}
1661 
# Return sum of ordered vector values...
#
# The reference to the vector's ordered values array is handed over to
# StatisticsUtil::Sum and its result is returned unchanged.
#
sub _GetSumOfFingerprintsOrderedValues {
  my($FingerprintVector) = @_;
  my($OrderedValuesRef);

  $OrderedValuesRef = $FingerprintVector->{OrderedValuesRef};

  return StatisticsUtil::Sum($OrderedValuesRef);
}
1669 
# Return sum of squared ordered vector values...
#
# The reference to the vector's ordered values array is handed over to
# StatisticsUtil::SumOfSquares and its result is returned unchanged.
#
sub _GetSumOfSquaresOfFingerprintsOrderedValues {
  my($FingerprintVector) = @_;
  my($OrderedValuesRef);

  $OrderedValuesRef = $FingerprintVector->{OrderedValuesRef};

  return StatisticsUtil::SumOfSquares($OrderedValuesRef);
}
1677 
# Return sum of product of corresponding ordered vector values...
#
# Positions are taken from the ordered values of the first vector; the value at each
# position is multiplied by the value at the same position in the second vector.
#
sub _GetSumOfProductOfFingerprintsOrderedValues {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;
  my($SumOfProducts) = 0;

  for my $Position (0 .. $#{$FingerprintsVectorA->{OrderedValuesRef}}) {
    my $Xai = $FingerprintsVectorA->{OrderedValuesRef}[$Position];
    my $Xbi = $FingerprintsVectorB->{OrderedValuesRef}[$Position];

    $SumOfProducts += $Xai * $Xbi;
  }
  return $SumOfProducts;
}
1690 
# Return sum of absolute value of subtraction of corresponding ordered vector values...
#
# Positions are taken from the ordered values of the first vector; at each position the
# value in the second vector is subtracted from the value in the first one.
#
sub _GetSumOfAbsoluteValueOfSubtractionOfFingerprintsOrderedValues {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;
  my($SumOfAbsoluteDifferences) = 0;

  for my $Position (0 .. $#{$FingerprintsVectorA->{OrderedValuesRef}}) {
    my $Xai = $FingerprintsVectorA->{OrderedValuesRef}[$Position];
    my $Xbi = $FingerprintsVectorB->{OrderedValuesRef}[$Position];

    $SumOfAbsoluteDifferences += abs($Xai - $Xbi);
  }
  return $SumOfAbsoluteDifferences;
}
1703 
# Return sum of squares of subtraction of corresponding ordered vector values...
#
# Positions are taken from the ordered values of the first vector; at each position the
# value in the second vector is subtracted from the value in the first one and the
# difference is squared.
#
sub _GetSumOfSquaresOfSubtractionOfFingerprintsOrderedValues {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;
  my($SumOfSquaredDifferences) = 0;

  for my $Position (0 .. $#{$FingerprintsVectorA->{OrderedValuesRef}}) {
    my $Difference = $FingerprintsVectorA->{OrderedValuesRef}[$Position] - $FingerprintsVectorB->{OrderedValuesRef}[$Position];

    $SumOfSquaredDifferences += $Difference ** 2;
  }
  return $SumOfSquaredDifferences;
}
1716 
# Return sum of minimum of corresponding ordered vector values...
#
# Positions are taken from the ordered values of the first vector; at each position the
# values from both vectors are passed to MathUtil::min and its result is added up.
#
sub _GetSumOfMinimumOfFingerprintsOrderdedValues {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;
  my($SumOfMinimums) = 0;

  for my $Position (0 .. $#{$FingerprintsVectorA->{OrderedValuesRef}}) {
    my $Xai = $FingerprintsVectorA->{OrderedValuesRef}[$Position];
    my $Xbi = $FingerprintsVectorB->{OrderedValuesRef}[$Position];

    $SumOfMinimums += MathUtil::min($Xai, $Xbi);
  }
  return $SumOfMinimums;
}
1729 
# Return sum of maximum of corresponding ordered vector values...
#
# Positions are taken from the ordered values of the first vector; at each position the
# values from both vectors are passed to MathUtil::max and its result is added up.
#
sub _GetSumOfMaximumOfFingerprintsOrderdedValues {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;
  my($SumOfMaximums) = 0;

  for my $Position (0 .. $#{$FingerprintsVectorA->{OrderedValuesRef}}) {
    my $Xai = $FingerprintsVectorA->{OrderedValuesRef}[$Position];
    my $Xbi = $FingerprintsVectorB->{OrderedValuesRef}[$Position];

    $SumOfMaximums += MathUtil::max($Xai, $Xbi);
  }
  return $SumOfMaximums;
}
1742 
# Get number of Na, Nb and Nc bits in vector A and B for BinaryForm calculation...
#
# Works on the objects stored under the BitVector key of each fingerprints vector and
# returns a list of three counts:
#
#   Na - Number of bits set to "1" in A
#   Nb - Number of bits set to "1" in B
#   Nc - Number of bits set to "1" in both A and B
#
sub _GetNumOfIndividualAndCommonSetBits ($$) {
  my($FingerprintsVectorA, $FingerprintsVectorB) = @_;

  my $BitVectorA = $FingerprintsVectorA->{BitVector};
  my $BitVectorB = $FingerprintsVectorB->{BitVector};

  my $Na = $BitVectorA->GetNumOfSetBits();
  my $Nb = $BitVectorB->GetNumOfSetBits();

  # Bits in common: the result of applying & to two bit vector objects is itself asked
  # for its number of set bits...
  my $Nc = ($BitVectorA & $BitVectorB)->GetNumOfSetBits();

  return ($Na, $Nb, $Nc);
}
1765 
# Return a list of supported distance coefficients...
#
# The names come from the file level list of distance coefficients. In scalar context,
# the returned array evaluates to the number of names.
#
sub GetSupportedDistanceCoefficients () {
  return @DistanceCoefficients;
}
1772 
# Return a list of supported similarity coefficients...
#
# The names come from the file level list of similarity coefficients. In scalar context,
# the returned array evaluates to the number of names.
#
sub GetSupportedSimilarityCoefficients () {
  return @SimilarityCoefficients;
}
1779 
# Return a list of supported distance and similarity coefficients...
#
# The names of distance and similarity coefficients are combined and sorted as strings.
# In list context the sorted names are returned; in scalar context the returned array
# evaluates to the number of names, matching what GetSupportedDistanceCoefficients and
# GetSupportedSimilarityCoefficients do.
#
sub GetSupportedDistanceAndSimilarityCoefficients () {
  my(@DistanceAndSimilarityCoefficients);

  # Sort into an array before returning: the behavior of "return sort ..." is undefined
  # when the function is called in scalar context...
  @DistanceAndSimilarityCoefficients = sort(@DistanceCoefficients, @SimilarityCoefficients);

  return @DistanceAndSimilarityCoefficients;
}
1791 
# Is it a fingerprints vector object?
#
# Takes the object to check as its only argument and returns whatever
# _IsFingerprintsVector reports for it.
#
sub IsFingerprintsVector ($) {
  my($Object) = @_;
  my($Status);

  $Status = _IsFingerprintsVector($Object);

  return $Status;
}
1798 
# Is it a fingerprints vector object?
#
# Returns 1 for a blessed reference whose class is, or inherits from, the class named
# by $ClassName; returns 0 for everything else.
#
sub _IsFingerprintsVector {
  my($Object) = @_;

  # Anything that is not a blessed reference can't be a fingerprints vector...
  if (!Scalar::Util::blessed($Object)) {
    return 0;
  }
  return $Object->isa($ClassName) ? 1 : 0;
}
1805 
# Return a string containing vector values...
#
# The string has the following form:
#
#   Type: <Type>; NumOfValues: <N>[; NumOfNonZeroValues: <N>]; Values: <...>;
#   NumOfValueIDs: <N>; ValueIDs: <...>
#
# NumOfNonZeroValues is included only for OrderedNumericalValues and NumericalValues
# vector types. An empty list of values or value IDs is shown as None. At most 500
# entries of each list are written out: a longer list is cut after 500 entries, ends
# with " ..." and has "(Truncated after 500)" added to its label. Values and value IDs
# are handled independently and a list with exactly 500 entries is written out in full.
#
sub StringifyFingerprintsVector {
  my($This) = @_;
  my($FingerprintsVectorString, $MaxValuesToStringify);

  $MaxValuesToStringify = 500;

  # Stringify one list; returns the string along with a flag indicating truncation...
  my($StringifyList) = sub {
    my($ListRef, $NumOfEntries) = @_;

    if (!$NumOfEntries) {
      return ('None', 0);
    }
    if ($NumOfEntries <= $MaxValuesToStringify) {
      return (join(' ', @{$ListRef}), 0);
    }
    return (join(' ', @{$ListRef}[0 .. ($MaxValuesToStringify - 1)]) . " ...", 1);
  };

  # Set type, values and value IDs...
  my($NumOfValues, $ValuesString, $ValuesTruncated, $NumOfValueIDs, $ValueIDsString, $ValueIDsTruncated);

  $NumOfValues = $This->GetNumOfValues();
  ($ValuesString, $ValuesTruncated) = $StringifyList->($This->{Values}, $NumOfValues);

  $NumOfValueIDs = $This->GetNumOfValueIDs();
  ($ValueIDsString, $ValueIDsTruncated) = $StringifyList->($This->{ValueIDs}, $NumOfValueIDs);

  $FingerprintsVectorString = "Type: $This->{Type}; NumOfValues: $NumOfValues";
  if ($This->{Type} =~ /^(OrderedNumericalValues|NumericalValues)$/i) {
    my($NumOfNonZeroValues);
    $NumOfNonZeroValues = $This->GetNumOfNonZeroValues();
    $FingerprintsVectorString .= "; NumOfNonZeroValues: $NumOfNonZeroValues";
  }

  # Append all the values and value IDs; each label reflects truncation of its own list...
  my($ValuesLabel, $ValueIDsLabel);
  $ValuesLabel = $ValuesTruncated ? "Values (Truncated after $MaxValuesToStringify)" : "Values";
  $ValueIDsLabel = $ValueIDsTruncated ? "ValueIDs (Truncated after $MaxValuesToStringify)" : "ValueIDs";

  $FingerprintsVectorString .= "; $ValuesLabel: <$ValuesString>; NumOfValueIDs: $NumOfValueIDs; $ValueIDsLabel: <$ValueIDsString>";

  return $FingerprintsVectorString;
}
1862