MayaChemTools

   1 package Fingerprints::FingerprintsBitVector;
   2 #
   3 # File: FingerprintsBitVector.pm
   4 # Author: Manish Sud <msud@san.rr.com>
   5 #
   6 # Copyright (C) 2024 Manish Sud. All rights reserved.
   7 #
   8 # This file is part of MayaChemTools.
   9 #
  10 # MayaChemTools is free software; you can redistribute it and/or modify it under
  11 # the terms of the GNU Lesser General Public License as published by the Free
  12 # Software Foundation; either version 3 of the License, or (at your option) any
  13 # later version.
  14 #
  15 # MayaChemTools is distributed in the hope that it will be useful, but without
  16 # any warranty; without even the implied warranty of merchantability of fitness
  17 # for a particular purpose.  See the GNU Lesser General Public License for more
  18 # details.
  19 #
  20 # You should have received a copy of the GNU Lesser General Public License
  21 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  22 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  23 # Boston, MA, 02111-1307, USA.
  24 #
  25 
  26 use strict;
  27 use Carp;
  28 use Exporter;
  29 use Scalar::Util ();
  30 use BitVector;
  31 use MathUtil;
  32 use TextUtil ();
  33 
  34 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
  35 
  36 @ISA = qw(BitVector Exporter);
  37 
  38 # Similiarity coefficients...
  39 my(@SimilarityCoefficients) = qw(BaroniUrbaniSimilarityCoefficient BuserSimilarityCoefficient CosineSimilarityCoefficient DiceSimilarityCoefficient DennisSimilarityCoefficient ForbesSimilarityCoefficient FossumSimilarityCoefficient HamannSimilarityCoefficient JacardSimilarityCoefficient Kulczynski1SimilarityCoefficient Kulczynski2SimilarityCoefficient MatchingSimilarityCoefficient McConnaugheySimilarityCoefficient OchiaiSimilarityCoefficient PearsonSimilarityCoefficient RogersTanimotoSimilarityCoefficient RussellRaoSimilarityCoefficient SimpsonSimilarityCoefficient SkoalSneath1SimilarityCoefficient SkoalSneath2SimilarityCoefficient SkoalSneath3SimilarityCoefficient TanimotoSimilarityCoefficient TverskySimilarityCoefficient YuleSimilarityCoefficient WeightedTanimotoSimilarityCoefficient WeightedTverskySimilarityCoefficient);
  40 
  41 # New from string...
  42 my(@NewFromString) = qw(NewFromBinaryString NewFromHexadecimalString NewFromRawBinaryString);
  43 
  44 @EXPORT = qw(IsFingerprintsBitVector);
  45 @EXPORT_OK = qw(GetSupportedSimilarityCoefficients @NewFromString @SimilarityCoefficients);
  46 
  47 %EXPORT_TAGS = (
  48                 new => [@NewFromString],
  49                 coefficients => [@SimilarityCoefficients],
  50                 all  => [@EXPORT, @EXPORT_OK]
  51                );
  52 
  53 # Setup class variables...
  54 my($ClassName);
  55 _InitializeClass();
  56 
  57 use overload '""' => 'StringifyFingerprintsBitVector';
  58 
  59 # Class constructor...
  60 sub new {
  61   my($Class, $Size) = @_;
  62 
  63   # Initialize object...
  64   my $This = $Class->SUPER::new($Size);
  65   bless $This, ref($Class) || $Class;
  66   $This->_InitializeFingerprintsBitVector($Size);
  67 
  68   return $This;
  69 }
  70 
  71 # Initialize object data...
  72 #
  73 # Note:
  74 #  . The class, BitVector, used to derive this class provides all the functionality to
  75 #    manipulate bits.
  76 #  . Irrespective of specified size, Perl functions used to handle bit data in
  77 #    BitVector class automatically sets the size to the next nearest power of 2.
  78 #    SpecifiedSize is used by this class to process any aribitray size during similarity
  79 #    coefficient calculations.
  80 #
  81 sub _InitializeFingerprintsBitVector {
  82   my($This, $Size) = @_;
  83 
  84   if (!defined $Size) {
  85     croak "Error: ${ClassName}->new: FingerprintsBitVector object instantiated without specifying its size ...";
  86   }
  87   if ($Size <=0) {
  88     croak "Error: ${ClassName}->new: Fingerprints bit vector size, $Size, must be a positive integer...";
  89   }
  90 
  91   # Specified size of fingerprints...
  92   $This->{SpecifiedSize} = $Size;
  93 
  94 }
  95 
# Initialize class...
#
# Sets up the file-scoped class name, which is used in error messages and for
# checking object types.
#
sub _InitializeClass {
  #Class name...
  $ClassName = __PACKAGE__;
}
 101 
 102 # Set specified size...
 103 #
 104 # Notes:
 105 #   Irrespective of specified size, Perl functions used to handle bit data in
 106 #   BitVector class automatically sets the size to the next nearest power of 2.
 107 #   SpecifiedSize is used by this class to process any aribitray size during similarity
 108 #   coefficient calculations.
 109 #
 110 sub SetSpecifiedSize {
 111   my($This, $SpecifiedSize) = @_;
 112 
 113   if (!($SpecifiedSize > 0 && $SpecifiedSize <= $This->{Size})) {
 114     croak "Error: ${ClassName}->SetSpecifiedSize: Specified size, $SpecifiedSize, is not valid:  It must be > 0 && <= ", $This->GetSize()," ...";
 115   }
 116   $This->{SpecifiedSize} = $SpecifiedSize;
 117 }
 118 
 119 # Get specified size...
 120 sub GetSpecifiedSize {
 121   my($This) = @_;
 122 
 123   return $This->{SpecifiedSize};
 124 }
 125 
 126 # Set ID...
 127 sub SetID {
 128   my($This, $Value) = @_;
 129 
 130   $This->{ID} = $Value;
 131 
 132   return $This;
 133 }
 134 
 135 # Get ID...
 136 sub GetID {
 137   my($This) = @_;
 138 
 139   return exists $This->{ID} ? $This->{ID} : 'None';
 140 }
 141 
 142 # Set description...
 143 sub SetDescription {
 144   my($This, $Value) = @_;
 145 
 146   $This->{Description} = $Value;
 147 
 148   return $This;
 149 }
 150 
 151 # Get description...
 152 sub GetDescription {
 153   my($This) = @_;
 154 
 155   return exists $This->{Description} ? $This->{Description} : 'No description available';
 156 }
 157 
 158 # Set vector type...
 159 sub SetVectorType {
 160   my($This, $Value) = @_;
 161 
 162   $This->{VectorType} = $Value;
 163 
 164   return $This;
 165 }
 166 
 167 # Get vector type...
 168 sub GetVectorType {
 169   my($This) = @_;
 170 
 171   return exists $This->{VectorType} ? $This->{VectorType} : 'FingerprintsBitVector';
 172 }
 173 
 174 # Create a new fingerprints bit vector using binary string. This functionality can be
 175 # either invoked as a class function or an object method.
 176 #
 177 sub NewFromBinaryString ($;$) {
 178   my($FirstParameter, $SecondParameter, $ThirdParameter) = @_;
 179 
 180   if (_IsFingerprintsBitVector($FirstParameter)) {
 181     return _NewFingerptinsBitVectorFromString('Binary', $SecondParameter, $ThirdParameter);
 182   }
 183   else {
 184     return _NewFingerptinsBitVectorFromString( 'Binary', $FirstParameter, $SecondParameter);
 185   }
 186 }
 187 
 188 # Create a new fingerprints bit vector using hexadecimal string. This functionality can be
 189 # either invoked as a class function or an object method.
 190 #
 191 sub NewFromHexadecimalString ($;$) {
 192   my($FirstParameter, $SecondParameter, $ThirdParameter) = @_;
 193 
 194   if (_IsFingerprintsBitVector($FirstParameter)) {
 195     return _NewFingerptinsBitVectorFromString('Hexadecimal', $SecondParameter, $ThirdParameter);
 196   }
 197   else {
 198     return _NewFingerptinsBitVectorFromString( 'Hexadecimal', $FirstParameter, $SecondParameter);
 199   }
 200 }
 201 
 202 # Create a new fingerprints bit vector using octal string. This functionality can be
 203 # either invoked as a class function or an object method.
 204 #
 205 #
 206 sub NewFromOctalString ($) {
 207   croak "Error: ${ClassName}->NewFromOctalString: Creation of fingerprits bit vector from an octal string is not supported ...";
 208 }
 209 
 210 # Create a new fingerprints bit vector using decimal string. This functionality can be
 211 # either invoked as a class function or an object method.
 212 #
 213 sub NewFromDecimalString ($;$) {
 214   croak "Error: ${ClassName}->NewFromDecimalString: Creation of fingerprits bit vector from a decimal string is not supported ...";
 215 }
 216 
 217 # Create a new fingerprints bit vector using raw binary string. This functionality can be
 218 # either invoked as a class function or an object method.
 219 #
 220 sub NewFromRawBinaryString ($;$) {
 221   my($FirstParameter, $SecondParameter, $ThirdParameter) = @_;
 222 
 223   if (_IsFingerprintsBitVector($FirstParameter)) {
 224     return _NewFingerptinsBitVectorFromString('RawBinary', $SecondParameter, $ThirdParameter);
 225   }
 226   else {
 227     return _NewFingerptinsBitVectorFromString( 'RawBinary', $FirstParameter, $SecondParameter);
 228   }
 229 }
 230 
 231 # Create a new fingerprints bit vector from a string...
 232 #
 233 #
 234 sub _NewFingerptinsBitVectorFromString ($$;$) {
 235   my($Format, $String, $BitsOrder) = @_;
 236   my($FingerprintsBitVector, $Size);
 237 
 238   $Size = BitVector::_CalculateStringSizeInBits($Format, $String);
 239 
 240   $FingerprintsBitVector = new Fingerprints::FingerprintsBitVector($Size);
 241   $FingerprintsBitVector->_SetBitsAsString($Format, $String, $BitsOrder);
 242 
 243   return $FingerprintsBitVector;
 244 }
 245 
 246 # Get fingerprint bits as a hexadecimal string...
 247 #
 248 sub GetBitsAsHexadecimalString {
 249   my($This, $BitsOrder) = @_;
 250 
 251   return $This->_GetFingerprintBitsAsString('Hexadecimal', $BitsOrder);
 252 }
 253 
 254 # Get fingerprint bits as an octal string...
 255 #
 256 sub GetBitsAsOctalString {
 257   my($This, $BitsOrder) = @_;
 258 
 259   croak "Error: ${ClassName}->GetBitsAsOctalString: Retrieval of fingerprits bits as an octal string is not supported ...";
 260 }
 261 
 262 # Get fingerprint bits as an decimal string...
 263 #
 264 sub GetBitsAsDecimalString {
 265   my($This, $BitsOrder) = @_;
 266 
 267   croak "Error: ${ClassName}->GetBitsAsOctalString: Retrieval of fingerprits bits as a decimal string is not supported ...";
 268 }
 269 
 270 # Get fingerprint bits as a binary string conatning 1s and 0s...
 271 #
 272 sub GetBitsAsBinaryString {
 273   my($This, $BitsOrder) = @_;
 274 
 275   return $This->_GetFingerprintBitsAsString('Binary', $BitsOrder);
 276 }
 277 
 278 # Get fingerprint bits as a binary string conatning 1s and 0s...
 279 #
 280 sub GetBitsAsRawBinaryString {
 281   my($This) = @_;
 282 
 283   return $This->_GetFingerprintBitsAsString('RawBinary');
 284 }
 285 
 286 # Return fingerprint bits as a string...
 287 #
 288 sub _GetFingerprintBitsAsString {
 289   my($This, $Format, $BitsOrder) = @_;
 290 
 291   $BitsOrder = (defined($BitsOrder) && $BitsOrder) ? $BitsOrder : 'Ascending';
 292 
 293   return $This->_GetBitsAsString($Format, $BitsOrder);
 294 }
 295 
 296 # Is it a fingerprints bit vector object?
 297 sub IsFingerprintsBitVector ($) {
 298   my($Object) = @_;
 299 
 300   return _IsFingerprintsBitVector($Object);
 301 }
 302 
 303 # Is it a fingerprints bit vector object?
 304 sub _IsFingerprintsBitVector {
 305   my($Object) = @_;
 306 
 307   return (Scalar::Util::blessed($Object) && $Object->isa($ClassName)) ? 1 : 0;
 308 }
 309 
# Return a list of supported similarity coefficients...
#
# The list contains the names of the similarity coefficient functions set up at
# the top of this file.
#
sub GetSupportedSimilarityCoefficients () {

  return @SimilarityCoefficients;
}
 315 
 316 # Get bit density for fingerprints bit vector corresponding to on bits...
 317 #
 318 sub GetFingerprintsBitDensity {
 319   my($This) = @_;
 320   my($BitDensity);
 321 
 322   $BitDensity = $This->GetDensityOfSetBits();
 323 
 324   return round($BitDensity, 2);
 325 }
 326 
 327 # Fold fingerprints bit vector by recursively reducing its size by half untill size is less than or equal to
 328 # specified size...
 329 #
 330 sub FoldFingerprintsBitVectorBySize {
 331   my($This, $Size) = @_;
 332 
 333   if (!($Size > 0 && $Size <= $This->GetSize())) {
 334     croak "Error: ${ClassName}->FoldFingerprintsBitVectorBySize: Specified size, $Size, is not valid:  It must be > 0 && <= ", $This->GetSize()," ...";
 335   }
 336 
 337   if ($This->GetSize() <= $Size) {
 338     return $This;
 339   }
 340   return $This->_FoldFingerprintsBitVector('BySize', $Size);
 341 }
 342 
 343 # Fold fingerprints bit vector by recursively reducing its size by half untill bit density of set bits is greater than
 344 #  or equal to specified density...
 345 #
 346 sub FoldFingerprintsBitVectorByDensity {
 347   my($This, $Density) = @_;
 348 
 349   if (!($Density > 0 && $Density <= 1)) {
 350     croak "Error: ${ClassName}->FoldFingerprintsBitVectorByDensity: Specified bit density, $Density, is not valid:  It must be > 0 && <= 1 ...";
 351   }
 352 
 353   if ($This->GetDensityOfSetBits() >= $Density) {
 354     return $This;
 355   }
 356   return $This->_FoldFingerprintsBitVector('ByDensity', $Density);
 357 }
 358 
 359 # Fold fingerprints bit vector using size or density and return folded fingerprint bit vector...
 360 #
 361 sub _FoldFingerprintsBitVector {
 362   my($This, $Mode, $Value) = @_;
 363 
 364   # Fold upto size of 8 bits...
 365   if ($This->GetSize() <= 8) {
 366     return $This;
 367   }
 368 
 369   # Check size or density....
 370   if ($Mode =~ /^BySize$/i) {
 371     if ($This->GetSize() <= $Value) {
 372       return $This;
 373     }
 374   }
 375   elsif ($Mode =~ /^ByDensity$/i) {
 376     if ($This->GetDensityOfSetBits() >= $Value) {
 377       return $This;
 378     }
 379   }
 380   else {
 381     return $This;
 382   }
 383 
 384   # Recursively reduce its size by half...
 385   my($FirstHalfBinaryString, $SecondHalfBinaryString, $FirstHalfFingerprintsBitVector, $SecondHalfFingerprintsBitVector, $FoldedFingerprintsBitVector, $BinaryString, $StringLength);
 386 
 387   $BinaryString = $This->GetBitsAsBinaryString();
 388   $StringLength = length $BinaryString;
 389 
 390   $FirstHalfBinaryString = substr($BinaryString, 0, $StringLength/2);
 391   $SecondHalfBinaryString = substr($BinaryString, $StringLength/2);
 392 
 393   $FirstHalfFingerprintsBitVector = NewFromBinaryString($FirstHalfBinaryString);
 394   $SecondHalfFingerprintsBitVector = NewFromBinaryString($SecondHalfBinaryString);
 395 
 396   $FoldedFingerprintsBitVector = $FirstHalfFingerprintsBitVector | $SecondHalfFingerprintsBitVector;
 397 
 398   return $FoldedFingerprintsBitVector->_FoldFingerprintsBitVector($Mode, $Value);
 399 }
 400 
 401 # Is first bit vector subset of second bit vector?
 402 #
 403 # For a bit vector to be a subset of another bit vector, both vectors must be of
 404 # the same size and the bit positions set in first vector must also be set in the
 405 # secons bit vector.
 406 #
 407 # This functionality can be either invoked as a class function or an object method.
 408 #
 409 sub IsSubSet ($$) {
 410   my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;
 411 
 412   if ($FingerprintsBitVectorA->GetSize() != $FingerprintsBitVectorB->GetSize()) {
 413     return 0;
 414   }
 415   my($AndFingerprintsBitVector);
 416 
 417   $AndFingerprintsBitVector = $FingerprintsBitVectorA & $FingerprintsBitVectorB;
 418 
 419   return ($FingerprintsBitVectorA->GetNumOfSetBits() == $AndFingerprintsBitVector->GetNumOfSetBits()) ? 1 : 0;
 420 }
 421 
 422 # Return a string containing vector values...
 423 sub StringifyFingerprintsBitVector {
 424   my($This) = @_;
 425   my($FingerprintsBitVectorString);
 426 
 427   # BitVector size information...
 428   #
 429   if ($This->{SpecifiedSize} != $This->GetSize()) {
 430     $FingerprintsBitVectorString = "SpecifiedSize: " . $This->{SpecifiedSize} . "; BitVectorSize: " . $This->GetSize();
 431   }
 432   else {
 433     $FingerprintsBitVectorString = "BitVectorSize: " . $This->GetSize();
 434   }
 435   my($NumOfSetBits, $BitDensity);
 436   $NumOfSetBits = $This->GetNumOfSetBits();
 437   $BitDensity = $This->GetFingerprintsBitDensity();
 438 
 439   $FingerprintsBitVectorString .= "; NumOfOnBits: $NumOfSetBits; BitDensity: $BitDensity";
 440 
 441   # BitVector values...
 442   $FingerprintsBitVectorString .= "; BitVector: " . $This->StringifyBitVector();
 443 
 444   return $FingerprintsBitVectorString;
 445 }
 446 
 447 # For two fingerprints bit vectors A and B of same size, let:
 448 #
 449 #  Na = Number of bits set to "1" in A
 450 #  Nb = Number of bits set to "1" in B
 451 #  Nc = Number of bits set to "1" in both A and B
 452 #  Nd = Number of bits set to "0" in both A and B
 453 #
 454 #  Nt = Number of bits set to "1" or "0" in A or B = Size of A or B = Na + Nb - Nc + Nd
 455 #
 456 #  Na - Nc = Number of bits set to "1" in A but not in B
 457 #  Nb - Nc = Number of bits set to "1" in B but not in A
 458 #
 459 # Various similarity coefficients [ Ref 40 - 42 ] for a pair of bit vectors A and B are
 460 # defined as follows:
 461 #
 462 # . BaroniUrbani: ( SQRT( Nc * Nd ) + Nc ) / (  SQRT ( Nc * Nd ) + Nc + ( Na - Nc )  + ( Nb - Nc ) ) ( same as Buser )
 463 #
 464 # . Buser: ( SQRT ( Nc * Nd ) + Nc ) / (  SQRT ( Nc * Nd ) + Nc + ( Na - Nc )  + ( Nb - Nc ) ) ( same as BaroniUrbani )
 465 #
 466 # . Cosine: Nc / SQRT ( Na * Nb ) (same as Ochiai)
 467 #
 468 # . Dice: (2 * Nc) / ( Na + Nb )
 469 #
 470 # . Dennis: ( Nc * Nd - ( ( Na - Nc ) * ( Nb - Nc ) ) ) / SQRT ( Nt * Na * Nb)
 471 #
 472 # . Forbes: ( Nt * Nc ) / ( Na * Nb )
 473 #
# . Fossum: ( Nt * ( Nc - 1/2 ) ** 2 ) / ( Na * Nb )
 475 #
 476 # . Hamann: ( ( Nc + Nd ) - ( Na - Nc ) - ( Nb - Nc ) ) / Nt
 477 #
 478 # . Jaccard: Nc /  ( ( Na - Nc) + ( Nb - Nc ) + Nc ) = Nc / ( Na + Nb - Nc ) (same as Tanimoto)
 479 #
 480 # . Kulczynski1: Nc / ( ( Na - Nc ) + ( Nb - Nc) ) = Nc / ( Na + Nb - 2Nc )
 481 #
 482 # . Kulczynski2: ( ( Nc / 2 ) * ( 2 * Nc + ( Na - Nc ) + ( Nb - Nc) ) ) / ( ( Nc + ( Na - Nc ) ) * ( Nc + ( Nb - Nc ) ) ) = 0.5 * ( Nc / Na + Nc / Nb )
 483 #
 484 # . Matching: ( Nc + Nd ) / Nt
 485 #
 486 # . McConnaughey: ( Nc ** 2 - ( Na - Nc ) * ( Nb - Nc) ) / (  Na * Nb )
 487 #
 488 # . Ochiai: Nc / SQRT ( Na * Nb ) (same as Cosine)
 489 #
# . Pearson: ( ( Nc * Nd ) - ( ( Na - Nc ) * ( Nb - Nc ) ) ) / SQRT ( Na * Nb * (  Na - Nc + Nd ) * ( Nb - Nc + Nd ) )
 491 #
 492 # . RogersTanimoto: ( Nc + Nd ) / ( ( Na - Nc)  + ( Nb  - Nc) + Nt) = ( Nc + Nd ) / ( Na  + Nb  - 2Nc + Nt)
 493 #
 494 # . RussellRao: Nc / Nt
 495 #
 496 # . Simpson: Nc / MIN ( Na, Nb)
 497 #
 498 # . SkoalSneath1: Nc / ( Nc + 2 * ( Na - Nc)  + 2 * ( Nb - Nc) ) = Nc / ( 2 * Na + 2 * Nb - 3 * Nc )
 499 #
 500 # . SkoalSneath2: ( 2 * Nc + 2 * Nd ) / ( Nc + Nd + Nt )
 501 #
 502 # . SkoalSneath3: ( Nc + Nd ) / ( ( Na - Nc ) + ( Nb - Nc ) ) = ( Nc + Nd ) / ( Na + Nb - 2 * Nc  )
 503 #
 504 # . Tanimoto: Nc /  ( ( Na - Nc) + ( Nb - Nc ) + Nc ) = Nc / ( Na + Nb - Nc ) (same as Jaccard)
 505 #
 506 # . Tversky: Nc / ( alpha * ( Na - Nc ) + ( 1 - alpha) * ( Nb - Nc) + Nc ) = Nc / ( alpha * ( Na - Nb )  + Nb)
 507 #
 508 # . Yule: ( ( Nc * Nd ) - ( ( Na - Nc ) * ( Nb - Nc ) ) ) / ( ( Nc * Nd ) + ( ( Na - Nc ) * ( Nb - Nc ) )  )
 509 #
 510 #
# Values of Tanimoto/Jaccard and Tversky coefficients depend only on those bits which
# are set to "1" in both A and B. In order to take into account all bit positions, modified versions
# of Tanimoto [ Ref. 42 ] and Tversky [  Ref. 43 ] have been developed.
 514 #
 515 # Let:
 516 #
 517 #  Na' = Number of bits set to "0" in A
 518 #  Nb' = Number of bits set to "0" in B
 519 #  Nc' = Number of bits set to "0" in both A and B
 520 #
 521 # . Tanimoto': Nc' /  ( ( Na' - Nc') + ( Nb' - Nc' ) + Nc' ) = Nc' / ( Na' + Nb' - Nc' )
 522 #
 523 # . Tversky': Nc' / ( alpha * ( Na' - Nc' ) + ( 1 - alpha) * ( Nb' - Nc' ) + Nc' ) = Nc' / ( alpha * ( Na' - Nb' )  + Nb')
 524 #
 525 # Then:
 526 #
 527 # . WeightedTanimoto = beta * Tanimoto + (1 - beta) * Tanimoto'
 528 #
 529 # . WeightedTversky = beta * Tversky + (1 - beta) * Tversky'
 530 #
 531 #
 532 
 533 # Calculate BaroniUrbani similarity coefficient for two same size bit vectors.
 534 #
 535 # This functionality can be either invoked as a class function or an object method.
 536 #
 537 sub BaroniUrbaniSimilarityCoefficient ($$) {
 538   my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;
 539 
 540   return BuserSimilarityCoefficient($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 541 }
 542 
 543 # Calculate Buser similarity coefficient for two same size bit vectors.
 544 #
 545 # This functionality can be either invoked as a class function or an object method.
 546 #
 547 sub BuserSimilarityCoefficient ($$) {
 548   my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;
 549   my($Na, $Nb, $Nc, $Nd, $Nt, $Numerator, $Denominator);
 550 
 551   ($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 552   $Nd = _GetNumOfCommonClearBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 553   $Nt = $Na + $Nb - $Nc + $Nd;
 554 
 555   $Numerator = sqrt($Nc*$Nd) + $Nc;
 556   $Denominator = sqrt($Nc*$Nd) + ($Na - $Nc)  + ($Nb - $Nc ) + $Nc;
 557 
 558   return  $Denominator ? ($Numerator/$Denominator) : 0;
 559 }
 560 
 561 # Calculate Cosine similarity coefficient for two same size bit vectors.
 562 #
 563 # This functionality can be either invoked as a class function or an object method.
 564 #
 565 sub CosineSimilarityCoefficient ($$) {
 566   my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;
 567   my($Na, $Nb, $Nc, $Numerator, $Denominator);
 568 
 569   ($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 570 
 571   $Numerator = $Nc;
 572   $Denominator = sqrt($Na*$Nb);
 573 
 574   return  $Denominator ? ($Numerator/$Denominator) : 0;
 575 }
 576 
 577 # Calculate Dice similarity coefficient for two same size bit vectors.
 578 #
 579 # This functionality can be either invoked as a class function or an object method.
 580 #
 581 sub DiceSimilarityCoefficient ($$) {
 582   my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;
 583   my($Na, $Nb, $Nc, $Numerator, $Denominator);
 584 
 585   ($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 586 
 587   $Numerator = 2*$Nc;
 588   $Denominator = $Na + $Nb;
 589 
 590   return  $Denominator ? ($Numerator/$Denominator) : 0;
 591 }
 592 
 593 # Calculate Dennis similarity coefficient for two same size bit vectors.
 594 #
 595 # This functionality can be either invoked as a class function or an object method.
 596 #
 597 sub DennisSimilarityCoefficient ($$) {
 598   my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;
 599   my($Na, $Nb, $Nc, $Nd, $Nt, $Numerator, $Denominator);
 600 
 601   ($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 602   $Nd = _GetNumOfCommonClearBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 603   $Nt = $Na + $Nb - $Nc + $Nd;
 604 
 605   $Numerator = $Nc*$Nd - (($Na - $Nc)*($Nb - $Nc));
 606   $Denominator = sqrt($Nt*$Na*$Nb);
 607 
 608   return  $Denominator ? ($Numerator/$Denominator) : 0;
 609 }
 610 
 611 # Calculate Forbes similarity coefficient for two same size bit vectors.
 612 #
 613 # This functionality can be either invoked as a class function or an object method.
 614 #
 615 sub ForbesSimilarityCoefficient ($$) {
 616   my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;
 617   my($Na, $Nb, $Nc, $Nd, $Nt, $Numerator, $Denominator);
 618 
 619   ($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 620   $Nd = _GetNumOfCommonClearBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 621   $Nt = $Na + $Nb - $Nc + $Nd;
 622 
 623   $Numerator = $Nt*$Nc;
 624   $Denominator = $Na*$Nb;
 625 
 626   return  $Denominator ? ($Numerator/$Denominator) : 0;
 627 }
 628 
 629 # Calculate Fossum similarity coefficient for two same size bit vectors.
 630 #
 631 # This functionality can be either invoked as a class function or an object method.
 632 #
 633 sub FossumSimilarityCoefficient ($$) {
 634   my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;
 635   my($Na, $Nb, $Nc, $Nd, $Nt, $Numerator, $Denominator);
 636 
 637   ($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 638   $Nd = _GetNumOfCommonClearBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 639   $Nt = $Na + $Nb - $Nc + $Nd;
 640 
 641   $Numerator =  $Nt*(($Nc - 0.5)** 2);
 642   $Denominator =  $Na*$Nb ;
 643 
 644   return  $Denominator ? ($Numerator/$Denominator) : 0;
 645 }
 646 
 647 # Calculate Hamann similarity coefficient for two same size bit vectors.
 648 #
 649 # This functionality can be either invoked as a class function or an object method.
 650 #
 651 sub HamannSimilarityCoefficient ($$) {
 652   my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;
 653   my($Na, $Nb, $Nc, $Nd, $Nt, $Numerator, $Denominator);
 654 
 655   ($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 656   $Nd = _GetNumOfCommonClearBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 657   $Nt = $Na + $Nb - $Nc + $Nd;
 658 
 659   $Numerator =  ($Nc + $Nd ) - ($Na - $Nc) - ($Nb - $Nc) ;
 660   $Denominator = $Nt;
 661 
 662   return  $Denominator ? ($Numerator/$Denominator) : 0;
 663 }
 664 
 665 # Calculate Jacard similarity coefficient for two same size bit vectors.
 666 #
 667 # This functionality can be either invoked as a class function or an object method.
 668 #
 669 sub JacardSimilarityCoefficient ($$) {
 670   my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;
 671 
 672   return TanimotoSimilarityCoefficient($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 673 }
 674 
 675 # Calculate Kulczynski1 similarity coefficient for two same size bit vectors.
 676 #
 677 # This functionality can be either invoked as a class function or an object method.
 678 #
 679 sub Kulczynski1SimilarityCoefficient ($$) {
 680   my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;
 681   my($Na, $Nb, $Nc, $Numerator, $Denominator);
 682 
 683   ($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 684 
 685   $Numerator = $Nc;
 686   $Denominator =  $Na + $Nb - 2*$Nc;
 687 
 688   return  $Denominator ? ($Numerator/$Denominator) : 0;
 689 }
 690 
 691 # Calculate Kulczynski2 similarity coefficient for two same size bit vectors.
 692 #
 693 # This functionality can be either invoked as a class function or an object method.
 694 #
 695 sub Kulczynski2SimilarityCoefficient ($$) {
 696   my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;
 697   my($Na, $Nb, $Nc, $Numerator, $Denominator);
 698 
 699   ($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 700 
 701   $Numerator = 0.5*($Na*$Nc + $Nb*$Nc);
 702   $Denominator = $Na*$Nb;
 703 
 704   return  $Denominator ? ($Numerator/$Denominator) : 0;
 705 }
 706 
 707 # Calculate Matching similarity coefficient for two same size bit vectors.
 708 #
 709 # This functionality can be either invoked as a class function or an object method.
 710 #
 711 sub MatchingSimilarityCoefficient ($$) {
 712   my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;
 713   my($Na, $Nb, $Nc, $Nd, $Nt, $Numerator, $Denominator);
 714 
 715   ($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 716   $Nd = _GetNumOfCommonClearBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 717   $Nt = $Na + $Nb - $Nc + $Nd;
 718 
 719   $Numerator =  $Nc + $Nd;
 720   $Denominator = $Nt;
 721 
 722   return  $Denominator ? ($Numerator/$Denominator) : 0;
 723 }
 724 
 725 # Calculate McConnaughey similarity coefficient for two same size bit vectors.
 726 #
 727 # This functionality can be either invoked as a class function or an object method.
 728 #
 729 sub McConnaugheySimilarityCoefficient ($$) {
 730   my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;
 731   my($Na, $Nb, $Nc, $Numerator, $Denominator);
 732 
 733   ($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 734 
 735   $Numerator =  $Nc**2 - (($Na - $Nc)*($Nb - $Nc));
 736   $Denominator = $Na*$Nb ;
 737 
 738   return  $Denominator ? ($Numerator/$Denominator) : 0;
 739 }
 740 
 741 # Calculate Ochiai similarity coefficient for two same size bit vectors.
 742 #
 743 # This functionality can be either invoked as a class function or an object method.
 744 #
 745 sub OchiaiSimilarityCoefficient ($$) {
 746   my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;
 747 
 748   return CosineSimilarityCoefficient($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 749 }
 750 
 751 # Calculate Pearson similarity coefficient for two same size bit vectors.
 752 #
 753 # This functionality can be either invoked as a class function or an object method.
 754 #
 755 sub PearsonSimilarityCoefficient ($$) {
 756   my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;
 757   my($Na, $Nb, $Nc, $Nd, $Nt, $Numerator, $Denominator);
 758 
 759   ($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 760   $Nd = _GetNumOfCommonClearBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 761   $Nt = $Na + $Nb - $Nc + $Nd;
 762 
 763   $Numerator = ($Nc*$Nd ) - (($Na - $Nc)*($Nb - $Nc));
 764   $Denominator =  sqrt($Na*$Nb*($Na - $Nc + $Nd )*($Nb - $Nc + $Nd));
 765 
 766   return  $Denominator ? ($Numerator/$Denominator) : 0;
 767 }
 768 
 769 # Calculate RogersTanimoto similarity coefficient for two same size bit vectors.
 770 #
 771 # This functionality can be either invoked as a class function or an object method.
 772 #
 773 sub RogersTanimotoSimilarityCoefficient ($$) {
 774   my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;
 775   my($Na, $Nb, $Nc, $Nd, $Nt, $Numerator, $Denominator);
 776 
 777   ($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 778   $Nd = _GetNumOfCommonClearBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 779   $Nt = $Na + $Nb - $Nc + $Nd;
 780 
 781   $Numerator = $Nc + $Nd;
 782   $Denominator =  ($Na - $Nc)  + ($Nb  - $Nc) + $Nt;
 783 
 784   return  $Denominator ? ($Numerator/$Denominator) : 0;
 785 }
 786 
 787 # Calculate RussellRao similarity coefficient for two same size bit vectors.
 788 #
 789 # This functionality can be either invoked as a class function or an object method.
 790 #
 791 sub RussellRaoSimilarityCoefficient ($$) {
 792   my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;
 793   my($Na, $Nb, $Nc, $Nd, $Nt, $Numerator, $Denominator);
 794 
 795   ($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 796   $Nd = _GetNumOfCommonClearBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 797   $Nt = $Na + $Nb - $Nc + $Nd;
 798 
 799   $Numerator = $Nc;
 800   $Denominator = $Nt;
 801 
 802   return  $Denominator ? ($Numerator/$Denominator) : 0;
 803 }
 804 
 805 # Calculate Simpson similarity coefficient for two same size bit vectors.
 806 #
 807 # This functionality can be either invoked as a class function or an object method.
 808 #
 809 sub SimpsonSimilarityCoefficient ($$) {
 810   my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;
 811   my($Na, $Nb, $Nc, $Numerator, $Denominator);
 812 
 813   ($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
 814 
 815   $Numerator = $Nc;
 816   $Denominator =  min($Na, $Nb);
 817 
 818   return  $Denominator ? ($Numerator/$Denominator) : 0;
 819 }
 820 
 # SkoalSneath1 similarity coefficient for two bit vectors of the same size:
 #
 #   Nc / ( Nc + 2*(Na - Nc) + 2*(Nb - Nc) )
 #
 # Na and Nb are the numbers of set bits in A and B and Nc is the number of
 # bits set in both. A zero denominator yields 0.
 #
 # It may be called either as a class function or as an object method.
 #
 sub SkoalSneath1SimilarityCoefficient ($$) {
   my($BitVectorA, $BitVectorB) = @_;

   my($SetBitsInA, $SetBitsInB, $SetBitsInBoth) = _GetNumOfIndividualAndCommonSetBits($BitVectorA, $BitVectorB);

   # Bits set in only one of the two vectors are counted twice...
   my $WeightedBitsCount = $SetBitsInBoth + 2*($SetBitsInA - $SetBitsInBoth) + 2*($SetBitsInB - $SetBitsInBoth);

   return 0 if !$WeightedBitsCount;

   return $SetBitsInBoth/$WeightedBitsCount;
 }
 836 
 # SkoalSneath2 similarity coefficient for two bit vectors of the same size:
 #
 #   ( 2*Nc + 2*Nd ) / ( Nc + Nd + Nt ), where Nt = Na + Nb - Nc + Nd
 #
 # Na and Nb are the numbers of set bits in A and B, Nc is the number of bits
 # set in both and Nd is the number of bits clear in both. A zero denominator
 # yields 0.
 #
 # It may be called either as a class function or as an object method.
 #
 sub SkoalSneath2SimilarityCoefficient ($$) {
   my($BitVectorA, $BitVectorB) = @_;

   my($SetBitsInA, $SetBitsInB, $SetBitsInBoth) = _GetNumOfIndividualAndCommonSetBits($BitVectorA, $BitVectorB);
   my $ClearBitsInBoth = _GetNumOfCommonClearBits($BitVectorA, $BitVectorB);

   # Nt: bits set in at least one vector plus bits clear in both...
   my $TotalBits = $SetBitsInA + $SetBitsInB - $SetBitsInBoth + $ClearBitsInBoth;

   my $DoubledMatches = 2*$SetBitsInBoth + 2*$ClearBitsInBoth;
   my $MatchesPlusTotal = $SetBitsInBoth + $ClearBitsInBoth + $TotalBits;

   return 0 if !$MatchesPlusTotal;

   return $DoubledMatches/$MatchesPlusTotal;
 }
 854 
 # SkoalSneath3 similarity coefficient for two bit vectors of the same size:
 #
 #   ( Nc + Nd ) / ( (Na - Nc) + (Nb - Nc) )
 #
 # Na and Nb are the numbers of set bits in A and B, Nc is the number of bits
 # set in both and Nd is the number of bits clear in both. A zero denominator,
 # which happens when no bit is set in only one of the vectors, yields 0.
 #
 # It may be called either as a class function or as an object method.
 #
 sub SkoalSneath3SimilarityCoefficient ($$) {
   my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;
   my($Na, $Nb, $Nc, $Nd, $Numerator, $Denominator);

   ($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
   $Nd = _GetNumOfCommonClearBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);

   # Nt is not part of this coefficient and is consequently not calculated...

   # Matching bits over mismatching bits...
   $Numerator = $Nc + $Nd;
   $Denominator = ($Na - $Nc) + ($Nb - $Nc);

   return $Denominator ? ($Numerator/$Denominator) : 0;
 }
 872 
 # Tanimoto similarity coefficient for two bit vectors of the same size:
 #
 #   Nc / ( Na + Nb - Nc )
 #
 # Na and Nb are the numbers of set bits in A and B and Nc is the number of
 # bits set in both. A zero denominator yields 0.
 #
 # It may be called either as a class function or as an object method.
 #
 sub TanimotoSimilarityCoefficient ($$) {
   my($BitVectorA, $BitVectorB) = @_;

   my($SetBitsInA, $SetBitsInB, $SetBitsInBoth) = _GetNumOfIndividualAndCommonSetBits($BitVectorA, $BitVectorB);

   # Bits set in at least one of the two vectors...
   my $SetBitsInEither = $SetBitsInA + $SetBitsInB - $SetBitsInBoth;

   return 0 if !$SetBitsInEither;

   return $SetBitsInBoth/$SetBitsInEither;
 }
 888 
 # Tversky similarity coefficient for two bit vectors of the same size:
 #
 #   Nc / ( Alpha*(Na - Nb) + Nb )
 #
 # Na and Nb are the numbers of set bits in A and B and Nc is the number of
 # bits set in both. Alpha must be defined and lie between 0 and 1, both
 # inclusive; otherwise the call croaks. A zero denominator yields 0.
 #
 # It may be called either as a class function or as an object method.
 #
 sub TverskySimilarityCoefficient ($$$) {
   my($BitVectorA, $BitVectorB, $Alpha) = @_;

   my $IsAlphaValid = defined($Alpha) && $Alpha >= 0 && $Alpha <= 1;
   if (!$IsAlphaValid) {
     croak "Error: ${ClassName}->TverskySimilarityCoefficient: Alpha parameters must be defined and its value must be >=0 and <=1 ...";
   }

   my($SetBitsInA, $SetBitsInB, $SetBitsInBoth) = _GetNumOfIndividualAndCommonSetBits($BitVectorA, $BitVectorB);

   my $WeightedSetBits = $Alpha*($SetBitsInA - $SetBitsInB )  + $SetBitsInB;

   return 0 if !$WeightedSetBits;

   return $SetBitsInBoth/$WeightedSetBits;
 }
 908 
 # Yule similarity coefficient for two bit vectors of the same size:
 #
 #   ( Nc*Nd - (Na - Nc)*(Nb - Nc) ) / ( Nc*Nd + (Na - Nc)*(Nb - Nc) )
 #
 # Na and Nb are the numbers of set bits in A and B, Nc is the number of bits
 # set in both and Nd is the number of bits clear in both. A zero denominator
 # yields 0.
 #
 # It may be called either as a class function or as an object method.
 #
 sub YuleSimilarityCoefficient ($$) {
   my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;
   my($Na, $Nb, $Nc, $Nd, $MatchesProduct, $MismatchesProduct, $Numerator, $Denominator);

   ($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
   $Nd = _GetNumOfCommonClearBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);

   # Nt is not part of this coefficient and is consequently not calculated...

   # Both products appear in the numerator as well as the denominator...
   $MatchesProduct = $Nc*$Nd;
   $MismatchesProduct = ($Na - $Nc)*($Nb - $Nc);

   $Numerator = $MatchesProduct - $MismatchesProduct;
   $Denominator = $MatchesProduct + $MismatchesProduct;

   return $Denominator ? ($Numerator/$Denominator) : 0;
 }
 926 
 # WeightedTanimoto similarity coefficient for two bit vectors of the same size:
 #
 #   Beta*TanimotoForSetBits + (1 - Beta)*TanimotoForClearBits
 #
 # Each Tanimoto term is Nc / ( Na + Nb - Nc ), using set bit counts for the
 # first term and clear bit counts for the second one; a term with a zero
 # denominator is taken as 0. Beta must be defined and lie between 0 and 1,
 # both inclusive; otherwise the call croaks.
 #
 # It may be called either as a class function or as an object method.
 #
 sub WeightedTanimotoSimilarityCoefficient ($$$) {
   my($BitVectorA, $BitVectorB, $Beta) = @_;

   my $IsBetaValid = defined($Beta) && $Beta >= 0 && $Beta <= 1;
   if (!$IsBetaValid) {
     croak "Error: ${ClassName}->WeightedTanimotoSimilarityCoefficient: Beta parameters must be defined and its value must be >=0 and <=1 ...";
   }

   my($CountInA, $CountInB, $CountInBoth, $CountInEither);

   # Tanimoto using bits set to "1"...
   ($CountInA, $CountInB, $CountInBoth) = _GetNumOfIndividualAndCommonSetBits($BitVectorA, $BitVectorB);
   $CountInEither = $CountInA + $CountInB - $CountInBoth;
   my $SetBitsTanimoto = $CountInEither ? ($CountInBoth/$CountInEither) : 0;

   # Tanimoto using bits set to "0"...
   ($CountInA, $CountInB, $CountInBoth) = _GetNumOfIndividualAndCommonClearBits($BitVectorA, $BitVectorB);
   $CountInEither = $CountInA + $CountInB - $CountInBoth;
   my $ClearBitsTanimoto = $CountInEither ? ($CountInBoth/$CountInEither) : 0;

   return $Beta*$SetBitsTanimoto + (1 - $Beta)*$ClearBitsTanimoto;
 }
 957 
 # WeightedTversky similarity coefficient for two bit vectors of the same size:
 #
 #   Beta*TverskyForSetBits + (1 - Beta)*TverskyForClearBits
 #
 # Each Tversky term is Nc / ( Alpha*(Na - Nb) + Nb ), using set bit counts for
 # the first term and clear bit counts for the second one; a term with a zero
 # denominator is taken as 0. Alpha and Beta must both be defined and lie
 # between 0 and 1, both inclusive; otherwise the call croaks.
 #
 # It may be called either as a class function or as an object method. The
 # prototype lists all four arguments, A, B, Alpha and Beta, so that a plain
 # function call passing both weights is accepted by Perl.
 #
 sub WeightedTverskySimilarityCoefficient ($$$$) {
   my($FingerprintsBitVectorA, $FingerprintsBitVectorB, $Alpha, $Beta) = @_;
   my($Na, $Nb, $Nc, $TverskyForSetBits, $TverskyForClearBits, $Numerator, $Denominator, $WeightedTversky);

   if (!(defined($Alpha) && ($Alpha >= 0 && $Alpha <= 1))) {
     croak "Error: ${ClassName}->WeightedTverskySimilarityCoefficient: Alpha parameters must be defined and its value must be >=0 and <=1 ...";
   }
   if (!(defined($Beta) && ($Beta >= 0 && $Beta <= 1))) {
     croak "Error: ${ClassName}->WeightedTverskySimilarityCoefficient: Beta parameters must be defined and its value must be >=0 and <=1 ...";
   }

   # Get Tversky for set bits...
   ($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);

   $Numerator = $Nc;
   $Denominator =  $Alpha*($Na - $Nb )  + $Nb;
   $TverskyForSetBits =  $Denominator ? ($Numerator/$Denominator) : 0;

   # Get Tversky for clear bits; Na, Nb and Nc now hold clear bit counts...
   ($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonClearBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);

   $Numerator = $Nc;
   $Denominator =  $Alpha*($Na - $Nb )  + $Nb;
   $TverskyForClearBits =  $Denominator ? ($Numerator/$Denominator) : 0;

   # Beta weighs the set bits term against the clear bits term...
   $WeightedTversky = $Beta*$TverskyForSetBits + (1 - $Beta)*$TverskyForClearBits;

   return  $WeightedTversky;
 }
 991 
 # Count set bits for similarity coefficient calculations and return them as
 # a list of three values:
 #
 #   Na - number of bits set to "1" in A
 #   Nb - number of bits set to "1" in B
 #   Nc - number of bits set to "1" in both A and B
 #
 sub _GetNumOfIndividualAndCommonSetBits ($$) {
   my($BitVectorA, $BitVectorB) = @_;

   my $SetBitsInA = $BitVectorA->GetNumOfSetBits();
   my $SetBitsInB = $BitVectorB->GetNumOfSetBits();

   # Bitwise AND keeps only those bits which are set in both vectors...
   my $SetBitsInBoth = ($BitVectorA & $BitVectorB)->GetNumOfSetBits();

   return ($SetBitsInA, $SetBitsInB, $SetBitsInBoth);
 }
1011 
# Count Nd, the number of bits set to "0" in both A and B, for similarity
# coefficient calculations.
#
# The count is taken from the set bits of ~A & ~B. Whenever a vector stores
# more bits than its specified size, the extra bits are subtracted from the
# count exactly once: A is checked first and B is used only when A does not
# need any correction.
#
sub _GetNumOfCommonClearBits ($$) {
  my($BitVectorA, $BitVectorB) = @_;

  # Bits clear in both vectors show up as set bits after negation and AND...
  my $CommonClearBitVector = ~$BitVectorA & ~$BitVectorB;
  my $CommonClearBits = $CommonClearBitVector->GetNumOfSetBits();

  # Correct for number of clear bits used for padding by the first vector
  # requiring it...
  for my $BitVector ($BitVectorA, $BitVectorB) {
    if (_IsNumOfClearBitsCorrectionRequired($BitVector)) {
      $CommonClearBits -= _GetNumOfClearBitsCorrection($BitVector);
      last;
    }
  }

  return $CommonClearBits;
}
1032 
# Count clear bits for similarity coefficient calculations and return them as
# a list of three values:
#
#   Na - number of bits set to "0" in A
#   Nb - number of bits set to "0" in B
#   Nc - number of bits set to "0" in both A and B
#
# Na and Nb are each reduced by the number of padding bits whenever the vector
# stores more bits than its specified size.
#
sub _GetNumOfIndividualAndCommonClearBits ($$) {
  my($BitVectorA, $BitVectorB) = @_;

  # Individual clear bit counts, A followed by B, without padding bits...
  my($ClearBitsInA, $ClearBitsInB) = map {
    my $ClearBits = $_->GetNumOfClearBits();
    _IsNumOfClearBitsCorrectionRequired($_) ? $ClearBits - _GetNumOfClearBitsCorrection($_) : $ClearBits;
  } ($BitVectorA, $BitVectorB);

  # Number of bits set to "0" in both A and B
  my $ClearBitsInBoth = _GetNumOfCommonClearBits($BitVectorA, $BitVectorB);

  return ($ClearBitsInA, $ClearBitsInB, $ClearBitsInBoth);
}
1060 
# Irrespective of the specified size, Perl functions used to handle bit data in
# BitVector class automatically set the size to the next nearest power of 2
# and clear the extra bits.
#
# SpecifiedSize is used by this class to process any arbitrary size during
# similarity coefficient calculations.
#
# Assuming the FingerprintsBitVector class only manipulates bits up to the
# specified size, a correction for the extra bits added by BitVector class
# needs to be applied to the number of clear bits.
#
# Returns the number of extra bits: Size - SpecifiedSize.
#
sub _GetNumOfClearBitsCorrection {
  my($BitVector) = @_;

  my $NumOfExtraBits = $BitVector->{Size} - $BitVector->{SpecifiedSize};

  return $NumOfExtraBits;
}
1077 
# Is number of clear bits correction required?
#
# Returns 1 when Size of the bit vector is larger than its SpecifiedSize,
# and 0 otherwise.
#
sub _IsNumOfClearBitsCorrectionRequired {
  my($BitVector) = @_;

  if ($BitVector->{Size} > $BitVector->{SpecifiedSize}) {
    return 1;
  }

  return 0;
}
1085 
1086