1 package Fingerprints::FingerprintsBitVector; 2 # 3 # File: FingerprintsBitVector.pm 4 # Author: Manish Sud <msud@san.rr.com> 5 # 6 # Copyright (C) 2024 Manish Sud. All rights reserved. 7 # 8 # This file is part of MayaChemTools. 9 # 10 # MayaChemTools is free software; you can redistribute it and/or modify it under 11 # the terms of the GNU Lesser General Public License as published by the Free 12 # Software Foundation; either version 3 of the License, or (at your option) any 13 # later version. 14 # 15 # MayaChemTools is distributed in the hope that it will be useful, but without 16 # any warranty; without even the implied warranty of merchantability of fitness 17 # for a particular purpose. See the GNU Lesser General Public License for more 18 # details. 19 # 20 # You should have received a copy of the GNU Lesser General Public License 21 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or 22 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, 23 # Boston, MA, 02111-1307, USA. 24 # 25 26 use strict; 27 use Carp; 28 use Exporter; 29 use Scalar::Util (); 30 use BitVector; 31 use MathUtil; 32 use TextUtil (); 33 34 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); 35 36 @ISA = qw(BitVector Exporter); 37 38 # Similiarity coefficients... 
my(@SimilarityCoefficients) = qw(
  BaroniUrbaniSimilarityCoefficient
  BuserSimilarityCoefficient
  CosineSimilarityCoefficient
  DiceSimilarityCoefficient
  DennisSimilarityCoefficient
  ForbesSimilarityCoefficient
  FossumSimilarityCoefficient
  HamannSimilarityCoefficient
  JacardSimilarityCoefficient
  Kulczynski1SimilarityCoefficient
  Kulczynski2SimilarityCoefficient
  MatchingSimilarityCoefficient
  McConnaugheySimilarityCoefficient
  OchiaiSimilarityCoefficient
  PearsonSimilarityCoefficient
  RogersTanimotoSimilarityCoefficient
  RussellRaoSimilarityCoefficient
  SimpsonSimilarityCoefficient
  SkoalSneath1SimilarityCoefficient
  SkoalSneath2SimilarityCoefficient
  SkoalSneath3SimilarityCoefficient
  TanimotoSimilarityCoefficient
  TverskySimilarityCoefficient
  YuleSimilarityCoefficient
  WeightedTanimotoSimilarityCoefficient
  WeightedTverskySimilarityCoefficient
);

# Functions that build a fingerprints bit vector from a string...
my(@NewFromString) = qw(NewFromBinaryString NewFromHexadecimalString NewFromRawBinaryString);

# Exporter setup...
#
# Every name listed in an export tag must also appear in @EXPORT or @EXPORT_OK.
# qw() does not interpolate arrays, so the two name lists are spliced into
# @EXPORT_OK as ordinary list elements rather than being written inside qw().
#
@EXPORT = qw(IsFingerprintsBitVector);
@EXPORT_OK = ('GetSupportedSimilarityCoefficients', @NewFromString, @SimilarityCoefficients);

%EXPORT_TAGS = (
  new => [@NewFromString],
  coefficients => [@SimilarityCoefficients],
  all => [@EXPORT, @EXPORT_OK]
);

# Setup class variables...
my($ClassName);
_InitializeClass();

# Use the class specific stringification whenever an object is interpolated...
use overload '""' => 'StringifyFingerprintsBitVector';

# Class constructor...
#
# Parameters:
#   $Class - class name or an existing object whose class is reused
#   $Size  - requested number of bits; validated by _InitializeFingerprintsBitVector
#
# Returns the newly created object. Croaks (via _InitializeFingerprintsBitVector)
# when $Size is undefined or not positive.
#
sub new {
  my($Class, $Size) = @_;

  # Let the parent class build the underlying object...
  my $This = $Class->SUPER::new($Size);

  # Rebless so that calling new on an object creates another object of its class...
  bless $This, ref($Class) || $Class;
  $This->_InitializeFingerprintsBitVector($Size);

  return $This;
}

# Initialize object data...
#
# Note:
# . The class, BitVector, used to derive this class provides all the functionality to
# manipulate bits.
# . Irrespective of specified size, Perl functions used to handle bit data in
# BitVector class automatically sets the size to the next nearest power of 2.
# SpecifiedSize is used by this class to process any arbitrary size during similarity
# coefficient calculations.
#
# Croaks when $Size is undefined or is not greater than zero; otherwise records
# $Size as the SpecifiedSize of the object.
#
sub _InitializeFingerprintsBitVector {
  my($This, $Size) = @_;

  croak "Error: ${ClassName}->new: FingerprintsBitVector object instantiated without specifying its size ..."
    unless defined $Size;

  croak "Error: ${ClassName}->new: Fingerprints bit vector size, $Size, must be a positive integer..."
    if $Size <=0;

  # Remember the size requested by the caller...
  $This->{SpecifiedSize} = $Size;
}

# Initialize class: cache the package name used in error messages and isa checks...
sub _InitializeClass {
  $ClassName = __PACKAGE__;
}

# Set specified size...
#
# The new value must be greater than zero and must not exceed the Size stored in
# the object; anything else croaks. SpecifiedSize is the size used by this class
# during similarity coefficient calculations, as opposed to the size maintained
# by the underlying BitVector object.
#
sub SetSpecifiedSize {
  my($This, $SpecifiedSize) = @_;

  unless ($SpecifiedSize > 0 && $SpecifiedSize <= $This->{Size}) {
    croak "Error: ${ClassName}->SetSpecifiedSize: Specified size, $SpecifiedSize, is not valid: It must be > 0 && <= ", $This->GetSize()," ...";
  }

  $This->{SpecifiedSize} = $SpecifiedSize;
}

# Get specified size...
sub GetSpecifiedSize {
  my $This = shift;

  return $This->{SpecifiedSize};
}

# Set ID and return the object to allow chaining...
sub SetID {
  my $This = shift;

  $This->{ID} = shift;

  return $This;
}

# Get ID; 'None' is returned when no ID has been set...
sub GetID {
  my $This = shift;

  return $This->{ID} if exists $This->{ID};

  return 'None';
}

# Set description and return the object to allow chaining...
sub SetDescription {
  my $This = shift;

  $This->{Description} = shift;

  return $This;
}

# Get description; a fixed placeholder is returned when none has been set...
sub GetDescription {
  my $This = shift;

  return $This->{Description} if exists $This->{Description};

  return 'No description available';
}

# Set vector type and return the object to allow chaining...
sub SetVectorType {
  my $This = shift;

  $This->{VectorType} = shift;

  return $This;
}

# Get vector type; defaults to 'FingerprintsBitVector' when none has been set...
sub GetVectorType {
  my $This = shift;

  return $This->{VectorType} if exists $This->{VectorType};

  return 'FingerprintsBitVector';
}

# Create a new fingerprints bit vector from a binary string.
#
# Usable both as a plain function and as an object method: a leading
# fingerprints bit vector object is treated as the invocant and skipped.
# The next two values are the string and an optional bits order.
#
sub NewFromBinaryString ($;$) {
  my(@Parameters) = @_;

  # Drop the invocant when invoked as an object method...
  shift @Parameters if _IsFingerprintsBitVector($Parameters[0]);
  my($String, $BitsOrder) = @Parameters;

  return _NewFingerptinsBitVectorFromString('Binary', $String, $BitsOrder);
}

# Create a new fingerprints bit vector from a hexadecimal string.
#
# Usable both as a plain function and as an object method: a leading
# fingerprints bit vector object is treated as the invocant and skipped.
# The next two values are the string and an optional bits order.
#
sub NewFromHexadecimalString ($;$) {
  my(@Parameters) = @_;

  # Drop the invocant when invoked as an object method...
  shift @Parameters if _IsFingerprintsBitVector($Parameters[0]);
  my($String, $BitsOrder) = @Parameters;

  return _NewFingerptinsBitVectorFromString('Hexadecimal', $String, $BitsOrder);
}

# Creation of a fingerprints bit vector from an octal string is not supported:
# this function always croaks.
#
sub NewFromOctalString ($) {
  croak "Error: ${ClassName}->NewFromOctalString: Creation of fingerprits bit vector from an octal string is not supported ...";
}

# Create a new fingerprints bit vector using decimal string. This functionality can be
# either invoked as a class function or an object method.
#
# Creation from a decimal string is not supported: this function always croaks.
#
sub NewFromDecimalString ($;$) {
  croak "Error: ${ClassName}->NewFromDecimalString: Creation of fingerprits bit vector from a decimal string is not supported ...";
}

# Create a new fingerprints bit vector using raw binary string. This functionality can be
# either invoked as a class function or an object method.
#
# When the first parameter is a fingerprints bit vector object it is treated as
# the invocant and skipped; the following two values are the string and an
# optional bits order.
#
sub NewFromRawBinaryString ($;$) {
  my($FirstParameter, $SecondParameter, $ThirdParameter) = @_;

  if (_IsFingerprintsBitVector($FirstParameter)) {
    return _NewFingerptinsBitVectorFromString('RawBinary', $SecondParameter, $ThirdParameter);
  }

  return _NewFingerptinsBitVectorFromString('RawBinary', $FirstParameter, $SecondParameter);
}

# Create a new fingerprints bit vector from a string...
#
# Parameters:
#   $Format    - format name passed through to the BitVector string helpers
#   $String    - string holding the bits in the specified format
#   $BitsOrder - optional bits order passed through to _SetBitsAsString
#
# Returns a new Fingerprints::FingerprintsBitVector object whose size is the
# value reported by BitVector::_CalculateStringSizeInBits for the string.
#
sub _NewFingerptinsBitVectorFromString ($$;$) {
  my($Format, $String, $BitsOrder) = @_;
  my($FingerprintsBitVector, $Size);

  $Size = BitVector::_CalculateStringSizeInBits($Format, $String);

  # Arrow method call: unlike the indirect object form, it can not be mistaken
  # for a call to the sub new defined in this package...
  $FingerprintsBitVector = Fingerprints::FingerprintsBitVector->new($Size);
  $FingerprintsBitVector->_SetBitsAsString($Format, $String, $BitsOrder);

  return $FingerprintsBitVector;
}

# Get fingerprint bits as a hexadecimal string...
#
# $BitsOrder is optional; _GetFingerprintBitsAsString supplies the default.
#
sub GetBitsAsHexadecimalString {
  my($This, $BitsOrder) = @_;

  return $This->_GetFingerprintBitsAsString('Hexadecimal', $BitsOrder);
}

# Retrieval of fingerprint bits as an octal string is not supported: this
# method always croaks.
#
sub GetBitsAsOctalString {
  my($This, $BitsOrder) = @_;

  croak "Error: ${ClassName}->GetBitsAsOctalString: Retrieval of fingerprits bits as an octal string is not supported ...";
}

# Get fingerprint bits as a decimal string...
#
# Retrieval of fingerprint bits as a decimal string is not supported: this
# method always croaks.
#
sub GetBitsAsDecimalString {
  my($This, $BitsOrder) = @_;

  # Report this method's own name; the message used to name GetBitsAsOctalString...
  croak "Error: ${ClassName}->GetBitsAsDecimalString: Retrieval of fingerprits bits as a decimal string is not supported ...";
}

# Get fingerprint bits as a binary string containing 1s and 0s...
#
# $BitsOrder is optional; _GetFingerprintBitsAsString supplies the default.
#
sub GetBitsAsBinaryString {
  my($This, $BitsOrder) = @_;

  return $This->_GetFingerprintBitsAsString('Binary', $BitsOrder);
}

# Get fingerprint bits as a raw binary string...
#
# No bits order is accepted here; the default order is always used.
#
sub GetBitsAsRawBinaryString {
  my($This) = @_;

  return $This->_GetFingerprintBitsAsString('RawBinary');
}

# Return fingerprint bits as a string...
#
# Parameters:
#   $Format    - format name passed through to _GetBitsAsString
#   $BitsOrder - optional; an undefined or false value is replaced by 'Ascending'
#
sub _GetFingerprintBitsAsString {
  my($This, $Format, $BitsOrder) = @_;

  $BitsOrder = 'Ascending' unless $BitsOrder;

  return $This->_GetBitsAsString($Format, $BitsOrder);
}

# Is it a fingerprints bit vector object?
#
# Exported wrapper around _IsFingerprintsBitVector; returns 1 or 0.
#
sub IsFingerprintsBitVector ($) {
  my($Object) = @_;

  return _IsFingerprintsBitVector($Object);
}

# Is it a fingerprints bit vector object?
#
# Returns 1 only for a blessed reference whose class is, or derives from, this
# class; plain scalars, unblessed references and undef all yield 0.
#
sub _IsFingerprintsBitVector {
  my($Object) = @_;

  return (Scalar::Util::blessed($Object) && $Object->isa($ClassName)) ? 1 : 0;
}

# Return a list of supported similarity coefficients...
sub GetSupportedSimilarityCoefficients () {

  return @SimilarityCoefficients;
}

# Get bit density for fingerprints bit vector corresponding to on bits...
#
# The value reported by GetDensityOfSetBits is passed to round() with 2 as the
# second argument.
#
sub GetFingerprintsBitDensity {
  my($This) = @_;

  my $BitDensity = $This->GetDensityOfSetBits();

  return round($BitDensity, 2);
}

# Fold fingerprints bit vector by recursively reducing its size by half untill size is less than or equal to
# specified size...
#
# $Size must be greater than zero and must not exceed the current size;
# otherwise the method croaks. The object itself is returned when no folding
# is needed.
#
sub FoldFingerprintsBitVectorBySize {
  my($This, $Size) = @_;

  unless ($Size > 0 && $Size <= $This->GetSize()) {
    croak "Error: ${ClassName}->FoldFingerprintsBitVectorBySize: Specified size, $Size, is not valid: It must be > 0 && <= ", $This->GetSize()," ...";
  }

  # Already small enough...
  return $This if $This->GetSize() <= $Size;

  return $This->_FoldFingerprintsBitVector('BySize', $Size);
}

# Fold fingerprints bit vector by repeatedly halving its size until the density
# of set bits reaches the specified density...
#
# $Density must be greater than zero and at most one; otherwise the method
# croaks. The object itself is returned when no folding is needed.
#
sub FoldFingerprintsBitVectorByDensity {
  my($This, $Density) = @_;

  unless ($Density > 0 && $Density <= 1) {
    croak "Error: ${ClassName}->FoldFingerprintsBitVectorByDensity: Specified bit density, $Density, is not valid: It must be > 0 && <= 1 ...";
  }

  # Already dense enough...
  return $This if $This->GetDensityOfSetBits() >= $Density;

  return $This->_FoldFingerprintsBitVector('ByDensity', $Density);
}

# Fold fingerprints bit vector using size or density and return folded fingerprint bit vector...
#
# Parameters:
#   $Mode  - 'BySize' or 'ByDensity' (matched case insensitively); any other
#            value returns the object unchanged
#   $Value - target size or target density, depending on $Mode
#
# Each step splits the binary string form of the bits into two halves, builds a
# bit vector from each half and combines the two with the | operator
# (presumably overloaded by the parent class - TODO confirm). Folding stops at
# a size of 8 bits or as soon as the target has been reached.
#
sub _FoldFingerprintsBitVector {
  my($This, $Mode, $Value) = @_;

  # Never fold below 8 bits...
  return $This if $This->GetSize() <= 8;

  # Has the target been reached?
  my $TargetReached;
  if ($Mode =~ /^BySize$/i) {
    $TargetReached = ($This->GetSize() <= $Value);
  }
  elsif ($Mode =~ /^ByDensity$/i) {
    $TargetReached = ($This->GetDensityOfSetBits() >= $Value);
  }
  else {
    # Unknown mode: nothing to fold...
    $TargetReached = 1;
  }
  return $This if $TargetReached;

  # Halve the vector and combine both halves...
  my $Bits = $This->GetBitsAsBinaryString();
  my $HalfLength = (length $Bits)/2;

  my $LowerHalfVector = NewFromBinaryString(substr($Bits, 0, $HalfLength));
  my $UpperHalfVector = NewFromBinaryString(substr($Bits, $HalfLength));

  my $FoldedVector = $LowerHalfVector | $UpperHalfVector;

  # Keep folding until one of the stop conditions above applies...
  return $FoldedVector->_FoldFingerprintsBitVector($Mode, $Value);
}

# Is first bit vector subset of second bit vector?
#
# For a bit vector to be a subset of another bit vector, both vectors must be of
# the same size and the bit positions set in first vector must also be set in the
# second bit vector.
#
# This functionality can be either invoked as a class function or an object method.
#
sub IsSubSet ($$) {
  my($VectorA, $VectorB) = @_;

  # Different sizes can never be subsets of each other...
  return 0 if $VectorA->GetSize() != $VectorB->GetSize();

  # A is a subset of B when and'ing it with B does not clear any of its bits...
  my $CommonBitsVector = $VectorA & $VectorB;

  return 1 if $VectorA->GetNumOfSetBits() == $CommonBitsVector->GetNumOfSetBits();

  return 0;
}

# Return a string containing vector values...
#
# The string lists, separated by '; ': SpecifiedSize (only when it differs from
# the bit vector size), BitVectorSize, NumOfOnBits, BitDensity and the bit
# values as reported by StringifyBitVector.
#
sub StringifyFingerprintsBitVector {
  my($This) = @_;
  my(@Fields);

  # BitVector size information...
  push @Fields, "SpecifiedSize: " . $This->{SpecifiedSize} if $This->{SpecifiedSize} != $This->GetSize();
  push @Fields, "BitVectorSize: " . $This->GetSize();

  # Set bits count and density...
  push @Fields, "NumOfOnBits: " . $This->GetNumOfSetBits();
  push @Fields, "BitDensity: " . $This->GetFingerprintsBitDensity();

  # BitVector values...
  push @Fields, "BitVector: " . $This->StringifyBitVector();

  return join('; ', @Fields);
}

# For two fingerprints bit vectors A and B of same size, let:
#
#    Na = Number of bits set to "1" in A
#    Nb = Number of bits set to "1" in B
#    Nc = Number of bits set to "1" in both A and B
#    Nd = Number of bits set to "0" in both A and B
#
#    Nt = Number of bits set to "1" or "0" in A or B = Size of A or B = Na + Nb - Nc + Nd
#
#    Na - Nc = Number of bits set to "1" in A but not in B
#    Nb - Nc = Number of bits set to "1" in B but not in A
#
# Various similarity coefficients [ Ref 40 - 42 ] for a pair of bit vectors A and B are
# defined as follows:
#
# . BaroniUrbani: ( SQRT( Nc * Nd ) + Nc ) / ( SQRT ( Nc * Nd ) + Nc + ( Na - Nc ) + ( Nb - Nc ) ) ( same as Buser )
#
# . Buser: ( SQRT ( Nc * Nd ) + Nc ) / ( SQRT ( Nc * Nd ) + Nc + ( Na - Nc ) + ( Nb - Nc ) ) ( same as BaroniUrbani )
#
# . Cosine: Nc / SQRT ( Na * Nb ) (same as Ochiai)
#
# . Dice: (2 * Nc) / ( Na + Nb )
#
# . Dennis: ( Nc * Nd - ( ( Na - Nc ) * ( Nb - Nc ) ) ) / SQRT ( Nt * Na * Nb)
#
# . Forbes: ( Nt * Nc ) / ( Na * Nb )
#
# . Fossum: ( Nt * ( ( Nc - 1/2 ) ** 2 ) ) / ( Na * Nb )
#
# . Hamann: ( ( Nc + Nd ) - ( Na - Nc ) - ( Nb - Nc ) ) / Nt
#
# . Jaccard: Nc / ( ( Na - Nc) + ( Nb - Nc ) + Nc ) = Nc / ( Na + Nb - Nc ) (same as Tanimoto)
#
# . Kulczynski1: Nc / ( ( Na - Nc ) + ( Nb - Nc) ) = Nc / ( Na + Nb - 2Nc )
#
# . Kulczynski2: ( ( Nc / 2 ) * ( 2 * Nc + ( Na - Nc ) + ( Nb - Nc) ) ) / ( ( Nc + ( Na - Nc ) ) * ( Nc + ( Nb - Nc ) ) ) = 0.5 * ( Nc / Na + Nc / Nb )
#
# . Matching: ( Nc + Nd ) / Nt
#
# . McConnaughey: ( Nc ** 2 - ( Na - Nc ) * ( Nb - Nc) ) / ( Na * Nb )
#
# . Ochiai: Nc / SQRT ( Na * Nb ) (same as Cosine)
#
# . Pearson: ( ( Nc * Nd ) - ( ( Na - Nc ) * ( Nb - Nc ) ) ) / SQRT ( Na * Nb * ( Na - Nc + Nd ) * ( Nb - Nc + Nd ) )
#
# . RogersTanimoto: ( Nc + Nd ) / ( ( Na - Nc) + ( Nb - Nc) + Nt) = ( Nc + Nd ) / ( Na + Nb - 2Nc + Nt)
#
# . RussellRao: Nc / Nt
#
# . Simpson: Nc / MIN ( Na, Nb)
#
# . SkoalSneath1: Nc / ( Nc + 2 * ( Na - Nc) + 2 * ( Nb - Nc) ) = Nc / ( 2 * Na + 2 * Nb - 3 * Nc )
#
# . SkoalSneath2: ( 2 * Nc + 2 * Nd ) / ( Nc + Nd + Nt )
#
# . SkoalSneath3: ( Nc + Nd ) / ( ( Na - Nc ) + ( Nb - Nc ) ) = ( Nc + Nd ) / ( Na + Nb - 2 * Nc )
#
# . Tanimoto: Nc / ( ( Na - Nc) + ( Nb - Nc ) + Nc ) = Nc / ( Na + Nb - Nc ) (same as Jaccard)
#
# . Tversky: Nc / ( alpha * ( Na - Nc ) + ( 1 - alpha) * ( Nb - Nc) + Nc ) = Nc / ( alpha * ( Na - Nb ) + Nb)
#
# . Yule: ( ( Nc * Nd ) - ( ( Na - Nc ) * ( Nb - Nc ) ) ) / ( ( Nc * Nd ) + ( ( Na - Nc ) * ( Nb - Nc ) ) )
#
#
# Values of Tanimoto/Jaccard and Tversky coefficients are dependent on only those bits which
# are set to "1" in both A and B. In order to take into account all bit positions, modified versions
# of Tanimoto [ Ref. 42 ] and Tversky [ Ref. 43 ] have been developed.
#
# Let:
#
#    Na' = Number of bits set to "0" in A
#    Nb' = Number of bits set to "0" in B
#    Nc' = Number of bits set to "0" in both A and B
#
# . Tanimoto': Nc' / ( ( Na' - Nc') + ( Nb' - Nc' ) + Nc' ) = Nc' / ( Na' + Nb' - Nc' )
#
# . Tversky': Nc' / ( alpha * ( Na' - Nc' ) + ( 1 - alpha) * ( Nb' - Nc' ) + Nc' ) = Nc' / ( alpha * ( Na' - Nb' ) + Nb')
#
# Then:
#
# . WeightedTanimoto = beta * Tanimoto + (1 - beta) * Tanimoto'
#
# . WeightedTversky = beta * Tversky + (1 - beta) * Tversky'
#
#

# Calculate BaroniUrbani similarity coefficient for two same size bit vectors.
#
# Defined identically to the Buser coefficient, to which the work is delegated.
# Usable as a plain function or as an object method.
#
sub BaroniUrbaniSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  return BuserSimilarityCoefficient($VectorA, $VectorB);
}

# Calculate Buser similarity coefficient for two same size bit vectors.
#
# Usable as a plain function or as an object method. Zero is returned when the
# denominator of the formula is zero.
#
sub BuserSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);
  my $Nd = _GetNumOfCommonClearBits($VectorA, $VectorB);

  # The same square root term appears in numerator and denominator...
  my $RootTerm = sqrt($Nc*$Nd);

  my $Denominator = $RootTerm + ($Na - $Nc) + ($Nb - $Nc ) + $Nc;
  return 0 unless $Denominator;

  return ($RootTerm + $Nc)/$Denominator;
}

# Calculate Cosine similarity coefficient for two same size bit vectors.
#
# Usable as a plain function or as an object method. Zero is returned when the
# denominator of the formula is zero.
#
sub CosineSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

  my $Denominator = sqrt($Na*$Nb);
  return 0 unless $Denominator;

  return $Nc/$Denominator;
}

# Calculate Dice similarity coefficient for two same size bit vectors.
#
# This functionality can be either invoked as a class function or an object method.
#
sub DiceSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

  # A zero denominator yields a coefficient of zero...
  my $Denominator = $Na + $Nb;
  return 0 unless $Denominator;

  return (2*$Nc)/$Denominator;
}

# Calculate Dennis similarity coefficient for two same size bit vectors.
#
# Usable as a plain function or as an object method. Zero is returned when the
# denominator of the formula is zero.
#
sub DennisSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);
  my $Nd = _GetNumOfCommonClearBits($VectorA, $VectorB);
  my $Nt = $Na + $Nb - $Nc + $Nd;

  my $Denominator = sqrt($Nt*$Na*$Nb);
  return 0 unless $Denominator;

  return ($Nc*$Nd - (($Na - $Nc)*($Nb - $Nc)))/$Denominator;
}

# Calculate Forbes similarity coefficient for two same size bit vectors.
#
# Usable as a plain function or as an object method. Zero is returned when the
# denominator of the formula is zero.
#
sub ForbesSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);
  my $Nd = _GetNumOfCommonClearBits($VectorA, $VectorB);
  my $Nt = $Na + $Nb - $Nc + $Nd;

  my $Denominator = $Na*$Nb;
  return 0 unless $Denominator;

  return ($Nt*$Nc)/$Denominator;
}

# Calculate Fossum similarity coefficient for two same size bit vectors.
#
# Usable as a plain function or as an object method. Zero is returned when the
# denominator of the formula is zero.
#
sub FossumSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);
  my $Nd = _GetNumOfCommonClearBits($VectorA, $VectorB);
  my $Nt = $Na + $Nb - $Nc + $Nd;

  my $Denominator = $Na*$Nb ;
  return 0 unless $Denominator;

  return ($Nt*(($Nc - 0.5)** 2))/$Denominator;
}

# Calculate Hamann similarity coefficient for two same size bit vectors.
#
# Usable as a plain function or as an object method. Zero is returned when the
# denominator of the formula is zero.
#
sub HamannSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);
  my $Nd = _GetNumOfCommonClearBits($VectorA, $VectorB);
  my $Nt = $Na + $Nb - $Nc + $Nd;

  return 0 unless $Nt;

  return (($Nc + $Nd ) - ($Na - $Nc) - ($Nb - $Nc))/$Nt;
}

# Calculate Jacard similarity coefficient for two same size bit vectors.
#
# Defined identically to the Tanimoto coefficient, to which the work is
# delegated. Usable as a plain function or as an object method.
#
sub JacardSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  return TanimotoSimilarityCoefficient($VectorA, $VectorB);
}

# Calculate Kulczynski1 similarity coefficient for two same size bit vectors.
#
# This functionality can be either invoked as a class function or an object method.
#
sub Kulczynski1SimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

  # A zero denominator yields a coefficient of zero...
  my $Denominator = $Na + $Nb - 2*$Nc;
  return 0 unless $Denominator;

  return $Nc/$Denominator;
}

# Calculate Kulczynski2 similarity coefficient for two same size bit vectors.
#
# Usable as a plain function or as an object method. Zero is returned when the
# denominator of the formula is zero.
#
sub Kulczynski2SimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

  my $Denominator = $Na*$Nb;
  return 0 unless $Denominator;

  return (0.5*($Na*$Nc + $Nb*$Nc))/$Denominator;
}

# Calculate Matching similarity coefficient for two same size bit vectors.
#
# Usable as a plain function or as an object method. Zero is returned when the
# denominator of the formula is zero.
#
sub MatchingSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);
  my $Nd = _GetNumOfCommonClearBits($VectorA, $VectorB);
  my $Nt = $Na + $Nb - $Nc + $Nd;

  return 0 unless $Nt;

  return ($Nc + $Nd)/$Nt;
}

# Calculate McConnaughey similarity coefficient for two same size bit vectors.
#
# This functionality can be either invoked as a class function or an object method.
#
sub McConnaugheySimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

  # A zero denominator yields a coefficient of zero...
  my $Denominator = $Na*$Nb ;
  return 0 unless $Denominator;

  return ($Nc**2 - (($Na - $Nc)*($Nb - $Nc)))/$Denominator;
}

# Calculate Ochiai similarity coefficient for two same size bit vectors.
#
# Defined identically to the Cosine coefficient, to which the work is
# delegated. Usable as a plain function or as an object method.
#
sub OchiaiSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  return CosineSimilarityCoefficient($VectorA, $VectorB);
}

# Calculate Pearson similarity coefficient for two same size bit vectors.
#
# Usable as a plain function or as an object method. Zero is returned when the
# denominator of the formula is zero.
#
sub PearsonSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);
  my $Nd = _GetNumOfCommonClearBits($VectorA, $VectorB);

  my $Denominator = sqrt($Na*$Nb*($Na - $Nc + $Nd )*($Nb - $Nc + $Nd));
  return 0 unless $Denominator;

  return (($Nc*$Nd ) - (($Na - $Nc)*($Nb - $Nc)))/$Denominator;
}

# Calculate RogersTanimoto similarity coefficient for two same size bit vectors.
#
# This functionality can be either invoked as a class function or an object method.
#
sub RogersTanimotoSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);
  my $Nd = _GetNumOfCommonClearBits($VectorA, $VectorB);
  my $Nt = $Na + $Nb - $Nc + $Nd;

  # A zero denominator yields a coefficient of zero...
  my $Denominator = ($Na - $Nc) + ($Nb - $Nc) + $Nt;
  return 0 unless $Denominator;

  return ($Nc + $Nd)/$Denominator;
}

# Calculate RussellRao similarity coefficient for two same size bit vectors.
#
# Usable as a plain function or as an object method. Zero is returned when the
# denominator of the formula is zero.
#
sub RussellRaoSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);
  my $Nd = _GetNumOfCommonClearBits($VectorA, $VectorB);
  my $Nt = $Na + $Nb - $Nc + $Nd;

  return 0 unless $Nt;

  return $Nc/$Nt;
}

# Calculate Simpson similarity coefficient for two same size bit vectors.
#
# Usable as a plain function or as an object method. Zero is returned when the
# denominator of the formula is zero.
#
sub SimpsonSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

  my $Denominator = min($Na, $Nb);
  return 0 unless $Denominator;

  return $Nc/$Denominator;
}

# Calculate SkoalSneath1 similarity coefficient for two same size bit vectors.
#
# This functionality can be either invoked as a class function or an object method.
#
sub SkoalSneath1SimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

  # A zero denominator yields a coefficient of zero...
  my $Denominator = $Nc + 2*($Na - $Nc) + 2*($Nb - $Nc);
  return 0 unless $Denominator;

  return $Nc/$Denominator;
}

# Calculate SkoalSneath2 similarity coefficient for two same size bit vectors.
#
# Usable as a plain function or as an object method. Zero is returned when the
# denominator of the formula is zero.
#
sub SkoalSneath2SimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);
  my $Nd = _GetNumOfCommonClearBits($VectorA, $VectorB);
  my $Nt = $Na + $Nb - $Nc + $Nd;

  my $Denominator = $Nc + $Nd + $Nt ;
  return 0 unless $Denominator;

  return (2*$Nc + 2*$Nd)/$Denominator;
}

# Calculate SkoalSneath3 similarity coefficient for two same size bit vectors.
#
# Usable as a plain function or as an object method. Zero is returned when the
# denominator of the formula is zero.
#
sub SkoalSneath3SimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);
  my $Nd = _GetNumOfCommonClearBits($VectorA, $VectorB);

  my $Denominator = ($Na - $Nc) + ($Nb - $Nc ) ;
  return 0 unless $Denominator;

  return ($Nc + $Nd)/$Denominator;
}

# Calculate Tanimoto similarity coefficient for two same size bit vectors.
#
# Usable as a plain function or as an object method. Zero is returned when the
# denominator of the formula is zero.
#
sub TanimotoSimilarityCoefficient ($$) {
  my($VectorA, $VectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

  my $Denominator = $Na + $Nb - $Nc;
  return 0 unless $Denominator;

  return $Nc/$Denominator;
}

# Calculate Tversky similarity coefficient for two same size bit vectors.
#
# $Alpha is required and must lie in the range 0 to 1 inclusive; otherwise the
# function croaks. Usable as a plain function or as an object method. Zero is
# returned when the denominator of the formula is zero.
#
sub TverskySimilarityCoefficient ($$$) {
  my($VectorA, $VectorB, $Alpha) = @_;

  unless (defined($Alpha) && ($Alpha >= 0 && $Alpha <= 1)) {
    croak "Error: ${ClassName}->TverskySimilarityCoefficient: Alpha parameters must be defined and its value must be >=0 and <=1 ...";
  }

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($VectorA, $VectorB);

  my $Denominator = $Alpha*($Na - $Nb ) + $Nb;
  return 0 unless $Denominator;

  return $Nc/$Denominator;
}

# Calculate Yule similarity coefficient for two same size bit vectors.
#
# This functionality can be either invoked as a class function or an object method.
#
# Yule = ( Nc*Nd - (Na - Nc)*(Nb - Nc) ) / ( Nc*Nd + (Na - Nc)*(Nb - Nc) )
#
# Na and Nb are the numbers of bits set in A and B, Nc is the number of bits set in
# both and Nd is the number of bits clear in both. A zero denominator yields 0.
#
sub YuleSimilarityCoefficient ($$) {
  my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;

  my($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);
  my $Nd = _GetNumOfCommonClearBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);

  # The numerator and denominator are the difference and sum of the same two products...
  my $CommonBitsProduct = $Nc*$Nd;
  my $UnsharedBitsProduct = ($Na - $Nc)*($Nb - $Nc);

  my $Numerator = $CommonBitsProduct - $UnsharedBitsProduct;
  my $Denominator = $CommonBitsProduct + $UnsharedBitsProduct;

  return $Denominator ? ($Numerator/$Denominator) : 0;
}

# Calculate WeightedTanimoto similarity coefficient for two same size bit vectors.
#
# This functionality can be either invoked as a class function or an object method.
#
# WeightedTanimoto = Beta*TanimotoForSetBits + (1 - Beta)*TanimotoForClearBits
#
# Each Tanimoto term is Nc / ( Na + Nb - Nc ), computed once from the set bit counts
# and once from the clear bit counts; a term with a zero denominator is taken as 0.
#
# Beta must be defined and lie in the range 0 to 1 inclusive; otherwise the call
# croaks.
#
sub WeightedTanimotoSimilarityCoefficient ($$$) {
  my($FingerprintsBitVectorA, $FingerprintsBitVectorB, $Beta) = @_;
  my($Na, $Nb, $Nc, $Denominator);

  if (!(defined($Beta) && ($Beta >= 0 && $Beta <= 1))) {
    croak "Error: ${ClassName}->WeightedTanimotoSimilarityCoefficient: Beta parameters must be defined and its value must be >=0 and <=1 ...";
  }

  # Get Tanimoto for set bits...
  ($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);

  $Denominator = $Na + $Nb - $Nc;
  my $TanimotoForSetBits = $Denominator ? ($Nc/$Denominator) : 0;

  # Get Tanimoto for clear bits...
  ($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonClearBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);

  $Denominator = $Na + $Nb - $Nc;
  my $TanimotoForClearBits = $Denominator ? ($Nc/$Denominator) : 0;

  return $Beta*$TanimotoForSetBits + (1 - $Beta)*$TanimotoForClearBits;
}

# Calculate WeightedTversky similarity coefficient for two same size bit vectors.
#
# This functionality can be either invoked as a class function or an object method.
#
# WeightedTversky = Beta*TverskyForSetBits + (1 - Beta)*TverskyForClearBits
#
# Each Tversky term is Nc / ( Alpha*(Na - Nb) + Nb ), computed once from the set bit
# counts and once from the clear bit counts; a term with a zero denominator is taken
# as 0.
#
# Alpha and Beta must both be defined and lie in the range 0 to 1 inclusive;
# otherwise the call croaks.
#
# Note: The prototype lists four scalars to match the four required arguments. A three
# scalar prototype makes Perl reject the class function call form with all four
# arguments at compile time. Prototypes are not applied to method calls.
#
sub WeightedTverskySimilarityCoefficient ($$$$) {
  my($FingerprintsBitVectorA, $FingerprintsBitVectorB, $Alpha, $Beta) = @_;
  my($Na, $Nb, $Nc, $Denominator);

  if (!(defined($Alpha) && ($Alpha >= 0 && $Alpha <= 1))) {
    croak "Error: ${ClassName}->WeightedTverskySimilarityCoefficient: Alpha parameters must be defined and its value must be >=0 and <=1 ...";
  }
  if (!(defined($Beta) && ($Beta >= 0 && $Beta <= 1))) {
    croak "Error: ${ClassName}->WeightedTverskySimilarityCoefficient: Beta parameters must be defined and its value must be >=0 and <=1 ...";
  }

  # Get Tversky for set bits...
  ($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonSetBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);

  $Denominator = $Alpha*($Na - $Nb) + $Nb;
  my $TverskyForSetBits = $Denominator ? ($Nc/$Denominator) : 0;

  # Get Tversky for clear bits...
  ($Na, $Nb, $Nc) = _GetNumOfIndividualAndCommonClearBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);

  $Denominator = $Alpha*($Na - $Nb) + $Nb;
  my $TverskyForClearBits = $Denominator ? ($Nc/$Denominator) : 0;

  return $Beta*$TverskyForSetBits + (1 - $Beta)*$TverskyForClearBits;
}

# Get number of Na, Nb and Nc bits in bit vector A and B to be used for similarity coefficient calculations...
#
# Returns the list (Na, Nb, Nc): the number of bits set in A, the number of bits set
# in B and the number of bits set in both A and B.
#
sub _GetNumOfIndividualAndCommonSetBits ($$) {
  my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;

  # Number of bits set to "1" in A and in B...
  my $Na = $FingerprintsBitVectorA->GetNumOfSetBits();
  my $Nb = $FingerprintsBitVectorB->GetNumOfSetBits();

  # Number of bits set to "1" in both A and B...
  my $Nc = ($FingerprintsBitVectorA & $FingerprintsBitVectorB)->GetNumOfSetBits();

  return ($Na, $Nb, $Nc);
}

# Get number of Nd bits in bit vector A and B to be used for similarity coefficient calculations...
#
# Nd is the number of bits set to "0" in both A and B, less the correction for the
# clear bits used for padding.
#
sub _GetNumOfCommonClearBits ($$) {
  my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;

  # Bits set to "0" in both A and B are the set bits of ~A & ~B...
  my $CommonClearBitVector = ~$FingerprintsBitVectorA & ~$FingerprintsBitVectorB;
  my $Nd = $CommonClearBitVector->GetNumOfSetBits();

  # Correct for number of clear bits used for padding: the correction is applied only
  # once, using A when it needs one and otherwise B...
  for my $FingerprintsBitVector ($FingerprintsBitVectorA, $FingerprintsBitVectorB) {
    next unless _IsNumOfClearBitsCorrectionRequired($FingerprintsBitVector);
    $Nd -= _GetNumOfClearBitsCorrection($FingerprintsBitVector);
    last;
  }

  return $Nd;
}

# Get number of Na, Nb and Nc bits in bit vector A and B to be used for similarity coefficient calculations...
#
# Returns the list (Na, Nb, Nc) counted over clear bits: the number of bits clear in
# A, the number of bits clear in B and the number of bits clear in both, each
# corrected for the clear bits used for padding.
#
sub _GetNumOfIndividualAndCommonClearBits ($$) {
  my($FingerprintsBitVectorA, $FingerprintsBitVectorB) = @_;

  # Number of bits set to "0" in A, corrected for padding...
  my $Na = $FingerprintsBitVectorA->GetNumOfClearBits();
  $Na -= _GetNumOfClearBitsCorrection($FingerprintsBitVectorA)
    if _IsNumOfClearBitsCorrectionRequired($FingerprintsBitVectorA);

  # Number of bits set to "0" in B, corrected for padding...
  my $Nb = $FingerprintsBitVectorB->GetNumOfClearBits();
  $Nb -= _GetNumOfClearBitsCorrection($FingerprintsBitVectorB)
    if _IsNumOfClearBitsCorrectionRequired($FingerprintsBitVectorB);

  # Number of bits set to "0" in both A and B...
  my $Nc = _GetNumOfCommonClearBits($FingerprintsBitVectorA, $FingerprintsBitVectorB);

  return ($Na, $Nb, $Nc);
}

# Irrespective of specified size, Perl functions used to handle bit data in
# BitVector class automatically set the size to the next nearest power of 2
# and clear the extra bits.
#
# SpecifiedSize is used by this class to process any arbitrary size during similarity
# coefficient calculations.
#
# Assuming the FingerprintsBitVector class only manipulates bits up to the specified
# size, a correction for the extra bits added by BitVector class needs to be applied
# to the number of clear bits.
#
# Returns the difference between Size and SpecifiedSize.
#
sub _GetNumOfClearBitsCorrection {
  my($FingerprintsBitVector) = @_;

  my($Size, $SpecifiedSize) = @{$FingerprintsBitVector}{qw(Size SpecifiedSize)};

  return $Size - $SpecifiedSize;
}

# Is number of clear bits correction required?
#
# Returns 1 when Size is larger than SpecifiedSize and 0 otherwise.
#
sub _IsNumOfClearBitsCorrectionRequired {
  my($FingerprintsBitVector) = @_;

  return 1 if $FingerprintsBitVector->{Size} > $FingerprintsBitVector->{SpecifiedSize};

  return 0;
}
