package Fingerprints::TopologicalPharmacophoreAtomPairsFingerprints;
#
# File: TopologicalPharmacophoreAtomPairsFingerprints.pm
# Author: Manish Sud <msud@san.rr.com>
#
# Copyright (C) 2025 Manish Sud. All rights reserved.
#
# This file is part of MayaChemTools.
#
# MayaChemTools is free software; you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation; either version 3 of the License, or (at your option) any
# later version.
#
# MayaChemTools is distributed in the hope that it will be useful, but without
# any warranty; without even the implied warranty of merchantability of fitness
# for a particular purpose. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
# write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
# Boston, MA, 02111-1307, USA.
#

use strict;
use Carp;
use Exporter;
use Fingerprints::Fingerprints;
use TextUtil ();
use MathUtil ();
use Molecule;
use AtomTypes::FunctionalClassAtomTypes;

use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);

@ISA = qw(Fingerprints::Fingerprints Exporter);
@EXPORT = qw();
@EXPORT_OK = qw();

%EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]);

# Setup class variables...
my($ClassName);
_InitializeClass();

# Overload Perl functions...
use overload '""' => 'StringifyTopologicalPharmacophoreAtomPairsFingerprints';

# Class constructor...
#
# Parameters:
#   %NamesAndValues - Property name and value pairs. Each pair is applied by
#                     calling the Set<Name> method with the value. A Molecule
#                     entry is required.
#
# Returns the new object. Croaks when Molecule is not specified or when a
# setter rejects its value.
#
sub new {
  my($Class, %NamesAndValues) = @_;

  # Initialize object...
  my $This = $Class->SUPER::new();
  bless $This, ref($Class) || $Class;
  $This->_InitializeTopologicalPharmacophoreAtomPairsFingerprints();

  $This->_InitializeTopologicalPharmacophoreAtomPairsFingerprintsProperties(%NamesAndValues);

  return $This;
}

# Initialize object data to its default values...
#
sub _InitializeTopologicalPharmacophoreAtomPairsFingerprints {
  my($This) = @_;

  # Type of fingerprint...
  $This->{Type} = 'TopologicalPharmacophoreAtomPairs';

  # Type of vector...
  $This->{VectorType} = 'FingerprintsVector';

  # AtomPairsSetSizeToUse...
  #
  # ArbitrarySize - Corresponds to atom pairs with non-zero count
  # FixedSize - Corresponds to all atom pairs with zero and non-zero count
  #
  # Possible values: ArbitrarySize or FixedSize. Default: ArbitrarySize
  #
  # It's left empty here and defaulted during initialization of the fingerprints
  # vector; SetAtomPairsSetSizeToUse relies on the empty value to detect whether
  # the size has already been set.
  #
  $This->{AtomPairsSetSizeToUse} = '';

  # Type of FingerprintsVector...
  #
  # OrderedNumericalValues - For FixedSize value of AtomPairsSetSizeToUse
  # NumericalValues - For ArbitrarySize value of AtomPairsSetSizeToUse
  #
  # Possible values: OrderedNumericalValues or NumericalValues. Default: NumericalValues
  #
  $This->{FingerprintsVectorType} = '';

  # Vector values precision for real values which might be generated after
  # normalization and fuzzification...
  $This->{ValuesPrecision} = 2;

  # Minimum and maximum bond distance between pharmacophore atom pairs...
  $This->{MinDistance} = 1;
  $This->{MaxDistance} = 10;

  # Initialize atom types and weight information...
  $This->_InitializePharmacophoreAtomTypesAndWeightInformation();

  # Normalization methodology to use for scaling the occurrence count of pharmacophore atom
  # pairs at various distances.
  #
  # Possible values: None, ByHeavyAtomsCount, ByAtomTypesCount. Default: None
  #
  $This->{NormalizationMethodology} = 'None';

  # Initialize fuzzification parameters...
  #
  $This->_InitializeFuzzificationInformation();

  # Pharmacophore types assigned to each heavy atom...
  #
  %{$This->{AssignedAtomTypes}} = ();

  # Assigned Atom types count of each type in the molecule...
  #
  %{$This->{AssignedAtomTypesCount}} = ();

  # All pharmacophore atom pairs between minimum and maximum distance...
  #
  @{$This->{AtomPairsIDs}} = ();
  %{$This->{AtomPairsCount}} = ();

  return $This;
}

# Initialize pharmacophore atom types and weight information...
#
sub _InitializePharmacophoreAtomTypesAndWeightInformation {
  my($This) = @_;

  # Default pharmacophore atom types to use for atom pairs fingerprint generation
  # are: HBD, HBA, PI, NI, H. The list is kept in sorted order.
  #
  @{$This->{AtomTypesToUse}} = sort qw(HBD HBA PI NI H);

  # Weight of the various pharmacophore atom types to use for their contribution to atom
  # pair interaction. It allows to increase the importance of specific pharmacophore atom
  # types in the generated fingerprints.
  #
  # A value of 0 eliminates the contribution by a particular pharmacophore atom
  # type and 2 doubles its contribution.
  #
  # Every available functional class starts out with a weight of 1.
  #
  my %AvailableAtomTypes = AtomTypes::FunctionalClassAtomTypes::GetAvailableFunctionalClasses();

  %{$This->{AtomTypesWeight}} = ();
  for my $AtomType (keys %AvailableAtomTypes) {
    $This->{AtomTypesWeight}{$AtomType} = 1;
  }
  return $This;
}

# Initialize fuzzification information...
#
sub _InitializeFuzzificationInformation {
  my($This) = @_;

  # To fuzz or not to fuzz atom pairs count. Default: No fuzzification
  #
  $This->{FuzzifyAtomPairsCount} = 0;

  # When to fuzz atom pair count...
  #
  # Possible values: BeforeNormalization or AfterNormalization. Default: AfterNormalization
  #
  $This->{FuzzificationMode} = 'AfterNormalization';

  # How to fuzz atom pair count...
  #
  # Possible values: FuzzyBinning or FuzzyBinSmoothing. Default: FuzzyBinning
  #
  $This->{FuzzificationMethodology} = 'FuzzyBinning';

  # By how much to fuzz atom pairs count...
  #
  $This->{FuzzFactor} = 0.15;

  return $This;
}

# Initialize class ...
sub _InitializeClass {
  #Class name...
  $ClassName = __PACKAGE__;
}

# Initialize object properties from the name and value pairs passed to the
# constructor...
#
# Each pair is applied by calling Set<Name>(Value). Croaks when no Molecule was
# specified.
#
sub _InitializeTopologicalPharmacophoreAtomPairsFingerprintsProperties {
  my($This, %NamesAndValues) = @_;

  # SetFuzzFactor validates its value against the fuzzification methodology in
  # effect at the time of the call. Hash iteration order is not predictable, so
  # apply FuzzificationMethodology first and the remaining properties in sorted
  # order; otherwise the same arguments could be accepted or rejected from one
  # run to the next.
  #
  my $PriorityName = 'FuzzificationMethodology';
  my @Names = sort keys %NamesAndValues;
  @Names = ((grep { $_ eq $PriorityName } @Names), (grep { $_ ne $PriorityName } @Names));

  for my $Name (@Names) {
    my $MethodName = "Set${Name}";
    $This->$MethodName($NamesAndValues{$Name});
  }

  # Make sure molecule object was specified...
  if (!exists $NamesAndValues{Molecule}) {
    croak "Error: ${ClassName}->New: Object can't be instantiated without specifying molecule...";
  }

  $This->_InitializeTopologicalPharmacophoreAtomPairsFingerprintsVector();

  return $This;
}

# Initialize fingerprints vector...
#
# Defaults AtomPairsSetSizeToUse to ArbitrarySize when it hasn't been set and
# picks the type of fingerprints vector values to go along with it.
#
sub _InitializeTopologicalPharmacophoreAtomPairsFingerprintsVector {
  my($This) = @_;

  if (!$This->{AtomPairsSetSizeToUse}) {
    $This->{AtomPairsSetSizeToUse} = 'ArbitrarySize';
  }

  # Vector type and type of values...
  $This->{VectorType} = 'FingerprintsVector';

  if ($This->{AtomPairsSetSizeToUse} =~ /^FixedSize$/i) {
    $This->{FingerprintsVectorType} = 'OrderedNumericalValues';
  }
  else {
    $This->{FingerprintsVectorType} = 'NumericalValues';
  }

  return $This->_InitializeFingerprintsVector();
}

# Set atom pairs set size to use...
#
# The size can be set only once. Croaks when it's already set or when the value
# is neither ArbitrarySize nor FixedSize (case insensitive). Returns the object.
#
sub SetAtomPairsSetSizeToUse {
  my($This, $Value) = @_;

  if ($This->{AtomPairsSetSizeToUse}) {
    croak "Error: ${ClassName}->SetAtomPairsSetSizeToUse: Can't change size: It's already set...";
  }

  if ($Value !~ /^(ArbitrarySize|FixedSize)$/i) {
    croak "Error: ${ClassName}->SetAtomPairsSetSizeToUse: Unknown AtomPairsSetSizeToUse value: $Value; Supported values: ArbitrarySize or FixedSize";
  }

  $This->{AtomPairsSetSizeToUse} = $Value;

  return $This;
}

# Disable change of AvailableAtomTypes...
#
# Issues a warning and leaves the object unchanged.
#
sub SetAvailableAtomTypes {
  my($This) = @_;

  carp "Warning: ${ClassName}->SetAvailableAtomTypes: AvailableAtomTypes value can't be set...";

  return $This;
}

# Set atom types to use for atom pairs...
#
# The atom types may be specified as a list or as a reference to an array. Each
# one must be an available functional class; otherwise, the method croaks. The
# specified atom types are stored in sorted order.
#
# Returns the object; warns and returns nothing when no values are specified.
#
sub SetAtomTypesToUse {
  my($This, @Values) = @_;

  if (!@Values) {
    carp "Warning: ${ClassName}->SetAtomTypesToUse: No values specified...";
    return;
  }

  # A leading array reference holds the atom types; otherwise the arguments
  # themselves are the atom types...
  my @SpecifiedAtomTypes = (ref($Values[0]) =~ /^ARRAY/) ? @{$Values[0]} : @Values;

  # Make sure specified AtomTypes are valid...
  for my $SpecifiedAtomType (@SpecifiedAtomTypes) {
    if (!AtomTypes::FunctionalClassAtomTypes::IsFunctionalClassAvailable($SpecifiedAtomType)) {
      croak "Error: ${ClassName}->SetAtomTypesToUse: Specified atom type, $SpecifiedAtomType, is not supported...\n ";
    }
  }

  # Set atom types to use...
  @{$This->{AtomTypesToUse}} = sort @SpecifiedAtomTypes;

  return $This;
}

# Set vector values precision for real values which might be generated after
# normalization and fuzzification...
#
sub SetValuesPrecision {
  my($This, $Value) = @_;

  if (!TextUtil::IsPositiveInteger($Value)) {
    croak "Error: ${ClassName}->SetValuesPrecision: ValuesPrecision value, $Value, is not valid: It must be a positive integer...";
  }
  $This->{ValuesPrecision} = $Value;

  return $This;
}

# Set minimum distance for pharmacophore atom pairs...
#
# The distance is a number of bonds; a value of 0 is allowed but negative
# values are rejected as they can't correspond to any atom pair.
#
sub SetMinDistance {
  my($This, $Value) = @_;

  if (!(TextUtil::IsInteger($Value) && $Value >= 0)) {
    croak "Error: ${ClassName}->SetMinDistance: MinDistance value, $Value, is not valid: It must be an integer >= 0...";
  }
  $This->{MinDistance} = $Value;

  return $This;
}

# Set maximum distance for pharmacophore atom pairs...
#
sub SetMaxDistance {
  my($This, $Value) = @_;

  if (!TextUtil::IsPositiveInteger($Value)) {
    croak "Error: ${ClassName}->SetMaxDistance: MaxDistance value, $Value, is not valid: It must be a positive integer...";
  }
  $This->{MaxDistance} = $Value;

  return $This;
}

# Set normalization methodology to use for scaling the occurrence count of pharmacophore atom
# pairs over distance range between minimum and maximum distance.
#
sub SetNormalizationMethodology {
  my($This, $Value) = @_;

  if ($Value !~ /^(ByHeavyAtomsCount|ByAtomTypesCount|None)$/i) {
    croak "Error: ${ClassName}->SetNormalizationMethodology: NormalizationMethodology value, $Value, is not valid. Supported values: None, ByHeavyAtomsCount or ByAtomTypesCount...";
  }

  $This->{NormalizationMethodology} = $Value;

  return $This;
}

# Set weight of the various pharmacophore atom types to use for their contribution to atom
# pair interaction using atom types label and value hash.
#
# It allows to increase the importance of specific pharmacophore atom
# types in the generated fingerprints.
#
# A value of 0 eliminates the contribution by a particular pharmacophore atom
# type and 2 doubles its contribution.
#
# The weights may be specified as a list of label and value pairs or as a
# reference to a hash containing them. All the weights are validated before any
# of them is stored; the method croaks for an unknown atom type or a weight
# which is not a number >= 0. Returns the object.
#
sub SetAtomTypesWeight {
  my($This, @LabelsAndWeights) = @_;

  # A lone hash reference is what gets passed on when the weights are specified
  # as a single property value...
  my %AtomTypesWeight = (@LabelsAndWeights == 1 && ref($LabelsAndWeights[0]) eq 'HASH') ? %{$LabelsAndWeights[0]} : @LabelsAndWeights;

  # Go over atom types in sorted order to report the same problem every time...
  my @AtomTypes = sort keys %AtomTypesWeight;

  for my $AtomType (@AtomTypes) {
    my $Weight = $AtomTypesWeight{$AtomType};

    if (!exists $This->{AtomTypesWeight}{$AtomType}) {
      croak "Error: ${ClassName}->SetAtomTypesWeight: AtomTypeWeight for $AtomType couldn't be set: Unknown atom type...";
    }
    if (!(TextUtil::IsFloat($Weight) && ($Weight >= 0))) {
      croak "Error: ${ClassName}->SetAtomTypesWeight: Specified weight value, $Weight, for AtomType, $AtomType, muts be >= 0...";
    }
  }

  for my $AtomType (@AtomTypes) {
    $This->{AtomTypesWeight}{$AtomType} = $AtomTypesWeight{$AtomType};
  }

  return $This;
}

# Set fuzzification methodology to use for fuzzifying atom pairs count...
#
sub SetFuzzificationMethodology {
  my($This, $Value) = @_;

  if ($Value !~ /^(FuzzyBinning|FuzzyBinSmoothing)$/i) {
    croak "Error: ${ClassName}->SetFuzzificationMethodology: FuzzificationMethodology value, $Value, is not valid. Supported values: FuzzyBinning or FuzzyBinSmoothing...";
  }

  $This->{FuzzificationMethodology} = $Value;

  return $This;
}

# Set fuzzification mode for fuzzifying atom pairs count...
#
sub SetFuzzificationMode {
  my($This, $Value) = @_;

  if ($Value !~ /^(BeforeNormalization|AfterNormalization)$/i) {
    croak "Error: ${ClassName}->SetFuzzificationMode: FuzzificationMode value, $Value, is not valid. Supported values: BeforeNormalization or AfterNormalization...";
  }

  $This->{FuzzificationMode} = $Value;

  return $This;
}

# Set fuzz factor values used for fuzzifying atom pairs count...
#
# The valid range depends on the fuzzification methodology in effect at the
# time of the call: 0 to 1 for FuzzyBinning and 0 to 0.5 for FuzzyBinSmoothing.
#
sub SetFuzzFactor {
  my($This, $Value) = @_;

  if ($This->{FuzzificationMethodology} =~ /^FuzzyBinning$/i) {
    if (!(TextUtil::IsFloat($Value) && $Value >=0 && $Value <= 1.0)) {
      croak "Error: ${ClassName}->SetFuzzFactor: Specified fuzz factor value, $Value, must be >= 0 and <= 1...";
    }
  }
  elsif ($This->{FuzzificationMethodology} =~ /^FuzzyBinSmoothing$/i) {
    if (!(TextUtil::IsFloat($Value) && $Value >=0 && $Value <= 0.5)) {
      croak "Error: ${ClassName}->SetFuzzFactor: Specified fuzz factor value, $Value, must be >= 0 and <= 0.5...";
    }
  }
  else {
    croak "Error: ${ClassName}->SetFuzzFactor: Fuzz factor value can't be changed: Uknown FuzzificationMethodology: $This->{FuzzificationMethodology}...";
  }

  $This->{FuzzFactor} = $Value;

  return $This;
}

# Generate fingerprints description...
#
# Returns the explicitly set description when there is one; otherwise, a
# description built from the fingerprints type, atom pairs set size and the
# distance range.
#
sub GetDescription {
  my($This) = @_;

  # Is description explicitly set?
  if (exists $This->{Description}) {
    return $This->{Description};
  }

  # Generate fingerprints description...

  return "$This->{Type}:$This->{AtomPairsSetSizeToUse}:MinDistance$This->{MinDistance}:MaxDistance$This->{MaxDistance}";
}

# Generate topological pharmacophore atom pairs [ Ref 60-62, Ref 65, Ref 68 ] fingerprints...
#
# Methodology:
#   . Generate a distance matrix.
#   . Assign pharmacophore atom types to all the atoms.
#   . Initialize pharmacophore atom pairs basis set for all unique pairs between
#     minimum and maximum distance.
#   . Using distance matrix and pharmacophore atom types, count occurrence of
#     unique atom pairs between specified distance range - It corresponds to the
#     correlation-vector for the atom pairs.
#       .
#         Weigh contribution of each atom type to atom pair interaction by its specified
#         weight during occurrence count.
#       . Assign count to appropriate distance bin for a specific atom pair
#
#   . Normalize occurrence count of pharmacophore atom pairs by heavy atom count
#     or sum of AtomTypeCounts of each pharmacophore atom type in the atom pair
#     at a specific distance.
#
#   . Fuzzify occurrence count of pharmacophore atom pairs using FuzzyBinning or
#     FuzzyBinSmoothing methodology.
#
# Notes:
#   . Hydrogen atoms are ignored during the fingerprint generation.
#
# Returns the object. Croaks when MinDistance is greater than MaxDistance; warns
# and returns without generating fingerprints when the distance matrix is not
# available.
#
sub GenerateFingerprints {
  my($This) = @_;

  if ($This->{MinDistance} > $This->{MaxDistance}) {
    croak "Error: ${ClassName}->GenerateFingerprints: No fingerprints generated: MinDistance, $This->{MinDistance}, must be <= MaxDistance, $This->{MaxDistance}...";
  }

  # Cache appropriate molecule data...
  $This->_SetupMoleculeDataCache();

  # Generate distance matrix...
  if (!$This->_SetupDistanceMatrix()) {
    carp "Warning: ${ClassName}->GenerateFingerprints: Fingerprints generation didn't succeed: Couldn't generate distance matrix...";

    # Release the cached molecule data on the way out as well; it's only needed
    # while the fingerprints are being generated...
    $This->_ClearMoleculeDataCache();

    return $This;
  }

  # Assign pharmacophore atom types to all heavy atoms...
  $This->_AssignPharmacophoreAtomTypes();

  # Initialize values of all possible pharmacophore atom pairs...
  $This->_InitializePharmacophoreAtomPairs();

  # Count atom pairs...
  $This->_CountPharmacohoreAtomPairs();

  # Fuzzify atom pairs count...
  if ($This->{FuzzificationMode} =~ /^BeforeNormalization$/i) {
    $This->_FuzzifyPharmacohoreAtomPairsCount();
  }

  # Normalize atom pairs count...
  $This->_NormalizePharmacohoreAtomPairsCount();

  # Fuzzify atom pairs count...
  if ($This->{FuzzificationMode} =~ /^AfterNormalization$/i) {
    $This->_FuzzifyPharmacohoreAtomPairsCount();
  }

  # Set final fingerprints...
  $This->_SetFinalFingerprints();

  # Clear cached molecule data...
  $This->_ClearMoleculeDataCache();

  return $This;
}

# Setup distance matrix...
#
# Returns the object when the molecule provides a distance matrix; otherwise,
# returns nothing.
#
sub _SetupDistanceMatrix {
  my($This) = @_;

  $This->{DistanceMatrix} = $This->GetMolecule()->GetDistanceMatrix();

  if (!$This->{DistanceMatrix}) {
    return;
  }

  return $This;
}

# Assign pharmacophore atom types to all heavy atoms and count each atom
# types assigned...
#
# Sets up AssignedAtomTypes (atom ID to a reference to an array of assigned atom
# types, empty when none is assigned), AssignedAtomTypesCount and HeavyAtomCount.
#
sub _AssignPharmacophoreAtomTypes {
  my($This) = @_;

  # Assign topological pharmacophore atom types...
  my $FunctionalClassAtomTypes = AtomTypes::FunctionalClassAtomTypes->new('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => 1, 'FunctionalClassesToUse' => $This->{AtomTypesToUse});
  $FunctionalClassAtomTypes->AssignAtomTypes();

  %{$This->{AssignedAtomTypes}} = ();

  # Initialize assigned atom types count...
  %{$This->{AssignedAtomTypesCount}} = ();
  for my $AtomType (@{$This->{AtomTypesToUse}}) {
    $This->{AssignedAtomTypesCount}{$AtomType} = 0;
  }

  $This->{HeavyAtomCount} = 0;

  ATOM: for my $Atom (@{$This->{Atoms}}) {
    if ($Atom->IsHydrogen()) {
      next ATOM;
    }
    $This->{HeavyAtomCount} += 1;

    # Collect all possible pharmacophore atom types which could be assigned to atom.
    # Multiple atom types are delimited by a period in the assigned atom type. A
    # new array is used for each atom as a reference to it is stored below.
    my @AtomTypes = ();

    my $AssignedAtomType = $FunctionalClassAtomTypes->GetAtomType($Atom);
    if ($AssignedAtomType && $AssignedAtomType !~ /^None$/i) {
      push @AtomTypes, split /\./, $AssignedAtomType;
      for my $AtomType (@AtomTypes) {
        $This->{AssignedAtomTypesCount}{$AtomType} += 1;
      }
    }

    # Assign pharmacophore types to atom...
    my $AtomID = $Atom->GetID();
    $This->{AssignedAtomTypes}{$AtomID} = \@AtomTypes;
  }
  return $This;
}

# Initialize values of all possible pharmacophore atom pairs...
#
# Let:
#   Dmin = Minimum distance corresponding to number of bonds between two atoms
#   Dmax = Maximum distance corresponding to number of bonds between two atoms
#   D = Distance corresponding to number of bonds between two atoms
#
#   P = Number of pharmacophore atom types to consider
#   PPDn = Number of possible unique pharmacophore atom pairs at a distance Dn
#
#   PPT = Total number of possible pharmacophore atom pairs at all distances between Dmin and Dmax
#
# Then:
#
#   PPD = (P * (P - 1))/2 + P
#
#   PPT = ((Dmax - Dmin) + 1) * ((P * (P - 1))/2 + P)
#       = ((Dmax - Dmin) + 1) * PPD
#
#
# So for default values of Dmin = 1, Dmax = 10 and P = 5,
#
#   PPD = (5 * (5 - 1))/2 + 5 = 15
#   PPT = ((10 - 1) + 1) * 15 = 150
#
# the pharmacophore atom pairs basis set includes 150 values.
#
# The counts are kept in AtomPairsCount{Distance}{AtomType1}{AtomType2} with
# AtomType1 preceding or matching AtomType2 in AtomTypesToUse.
#
sub _InitializePharmacophoreAtomPairs {
  my($This) = @_;

  %{$This->{AtomPairsCount}} = ();

  for my $Distance ($This->{MinDistance} .. $This->{MaxDistance}) {
    %{$This->{AtomPairsCount}{$Distance}} = ();

    for my $Index1 (0 .. $#{$This->{AtomTypesToUse}}) {
      my $AtomType1 = $This->{AtomTypesToUse}[$Index1];
      %{$This->{AtomPairsCount}{$Distance}{$AtomType1}} = ();

      # Start from the first atom type itself to cover each unique pair once...
      for my $Index2 ($Index1 .. $#{$This->{AtomTypesToUse}}) {
        my $AtomType2 = $This->{AtomTypesToUse}[$Index2];
        $This->{AtomPairsCount}{$Distance}{$AtomType1}{$AtomType2} = 0;
      }
    }
  }
  return $This;
}

# Count pharmacophore atom pairs between minimum and maximum distance at each
# distance using distance matrix and pharmacophore atom types assigned to each heavy
# atom.
#
# Let:
#   Px = Pharmacophore atom type x
#   Py = Pharmacophore atom type y
#   Dn = Distance between Px and Py in specified distance range
#
# Then:
#   Px-Dn-Py = Pharmacophore atom pair ID for atom types Px and Py at distance Dn
#
# For example: H-D1-H, H-D2-HBA, PI-D5-PI and so on
#
# Notes:
#   . The row and column indices of distance matrix correspond to atom indices.
#   . Distance value of BigNumber implies the atom is not connected to any other atom.
#   . Due to symmetric nature of distance matrix, only upper or lower triangular matrix
#     needs to be processed during identification and count of pharmacophore atom pairs.
#
sub _CountPharmacohoreAtomPairs {
  my($This) = @_;

  my $Matrix = $This->{DistanceMatrix};
  my($RowsCount, $ColsCount) = $Matrix->GetSize();
  my $SkipIndexCheck = 0;

  # Does atom have at least one pharmacophore atom type assigned to it?
  my $HasAssignedTypes = sub {
    my($AtomID) = @_;
    return (exists($This->{AssignedAtomTypes}{$AtomID}) && @{$This->{AssignedAtomTypes}{$AtomID}}) ? 1 : 0;
  };

  ROW: for my $Row (0 .. ($RowsCount - 1)) {
    my $RowAtomID = $This->{AtomIndexToID}{$Row};
    next ROW unless $HasAssignedTypes->($RowAtomID);

    # Upper triangular part of the matrix, diagonal included...
    COL: for my $Col ($Row .. ($ColsCount - 1)) {
      my $ColAtomID = $This->{AtomIndexToID}{$Col};
      next COL unless $HasAssignedTypes->($ColAtomID);

      my $BondDistance = $Matrix->GetValue($Row, $Col, $SkipIndexCheck);
      next COL if ($BondDistance < $This->{MinDistance} || $BondDistance > $This->{MaxDistance});

      ROWTYPE: for my $RowType (@{$This->{AssignedAtomTypes}{$RowAtomID}}) {
        # Atom types with zero weight don't contribute...
        next ROWTYPE if ($This->{AtomTypesWeight}{$RowType} == 0);

        COLTYPE: for my $ColType (@{$This->{AssignedAtomTypes}{$ColAtomID}}) {
          next COLTYPE if ($This->{AtomTypesWeight}{$ColType} == 0);

          my $Contribution = $This->{AtomTypesWeight}{$RowType} * $This->{AtomTypesWeight}{$ColType};

          # Counts are stored with the atom type which sorts first as the outer key...
          my($FirstType, $SecondType) = ($RowType le $ColType) ? ($RowType, $ColType) : ($ColType, $RowType);
          $This->{AtomPairsCount}{$BondDistance}{$FirstType}{$SecondType} += $Contribution;
        }
      }
    }
  }
  return $This;
}

# Normalize the occurance count of pharmacophore atom pairs over the specified distance
# range...
#
# Dispatches on NormalizationMethodology (case insensitive): None leaves the
# counts alone; ByHeavyAtomsCount and ByAtomTypesCount hand over to the
# corresponding method. Croaks for any other value.
#
sub _NormalizePharmacohoreAtomPairsCount {
  my($This) = @_;

  if ($This->{NormalizationMethodology} =~ /^None$/i) {
    # Nothing to do...
  }
  elsif ($This->{NormalizationMethodology} =~ /^ByHeavyAtomsCount$/i) {
    $This->_NormalizeAtomPairsCountByHeavyAtomsCount();
  }
  elsif ($This->{NormalizationMethodology} =~ /^ByAtomTypesCount$/i) {
    $This->_NormalizeAtomPairsCountByAtomTypesCount();
  }
  else {
    croak "Error: ${ClassName}->_NormalizePharmacohoreAtomPairsCount: Unknown NormalizationMethodology: $This->{NormalizationMethodology}...";
  }
  return $This;
}


# Normalize the occurance count of pharmacophore atom pairs at various distances by
# heavy atom count...
#
sub _NormalizeAtomPairsCountByHeavyAtomsCount {
  my($This) = @_;

  # Nothing to scale by...
  return $This if ($This->{HeavyAtomCount} == 0);

  # Walk over the counts themselves; the loop variable is an alias to the hash
  # value, so the division updates the count in place...
  for my $CountsAtDistance (values %{$This->{AtomPairsCount}}) {
    for my $CountsForAtomType (values %{$CountsAtDistance}) {
      COUNT: for my $Count (values %{$CountsForAtomType}) {
        next COUNT if ($Count == 0);
        $Count /= $This->{HeavyAtomCount};
      }
    }
  }
  return $This;
}

# Normalize the occurance count of pharmacophore atom pairs at various distances by
# dividing it using sum of the count of each pharmacophore atom type present in the
# molecule for the corresponding atom pair.
#
sub _NormalizeAtomPairsCountByAtomTypesCount {
  my($This) = @_;

  for my $CountsAtDistance (values %{$This->{AtomPairsCount}}) {
    for my $FirstType (keys %{$CountsAtDistance}) {
      my $CountsForAtomType = $CountsAtDistance->{$FirstType};

      SECONDTYPE: for my $SecondType (keys %{$CountsForAtomType}) {
        # Zero counts are left untouched...
        next SECONDTYPE if ($CountsForAtomType->{$SecondType} == 0);

        my $TypesCountSum = $This->{AssignedAtomTypesCount}{$FirstType} + $This->{AssignedAtomTypesCount}{$SecondType};
        $CountsForAtomType->{$SecondType} /= $TypesCountSum;
      }
    }
  }
  return $This;
}

# Fuzzify pharmacophore atom pairs count...
#
# Let:
#   Px = Pharmacophore atom type x
#   Py = Pharmacophore atom type y
#
#   PPxy = Pharmacophore atom pair between atom type Px and Py
#
#   PPxyDn = Pharmacophore atom pairs count between atom type Px and Py at distance Dn
#   PPxyDn-1 = Pharmacophore atom pairs count between atom type Px and Py at distance Dn - 1
#   PPxyDn+1 = Pharmacophore atom pairs count between atom type Px and Py at distance Dn + 1
#
#   FF = FuzzFactor for FuzzyBinning and FuzzyBinSmoothing
#
# Then:
#
# For FuzzyBinning:
#
#   PPxyDn = PPxyDn (Unchanged)
#
#   PPxyDn-1 = PPxyDn-1 + PPxyDn * FF
#   PPxyDn+1 = PPxyDn+1 + PPxyDn * FF
#
# For FuzzyBinSmoothing:
#
#   PPxyDn = PPxyDn - PPxyDn * 2 * FF for Dmin < Dn < Dmax
#   PPxyDn = PPxyDn - PPxyDn * FF for Dn = Dmin or Dmax
#
#   PPxyDn-1 = PPxyDn-1 + PPxyDn * FF
#   PPxyDn+1 = PPxyDn+1 + PPxyDn * FF
#
# In both fuzzification schemes, a value of 0 for FF implies no fuzzification of occurrence counts.
# A value of 1 during FuzzyBinning corresponds to maximum fuzzification of occurrence counts;
# however, a value of 0.5, the maximum allowed during FuzzyBinSmoothing, ends up completely
# distributing the value over the previous and next distance bins.
#
# So for default value of FuzzFactor (FF) 0.15, the occurrence count of pharmacophore atom pairs
# at distance Dn during FuzzyBinning is left unchanged and the counts at distances Dn - 1 and Dn + 1
# are incremented by PPxyDn * 0.15.
#
# And during FuzzyBinSmoothing the occurrence count at distance Dn is scaled back using a multiplicative
# factor of (1 - 2*0.15) and the occurrence counts at distances Dn - 1 and Dn + 1 are incremented by
# PPxyDn * 0.15. In other words, the occurrence bin count is smoothed out by distributing it over the
# previous and next distance value.
#
sub _FuzzifyPharmacohoreAtomPairsCount {
  my($This) = @_;

  # Fuzzification is turned off or wouldn't change anything...
  return $This unless ($This->{FuzzifyAtomPairsCount} && $This->{FuzzFactor} > 0);

  my $SmoothBins = ($This->{FuzzificationMethodology} =~ /^FuzzyBinSmoothing$/i) ? 1 : 0;
  my $LastTypeIndex = $#{$This->{AtomTypesToUse}};

  for my $FirstIndex (0 .. $LastTypeIndex) {
    my $FirstType = $This->{AtomTypesToUse}[$FirstIndex];

    for my $SecondIndex ($FirstIndex .. $LastTypeIndex) {
      my $SecondType = $This->{AtomTypesToUse}[$SecondIndex];

      # The count for a distance is always picked up one step ahead, before the
      # preceding distance gets to add its share to it; consequently, only the
      # counts as they were at the start are spread around...
      my $UpcomingCount = $This->{AtomPairsCount}{$This->{MinDistance}}{$FirstType}{$SecondType};

      BIN: for my $Distance ($This->{MinDistance} .. $This->{MaxDistance}) {
        my $HasPreviousBin = ($Distance > $This->{MinDistance}) ? 1 : 0;
        my $HasNextBin = ($Distance < $This->{MaxDistance}) ? 1 : 0;

        my $Count = $UpcomingCount;
        $UpcomingCount = $HasNextBin ? $This->{AtomPairsCount}{$Distance + 1}{$FirstType}{$SecondType} : 0;

        # No contribution to fuzzy binning from this distance...
        next BIN if ($Count == 0);

        my $Share = $Count * $This->{FuzzFactor};

        # Increment count at previous distance...
        if ($HasPreviousBin) {
          $This->{AtomPairsCount}{$Distance - 1}{$FirstType}{$SecondType} += $Share;
        }

        # Decrement count at current distance for FuzzyBinSmoothing: two shares
        # for a distance with bins on both sides and one share otherwise...
        if ($SmoothBins) {
          my $Loss = ($HasPreviousBin && $HasNextBin) ? 2 * $Share : $Share;
          $This->{AtomPairsCount}{$Distance}{$FirstType}{$SecondType} -= $Loss;
        }

        # Increment count at next distance...
        if ($HasNextBin) {
          $This->{AtomPairsCount}{$Distance + 1}{$FirstType}{$SecondType} += $Share;
        }
      }
    }
  }
  return $This;
}

# Set final fingerprints vector...
#
# Collects atom pair IDs and count values, ordered by distance and atom types,
# and adds them to the fingerprints vector. Pairs with a zero value are left out
# for ArbitrarySize atom pairs set size.
#
sub _SetFinalFingerprints {
  my($This) = @_;

  # Mark successful generation of fingerprints...
  $This->{FingerprintsGenerated} = 1;

  # Values may not be whole numbers after normalization or fuzzification...
  my $RoundOffValues = (($This->{NormalizationMethodology} !~ /^None$/i) || ($This->{FuzzifyAtomPairsCount})) ? 1 : 0;

  # Zero values are dropped for ArbitrarySize atom pairs set size...
  my $DropZeroValues = ($This->{AtomPairsSetSizeToUse} =~ /^ArbitrarySize$/i) ? 1 : 0;

  my $LastTypeIndex = $#{$This->{AtomTypesToUse}};
  my(@PairIDs, @PairValues);

  for my $Distance ($This->{MinDistance} .. $This->{MaxDistance}) {
    for my $FirstIndex (0 .. $LastTypeIndex) {
      my $FirstType = $This->{AtomTypesToUse}[$FirstIndex];

      PAIR: for my $SecondIndex ($FirstIndex .. $LastTypeIndex) {
        my $SecondType = $This->{AtomTypesToUse}[$SecondIndex];

        my $PairValue = $This->{AtomPairsCount}{$Distance}{$FirstType}{$SecondType};
        if ($RoundOffValues) {
          $PairValue = MathUtil::round($PairValue, $This->{ValuesPrecision}) + 0;
        }

        next PAIR if ($DropZeroValues && $PairValue == 0);

        push @PairIDs, "${FirstType}-D${Distance}-${SecondType}";
        push @PairValues, $PairValue;
      }
    }
  }

  # Replace contents of the existing atom pair IDs list...
  @{$This->{AtomPairsIDs}} = @PairIDs;

  # Add AtomPairsIDs and count values to fingerprint vector...
  $This->{FingerprintsVector}->AddValueIDs(\@{$This->{AtomPairsIDs}});
  $This->{FingerprintsVector}->AddValues(\@PairValues);

  return $This;
}

# Get pharmacophore atom pair IDs corresponding to atom pairs count values in
# fingerprint vector as an array or reference to an array...
#
# AtomPairIDs list is generated during finalization of fingerprints and the fingerprint
# vector containing count values matches the atom pairs array.
#
#
sub GetAtomPairIDs {
  my($This) = @_;

  if (wantarray) {
    return @{$This->{AtomPairsIDs}};
  }
  return \@{$This->{AtomPairsIDs}};
}

# Cache appropriate molecule data...
#
# Sets up Atoms along with AtomIndexToID, which maps position of an atom in the
# atoms list to its ID.
#
sub _SetupMoleculeDataCache {
  my($This) = @_;

  # Get all atoms including hydrogens to correctly map atom indices to atom IDs for
  # usage of distance matrix. The hydrogen atoms are ignored during processing...
  #
  @{$This->{Atoms}} = $This->GetMolecule()->GetAtoms();

  # Get all atom IDs...
  my @IDs = map { $_->GetID() } @{$This->{Atoms}};

  # Set AtomIndex to AtomID hash...
  %{$This->{AtomIndexToID}} = map { ($_ => $IDs[$_]) } (0 .. $#IDs);

  return $This;
}

# Clear cached molecule data...
#
# Only the atoms list is emptied.
#
sub _ClearMoleculeDataCache {
  my($This) = @_;

  @{$This->{Atoms}} = ();

  return $This;
}


# Return a string containing data for TopologicalPharmacophoreAtomPairsFingerprints object...
sub StringifyTopologicalPharmacophoreAtomPairsFingerprints {
  my($This) = @_;

  # Pharmacophore type labels and description...
  my @AtomTypesOrder = AtomTypes::FunctionalClassAtomTypes::GetFunctionalClassesOrder();
  my %AvailableAtomTypes = AtomTypes::FunctionalClassAtomTypes::GetAvailableFunctionalClasses();
  my @AtomTypesDescription = map { "${_}: $AvailableAtomTypes{$_}" } @AtomTypesOrder;

  # Weights: a space precedes the first entry and the rest are delimited by semicolons...
  my @WeightEntries = map { "${_}: $This->{AtomTypesWeight}{$_}" } sort @{$This->{AtomTypesToUse}};
  my $Weights = @WeightEntries ? " " . join("; ", @WeightEntries) : "";

  # Fuzzification of count...
  my $FuzzifyFlag = $This->{FuzzifyAtomPairsCount} ? "Yes" : "No";

  # Pieces of the string in the order of their appearance...
  my @Pieces = (
    "Fingerprint type: $This->{Type}; AtomPairsSetSizeToUse: $This->{AtomPairsSetSizeToUse}",
    "MinDistance: $This->{MinDistance}; MaxDistance: $This->{MaxDistance}",
    "AtomTypesToUse: <" . TextUtil::JoinWords(\@{$This->{AtomTypesToUse}}, ", ", 0) . ">",
    "AtomTypesOrder: <" . TextUtil::JoinWords(\@AtomTypesOrder, ", ", 0) . ">",
    "AvailableAtomTypes: <" . TextUtil::JoinWords(\@AtomTypesDescription, ", ", 0) . ">",
    "NormalizationMethodology: $This->{NormalizationMethodology}",
    "AtomTypesWeight <Labels: Weight>: <" . $Weights . ">",
    "FuzzifyAtomPairsCount: $FuzzifyFlag; FuzzificationMode: $This->{FuzzificationMode}; FuzzificationMethodology: $This->{FuzzificationMethodology}; FuzzFactor: $This->{FuzzFactor}",
    "NumOfAtomPairs: " . $This->{FingerprintsVector}->GetNumOfValues(),
    "FingerprintsVector: < $This->{FingerprintsVector} >",
  );

  return join("; ", @Pieces);
}