1 package Fingerprints::TopologicalAtomPairsFingerprints; 2 # 3 # File: TopologicalAtomPairsFingerprints.pm 4 # Author: Manish Sud <msud@san.rr.com> 5 # 6 # Copyright (C) 2024 Manish Sud. All rights reserved. 7 # 8 # This file is part of MayaChemTools. 9 # 10 # MayaChemTools is free software; you can redistribute it and/or modify it under 11 # the terms of the GNU Lesser General Public License as published by the Free 12 # Software Foundation; either version 3 of the License, or (at your option) any 13 # later version. 14 # 15 # MayaChemTools is distributed in the hope that it will be useful, but without 16 # any warranty; without even the implied warranty of merchantability of fitness 17 # for a particular purpose. See the GNU Lesser General Public License for more 18 # details. 19 # 20 # You should have received a copy of the GNU Lesser General Public License 21 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or 22 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, 23 # Boston, MA, 02111-1307, USA. 24 # 25 26 use strict; 27 use Carp; 28 use Exporter; 29 use Fingerprints::Fingerprints; 30 use TextUtil (); 31 use Molecule; 32 use AtomTypes::AtomicInvariantsAtomTypes; 33 use AtomTypes::DREIDINGAtomTypes; 34 use AtomTypes::EStateAtomTypes; 35 use AtomTypes::FunctionalClassAtomTypes; 36 use AtomTypes::MMFF94AtomTypes; 37 use AtomTypes::SLogPAtomTypes; 38 use AtomTypes::SYBYLAtomTypes; 39 use AtomTypes::TPSAAtomTypes; 40 use AtomTypes::UFFAtomTypes; 41 42 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); 43 44 @ISA = qw(Fingerprints::Fingerprints Exporter); 45 @EXPORT = qw(); 46 @EXPORT_OK = qw(); 47 48 %EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]); 49 50 # Setup class variables... 51 my($ClassName); 52 _InitializeClass(); 53 54 # Overload Perl functions... 55 use overload '""' => 'StringifyTopologicalAtomPairsFingerprints'; 56 57 # Class constructor... 
sub new {
    my($Class, %NamesAndValues) = @_;

    # Build the object through the parent class constructor and bless it into
    # the class of the invocant, which may be a class name or an existing object...
    my $This = $Class->SUPER::new();
    bless $This, ref($Class) || $Class;

    $This->_InitializeTopologicalAtomPairsFingerprints();
    $This->_InitializeTopologicalAtomPairsFingerprintsProperties(%NamesAndValues);

    return $This;
}

# Initialize object data to its default values...
#
sub _InitializeTopologicalAtomPairsFingerprints {
    my($This) = @_;

    # Type of fingerprint...
    $This->{Type} = 'TopologicalAtomPairs';

    # Type of vector...
    $This->{VectorType} = 'FingerprintsVector';

    # Type of FingerprintsVector...
    $This->{FingerprintsVectorType} = 'NumericalValues';

    # Minimum and maximum bond distance between atom pairs...
    $This->{MinDistance} = 1;
    $This->{MaxDistance} = 10;

    # Atom identifier type to use for atom IDs in atom pairs...
    #
    # Currently supported values are: AtomicInvariantsAtomTypes, DREIDINGAtomTypes,
    # EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes,
    # SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes
    #
    $This->{AtomIdentifierType} = '';

    # Atom types assigned to each heavy atom...
    #
    %{$This->{AssignedAtomTypes}} = ();

    # All atom pairs between minimum and maximum distance...
    #
    @{$This->{AtomPairsIDs}} = ();
    %{$This->{AtomPairsCount}} = ();

    return $This;
}

# Initialize class: record the package name used as prefix in error messages...
sub _InitializeClass {
    $ClassName = __PACKAGE__;
}

# Initialize object properties from the name and value pairs passed to new...
#
# Each name is mapped to a "Set<Name>" method and called with its value.
# AtomIdentifierType is always applied first: SetAtomIdentifierType installs the
# default AtomicInvariantsToUse/FunctionalClassesToUse lists, and
# SetFunctionalClassesToUse refuses to run until the identifier type is known.
# Iterating the hash with each() made the outcome depend on hash ordering.
#
# Croaks when Molecule or AtomIdentifierType is missing.
#
sub _InitializeTopologicalAtomPairsFingerprintsProperties {
    my($This, %NamesAndValues) = @_;
    my($Name, $MethodName, @Names);

    # Remaining names are processed in sorted order to keep calls deterministic...
    @Names = sort grep { $_ ne 'AtomIdentifierType' } keys %NamesAndValues;
    if (exists $NamesAndValues{AtomIdentifierType}) {
        unshift @Names, 'AtomIdentifierType';
    }

    for $Name (@Names) {
        $MethodName = "Set${Name}";
        $This->$MethodName($NamesAndValues{$Name});
    }

    # Make sure molecule object was specified...
    if (!exists $NamesAndValues{Molecule}) {
        croak "Error: ${ClassName}->New: Object can't be instantiated without specifying molecule...";
    }
    if (!exists $NamesAndValues{AtomIdentifierType}) {
        croak "Error: ${ClassName}->New: Object can't be instantiated without specifying AtomIdentifierType...";
    }

    $This->_InitializeFingerprintsVector();

    return $This;
}

# Set minimum distance for atom pairs...
#
# Croaks unless the value is a positive integer.
#
sub SetMinDistance {
    my($This, $Value) = @_;

    if (!TextUtil::IsPositiveInteger($Value)) {
        croak "Error: ${ClassName}->SetMinDistance: MinDistance value, $Value, is not valid: It must be a positive integer...";
    }
    $This->{MinDistance} = $Value;

    return $This;
}

# Set maximum distance for atom pairs...
#
# Croaks unless the value is a positive integer.
#
sub SetMaxDistance {
    my($This, $Value) = @_;

    if (!TextUtil::IsPositiveInteger($Value)) {
        croak "Error: ${ClassName}->SetMaxDistance: MaxDistance value, $Value, is not valid: It must be a positive integer...";
    }
    $This->{MaxDistance} = $Value;

    return $This;
}

# Set atom identifier type...
#
# The type name is matched case insensitively against the supported list and
# can be set only once per object; any other use croaks.
#
sub SetAtomIdentifierType {
    my($This, $IdentifierType) = @_;

    if ($IdentifierType !~ /^(AtomicInvariantsAtomTypes|DREIDINGAtomTypes|EStateAtomTypes|FunctionalClassAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
        croak "Error: ${ClassName}->SetAtomIdentifierType: Specified value, $IdentifierType, for AtomIdentifierType is not vaild. Supported types in current release of MayaChemTools: AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, and UFFAtomTypes.";
    }

    if ($This->{AtomIdentifierType}) {
        croak "Error: ${ClassName}->SetAtomIdentifierType: Can't change initial atom identifier type: It's already set...";
    }

    $This->{AtomIdentifierType} = $IdentifierType;

    # Initialize atom identifier type information...
    $This->_InitializeAtomIdentifierTypeInformation();

    return $This;
}

# Generate fingerprints description...
#
# Returns the explicitly set Description when there is one; otherwise a string
# built from Type, AtomIdentifierType, MinDistance and MaxDistance.
#
sub GetDescription {
    my($This) = @_;

    # Is description explicitly set?
    if (exists $This->{Description}) {
        return $This->{Description};
    }

    # Generate fingerprints description...

    return "$This->{Type}:$This->{AtomIdentifierType}:MinDistance$This->{MinDistance}:MaxDistance$This->{MaxDistance}";
}

# Generate topological atom pairs [ Ref 57, Ref 59, Ref 72 ] fingerprints...
#
# Methodology:
#   . Generate a distance matrix.
#   . Assign atom types to all the atoms.
#   . Using distance matrix and atom types, count occurrence of
#     unique atom pairs within specified distance range - It corresponds to the
#     correlation-vector for the atom pairs.
#
# Notes:
#   . Hydrogen atoms are ignored during the fingerprint generation.
#   . Croaks when MinDistance is larger than MaxDistance.
#   . When the distance matrix or the atom types can't be set up, a warning is
#     issued, the cached molecule data is cleared and the object is returned
#     without fingerprints.
#
sub GenerateFingerprints {
    my($This) = @_;

    if ($This->{MinDistance} > $This->{MaxDistance}) {
        croak "Error: ${ClassName}->GenerateFingerprints: No fingerprints generated: MinDistance, $This->{MinDistance}, must be <= MaxDistance, $This->{MaxDistance}...";
    }

    # Cache appropriate molecule data...
    $This->_SetupMoleculeDataCache();

    # Generate distance matrix...
    if (!$This->_SetupDistanceMatrix()) {
        # Don't leave cached atoms behind on failure...
        $This->_ClearMoleculeDataCache();
        carp "Warning: ${ClassName}->GenerateFingerprints: $This->{AtomIdentifierType} fingerprints generation didn't succeed: Couldn't generate distance matrix...";
        return $This;
    }

    # Assign atom types to all heavy atoms...
    if (!$This->_AssignAtomTypes()) {
        # Don't leave cached atoms behind on failure...
        $This->_ClearMoleculeDataCache();
        carp "Warning: ${ClassName}->GenerateFingerprints: $This->{AtomIdentifierType} fingerprints generation didn't succeed: Couldn't assign valid $This->{AtomIdentifierType} to all atoms...";
        return $This;
    }

    # Initialize values of topological atom pairs...
    $This->_InitializeToplogicalAtomPairs();

    # Count atom pairs...
    $This->_GenerateAndCountAtomPairs();

    # Set final fingerprints...
    $This->_SetFinalFingerprints();

    # Clear cached molecule data...
    $This->_ClearMoleculeDataCache();

    return $This;
}

# Setup distance matrix...
#
# Stores the molecule's distance matrix in the object. Returns the object on
# success and nothing (false) when no distance matrix is available.
#
sub _SetupDistanceMatrix {
    my($This) = @_;

    $This->{DistanceMatrix} = $This->GetMolecule()->GetDistanceMatrix();

    if (!$This->{DistanceMatrix}) {
        return;
    }

    return $This;
}

# Assign appropriate atom types to all heavy atoms...
#
# An atom types object matching AtomIdentifierType is created for the molecule,
# asked to assign atom types, and the resulting type of each non-hydrogen atom
# in the cached atoms list is stored in AssignedAtomTypes keyed by atom ID.
#
# Returns the object on success and nothing (false) when the atom types object
# reports that the assignment didn't succeed. Croaks for an unknown type.
#
sub _AssignAtomTypes {
    my($This) = @_;
    my($SpecifiedAtomTypes, $IdentifierType, $Atom, $AtomID, $IgnoreHydrogens);

    %{$This->{AssignedAtomTypes}} = ();
    $IgnoreHydrogens = 1;

    $SpecifiedAtomTypes = undef;
    $IdentifierType = $This->{AtomIdentifierType};

    # Constructors are called with Class->new(...) rather than the indirect
    # object form, whose parsing depends on what is known at compile time...
    if ($IdentifierType =~ /^AtomicInvariantsAtomTypes$/i) {
        $SpecifiedAtomTypes = AtomTypes::AtomicInvariantsAtomTypes->new('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens, 'AtomicInvariantsToUse' => $This->{AtomicInvariantsToUse});
    }
    elsif ($IdentifierType =~ /^DREIDINGAtomTypes$/i) {
        $SpecifiedAtomTypes = AtomTypes::DREIDINGAtomTypes->new('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens);
    }
    elsif ($IdentifierType =~ /^EStateAtomTypes$/i) {
        $SpecifiedAtomTypes = AtomTypes::EStateAtomTypes->new('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens);
    }
    elsif ($IdentifierType =~ /^FunctionalClassAtomTypes$/i) {
        $SpecifiedAtomTypes = AtomTypes::FunctionalClassAtomTypes->new('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens, 'FunctionalClassesToUse' => $This->{FunctionalClassesToUse});
    }
    elsif ($IdentifierType =~ /^MMFF94AtomTypes$/i) {
        $SpecifiedAtomTypes = AtomTypes::MMFF94AtomTypes->new('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens);
    }
    elsif ($IdentifierType =~ /^SLogPAtomTypes$/i) {
        $SpecifiedAtomTypes = AtomTypes::SLogPAtomTypes->new('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens);
    }
    elsif ($IdentifierType =~ /^SYBYLAtomTypes$/i) {
        $SpecifiedAtomTypes = AtomTypes::SYBYLAtomTypes->new('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens);
    }
    elsif ($IdentifierType =~ /^TPSAAtomTypes$/i) {
        $SpecifiedAtomTypes = AtomTypes::TPSAAtomTypes->new('Molecule' => $This->{Molecule}, 'IgnorePhosphorus' => 0, 'IgnoreSulfur' => 0);
    }
    elsif ($IdentifierType =~ /^UFFAtomTypes$/i) {
        $SpecifiedAtomTypes = AtomTypes::UFFAtomTypes->new('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens);
    }
    else {
        croak "Error: ${ClassName}->_AssignAtomTypes: Unknown atom indentifier type $This->{AtomIdentifierType}...";
    }

    # Assign atom types...
    $SpecifiedAtomTypes->AssignAtomTypes();

    # Make sure atom types assignment is successful...
    if (!$SpecifiedAtomTypes->IsAtomTypesAssignmentSuccessful()) {
        return;
    }

    # Collect assigned atom types...
    ATOM: for $Atom (@{$This->{Atoms}}) {
        if ($Atom->IsHydrogen()) {
            next ATOM;
        }
        $AtomID = $Atom->GetID();
        $This->{AssignedAtomTypes}{$AtomID} = $SpecifiedAtomTypes->GetAtomType($Atom);
    }

    return $This;
}

# Initialize topological atom pairs between specified distance range...
#
# Empties the atom pair IDs and counts and creates an empty count table for
# each distance from MinDistance to MaxDistance.
#
sub _InitializeToplogicalAtomPairs {
    my($This) = @_;
    my($Distance);

    @{$This->{AtomPairsIDs}} = ();
    %{$This->{AtomPairsCount}} = ();

    for $Distance ($This->{MinDistance} .. $This->{MaxDistance}) {
        %{$This->{AtomPairsCount}{$Distance}} = ();
    }

    return $This;
}

# Count atom pairs between minimum and maximum distance at each
# distance using distance matrix and atom types assigned to each heavy
# atom.
#
# Notes:
#   . The row and column indices of distance matrix correspond to atom indices.
#   . Distance value of BigNumber implies the atom is not connected to any other atom.
#   . Due to symmetric nature of distance matrix, only upper or lower triangular matrix
#     needs to be processed during identification and count of atom pairs.
#
# Only column indices larger than the row index are visited. Atoms without an
# assigned atom type (hydrogens) and distances outside MinDistance to
# MaxDistance are skipped. The two atom types of a pair are ordered by string
# comparison so that a pair is counted under one key irrespective of the order
# in which its atoms are visited.
#
sub _GenerateAndCountAtomPairs {
    my($This) = @_;
    my($NumOfRows, $NumOfCols, $RowIndex, $ColIndex, $DistanceMatrix, $Distance, $AtomID1, $AtomID2, $AtomType1, $AtomType2, $SkipIndexCheck);

    $DistanceMatrix = $This->{DistanceMatrix};
    ($NumOfRows, $NumOfCols) = $DistanceMatrix->GetSize();
    $SkipIndexCheck = 0;

    ROWINDEX: for $RowIndex (0 .. ($NumOfRows - 1)) {
        $AtomID1 = $This->{AtomIndexToID}{$RowIndex};
        next ROWINDEX if !exists $This->{AssignedAtomTypes}{$AtomID1};
        $AtomType1 = $This->{AssignedAtomTypes}{$AtomID1};

        COLINDEX: for $ColIndex (($RowIndex + 1) .. ($NumOfCols - 1)) {
            $AtomID2 = $This->{AtomIndexToID}{$ColIndex};
            next COLINDEX if !exists $This->{AssignedAtomTypes}{$AtomID2};

            $Distance = $DistanceMatrix->GetValue($RowIndex, $ColIndex, $SkipIndexCheck);
            next COLINDEX if $Distance < $This->{MinDistance} || $Distance > $This->{MaxDistance};

            $AtomType2 = $This->{AssignedAtomTypes}{$AtomID2};

            # $AtomType1 is reused for the rest of the row, so the ordered pair
            # is passed along without modifying it...
            if ($AtomType1 le $AtomType2) {
                $This->_SetAtomPairsCount($Distance, $AtomType1, $AtomType2);
            }
            else {
                $This->_SetAtomPairsCount($Distance, $AtomType2, $AtomType1);
            }
        }
    }

    return $This;
}

# Set atom pairs count for a specific atom type pair at a specific distance...
#
# The count starts at 1 for a pair seen for the first time and goes up by 1
# for each subsequent call; missing hash levels are created on demand.
#
sub _SetAtomPairsCount {
    my($This, $Distance, $AtomType1, $AtomType2) = @_;

    $This->{AtomPairsCount}{$Distance}{$AtomType1}{$AtomType2} += 1;

    return $This;
}

# Set final fingerprints vector...
#
# Atom pair IDs, "<AtomType1>-D<Distance>-<AtomType2>", and their counts are
# collected by increasing distance and sorted atom types, and added to the
# fingerprints vector as value IDs and values.
#
sub _SetFinalFingerprints {
    my($This) = @_;
    my($Distance, $AtomType1, $AtomType2, @Values);

    # Mark successful generation of fingerprints...
    $This->{FingerprintsGenerated} = 1;

    @Values = ();
    @{$This->{AtomPairsIDs}} = ();

    for $Distance ($This->{MinDistance} .. $This->{MaxDistance}) {
        for $AtomType1 (sort keys %{$This->{AtomPairsCount}{$Distance}}) {
            for $AtomType2 (sort keys %{$This->{AtomPairsCount}{$Distance}{$AtomType1}}) {
                push @{$This->{AtomPairsIDs}}, "${AtomType1}-D${Distance}-${AtomType2}";
                push @Values, $This->{AtomPairsCount}{$Distance}{$AtomType1}{$AtomType2};
            }
        }
    }

    # Add AtomPairsIDs and values to fingerprint vector...
    $This->{FingerprintsVector}->AddValueIDs($This->{AtomPairsIDs});
    $This->{FingerprintsVector}->AddValues(\@Values);

    return $This;
}

# Get atom pair IDs corresponding to atom pairs count values in fingerprint
# vector as an array or reference to an array...
#
# AtomPairIDs list differs in molecules and is generated during finalization
# of fingerprints to make sure the fingerprint vector containing count values
# matches the atom pairs array.
#
sub GetAtomPairIDs {
    my($This) = @_;

    return wantarray ? @{$This->{AtomPairsIDs}} : \@{$This->{AtomPairsIDs}};
}

# Cache appropriate molecule data...
#
# Stores the molecule's atoms and a map from atom index, the position of the
# atom in that list, to atom ID.
#
sub _SetupMoleculeDataCache {
    my($This) = @_;
    my(@AtomIDs);

    # Get all atoms including hydrogens to correctly map atom indices to atom IDs for
    # usage of distance matrix. The hydrogen atoms are ignored during processing...
    #
    @{$This->{Atoms}} = $This->GetMolecule()->GetAtoms();

    # Get all atom IDs...
    @AtomIDs = map { $_->GetID() } @{$This->{Atoms}};

    # Set AtomIndex to AtomID hash...
    %{$This->{AtomIndexToID}} = ();
    @{$This->{AtomIndexToID}}{ (0 .. $#AtomIDs) } = @AtomIDs;

    return $This;
}

# Clear cached molecule data by emptying the cached atoms list...
#
sub _ClearMoleculeDataCache {
    my($This) = @_;

    @{$This->{Atoms}} = ();

    return $This;
}

# Set atomic invariants to use for atom identifiers...
#
# Values are specified either as a reference to an array or as a list. Each
# value is checked using AtomTypes::AtomicInvariantsAtomTypes::IsAtomicInvariantAvailable;
# the first unsupported value causes a croak and nothing is stored.
#
# Returns the object, or nothing after a warning when no values are specified.
#
sub SetAtomicInvariantsToUse {
    my($This, @Values) = @_;
    my($SpecifiedAtomicInvariant, @SpecifiedAtomicInvariants);

    if (!@Values) {
        carp "Warning: ${ClassName}->SetAtomicInvariantsToUse: No values specified...";
        return;
    }

    @SpecifiedAtomicInvariants = (ref($Values[0]) =~ /^ARRAY/) ? @{$Values[0]} : @Values;

    # Make sure specified AtomicInvariants are valid...
    for $SpecifiedAtomicInvariant (@SpecifiedAtomicInvariants) {
        if (!AtomTypes::AtomicInvariantsAtomTypes::IsAtomicInvariantAvailable($SpecifiedAtomicInvariant)) {
            croak "Error: ${ClassName}->SetAtomicInvariantsToUse: Specified atomic invariant, $SpecifiedAtomicInvariant, is not supported...\n ";
        }
    }

    # Set atomic invariants to use...
    @{$This->{AtomicInvariantsToUse}} = @SpecifiedAtomicInvariants;

    return $This;
}

# Set functional classes to use for atom identifiers...
#
# Values are specified either as a reference to an array or as a list. Each
# value is checked using AtomTypes::FunctionalClassAtomTypes::IsFunctionalClassAvailable;
# the first unsupported value causes a croak and nothing is stored.
#
# Returns the object. A warning is issued and nothing is returned when no
# values are specified or AtomIdentifierType isn't FunctionalClassAtomTypes.
#
sub SetFunctionalClassesToUse {
    my($This, @Values) = @_;
    my($SpecifiedFunctionalClass, @SpecifiedFunctionalClasses);

    if (!@Values) {
        carp "Warning: ${ClassName}->SetFunctionalClassesToUse: No values specified...";
        return;
    }

    if ($This->{AtomIdentifierType} !~ /^FunctionalClassAtomTypes$/i) {
        carp "Warning: ${ClassName}->SetFunctionalClassesToUse: FunctionalClassesToUse can't be set for AtomIdentifierType of $This->{AtomIdentifierType}...";
        return;
    }

    @SpecifiedFunctionalClasses = (ref($Values[0]) =~ /^ARRAY/) ? @{$Values[0]} : @Values;

    # Make sure specified FunctionalClasses are valid...
    for $SpecifiedFunctionalClass (@SpecifiedFunctionalClasses) {
        if (!AtomTypes::FunctionalClassAtomTypes::IsFunctionalClassAvailable($SpecifiedFunctionalClass)) {
            croak "Error: ${ClassName}->SetFunctionalClassesToUse: Specified functional class, $SpecifiedFunctionalClass, is not supported...\n ";
        }
    }

    # Set functional classes to use...
    @{$This->{FunctionalClassesToUse}} = @SpecifiedFunctionalClasses;

    return $This;
}

# Initialize atom identifier type information...
#
# Current supported values:
#
# AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes,
# MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes
#
# Only AtomicInvariantsAtomTypes and FunctionalClassAtomTypes need additional
# setup; any unsupported value causes a croak.
#
sub _InitializeAtomIdentifierTypeInformation {
    my($This) = @_;

    if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
        $This->_InitializeAtomicInvariantsAtomTypesInformation();
    }
    elsif ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
        $This->_InitializeFunctionalClassAtomTypesInformation();
    }
    elsif ($This->{AtomIdentifierType} =~ /^(DREIDINGAtomTypes|EStateAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
        # Nothing to do for now...
    }
    else {
        croak "Error: ${ClassName}->_InitializeAtomIdentifierTypeInformation: Unknown atom indentifier type $This->{AtomIdentifierType}...";
    }

    return $This;
}

# Initialize atomic invariants atom types to use for generating atom identifiers...
#
# Let:
#   AS = Atom symbol corresponding to element symbol
#
#   X<n>   = Number of non-hydrogen atom neighbors or heavy atoms attached to atom
#   BO<n> = Sum of bond orders to non-hydrogen atom neighbors or heavy atoms attached to atom
#   LBO<n> = Largest bond order of non-hydrogen atom neighbors or heavy atoms attached to atom
#   SB<n> = Number of single bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
#   DB<n> = Number of double bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
#   TB<n> = Number of triple bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
#   H<n>   = Number of implicit and explicit hydrogens for atom
#   Ar     = Aromatic annotation indicating whether atom is aromatic
#   RA     = Ring atom annotation indicating whether atom is in a ring
#   FC<+n/-n> = Formal charge assigned to atom
#   MN<n> = Mass number indicating isotope other than most abundant isotope
#   SM<n> = Spin multiplicity of atom. Possible values: 1 (singlet), 2 (doublet) or 3 (triplet)
#
#   AtomTypeIDx = Atomic invariants atom type for atom x
#   AtomTypeIDy = Atomic invariants atom type for atom y
#   Dn   = Topological distance between atom x and y
#
# Then:
#
#   Atom pair AtomID generated by AtomTypes::AtomicInvariantsAtomTypes class corresponds to:
#
#     AS.X<n>.BO<n>.LBO<n>.<SB><n>.<DB><n>.<TB><n>.H<n>.Ar.RA.FC<+n/-n>.MN<n>.SM<n>
#
#   AtomPairID corresponds to:
#
#     AtomTypeIDx-D<n>-AtomTypeIDy
#
# Except for AS which is a required atomic invariant in atom pair AtomIDs, all other atomic invariants are
# optional. Default atomic invariants used for AtomID are: AS, X<n>, BO<n>, H<n>, FC<+n/-n>.
# AtomID specification doesn't include atomic invariants with zero or undefined values.
#
# Examples of atom pair AtomIDs:
#
#   O.X1.BO1.H1 - Hydroxyl oxygen in carboxylate with attached hydrogen and no explicit charge
#   O.X1.BO1.FC-1 - Hydroxyl oxygen in carboxylate with explicit negative charge
#   O.X1.BO2 - Carbonyl oxygen in carboxylate with double bond to carbon
#   O.X2.BO2 - Hydroxyl oxygen in carboxylate attached to carbonyl carbon and another heavy atom
#
#   C.X2.BO3.H1.Ar - Aromatic carbon
#
# Examples of AtomPairIDs:
#
#   C.X2.BO2.H3-D1-O.X1.BO1 - Carbon with two heavy atom neighbors attached to oxygen at bond distance 1 (methanol)
#
#   C.X2.BO3.H1.Ar-D3-C.X2.BO3.H1.Ar - Two aromatic carbons at bond distance 3 where each carbon has
#                                      two heavy atom neighbors and bond order of 3 (benzene)
#
sub _InitializeAtomicInvariantsAtomTypesInformation {
    my($This) = @_;
    my(@DefaultAtomicInvariants);

    # Atomic invariants used by default for generating atom IDs...
    @DefaultAtomicInvariants = qw(AS X BO H FC);

    # Replace whatever list is currently stored by the defaults...
    @{$This->{AtomicInvariantsToUse}} = @DefaultAtomicInvariants;

    return $This;
}

# Initialize functional class atom types, generated by AtomTypes::FunctionalClassAtomTypes
# class, to use for generating atom identifiers...
#
# Let:
#   HBD: HydrogenBondDonor
#   HBA: HydrogenBondAcceptor
#   PI :  PositivelyIonizable
#   NI : NegativelyIonizable
#   Ar : Aromatic
#   Hal : Halogen
#   H : Hydrophobic
#   RA : RingAtom
#   CA : ChainAtom
#
# Then:
#
#   Functional class atom type specification for an atom corresponds to:
#
#     Ar.CA.H.HBA.HBD.Hal.NI.PI.RA
#
#   Default functional classes used are: HBD, HBA, PI, NI, Ar, Hal
#
#   FunctionalAtomTypes are assigned using the following definitions [ Ref 60-61, Ref 65-66 ]:
#
#     HydrogenBondDonor: NH, NH2, OH
#     HydrogenBondAcceptor: N[!H], O
#     PositivelyIonizable: +, NH2
#     NegativelyIonizable: -, C(=O)OH, S(=O)OH, P(=O)OH
#
sub _InitializeFunctionalClassAtomTypesInformation {
    my($This) = @_;
    my(@DefaultFunctionalClasses);

    # Functional classes used by default for generating atom identifiers...
    @DefaultFunctionalClasses = qw(HBD HBA PI NI Ar Hal);

    # Replace whatever list is currently stored by the defaults...
    @{$This->{FunctionalClassesToUse}} = @DefaultFunctionalClasses;

    return $This;
}

# Return a string containing data for TopologicalAtomPairsFingerprints object...
#
# The string lists the fingerprint type, atom identifier type and distance
# range, followed by atomic invariants or functional classes information for
# those two identifier types, the number of values in the fingerprints vector
# and the stringified fingerprints vector.
#
sub StringifyTopologicalAtomPairsFingerprints {
    my($This) = @_;
    my($Text, $IdentifierType);

    $IdentifierType = $This->{AtomIdentifierType};

    # Type of fingerprint along with min and max distance...
    $Text = "Fingerprint type: $This->{Type}; AtomIdentifierType: $This->{AtomIdentifierType}"
        . "; MinDistance: $This->{MinDistance}; MaxDistance: $This->{MaxDistance}";

    if ($IdentifierType =~ /^AtomicInvariantsAtomTypes$/i) {
        my(@Order, @Described, %Available);

        @Order = AtomTypes::AtomicInvariantsAtomTypes::GetAtomicInvariantsOrder();
        %Available = AtomTypes::AtomicInvariantsAtomTypes::GetAvailableAtomicInvariants();
        @Described = map { "$_: $Available{$_}" } @Order;

        $Text .= "; AtomicInvariantsToUse: <" . TextUtil::JoinWords(\@{$This->{AtomicInvariantsToUse}}, ", ", 0) . ">";
        $Text .= "; AtomicInvariantsOrder: <" . TextUtil::JoinWords(\@Order, ", ", 0) . ">";
        $Text .= "; AvailableAtomicInvariants: <" . TextUtil::JoinWords(\@Described, ", ", 0) . ">";
    }
    elsif ($IdentifierType =~ /^FunctionalClassAtomTypes$/i) {
        my(@Order, @Described, %Available);

        @Order = AtomTypes::FunctionalClassAtomTypes::GetFunctionalClassesOrder();
        %Available = AtomTypes::FunctionalClassAtomTypes::GetAvailableFunctionalClasses();
        @Described = map { "$_: $Available{$_}" } @Order;

        $Text .= "; FunctionalClassesToUse: <" . TextUtil::JoinWords(\@{$This->{FunctionalClassesToUse}}, ", ", 0) . ">";
        $Text .= "; FunctionalClassesOrder: <" . TextUtil::JoinWords(\@Order, ", ", 0) . ">";
        $Text .= "; AvailableFunctionalClasses: <" . TextUtil::JoinWords(\@Described, ", ", 0) . ">";
    }

    # Total number of atom pairs...
    $Text .= "; NumOfAtomPairs: " . $This->{FingerprintsVector}->GetNumOfValues();

    # FingerprintsVector...
    $Text .= "; FingerprintsVector: < $This->{FingerprintsVector} >";

    return $Text;
}