1 package Fingerprints::TopologicalAtomTripletsFingerprints; 2 # 3 # File: TopologicalAtomTripletsFingerprints.pm 4 # Author: Manish Sud <msud@san.rr.com> 5 # 6 # Copyright (C) 2024 Manish Sud. All rights reserved. 7 # 8 # This file is part of MayaChemTools. 9 # 10 # MayaChemTools is free software; you can redistribute it and/or modify it under 11 # the terms of the GNU Lesser General Public License as published by the Free 12 # Software Foundation; either version 3 of the License, or (at your option) any 13 # later version. 14 # 15 # MayaChemTools is distributed in the hope that it will be useful, but without 16 # any warranty; without even the implied warranty of merchantability of fitness 17 # for a particular purpose. See the GNU Lesser General Public License for more 18 # details. 19 # 20 # You should have received a copy of the GNU Lesser General Public License 21 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or 22 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, 23 # Boston, MA, 02111-1307, USA. 24 # 25 26 use strict; 27 use Carp; 28 use Exporter; 29 use Fingerprints::Fingerprints; 30 use TextUtil (); 31 use Molecule; 32 use AtomTypes::AtomicInvariantsAtomTypes; 33 use AtomTypes::DREIDINGAtomTypes; 34 use AtomTypes::EStateAtomTypes; 35 use AtomTypes::FunctionalClassAtomTypes; 36 use AtomTypes::MMFF94AtomTypes; 37 use AtomTypes::SLogPAtomTypes; 38 use AtomTypes::SYBYLAtomTypes; 39 use AtomTypes::TPSAAtomTypes; 40 use AtomTypes::UFFAtomTypes; 41 42 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); 43 44 @ISA = qw(Fingerprints::Fingerprints Exporter); 45 @EXPORT = qw(); 46 @EXPORT_OK = qw(); 47 48 %EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]); 49 50 # Setup class variables... 51 my($ClassName); 52 _InitializeClass(); 53 54 # Overload Perl functions... 55 use overload '""' => 'StringifyTopologicalAtomTripletsFingerprints'; 56 57 # Class constructor... 
#
# Creates a new TopologicalAtomTripletsFingerprints object from name and value
# pairs. Each name is applied through its Set<Name> method; Molecule and
# AtomIdentifierType are required.
#
sub new {
    my($Class, %NamesAndValues) = @_;

    # Initialize object using the parent constructor and bless it into this class...
    my $This = $Class->SUPER::new();
    bless $This, ref($Class) || $Class;
    $This->_InitializeTopologicalAtomTripletsFingerprints();

    $This->_InitializeTopologicalAtomTripletsFingerprintsProperties(%NamesAndValues);

    return $This;
}

# Initialize object data...
#
# Sets default values for all object data used by this class; nothing is returned
# explicitly.
#
sub _InitializeTopologicalAtomTripletsFingerprints {
    my($This) = @_;

    # Type of fingerprint...
    $This->{Type} = 'TopologicalAtomTriplets';

    # Type of vector...
    $This->{VectorType} = 'FingerprintsVector';

    # Type of FingerprintsVector...
    $This->{FingerprintsVectorType} = 'NumericalValues';

    # Minimum and maximum bond distance between atom pairs in a triplet...
    $This->{MinDistance} = 1;
    $This->{MaxDistance} = 10;

    # Determines whether to apply triangle inequality to distance triplets...
    #
    $This->{UseTriangleInequality} = 0;

    # Atom identifier type to use for atom IDs in atom triplets...
    #
    # Currently supported values are: AtomicInvariantsAtomTypes, DREIDINGAtomTypes,
    # EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes,
    # SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes
    #
    $This->{AtomIdentifierType} = '';

    # Atom types assigned to each heavy atom...
    #
    %{$This->{AssignedAtomTypes}} = ();

    # All atom triplets between minimum and maximum distance...
    #
    @{$This->{AtomTripletsIDs}} = ();
    %{$This->{AtomTripletsCount}} = ();
}

# Initialize class ...
sub _InitializeClass {
    # Class name used as prefix in error and warning messages...
    $ClassName = __PACKAGE__;
}

# Initialize object properties....
#
# Calls Set<Name>(<Value>) for each specified name and value pair, then makes sure
# Molecule and AtomIdentifierType were specified and initializes the fingerprints
# vector.
#
# AtomIdentifierType is always applied first: SetAtomIdentifierType resets
# AtomicInvariantsToUse and FunctionalClassesToUse to their default values and
# SetFunctionalClassesToUse ignores its values until the identifier type is set.
# Walking the hash in its arbitrary internal order could therefore silently
# discard user specified values. Remaining names are applied in sorted order to
# make initialization reproducible.
#
sub _InitializeTopologicalAtomTripletsFingerprintsProperties {
    my($This, %NamesAndValues) = @_;
    my($Name, $MethodName, @Names);

    @Names = sort keys %NamesAndValues;
    @Names = ((grep { $_ eq 'AtomIdentifierType' } @Names), (grep { $_ ne 'AtomIdentifierType' } @Names));

    for $Name (@Names) {
        $MethodName = "Set${Name}";
        $This->$MethodName($NamesAndValues{$Name});
    }

    # Make sure molecule object was specified...
    if (!exists $NamesAndValues{Molecule}) {
        croak "Error: ${ClassName}->New: Object can't be instantiated without specifying molecule...";
    }
    if (!exists $NamesAndValues{AtomIdentifierType}) {
        croak "Error: ${ClassName}->New: Object can't be instantiated without specifying AtomIdentifierType...";
    }

    $This->_InitializeFingerprintsVector();

    return $This;
}

# Set minimum distance for atom triplets...
#
# Croaks unless the specified value is a positive integer; returns the object.
#
sub SetMinDistance {
    my($This, $Value) = @_;

    if (!TextUtil::IsPositiveInteger($Value)) {
        croak "Error: ${ClassName}->SetMinDistance: MinDistance value, $Value, is not valid: It must be a positive integer...";
    }
    $This->{MinDistance} = $Value;

    return $This;
}

# Set maximum distance for atom triplets...
#
# Croaks unless the specified value is a positive integer; returns the object.
#
sub SetMaxDistance {
    my($This, $Value) = @_;

    if (!TextUtil::IsPositiveInteger($Value)) {
        croak "Error: ${ClassName}->SetMaxDistance: MaxDistance value, $Value, is not valid: It must be a positive integer...";
    }
    $This->{MaxDistance} = $Value;

    return $This;
}

# Set atom identifier type..
#
# Croaks for an unsupported identifier type or when the identifier type has
# already been set. On success, stores the type and initializes the information
# specific to the identifier type.
#
sub SetAtomIdentifierType {
    my($This, $IdentifierType) = @_;

    if ($IdentifierType !~ /^(AtomicInvariantsAtomTypes|DREIDINGAtomTypes|EStateAtomTypes|FunctionalClassAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
        croak "Error: ${ClassName}->SetAtomIdentifierType: Specified value, $IdentifierType, for AtomIdentifierType is not vaild. Supported types in current release of MayaChemTools: AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, and UFFAtomTypes.";
    }

    # The identifier type can be set only once...
    if ($This->{AtomIdentifierType}) {
        croak "Error: ${ClassName}->SetAtomIdentifierType: Can't change initial atom identifier type: It's already set...";
    }

    $This->{AtomIdentifierType} = $IdentifierType;

    # Initialize atom identifier type information...
    $This->_InitializeAtomIdentifierTypeInformation();

    return $This;
}

# Generate fingerprints description...
#
# Returns the explicitly set description, when available, or a description
# generated from the fingerprints type, atom identifier type and distance range.
#
sub GetDescription {
    my($This) = @_;

    # Is description explicitly set?
    if (exists $This->{Description}) {
        return $This->{Description};
    }

    # Generate fingerprints description...

    return "$This->{Type}:$This->{AtomIdentifierType}:MinDistance$This->{MinDistance}:MaxDistance$This->{MaxDistance}";
}

# Generate topological atom triplets fingerprints...
#
# Let:
#
#   AT = Any of the supported atom types
#
#   ATx = Atom type for atom x
#   ATy = Atom type for atom y
#   ATz = Atom type for atom z
#
#   Dxy = Distance between Px and Py
#   Dxz = Distance between Px and Pz
#   Dyz = Distance between Py and Pz
#
# Then:
#
#   ATx-Dyz-ATy-Dxz-ATz-Dxy = Atom triplet ID for atom types ATx, ATy and ATz
#
# Methodology:
#   . Generate a distance matrix.
#   . Assign atom types to all the atoms.
#   . Using distance matrix and atom types, count occurrence of unique atom triplets
#     within specified distance range along with optional triangle inequality
#
# Notes:
#   . Hydrogen atoms are ignored during the fingerprint generation.
#   .
#     For a molecule containing N atoms with all different atom types, the total number of
#     possible unique atom triplets without applying triangle inequality check corresponds to:
#
#     Factorial( N ) / ( Factorial( N - 3 ) * Factorial (3) )
#
#     However, due to similar atom types assigned to atoms in a molecule for a specific atom
#     typing methodology and specified distance range used during fingerprints generation, the
#     actual number of unique triplets is usually smaller than the theoretical limit.
#
# Croaks when MinDistance is greater than MaxDistance. When the distance matrix
# can't be generated or atom types can't be assigned, a warning is issued, the
# cached molecule data is cleared and the object is returned without marking
# fingerprints as generated.
#
sub GenerateFingerprints {
    my($This) = @_;

    if ($This->{MinDistance} > $This->{MaxDistance}) {
        croak "Error: ${ClassName}->GenerateFingerprints: No fingerprints generated: MinDistance, $This->{MinDistance}, must be <= MaxDistance, $This->{MaxDistance}...";
    }

    # Cache appropriate molecule data...
    $This->_SetupMoleculeDataCache();

    # Generate distance matrix...
    if (!$This->_SetupDistanceMatrix()) {
        # Don't leave cached atoms behind on failure...
        $This->_ClearMoleculeDataCache();
        carp "Warning: ${ClassName}->GenerateFingerprints: $This->{AtomIdentifierType} fingerprints generation didn't succeed: Couldn't generate distance matrix...";
        return $This;
    }

    # Assign atom types to all heavy atoms...
    if (!$This->_AssignAtomTypes()) {
        # Don't leave cached atoms behind on failure...
        $This->_ClearMoleculeDataCache();
        carp "Warning: ${ClassName}->GenerateFingerprints: $This->{AtomIdentifierType} fingerprints generation didn't succeed: Couldn't assign valid $This->{AtomIdentifierType} to all atoms...";
        return $This;
    }

    # Initialize values of topological atom triplets...
    $This->_InitializeToplogicalAtomTriplets();

    # Count atom triplets...
    $This->_GenerateAndCountAtomTriplets();

    # Set final fingerprints...
    $This->_SetFinalFingerprints();

    # Clear cached molecule data...
    $This->_ClearMoleculeDataCache();

    return $This;
}

# Setup distance matrix...
#
# Retrieves the distance matrix from the molecule and stores it in the object.
# Returns the object on success and a false value when no distance matrix is
# available.
#
sub _SetupDistanceMatrix {
    my($This) = @_;

    $This->{DistanceMatrix} = $This->GetMolecule()->GetDistanceMatrix();

    if (!$This->{DistanceMatrix}) {
        return;
    }

    return $This;
}

# Assign appropriate atom types to all heavy atoms...
#
# Instantiates the atom types class corresponding to AtomIdentifierType, assigns
# atom types and collects the assigned type for each non-hydrogen atom in
# AssignedAtomTypes keyed by atom ID. Returns the object on success and a false
# value when atom types assignment is not successful. Croaks for an unknown
# atom identifier type.
#
sub _AssignAtomTypes {
    my($This) = @_;
    my($SpecifiedAtomTypes, $Atom, $AtomID, $IgnoreHydrogens, $AtomTypesName, $SupportedName, $AtomTypesClass, @CommonParameters, @SupportedNames, %ParametersFor);

    %{$This->{AssignedAtomTypes}} = ();
    $IgnoreHydrogens = 1;

    # Constructor parameters for each supported atom types class. TPSAAtomTypes is
    # the only class which is not passed the IgnoreHydrogens parameter...
    @CommonParameters = ('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens);
    %ParametersFor = (
        'AtomicInvariantsAtomTypes' => [@CommonParameters, 'AtomicInvariantsToUse' => $This->{AtomicInvariantsToUse}],
        'DREIDINGAtomTypes' => [@CommonParameters],
        'EStateAtomTypes' => [@CommonParameters],
        'FunctionalClassAtomTypes' => [@CommonParameters, 'FunctionalClassesToUse' => $This->{FunctionalClassesToUse}],
        'MMFF94AtomTypes' => [@CommonParameters],
        'SLogPAtomTypes' => [@CommonParameters],
        'SYBYLAtomTypes' => [@CommonParameters],
        'TPSAAtomTypes' => ['Molecule' => $This->{Molecule}, 'IgnorePhosphorus' => 0, 'IgnoreSulfur' => 0],
        'UFFAtomTypes' => [@CommonParameters],
    );
    @SupportedNames = sort keys %ParametersFor;

    # Match identifier type ignoring case...
    $AtomTypesName = undef;
    SUPPORTEDNAME: for $SupportedName (@SupportedNames) {
        if ($This->{AtomIdentifierType} =~ /^${SupportedName}$/i) {
            $AtomTypesName = $SupportedName;
            last SUPPORTEDNAME;
        }
    }
    if (!defined $AtomTypesName) {
        croak "Error: ${ClassName}->_AssignAtomTypes: Unknown atom indentifier type $This->{AtomIdentifierType}...";
    }

    $AtomTypesClass = "AtomTypes::${AtomTypesName}";
    $SpecifiedAtomTypes = $AtomTypesClass->new(@{$ParametersFor{$AtomTypesName}});

    # Assign atom types...
    $SpecifiedAtomTypes->AssignAtomTypes();

    # Make sure atom types assignment is successful...
    if (!$SpecifiedAtomTypes->IsAtomTypesAssignmentSuccessful()) {
        return;
    }

    # Collect assigned atom types...
    ATOM: for $Atom (@{$This->{Atoms}}) {
        if ($Atom->IsHydrogen()) {
            next ATOM;
        }
        $AtomID = $Atom->GetID();
        $This->{AssignedAtomTypes}{$AtomID} = $SpecifiedAtomTypes->GetAtomType($Atom);
    }

    return $This;
}

# Initialize topological atom triplets between specified distance range...
#
# Empties the atom triplet IDs and atom triplet counts collected so far.
#
sub _InitializeToplogicalAtomTriplets {
    my($This) = @_;

    @{$This->{AtomTripletsIDs}} = ();
    %{$This->{AtomTripletsCount}} = ();

    return $This;
}

# Count atom triplets between minimum and maximum distance at each
# distance using distance matrix and atom types assigned to each heavy
# atom.
#
# Walks over all index ordered combinations of three atoms with assigned atom
# types, keeps the ones whose three pairwise distances fall in the MinDistance
# to MaxDistance range and, when UseTriangleInequality is set, satisfy the
# triangle inequality, and increments the count for the corresponding atom
# triplet ID.
#
sub _GenerateAndCountAtomTriplets {
    my($This) = @_;

    my $LastAtomIndex = @{$This->{Atoms}} - 1;
    my $Matrix = $This->{DistanceMatrix};
    my $SkipIndexCheck = 0;

    FIRSTATOM: for my $Index1 (0 .. $LastAtomIndex) {
        my $ID1 = $This->{AtomIndexToID}{$Index1};
        exists($This->{AssignedAtomTypes}{$ID1}) or next FIRSTATOM;
        my $Type1 = $This->{AssignedAtomTypes}{$ID1};

        SECONDATOM: for my $Index2 (($Index1 + 1) .. $LastAtomIndex) {
            my $ID2 = $This->{AtomIndexToID}{$Index2};
            exists($This->{AssignedAtomTypes}{$ID2}) or next SECONDATOM;
            my $Type2 = $This->{AssignedAtomTypes}{$ID2};

            my $D12 = $Matrix->GetValue($Index1, $Index2, $SkipIndexCheck);
            next SECONDATOM if ($D12 < $This->{MinDistance} || $D12 > $This->{MaxDistance});

            THIRDATOM: for my $Index3 (($Index2 + 1) .. $LastAtomIndex) {
                my $ID3 = $This->{AtomIndexToID}{$Index3};
                exists($This->{AssignedAtomTypes}{$ID3}) or next THIRDATOM;
                my $Type3 = $This->{AssignedAtomTypes}{$ID3};

                my $D13 = $Matrix->GetValue($Index1, $Index3, $SkipIndexCheck);
                my $D23 = $Matrix->GetValue($Index2, $Index3, $SkipIndexCheck);

                next THIRDATOM if ($D13 < $This->{MinDistance} || $D13 > $This->{MaxDistance});
                next THIRDATOM if ($D23 < $This->{MinDistance} || $D23 > $This->{MaxDistance});

                if ($This->{UseTriangleInequality}) {
                    $This->_DoDistancesSatisfyTriangleInequality($D12, $D13, $D23) or next THIRDATOM;
                }

                # Each atom type is paired with the distance between the other two atoms...
                my $TripletID = $This->_GetAtomTripletID($Type1, $D23, $Type2, $D13, $Type3, $D12);
                $This->{AtomTripletsCount}{$TripletID} += 1;
            }
        }
    }
    return $This;
}

# Check triangle inequality...
#
# Returns 1 when each distance is strictly greater than the absolute difference
# and strictly less than the sum of the other two distances; otherwise, returns 0.
#
sub _DoDistancesSatisfyTriangleInequality {
    my($This, $Distance1, $Distance2, $Distance3) = @_;

    my @Arrangements = (
        [$Distance1, $Distance2, $Distance3],
        [$Distance2, $Distance1, $Distance3],
        [$Distance3, $Distance1, $Distance2],
    );

    for my $Arrangement (@Arrangements) {
        my($Side, $OtherA, $OtherB) = @{$Arrangement};
        if ( !($Side > abs($OtherA - $OtherB) && $Side < ($OtherA + $OtherB)) ) {
            return 0;
        }
    }
    return 1;
}

# Get atom triplet ID corresponding to atom types and distances corresponding to atom triplet...
#
# Each atom type is combined with its distance as <AtomType>-D<Distance>; the
# three resulting strings are sorted and joined by hyphens.
#
sub _GetAtomTripletID {
    my($This, $ATx, $Dyz, $ATy, $Dxz, $ATz, $Dxy) = @_;

    my @SortedParts = sort("${ATx}-D${Dyz}", "${ATy}-D${Dxz}", "${ATz}-D${Dxy}");

    return join("-", @SortedParts);
}

# Set final fingerprints vector...
#
# Marks fingerprints as generated and adds sorted atom triplet IDs along with
# their counts to the fingerprints vector.
#
sub _SetFinalFingerprints {
    my($This) = @_;

    # Mark successful generation of fingerprints...
    $This->{FingerprintsGenerated} = 1;

    my @SortedTripletIDs = sort keys %{$This->{AtomTripletsCount}};
    my @Counts = map { $This->{AtomTripletsCount}{$_} } @SortedTripletIDs;

    @{$This->{AtomTripletsIDs}} = @SortedTripletIDs;

    # Add AtomTripletsIDs and values to fingerprint vector...
    $This->{FingerprintsVector}->AddValueIDs(\@{$This->{AtomTripletsIDs}});
    $This->{FingerprintsVector}->AddValues(\@Counts);

    return $This;
}

# Get atom triplet IDs corresponding to atom triplets count values in fingerprint
# vector as an array or reference to an array...
#
# AtomTripletIDs list differs in molecules and is generated during finalization
# of fingerprints to make sure the fingerprint vector containing count values
# matches the atom triplets array.
#
sub GetAtomTripletIDs {
    my($This) = @_;

    if (wantarray) {
        return @{$This->{AtomTripletsIDs}};
    }
    return \@{$This->{AtomTripletsIDs}};
}

# Cache appropriate molecule data...
#
# Stores all atoms of the molecule along with a map from atom index to atom ID.
#
sub _SetupMoleculeDataCache {
    my($This) = @_;

    # Get all atoms including hydrogens to correctly map atom indices to atom IDs for
    # usage of distance matrix. The hydrogen atoms are ignored during processing...
    #
    @{$This->{Atoms}} = $This->GetMolecule()->GetAtoms();

    # Get all atom IDs in atom order...
    my @IDs = map { $_->GetID() } @{$This->{Atoms}};

    # Set AtomIndex to AtomID hash...
    %{$This->{AtomIndexToID}} = map { ($_ => $IDs[$_]) } (0 .. $#IDs);

    return $This;
}

# Set atomic invariants to use for atom identifiers...
#
# Values may be specified as a list or as a reference to an array. A warning is
# issued and nothing is changed when no values are specified. Croaks on the first
# atomic invariant which is not available; otherwise, replaces the current
# atomic invariants with the specified ones.
#
sub SetAtomicInvariantsToUse {
    my($This, @Values) = @_;

    if (!@Values) {
        carp "Warning: ${ClassName}->SetAtomicInvariantsToUse: No values specified...";
        return;
    }

    my @SpecifiedAtomicInvariants;
    if (ref($Values[0]) =~ /^ARRAY/) {
        @SpecifiedAtomicInvariants = @{$Values[0]};
    }
    else {
        @SpecifiedAtomicInvariants = @Values;
    }

    # Make sure specified AtomicInvariants are valid before changing the object...
    for my $SpecifiedAtomicInvariant (@SpecifiedAtomicInvariants) {
        if (!AtomTypes::AtomicInvariantsAtomTypes::IsAtomicInvariantAvailable($SpecifiedAtomicInvariant)) {
            croak "Error: ${ClassName}->SetAtomicInvariantsToUse: Specified atomic invariant, $SpecifiedAtomicInvariant, is not supported...\n ";
        }
    }

    # Set atomic invariants to use...
    @{$This->{AtomicInvariantsToUse}} = @SpecifiedAtomicInvariants;

    return $This;
}

# Set functional classes to use for atom identifiers...
#
# Values may be specified as a list or as a reference to an array. A warning is
# issued and nothing is changed when no values are specified or when the atom
# identifier type is not FunctionalClassAtomTypes. Croaks on the first functional
# class which is not available; otherwise, replaces the current functional
# classes with the specified ones.
#
sub SetFunctionalClassesToUse {
    my($This, @Values) = @_;

    if (!@Values) {
        carp "Warning: ${ClassName}->SetFunctionalClassesToUse: No values specified...";
        return;
    }

    if ($This->{AtomIdentifierType} !~ /^FunctionalClassAtomTypes$/i) {
        carp "Warning: ${ClassName}->SetFunctionalClassesToUse: FunctionalClassesToUse can't be set for InitialAtomIdentifierType of $This->{AtomIdentifierType}...";
        return;
    }

    my @SpecifiedFunctionalClasses;
    if (ref($Values[0]) =~ /^ARRAY/) {
        @SpecifiedFunctionalClasses = @{$Values[0]};
    }
    else {
        @SpecifiedFunctionalClasses = @Values;
    }

    # Make sure specified FunctionalClasses are valid before changing the object...
    for my $SpecifiedFunctionalClass (@SpecifiedFunctionalClasses) {
        if (!AtomTypes::FunctionalClassAtomTypes::IsFunctionalClassAvailable($SpecifiedFunctionalClass)) {
            croak "Error: ${ClassName}->SetFunctionalClassesToUse: Specified functional class, $SpecifiedFunctionalClass, is not supported...\n ";
        }
    }

    # Set functional classes to use...
    @{$This->{FunctionalClassesToUse}} = @SpecifiedFunctionalClasses;

    return $This;
}

# Initialize atom identifier type information...
#
# Current supported values:
#
# AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes,
# MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes
#
# Sets up default information for AtomicInvariantsAtomTypes and
# FunctionalClassAtomTypes; nothing is needed for the other supported types.
# Croaks for an unknown atom identifier type.
#
sub _InitializeAtomIdentifierTypeInformation {
    my($This) = @_;

    if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
        $This->_InitializeAtomicInvariantsAtomTypesInformation();
        return $This;
    }

    if ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
        $This->_InitializeFunctionalClassAtomTypesInformation();
        return $This;
    }

    if ($This->{AtomIdentifierType} =~ /^(DREIDINGAtomTypes|EStateAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
        # Nothing to do for now...
        return $This;
    }

    croak "Error: ${ClassName}->_InitializeAtomIdentifierTypeInformation: Unknown atom indentifier type $This->{AtomIdentifierType}...";
}

# Initialize atomic invariants atom types to use for generating atom IDs in atom triplets...
#
# Let:
#   AS = Atom symbol corresponding to element symbol
#
#   X<n>   = Number of non-hydrogen atom neighbors or heavy atoms attached to atom
#   BO<n> = Sum of bond orders to non-hydrogen atom neighbors or heavy atoms attached to atom
#   LBO<n> = Largest bond order of non-hydrogen atom neighbors or heavy atoms attached to atom
#   SB<n> = Number of single bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
#   DB<n> = Number of double bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
#   TB<n> = Number of triple bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
#   H<n>   = Number of implicit and explicit hydrogens for atom
#   Ar     = Aromatic annotation indicating whether atom is aromatic
#   RA     = Ring atom annotation indicating whether atom is in a ring
#   FC<+n/-n> = Formal charge assigned to atom
#   MN<n> = Mass number indicating isotope other than most abundant isotope
#   SM<n> = Spin multiplicity of atom. Possible values: 1 (singlet), 2 (doublet) or 3 (triplet)
#
#   ATx = Atomic invariants atom type for atom x
#   ATy = Atomic invariants atom type for atom y
#   ATz = Atomic invariants atom type for atom z
#
#   Dxy = Distance between Px and Py
#   Dxz = Distance between Px and Pz
#   Dyz = Distance between Py and Pz
#
# Then:
#
#   Atom triplet AtomID generated by AtomTypes::AtomicInvariantsAtomTypes class corresponds to:
#
#     AS.X<n>.BO<n>.LBO<n>.<SB><n>.<DB><n>.<TB><n>.H<n>.Ar.RA.FC<+n/-n>.MN<n>.SM<n>
#
#   Topological atom triplet ID between atom IDs ATx, ATy and ATz corresponds to:
#
#     ATx-Dyz-ATy-Dxz-ATz-Dxy
#
# Except for AS which is a required atomic invariant in atom triplet AtomIDs, all other atomic invariants are
# optional. Default atomic invariants used for AtomID are: AS, X<n>, BO<n>, H<n>, FC<+n/-n>.
# AtomID specification doesn't include atomic invariants with zero or undefined values.
#
# Examples of atom triplet AtomIDs:
#
#   O.X1.BO1.H1 - Hydroxyl oxygen in carboxylate with attached hydrogen and no explicit charge
#   O.X1.BO1.FC-1 - Hydroxyl oxygen in carboxylate with explicit negative charge
#   O.X1.BO2 - Carbonyl oxygen in carboxylate with double bond to carbon
#   O.X2.BO2 - Hydroxyl oxygen in carboxylate attached to carbonyl carbon and another heavy atom
#
#   C.X2.BO3.H1.Ar - Aromatic carbon
#
sub _InitializeAtomicInvariantsAtomTypesInformation {
    my($This) = @_;

    # Replace any current atomic invariants by the default ones used for
    # generating atom triplet atom IDs: AS, X, BO, H, FC
    #
    @{$This->{AtomicInvariantsToUse}} = qw(AS X BO H FC);

    return $This;
}

# Initialize functional class atom types, generated by AtomTypes::FunctionalClassAtomTypes
# class, to use for generating atom identifiers...
#
# Let:
#   HBD: HydrogenBondDonor
#   HBA: HydrogenBondAcceptor
#   PI :  PositivelyIonizable
#   NI : NegativelyIonizable
#   Ar : Aromatic
#   Hal : Halogen
#   H : Hydrophobic
#   RA : RingAtom
#   CA : ChainAtom
#
# Then:
#
#   Functional class atom type specification for an atom corresponds to:
#
#     Ar.CA.H.HBA.HBD.Hal.NI.PI.RA
#
#   Default functional classes used are: HBD, HBA, PI, NI, Ar, Hal
#
#   FunctionalAtomTypes are assigned using the following definitions [ Ref 60-61, Ref 65-66 ]:
#
#     HydrogenBondDonor: NH, NH2, OH
#     HydrogenBondAcceptor: N[!H], O
#     PositivelyIonizable: +, NH2
#     NegativelyIonizable: -, C(=O)OH, S(=O)OH, P(=O)OH
#
sub _InitializeFunctionalClassAtomTypesInformation {
    my($This) = @_;

    # Replace any current functional classes by the default ones used for
    # generating atom identifiers: HBD, HBA, PI, NI, Ar, Hal
    #
    @{$This->{FunctionalClassesToUse}} = qw(HBD HBA PI NI Ar Hal);

    return $This;
}

# Clear cached molecule data...
#
# Empties the list of atoms cached in the object.
#
sub _ClearMoleculeDataCache {
    my($This) = @_;

    @{$This->{Atoms}} = ();

    return $This;
}

# Return a string containing data for TopologicalAtomTripletsFingerprints object...
#
# The string contains fingerprint type, atom identifier type, distance range,
# triangle inequality usage, information specific to AtomicInvariantsAtomTypes
# or FunctionalClassAtomTypes, number of atom triplets and the fingerprints
# vector.
#
sub StringifyTopologicalAtomTripletsFingerprints {
    my($This) = @_;
    my(@Pieces);

    # Type of fingerprint...
    push @Pieces, "Fingerprint type: $This->{Type}; AtomIdentifierType: $This->{AtomIdentifierType}";

    # Min and max distance...
    my $TriangleInequalityLabel;
    if ($This->{UseTriangleInequality}) {
        $TriangleInequalityLabel = "Yes";
    }
    else {
        $TriangleInequalityLabel = "No";
    }
    push @Pieces, "; MinDistance: $This->{MinDistance}; MaxDistance: $This->{MaxDistance}; UseTriangleInequality: " . $TriangleInequalityLabel;

    if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
        my @AtomicInvariantsOrder = AtomTypes::AtomicInvariantsAtomTypes::GetAtomicInvariantsOrder();
        my %AvailableAtomicInvariants = AtomTypes::AtomicInvariantsAtomTypes::GetAvailableAtomicInvariants();

        my @AtomicInvariants = map {
            my $AtomicInvariant = $_;
            "$AtomicInvariant: $AvailableAtomicInvariants{$AtomicInvariant}";
        } @AtomicInvariantsOrder;

        push @Pieces, "; AtomicInvariantsToUse: <" . TextUtil::JoinWords(\@{$This->{AtomicInvariantsToUse}}, ", ", 0) . ">";
        push @Pieces, "; AtomicInvariantsOrder: <" . TextUtil::JoinWords(\@AtomicInvariantsOrder, ", ", 0) . ">";
        push @Pieces, "; AvailableAtomicInvariants: <" . TextUtil::JoinWords(\@AtomicInvariants, ", ", 0) . ">";
    }
    elsif ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
        my @FunctionalClassesOrder = AtomTypes::FunctionalClassAtomTypes::GetFunctionalClassesOrder();
        my %AvailableFunctionalClasses = AtomTypes::FunctionalClassAtomTypes::GetAvailableFunctionalClasses();

        my @FunctionalClasses = map {
            my $FunctionalClass = $_;
            "$FunctionalClass: $AvailableFunctionalClasses{$FunctionalClass}";
        } @FunctionalClassesOrder;

        push @Pieces, "; FunctionalClassesToUse: <" . TextUtil::JoinWords(\@{$This->{FunctionalClassesToUse}}, ", ", 0) . ">";
        push @Pieces, "; FunctionalClassesOrder: <" . TextUtil::JoinWords(\@FunctionalClassesOrder, ", ", 0) . ">";
        push @Pieces, "; AvailableFunctionalClasses: <" . TextUtil::JoinWords(\@FunctionalClasses, ", ", 0) . ">";
    }

    # Total number of atom triplets...
    push @Pieces, "; NumOfAtomTriplets: " . $This->{FingerprintsVector}->GetNumOfValues();

    # FingerprintsVector...
    push @Pieces, "; FingerprintsVector: < $This->{FingerprintsVector} >";

    return join('', @Pieces);
}