package Fingerprints::AtomNeighborhoodsFingerprints;
#
# File: AtomNeighborhoodsFingerprints.pm
# Author: Manish Sud <msud@san.rr.com>
#
# Copyright (C) 2024 Manish Sud. All rights reserved.
#
# This file is part of MayaChemTools.
#
# MayaChemTools is free software; you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation; either version 3 of the License, or (at your option) any
# later version.
#
# MayaChemTools is distributed in the hope that it will be useful, but without
# any warranty; without even the implied warranty of merchantability of fitness
# for a particular purpose.  See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
# write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
# Boston, MA, 02111-1307, USA.
#

use strict;
use Carp;
use Exporter;
use Fingerprints::Fingerprints;
use TextUtil ();
use Molecule;
use AtomTypes::AtomicInvariantsAtomTypes;
use AtomTypes::DREIDINGAtomTypes;
use AtomTypes::EStateAtomTypes;
use AtomTypes::FunctionalClassAtomTypes;
use AtomTypes::MMFF94AtomTypes;
use AtomTypes::SLogPAtomTypes;
use AtomTypes::SYBYLAtomTypes;
use AtomTypes::TPSAAtomTypes;
use AtomTypes::UFFAtomTypes;

use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);

@ISA = qw(Fingerprints::Fingerprints Exporter);
@EXPORT = qw();
@EXPORT_OK = qw();

%EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]);

# Setup class variables...
my($ClassName);
_InitializeClass();

# Overload Perl functions...
use overload '""' => 'StringifyAtomNeighborhoodsFingerprints';

# Class constructor...
#
# Arguments are name/value pairs; each name N is applied by calling the
# matching SetN method. Molecule and AtomIdentifierType are required and
# Size is rejected (see _InitializeAtomNeighborhoodsFingerprintsProperties).
#
# Returns the newly created object.
#
sub new {
  my($Class, %NamesAndValues) = @_;

  # Initialize object...
  my $This = $Class->SUPER::new();
  bless $This, ref($Class) || $Class;
  $This->_InitializeAtomNeighborhoodsFingerprints();

  $This->_InitializeAtomNeighborhoodsFingerprintsProperties(%NamesAndValues);

  return $This;
}

# Initialize object data to its default values...
#
sub _InitializeAtomNeighborhoodsFingerprints {
  my($This) = @_;

  # Type of fingerprint...
  $This->{Type} = 'AtomNeighborhoods';

  # Type of vector...
  $This->{VectorType} = 'FingerprintsVector';

  # Type of FingerprintsVector...
  $This->{FingerprintsVectorType} = 'AlphaNumericalValues';

  # Minimum and maximum atomic neighborhoods radii...
  $This->{MinNeighborhoodRadius} = 0;
  $This->{MaxNeighborhoodRadius} = 2;

  # Atom identifier type to use for atom IDs in atom neighborhood atoms...
  #
  # Currently supported values are: AtomicInvariantsAtomTypes, DREIDINGAtomTypes,
  # EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes,
  # SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes
  #
  $This->{AtomIdentifierType} = '';

  # Atom types assigned to each heavy atom...
  %{$This->{AssignedAtomTypes}} = ();

  # Atom neighborhoods within specified atom radii...
  %{$This->{AtomNeighborhoods}} = ();

  # Atom neighborhoods atom types count at different neighborhoods...
  %{$This->{NeighborhoodAtomTypesCount}} = ();

  # Atom neighborhood identifiers using specified atom identifier types methodology...
  @{$This->{AtomNeighborhoodsIdentifiers}} = ();
}

# Initialize class ...
sub _InitializeClass {
  #Class name...
  $ClassName = __PACKAGE__;
}

# Initialize object properties from the constructor's name/value pairs...
#
# Each name N is applied through the corresponding SetN method. The order in
# which the setters run matters:
#
#   . SetAtomIdentifierType resets AtomicInvariantsToUse/FunctionalClassesToUse
#     to their defaults, so it would wipe out values applied before it.
#   . SetFunctionalClassesToUse ignores its values unless AtomIdentifierType
#     has already been set to FunctionalClassAtomTypes.
#
# Hash iteration order is not predictable, so AtomIdentifierType is applied
# first and the remaining properties follow in sorted name order.
#
# Croaks when Molecule or AtomIdentifierType is missing or when Size is
# specified. Returns the object.
#
sub _InitializeAtomNeighborhoodsFingerprintsProperties {
  my($This, %NamesAndValues) = @_;

  my(@PropertyNames) = sort keys %NamesAndValues;
  if (exists $NamesAndValues{AtomIdentifierType}) {
    @PropertyNames = ('AtomIdentifierType', grep { $_ ne 'AtomIdentifierType' } @PropertyNames);
  }

  for my $Name (@PropertyNames) {
    my $MethodName = "Set${Name}";
    $This->$MethodName($NamesAndValues{$Name});
  }

  # Make sure molecule object was specified...
  if (!exists $NamesAndValues{Molecule}) {
    croak "Error: ${ClassName}->New: Object can't be instantiated without specifying molecule...";
  }
  if (exists $NamesAndValues{Size}) {
    croak "Error: ${ClassName}->New: Object can't be instantiated with a user specified size: It's an arbitrary length vector...";
  }
  if (!exists $NamesAndValues{AtomIdentifierType}) {
    croak "Error: ${ClassName}->New: Object can't be instantiated without specifying AtomIdentifierType...";
  }

  $This->_InitializeFingerprintsVector();

  return $This;
}

# Set atom identifier type..
#
# The value must be one of the supported atom type names (matched without
# regard to case) and may be set only once per object. Setting it also
# initializes the identifier type specific defaults.
#
# Croaks on an unsupported value or when a type is already set. Returns the
# object.
#
sub SetAtomIdentifierType {
  my($This, $IdentifierType) = @_;

  if ($IdentifierType !~ /^(AtomicInvariantsAtomTypes|DREIDINGAtomTypes|EStateAtomTypes|FunctionalClassAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
    croak "Error: ${ClassName}->SetAtomIdentifierType: Specified value, $IdentifierType, for AtomIdentifierType is not vaild. Supported types in current release of MayaChemTools: AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, and UFFAtomTypes.";
  }

  if ($This->{AtomIdentifierType}) {
    croak "Error: ${ClassName}->SetAtomIdentifierType: Can't change intial atom identifier type:  It's already set...";
  }

  $This->{AtomIdentifierType} = $IdentifierType;

  # Initialize atom identifier type information...
  $This->_InitializeAtomIdentifierTypeInformation();

  return $This;
}

# Set minimum atom neighborhood radius...
#
# Accepts a non-negative integer; croaks otherwise. Returns the object.
#
sub SetMinNeighborhoodRadius {
  my($This, $Value) = @_;

  croak "Error: ${ClassName}->SetMinNeighborhoodRadius: MinNeighborhoodRadius value, $Value, is not valid:  It must be an integer..."
    unless TextUtil::IsInteger($Value);

  croak "Error: ${ClassName}->SetMinNeighborhoodRadius: MinNeighborhoodRadius value, $Value, is not valid:  It must be >= 0..."
    if $Value < 0;

  $This->{MinNeighborhoodRadius} = $Value;

  return $This;
}

# Set maximum atom neighborhood radius...
#
# Accepts a non-negative integer; croaks otherwise. Returns the object.
#
sub SetMaxNeighborhoodRadius {
  my($This, $Value) = @_;

  croak "Error: ${ClassName}->SetMaxNeighborhoodRadius: MaxNeighborhoodRadius value, $Value, is not valid:  It must be an integer..."
    unless TextUtil::IsInteger($Value);

  croak "Error: ${ClassName}->SetMaxNeighborhoodRadius: MaxNeighborhoodRadius value, $Value, is not valid:  It must be >= 0..."
    if $Value < 0;

  $This->{MaxNeighborhoodRadius} = $Value;

  return $This;
}

# Generate fingerprints description...
#
# An explicitly set description wins; otherwise one is assembled from the
# fingerprint type, the atom identifier type and the radius range.
#
sub GetDescription {
  my($This) = @_;

  # Is description explicitly set?
  return $This->{Description} if exists $This->{Description};

  # Generate fingerprints description...
  return join(':',
    $This->{Type},
    $This->{AtomIdentifierType},
    "MinRadius$This->{MinNeighborhoodRadius}",
    "MaxRadius$This->{MaxNeighborhoodRadius}");
}

# Generate atom neighborhood [ Ref 53-56, Ref 73 ] fingerprints...
#
# Methodology:
#   . Assign atom types to all non-hydrogen atoms in the molecule
#   . Get atom neighborhoods up to MaxNeighborhoodRadius
#   . Count unique atom types at each neighborhood radii for all heavy atoms
#   . Generate neighborhood identifiers for all neighborhoods around central
#     heavy atom
#     .
#       Atom neighborhood identifier for a specific radius is generated using neighborhood
#       radius, assigned atom type and its count as follows:
#
#       NR<n>-<AtomType>-ATC<n>
#
#     . Atom neighborhood identifier for a central atom at all specified radii is generated
#       by concatenating neighborhood identifiers at each radii by colon:
#
#       NR<n>-<AtomType>-ATC<n>:NR<n>-<AtomType>-ATC<n>:
#
#   . Set final fingerprints as list of neighborhood atom identifiers
#
# Croaks when MinNeighborhoodRadius is greater than MaxNeighborhoodRadius
# (equal radii are allowed). When atom types can't be assigned, a warning is
# issued and the object is returned without fingerprints. Returns the object.
#
sub GenerateFingerprints {
  my($This) = @_;

  if ($This->{MinNeighborhoodRadius} > $This->{MaxNeighborhoodRadius}) {
    croak "Error: ${ClassName}->GenerateFingerprints: No fingerprints generated: MinNeighborhoodRadius, $This->{MinNeighborhoodRadius}, must be <= MaxNeighborhoodRadius, $This->{MaxNeighborhoodRadius}...";
  }

  # Cache appropriate molecule data...
  $This->_SetupMoleculeDataCache();

  # Assign atom types to all heavy atoms...
  if (!$This->_AssignAtomTypes()) {
    carp "Warning: ${ClassName}->GenerateFingerprints: $This->{AtomIdentifierType} fingerprints generation didn't succeed: Couldn't assign valid $This->{AtomIdentifierType} to all atoms...";

    # Release the cached atoms on the failure path as well, so it leaves the
    # object in the same state as a completed run...
    $This->_ClearMoleculeDataCache();

    return $This;
  }

  # Initialize atom neighborhoods information...
  $This->_InitializeAtomNeighborhoods();

  # Identify atom neighborhoods within specified radii...
  $This->_GetAtomNeighborhoods();

  # Count atom neighborhoods atom types...
  $This->_CountAtomNeighborhoodsAtomTypes();

  # Generate atom neighborhood identifiers...
  $This->_GenerateAtomNeighborhoodIdentifiers();

  # Set final fingerprints...
  $This->_SetFinalFingerprints();

  # Clear cached molecule data...
  $This->_ClearMoleculeDataCache();

  return $This;
}

# Assign appropriate atom types to all heavy atoms...
#
# An atom types object matching AtomIdentifierType is created for the
# molecule and asked to assign atom types; the types of the cached atoms are
# then collected into AssignedAtomTypes keyed by atom ID.
#
# Returns the object on success and nothing (false) when the atom types
# object reports that the assignment was not successful. Croaks on an unknown
# atom identifier type.
#
sub _AssignAtomTypes {
  my($This) = @_;
  my($SpecifiedAtomTypes, $IgnoreHydrogens);

  %{$This->{AssignedAtomTypes}} = ();
  $IgnoreHydrogens = 1;

  # Constructors are invoked as Class->new(...): the indirect object form,
  # "new Class(...)", is parsed heuristically and is best avoided...
  if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
    $SpecifiedAtomTypes = AtomTypes::AtomicInvariantsAtomTypes->new('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens, 'AtomicInvariantsToUse' => $This->{AtomicInvariantsToUse});
  }
  elsif ($This->{AtomIdentifierType} =~ /^DREIDINGAtomTypes$/i) {
    $SpecifiedAtomTypes = AtomTypes::DREIDINGAtomTypes->new('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens);
  }
  elsif ($This->{AtomIdentifierType} =~ /^EStateAtomTypes$/i) {
    $SpecifiedAtomTypes = AtomTypes::EStateAtomTypes->new('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens);
  }
  elsif ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
    $SpecifiedAtomTypes = AtomTypes::FunctionalClassAtomTypes->new('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens, 'FunctionalClassesToUse' => $This->{FunctionalClassesToUse});
  }
  elsif ($This->{AtomIdentifierType} =~ /^MMFF94AtomTypes$/i) {
    $SpecifiedAtomTypes = AtomTypes::MMFF94AtomTypes->new('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens);
  }
  elsif ($This->{AtomIdentifierType} =~ /^SLogPAtomTypes$/i) {
    $SpecifiedAtomTypes = AtomTypes::SLogPAtomTypes->new('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens);
  }
  elsif ($This->{AtomIdentifierType} =~ /^SYBYLAtomTypes$/i) {
    $SpecifiedAtomTypes = AtomTypes::SYBYLAtomTypes->new('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens);
  }
  elsif ($This->{AtomIdentifierType} =~ /^TPSAAtomTypes$/i) {
    # TPSA atom types take their own options instead of IgnoreHydrogens...
    $SpecifiedAtomTypes = AtomTypes::TPSAAtomTypes->new('Molecule' => $This->{Molecule}, 'IgnorePhosphorus' => 0, 'IgnoreSulfur' => 0);
  }
  elsif ($This->{AtomIdentifierType} =~ /^UFFAtomTypes$/i) {
    $SpecifiedAtomTypes = AtomTypes::UFFAtomTypes->new('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens);
  }
  else {
    croak "Error: ${ClassName}->_AssignAtomTypes: Unknown atom indentifier type $This->{AtomIdentifierType}...";
  }

  # Assign atom types...
  $SpecifiedAtomTypes->AssignAtomTypes();

  # Make sure atom types assignment is successful. A bare return is false in
  # scalar context and an empty list in list context...
  if (!$SpecifiedAtomTypes->IsAtomTypesAssignmentSuccessful()) {
    return;
  }

  # Collect assigned atom types...
  for my $Atom (@{$This->{Atoms}}) {
    next if $Atom->IsHydrogen();
    $This->{AssignedAtomTypes}{$Atom->GetID()} = $SpecifiedAtomTypes->GetAtomType($Atom);
  }

  return $This;
}

# Initialize atom neighborhoods data before collecting neighborhoods...
#
# Resets NeighborhoodAtomTypesCount with an empty entry for each radius from
# MinNeighborhoodRadius to MaxNeighborhoodRadius and AtomNeighborhoods with an
# empty entry for each radius from 0 to MaxNeighborhoodRadius. Returns the
# object.
#
sub _InitializeAtomNeighborhoods {
  my($This) = @_;

  # Initialize atom neighborhood count information between specified radii...
  %{$This->{NeighborhoodAtomTypesCount}} = ();

  for my $Radius ($This->{MinNeighborhoodRadius} .. $This->{MaxNeighborhoodRadius}) {
    %{$This->{NeighborhoodAtomTypesCount}{$Radius}} = ();
  }

  # Initialize atom neighborhoods atoms information at all specified radii...
  #
  %{$This->{AtomNeighborhoods}} = ();

  for my $Radius (0 .. $This->{MaxNeighborhoodRadius}) {
    %{$This->{AtomNeighborhoods}{$Radius}} = ();
  }

  return $This;
}

# Collect atom neighborhoods upto maximum neighborhood radius...
#
# Notes:
#   .
#     Fingerprints are only generated for neighborhoods between specified minimum
#     and maximum neighborhood radii.
#
sub _GetAtomNeighborhoods {
  my($This) = @_;

  my $MaxRadius = $This->{MaxNeighborhoodRadius};
  my $Molecule = $This->GetMolecule();

  # Collect atom neighborhoods around each cached atom...
  for my $CentralAtom (@{$This->{Atoms}}) {
    my $CentralAtomID = $CentralAtom->GetID();

    if ($MaxRadius == 0) {
      # Atom is its own neighborhood at 0 radius...
      $This->{AtomNeighborhoods}{0}{$CentralAtomID} = [$CentralAtom];
      next;
    }

    # The molecule returns one list of atoms per radius level; store them
    # under consecutive radii starting at 0...
    my $Level = 0;
    for my $LevelAtomsRef ($Molecule->GetAtomNeighborhoodsWithRadiusUpto($CentralAtom, $MaxRadius)) {
      $This->{AtomNeighborhoods}{$Level}{$CentralAtomID} = $LevelAtomsRef;
      $Level++;
    }
  }

  return $This;
}

# Count atom neighborhoods atom types for each non-hydrogen central atoms with
# neighborhoods in specified radii range...
#
# Hydrogens in a neighborhood are skipped. Counts are stored in
# NeighborhoodAtomTypesCount keyed by radius, central atom ID and atom type.
#
sub _CountAtomNeighborhoodsAtomTypes {
  my($This) = @_;

  for my $Radius (sort { $a <=> $b } keys %{$This->{AtomNeighborhoods}}) {
    # Only radii inside the requested range contribute to the counts...
    next unless $Radius >= $This->{MinNeighborhoodRadius} && $Radius <= $This->{MaxNeighborhoodRadius};

    # Go over the neighborhoods of each atom at the current radius...
    for my $CentralAtomID (keys %{$This->{AtomNeighborhoods}{$Radius}}) {
      for my $Member (@{$This->{AtomNeighborhoods}{$Radius}{$CentralAtomID}}) {
        next if $Member->IsHydrogen();

        my $MemberType = $This->{AssignedAtomTypes}{$Member->GetID()};

        # First occurrence creates the entry with a count of 1...
        $This->{NeighborhoodAtomTypesCount}{$Radius}{$CentralAtomID}{$MemberType}++;
      }
    }
  }

  return $This;
}

# Generate atom neighborhood identifiers for each non-hydrogen atom using atom
# neighborhood atom types and their count information...
#
# Let:
#   NR<n> = Neighborhood radius
#   AtomType = Assigned atom type
#   ATC<n> = AtomType count
#
# Then:
#
#   AtomNeighborhoodAtomIdentifier for a neighborhood atom generated for
#   AtomTypes::AtomicInvariantsAtomTypes class corresponds to:
#
#     NR<n>-<AtomType>-ATC<n>
#
#   AtomNeighborhoodsIdentifier for all specified atom neighborhoods of an atom generated for
#   AtomTypes::AtomicInvariantsAtomTypes class corresponds to:
#
#     NR<n>-<AtomType>-ATC<n>:NR<n>-<AtomType>-ATC<n>:...
#
sub _GenerateAtomNeighborhoodIdentifiers {
  my($This) = @_;

  @{$This->{AtomNeighborhoodsIdentifiers}} = ();

  for my $CentralAtom (@{$This->{Atoms}}) {
    my $CentralAtomID = $CentralAtom->GetID();
    my @Pieces = ();

    for my $Radius ($This->{MinNeighborhoodRadius} .. $This->{MaxNeighborhoodRadius}) {
      # Radii without any counted atom types contribute nothing...
      next unless exists $This->{NeighborhoodAtomTypesCount}{$Radius}{$CentralAtomID};

      my $TypeCountsRef = $This->{NeighborhoodAtomTypesCount}{$Radius}{$CentralAtomID};
      for my $AtomType (sort keys %{$TypeCountsRef}) {
        push @Pieces, join('-', "NR${Radius}", $AtomType, "ATC$TypeCountsRef->{$AtomType}");
      }
    }

    # One identifier per cached atom, with its pieces joined by colon...
    push @{$This->{AtomNeighborhoodsIdentifiers}}, join(":", @Pieces);
  }

  return $This;
}

# Set final fingerprints vector...
#
# Marks the fingerprints as generated and adds the sorted atom neighborhood
# identifiers to the fingerprints vector.
#
sub _SetFinalFingerprints {
  my($This) = @_;

  # Mark successful generation of fingerprints...
  $This->{FingerprintsGenerated} = 1;

  # Sort AtomNeighborhoodsIdentifiers..
  #
  my @SortedIdentifiers = sort @{$This->{AtomNeighborhoodsIdentifiers}};
  @{$This->{AtomNeighborhoodsIdentifiers}} = @SortedIdentifiers;

  # Add sorted atom neighborhood identifiers to FingerprintsVector which is already defined
  # during initialization containing AlphaNumericalValues...
  #
  $This->{FingerprintsVector}->AddValues($This->{AtomNeighborhoodsIdentifiers});

  return $This;
}

# Cache appropriate molecule data...
#
sub _SetupMoleculeDataCache {
  my($This) = @_;

  # Get all non-hydrogen atoms by negating the IsHydrogen atom check...
  my $NegateAtomCheckMethod = 1;
  my $Molecule = $This->GetMolecule();

  @{$This->{Atoms}} = $Molecule->GetAtoms("IsHydrogen", $NegateAtomCheckMethod);

  return $This;
}

# Clear cached molecule data...
#
sub _ClearMoleculeDataCache {
  my($This) = @_;

  @{$This->{Atoms}} = ();

  return $This;
}

# Set atomic invariants to use for atom identifiers...
#
# Values may be passed as a list or as a reference to an array. Every value
# must be an available atomic invariant; otherwise the call croaks and the
# current setting is left untouched. Warns and returns nothing when called
# without values; returns the object otherwise.
#
sub SetAtomicInvariantsToUse {
  my($This, @Values) = @_;

  unless (@Values) {
    carp "Warning: ${ClassName}->SetAtomicInvariantsToUse: No values specified...";
    return;
  }

  # An array reference as the first value supplies the whole list...
  my @Requested = (ref($Values[0]) =~ /^ARRAY/) ? @{$Values[0]} : @Values;

  # Make sure specified AtomicInvariants are valid...
  my @Accepted = ();
  for my $Invariant (@Requested) {
    unless (AtomTypes::AtomicInvariantsAtomTypes::IsAtomicInvariantAvailable($Invariant)) {
      croak "Error: ${ClassName}->SetAtomicInvariantsToUse: Specified atomic invariant, $Invariant, is not supported...\n ";
    }
    push @Accepted, $Invariant;
  }

  # Set atomic invariants to use...
  @{$This->{AtomicInvariantsToUse}} = @Accepted;

  return $This;
}

# Set functional classes to use for atom identifiers...
#
# Values may be passed as a list or as a reference to an array. The call is
# only honored when AtomIdentifierType is FunctionalClassAtomTypes; otherwise,
# or when no values are given, a warning is issued and nothing is returned.
# Every value must be an available functional class; otherwise the call
# croaks and the current setting is left untouched. Returns the object on
# success.
#
sub SetFunctionalClassesToUse {
  my($This, @Values) = @_;

  unless (@Values) {
    carp "Warning: ${ClassName}->SetFunctionalClassesToUse: No values specified...";
    return;
  }

  unless ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
    carp "Warning: ${ClassName}->SetFunctionalClassesToUse: FunctionalClassesToUse can't be set for InitialAtomIdentifierType of $This->{AtomIdentifierType}...";
    return;
  }

  # An array reference as the first value supplies the whole list...
  my @Candidates = (ref($Values[0]) =~ /^ARRAY/) ? @{$Values[0]} : @Values;

  # Make sure specified FunctionalClasses are valid...
  my @Validated = ();
  for my $Candidate (@Candidates) {
    unless (AtomTypes::FunctionalClassAtomTypes::IsFunctionalClassAvailable($Candidate)) {
      croak "Error: ${ClassName}->SetFunctionalClassesToUse: Specified functional class, $Candidate, is not supported...\n ";
    }
    push @Validated, $Candidate;
  }

  # Set functional classes to use...
  @{$This->{FunctionalClassesToUse}} = @Validated;

  return $This;
}

# Initialize atom indentifier type information...
#
# Current supported values:
#
#   AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes,
#   MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes
#
# Only AtomicInvariantsAtomTypes and FunctionalClassAtomTypes need additional
# setup. Croaks on an unknown type; returns the object otherwise.
#
sub _InitializeAtomIdentifierTypeInformation {
  my($This) = @_;
  my $IdentifierType = $This->{AtomIdentifierType};

  if ($IdentifierType =~ /^AtomicInvariantsAtomTypes$/i) {
    $This->_InitializeAtomicInvariantsAtomTypesInformation();
    return $This;
  }

  if ($IdentifierType =~ /^FunctionalClassAtomTypes$/i) {
    $This->_InitializeFunctionalClassAtomTypesInformation();
    return $This;
  }

  # Nothing to do for now for the remaining supported types...
  if ($IdentifierType =~ /^(DREIDINGAtomTypes|EStateAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
    return $This;
  }

  croak "Error: ${ClassName}->_InitializeAtomIdentifierTypeInformation: Unknown atom indentifier type $This->{AtomIdentifierType}...";
}

# Initialize atomic invariants atom types to use for generating atom identifiers...
#
# Let:
#   AS = Atom symbol corresponding to element symbol
#
#   X<n>   = Number of non-hydrogen atom neighbors or heavy atoms attached to atom
#   BO<n> = Sum of bond orders to non-hydrogen atom neighbors or heavy atoms attached to atom
#   LBO<n> = Largest bond order of non-hydrogen atom neighbors or heavy atoms attached to atom
#   SB<n> = Number of single bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
#   DB<n> = Number of double bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
#   TB<n> = Number of triple bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
#   H<n>   = Number of implicit and explicit hydrogens for atom
#   Ar     = Aromatic annotation indicating whether atom is aromatic
#   RA     = Ring atom annotation indicating whether atom is in a ring
#   FC<+n/-n> = Formal charge assigned to atom
#   MN<n> = Mass number indicating isotope other than most abundant isotope
#   SM<n> = Spin multiplicity of atom. Possible values: 1 (singlet), 2 (doublet) or 3 (triplet)
#
# Then:
#
#   Atom type generated by AtomTypes::AtomicInvariantsAtomTypes class corresponds to:
#
#     AS.X<n>.BO<n>.LBO<n>.SB<n>.DB<n>.TB<n>.H<n>.Ar.RA.FC<+n/-n>.MN<n>.SM<n>
#
# Except for AS which is a required atomic invariant in atom types, all other atomic invariants are
# optional. Default atomic invariants used for AtomID are: AS, X<n>, BO<n>, H<n>, FC<+n/-n>.
# AtomID specification doesn't include atomic invariants with zero or undefined values.
#
sub _InitializeAtomicInvariantsAtomTypesInformation {
  my($This) = @_;

  # Default atomic invariants to use for generating atom neighborhood atom IDs: AS, X, BO, H, FC
  #
  @{$This->{AtomicInvariantsToUse}} = qw(AS X BO H FC);

  return $This;
}

# Initialize functional class atom types, generated by AtomTypes::FunctionalClassAtomTypes
# class, to use for generating atom identifiers...
#
# Let:
#   HBD: HydrogenBondDonor
#   HBA: HydrogenBondAcceptor
#   PI :  PositivelyIonizable
#   NI : NegativelyIonizable
#   Ar : Aromatic
#   Hal : Halogen
#   H : Hydrophobic
#   RA : RingAtom
#   CA : ChainAtom
#
# Then:
#
#   Functional class atom type specification for an atom corresponds to:
#
#     Ar.CA.H.HBA.HBD.Hal.NI.PI.RA
#
#   Default functional classes used are: HBD, HBA, PI, NI, Ar, Hal
#
#   FunctionalAtomTypes are assigned using the following definitions [ Ref 60-61, Ref 65-66 ]:
#
#     HydrogenBondDonor: NH, NH2, OH
#     HydrogenBondAcceptor: N[!H], O
#     PositivelyIonizable: +, NH2
#     NegativelyIonizable: -, C(=O)OH, S(=O)OH, P(=O)OH
#
sub _InitializeFunctionalClassAtomTypesInformation {
  my($This) = @_;

  # Default functional class atom types to use for generating atom identifiers
  # are: HBD, HBA, PI, NI, Ar, Hal
  #
  @{$This->{FunctionalClassesToUse}} = qw(HBD HBA PI NI Ar Hal);

  return $This;
}

# Return a string containing data for AtomNeighborhoodsFingerprints object...
#
# The string is a list of "Name: value" sections separated by "; ". Sections
# describing the atomic invariants or functional classes are included only
# for the corresponding atom identifier types.
#
sub StringifyAtomNeighborhoodsFingerprints {
  my($This) = @_;
  my(@Sections);

  # Type of fingerprint and its basic parameters...
  push @Sections,
    "Fingerprint type: $This->{Type}",
    "AtomIdentifierType: $This->{AtomIdentifierType}",
    "MinNeighborhoodRadius: $This->{MinNeighborhoodRadius}",
    "MaxNeighborhoodRadius: $This->{MaxNeighborhoodRadius}";

  if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
    my @InvariantsOrder = AtomTypes::AtomicInvariantsAtomTypes::GetAtomicInvariantsOrder();
    my %AvailableInvariants = AtomTypes::AtomicInvariantsAtomTypes::GetAvailableAtomicInvariants();

    # Pair each invariant with its description in the defined order...
    my @InvariantsInfo = map { "$_: $AvailableInvariants{$_}" } @InvariantsOrder;

    push @Sections,
      "AtomicInvariantsToUse: <" . TextUtil::JoinWords(\@{$This->{AtomicInvariantsToUse}}, ", ", 0) . ">",
      "AtomicInvariantsOrder: <" . TextUtil::JoinWords(\@InvariantsOrder, ", ", 0) . ">",
      "AvailableAtomicInvariants: <" . TextUtil::JoinWords(\@InvariantsInfo, ", ", 0) . ">";
  }
  elsif ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
    my @ClassesOrder = AtomTypes::FunctionalClassAtomTypes::GetFunctionalClassesOrder();
    my %AvailableClasses = AtomTypes::FunctionalClassAtomTypes::GetAvailableFunctionalClasses();

    # Pair each functional class with its description in the defined order...
    my @ClassesInfo = map { "$_: $AvailableClasses{$_}" } @ClassesOrder;

    push @Sections,
      "FunctionalClassesToUse: <" . TextUtil::JoinWords(\@{$This->{FunctionalClassesToUse}}, ", ", 0) . ">",
      "FunctionalClassesOrder: <" . TextUtil::JoinWords(\@ClassesOrder, ", ", 0) . ">",
      "AvailableFunctionalClasses: <" . TextUtil::JoinWords(\@ClassesInfo, ", ", 0) . ">";
  }

  # Total number of atom neighborhood atom IDs...
  push @Sections, "NumOfAtomNeighborhoodAtomIdentifiers: " . $This->{FingerprintsVector}->GetNumOfValues();

  # FingerprintsVector...
  push @Sections, "FingerprintsVector: < $This->{FingerprintsVector} >";

  return join("; ", @Sections);
}