package Fingerprints::ExtendedConnectivityFingerprints;
#
# File: ExtendedConnectivityFingerprints.pm
# Author: Manish Sud <msud@san.rr.com>
#
# Copyright (C) 2024 Manish Sud. All rights reserved.
#
# This file is part of MayaChemTools.
#
# MayaChemTools is free software; you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation; either version 3 of the License, or (at your option) any
# later version.
#
# MayaChemTools is distributed in the hope that it will be useful, but without
# any warranty; without even the implied warranty of merchantability of fitness
# for a particular purpose.  See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
# write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
# Boston, MA, 02111-1307, USA.
#

use strict;
use Carp;
use Exporter;
use TextUtil ();
use MathUtil ();
use Fingerprints::Fingerprints;
use Molecule;
use AtomTypes::AtomicInvariantsAtomTypes;
use AtomTypes::FunctionalClassAtomTypes;
use AtomTypes::DREIDINGAtomTypes;
use AtomTypes::EStateAtomTypes;
use AtomTypes::MMFF94AtomTypes;
use AtomTypes::SLogPAtomTypes;
use AtomTypes::SYBYLAtomTypes;
use AtomTypes::TPSAAtomTypes;
use AtomTypes::UFFAtomTypes;

# Package variables used for inheritance and by Exporter; declared with "our"
# instead of the obsolete "use vars" pragma...
our(@ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);

@ISA = qw(Fingerprints::Fingerprints Exporter);
@EXPORT = qw();
@EXPORT_OK = qw();

%EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]);

# Setup class variables...
#
# $ClassName is a file-scoped lexical set by _InitializeClass and used in the
# error messages generated by this class.
my($ClassName);
_InitializeClass();

# Overload Perl functions...
#
# Objects of this class are stringified using the method named below.
use overload '""' => 'StringifyExtendedConnectivityFingerprints';

# Class constructor...
sub new {
  my($Class, %NamesAndValues) = @_;

  # Let the base class build the object and bless it into this class, or into the
  # class of the invoking object when called as an instance method...
  my $TargetClass = ref($Class) || $Class;
  my $This = bless $Class->SUPER::new(), $TargetClass;

  # Set up default values before applying the caller specified names and values...
  $This->_InitializeExtendedConnectivityFingerprints();
  $This->_InitializeExtendedConnectivityFingerprintsProperties(%NamesAndValues);

  return $This;
}

# Initialize object data to its default values...
#
sub _InitializeExtendedConnectivityFingerprints {
  my($This) = @_;

  my %DefaultValues = (
    # Type of fingerprint to generate:
    #
    # ExtendedConnectivity - Set of integer identifiers corresponding to structurally unique features
    # ExtendedConnectivityCount - Set of integer identifiers corresponding to structurally unique features and their count
    # ExtendedConnectivityBits - A bit vector indicating presence/absence of structurally unique features
    #
    Type => 'ExtendedConnectivity',

    # Atomic neighborhoods radius for extended connectivity...
    NeighborhoodRadius => 2,

    # Size of bit vector to use during generation of ExtendedConnectivityBits fingerprints
    # along with its minimum and maximum allowed values...
    Size => 1024,
    MinSize => 32,
    MaxSize => 2**32,

    # Type of atom attributes to use for initial identifier assignment to non-hydrogen atoms
    # during the calculation of extended connectivity fingerprints [ Ref 48, Ref 52 ]...
    #
    # Currently supported values are: AtomicInvariantsAtomTypes, FunctionalClassAtomTypes,
    # DREIDINGAtomTypes, EStateAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes,
    # TPSAAtomTypes, UFFAtomTypes
    #
    AtomIdentifierType => '',

    # Random number generator to use during generation of fingerprints bit-vector
    # string: Perl CORE::rand or MayaChemTools MathUtil::random function.
    #
    # The random number generator implemented in MayaChemTools is a variant of
    # linear congruential generator (LCG) as described by Miller et al. [ Ref 120 ].
    # It is also referred to as Lehmer random number generator or Park-Miller
    # random number generator.
    #
    # Unlike Perl's core random number generator function rand, the random number
    # generator implemented in MayaChemTools, MathUtil::random, generates consistent
    # random values across different platforms for a specific random seed and leads
    # to generation of portable fingerprints bit-vector strings.
    #
    UsePerlCoreRandom => 1,
  );
  @{$This}{keys %DefaultValues} = values %DefaultValues;

  # Start out with empty hashes for the data collected during fingerprints generation:
  #
  # AtomNeighborhoods - Atom neighborhoods up to specified neighborhood radius
  # AtomIdentifiers - Atom identifiers at different neighborhoods up to specified radius
  # UniqueAtomIdentifiers, UniqueAtomIdentifiersCount - Atom identifiers which are unique
  #     by value at each radius level along with their count
  # StructurallyUniqueAtomIdentifiers, StructurallyUniqueAtomIdentifiersCount - Atom
  #     identifiers which are also structurally unique along with their count
  #
  my $HashName;
  for $HashName (qw(AtomNeighborhoods AtomIdentifiers UniqueAtomIdentifiers UniqueAtomIdentifiersCount StructurallyUniqueAtomIdentifiers StructurallyUniqueAtomIdentifiersCount)) {
    %{$This->{$HashName}} = ();
  }

  # Structure feature information at different neighborhoods up to specified neighborhood
  # radius used during removal of atom identifiers which are structurally equivalent...
  %{$This->{StructureFeatures}} = ();
}

# Initialize class data...
#
sub _InitializeClass {
  # Name of this class as used in error and warning messages...
  $ClassName = __PACKAGE__;
}

# Initialize object properties....
sub _InitializeExtendedConnectivityFingerprintsProperties {
  my($This, %NamesAndValues) = @_;

  # Apply each specified name and value pair through its corresponding Set<Name> method...
  my($Name, $Value, $MethodName);
  while (($Name, $Value) = each %NamesAndValues) {
    $MethodName = "Set${Name}";
    $This->$MethodName($Value);
  }

  # Make sure molecule object was specified...
  if (!exists $NamesAndValues{Molecule}) {
    croak "Error: ${ClassName}->New: Object can't be instantiated without specifying molecule...";
  }

  # Make sure AtomIdentifierType was specified...
  if (!exists $NamesAndValues{AtomIdentifierType}) {
    croak "Error: ${ClassName}->New: Object can't be instantiated without specifying AtomIdentifierType...";
  }

  # Make sure it's power of 2...
  if (exists $NamesAndValues{Size}) {
    if (!TextUtil::IsNumberPowerOfNumber($NamesAndValues{Size}, 2)) {
      croak "Error: ${ClassName}->New: Specified size value, $NamesAndValues{Size}, must be power of 2...";
    }
  }

  # Set up the vector or bit vector appropriate for the type of fingerprints...
  if ($This->{Type} =~ /^ExtendedConnectivity$/i) {
    $This->_InitializeExtendedConnectivityFingerprintsVector();
  }
  elsif ($This->{Type} =~ /^ExtendedConnectivityCount$/i) {
    $This->_InitializeExtendedConnectivityCountFingerprintsVector();
  }
  elsif ($This->{Type} =~ /^ExtendedConnectivityBits$/i) {
    $This->_InitializeExtendedConnectivityBitsFingerprintsBitVector();
  }
  else {
    croak "Error: ${ClassName}->_InitializeExtendedConnectivityFingerprintsProperties: Unknown ExtendedConnectivity fingerprints type: $This->{Type}; Supported fingerprints types: ExtendedConnectivity, ExtendedConnectivityCount or ExtendedConnectivityBits...";
  }

  return $This;
}

# Initialize extended connectivity fingerprints vector...
#
# ExtendedConnectivity fingerprints are stored in a FingerprintsVector containing
# AlphaNumericalValues.
#
sub _InitializeExtendedConnectivityFingerprintsVector {
  my($This) = @_;

  # Type of vector...
  $This->{VectorType} = 'FingerprintsVector';

  # Type of FingerprintsVector...
  $This->{FingerprintsVectorType} = 'AlphaNumericalValues';

  $This->_InitializeFingerprintsVector();

  return $This;
}

# Initialize extended connectivity count fingerprints vector...
#
# ExtendedConnectivityCount fingerprints are stored in a FingerprintsVector containing
# NumericalValues.
#
sub _InitializeExtendedConnectivityCountFingerprintsVector {
  my($This) = @_;

  # Type of vector...
  $This->{VectorType} = 'FingerprintsVector';

  # Type of FingerprintsVector...
  $This->{FingerprintsVectorType} = 'NumericalValues';

  $This->_InitializeFingerprintsVector();

  return $This;
}

# Initialize extended connectivity bit fingerprints vector...
#
# ExtendedConnectivityBits fingerprints are stored in a FingerprintsBitVector.
#
sub _InitializeExtendedConnectivityBitsFingerprintsBitVector {
  my($This) = @_;

  # Type of vector...
  $This->{VectorType} = 'FingerprintsBitVector';

  $This->_InitializeFingerprintsBitVector();

  return $This;
}

# Set type...
#
# The specified type is matched without regard to case and stored using its
# canonical spelling. An unknown type is a fatal error; the error message reports
# the specified value which was rejected.
#
sub SetType {
  my($This, $Type) = @_;

  if ($Type =~ /^ExtendedConnectivity$/i) {
    $This->{Type} = 'ExtendedConnectivity';
  }
  elsif ($Type =~ /^ExtendedConnectivityCount$/i) {
    $This->{Type} = 'ExtendedConnectivityCount';
  }
  elsif ($Type =~ /^ExtendedConnectivityBits$/i) {
    $This->{Type} = 'ExtendedConnectivityBits';
  }
  else {
    # Report the invalid specified type and not the type currently set for the object...
    croak "Error: ${ClassName}->SetType: Unknown ExtendedConnectivity fingerprints type: $Type; Supported fingerprints types: ExtendedConnectivity, ExtendedConnectivityCount or ExtendedConnectivityBits...";
  }
  return $This;
}

# Disable vector type change...
#
# The vector type is set during object initialization based on the fingerprints
# type; any attempt to change it is a fatal error.
#
sub SetVectorType {
  my($This, $Type) = @_;

  croak "Error: ${ClassName}->SetVectorType: Can't change vector type...";
}

# Disable vector type change...
#
# The type of fingerprints vector is set during object initialization based on the
# fingerprints type; any attempt to change it is a fatal error.
#
sub SetFingerprintsVectorType {
  my($This, $Type) = @_;

  croak "Error: ${ClassName}->SetFingerprintsVectorType: Can't change fingerprints vector type...";
}

# Set initial atom identifier type...
#
# The specified type must be one of the supported atom types; it is matched without
# regard to case and can be set only once for an object.
#
sub SetAtomIdentifierType {
  my($This, $IdentifierType) = @_;

  if ($IdentifierType !~ /^(AtomicInvariantsAtomTypes|FunctionalClassAtomTypes|DREIDINGAtomTypes|EStateAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
    croak "Error: ${ClassName}->SetAtomIdentifierType: Specified value, $IdentifierType, for AtomIdentifierType is not vaild. Supported types in current release of MayaChemTools: AtomicInvariantsAtomTypes, FunctionalClassAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes and UFFAtomTypes.";
  }

  if ($This->{AtomIdentifierType}) {
    croak "Error: ${ClassName}->SetAtomIdentifierType: Can't change intial atom identifier type:  It's already set...";
  }

  $This->{AtomIdentifierType} = $IdentifierType;

  # Initialize identifier type information...
  $This->_InitializeAtomIdentifierTypeInformation();

  return $This;
}

# Set atom neighborhood radius...
#
# The radius must be an integer >= 0; any other value is a fatal error.
#
sub SetNeighborhoodRadius {
  my($This, $Value) = @_;

  if (!TextUtil::IsInteger($Value)) {
    croak "Error: ${ClassName}->SetNeighborhoodRadius: NeighborhoodRadius value, $Value, is not valid:  It must be an integer...";
  }

  if ($Value < 0 ) {
    croak "Error: ${ClassName}->SetNeighborhoodRadius: NeighborhoodRadius value, $Value, is not valid:  It must be >= 0...";
  }
  $This->{NeighborhoodRadius} = $Value;

  return $This;
}

# Generate fingerprints description...
#
# Returns the explicitly set description, when there is one, or a description of
# the form: <Type>:<AtomIdentifierType>:Radius<NeighborhoodRadius>
#
sub GetDescription {
  my($This) = @_;

  # Is description explicitly set?
  if (exists $This->{Description}) {
    return $This->{Description};
  }

  # Generate fingerprints description...

  return "$This->{Type}:$This->{AtomIdentifierType}:Radius$This->{NeighborhoodRadius}";
}

# Generate fingerprints...
#
# Methodology:
#   . Assign initial atom identifiers to all non-hydrogen atoms in the molecule
#
#   . Remove duplicates from the initial identifiers and add them to list corresponding
#     to molecule fingerprint
#
#   . For NeighborhoodRadius value of 0, just return the molecule fingerprint list
#
#   . For each NeighborhoodRadius level
#      . For each non-hydrogen CentralAtom at this NeighborhoodRadius level
#         . For each non-hydrogen SuccessorNeighborAtom
#           . Collect (BondOrder AtomIdentifier) pair of values corresponding to
#             (CentralAtom SuccessorNeighborAtom) and add it to a list
#
#         . Sort list containing (BondOrder AtomIdentifier) pairs first by BondOrder followed
#           by AtomIdentifiers to make these values graph invariant
#         . Generate a hash code for the values in the list
#         . Assign hash code as new atom identifier at the current NeighborhoodRadius level
#         . Save all atoms and bonds corresponding to the substructure involved in
#           generating the hash code to be used for identifying structural duplicate hash code
#
#         . Add the new identifier to the molecule fingerprint list making sure it's not a duplicate
#           identifier
#
#   Hash code atom identifier deduplication:
#     . Track/remove the identifier generated at higher neighborhood radius level
#
#  Structural atom identifier deduplication:
#    . For equivalent atoms and bonds corresponding to substructure at a NeighborhoodRadius level,
#      track/remove the atom identifier with largest value
#
# A warning is generated and no fingerprints are set when atom types can't be
# assigned to all the atoms.
#
sub GenerateFingerprints {
  my($This) = @_;

  # Cache appropriate molecule data...
  $This->_SetupMoleculeDataCache();

  # Assign initial atom identifiers...
  if (!$This->_AssignInitialAtomIdentifiers()) {
    carp "Warning: ${ClassName}->GenerateFingerprints: $This->{AtomIdentifierType} fingerprints generation didn't succeed: Couldn't assign valid $This->{AtomIdentifierType} to all atoms...";

    # Don't leave cached molecule data behind on failure...
    $This->_ClearMoleculeDataCache();

    return $This;
  }

  # Identify atom neighborhoods up to specified radius...
  $This->_GetAtomNeighborhoods();

  # Assign atom identifiers to central atoms considering atom neighborhoods at each
  # radius level...
  $This->_AssignAtomIdentifiersToAtomNeighborhoods();

  # Remove duplicates identifiers...
  $This->_RemoveDuplicateAtomIdentifiers();

  # Set final fingerprints...
  $This->_SetFinalFingerprints();

  # Clear cached molecule data...
  $This->_ClearMoleculeDataCache();

  return $This;
}

# Assign appropriate initial atom identifiers...
#
#   Generation of initial identifier for a specific atom involves:
#     . Values of the specified atom attributes are appended in a specific order to
#       generate an initial atom identifier string
#     . A 32 bit unsigned integer hash key, using TextUtil::HashCode function, is
#       generated for the atom identifier and assigned to the atom as initial
#       atom identifier.
#
# Returns the object on success and nothing (false) when atom types couldn't be
# assigned successfully. An unsupported AtomIdentifierType is a fatal error.
#
sub _AssignInitialAtomIdentifiers {
  my($This) = @_;

  # Initialize atom identifiers...
  $This->_InitializeAtomIdentifiers();

  # Set up atom types...
  my $IgnoreHydrogens = 1;

  # Constructor parameters, in addition to Molecule, for each supported atom types class
  # in AtomTypes namespace...
  my %AtomTypesParameters = (
    'AtomicInvariantsAtomTypes' => ['IgnoreHydrogens' => $IgnoreHydrogens, 'AtomicInvariantsToUse' => $This->{AtomicInvariantsToUse}],
    'FunctionalClassAtomTypes' => ['IgnoreHydrogens' => $IgnoreHydrogens, 'FunctionalClassesToUse' => $This->{FunctionalClassesToUse}],
    'DREIDINGAtomTypes' => ['IgnoreHydrogens' => $IgnoreHydrogens],
    'EStateAtomTypes' => ['IgnoreHydrogens' => $IgnoreHydrogens],
    'MMFF94AtomTypes' => ['IgnoreHydrogens' => $IgnoreHydrogens],
    'SLogPAtomTypes' => ['IgnoreHydrogens' => $IgnoreHydrogens],
    'SYBYLAtomTypes' => ['IgnoreHydrogens' => $IgnoreHydrogens],
    'TPSAAtomTypes' => ['IgnorePhosphorus' => 0, 'IgnoreSulfur' => 0],
    'UFFAtomTypes' => ['IgnoreHydrogens' => $IgnoreHydrogens],
  );

  # Find the supported atom types name matching the identifier type without regard to case...
  my($AtomTypesName) = grep { $This->{AtomIdentifierType} =~ /^\Q$_\E$/i } sort keys %AtomTypesParameters;
  if (!defined $AtomTypesName) {
    croak "Error: ${ClassName}->_AssignInitialAtomIdentifiers: Couldn't assign intial atom identifiers: InitialAtomIdentifierType $This->{AtomIdentifierType} is not supported...";
  }

  my $AtomTypesClass = "AtomTypes::${AtomTypesName}";
  my $SpecifiedAtomTypes = $AtomTypesClass->new('Molecule' => $This->{Molecule}, @{$AtomTypesParameters{$AtomTypesName}});

  # Assign atom types...
  $SpecifiedAtomTypes->AssignAtomTypes();

  # Make sure atom types assignment is successful...
  if (!$SpecifiedAtomTypes->IsAtomTypesAssignmentSuccessful()) {
    return;
  }

  # Assign atom identifiers at radius 0...
  my $Radius = 0;
  for my $Atom (@{$This->{Atoms}}) {
    my $AtomID = $Atom->GetID();

    # Atoms without an assigned atom type get a fixed placeholder string...
    my $AtomType = $SpecifiedAtomTypes->GetAtomType($Atom);
    my $InitialAtomTypeString = $AtomType ? $AtomType : 'None';

    $This->{AtomIdentifiers}{$Radius}{$AtomID} = TextUtil::HashCode($InitialAtomTypeString);
  }

  return $This;
}

# Initialize atom identifiers...
#
# Clears all atom identifiers and sets up empty hashes at each radius level from 0
# up to the specified neighborhood radius.
#
sub _InitializeAtomIdentifiers {
  my($This) = @_;

  %{$This->{AtomIdentifiers}} = ();
  for my $CurrentRadius (0 .. $This->{NeighborhoodRadius}) {
    # Atom identifiers key and value correspond to AtomID and AtomIdentifier
    %{$This->{AtomIdentifiers}{$CurrentRadius}} = ();

    # Unique and structurally unique identifiers key and value correspond to AtomIdentifier and AtomID
    %{$This->{UniqueAtomIdentifiers}{$CurrentRadius}} = ();
    %{$This->{UniqueAtomIdentifiersCount}{$CurrentRadius}} = ();

    %{$This->{StructurallyUniqueAtomIdentifiers}{$CurrentRadius}} = ();
    %{$This->{StructurallyUniqueAtomIdentifiersCount}{$CurrentRadius}} = ();
  }

}

# Collect atom neighborhoods up to specified neighborhood radius...
#
# For each cached atom, the neighborhoods returned by the molecule are stored by
# radius level, starting at 0, and atom ID. Nothing is collected for a neighborhood
# radius less than 1.
#
sub _GetAtomNeighborhoods {
  my($This) = @_;

  %{$This->{AtomNeighborhoods}} = ();

  my $Radius = $This->{NeighborhoodRadius};
  if ($Radius < 1) {
    # At radius level 0, it's just the atoms...
    return;
  }

  # Initialize neighborhood at different radii...
  for my $CurrentRadius (0 .. $Radius) {
    %{$This->{AtomNeighborhoods}{$CurrentRadius}} = ();
  }

  my $Molecule = $This->GetMolecule();

  # Collect available atom neighborhoods at different neighborhood level for each atom...
  for my $Atom (@{$This->{Atoms}}) {
    my $AtomID = $Atom->GetID();
    my $CurrentRadius = 0;
    for my $AtomsNeighborhoodWithSuccessorAtomsRef ($Molecule->GetAtomNeighborhoodsWithSuccessorAtomsAndRadiusUpto($Atom, $Radius)) {
      $This->{AtomNeighborhoods}{$CurrentRadius}{$AtomID} = $AtomsNeighborhoodWithSuccessorAtomsRef;
      $CurrentRadius++;
    }
  }
  return $This;
}

# Assign atom identifiers to central atom at each neighborhood radius level...
#
# Nothing is done for a neighborhood radius less than 1. Otherwise, the identifiers
# of non-hydrogen successor atoms are collected by bond order for each cached atom at
# each radius level and used to assign an identifier at the next radius level.
#
sub _AssignAtomIdentifiersToAtomNeighborhoods {
  my($This) = @_;

  return if $This->{NeighborhoodRadius} < 1;

  # Go over the atom neighborhoods at each radius up to specified radius and assign atom
  # identifiers using their connected successor atoms and their identifiers.
  #
  # For a neighborhood atom at a specified radius, the successor connected atoms correspond
  # to next radius level and the last set of neighborhood atoms don't have any successor
  # connected atoms. Additionally, radius level 0 just corresponds to initial atom identifiers.
  #
  # So in order to process atom neighborhoods up to specified radius level, the last atom
  # neighborhood doesn't need to be processed: it gets processed at previous radius level as
  # successor connected atoms.
  #
  my $LastRadius = $This->{NeighborhoodRadius} - 1;

  for my $Level (0 .. $LastRadius) {
    CENTRALATOM: for my $CentralAtom (@{$This->{Atoms}}) {
      my $CentralAtomID = $CentralAtom->GetID();

      # Skip atoms without any available atom neighborhoods at this radius...
      next CENTRALATOM unless exists $This->{AtomNeighborhoods}{$Level}{$CentralAtomID};

      # Successor atom identifiers at this radius grouped by the order of the bond
      # between the neighborhood atom and its successor atom...
      my %IdentifiersByBondOrder = ();

      NEIGHBORHOOD: for my $NeighborhoodRef (@{$This->{AtomNeighborhoods}{$Level}{$CentralAtomID}}) {
        my($NeighborAtom, $SuccessorsRef) = @{$NeighborhoodRef};

        # Any connected successors for the neighborhood atom?
        next NEIGHBORHOOD unless @{$SuccessorsRef};

        SUCCESSOR: for my $Successor (@{$SuccessorsRef}) {
          # Skip successor hydrogen atom...
          next SUCCESSOR if $Successor->IsHydrogen();

          my $SuccessorID = $Successor->GetID();
          my $ConnectingBond = $NeighborAtom->GetBondToAtom($Successor);

          # Aromatic bonds are all treated as bonds with an order of 1.5...
          my $OrderLabel;
          if ($ConnectingBond->IsAromatic()) {
            $OrderLabel = "1.5";
          }
          else {
            $OrderLabel = $ConnectingBond->GetBondOrder();
          }

          push @{$IdentifiersByBondOrder{$OrderLabel}}, $This->{AtomIdentifiers}{$Level}{$SuccessorID};
        }
      }

      # No identifier at the next radius level without any non-hydrogen successor atoms...
      next CENTRALATOM unless %IdentifiersByBondOrder;

      # Assign a new atom identifier at the next radius level...
      $This->_AssignAtomIdentifierToAtomNeighborhood($CentralAtomID, $Level, \%IdentifiersByBondOrder);
    }
  }
  return $This;
}

# Generate and assign atom identifier for AtomID using atom neighborhood at next radius level...
#
# The new identifier is the hash code, generated by TextUtil::HashCode, of a string
# containing the next radius level, the identifier of the atom at the current radius
# level and sorted (BondOrder AtomIdentifier) pairs for its successor atoms.
#
sub _AssignAtomIdentifierToAtomNeighborhood {
  my($This, $AtomID, $Radius, $BondOrdersAndAtomIdentifiersRef) = @_;

  my $NextRadius = $Radius + 1;

  # Start with the next radius level and the identifier of the atom at the current radius level...
  my @AtomIdentifiersInfo = ($NextRadius, $This->{AtomIdentifiers}{$Radius}{$AtomID});

  # Sort out successor atom bond order and identifier pairs by bond order followed by atom identifiers
  # in order to make the final atom identifier graph invariant...
  #
  for my $BondOrder (sort { $a <=> $b } keys %{$BondOrdersAndAtomIdentifiersRef}) {
    for my $SuccessorAtomIdentifier (sort { $a <=> $b } @{$BondOrdersAndAtomIdentifiersRef->{$BondOrder}}) {
      push @AtomIdentifiersInfo, ($BondOrder, $SuccessorAtomIdentifier);
    }
  }

  # NOTE(review): The values are joined without a delimiter, so different value lists could
  # produce the same string. It's left as is because changing it would change all generated
  # identifiers and fingerprints.
  my $AtomIdentifierString = join("", @AtomIdentifiersInfo);

  # Assign atom identifier to the atom at next radius level...
  $This->{AtomIdentifiers}{$NextRadius}{$AtomID} = TextUtil::HashCode($AtomIdentifierString);

  return $This;
}

# Remove duplicates atom identifiers...
#
# Duplicates are first identified by identifier value and then by structure.
#
sub _RemoveDuplicateAtomIdentifiers {
  my($This) = @_;

  $This->_RemoveDuplicateIdentifiersByValue();
  $This->_RemoveStructurallyDuplicateIdenfiers();

  return $This;
}

# Remove duplicate identifiers at each radius level by just using their value...
#
# The first atom ID found for an identifier at a radius level is tracked as unique
# identifier along with the number of atoms sharing that identifier.
#
sub _RemoveDuplicateIdentifiersByValue {
  my($This) = @_;

  for my $Radius (0 .. $This->{NeighborhoodRadius}) {
    ATOM: for my $Atom (@{$This->{Atoms}}) {
      my $AtomID = $Atom->GetID();
      if (!exists $This->{AtomIdentifiers}{$Radius}{$AtomID}) {
        next ATOM;
      }
      my $AtomIdentifier = $This->{AtomIdentifiers}{$Radius}{$AtomID};
      if (exists $This->{UniqueAtomIdentifiers}{$Radius}{$AtomIdentifier}) {
        # It's a duplicate atom identifier at this radius level...
        $This->{UniqueAtomIdentifiersCount}{$Radius}{$AtomIdentifier} += 1;
        next ATOM;
      }
      $This->{UniqueAtomIdentifiers}{$Radius}{$AtomIdentifier} = $AtomID;
      $This->{UniqueAtomIdentifiersCount}{$Radius}{$AtomIdentifier} = 1;
    }
  }
  return $This;
}

# Remove structurally duplicate identifiers at each radius level...
#
# Methodology:
#   . For unique atom identifiers at each radius level, assign complete structure features
#     in terms all the bonds involved to generate that identifier
#   . Use the complete structure features to remove atom identifiers which are
#     structurally equivalent which can also be at earlier radii levels
#
# The count of a structurally duplicate identifier is added to the count of the
# structurally unique identifier it's equivalent to.
#
sub _RemoveStructurallyDuplicateIdenfiers {
  my($This) = @_;

  # Setup structure features...
  $This->_SetupStructureFeaturesForAtomIDsInvolvedInUniqueIdentifiers();

  # Identify structurally unique identifiers...
  for my $Radius (0 .. $This->{NeighborhoodRadius}) {
    ATOMIDENTIFIER: for my $AtomIdentifier (sort { $a <=> $b } keys %{$This->{UniqueAtomIdentifiers}{$Radius}}) {
      my $AtomID = $This->{UniqueAtomIdentifiers}{$Radius}{$AtomIdentifier};

      my($SimilarAtomIdentifierRadius, $SimilarAtomIdentifier) = $This->_FindStructurallySimilarAtomIdentifier($Radius, $AtomID, $AtomIdentifier);

      # Check for a defined value instead of a true value: An identifier is a hash code and
      # a value of 0 is a valid identifier...
      if (defined $SimilarAtomIdentifier) {
        # Current atom identifier is similar to an earlier structurally unique atom identifier...
        $This->{StructurallyUniqueAtomIdentifiersCount}{$SimilarAtomIdentifierRadius}{$SimilarAtomIdentifier} += $This->{UniqueAtomIdentifiersCount}{$Radius}{$AtomIdentifier};
        next ATOMIDENTIFIER;
      }
      $This->{StructurallyUniqueAtomIdentifiers}{$Radius}{$AtomIdentifier} = $AtomID;

      # Set structurally unique atom identifier count to the unique atom identifiers count...
      $This->{StructurallyUniqueAtomIdentifiersCount}{$Radius}{$AtomIdentifier} = $This->{UniqueAtomIdentifiersCount}{$Radius}{$AtomIdentifier};
    }
  }
  return $This;
}

# Set final fingerprints vector...
#
# Marks fingerprints as generated and sets up final fingerprints values using the
# method corresponding to the fingerprints type.
#
sub _SetFinalFingerprints {
  my($This) = @_;

  # Mark successful generation of fingerprints...
  $This->{FingerprintsGenerated} = 1;

  if ($This->{Type} =~ /^ExtendedConnectivity$/i) {
    $This->_SetFinalExtendedConnectivityFingerprints();
  }
  elsif ($This->{Type} =~ /^ExtendedConnectivityCount$/i) {
    $This->_SetFinalExtendedConnectivityCountFingerprints();
  }
  elsif ($This->{Type} =~ /^ExtendedConnectivityBits$/i) {
    $This->_SetFinalExtendedConnectivityBitsFingerprints();
  }

  return $This;
}

# Set final extended connectivity fingerprints vector...
#
# Structurally unique atom identifiers are added as values to fingerprints vector
# radius level by radius level; identifiers are sorted numerically within a level.
#
sub _SetFinalExtendedConnectivityFingerprints {
  my($This) = @_;

  my @AtomIdentifiers = ();

  for my $Radius (0 .. $This->{NeighborhoodRadius}) {
    push @AtomIdentifiers, sort { $a <=> $b } keys %{$This->{StructurallyUniqueAtomIdentifiers}{$Radius}};
  }
  # Add atom identifiers to fingerprint vector...
  $This->{FingerprintsVector}->AddValues(\@AtomIdentifiers);

  return $This;
}

# Set final extended connectivity count fingerprints vector...
#
# Structurally unique atom identifiers are added to fingerprints vector as value IDs
# and their counts as the corresponding values.
#
sub _SetFinalExtendedConnectivityCountFingerprints {
  my($This) = @_;
  my(@IdentifierList, @CountList);

  for my $Level (0 .. $This->{NeighborhoodRadius}) {
    # Identifiers are sorted numerically within each radius level...
    my @SortedIdentifiers = sort { $a <=> $b } keys %{$This->{StructurallyUniqueAtomIdentifiers}{$Level}};

    push @IdentifierList, @SortedIdentifiers;
    push @CountList, map { $This->{StructurallyUniqueAtomIdentifiersCount}{$Level}{$_} } @SortedIdentifiers;
  }

  # Atom identifiers go in as value IDs and their counts as values...
  my $Vector = $This->{FingerprintsVector};
  $Vector->AddValueIDs(\@IdentifierList);
  $Vector->AddValues(\@CountList);

  return $This;
}

# Set final extended connectivity bits fingerprints vector...
#
# Each structurally unique atom identifier is used as a seed for the random number
# generator and the first random number generated in the range of the bit vector
# size is used as the position of the bit to set.
#
sub _SetFinalExtendedConnectivityBitsFingerprints {
  my($This) = @_;

  my $BitVector = $This->{FingerprintsBitVector};
  my $BitVectorSize = $This->{Size};
  my $SkipBitPosCheck = 1;

  for my $Level (0 .. $This->{NeighborhoodRadius}) {
    for my $Identifier (keys %{$This->{StructurallyUniqueAtomIdentifiers}{$Level}}) {
      my $BitPos;

      # Seed the selected random number generator and get the bit position...
      if ($This->{UsePerlCoreRandom}) {
        CORE::srand($Identifier);
        $BitPos = int(CORE::rand($BitVectorSize));
      }
      else {
        MathUtil::srandom($Identifier);
        $BitPos = int(MathUtil::random($BitVectorSize));
      }

      $BitVector->SetBit($BitPos, $SkipBitPosCheck);
    }
  }
  return $This;
}


# Identify structurally unique identifiers by comparing structure features involved in
# generating identifier by comparing it against all the previous structurally unique
# identifiers...
#
# Returns radius level and atom identifier for the structurally equivalent identifier
# or a pair of undefined values when there is none.
#
sub _FindStructurallySimilarAtomIdentifier {
  my($This, $SpecifiedRadius, $SpecifiedAtomID, $SpecifiedAtomIdentifier) = @_;

  # After duplicate removal by value, all identifiers at radius level 0 would be structurally unique...
  return (undef, undef) if $SpecifiedRadius == 0;

  my $Features = $This->{StructureFeatures};
  my $TargetAtomCount = $Features->{AtomCount}{$SpecifiedRadius}{$SpecifiedAtomID};
  my $TargetAtomIDsRef = $Features->{AtomIDs}{$SpecifiedRadius}{$SpecifiedAtomID};

  # No need to compare features at radius 0...
  for my $Level (1 .. $SpecifiedRadius) {
    CANDIDATE: for my $CandidateIdentifier (keys %{$This->{StructurallyUniqueAtomIdentifiers}{$Level}}) {
      my $CandidateAtomID = $This->{StructurallyUniqueAtomIdentifiers}{$Level}{$CandidateIdentifier};

      my $CandidateAtomCount = $Features->{AtomCount}{$Level}{$CandidateAtomID};
      my $CandidateAtomIDsRef = $Features->{AtomIDs}{$Level}{$CandidateAtomID};

      # Features with different number of atoms couldn't be structurally equivalent...
      next CANDIDATE unless $TargetAtomCount == $CandidateAtomCount;

      # For structural equivalency, all atoms in specified feature must also be present in a
      # previously identified structurally unique structure feature...
      my @MissingAtomIDs = grep { !exists $CandidateAtomIDsRef->{$_} } keys %{$TargetAtomIDsRef};
      next CANDIDATE if @MissingAtomIDs;

      # Found structurally equivalent feature...
      return ($Level, $CandidateIdentifier);
    }
  }
  return (undef, undef);
}

# Setup structure features for atom IDs involved in unique atom identifiers at all
# radii level...
#
# Structure features for an atom at a radius level correspond to the count and IDs
# of non-hydrogen neighborhood atoms accumulated from radius level 0 up to that level.
#
sub _SetupStructureFeaturesForAtomIDsInvolvedInUniqueIdentifiers {
  my($This) = @_;

  $This->_InitializeStructureFeatures();

  my $MaxRadius = $This->{NeighborhoodRadius};

  # Collect atom IDs involved in unique atom identifiers...
  my %InvolvedAtomIDs = ();
  for my $Level (0 .. $MaxRadius) {
    for my $Identifier (keys %{$This->{UniqueAtomIdentifiers}{$Level}}) {
      my $InvolvedAtomID = $This->{UniqueAtomIdentifiers}{$Level}{$Identifier};
      $InvolvedAtomIDs{$InvolvedAtomID} = $InvolvedAtomID;
    }
  }

  # Setup structure features...
  for my $Level (0 .. $MaxRadius) {
    for my $AtomID (keys %InvolvedAtomIDs) {
      my $FeatureAtomCount = 0;
      my %FeatureAtomIDs = ();

      # Start with the structure features for the atom at previous radius level...
      if ($Level >= 1) {
        my $PreviousLevel = $Level - 1;
        $FeatureAtomCount += $This->{StructureFeatures}{AtomCount}{$PreviousLevel}{$AtomID};
        %FeatureAtomIDs = %{$This->{StructureFeatures}{AtomIDs}{$PreviousLevel}{$AtomID}};
      }

      # Add all non-hydrogen neighborhood atoms at this radius level...
      if (exists($This->{AtomNeighborhoods}{$Level}) && exists($This->{AtomNeighborhoods}{$Level}{$AtomID})) {
        NEIGHBORHOOD: for my $NeighborhoodRef (@{$This->{AtomNeighborhoods}{$Level}{$AtomID}}) {
          my($NeighborAtom) = @{$NeighborhoodRef};
          next NEIGHBORHOOD if $NeighborAtom->IsHydrogen();

          my $NeighborAtomID = $NeighborAtom->GetID();
          $FeatureAtomCount++;
          $FeatureAtomIDs{$NeighborAtomID} = $NeighborAtomID;
        }
      }

      # Assign structure features to atom at this radius level...
      $This->{StructureFeatures}{AtomCount}{$Level}{$AtomID} = $FeatureAtomCount;
      $This->{StructureFeatures}{AtomIDs}{$Level}{$AtomID} = \%FeatureAtomIDs;
    }
  }
  return $This;
}

# Initialize structure features at each radius level...
#
# Sets up empty atom count and atom IDs hashes, accessed using atom IDs, at each
# radius level from 0 up to the specified neighborhood radius.
#
sub _InitializeStructureFeatures {
  my($This) = @_;

  # Initialize all structure features...
  %{$This->{StructureFeatures}} = ();

  for my $FeatureName (qw(AtomCount AtomIDs)) {
    %{$This->{StructureFeatures}{$FeatureName}} = ();

    # Structure features at specific radii accessed using atom IDs...
    for my $Level (0 .. $This->{NeighborhoodRadius}) {
      %{$This->{StructureFeatures}{$FeatureName}{$Level}} = ();
    }
  }
  return $This;
}

# Cache appropriate molecule data...
#
# Caches all the non-hydrogen atoms in the molecule.
#
sub _SetupMoleculeDataCache {
  my($This) = @_;

  # Get all non-hydrogen atoms by negating the result of the IsHydrogen atom check method...
  my $NegateAtomCheckMethod = 1;
  my @NonHydrogenAtoms = $This->GetMolecule()->GetAtoms("IsHydrogen", $NegateAtomCheckMethod);
  @{$This->{Atoms}} = @NonHydrogenAtoms;

  return $This;
}

# Clear cached molecule data...
#
sub _ClearMoleculeDataCache {
  my $This = shift;

  # Empty the cached atoms list in place...
  @{$This->{Atoms}} = ();

  return $This;
}

# Initialize atom identifier type information...
#
# Currently supported values:
#
# AtomicInvariantsAtomTypes, FunctionalClassAtomTypes, DREIDINGAtomTypes, EStateAtomTypes,
# MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes
#
# A warning is issued for any other value.
#
sub _InitializeAtomIdentifierTypeInformation {
  my($This) = @_;
  my $IdentifierType = $This->{AtomIdentifierType};

  if ($IdentifierType =~ /^AtomicInvariantsAtomTypes$/i) {
    # Setup default atomic invariants to use...
    $This->_InitializeAtomicInvariantsAtomTypesInformation();
  }
  elsif ($IdentifierType =~ /^FunctionalClassAtomTypes$/i) {
    # Setup default functional classes to use...
    $This->_InitializeFunctionalClassAtomTypesInformation();
  }
  elsif ($IdentifierType =~ /^(DREIDINGAtomTypes|EStateAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
    # Nothing to do for now...
  }
  else {
    carp "Warning: ${ClassName}->_InitializeAtomIdentifierTypeInformation: Unknown atom indentifier type $This->{AtomIdentifierType}...";
  }
  return $This;
}

# Initialize atomic invariants atom types, generated by AtomTypes::AtomicInvariantsAtomTypes
# class, to use for generating initial atom identifiers...
#
# Let:
#   AS = Atom symbol corresponding to element symbol
#
#   X<n>   = Number of non-hydrogen atom neighbors or heavy atoms attached to atom
#   BO<n> = Sum of bond orders to non-hydrogen atom neighbors or heavy atoms attached to atom
#   LBO<n> = Largest bond order of non-hydrogen atom neighbors or heavy atoms attached to atom
#   SB<n> = Number of single bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
#   DB<n> = Number of double bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
#   TB<n> = Number of triple bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
#   H<n>   = Number of implicit and explicit hydrogens for atom
#   Ar     = Aromatic annotation indicating whether atom is aromatic
#   RA     = Ring atom annotation indicating whether atom is in a ring
#   FC<+n/-n> = Formal charge assigned to atom
#   MN<n> = Mass number indicating isotope other than most abundant isotope
#   SM<n> = Spin multiplicity of atom. Possible values: 1 (singlet), 2 (doublet) or 3 (triplet)
#
# Then:
#
#   Atom type generated by AtomTypes::AtomicInvariantsAtomTypes class corresponds to:
#
#     AS.X<n>.BO<n>.LBO<n>.SB<n>.DB<n>.TB<n>.H<n>.Ar.RA.FC<+n/-n>.MN<n>.SM<n>
#
# Except for AS which is a required atomic invariant in atom types, all other atomic invariants are
# optional.
#
# Default atomic invariants used for generating initial atom identifiers are [ Ref 24 ]:
#
#   AS, X<n>, BO<n>, H<n>, FC<+n/-n>, MN<n>
#
# In addition to usage of abbreviations for specifying atomic invariants, the following descriptive words
# are also allowed:
#
# X : NumOfNonHydrogenAtomNeighbors or NumOfHeavyAtomNeighbors
# BO : SumOfBondOrdersToNonHydrogenAtoms or SumOfBondOrdersToHeavyAtoms
# LBO : LargestBondOrderToNonHydrogenAtoms or LargestBondOrderToHeavyAtoms
# SB :  NumOfSingleBondsToNonHydrogenAtoms or NumOfSingleBondsToHeavyAtoms
# DB : NumOfDoubleBondsToNonHydrogenAtoms or NumOfDoubleBondsToHeavyAtoms
# TB : NumOfTripleBondsToNonHydrogenAtoms or NumOfTripleBondsToHeavyAtoms
# H :  NumOfImplicitAndExplicitHydrogens
# Ar : Aromatic
# RA : RingAtom
# FC : FormalCharge
# MN : MassNumber
# SM : SpinMultiplicity
#
sub _InitializeAtomicInvariantsAtomTypesInformation {
  my $This = shift;

  # Default atomic invariants to use for generating initial atom identifiers
  # are: AS, X, BO, H, FC, MN
  #
  my @DefaultAtomicInvariants = qw(AS X BO H FC MN);

  # Replace contents of the list in place...
  @{$This->{AtomicInvariantsToUse}} = @DefaultAtomicInvariants;

  return $This;
}

# Initialize functional class atom types, generated by AtomTypes::FunctionalClassAtomTypes
# class, to use for generating initial atom identifiers...
#
# Let:
#   HBD: HydrogenBondDonor
#   HBA: HydrogenBondAcceptor
#   PI :  PositivelyIonizable
#   NI : NegativelyIonizable
#   Ar : Aromatic
#   Hal : Halogen
#   H : Hydrophobic
#   RA : RingAtom
#   CA : ChainAtom
#
# Then:
#
#   Functional class atom type specification for an atom corresponds to:
#
#     Ar.CA.H.HBA.HBD.Hal.NI.PI.RA
#
# Default functional classes used are: HBD, HBA, PI, NI, Ar, Hal
#
# FunctionalAtomTypes are assigned using the following definitions [ Ref 60-61, Ref 65-66 ]:
#
#   HydrogenBondDonor: NH, NH2, OH
#   HydrogenBondAcceptor: N[!H], O
#   PositivelyIonizable: +, NH2
#   NegativelyIonizable: -, C(=O)OH, S(=O)OH, P(=O)OH
#
sub _InitializeFunctionalClassAtomTypesInformation {
  my $This = shift;

  # Default functional classes to use for generating initial atom identifiers
  # are: HBD, HBA, PI, NI, Ar, Hal
  #
  my @DefaultFunctionalClasses = qw(HBD HBA PI NI Ar Hal);

  # Replace contents of the list in place...
  @{$This->{FunctionalClassesToUse}} = @DefaultFunctionalClasses;

  return $This;
}

# Set atomic invariants to use for generation of initial atom identifiers...
#
# Parameters:
#   @Values - Atomic invariants to use, specified either as a list of values or as a
#             reference to an array passed as the first value.
#
# Returns the object on success. A warning is issued and nothing is returned when no
# values are specified or when AtomIdentifierType is not AtomicInvariantsAtomTypes; in
# both cases the current atomic invariants are left untouched.
#
# Croaks when any specified value is not reported as available by
# AtomTypes::AtomicInvariantsAtomTypes::IsAtomicInvariantAvailable.
#
sub SetAtomicInvariantsToUse {
  my($This, @Values) = @_;
  my($FirstValue, $TypeOfFirstValue, $SpecifiedAtomicInvariant, @SpecifiedAtomicInvariants);

  if (!@Values) {
    carp "Warning: ${ClassName}->SetAtomicInvariantsToUse: No values specified...";
    return;
  }

  if ($This->{AtomIdentifierType} !~ /^AtomicInvariantsAtomTypes$/i) {
    # Report the warning against this method; it was previously attributed to a
    # different method name...
    carp "Warning: ${ClassName}->SetAtomicInvariantsToUse: AtomicInvariantsToUse can't be set for InitialAtomIdentifierType of $This->{AtomIdentifierType}...";
    return;
  }

  $FirstValue = $Values[0];
  $TypeOfFirstValue = ref $FirstValue;

  # Values may be passed as an array reference or as a plain list...
  @SpecifiedAtomicInvariants = ();
  if ($TypeOfFirstValue =~ /^ARRAY/) {
    push @SpecifiedAtomicInvariants, @{$FirstValue};
  }
  else {
    push @SpecifiedAtomicInvariants, @Values;
  }

  # Make sure all specified AtomicInvariants are valid before changing anything...
  for $SpecifiedAtomicInvariant (@SpecifiedAtomicInvariants) {
    if (!AtomTypes::AtomicInvariantsAtomTypes::IsAtomicInvariantAvailable($SpecifiedAtomicInvariant)) {
      croak "Error: ${ClassName}->SetAtomicInvariantsToUse: Specified atomic invariant, $SpecifiedAtomicInvariant, is not supported...\n ";
    }
  }

  # Set atomic invariants to use, replacing contents of the list in place...
  @{$This->{AtomicInvariantsToUse}} = ();
  push @{$This->{AtomicInvariantsToUse}}, @SpecifiedAtomicInvariants;

  return $This;
}

# Set functional classes to use for generation of initial atom identifiers...
#
# Values may be specified either as a list or as a reference to an array passed as the
# first value. A warning is issued, and nothing is returned, when no values are specified
# or when AtomIdentifierType is not FunctionalClassAtomTypes. An unsupported functional
# class is a fatal error.
#
sub SetFunctionalClassesToUse {
  my($This, @Values) = @_;

  unless (@Values) {
    carp "Warning: ${ClassName}->SetFunctionalClassesToUse: No values specified...";
    return;
  }

  unless ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
    carp "Warning: ${ClassName}->SetFunctionalClassesToUse: FunctionalClassesToUse can't be set for InitialAtomIdentifierType of $This->{AtomIdentifierType}...";
    return;
  }

  # Flatten the specified values: array reference as first value or a plain list...
  my @SpecifiedFunctionalClasses = (ref($Values[0]) =~ /^ARRAY/) ? @{$Values[0]} : @Values;

  # Make sure all specified FunctionalClasses are valid before changing anything...
  for my $SpecifiedFunctionalClass (@SpecifiedFunctionalClasses) {
    next if AtomTypes::FunctionalClassAtomTypes::IsFunctionalClassAvailable($SpecifiedFunctionalClass);

    croak "Error: ${ClassName}->SetFunctionalClassesToUse: Specified functional class, $SpecifiedFunctionalClass, is not supported...\n ";
  }

  # Set functional classes to use, replacing contents of the list in place...
  @{$This->{FunctionalClassesToUse}} = @SpecifiedFunctionalClasses;

  return $This;
}

# Return a string containing data for ExtendedConnectivityFingerprints object...
sub StringifyExtendedConnectivityFingerprints {
  my($This) = @_;
  my(@Pieces);

  # Pieces are concatenated as is at the end; each one carries its own "; " delimiter...
  @Pieces = ("InitialAtomIdentifierType: $This->{AtomIdentifierType}; NeighborhoodRadius: $This->{NeighborhoodRadius}");

  # Atom identifier type specific information...
  if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
    my @InvariantsOrder = AtomTypes::AtomicInvariantsAtomTypes::GetAtomicInvariantsOrder();
    my %AvailableInvariants = AtomTypes::AtomicInvariantsAtomTypes::GetAvailableAtomicInvariants();

    # Available invariants are listed as "name: description" in the invariants order...
    my @InvariantDescriptions = map { "$_: $AvailableInvariants{$_}" } @InvariantsOrder;

    push @Pieces, "; AtomicInvariantsToUse: <" . TextUtil::JoinWords(\@{$This->{AtomicInvariantsToUse}}, ", ", 0) . ">";
    push @Pieces, "; AtomicInvariantsOrder: <" . TextUtil::JoinWords(\@InvariantsOrder, ", ", 0) . ">";
    push @Pieces, "; AvailableAtomicInvariants: <" . TextUtil::JoinWords(\@InvariantDescriptions, ", ", 0) . ">";
  }
  elsif ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
    my @ClassesOrder = AtomTypes::FunctionalClassAtomTypes::GetFunctionalClassesOrder();
    my %AvailableClasses = AtomTypes::FunctionalClassAtomTypes::GetAvailableFunctionalClasses();

    # Available classes are listed as "name: description" in the classes order...
    my @ClassDescriptions = map { "$_: $AvailableClasses{$_}" } @ClassesOrder;

    push @Pieces, "; FunctionalClassesToUse: <" . TextUtil::JoinWords(\@{$This->{FunctionalClassesToUse}}, ", ", 0) . ">";
    push @Pieces, "; FunctionalClassesOrder: <" . TextUtil::JoinWords(\@ClassesOrder, ", ", 0) . ">";
    push @Pieces, "; AvailableFunctionalClasses: <" . TextUtil::JoinWords(\@ClassDescriptions, ", ", 0) . ">";
  }

  # Fingerprints type specific information...
  if ($This->{Type} =~ /^ExtendedConnectivityBits$/i) {
    # Size...
    push @Pieces, "; Size: $This->{Size}; MinSize: $This->{MinSize}; MaxSize: $This->{MaxSize}";

    # Fingerprint bit density and num of bits set...
    my $NumOfSetBits = $This->{FingerprintsBitVector}->GetNumOfSetBits();
    my $BitDensity = $This->{FingerprintsBitVector}->GetFingerprintsBitDensity();
    push @Pieces, "; NumOfOnBits: $NumOfSetBits; BitDensity: $BitDensity";

    # Bit vector is interpolated into the string...
    push @Pieces, "; FingerprintsBitVector: < $This->{FingerprintsBitVector} >";
  }
  else {
    # Number of identifiers...
    push @Pieces, "; NumOfIdentifiers: " . $This->{FingerprintsVector}->GetNumOfValues();

    # Fingerprints vector is interpolated into the string...
    push @Pieces, "; FingerprintsVector: < $This->{FingerprintsVector} >";
  }

  return join('', @Pieces);
}