1 package Fingerprints::AtomTypesFingerprints; 2 # 3 # File: AtomTypesFingerprints.pm 4 # Author: Manish Sud <msud@san.rr.com> 5 # 6 # Copyright (C) 2024 Manish Sud. All rights reserved. 7 # 8 # This file is part of MayaChemTools. 9 # 10 # MayaChemTools is free software; you can redistribute it and/or modify it under 11 # the terms of the GNU Lesser General Public License as published by the Free 12 # Software Foundation; either version 3 of the License, or (at your option) any 13 # later version. 14 # 15 # MayaChemTools is distributed in the hope that it will be useful, but without 16 # any warranty; without even the implied warranty of merchantability of fitness 17 # for a particular purpose. See the GNU Lesser General Public License for more 18 # details. 19 # 20 # You should have received a copy of the GNU Lesser General Public License 21 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or 22 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, 23 # Boston, MA, 02111-1307, USA. 24 # 25 26 use strict; 27 use Carp; 28 use Exporter; 29 use Text::ParseWords; 30 use TextUtil (); 31 use FileUtil (); 32 use MathUtil (); 33 use Fingerprints::Fingerprints; 34 use Molecule; 35 use AtomTypes::AtomicInvariantsAtomTypes; 36 use AtomTypes::DREIDINGAtomTypes; 37 use AtomTypes::EStateAtomTypes; 38 use AtomTypes::FunctionalClassAtomTypes; 39 use AtomTypes::MMFF94AtomTypes; 40 use AtomTypes::SLogPAtomTypes; 41 use AtomTypes::SYBYLAtomTypes; 42 use AtomTypes::TPSAAtomTypes; 43 use AtomTypes::UFFAtomTypes; 44 45 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); 46 47 @ISA = qw(Fingerprints::Fingerprints Exporter); 48 @EXPORT = qw(); 49 @EXPORT_OK = qw(); 50 51 %EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]); 52 53 # Setup class variables... 54 my($ClassName); 55 _InitializeClass(); 56 57 # Overload Perl functions... 58 use overload '""' => 'StringifyAtomTypesFingerprints'; 59 60 # Class constructor... 
sub new {
  my($Class, %NamesAndValues) = @_;

  # Let the parent class create the object and then bless it into this class;
  # ref($Class) covers the case where new is invoked on an existing object...
  my $This = bless($Class->SUPER::new(), ref($Class) || $Class);

  # Set up default object data before applying caller specified properties...
  $This->_InitializeAtomTypesFingerprints();
  $This->_InitializeAtomTypesFingerprintsProperties(%NamesAndValues);

  return $This;
}

# Initialize object data to its default values...
#
# Type:
#
#   AtomTypesCount - A vector containing count of atom types
#   AtomTypesBits - A bit vector indicating presence/absence of atom types
#
# AtomIdentifierType - AtomTypes to use for generating fingerprints. Currently
# supported values are:
#
#   AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes,
#   FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes,
#   TPSAAtomTypes, UFFAtomTypes
#
# AtomTypesSetToUse:
#
#   ArbitrarySize - Corresponds to only atom types detected in molecule
#   FixedSize - Corresponds to fixed number of atom types previously defined
#               for specific atom types
#
# The default AtomTypesSetToUse value for AtomTypesCount fingerprints type is
# ArbitrarySize. For AtomTypesBits fingerprints type, only FixedSize value is
# allowed.
#
sub _InitializeAtomTypesFingerprints {
  my($This) = @_;

  # Type, AtomIdentifierType and AtomTypesSetToUse stay empty until explicitly
  # set; the SetXXX methods rely on empty values to detect the first assignment...
  @{$This}{'Type', 'AtomIdentifierType', 'AtomTypesSetToUse'} = ('', '', '');

  # By default, hydrogens are ignored during fingerprint generation...
  $This->{IgnoreHydrogens} = 1;

  # Atom types assigned to atoms, keyed by atom ID...
  %{$This->{AtomTypes}} = ();

  # Number of atoms for each assigned atom type; used to generate both count
  # and bits fingerprints...
  %{$This->{AtomTypesCount}} = ();
}

# Initialize class data...
sub _InitializeClass {
  # The class name is used as a prefix in error and warning messages...
  $ClassName = __PACKAGE__;
}

# Initialize object properties....
sub _InitializeAtomTypesFingerprintsProperties {
  my($This, %NamesAndValues) = @_;
  my($Name, $MethodName, @OrderedNames, @RemainingNames);

  # Apply the specified properties through their SetXXX methods in a fixed order
  # instead of hash order. The setters depend on each other:
  #
  #   . SetAtomTypesSetToUse validates its value against Type and AtomIdentifierType
  #   . SetAtomIdentifierType resets AtomicInvariantsToUse/FunctionalClassesToUse
  #     to their default values
  #   . SetFunctionalClassesToUse ignores its values unless AtomIdentifierType is
  #     already set to FunctionalClassAtomTypes
  #
  # So Type is set first, followed by AtomIdentifierType and then everything else
  # in sorted order to get the same result irrespective of hash ordering...
  #
  @OrderedNames = grep { exists $NamesAndValues{$_} } ('Type', 'AtomIdentifierType');
  @RemainingNames = grep { !/^(Type|AtomIdentifierType)$/ } keys %NamesAndValues;
  push @OrderedNames, sort @RemainingNames;

  for $Name (@OrderedNames) {
    $MethodName = "Set${Name}";
    $This->$MethodName($NamesAndValues{$Name});
  }

  # Make sure molecule object was specified...
  if (!exists $NamesAndValues{Molecule}) {
    croak "Error: ${ClassName}->New: Object can't be instantiated without specifying molecule...";
  }

  # Make sure type and identifier type were specified...
  if (!exists $NamesAndValues{Type}) {
    croak "Error: ${ClassName}->New: Object can't be instantiated without specifying type...";
  }
  if (!exists $NamesAndValues{AtomIdentifierType}) {
    croak "Error: ${ClassName}->New: Object can't be instantiated without specifying AtomIdentifierType...";
  }

  # Set up the fingerprints vector or bit vector corresponding to the type...
  if ($This->{Type} =~ /^AtomTypesCount$/i) {
    $This->_InitializeAtomTypesCount();
  }
  elsif ($This->{Type} =~ /^AtomTypesBits$/i) {
    $This->_InitializeAtomTypesBits();
  }
  else {
    croak "Error: ${ClassName}->_InitializeAtomTypesFingerprintsProperties: Unknown AtomTypes fingerprints type: $This->{Type}; Supported fingerprints types: AtomTypesCount or AtomTypesBits...";
  }

  return $This;
}

# Initialize atom type counts...
#
# Sets the default AtomTypesSetToUse when none was specified, validates it and
# initializes the fingerprints vector. Returns the object.
#
sub _InitializeAtomTypesCount {
  my($This) = @_;

  # Set default AtomTypesSetToUse: FixedSize for TPSAAtomTypes, which doesn't
  # allow ArbitrarySize, and ArbitrarySize for everything else...
  if (!$This->{AtomTypesSetToUse}) {
    $This->{AtomTypesSetToUse} = ($This->{AtomIdentifierType} =~ /^TPSAAtomTypes$/i) ? 'FixedSize' : 'ArbitrarySize';
  }

  # Make sure AtomTypesSetToUse value is okay...
  $This->_ValidateAtomTypesSetToUse($This->{AtomTypesSetToUse});

  # Vector type and type of values...
  $This->{VectorType} = 'FingerprintsVector';

  if ($This->{AtomTypesSetToUse} =~ /^FixedSize$/i) {
    $This->{FingerprintsVectorType} = 'OrderedNumericalValues';
  }
  else {
    $This->{FingerprintsVectorType} = 'NumericalValues';
  }

  $This->_InitializeFingerprintsVector();

  return $This;
}

# Initialize atom types bits...
#
# Forces AtomTypesSetToUse to FixedSize, validates it and initializes a
# fingerprints bit vector sized to the fixed atom types set. Returns the object.
#
sub _InitializeAtomTypesBits {
  my($This) = @_;

  # FixedSize is the only AtomTypesSetToUse value used for bits fingerprints...
  $This->{AtomTypesSetToUse} = 'FixedSize';

  # Make sure AtomTypesSetToUse value is okay...
  $This->_ValidateAtomTypesSetToUse($This->{AtomTypesSetToUse});

  # Vector type...
  $This->{VectorType} = 'FingerprintsBitVector';

  # Vector size...
  $This->{Size} = $This->_GetFixedSizeAtomTypesSetSize();

  $This->_InitializeFingerprintsBitVector();

  return $This;
}

# Set type...
#
# Supported values, matched case insensitively: AtomTypesCount or AtomTypesBits.
# The stored value uses the canonical case. Croaks for an unknown value or when
# type is already set. Returns the object.
#
sub SetType {
  my($This, $Type) = @_;

  if ($This->{Type}) {
    croak "Error: ${ClassName}->SetType: Can't change type: It's already set...";
  }

  if ($Type =~ /^AtomTypesCount$/i) {
    $This->{Type} = 'AtomTypesCount';
  }
  elsif ($Type =~ /^AtomTypesBits$/i) {
    $This->{Type} = 'AtomTypesBits';
  }
  else {
    croak "Error: ${ClassName}->SetType: Unknown AtomTypes fingerprints type: $Type; Supported fingerprints types: AtomTypesCount or AtomTypesBits...";
  }
  return $This;
}

# Disable set size method...
#
# Always croaks: the size is derived from the fixed size atom types set during
# initialization of bits fingerprints and can't be set explicitly.
#
sub SetSize {
  my($This, $Type) = @_;

  croak "Error: ${ClassName}->SetSize: Can't change size: It's not allowed...";
}

# Set atom types set to use...
#
# Validates the specified value against the current Type and AtomIdentifierType
# and stores it. Croaks when the value is already set or is not valid. Returns
# the object.
#
sub SetAtomTypesSetToUse {
  my($This, $Value) = @_;

  if ($This->{AtomTypesSetToUse}) {
    croak "Error: ${ClassName}->SetAtomTypesSetToUse: Can't change size: It's already set...";
  }

  $This->_ValidateAtomTypesSetToUse($Value);

  $This->{AtomTypesSetToUse} = $Value;

  return $This;
}

# Validate AtomTypesSetToUse value...
#
# Supported values, matched case insensitively: ArbitrarySize or FixedSize.
#
# Croaks for an unknown value and for the following combinations involving Type
# and AtomIdentifierType values currently set for the object:
#
#   ArbitrarySize with AtomTypesBits fingerprints
#   FixedSize with AtomicInvariantsAtomTypes or FunctionalClassAtomTypes
#   ArbitrarySize with TPSAAtomTypes
#
# Returns the object for a valid value.
#
sub _ValidateAtomTypesSetToUse {
  my($This, $Value) = @_;

  # Match the complete value; without the end anchor, values such as
  # FixedSizeFoo are accepted here and then fail to match the exact
  # ArbitrarySize/FixedSize checks used everywhere else...
  if ($Value !~ /^(ArbitrarySize|FixedSize)$/i) {
    croak "Error: ${ClassName}->_ValidateAtomTypesSetToUse: Unknown AtomTypesSetToUse value: $Value; Supported values: ArbitrarySize or FixedSize";
  }

  if ($Value =~ /^ArbitrarySize$/i && $This->{Type} =~ /^AtomTypesBits$/i) {
    croak "Error: ${ClassName}->_ValidateAtomTypesSetToUse: Specified AtomTypesSetToUse value, $Value, is not allowed for AtomTypesBits fingerprints...";
  }

  if ($Value =~ /^FixedSize$/i && $This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
    croak "Error: ${ClassName}->_ValidateAtomTypesSetToUse: Specified AtomTypesSetToUse value, $Value, is not allowed for AtomicInvariantsAtomTypes fingerprints...";
  }

  if ($Value =~ /^FixedSize$/i && $This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
    croak "Error: ${ClassName}->_ValidateAtomTypesSetToUse: Specified AtomTypesSetToUse value, $Value, is not allowed for FunctionalClassAtomTypes fingerprints...";
  }

  if ($Value =~ /^ArbitrarySize$/i && $This->{AtomIdentifierType} =~ /^TPSAAtomTypes$/i) {
    croak "Error: ${ClassName}->_ValidateAtomTypesSetToUse: Specified AtomTypesSetToUse value, $Value, is not allowed for TPSAAtomTypes fingerprints...";
  }

  return $This;
}

# Set atom identifier type...
#
# Validates and stores the atom identifier type and initializes any information
# specific to it. Croaks when the value is not supported, is already set, or is
# not compatible with an AtomTypesSetToUse value that is already set. Returns
# the object.
#
sub SetAtomIdentifierType {
  my($This, $IdentifierType) = @_;

  if ($IdentifierType !~ /^(AtomicInvariantsAtomTypes|DREIDINGAtomTypes|EStateAtomTypes|FunctionalClassAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
    croak "Error: ${ClassName}->SetAtomIdentifierType: Specified value, $IdentifierType, for AtomIdentifierType is not vaild. Supported types in current release of MayaChemTools: AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, and UFFAtomTypes.";
  }

  if ($This->{AtomIdentifierType}) {
    croak "Error: ${ClassName}->SetAtomIdentifierType: Can't change intial atom identifier type: It's already set...";
  }

  # FixedSize atom types set is not allowed for AtomicInvariantsAtomTypes and
  # FunctionalClassAtomTypes. The check has to use the value being set: the
  # value stored in the object is always empty at this point...
  if ($IdentifierType =~ /^AtomicInvariantsAtomTypes$/i && $This->{AtomTypesSetToUse} =~ /^FixedSize$/i) {
    croak "Error: ${ClassName}->SetAtomIdentifierType: Specified AtomTypesSetToUse value, $This->{AtomTypesSetToUse}, is not allowed for AtomicInvariantsAtomTypes fingerprints...";
  }

  if ($IdentifierType =~ /^FunctionalClassAtomTypes$/i && $This->{AtomTypesSetToUse} =~ /^FixedSize$/i) {
    croak "Error: ${ClassName}->SetAtomIdentifierType: Specified AtomTypesSetToUse value, $This->{AtomTypesSetToUse}, is not allowed for FunctionalClassAtomTypes fingerprints...";
  }

  $This->{AtomIdentifierType} = $IdentifierType;

  # Initialize atom identifier type information...
  $This->_InitializeAtomIdentifierTypeInformation();

  return $This;
}

# Generate fingerprints description...
#
# Returns the explicitly set description, when available, or a description of
# the form Type:AtomIdentifierType:AtomTypesSetToUse.
#
sub GetDescription {
  my($This) = @_;

  # Is description explicitly set?
  if (exists $This->{Description}) {
    return $This->{Description};
  }

  # Generate fingerprints description...

  return "$This->{Type}:$This->{AtomIdentifierType}:$This->{AtomTypesSetToUse}";
}

# Generate atom types fingerprints...
#
# The current release of MayaChemTools supports generation of two types of AtomTypes
# fingerprints corresponding to non-hydrogen and/or hydrogen atoms:
#
#   AtomTypesCount - A vector containing count of atom types
#   AtomTypesBits - A bit vector indicating presence/absence of atom types
#
# For AtomTypesCount fingerprints, two types of atom types set size are allowed:
#
#   ArbitrarySize - Corresponds to only atom types detected in molecule
#   FixedSize - Corresponds to fixed number of atom types previously defined
#
# For AtomTypesBits fingerprints, only FixedSize atom type set is allowed.
#
# The fixed size atom type set size used during generation of fingerprints corresponding
# to FixedSize value of AtomTypesSetToUse contains all possible atom types in datafiles
# distributed with MayaChemTools release for each supported type.
#
# Combination of Type, AtomIdentifierType and AtomTypesSetToUse allows generation of
# 22 different types of AtomTypes fingerprints:
#
#   Type                  AtomIdentifierType           AtomTypesSetToUse
#
#   AtomTypesCount        AtomicInvariantsAtomTypes    ArbitrarySize
#
#   AtomTypesCount        DREIDINGAtomTypes            ArbitrarySize
#   AtomTypesCount        DREIDINGAtomTypes            FixedSize
#   AtomTypesBits         DREIDINGAtomTypes            FixedSize
#
#   AtomTypesCount        EStateAtomTypes              ArbitrarySize
#   AtomTypesCount        EStateAtomTypes              FixedSize
#   AtomTypesBits         EStateAtomTypes              FixedSize
#
#   AtomTypesCount        FunctionalClassAtomTypes     ArbitrarySize
#
#   AtomTypesCount        MMFF94AtomTypes              ArbitrarySize
#   AtomTypesCount        MMFF94AtomTypes              FixedSize
#   AtomTypesBits         MMFF94AtomTypes              FixedSize
#
#   AtomTypesCount        SLogPAtomTypes               ArbitrarySize
#   AtomTypesCount        SLogPAtomTypes               FixedSize
#   AtomTypesBits         SLogPAtomTypes               FixedSize
#
#   AtomTypesCount        SYBYLAtomTypes               ArbitrarySize
#   AtomTypesCount        SYBYLAtomTypes               FixedSize
#   AtomTypesBits         SYBYLAtomTypes               FixedSize
#
#   AtomTypesCount        TPSAAtomTypes                FixedSize
#   AtomTypesBits         TPSAAtomTypes                FixedSize
#
#   AtomTypesCount        UFFAtomTypes                 ArbitrarySize
#   AtomTypesCount        UFFAtomTypes                 FixedSize
#   AtomTypesBits         UFFAtomTypes                 FixedSize
#
# Returns the object. When atom types can't be assigned to all atoms, a warning
# is issued and the object is returned without generating fingerprints.
#
sub GenerateFingerprints {
  my($This) = @_;

  # Cache appropriate molecule data...
  $This->_SetupMoleculeDataCache();

  # Check and assign appropriate atom types...
  if (!$This->_AssignAtomTypes()) {
    # Don't leave cached atoms behind when fingerprints generation fails...
    $This->_ClearMoleculeDataCache();

    carp "Warning: ${ClassName}->GenerateFingerprints: $This->{AtomIdentifierType} fingerprints generation didn't succeed: Couldn't assign valid $This->{AtomIdentifierType} to all atoms...";
    return $This;
  }

  # Count atom types...
  $This->_CountAtomTypes();

  # Set final fingerprints...
  $This->_SetFinalFingerprints();

  # Clear cached molecule data...
  $This->_ClearMoleculeDataCache();

  return $This;
}

# Assign appropriate atom types...
#
# Creates the atom types object corresponding to AtomIdentifierType, assigns
# atom types and collects the type assigned to each cached atom, keyed by atom
# ID. Returns the object on success and nothing (false) when the atom types
# assignment is not successful. Croaks for an unknown atom identifier type.
#
sub _AssignAtomTypes {
  my($This) = @_;
  my($SpecifiedAtomTypes, $Atom, $AtomID);

  %{$This->{AtomTypes}} = ();
  $SpecifiedAtomTypes = undef;

  IDENTIFIERTYPE: {
    if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
      $SpecifiedAtomTypes = AtomTypes::AtomicInvariantsAtomTypes->new('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $This->{IgnoreHydrogens}, 'AtomicInvariantsToUse' => $This->{AtomicInvariantsToUse});
      last IDENTIFIERTYPE;
    }

    if ($This->{AtomIdentifierType} =~ /^DREIDINGAtomTypes$/i) {
      $SpecifiedAtomTypes = AtomTypes::DREIDINGAtomTypes->new('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $This->{IgnoreHydrogens});
      last IDENTIFIERTYPE;
    }

    if ($This->{AtomIdentifierType} =~ /^EStateAtomTypes$/i) {
      $SpecifiedAtomTypes = AtomTypes::EStateAtomTypes->new('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $This->{IgnoreHydrogens});
      last IDENTIFIERTYPE;
    }

    if ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
      $SpecifiedAtomTypes = AtomTypes::FunctionalClassAtomTypes->new('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $This->{IgnoreHydrogens}, 'FunctionalClassesToUse' => $This->{FunctionalClassesToUse});
      last IDENTIFIERTYPE;
    }

    if ($This->{AtomIdentifierType} =~ /^MMFF94AtomTypes$/i) {
      $SpecifiedAtomTypes = AtomTypes::MMFF94AtomTypes->new('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $This->{IgnoreHydrogens});
      last IDENTIFIERTYPE;
    }

    if ($This->{AtomIdentifierType} =~ /^SLogPAtomTypes$/i) {
      $SpecifiedAtomTypes = AtomTypes::SLogPAtomTypes->new('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $This->{IgnoreHydrogens});
      last IDENTIFIERTYPE;
    }

    if ($This->{AtomIdentifierType} =~ /^SYBYLAtomTypes$/i) {
      $SpecifiedAtomTypes = AtomTypes::SYBYLAtomTypes->new('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $This->{IgnoreHydrogens});
      last IDENTIFIERTYPE;
    }

    if ($This->{AtomIdentifierType} =~ /^TPSAAtomTypes$/i) {
      # IgnoreHydrogens is not passed for TPSA atom types...
      $SpecifiedAtomTypes = AtomTypes::TPSAAtomTypes->new('Molecule' => $This->{Molecule}, 'IgnorePhosphorus' => 0, 'IgnoreSulfur' => 0);
      last IDENTIFIERTYPE;
    }

    if ($This->{AtomIdentifierType} =~ /^UFFAtomTypes$/i) {
      $SpecifiedAtomTypes = AtomTypes::UFFAtomTypes->new('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $This->{IgnoreHydrogens});
      last IDENTIFIERTYPE;
    }

    croak "Error: ${ClassName}->_AssignAtomTypes: Unknown atom indentifier type $This->{AtomIdentifierType}...";
  }

  # Assign atom types...
  $SpecifiedAtomTypes->AssignAtomTypes();

  # Make sure atom types assignment is successful...
  if (!$SpecifiedAtomTypes->IsAtomTypesAssignmentSuccessful()) {
    # A bare return is false in both scalar and list context...
    return;
  }

  # Collect assigned atom types...
  for $Atom (@{$This->{Atoms}}) {
    $AtomID = $Atom->GetID();
    $This->{AtomTypes}{$AtomID} = $SpecifiedAtomTypes->GetAtomType($Atom);
  }

  return $This;
}

# Count atom types...
#
# Resets atom types count and counts the number of cached atoms for each
# assigned atom type. Returns the object.
#
sub _CountAtomTypes {
  my($This) = @_;

  %{$This->{AtomTypesCount}} = ();

  for my $Atom (@{$This->{Atoms}}) {
    # Atom types are keyed by atom ID; a missing count starts at zero...
    my $AtomType = $This->{AtomTypes}{$Atom->GetID()};
    $This->{AtomTypesCount}{$AtomType}++;
  }

  return $This;
}

# Set final fingerprints...
#
# Marks fingerprints as generated and fills in the count vector or the bit
# vector depending on the fingerprints type. Returns the object.
#
sub _SetFinalFingerprints {
  my($This) = @_;

  # Mark successful generation of fingerprints...
  $This->{FingerprintsGenerated} = 1;

  my $Type = $This->{Type};
  if ($Type =~ /^AtomTypesCount$/i) {
    $This->_SetFinalAtomTypesCountFingerprints();
    return $This;
  }
  if ($Type =~ /^AtomTypesBits$/i) {
    $This->_SetFinalAtomTypesBitsFingerprints();
  }

  return $This;
}

# Set final fingerprints for atom types count...
#
# For FixedSize atom types set, all atom types in the fixed set are used as
# value IDs with zero count for atom types not found in the molecule; otherwise,
# only the atom types found in the molecule are used in sorted order. Returns
# the object.
#
sub _SetFinalAtomTypesCountFingerprints {
  my($This) = @_;
  my(@IDs, @Values);

  # Atom types to report...
  if ($This->{AtomTypesSetToUse} =~ /^FixedSize$/i) {
    @IDs = @{$This->_GetFixedSizeAtomTypesSet()};
  }
  else {
    @IDs = sort keys %{$This->{AtomTypesCount}};
  }

  # ... and their counts...
  @Values = map { exists($This->{AtomTypesCount}{$_}) ? $This->{AtomTypesCount}{$_} : 0 } @IDs;

  # Add IDs and values to fingerprint vector; values are added even when there
  # are no IDs...
  $This->{FingerprintsVector}->AddValueIDs(\@IDs) if @IDs;
  $This->{FingerprintsVector}->AddValues(\@Values);

  return $This;
}

# Set final fingerprints for atom types bits...
#
# Sets the bit for each atom type in the fixed size atom types set with a
# non-zero count; the bit index corresponds to the position of the atom type in
# the fixed set. Returns the object.
#
sub _SetFinalAtomTypesBitsFingerprints {
  my($This) = @_;
  my($SkipPosCheck, $AtomTypeBitIndex);

  $SkipPosCheck = 1;
  $AtomTypeBitIndex = -1;

  for my $AtomType (@{$This->_GetFixedSizeAtomTypesSet()}) {
    $AtomTypeBitIndex++;
    if (exists($This->{AtomTypesCount}{$AtomType}) && $This->{AtomTypesCount}{$AtomType}) {
      $This->{FingerprintsBitVector}->SetBit($AtomTypeBitIndex, $SkipPosCheck);
    }
  }

  return $This;
}

# Cache appropriate molecule data...
#
# Caches atoms to use during fingerprints generation: only non-hydrogen atoms
# when hydrogens are ignored; otherwise, all atoms. Returns the object.
#
sub _SetupMoleculeDataCache {
  my($This) = @_;
  my(@GetAtomsArguments);

  # Atom check method name along with a flag to negate the check retrieves
  # all non-hydrogen atoms; no arguments retrieves all atoms...
  @GetAtomsArguments = $This->{IgnoreHydrogens} ? ("IsHydrogen", 1) : ();

  @{$This->{Atoms}} = $This->GetMolecule()->GetAtoms(@GetAtomsArguments);

  return $This;
}

# Clear cached molecule data...
#
# Empties the list of cached atoms. Returns the object.
#
sub _ClearMoleculeDataCache {
  my($This) = @_;

  @{$This->{Atoms}} = ();

  return $This;
}

# Get fixed size atom types set size...
#
# Returns the number of atom types in the fixed size atom types set for the
# current atom identifier type: only non-hydrogen atom types are counted when
# hydrogens are ignored, except for TPSAAtomTypes where all possible atom types
# are always counted. Croaks for atom identifier types without a fixed size
# atom types set.
#
sub _GetFixedSizeAtomTypesSetSize {
  my($This) = @_;
  my($IdentifierType, $IgnoreHydrogens, $AtomTypesRef);

  $IdentifierType = $This->{AtomIdentifierType};
  $IgnoreHydrogens = $This->{IgnoreHydrogens};

  if ($IdentifierType =~ /^DREIDINGAtomTypes$/i) {
    $AtomTypesRef = $IgnoreHydrogens ? AtomTypes::DREIDINGAtomTypes::GetAllPossibleDREIDINGNonHydrogenAtomTypes() : AtomTypes::DREIDINGAtomTypes::GetAllPossibleDREIDINGAtomTypes();
  }
  elsif ($IdentifierType =~ /^EStateAtomTypes$/i) {
    $AtomTypesRef = $IgnoreHydrogens ? AtomTypes::EStateAtomTypes::GetAllPossibleEStateNonHydrogenAtomTypes() : AtomTypes::EStateAtomTypes::GetAllPossibleEStateAtomTypes();
  }
  elsif ($IdentifierType =~ /^MMFF94AtomTypes$/i) {
    $AtomTypesRef = $IgnoreHydrogens ? AtomTypes::MMFF94AtomTypes::GetAllPossibleMMFF94NonHydrogenAtomTypes() : AtomTypes::MMFF94AtomTypes::GetAllPossibleMMFF94AtomTypes();
  }
  elsif ($IdentifierType =~ /^SLogPAtomTypes$/i) {
    $AtomTypesRef = $IgnoreHydrogens ? AtomTypes::SLogPAtomTypes::GetAllPossibleSLogPNonHydrogenAtomTypes() : AtomTypes::SLogPAtomTypes::GetAllPossibleSLogPAtomTypes();
  }
  elsif ($IdentifierType =~ /^SYBYLAtomTypes$/i) {
    $AtomTypesRef = $IgnoreHydrogens ? AtomTypes::SYBYLAtomTypes::GetAllPossibleSYBYLNonHydrogenAtomTypes() : AtomTypes::SYBYLAtomTypes::GetAllPossibleSYBYLAtomTypes();
  }
  elsif ($IdentifierType =~ /^TPSAAtomTypes$/i) {
    $AtomTypesRef = AtomTypes::TPSAAtomTypes::GetAllPossibleTPSAAtomTypes();
  }
  elsif ($IdentifierType =~ /^UFFAtomTypes$/i) {
    $AtomTypesRef = $IgnoreHydrogens ? AtomTypes::UFFAtomTypes::GetAllPossibleUFFNonHydrogenAtomTypes() : AtomTypes::UFFAtomTypes::GetAllPossibleUFFAtomTypes();
  }
  else {
    croak "Error: ${ClassName}->_GetFixedSizeAtomTypesSetSize: Atom types set size for atom indentifier type, $This->{AtomIdentifierType}, is not available...";
  }

  return scalar @{$AtomTypesRef};
}

# Get fixed size atom types set...
#
# Returns a reference to the list of atom types in the fixed size atom types
# set for the current atom identifier type: only non-hydrogen atom types when
# hydrogens are ignored, except for TPSAAtomTypes where all possible atom types
# are always used. Croaks for atom identifier types without a fixed size atom
# types set.
#
sub _GetFixedSizeAtomTypesSet {
  my($This) = @_;
  my($IdentifierType, $IgnoreHydrogens);

  $IdentifierType = $This->{AtomIdentifierType};
  $IgnoreHydrogens = $This->{IgnoreHydrogens};

  if ($IdentifierType =~ /^DREIDINGAtomTypes$/i) {
    return $IgnoreHydrogens ? AtomTypes::DREIDINGAtomTypes::GetAllPossibleDREIDINGNonHydrogenAtomTypes() : AtomTypes::DREIDINGAtomTypes::GetAllPossibleDREIDINGAtomTypes();
  }
  if ($IdentifierType =~ /^EStateAtomTypes$/i) {
    return $IgnoreHydrogens ? AtomTypes::EStateAtomTypes::GetAllPossibleEStateNonHydrogenAtomTypes() : AtomTypes::EStateAtomTypes::GetAllPossibleEStateAtomTypes();
  }
  if ($IdentifierType =~ /^MMFF94AtomTypes$/i) {
    return $IgnoreHydrogens ? AtomTypes::MMFF94AtomTypes::GetAllPossibleMMFF94NonHydrogenAtomTypes() : AtomTypes::MMFF94AtomTypes::GetAllPossibleMMFF94AtomTypes();
  }
  if ($IdentifierType =~ /^SLogPAtomTypes$/i) {
    return $IgnoreHydrogens ? AtomTypes::SLogPAtomTypes::GetAllPossibleSLogPNonHydrogenAtomTypes() : AtomTypes::SLogPAtomTypes::GetAllPossibleSLogPAtomTypes();
  }
  if ($IdentifierType =~ /^SYBYLAtomTypes$/i) {
    return $IgnoreHydrogens ? AtomTypes::SYBYLAtomTypes::GetAllPossibleSYBYLNonHydrogenAtomTypes() : AtomTypes::SYBYLAtomTypes::GetAllPossibleSYBYLAtomTypes();
  }
  if ($IdentifierType =~ /^TPSAAtomTypes$/i) {
    return AtomTypes::TPSAAtomTypes::GetAllPossibleTPSAAtomTypes();
  }
  if ($IdentifierType =~ /^UFFAtomTypes$/i) {
    return $IgnoreHydrogens ? AtomTypes::UFFAtomTypes::GetAllPossibleUFFNonHydrogenAtomTypes() : AtomTypes::UFFAtomTypes::GetAllPossibleUFFAtomTypes();
  }

  croak "Error: ${ClassName}->_GetFixedSizeAtomTypesSet: Atom types set for atom indentifier type, $This->{AtomIdentifierType}, is not available...";
}

# Initialize atom identifier type information...
#
# Current supported values:
#
#   AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes,
#   MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes
#
# Only AtomicInvariantsAtomTypes and FunctionalClassAtomTypes need additional
# information to be initialized. Croaks for an unknown value. Returns the object.
#
sub _InitializeAtomIdentifierTypeInformation {
  my($This) = @_;
  my($IdentifierType);

  $IdentifierType = $This->{AtomIdentifierType};

  if ($IdentifierType =~ /^AtomicInvariantsAtomTypes$/i) {
    $This->_InitializeAtomicInvariantsAtomTypesInformation();
    return $This;
  }

  if ($IdentifierType =~ /^FunctionalClassAtomTypes$/i) {
    $This->_InitializeFunctionalClassAtomTypesInformation();
    return $This;
  }

  if ($IdentifierType =~ /^(DREIDINGAtomTypes|EStateAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
    # Nothing to do for now...
    return $This;
  }

  croak "Error: ${ClassName}->_InitializeAtomIdentifierTypeInformation: Unknown atom indentifier type $This->{AtomIdentifierType}...";
}

# Initialize atomic invariants atom types to use for generating atom type IDs...
#
# Let:
#   AS = Atom symbol corresponding to element symbol
#
#   X<n>   = Number of non-hydrogen atom neighbors or heavy atoms attached to atom
#   BO<n> = Sum of bond orders to non-hydrogen atom neighbors or heavy atoms attached to atom
#   LBO<n> = Largest bond order of non-hydrogen atom neighbors or heavy atoms attached to atom
#   SB<n> = Number of single bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
#   DB<n> = Number of double bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
#   TB<n> = Number of triple bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
#   H<n>   = Number of implicit and explicit hydrogens for atom
#   Ar     = Aromatic annotation indicating whether atom is aromatic
#   RA     = Ring atom annotation indicating whether atom is in a ring
#   FC<+n/-n> = Formal charge assigned to atom
#   MN<n> = Mass number indicating isotope other than most abundant isotope
#   SM<n> = Spin multiplicity of atom. Possible values: 1 (singlet), 2 (doublet) or 3 (triplet)
#
# Then:
#
#   AtomID generated by AtomTypes::AtomicInvariantsAtomTypes class corresponds to:
#
#     AS.X<n>.BO<n>.LBO<n>.<SB><n>.<DB><n>.<TB><n>.H<n>.Ar.RA.FC<+n/-n>.MN<n>.SM<n>
#
# Except for AS, which is a required atomic invariant in atom type AtomIDs, all other atomic
# invariants are optional. Default atomic invariants used for AtomID are: AS, X<n>, BO<n>,
# H<n>, FC<+n/-n>. AtomID specification doesn't include atomic invariants with zero or
# undefined values.
#
# Examples of AtomIDs:
#
#   O.X1.BO1.H1 - Hydroxyl oxygen in carboxylate with attached hydrogen and no explicit charge
#   O.X1.BO1.FC-1 - Hydroxyl oxygen in carboxylate with explicit negative charge
#   O.X1.BO2 - Carbonyl oxygen in carboxylate with double bond to carbon
#   O.X2.BO2 - Hydroxyl oxygen in carboxylate attached to carbonyl carbon and another heavy atom
#
#   C.X2.BO3.H1.Ar - Aromatic carbon
#
sub _InitializeAtomicInvariantsAtomTypesInformation {
  my($This) = @_;

  # Default atomic invariants to use for generating atom IDs: AS, X, BO, H, FC
  #
  @{$This->{AtomicInvariantsToUse}} = qw(AS X BO H FC);

  return $This;
}

# Initialize functional class atom types, generated by AtomTypes::FunctionalClassAtomTypes
# class, to use for generating atom identifiers...
#
# Let:
#   HBD: HydrogenBondDonor
#   HBA: HydrogenBondAcceptor
#   PI :  PositivelyIonizable
#   NI : NegativelyIonizable
#   Ar : Aromatic
#   Hal : Halogen
#   H : Hydrophobic
#   RA : RingAtom
#   CA : ChainAtom
#
# Then:
#
#   Functional class atom type specification for an atom corresponds to:
#
#     Ar.CA.H.HBA.HBD.Hal.NI.PI.RA
#
#   Default functional classes used are: HBD, HBA, PI, NI, Ar, Hal
#
#   FunctionalAtomTypes are assigned using the following definitions [ Ref 60-61, Ref 65-66 ]:
#
#     HydrogenBondDonor: NH, NH2, OH
#     HydrogenBondAcceptor: N[!H], O
#     PositivelyIonizable: +, NH2
#     NegativelyIonizable: -, C(=O)OH, S(=O)OH, P(=O)OH
#
sub _InitializeFunctionalClassAtomTypesInformation {
  my($This) = @_;

  # Default functional class atom types to use for generating atom identifiers
  # are: HBD, HBA, PI, NI, Ar, Hal
  #
  @{$This->{FunctionalClassesToUse}} = qw(HBD HBA PI NI Ar Hal);

  return $This;
}

# Set atomic invariants to use for atom IDs...
#
# The values may be specified as a list or as a reference to an array. All
# values are validated before any of them is stored; croaks for an unsupported
# atomic invariant. Issues a warning and returns nothing when no values are
# specified; otherwise, returns the object.
#
sub SetAtomicInvariantsToUse {
  my($This, @Values) = @_;
  my($SpecifiedAtomicInvariant, @SpecifiedAtomicInvariants);

  if (!@Values) {
    carp "Warning: ${ClassName}->SetAtomicInvariantsToUse: No values specified...";
    return;
  }

  # Only the first value is checked to detect an array reference...
  @SpecifiedAtomicInvariants = (ref($Values[0]) =~ /^ARRAY/) ? @{$Values[0]} : @Values;

  # Make sure specified AtomicInvariants are valid...
  for $SpecifiedAtomicInvariant (@SpecifiedAtomicInvariants) {
    if (!AtomTypes::AtomicInvariantsAtomTypes::IsAtomicInvariantAvailable($SpecifiedAtomicInvariant)) {
      croak "Error: ${ClassName}->SetAtomicInvariantsToUse: Specified atomic invariant, $SpecifiedAtomicInvariant, is not supported...\n ";
    }
  }

  # Set atomic invariants to use...
  @{$This->{AtomicInvariantsToUse}} = @SpecifiedAtomicInvariants;

  return $This;
}

# Set functional classes to use for generation of initial atom identifiers...
#
# The values may be specified as a list or as a reference to an array. All
# values are validated before any of them is stored; croaks for an unsupported
# functional class. Issues a warning and returns nothing when no values are
# specified or when atom identifier type is not FunctionalClassAtomTypes;
# otherwise, returns the object.
#
sub SetFunctionalClassesToUse {
  my($This, @Values) = @_;
  my($SpecifiedFunctionalClass, @SpecifiedFunctionalClasses);

  if (!@Values) {
    carp "Warning: ${ClassName}->SetFunctionalClassesToUse: No values specified...";
    return;
  }

  if ($This->{AtomIdentifierType} !~ /^FunctionalClassAtomTypes$/i) {
    carp "Warning: ${ClassName}->SetFunctionalClassesToUse: FunctionalClassesToUse can't be set for InitialAtomIdentifierType of $This->{AtomIdentifierType}...";
    return;
  }

  # Only the first value is checked to detect an array reference...
  @SpecifiedFunctionalClasses = (ref($Values[0]) =~ /^ARRAY/) ? @{$Values[0]} : @Values;

  # Make sure specified FunctionalClasses are valid...
  for $SpecifiedFunctionalClass (@SpecifiedFunctionalClasses) {
    if (!AtomTypes::FunctionalClassAtomTypes::IsFunctionalClassAvailable($SpecifiedFunctionalClass)) {
      croak "Error: ${ClassName}->SetFunctionalClassesToUse: Specified functional class, $SpecifiedFunctionalClass, is not supported...\n ";
    }
  }

  # Set functional classes to use...
  @{$This->{FunctionalClassesToUse}} = @SpecifiedFunctionalClasses;

  return $This;
}

# Return a string containing data for AtomTypesFingerprints object...
# Builds a string of the form "Name: Value; Name: Value; ..." containing type,
# atom identifier type, atom types set to use, information specific to atomic
# invariants or functional class atom types, hydrogens handling and the
# fingerprints vector or bit vector.
#
sub StringifyAtomTypesFingerprints {
  my($This) = @_;
  my(@Pieces);

  @Pieces = ("Type: $This->{Type}; AtomIdentifierType: $This->{AtomIdentifierType}; AtomTypesSetToUse: $This->{AtomTypesSetToUse}");

  if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
    my(@AtomicInvariants, @AtomicInvariantsOrder, %AvailableAtomicInvariants);

    @AtomicInvariantsOrder = AtomTypes::AtomicInvariantsAtomTypes::GetAtomicInvariantsOrder();
    %AvailableAtomicInvariants = AtomTypes::AtomicInvariantsAtomTypes::GetAvailableAtomicInvariants();

    # Available atomic invariants are listed in their canonical order...
    @AtomicInvariants = map { "$_: $AvailableAtomicInvariants{$_}" } @AtomicInvariantsOrder;

    push @Pieces, "; AtomicInvariantsToUse: <" . TextUtil::JoinWords(\@{$This->{AtomicInvariantsToUse}}, ", ", 0) . ">";
    push @Pieces, "; AtomicInvariantsOrder: <" . TextUtil::JoinWords(\@AtomicInvariantsOrder, ", ", 0) . ">";
    push @Pieces, "; AvailableAtomicInvariants: <" . TextUtil::JoinWords(\@AtomicInvariants, ", ", 0) . ">";
  }
  elsif ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
    my(@FunctionalClasses, @FunctionalClassesOrder, %AvailableFunctionalClasses);

    @FunctionalClassesOrder = AtomTypes::FunctionalClassAtomTypes::GetFunctionalClassesOrder();
    %AvailableFunctionalClasses = AtomTypes::FunctionalClassAtomTypes::GetAvailableFunctionalClasses();

    # Available functional classes are listed in their canonical order...
    @FunctionalClasses = map { "$_: $AvailableFunctionalClasses{$_}" } @FunctionalClassesOrder;

    push @Pieces, "; FunctionalClassesToUse: <" . TextUtil::JoinWords(\@{$This->{FunctionalClassesToUse}}, ", ", 0) . ">";
    push @Pieces, "; FunctionalClassesOrder: <" . TextUtil::JoinWords(\@FunctionalClassesOrder, ", ", 0) . ">";
    push @Pieces, "; AvailableFunctionalClasses: <" . TextUtil::JoinWords(\@FunctionalClasses, ", ", 0) . ">";
  }

  push @Pieces, "; IgnoreHydrogens: " . ($This->{IgnoreHydrogens} ? "Yes" : "No");

  # The vector objects are stringified during interpolation...
  if ($This->{Type} =~ /^AtomTypesCount$/i) {
    push @Pieces, "; FingerprintsVector: < $This->{FingerprintsVector} >";
  }
  elsif ($This->{Type} =~ /^AtomTypesBits$/i) {
    push @Pieces, "; FingerprintsBitVector: < $This->{FingerprintsBitVector} >";
  }

  return join('', @Pieces);
}
