MayaChemTools

   1 package Fingerprints::AtomNeighborhoodsFingerprints;
   2 #
   3 # File: AtomNeighborhoodsFingerprints.pm
   4 # Author: Manish Sud <msud@san.rr.com>
   5 #
   6 # Copyright (C) 2024 Manish Sud. All rights reserved.
   7 #
   8 # This file is part of MayaChemTools.
   9 #
  10 # MayaChemTools is free software; you can redistribute it and/or modify it under
  11 # the terms of the GNU Lesser General Public License as published by the Free
  12 # Software Foundation; either version 3 of the License, or (at your option) any
  13 # later version.
  14 #
  15 # MayaChemTools is distributed in the hope that it will be useful, but without
  16 # any warranty; without even the implied warranty of merchantability of fitness
  17 # for a particular purpose.  See the GNU Lesser General Public License for more
  18 # details.
  19 #
  20 # You should have received a copy of the GNU Lesser General Public License
  21 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  22 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  23 # Boston, MA, 02111-1307, USA.
  24 #
  25 
  26 use strict;
  27 use Carp;
  28 use Exporter;
  29 use Fingerprints::Fingerprints;
  30 use TextUtil ();
  31 use Molecule;
  32 use AtomTypes::AtomicInvariantsAtomTypes;
  33 use AtomTypes::DREIDINGAtomTypes;
  34 use AtomTypes::EStateAtomTypes;
  35 use AtomTypes::FunctionalClassAtomTypes;
  36 use AtomTypes::MMFF94AtomTypes;
  37 use AtomTypes::SLogPAtomTypes;
  38 use AtomTypes::SYBYLAtomTypes;
  39 use AtomTypes::TPSAAtomTypes;
  40 use AtomTypes::UFFAtomTypes;
  41 
  42 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
  43 
  44 @ISA = qw(Fingerprints::Fingerprints Exporter);
  45 @EXPORT = qw();
  46 @EXPORT_OK = qw();
  47 
  48 %EXPORT_TAGS = (all  => [@EXPORT, @EXPORT_OK]);
  49 
  50 # Setup class variables...
  51 my($ClassName);
  52 _InitializeClass();
  53 
  54 # Overload Perl functions...
  55 use overload '""' => 'StringifyAtomNeighborhoodsFingerprints';
  56 
  57 # Class constructor...
  58 sub new {
  59   my($Class, %NamesAndValues) = @_;
  60 
  61   # Initialize object...
  62   my $This = $Class->SUPER::new();
  63   bless $This, ref($Class) || $Class;
  64   $This->_InitializeAtomNeighborhoodsFingerprints();
  65 
  66   $This->_InitializeAtomNeighborhoodsFingerprintsProperties(%NamesAndValues);
  67 
  68   return $This;
  69 }
  70 
  71 # Initialize object data...
  72 #
  73 sub _InitializeAtomNeighborhoodsFingerprints {
  74   my($This) = @_;
  75 
  76   # Type of fingerprint...
  77   $This->{Type} = 'AtomNeighborhoods';
  78 
  79   # Type of vector...
  80   $This->{VectorType} = 'FingerprintsVector';
  81 
  82   # Type of FingerprintsVector...
  83   $This->{FingerprintsVectorType} = 'AlphaNumericalValues';
  84 
  85   # Minimum and maximum atomic neighborhoods radii...
  86   $This->{MinNeighborhoodRadius} = 0;
  87   $This->{MaxNeighborhoodRadius} = 2;
  88 
  89   # Atom identifier type to use for atom IDs in atom neighborhood atoms...
  90   #
  91   # Currently supported values are: AtomicInvariantsAtomTypes, DREIDINGAtomTypes,
  92   # EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes,
  93   # SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes
  94   #
  95   $This->{AtomIdentifierType} = '';
  96 
  97   # Atom types assigned to each heavy atom...
  98   %{$This->{AssignedAtomTypes}} = ();
  99 
 100   # Atom neighorhoods with in specified atom radii..
 101   %{$This->{AtomNeighborhoods}} = ();
 102 
 103   # Atom neighborhoods atom types count at different neighborhoods...
 104   %{$This->{NeighborhoodAtomTypesCount}} = ();
 105 
 106   # Atom neighborhood identifiers using specified atom identifier types methodology...
 107   @{$This->{AtomNeighborhoodsIdentifiers}} = ();
 108 }
 109 
 110 # Initialize class ...
 111 sub _InitializeClass {
 112   #Class name...
 113   $ClassName = __PACKAGE__;
 114 }
 115 
 116 # Initialize object properties....
 117 sub _InitializeAtomNeighborhoodsFingerprintsProperties {
 118   my($This, %NamesAndValues) = @_;
 119 
 120   my($Name, $Value, $MethodName);
 121   while (($Name, $Value) = each  %NamesAndValues) {
 122     $MethodName = "Set${Name}";
 123     $This->$MethodName($Value);
 124   }
 125 
 126   # Make sure molecule object was specified...
 127   if (!exists $NamesAndValues{Molecule}) {
 128     croak "Error: ${ClassName}->New: Object can't be instantiated without specifying molecule...";
 129   }
 130   if (exists $NamesAndValues{Size}) {
 131     croak "Error: ${ClassName}->New: Object can't be instantiated with a user specified size: It's an arbitrary length vector...";
 132   }
 133   if (!exists $NamesAndValues{AtomIdentifierType}) {
 134     croak "Error: ${ClassName}->New: Object can't be instantiated without specifying AtomIdentifierType...";
 135   }
 136 
 137   $This->_InitializeFingerprintsVector();
 138 
 139   return $This;
 140 }
 141 
 142 # Set atom identifier type..
 143 #
 144 sub SetAtomIdentifierType {
 145   my($This, $IdentifierType) = @_;
 146 
 147   if ($IdentifierType !~ /^(AtomicInvariantsAtomTypes|DREIDINGAtomTypes|EStateAtomTypes|FunctionalClassAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
 148     croak "Error: ${ClassName}->SetAtomIdentifierType: Specified value, $IdentifierType, for AtomIdentifierType is not vaild. Supported types in current release of MayaChemTools: AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, and UFFAtomTypes.";
 149   }
 150 
 151   if ($This->{AtomIdentifierType}) {
 152     croak "Error: ${ClassName}->SetAtomIdentifierType: Can't change intial atom identifier type:  It's already set...";
 153   }
 154 
 155   $This->{AtomIdentifierType} = $IdentifierType;
 156 
 157   # Initialize atom identifier type information...
 158   $This->_InitializeAtomIdentifierTypeInformation();
 159 
 160   return $This;
 161 }
 162 
 163 # Set minimum atom neighborhood radius...
 164 #
 165 sub SetMinNeighborhoodRadius {
 166   my($This, $Value) = @_;
 167 
 168   if (!TextUtil::IsInteger($Value)) {
 169     croak "Error: ${ClassName}->SetMinNeighborhoodRadius: MinNeighborhoodRadius value, $Value, is not valid:  It must be an  integer...";
 170   }
 171 
 172   if ($Value < 0 ) {
 173     croak "Error: ${ClassName}->SetMinNeighborhoodRadius: MinNeighborhoodRadius value, $Value, is not valid:  It must be >= 0...";
 174   }
 175   $This->{MinNeighborhoodRadius} = $Value;
 176 
 177   return $This;
 178 }
 179 
 180 # Set maximum atom neighborhood radius...
 181 #
 182 sub SetMaxNeighborhoodRadius {
 183   my($This, $Value) = @_;
 184 
 185   if (!TextUtil::IsInteger($Value)) {
 186     croak "Error: ${ClassName}->SetMaxNeighborhoodRadius: MaxNeighborhoodRadius value, $Value, is not valid:  It must be an  integer...";
 187   }
 188 
 189   if ($Value < 0 ) {
 190     croak "Error: ${ClassName}->SetMaxNeighborhoodRadius: MaxNeighborhoodRadius value, $Value, is not valid:  It must be >= 0...";
 191   }
 192   $This->{MaxNeighborhoodRadius} = $Value;
 193 
 194   return $This;
 195 }
 196 
 197 # Generate fingerprints description...
 198 #
 199 sub GetDescription {
 200   my($This) = @_;
 201 
 202   # Is description explicity set?
 203   if (exists $This->{Description}) {
 204     return $This->{Description};
 205   }
 206 
 207   # Generate fingerprints description...
 208 
 209   return "$This->{Type}:$This->{AtomIdentifierType}:MinRadius$This->{MinNeighborhoodRadius}:MaxRadius$This->{MaxNeighborhoodRadius}";
 210 }
 211 
 212 # Generate atom neighborhood [ Ref 53-56, Ref 73 ] fingerprints...
 213 #
 214 # Methodology:
 215 #   . Assign atom types to all non-hydrogen atoms in the molecule
 216 #   . Get atom neighborhoods up to MaxNeighborhoodRadis
 217 #   . Count unqiue atom types at each neighborhood radii for all heavy atoms
 218 #   . Generate neighborhood identifiers for all neighborhoods around central
 219 #     heavy atom
 220 #      . Atom neighborhood identifier for a specific radii is generated using neighborhood
 221 #        radius, assigned atom type and its count as follows:
 222 #
 223 #            NR<n>-<AtomType>-ATC<n>
 224 #
 225 #      . Atom neighborhood identifier for a central atom at all specified radii is generated
 226 #        by concatenating neighborhood identifiers at each radii by colon:
 227 #
 228 #            NR<n>-<AtomType>-ATC<n>:NR<n>-<AtomType>-ATC<n>:
 229 #
 230 #   . Set final fingerprints as list of neighborhood atom indentifiers
 231 #
 232 sub GenerateFingerprints {
 233   my($This) = @_;
 234 
 235   if ($This->{MinNeighborhoodRadius} > $This->{MaxNeighborhoodRadius}) {
 236     croak "Error: ${ClassName}->GenerateFingerprints: No fingerpritns generated: MinLength, $This->{MinNeighborhoodRadius}, must be less than MaxLength, $This->{MaxNeighborhoodRadius}...";
 237   }
 238 
 239   # Cache appropriate molecule data...
 240   $This->_SetupMoleculeDataCache();
 241 
 242   # Assign atom types to all heavy atoms...
 243   if (!$This->_AssignAtomTypes()) {
 244     carp "Warning: ${ClassName}->GenerateFingerprints: $This->{AtomIdentifierType} fingerprints generation didn't succeed: Couldn't assign valid $This->{AtomIdentifierType} to all atoms...";
 245     return $This;
 246   }
 247 
 248   # Intialize atom neighborhoods information...
 249   $This->_InitializeAtomNeighborhoods();
 250 
 251   # Identify atom neighborhoods with in specified radii...
 252   $This->_GetAtomNeighborhoods();
 253 
 254   # Count atom neighborhoods atom types...
 255   $This->_CountAtomNeighborhoodsAtomTypes();
 256 
 257   # Genenerate atom neighborhood identifiers...
 258   $This->_GenerateAtomNeighborhoodIdentifiers();
 259 
 260   # Set final fingerprints...
 261   $This->_SetFinalFingerprints();
 262 
 263   # Clear cached molecule data...
 264   $This->_ClearMoleculeDataCache();
 265 
 266   return $This;
 267 }
 268 
 269 # Assign appropriate atom types to all heavy atoms...
 270 #
 271 sub _AssignAtomTypes {
 272   my($This) = @_;
 273   my($SpecifiedAtomTypes, $Atom, $AtomID, $IgnoreHydrogens);
 274 
 275   %{$This->{AssignedAtomTypes}} = ();
 276   $IgnoreHydrogens = 1;
 277 
 278   $SpecifiedAtomTypes = undef;
 279 
 280   IDENTIFIERTYPE: {
 281     if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
 282       $SpecifiedAtomTypes = new AtomTypes::AtomicInvariantsAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens, 'AtomicInvariantsToUse' => $This->{AtomicInvariantsToUse});
 283       last IDENTIFIERTYPE;
 284     }
 285 
 286     if ($This->{AtomIdentifierType} =~ /^DREIDINGAtomTypes$/i) {
 287       $SpecifiedAtomTypes = new AtomTypes::DREIDINGAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens);
 288       last IDENTIFIERTYPE;
 289     }
 290 
 291     if ($This->{AtomIdentifierType} =~ /^EStateAtomTypes$/i) {
 292       $SpecifiedAtomTypes = new AtomTypes::EStateAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens);
 293       last IDENTIFIERTYPE;
 294     }
 295 
 296     if ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
 297       $SpecifiedAtomTypes = new AtomTypes::FunctionalClassAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens, 'FunctionalClassesToUse' => $This->{FunctionalClassesToUse});
 298       last IDENTIFIERTYPE;
 299     }
 300 
 301     if ($This->{AtomIdentifierType} =~ /^MMFF94AtomTypes$/i) {
 302       $SpecifiedAtomTypes = new AtomTypes::MMFF94AtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens);
 303       last IDENTIFIERTYPE;
 304     }
 305 
 306     if ($This->{AtomIdentifierType} =~ /^SLogPAtomTypes$/i) {
 307       $SpecifiedAtomTypes = new AtomTypes::SLogPAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens);
 308       last IDENTIFIERTYPE;
 309     }
 310     if ($This->{AtomIdentifierType} =~ /^SYBYLAtomTypes$/i) {
 311       $SpecifiedAtomTypes = new AtomTypes::SYBYLAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens);
 312       last IDENTIFIERTYPE;
 313     }
 314 
 315     if ($This->{AtomIdentifierType} =~ /^TPSAAtomTypes$/i) {
 316       $SpecifiedAtomTypes = new AtomTypes::TPSAAtomTypes('Molecule' => $This->{Molecule}, 'IgnorePhosphorus' => 0, 'IgnoreSulfur' => 0);
 317       last IDENTIFIERTYPE;
 318     }
 319 
 320     if ($This->{AtomIdentifierType} =~ /^UFFAtomTypes$/i) {
 321       $SpecifiedAtomTypes = new AtomTypes::UFFAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens);
 322       last IDENTIFIERTYPE;
 323     }
 324 
 325     croak "Error: ${ClassName}->_AssignAtomTypes: Unknown atom indentifier type $This->{AtomIdentifierType}...";
 326   }
 327 
 328   # Assign atom types...
 329   $SpecifiedAtomTypes->AssignAtomTypes();
 330 
 331   # Make sure atom types assignment is successful...
 332   if (!$SpecifiedAtomTypes->IsAtomTypesAssignmentSuccessful()) {
 333     return undef;
 334   }
 335 
 336   # Collect assigned atom types...
 337   ATOM: for $Atom (@{$This->{Atoms}}) {
 338     if ($Atom->IsHydrogen()) {
 339       next ATOM;
 340     }
 341     $AtomID = $Atom->GetID();
 342     $This->{AssignedAtomTypes}{$AtomID} = $SpecifiedAtomTypes->GetAtomType($Atom);
 343   }
 344 
 345   return $This;
 346 }
 347 
 348 # Initialize topological atom pairs between specified distance range...
 349 #
 350 sub _InitializeAtomNeighborhoods {
 351   my($This) = @_;
 352   my($Radius);
 353 
 354   # Initialize atom neighborhood count information between specified radii...
 355   %{$This->{NeighborhoodAtomTypesCount}} = ();
 356 
 357   for $Radius ($This->{MinNeighborhoodRadius} .. $This->{MaxNeighborhoodRadius}) {
 358     %{$This->{NeighborhoodAtomTypesCount}{$Radius}} = ();
 359   }
 360 
 361   # Initialize atom neighborhoods atoms information at all specified radii...
 362   #
 363   %{$This->{AtomNeighborhoods}} = ();
 364 
 365   for $Radius (0 .. $This->{MaxNeighborhoodRadius}) {
 366     %{$This->{AtomNeighborhoods}{$Radius}} = ();
 367   }
 368 
 369   return $This;
 370 }
 371 
 372 # Collect atom neighborhoods upto maximum neighborhood radius...
 373 #
 374 # Notes:
 375 #  . Fingerprints are only generated for neighborhoods between specified minimum
 376 #    and maximum neighborhood radii.
 377 #
 378 sub _GetAtomNeighborhoods {
 379   my($This) = @_;
 380   my($Atom, $AtomID, $MaxRadius, $Radius, $Molecule);
 381 
 382   $MaxRadius = $This->{MaxNeighborhoodRadius};
 383   $Molecule = $This->GetMolecule();
 384 
 385   # Collect atom neighborhoods...
 386 
 387   ATOM: for $Atom (@{$This->{Atoms}}) {
 388     $AtomID = $Atom->GetID();
 389     $Radius = 0;
 390 
 391     if ($MaxRadius == 0) {
 392       # Atom is its own neighborhood at 0 radius...
 393       my(@AtomNeighborhoodsAtoms);
 394 
 395       @AtomNeighborhoodsAtoms = ($Atom);
 396       $This->{AtomNeighborhoods}{$Radius}{$AtomID} = \@AtomNeighborhoodsAtoms;
 397 
 398       next ATOM;
 399     }
 400 
 401     # Collect available atom neighborhoods at different neighborhood radii levels...
 402     my($AtomNeighborhoodAtomsRef);
 403 
 404     for $AtomNeighborhoodAtomsRef ($Molecule->GetAtomNeighborhoodsWithRadiusUpto($Atom, $MaxRadius)) {
 405       $This->{AtomNeighborhoods}{$Radius}{$AtomID} = $AtomNeighborhoodAtomsRef;
 406       $Radius++;
 407     }
 408   }
 409   return $This;
 410 }
 411 
 412 # Count atom neighborhoods atom types for each non-hydrogen central atoms with
 413 # neighborhoods in specified radii range...
 414 #
 415 sub _CountAtomNeighborhoodsAtomTypes {
 416   my($This) = @_;
 417   my($AtomID, $NeighborhoodAtomID, $Radius, $NeighborhoodAtom, $NeighborhoodAtomType, $AtomNeighborhoodAtomsRef);
 418 
 419   RADIUS: for $Radius (sort { $a <=> $b } keys %{$This->{AtomNeighborhoods}} ) {
 420     if ($Radius < $This->{MinNeighborhoodRadius} || $Radius > $This->{MaxNeighborhoodRadius}) {
 421       next RADIUS;
 422     }
 423     # Go over the neighborhoods of each atom at the current radius...
 424     for $AtomID (keys %{$This->{AtomNeighborhoods}{$Radius}}) {
 425       $AtomNeighborhoodAtomsRef = $This->{AtomNeighborhoods}{$Radius}{$AtomID};
 426       NEIGHBORHOODATOM: for $NeighborhoodAtom (@{$AtomNeighborhoodAtomsRef}) {
 427         if ($NeighborhoodAtom->IsHydrogen()) {
 428           next NEIGHBORHOODATOM;
 429         }
 430         $NeighborhoodAtomID = $NeighborhoodAtom->GetID();
 431         $NeighborhoodAtomType = $This->{AssignedAtomTypes}{$NeighborhoodAtomID};
 432 
 433         # Count neighbothood atom types for each atom at different radii...
 434         if (!exists $This->{NeighborhoodAtomTypesCount}{$Radius}{$AtomID}) {
 435           %{$This->{NeighborhoodAtomTypesCount}{$Radius}{$AtomID}} = ();
 436         }
 437         if (exists $This->{NeighborhoodAtomTypesCount}{$Radius}{$AtomID}{$NeighborhoodAtomType}) {
 438           $This->{NeighborhoodAtomTypesCount}{$Radius}{$AtomID}{$NeighborhoodAtomType} += 1;
 439         }
 440         else {
 441           $This->{NeighborhoodAtomTypesCount}{$Radius}{$AtomID}{$NeighborhoodAtomType} = 1;
 442         }
 443       }
 444     }
 445   }
 446   return $This;
 447 }
 448 
 449 # Generate atom neighborhood identifiers for each non-hydrogen atom using atom
 450 # neighborhood atom types and their count information...
 451 #
 452 # Let:
 453 #   NR<n> = Neighborhood radius
 454 #   AtomType = Assigned atom type
 455 #   ATC<n> = AtomType count
 456 #
 457 # Then:
 458 #
 459 #   AtomNeighborhoodAtomIdentifier for a neighborhood atom generated for
 460 #   AtomTypes::AtomicInvariantsAtomTypes class corresponds to:
 461 #
 462 #     NR<n>-<AtomType>-ATC<n>
 463 #
 464 #   AtomNeighborhoodsIdentifier for all specified atom neighbothoods of an atom generated for
 465 #   AtomTypes::AtomicInvariantsAtomTypes class corresponds to:
 466 #
 467 #     NR<n>-<AtomType>-ATC<n>;NR<n>-<AtomType>-ATC<n>;...
 468 #
 469 sub _GenerateAtomNeighborhoodIdentifiers {
 470   my($This) = @_;
 471   my($Atom, $AtomID, $Radius, $AtomType, $AtomTypeCount, $AtomNeighborhoodIdentifier, @AtomNeighborhoodIdentifiers);
 472 
 473   @{$This->{AtomNeighborhoodsIdentifiers}} = ();
 474 
 475   for $Atom (@{$This->{Atoms}}) {
 476     $AtomID = $Atom->GetID();
 477     @AtomNeighborhoodIdentifiers = ();
 478     RADIUS: for $Radius ($This->{MinNeighborhoodRadius} .. $This->{MaxNeighborhoodRadius}) {
 479       if (!exists $This->{NeighborhoodAtomTypesCount}{$Radius}{$AtomID}) {
 480         next RADIUS;
 481       }
 482       for $AtomType (sort keys %{$This->{NeighborhoodAtomTypesCount}{$Radius}{$AtomID}}) {
 483         $AtomTypeCount = $This->{NeighborhoodAtomTypesCount}{$Radius}{$AtomID}{$AtomType};
 484         push @AtomNeighborhoodIdentifiers, "NR${Radius}-${AtomType}-ATC${AtomTypeCount}";
 485       }
 486     }
 487     $AtomNeighborhoodIdentifier = join(":", @AtomNeighborhoodIdentifiers);
 488     push @{$This->{AtomNeighborhoodsIdentifiers}}, $AtomNeighborhoodIdentifier;
 489   }
 490 
 491   return $This;
 492 }
 493 
 494 # Set final fingerprits vector...
 495 #
 496 sub _SetFinalFingerprints {
 497   my($This) = @_;
 498 
 499   # Mark successful generation of fingerprints...
 500   $This->{FingerprintsGenerated} = 1;
 501 
 502   # Sort AtomNeighborhoodsIdentifiers..
 503   #
 504   @{$This->{AtomNeighborhoodsIdentifiers}} = sort @{$This->{AtomNeighborhoodsIdentifiers}};
 505 
 506   # Add sorted atom neighborhood identifiers to FingerprintsVector which is already defined
 507   # during initialization containing AlphaNumericalValues...
 508   #
 509   $This->{FingerprintsVector}->AddValues(\@{$This->{AtomNeighborhoodsIdentifiers}});
 510 
 511   return $This;
 512 }
 513 
 514 # Cache  appropriate molecule data...
 515 #
 516 sub _SetupMoleculeDataCache {
 517   my($This) = @_;
 518 
 519   # Get all non-hydrogen atoms...
 520   my($NegateAtomCheckMethod);
 521   $NegateAtomCheckMethod = 1;
 522   @{$This->{Atoms}} = $This->GetMolecule()->GetAtoms("IsHydrogen", $NegateAtomCheckMethod);
 523 
 524   return $This;
 525 }
 526 
 527 # Clear cached molecule data...
 528 #
 529 sub _ClearMoleculeDataCache {
 530   my($This) = @_;
 531 
 532   @{$This->{Atoms}} = ();
 533 
 534   return $This;
 535 }
 536 
 537 # Set atomic invariants to use for atom identifiers...
 538 #
 539 sub SetAtomicInvariantsToUse {
 540   my($This, @Values) = @_;
 541   my($FirstValue, $TypeOfFirstValue, $AtomicInvariant, $SpecifiedAtomicInvariant, $AtomicInvariantValue, @SpecifiedAtomicInvariants, @AtomicInvariantsToUse);
 542 
 543   if (!@Values) {
 544     carp "Warning: ${ClassName}->SetAtomicInvariantsToUse: No values specified...";
 545     return;
 546   }
 547 
 548   $FirstValue = $Values[0];
 549   $TypeOfFirstValue = ref $FirstValue;
 550 
 551   @SpecifiedAtomicInvariants = ();
 552   @AtomicInvariantsToUse = ();
 553 
 554   if ($TypeOfFirstValue =~ /^ARRAY/) {
 555     push @SpecifiedAtomicInvariants, @{$FirstValue};
 556   }
 557   else {
 558     push @SpecifiedAtomicInvariants, @Values;
 559   }
 560 
 561   # Make sure specified AtomicInvariants are valid...
 562   for $SpecifiedAtomicInvariant (@SpecifiedAtomicInvariants) {
 563     if (!AtomTypes::AtomicInvariantsAtomTypes::IsAtomicInvariantAvailable($SpecifiedAtomicInvariant)) {
 564       croak "Error: ${ClassName}->SetAtomicInvariantsToUse: Specified atomic invariant, $SpecifiedAtomicInvariant, is not supported...\n ";
 565     }
 566     $AtomicInvariant = $SpecifiedAtomicInvariant;
 567     push @AtomicInvariantsToUse, $AtomicInvariant;
 568   }
 569 
 570   # Set atomic invariants to use...
 571   @{$This->{AtomicInvariantsToUse}} = ();
 572   push @{$This->{AtomicInvariantsToUse}}, @AtomicInvariantsToUse;
 573 
 574   return $This;
 575 }
 576 
 577 # Set functional classes to use for atom identifiers...
 578 #
 579 sub SetFunctionalClassesToUse {
 580   my($This, @Values) = @_;
 581   my($FirstValue, $TypeOfFirstValue, $FunctionalClass, $SpecifiedFunctionalClass, @SpecifiedFunctionalClasses, @FunctionalClassesToUse);
 582 
 583   if (!@Values) {
 584     carp "Warning: ${ClassName}->SetFunctionalClassesToUse: No values specified...";
 585     return;
 586   }
 587 
 588   if ($This->{AtomIdentifierType} !~ /^FunctionalClassAtomTypes$/i) {
 589     carp "Warning: ${ClassName}->SetFunctionalClassesToUse: FunctionalClassesToUse can't be set for InitialAtomIdentifierType of $This->{AtomIdentifierType}...";
 590     return;
 591   }
 592 
 593   $FirstValue = $Values[0];
 594   $TypeOfFirstValue = ref $FirstValue;
 595 
 596   @SpecifiedFunctionalClasses = ();
 597   @FunctionalClassesToUse = ();
 598 
 599   if ($TypeOfFirstValue =~ /^ARRAY/) {
 600     push @SpecifiedFunctionalClasses, @{$FirstValue};
 601   }
 602   else {
 603     push @SpecifiedFunctionalClasses, @Values;
 604   }
 605 
 606   # Make sure specified FunctionalClasses are valid...
 607   for $SpecifiedFunctionalClass (@SpecifiedFunctionalClasses) {
 608     if (!AtomTypes::FunctionalClassAtomTypes::IsFunctionalClassAvailable($SpecifiedFunctionalClass)) {
 609       croak "Error: ${ClassName}->SetFunctionalClassesToUse: Specified functional class, $SpecifiedFunctionalClass, is not supported...\n ";
 610     }
 611     push @FunctionalClassesToUse, $SpecifiedFunctionalClass;
 612   }
 613 
 614   # Set functional classes to use...
 615   @{$This->{FunctionalClassesToUse}} = ();
 616   push @{$This->{FunctionalClassesToUse}}, @FunctionalClassesToUse;
 617 
 618   return $This;
 619 }
 620 
 621 # Initialize atom indentifier type information...
 622 #
 623 # Current supported values:
 624 #
 625 # AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes,
 626 # MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes
 627 #
 628 sub _InitializeAtomIdentifierTypeInformation {
 629   my($This) = @_;
 630 
 631   if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
 632     $This->_InitializeAtomicInvariantsAtomTypesInformation();
 633   }
 634   elsif ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
 635     $This->_InitializeFunctionalClassAtomTypesInformation();
 636   }
 637   elsif ($This->{AtomIdentifierType} =~ /^(DREIDINGAtomTypes|EStateAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
 638     # Nothing to do for now...
 639   }
 640   else {
 641     croak "Error: ${ClassName}->_InitializeAtomIdentifierTypeInformation: Unknown atom indentifier type $This->{AtomIdentifierType}...";
 642   }
 643 
 644   return $This;
 645 }
 646 
 647 # Initialize atomic invariants atom types to use for generating atom identifiers...
 648 #
 649 # Let:
 650 #   AS = Atom symbol corresponding to element symbol
 651 #
 652 #   X<n>   = Number of non-hydrogen atom neighbors or heavy atoms attached to atom
 653 #   BO<n> = Sum of bond orders to non-hydrogen atom neighbors or heavy atoms attached to atom
 654 #   LBO<n> = Largest bond order of non-hydrogen atom neighbors or heavy atoms attached to atom
 655 #   SB<n> = Number of single bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
 656 #   DB<n> = Number of double bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
 657 #   TB<n> = Number of triple bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
 658 #   H<n>   = Number of implicit and explicit hydrogens for atom
 659 #   Ar     = Aromatic annotation indicating whether atom is aromatic
 660 #   RA     = Ring atom annotation indicating whether atom is a ring
 661 #   FC<+n/-n> = Formal charge assigned to atom
 662 #   MN<n> = Mass number indicating isotope other than most abundant isotope
 663 #   SM<n> = Spin multiplicity of atom. Possible values: 1 (singlet), 2 (doublet) or 3 (triplet)
 664 #
 665 # Then:
 666 #
 667 #   Atom type generated by AtomTypes::AtomicInvariantsAtomTypes class corresponds to:
 668 #
 669 #     AS.X<n>.BO<n>.LBO<n>.<SB><n>.<DB><n>.<TB><n>.H<n>.Ar.RA.FC<+n/-n>.MN<n>.SM<n>
 670 #
 671 # Except for AS which is a required atomic invariant in atom types, all other atomic invariants are
 672 # optional. Default atomic invariants used for AtomID are: AS, X<n>, BO<n>, H<n>, FC<+n/-n>.
 673 # AtomID specification doesn't include atomic invariants with zero or undefined values.
 674 #
 675 sub _InitializeAtomicInvariantsAtomTypesInformation {
 676   my($This) = @_;
 677 
 678   # Default atomic invariants to use for generating atom neighborhood atom IDs: AS, X, BO, H, FC
 679   #
 680   @{$This->{AtomicInvariantsToUse}} = ();
 681   @{$This->{AtomicInvariantsToUse}} = ('AS', 'X', 'BO', 'H', 'FC');
 682 
 683   return $This;
 684 }
 685 
 686 # Initialize functional class atom types, generated by AtomTypes::FunctionalClassAtomTypes
 687 # class, to use for generating atom identifiers...
 688 #
 689 # Let:
 690 #   HBD: HydrogenBondDonor
 691 #   HBA: HydrogenBondAcceptor
 692 #   PI :  PositivelyIonizable
 693 #   NI : NegativelyIonizable
 694 #   Ar : Aromatic
 695 #   Hal : Halogen
 696 #   H : Hydrophobic
 697 #   RA : RingAtom
 698 #   CA : ChainAtom
 699 #
 700 # Then:
 701 #
 702 #   Functiononal class atom type specification for an atom corresponds to:
 703 #
 704 #     Ar.CA.H.HBA.HBD.Hal.NI.PI.RA
 705 #
 706 #   Default functional classes used are: HBD, HBA, PI, NI, Ar, Hal
 707 #
 708 #   FunctionalAtomTypes are assigned using the following definitions [ Ref 60-61, Ref 65-66 ]:
 709 #
 710 #     HydrogenBondDonor: NH, NH2, OH
 711 #     HydrogenBondAcceptor: N[!H], O
 712 #     PositivelyIonizable: +, NH2
 713 #     NegativelyIonizable: -, C(=O)OH, S(=O)OH, P(=O)OH
 714 #
 715 sub _InitializeFunctionalClassAtomTypesInformation {
 716   my($This) = @_;
 717 
 718   # Default functional class atom typess to use for generating atom identifiers
 719   # are: HBD, HBA, PI, NI, Ar, Hal
 720   #
 721   @{$This->{FunctionalClassesToUse}} = ();
 722   @{$This->{FunctionalClassesToUse}} = ('HBD', 'HBA', 'PI', 'NI', 'Ar', 'Hal');
 723 
 724   return $This;
 725 }
 726 
 727 # Return a string containg data for AtomNeighborhoodsFingerprints object...
 728 #
 729 sub StringifyAtomNeighborhoodsFingerprints {
 730   my($This) = @_;
 731   my($FingerprintsString);
 732 
 733   # Type of fingerprint...
 734   $FingerprintsString = "Fingerprint type: $This->{Type}; AtomIdentifierType: $This->{AtomIdentifierType}; MinNeighborhoodRadius: $This->{MinNeighborhoodRadius}; MaxNeighborhoodRadius: $This->{MaxNeighborhoodRadius}";
 735 
 736   if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
 737     my($AtomicInvariant, @AtomicInvariants, @AtomicInvariantsOrder, %AvailableAtomicInvariants);
 738 
 739     @AtomicInvariantsOrder = AtomTypes::AtomicInvariantsAtomTypes::GetAtomicInvariantsOrder();
 740     %AvailableAtomicInvariants = AtomTypes::AtomicInvariantsAtomTypes::GetAvailableAtomicInvariants();
 741 
 742     for $AtomicInvariant (@AtomicInvariantsOrder) {
 743       push @AtomicInvariants, "$AtomicInvariant: $AvailableAtomicInvariants{$AtomicInvariant}";
 744     }
 745 
 746     $FingerprintsString .= "; AtomicInvariantsToUse: <" . TextUtil::JoinWords(\@{$This->{AtomicInvariantsToUse}}, ", ", 0) . ">";
 747     $FingerprintsString .= "; AtomicInvariantsOrder: <" . TextUtil::JoinWords(\@AtomicInvariantsOrder, ", ", 0) . ">";
 748     $FingerprintsString .= "; AvailableAtomicInvariants: <" . TextUtil::JoinWords(\@AtomicInvariants, ", ", 0) . ">";
 749   }
 750   elsif ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
 751     my($FunctionalClass, @FunctionalClasses, @FunctionalClassesOrder, %AvailableFunctionalClasses);
 752 
 753     @FunctionalClassesOrder = AtomTypes::FunctionalClassAtomTypes::GetFunctionalClassesOrder();
 754     %AvailableFunctionalClasses = AtomTypes::FunctionalClassAtomTypes::GetAvailableFunctionalClasses();
 755 
 756     for $FunctionalClass (@FunctionalClassesOrder) {
 757       push @FunctionalClasses, "$FunctionalClass: $AvailableFunctionalClasses{$FunctionalClass}";
 758     }
 759 
 760     $FingerprintsString .= "; FunctionalClassesToUse: <" . TextUtil::JoinWords(\@{$This->{FunctionalClassesToUse}}, ", ", 0) . ">";
 761     $FingerprintsString .= "; FunctionalClassesOrder: <" . TextUtil::JoinWords(\@FunctionalClassesOrder, ", ", 0) . ">";
 762     $FingerprintsString .= "; AvailableFunctionalClasses: <" . TextUtil::JoinWords(\@FunctionalClasses, ", ", 0) . ">";
 763   }
 764 
 765   # Total number of atom neighborhood atom IDs...
 766   $FingerprintsString .= "; NumOfAtomNeighborhoodAtomIdentifiers: " . $This->{FingerprintsVector}->GetNumOfValues();
 767 
 768   # FingerprintsVector...
 769   $FingerprintsString .= "; FingerprintsVector: < $This->{FingerprintsVector} >";
 770 
 771   return $FingerprintsString;
 772 }
 773