MayaChemTools

   1 package Fingerprints::AtomTypesFingerprints;
   2 #
   3 # File: AtomTypesFingerprints.pm
   4 # Author: Manish Sud <msud@san.rr.com>
   5 #
   6 # Copyright (C) 2024 Manish Sud. All rights reserved.
   7 #
   8 # This file is part of MayaChemTools.
   9 #
  10 # MayaChemTools is free software; you can redistribute it and/or modify it under
  11 # the terms of the GNU Lesser General Public License as published by the Free
  12 # Software Foundation; either version 3 of the License, or (at your option) any
  13 # later version.
  14 #
  15 # MayaChemTools is distributed in the hope that it will be useful, but without
  16 # any warranty; without even the implied warranty of merchantability of fitness
  17 # for a particular purpose.  See the GNU Lesser General Public License for more
  18 # details.
  19 #
  20 # You should have received a copy of the GNU Lesser General Public License
  21 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  22 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  23 # Boston, MA, 02111-1307, USA.
  24 #
  25 
  26 use strict;
  27 use Carp;
  28 use Exporter;
  29 use Text::ParseWords;
  30 use TextUtil ();
  31 use FileUtil ();
  32 use MathUtil ();
  33 use Fingerprints::Fingerprints;
  34 use Molecule;
  35 use AtomTypes::AtomicInvariantsAtomTypes;
  36 use AtomTypes::DREIDINGAtomTypes;
  37 use AtomTypes::EStateAtomTypes;
  38 use AtomTypes::FunctionalClassAtomTypes;
  39 use AtomTypes::MMFF94AtomTypes;
  40 use AtomTypes::SLogPAtomTypes;
  41 use AtomTypes::SYBYLAtomTypes;
  42 use AtomTypes::TPSAAtomTypes;
  43 use AtomTypes::UFFAtomTypes;
  44 
  45 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
  46 
  47 @ISA = qw(Fingerprints::Fingerprints Exporter);
  48 @EXPORT = qw();
  49 @EXPORT_OK = qw();
  50 
  51 %EXPORT_TAGS = (all  => [@EXPORT, @EXPORT_OK]);
  52 
  53 # Setup class variables...
  54 my($ClassName);
  55 _InitializeClass();
  56 
  57 # Overload Perl functions...
  58 use overload '""' => 'StringifyAtomTypesFingerprints';
  59 
  60 # Class constructor...
  61 sub new {
  62   my($Class, %NamesAndValues) = @_;
  63 
  64   # Initialize object...
  65   my $This = $Class->SUPER::new();
  66   bless $This, ref($Class) || $Class;
  67   $This->_InitializeAtomTypesFingerprints();
  68 
  69   $This->_InitializeAtomTypesFingerprintsProperties(%NamesAndValues);
  70 
  71   return $This;
  72 }
  73 
  74 # Initialize object data...
  75 #
  76 sub _InitializeAtomTypesFingerprints {
  77   my($This) = @_;
  78 
  79   # Type of atom type fingerprint to generate:
  80   #
  81   # AtomTypesCount - A vector containing count of atom types
  82   # AtomTypesBits - A bit vector indicating presence/absence of atom types
  83   #
  84   $This->{Type} = '';
  85 
  86   # AtomTypes to use for generating fingerprints...
  87   #
  88   # Currently supported values are: AtomicInvariantsAtomTypes, DREIDINGAtomTypes,
  89   # EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes,
  90   # SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes
  91   #
  92   $This->{AtomIdentifierType} = '';
  93 
  94   # AtomTypesSetToUse for AtomTypesCount:
  95   #
  96   # ArbitrarySize - Corrresponds to only AtomTypes atom types detected in molecule
  97   # FixedSize - Corresponds to fixed number of atom types previously defined for
  98   #             specific atom types.
  99   #
 100   # The default AtomTypesSetToUse value for AtomTypesCount fingerprints type: ArbitrarySize.
 101   #
 102   # Possible values: ArbitrarySize or FixedSize. However, for AtomTypesBits fingerprints type, only FixedSize
 103   # value is allowed.
 104   #
 105   $This->{AtomTypesSetToUse} = '';
 106 
 107   # By default, hydrogens are ignored during fingerprint generation...
 108   $This->{IgnoreHydrogens} = 1;
 109 
 110   # Assigned AtomTypes atom types...
 111   %{$This->{AtomTypes}} = ();
 112 
 113   # AtomTypes atom types count for generating atom types count and bits fingerprints...
 114   %{$This->{AtomTypesCount}} = ();
 115 }
 116 
 117 # Initialize class ...
 118 sub _InitializeClass {
 119   #Class name...
 120   $ClassName = __PACKAGE__;
 121 }
 122 
 123 # Initialize object properties....
 124 sub _InitializeAtomTypesFingerprintsProperties {
 125   my($This, %NamesAndValues) = @_;
 126 
 127   my($Name, $Value, $MethodName);
 128   while (($Name, $Value) = each  %NamesAndValues) {
 129     $MethodName = "Set${Name}";
 130     $This->$MethodName($Value);
 131   }
 132 
 133   # Make sure molecule object was specified...
 134   if (!exists $NamesAndValues{Molecule}) {
 135     croak "Error: ${ClassName}->New: Object can't be instantiated without specifying molecule...";
 136   }
 137 
 138   # Make sure type and identifier type were specified...
 139   if (!exists $NamesAndValues{Type}) {
 140     croak "Error: ${ClassName}->New: Object can't be instantiated without specifying type...";
 141   }
 142   if (!exists $NamesAndValues{AtomIdentifierType}) {
 143     croak "Error: ${ClassName}->New: Object can't be instantiated without specifying AtomIdentifierType...";
 144   }
 145 
 146   if ($This->{Type} =~ /^AtomTypesCount$/i) {
 147     $This->_InitializeAtomTypesCount();
 148   }
 149   elsif ($This->{Type} =~ /^AtomTypesBits$/i) {
 150     $This->_InitializeAtomTypesBits();
 151   }
 152   else {
 153     croak "Error: ${ClassName}->_InitializeAtomTypesFingerprintsProperties: Unknown AtomTypes fingerprints type: $This->{Type}; Supported fingerprints types: AtomTypesCount or AtomTypesBits...";
 154   }
 155 
 156   return $This;
 157 }
 158 
 159 # Initialize atom type counts...
 160 #
 161 sub _InitializeAtomTypesCount {
 162   my($This) = @_;
 163 
 164   # Set default AtomTypesSetToUse...
 165   if (!$This->{AtomTypesSetToUse}) {
 166     $This->{AtomTypesSetToUse} = ($This->{AtomIdentifierType} =~ /^TPSAAtomTypes$/i) ? 'FixedSize' : 'ArbitrarySize';
 167   }
 168 
 169   # Make sure AtomTypesSetToUse value is okay...
 170   $This->_ValidateAtomTypesSetToUse($This->{AtomTypesSetToUse});
 171 
 172   # Vector type and type of values...
 173   $This->{VectorType} = 'FingerprintsVector';
 174 
 175   if ($This->{AtomTypesSetToUse} =~ /^FixedSize$/i) {
 176     $This->{FingerprintsVectorType} = 'OrderedNumericalValues';
 177   }
 178   else {
 179     $This->{FingerprintsVectorType} = 'NumericalValues';
 180   }
 181 
 182   $This->_InitializeFingerprintsVector();
 183 
 184   return $This;
 185 }
 186 
 187 # Initialize atom types bits...
 188 #
 189 sub _InitializeAtomTypesBits {
 190   my($This) = @_;
 191 
 192   # Set default AtomTypesSetToUse...
 193   $This->{AtomTypesSetToUse} = 'FixedSize';
 194 
 195   # Make sure AtomTypesSetToUse value is okay...
 196   $This->_ValidateAtomTypesSetToUse($This->{AtomTypesSetToUse});
 197 
 198   # Vector type...
 199   $This->{VectorType} = 'FingerprintsBitVector';
 200 
 201   # Vector size...
 202   $This->{Size} = $This->_GetFixedSizeAtomTypesSetSize();
 203 
 204   $This->_InitializeFingerprintsBitVector();
 205 
 206   return $This;
 207 }
 208 
 209 # Set type...
 210 #
 211 sub SetType {
 212   my($This, $Type) = @_;
 213 
 214   if ($This->{Type}) {
 215     croak "Error: ${ClassName}->SetType: Can't change type:  It's already set...";
 216   }
 217 
 218   if ($Type =~ /^AtomTypesCount$/i) {
 219     $This->{Type} = 'AtomTypesCount';;
 220   }
 221   elsif ($Type =~ /^AtomTypesBits$/i) {
 222     $This->{Type} = 'AtomTypesBits';;
 223   }
 224   else {
 225     croak "Error: ${ClassName}->SetType: Unknown AtomTypes fingerprints type: $Type; Supported fingerprints types: AtomTypesCount or AtomTypesBit...";
 226   }
 227   return $This;
 228 }
 229 
 230 # Disable set size method...
 231 #
 232 sub SetSize {
 233   my($This, $Type) = @_;
 234 
 235   croak "Error: ${ClassName}->SetSize: Can't change size:  It's not allowed...";
 236 }
 237 
 238 # Set atom types set to use...
 239 #
 240 sub SetAtomTypesSetToUse {
 241   my($This, $Value) = @_;
 242 
 243   if ($This->{AtomTypesSetToUse}) {
 244     croak "Error: ${ClassName}->SetAtomTypesSetToUse: Can't change size:  It's already set...";
 245   }
 246 
 247   $This->_ValidateAtomTypesSetToUse($Value);
 248 
 249   $This->{AtomTypesSetToUse} = $Value;
 250 
 251   return $This;
 252 }
 253 
 254 # Validate AtomTypesSetToUse value...
 255 #
 256 sub _ValidateAtomTypesSetToUse {
 257   my($This, $Value) = @_;
 258 
 259   if ($Value !~ /^(ArbitrarySize|FixedSize)/i) {
 260     croak "Error: ${ClassName}->_ValidateAtomTypesSetToUse: Unknown AtomTypesSetToUse value: $Value; Supported values: ArbitrarySize or FixedSize";
 261   }
 262 
 263   if ($Value =~ /^ArbitrarySize$/i && $This->{Type} =~ /^AtomTypesBits$/i) {
 264     croak "Error: ${ClassName}->_ValidateAtomTypesSetToUse: Specified AtomTypesSetToUse value, $Value, is not allowed for AtomTypesBits fingerprints...";
 265   }
 266 
 267   if ($Value =~ /^FixedSize$/i && $This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
 268     croak "Error: ${ClassName}->_ValidateAtomTypesSetToUse: Specified AtomTypesSetToUse value, $Value, is not allowed for AtomicInvariantsAtomTypes fingerprints...";
 269   }
 270 
 271   if ($Value =~ /^FixedSize$/i && $This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
 272     croak "Error: ${ClassName}->_ValidateAtomTypesSetToUse: Specified AtomTypesSetToUse value, $Value, is not allowed for FunctionalClassAtomTypes fingerprints...";
 273   }
 274 
 275   if ($Value =~ /^ArbitrarySize$/i && $This->{AtomIdentifierType} =~ /^TPSAAtomTypes$/i) {
 276     croak "Error: ${ClassName}->_ValidateAtomTypesSetToUse: Specified AtomTypesSetToUse value, $Value, is not allowed for TPSAAtomTypes fingerprints...";
 277   }
 278 
 279   return $This;
 280 }
 281 
 282 # Set atom identifier type...
 283 #
 284 sub SetAtomIdentifierType {
 285   my($This, $IdentifierType) = @_;
 286 
 287   if ($IdentifierType !~ /^(AtomicInvariantsAtomTypes|DREIDINGAtomTypes|EStateAtomTypes|FunctionalClassAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
 288     croak "Error: ${ClassName}->SetAtomIdentifierType: Specified value, $IdentifierType, for AtomIdentifierType is not vaild. Supported types in current release of MayaChemTools: AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, and UFFAtomTypes.";
 289   }
 290 
 291   if ($This->{AtomIdentifierType}) {
 292     croak "Error: ${ClassName}->SetAtomIdentifierType: Can't change intial atom identifier type:  It's already set...";
 293   }
 294 
 295   if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i && $This->{AtomTypesSetToUse} =~ /^FixedSize$/i) {
 296     croak "Error: ${ClassName}->SetAtomIdentifierType: Specified AtomTypesSetToUse value, $IdentifierType, is not allowed for AtomicInvariantsAtomTypes fingerprints...";
 297   }
 298 
 299   if ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i && $This->{AtomTypesSetToUse} =~ /^FixedSize$/i) {
 300     croak "Error: ${ClassName}->SetAtomIdentifierType: Specified AtomTypesSetToUse value, $IdentifierType, is not allowed for FunctionalClassAtomTypes fingerprints...";
 301   }
 302 
 303   $This->{AtomIdentifierType} = $IdentifierType;
 304 
 305   # Initialize atom identifier type information...
 306   $This->_InitializeAtomIdentifierTypeInformation();
 307 
 308   return $This;
 309 }
 310 
 311 # Generate fingerprints description...
 312 #
 313 sub GetDescription {
 314   my($This) = @_;
 315 
 316   # Is description explicity set?
 317   if (exists $This->{Description}) {
 318     return $This->{Description};
 319   }
 320 
 321   # Generate fingerprints description...
 322 
 323   return "$This->{Type}:$This->{AtomIdentifierType}:$This->{AtomTypesSetToUse}";
 324 }
 325 
 326 # Generate atom types fingerprints...
 327 #
 328 # The current release of MayaChemTools supports generation of two types of AtomTypes
 329 # fingerprints corresponding to non-hydrogen and/or hydrogen atoms:
 330 #
 331 # AtomTypesCount - A vector containing count of  atom types
 332 # AtomTypesBits - A bit vector indicating presence/absence of atom types
 333 #
 334 # For AtomTypesCount fingerprints, two types of atom types set size is allowed:
 335 #
 336 # ArbitrarySize - Corresponds to only atom types detected in molecule
 337 # FixedSize - Corresponds to fixed number of atom types previously defined
 338 #
 339 # For AtomTypesBits fingerprints, only FixedSize atom type set is allowed.
 340 #
 341 # The fixed size atom type set size used during generation of fingerprints corresponding
 342 # to FixedSize value of AtomTypesSetToUse contains all possible atom types in datafiles
 343 # distributed with MayaChemTools release for each supported type.
 344 #
 345 # Combination of Type and AtomTypesSetToUse allow generation of 22 different types of
 346 # AtomTypes fingerprints:
 347 #
 348 # Type                  AtomIdentifierType           AtomTypesSetToUse
 349 #
 350 # AtomTypesCount        AtomicInvariantsAtomTypes    ArbitrarySize
 351 #
 352 # AtomTypesCount        DREIDINGAtomTypes            ArbitrarySize
 353 # AtomTypesCount        DREIDINGAtomTypes            FixedSize
 354 # AtomTypesBits         DREIDINGAtomTypes            FixedSize
 355 #
 356 # AtomTypesCount        EStateAtomTypes              ArbitrarySize
 357 # AtomTypesCount        EStateAtomTypes              FixedSize
 358 # AtomTypesBits         EStateAtomTypes              FixedSize
 359 #
 360 # AtomTypesCount        FunctionalClassAtomTypes    ArbitrarySize
 361 #
 362 # AtomTypesCount        MMFF94AtomTypes              ArbitrarySize
 363 # AtomTypesCount        MMFF94AtomTypes              FixedSize
 364 # AtomTypesBits         MMFF94AtomTypes              FixedSize
 365 #
 366 # AtomTypesCount        SLogPAtomTypes               ArbitrarySize
 367 # AtomTypesCount        SLogPAtomTypes               FixedSize
 368 # AtomTypesBits         SLogPAtomTypes               FixedSize
 369 #
 370 # AtomTypesCount        SYBYLAtomTypes               ArbitrarySize
 371 # AtomTypesCount        SYBYLAtomTypes               FixedSize
 372 # AtomTypesBits         SYBYLAtomTypes               FixedSize
 373 #
 374 # AtomTypesCount        TPSAAtomTypes                 FixedSize
 375 # AtomTypesBits         TPSAAtomTypes                 FixedSize
 376 #
 377 # AtomTypesCount        UFFAtomTypes                 ArbitrarySize
 378 # AtomTypesCount        UFFAtomTypes                 FixedSize
 379 # AtomTypesBits         UFFAtomTypes                 FixedSize
 380 #
 381 sub GenerateFingerprints {
 382   my($This) = @_;
 383 
 384   # Cache appropriate molecule data...
 385   $This->_SetupMoleculeDataCache();
 386 
 387   # Check and assign appropriate atom types...
 388   if (!$This->_AssignAtomTypes()) {
 389     carp "Warning: ${ClassName}->GenerateFingerprints: $This->{AtomIdentifierType} fingerprints generation didn't succeed: Couldn't assign valid $This->{AtomIdentifierType} to all atoms...";
 390     return $This;
 391   }
 392 
 393   # Count atom types...
 394   $This->_CountAtomTypes();
 395 
 396   # Set final fingerprints...
 397   $This->_SetFinalFingerprints();
 398 
 399   # Clear cached molecule data...
 400   $This->_ClearMoleculeDataCache();
 401 
 402   return $This;
 403 }
 404 
 405 # Assign appropriate atom types...
 406 #
 407 sub _AssignAtomTypes {
 408   my($This) = @_;
 409   my($SpecifiedAtomTypes, $Atom, $AtomID);
 410 
 411   %{$This->{AtomTypes}} = ();
 412   $SpecifiedAtomTypes = undef;
 413 
 414   IDENTIFIERTYPE: {
 415     if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
 416       $SpecifiedAtomTypes = new AtomTypes::AtomicInvariantsAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $This->{IgnoreHydrogens}, 'AtomicInvariantsToUse' => $This->{AtomicInvariantsToUse});
 417       last IDENTIFIERTYPE;
 418     }
 419 
 420     if ($This->{AtomIdentifierType} =~ /^DREIDINGAtomTypes$/i) {
 421       $SpecifiedAtomTypes = new AtomTypes::DREIDINGAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $This->{IgnoreHydrogens});
 422       last IDENTIFIERTYPE;
 423     }
 424 
 425     if ($This->{AtomIdentifierType} =~ /^EStateAtomTypes$/i) {
 426       $SpecifiedAtomTypes = new AtomTypes::EStateAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $This->{IgnoreHydrogens});
 427       last IDENTIFIERTYPE;
 428     }
 429 
 430     if ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
 431       $SpecifiedAtomTypes = new AtomTypes::FunctionalClassAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $This->{IgnoreHydrogens}, 'FunctionalClassesToUse' => $This->{FunctionalClassesToUse});
 432       last IDENTIFIERTYPE;
 433     }
 434 
 435     if ($This->{AtomIdentifierType} =~ /^MMFF94AtomTypes$/i) {
 436       $SpecifiedAtomTypes = new AtomTypes::MMFF94AtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $This->{IgnoreHydrogens});
 437       last IDENTIFIERTYPE;
 438     }
 439 
 440     if ($This->{AtomIdentifierType} =~ /^SLogPAtomTypes$/i) {
 441       $SpecifiedAtomTypes = new AtomTypes::SLogPAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $This->{IgnoreHydrogens});
 442       last IDENTIFIERTYPE;
 443     }
 444     if ($This->{AtomIdentifierType} =~ /^SYBYLAtomTypes$/i) {
 445       $SpecifiedAtomTypes = new AtomTypes::SYBYLAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $This->{IgnoreHydrogens});
 446       last IDENTIFIERTYPE;
 447     }
 448 
 449     if ($This->{AtomIdentifierType} =~ /^TPSAAtomTypes$/i) {
 450       $SpecifiedAtomTypes = new AtomTypes::TPSAAtomTypes('Molecule' => $This->{Molecule}, 'IgnorePhosphorus' => 0, 'IgnoreSulfur' => 0);
 451       last IDENTIFIERTYPE;
 452     }
 453 
 454     if ($This->{AtomIdentifierType} =~ /^UFFAtomTypes$/i) {
 455       $SpecifiedAtomTypes = new AtomTypes::UFFAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $This->{IgnoreHydrogens});
 456       last IDENTIFIERTYPE;
 457     }
 458 
 459     croak "Error: ${ClassName}->_AssignAtomTypes: Unknown atom indentifier type $This->{AtomIdentifierType}...";
 460   }
 461 
 462   # Assign atom types...
 463   $SpecifiedAtomTypes->AssignAtomTypes();
 464 
 465   # Make sure atom types assignment is successful...
 466   if (!$SpecifiedAtomTypes->IsAtomTypesAssignmentSuccessful()) {
 467     return undef;
 468   }
 469 
 470   # Collect assigned atom types...
 471   for $Atom (@{$This->{Atoms}}) {
 472     $AtomID = $Atom->GetID();
 473     $This->{AtomTypes}{$AtomID} = $SpecifiedAtomTypes->GetAtomType($Atom);
 474   }
 475 
 476   return $This;
 477 }
 478 
 479 # Count atom types...
 480 #
 481 sub _CountAtomTypes {
 482   my($This) = @_;
 483   my($Atom, $AtomID, $AtomType);
 484 
 485   %{$This->{AtomTypesCount}} = ();
 486 
 487   for $Atom (@{$This->{Atoms}}) {
 488     $AtomID = $Atom->GetID();
 489     $AtomType = $This->{AtomTypes}{$AtomID};
 490 
 491     if (!exists $This->{AtomTypesCount}{$AtomType}) {
 492       $This->{AtomTypesCount}{$AtomType} = 0;
 493     }
 494 
 495     $This->{AtomTypesCount}{$AtomType} += 1;
 496   }
 497   return $This;
 498 }
 499 
 500 # Set final fingerprints...
 501 #
 502 sub _SetFinalFingerprints {
 503   my($This) = @_;
 504 
 505   # Mark successful generation of fingerprints...
 506   $This->{FingerprintsGenerated} = 1;
 507 
 508   if ($This->{Type} =~ /^AtomTypesCount$/i) {
 509     $This->_SetFinalAtomTypesCountFingerprints();
 510   }
 511   elsif ($This->{Type} =~ /^AtomTypesBits$/i) {
 512     $This->_SetFinalAtomTypesBitsFingerprints();
 513   }
 514   return $This;
 515 }
 516 
 517 # Set final final fingerpritns for atom types count...
 518 #
 519 sub _SetFinalAtomTypesCountFingerprints {
 520   my($This) = @_;
 521   my($AtomType, @Values, @IDs);
 522 
 523   @Values = ();
 524   @IDs = ();
 525 
 526   if ($This->{AtomTypesSetToUse} =~ /^FixedSize$/i) {
 527     for $AtomType (@{$This->_GetFixedSizeAtomTypesSet()}) {
 528       push @IDs, $AtomType;
 529       push @Values, exists($This->{AtomTypesCount}{$AtomType}) ? $This->{AtomTypesCount}{$AtomType} : 0;
 530     }
 531   }
 532   else {
 533     for $AtomType (sort keys %{$This->{AtomTypesCount}}) {
 534       push @IDs, $AtomType;
 535       push @Values, $This->{AtomTypesCount}{$AtomType};
 536     }
 537   }
 538 
 539   # Add IDs and values to fingerprint vector...
 540   if (@IDs) {
 541     $This->{FingerprintsVector}->AddValueIDs(\@IDs);
 542   }
 543   $This->{FingerprintsVector}->AddValues(\@Values);
 544 
 545   return $This;
 546 }
 547 
 548 # Set final final fingerpritns for atom types count bits...
 549 #
 550 sub _SetFinalAtomTypesBitsFingerprints {
 551   my($This) = @_;
 552   my($AtomType, $SkipPosCheck, $AtomTypeNum, $AtomTypeBitIndex);
 553 
 554   $SkipPosCheck = 1;
 555   $AtomTypeNum = 0;
 556 
 557   ATOMTYPE: for $AtomType (@{$This->_GetFixedSizeAtomTypesSet()}) {
 558     $AtomTypeNum++;
 559     if (!(exists($This->{AtomTypesCount}{$AtomType}) && $This->{AtomTypesCount}{$AtomType})) {
 560       next ATOMTYPE;
 561     }
 562     $AtomTypeBitIndex = $AtomTypeNum - 1;
 563     $This->{FingerprintsBitVector}->SetBit($AtomTypeBitIndex, $SkipPosCheck);
 564   }
 565 
 566   return $This;
 567 }
 568 
 569 # Cache  appropriate molecule data...
 570 #
 571 sub _SetupMoleculeDataCache {
 572   my($This) = @_;
 573 
 574   if ($This->{IgnoreHydrogens}) {
 575     # Get all non-hydrogen atoms...
 576     my($NegateAtomCheckMethod);
 577     $NegateAtomCheckMethod = 1;
 578 
 579     @{$This->{Atoms}} = $This->GetMolecule()->GetAtoms("IsHydrogen", $NegateAtomCheckMethod);
 580   }
 581   else {
 582     @{$This->{Atoms}} = $This->GetMolecule()->GetAtoms();
 583   }
 584 
 585   return $This;
 586 }
 587 
 588 # Clear cached molecule data...
 589 #
 590 sub _ClearMoleculeDataCache {
 591   my($This) = @_;
 592 
 593   @{$This->{Atoms}} = ();
 594 
 595   return $This;
 596 }
 597 
 598 # Get fixed size atom types set size...
 599 #
 600 sub _GetFixedSizeAtomTypesSetSize {
 601   my($This) = @_;
 602   my($Size);
 603 
 604   $Size = 0;
 605 
 606   IDENTIFIERTYPE: {
 607     if ($This->{AtomIdentifierType} =~ /^DREIDINGAtomTypes$/i) {
 608       $Size = $This->{IgnoreHydrogens} ? scalar @{AtomTypes::DREIDINGAtomTypes::GetAllPossibleDREIDINGNonHydrogenAtomTypes()} : scalar @{AtomTypes::DREIDINGAtomTypes::GetAllPossibleDREIDINGAtomTypes()};
 609       last IDENTIFIERTYPE;
 610     }
 611 
 612     if ($This->{AtomIdentifierType} =~ /^EStateAtomTypes$/i) {
 613       $Size = $This->{IgnoreHydrogens} ? scalar @{AtomTypes::EStateAtomTypes::GetAllPossibleEStateNonHydrogenAtomTypes()} : scalar @{AtomTypes::EStateAtomTypes::GetAllPossibleEStateAtomTypes()};
 614       last IDENTIFIERTYPE;
 615     }
 616 
 617     if ($This->{AtomIdentifierType} =~ /^MMFF94AtomTypes$/i) {
 618       $Size = $This->{IgnoreHydrogens} ? scalar @{AtomTypes::MMFF94AtomTypes::GetAllPossibleMMFF94NonHydrogenAtomTypes()} : scalar @{AtomTypes::MMFF94AtomTypes::GetAllPossibleMMFF94AtomTypes()};
 619       last IDENTIFIERTYPE;
 620     }
 621 
 622     if ($This->{AtomIdentifierType} =~ /^SLogPAtomTypes$/i) {
 623       $Size = $This->{IgnoreHydrogens} ? scalar @{AtomTypes::SLogPAtomTypes::GetAllPossibleSLogPNonHydrogenAtomTypes()} : scalar @{AtomTypes::SLogPAtomTypes::GetAllPossibleSLogPAtomTypes()};
 624       last IDENTIFIERTYPE;
 625     }
 626 
 627     if ($This->{AtomIdentifierType} =~ /^SYBYLAtomTypes$/i) {
 628       $Size = $This->{IgnoreHydrogens} ? scalar @{AtomTypes::SYBYLAtomTypes::GetAllPossibleSYBYLNonHydrogenAtomTypes()} : scalar @{AtomTypes::SYBYLAtomTypes::GetAllPossibleSYBYLAtomTypes()};
 629       last IDENTIFIERTYPE;
 630     }
 631 
 632     if ($This->{AtomIdentifierType} =~ /^TPSAAtomTypes$/i) {
 633       $Size =  scalar @{AtomTypes::TPSAAtomTypes::GetAllPossibleTPSAAtomTypes()};
 634       last IDENTIFIERTYPE;
 635     }
 636 
 637     if ($This->{AtomIdentifierType} =~ /^UFFAtomTypes$/i) {
 638       $Size = $This->{IgnoreHydrogens} ? scalar @{AtomTypes::UFFAtomTypes::GetAllPossibleUFFNonHydrogenAtomTypes()} : scalar @{AtomTypes::UFFAtomTypes::GetAllPossibleUFFAtomTypes()};
 639       last IDENTIFIERTYPE;
 640     }
 641 
 642     croak "Error: ${ClassName}->_GetFixedSizeAtomTypesSetSize: Atom types set size for atom indentifier type, $This->{AtomIdentifierType}, is not available...";
 643   }
 644 
 645   return $Size;
 646 }
 647 
 648 # Get fixed size atom types set...
 649 #
 650 sub _GetFixedSizeAtomTypesSet {
 651   my($This) = @_;
 652   my($AtomTypesRef);
 653 
 654   $AtomTypesRef = undef;
 655 
 656   IDENTIFIERTYPE: {
 657     if ($This->{AtomIdentifierType} =~ /^DREIDINGAtomTypes$/i) {
 658       $AtomTypesRef = $This->{IgnoreHydrogens} ? AtomTypes::DREIDINGAtomTypes::GetAllPossibleDREIDINGNonHydrogenAtomTypes() : AtomTypes::DREIDINGAtomTypes::GetAllPossibleDREIDINGAtomTypes();
 659       last IDENTIFIERTYPE;
 660     }
 661 
 662     if ($This->{AtomIdentifierType} =~ /^EStateAtomTypes$/i) {
 663       $AtomTypesRef = $This->{IgnoreHydrogens} ? AtomTypes::EStateAtomTypes::GetAllPossibleEStateNonHydrogenAtomTypes() : AtomTypes::EStateAtomTypes::GetAllPossibleEStateAtomTypes();
 664       last IDENTIFIERTYPE;
 665     }
 666 
 667     if ($This->{AtomIdentifierType} =~ /^MMFF94AtomTypes$/i) {
 668       $AtomTypesRef = $This->{IgnoreHydrogens} ? AtomTypes::MMFF94AtomTypes::GetAllPossibleMMFF94NonHydrogenAtomTypes() : AtomTypes::MMFF94AtomTypes::GetAllPossibleMMFF94AtomTypes();
 669       last IDENTIFIERTYPE;
 670     }
 671 
 672     if ($This->{AtomIdentifierType} =~ /^SLogPAtomTypes$/i) {
 673       $AtomTypesRef = $This->{IgnoreHydrogens} ? AtomTypes::SLogPAtomTypes::GetAllPossibleSLogPNonHydrogenAtomTypes() : AtomTypes::SLogPAtomTypes::GetAllPossibleSLogPAtomTypes();
 674       last IDENTIFIERTYPE;
 675     }
 676 
 677     if ($This->{AtomIdentifierType} =~ /^SYBYLAtomTypes$/i) {
 678       $AtomTypesRef = $This->{IgnoreHydrogens} ? AtomTypes::SYBYLAtomTypes::GetAllPossibleSYBYLNonHydrogenAtomTypes() : AtomTypes::SYBYLAtomTypes::GetAllPossibleSYBYLAtomTypes();
 679       last IDENTIFIERTYPE;
 680     }
 681 
 682     if ($This->{AtomIdentifierType} =~ /^TPSAAtomTypes$/i) {
 683       $AtomTypesRef = AtomTypes::TPSAAtomTypes::GetAllPossibleTPSAAtomTypes();
 684       last IDENTIFIERTYPE;
 685     }
 686 
 687     if ($This->{AtomIdentifierType} =~ /^UFFAtomTypes$/i) {
 688       $AtomTypesRef = $This->{IgnoreHydrogens} ? AtomTypes::UFFAtomTypes::GetAllPossibleUFFNonHydrogenAtomTypes() : AtomTypes::UFFAtomTypes::GetAllPossibleUFFAtomTypes();
 689       last IDENTIFIERTYPE;
 690     }
 691 
 692     croak "Error: ${ClassName}->_GetFixedSizeAtomTypesSet: Atom types set for atom indentifier type, $This->{AtomIdentifierType}, is not available...";
 693   }
 694 
 695   return $AtomTypesRef;
 696 }
 697 
 698 # Initialize atom indentifier type information...
 699 #
 700 # Current supported values:
 701 #
 702 # AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes,
 703 # MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes
 704 #
 705 sub _InitializeAtomIdentifierTypeInformation {
 706   my($This) = @_;
 707 
 708   if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
 709     $This->_InitializeAtomicInvariantsAtomTypesInformation();
 710   }
 711   elsif ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
 712     $This->_InitializeFunctionalClassAtomTypesInformation();
 713   }
 714   elsif ($This->{AtomIdentifierType} =~ /^(DREIDINGAtomTypes|EStateAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
 715     # Nothing to do for now...
 716   }
 717   else {
 718     croak "Error: ${ClassName}->_InitializeAtomIdentifierTypeInformation: Unknown atom indentifier type $This->{AtomIdentifierType}...";
 719   }
 720 
 721   return $This;
 722 }
 723 
 724 # Initialize atomic invariants atom types to use for generating atom IDs in atom pairs...
 725 #
 726 # Let:
 727 #   AS = Atom symbol corresponding to element symbol
 728 #
 729 #   X<n>   = Number of non-hydrogen atom neighbors or heavy atoms attached to atom
 730 #   BO<n> = Sum of bond orders to non-hydrogen atom neighbors or heavy atoms attached to atom
 731 #   LBO<n> = Largest bond order of non-hydrogen atom neighbors or heavy atoms attached to atom
 732 #   SB<n> = Number of single bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
 733 #   DB<n> = Number of double bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
 734 #   TB<n> = Number of triple bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
 735 #   H<n>   = Number of implicit and explicit hydrogens for atom
 736 #   Ar     = Aromatic annotation indicating whether atom is aromatic
 737 #   RA     = Ring atom annotation indicating whether atom is in a ring
 738 #   FC<+n/-n> = Formal charge assigned to atom
 739 #   MN<n> = Mass number indicating isotope other than most abundant isotope
 740 #   SM<n> = Spin multiplicity of atom. Possible values: 1 (singlet), 2 (doublet) or 3 (triplet)
 741 #
 742 #   AtomTypeIDx = Atomic invariants atom type for atom x
 743 #   AtomTypeIDy = Atomic invariants atom type for atom y
 744 #   Dn   = Topological distance between atom x and y
 745 #
 746 # Then:
 747 #
 748 #   AtomID generated by AtomTypes::AtomicInvariantsAtomTypes class corresponds to:
 749 #
 750 #     AS.X<n>.BO<n>.LBO<n>.<SB><n>.<DB><n>.<TB><n>.H<n>.Ar.RA.FC<+n/-n>.MN<n>.SM<n>
 751 #
 752 # Except for AS which is a required atomic invariant atom types AtomIDs, all other atomic invariants are
 753 # optional. Default atomic invariants used for AtomID are: AS, X<n>, BO<n>, H<n>, FC<+n/-n>.
 754 # AtomID specification doesn't include atomic invariants with zero or undefined values.
 755 #
 756 # Examples of  AtomIDs:
 757 #
 758 #   O.X1.BO1.H1 - Hydroxyl oxygen in carboxylate with attached hydrogen and no explicit charge
 759 #   O.X1.BO1.FC-1 - Hydroxyl oxygen in carboxylate with explicit negative charge
 760 #   O.X1.BO2 - Carbonyl oxygen in carboxylate with double bond to carbon
 761 #   O.X2.BO2 - Hydroxyl oxygen in carboxylate attached to carbonyl carbon and another heavy atom
 762 #
 763 #   C.X2.BO3.H1.Ar - Aromatic carbon
 764 #
 765 sub _InitializeAtomicInvariantsAtomTypesInformation {
 766   my($This) = @_;
 767 
 768   # Default atomic invariants to use for generating atom pair atom IDs: AS, X, BO, H, FC
 769   #
 770   @{$This->{AtomicInvariantsToUse}} = ();
 771   @{$This->{AtomicInvariantsToUse}} = ('AS', 'X', 'BO', 'H', 'FC');
 772 
 773   return $This;
 774 }
 775 
 776 # Initialize functional class atom types, generated by AtomTypes::FunctionalClassAtomTypes
 777 # class, to use for generating atom identifiers...
 778 #
 779 # Let:
 780 #   HBD: HydrogenBondDonor
 781 #   HBA: HydrogenBondAcceptor
 782 #   PI :  PositivelyIonizable
 783 #   NI : NegativelyIonizable
 784 #   Ar : Aromatic
 785 #   Hal : Halogen
 786 #   H : Hydrophobic
 787 #   RA : RingAtom
 788 #   CA : ChainAtom
 789 #
 790 # Then:
 791 #
 792 #   Functional class atom type specification for an atom corresponds to:
 793 #
 794 #     Ar.CA.H.HBA.HBD.Hal.NI.PI.RA
 795 #
 796 #   Default functional classes used are: HBD, HBA, PI, NI, Ar, Hal
 797 #
 798 #   FunctionalAtomTypes are assigned using the following definitions [ Ref 60-61, Ref 65-66 ]:
 799 #
 800 #     HydrogenBondDonor: NH, NH2, OH
 801 #     HydrogenBondAcceptor: N[!H], O
 802 #     PositivelyIonizable: +, NH2
 803 #     NegativelyIonizable: -, C(=O)OH, S(=O)OH, P(=O)OH
 804 #
 805 sub _InitializeFunctionalClassAtomTypesInformation {
 806   my($This) = @_;
 807 
 808   # Default functional class atom typess to use for generating atom identifiers
 809   # are: HBD, HBA, PI, NI, Ar, Hal
 810   #
 811   @{$This->{FunctionalClassesToUse}} = ();
 812   @{$This->{FunctionalClassesToUse}} = ('HBD', 'HBA', 'PI', 'NI', 'Ar', 'Hal');
 813 
 814   return $This;
 815 }
 816 
 817 # Set atomic invariants to use for atom IDs...
 818 #
 819 sub SetAtomicInvariantsToUse {
 820   my($This, @Values) = @_;
 821   my($FirstValue, $TypeOfFirstValue, $AtomicInvariant, $SpecifiedAtomicInvariant, $AtomicInvariantValue, @SpecifiedAtomicInvariants, @AtomicInvariantsToUse);
 822 
 823   if (!@Values) {
 824     carp "Warning: ${ClassName}->SetAtomicInvariantsToUse: No values specified...";
 825     return;
 826   }
 827 
 828   $FirstValue = $Values[0];
 829   $TypeOfFirstValue = ref $FirstValue;
 830 
 831   @SpecifiedAtomicInvariants = ();
 832   @AtomicInvariantsToUse = ();
 833 
 834   if ($TypeOfFirstValue =~ /^ARRAY/) {
 835     push @SpecifiedAtomicInvariants, @{$FirstValue};
 836   }
 837   else {
 838     push @SpecifiedAtomicInvariants, @Values;
 839   }
 840 
 841   # Make sure specified AtomicInvariants are valid...
 842   for $SpecifiedAtomicInvariant (@SpecifiedAtomicInvariants) {
 843     if (!AtomTypes::AtomicInvariantsAtomTypes::IsAtomicInvariantAvailable($SpecifiedAtomicInvariant)) {
 844       croak "Error: ${ClassName}->SetAtomicInvariantsToUse: Specified atomic invariant, $SpecifiedAtomicInvariant, is not supported...\n ";
 845     }
 846     $AtomicInvariant = $SpecifiedAtomicInvariant;
 847     push @AtomicInvariantsToUse, $AtomicInvariant;
 848   }
 849 
 850   # Set atomic invariants to use...
 851   @{$This->{AtomicInvariantsToUse}} = ();
 852   push @{$This->{AtomicInvariantsToUse}}, @AtomicInvariantsToUse;
 853 
 854   return $This;
 855 }
 856 
 857 # Set functional classes to use for generation of intial atom indentifiers...
 858 #
 859 sub SetFunctionalClassesToUse {
 860   my($This, @Values) = @_;
 861   my($FirstValue, $TypeOfFirstValue, $FunctionalClass, $SpecifiedFunctionalClass, @SpecifiedFunctionalClasses, @FunctionalClassesToUse);
 862 
 863   if (!@Values) {
 864     carp "Warning: ${ClassName}->SetFunctionalClassesToUse: No values specified...";
 865     return;
 866   }
 867 
 868   if ($This->{AtomIdentifierType} !~ /^FunctionalClassAtomTypes$/i) {
 869     carp "Warning: ${ClassName}->SetFunctionalClassesToUse: FunctionalClassesToUse can't be set for InitialAtomIdentifierType of $This->{AtomIdentifierType}...";
 870     return;
 871   }
 872 
 873   $FirstValue = $Values[0];
 874   $TypeOfFirstValue = ref $FirstValue;
 875 
 876   @SpecifiedFunctionalClasses = ();
 877   @FunctionalClassesToUse = ();
 878 
 879   if ($TypeOfFirstValue =~ /^ARRAY/) {
 880     push @SpecifiedFunctionalClasses, @{$FirstValue};
 881   }
 882   else {
 883     push @SpecifiedFunctionalClasses, @Values;
 884   }
 885 
 886   # Make sure specified FunctionalClasses are valid...
 887   for $SpecifiedFunctionalClass (@SpecifiedFunctionalClasses) {
 888     if (!AtomTypes::FunctionalClassAtomTypes::IsFunctionalClassAvailable($SpecifiedFunctionalClass)) {
 889       croak "Error: ${ClassName}->SetFunctionalClassesToUse: Specified functional class, $SpecifiedFunctionalClass, is not supported...\n ";
 890     }
 891     push @FunctionalClassesToUse, $SpecifiedFunctionalClass;
 892   }
 893 
 894   # Set functional classes to use...
 895   @{$This->{FunctionalClassesToUse}} = ();
 896   push @{$This->{FunctionalClassesToUse}}, @FunctionalClassesToUse;
 897 
 898   return $This;
 899 }
 900 
 901 # Return a string containg data for AtomTypesFingerprints object...
 902 sub StringifyAtomTypesFingerprints {
 903   my($This) = @_;
 904   my($FingerprintsString, $IgnoreHydrogens);
 905 
 906   $FingerprintsString = "Type: $This->{Type}; AtomIdentifierType: $This->{AtomIdentifierType}; AtomTypesSetToUse: $This->{AtomTypesSetToUse}";
 907 
 908   if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
 909     my($AtomicInvariant, @AtomicInvariants, @AtomicInvariantsOrder, %AvailableAtomicInvariants);
 910 
 911     @AtomicInvariantsOrder = AtomTypes::AtomicInvariantsAtomTypes::GetAtomicInvariantsOrder();
 912     %AvailableAtomicInvariants = AtomTypes::AtomicInvariantsAtomTypes::GetAvailableAtomicInvariants();
 913 
 914     for $AtomicInvariant (@AtomicInvariantsOrder) {
 915       push @AtomicInvariants, "$AtomicInvariant: $AvailableAtomicInvariants{$AtomicInvariant}";
 916     }
 917 
 918     $FingerprintsString .= "; AtomicInvariantsToUse: <" . TextUtil::JoinWords(\@{$This->{AtomicInvariantsToUse}}, ", ", 0) . ">";
 919     $FingerprintsString .= "; AtomicInvariantsOrder: <" . TextUtil::JoinWords(\@AtomicInvariantsOrder, ", ", 0) . ">";
 920     $FingerprintsString .= "; AvailableAtomicInvariants: <" . TextUtil::JoinWords(\@AtomicInvariants, ", ", 0) . ">";
 921   }
 922   elsif ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
 923     my($FunctionalClass, @FunctionalClasses, @FunctionalClassesOrder, %AvailableFunctionalClasses);
 924 
 925     @FunctionalClassesOrder = AtomTypes::FunctionalClassAtomTypes::GetFunctionalClassesOrder();
 926     %AvailableFunctionalClasses = AtomTypes::FunctionalClassAtomTypes::GetAvailableFunctionalClasses();
 927 
 928     for $FunctionalClass (@FunctionalClassesOrder) {
 929       push @FunctionalClasses, "$FunctionalClass: $AvailableFunctionalClasses{$FunctionalClass}";
 930     }
 931 
 932     $FingerprintsString .= "; FunctionalClassesToUse: <" . TextUtil::JoinWords(\@{$This->{FunctionalClassesToUse}}, ", ", 0) . ">";
 933     $FingerprintsString .= "; FunctionalClassesOrder: <" . TextUtil::JoinWords(\@FunctionalClassesOrder, ", ", 0) . ">";
 934     $FingerprintsString .= "; AvailableFunctionalClasses: <" . TextUtil::JoinWords(\@FunctionalClasses, ", ", 0) . ">";
 935   }
 936 
 937 
 938   $IgnoreHydrogens = $This->{IgnoreHydrogens} ? "Yes" : "No";
 939   $FingerprintsString .= "; IgnoreHydrogens: $IgnoreHydrogens";
 940 
 941   if ($This->{Type} =~ /^AtomTypesCount$/i) {
 942     $FingerprintsString .= "; FingerprintsVector: < $This->{FingerprintsVector} >";
 943   }
 944   elsif ($This->{Type} =~ /^AtomTypesBits$/i) {
 945     $FingerprintsString .= "; FingerprintsBitVector: < $This->{FingerprintsBitVector} >";
 946   }
 947 
 948   return $FingerprintsString;
 949 }
 950