MayaChemTools

   1 package Fingerprints::TopologicalAtomTripletsFingerprints;
   2 #
   3 # File: TopologicalAtomTripletsFingerprints.pm
   4 # Author: Manish Sud <msud@san.rr.com>
   5 #
   6 # Copyright (C) 2024 Manish Sud. All rights reserved.
   7 #
   8 # This file is part of MayaChemTools.
   9 #
  10 # MayaChemTools is free software; you can redistribute it and/or modify it under
  11 # the terms of the GNU Lesser General Public License as published by the Free
  12 # Software Foundation; either version 3 of the License, or (at your option) any
  13 # later version.
  14 #
  15 # MayaChemTools is distributed in the hope that it will be useful, but without
  16 # any warranty; without even the implied warranty of merchantability of fitness
  17 # for a particular purpose.  See the GNU Lesser General Public License for more
  18 # details.
  19 #
  20 # You should have received a copy of the GNU Lesser General Public License
  21 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  22 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  23 # Boston, MA, 02111-1307, USA.
  24 #
  25 
  26 use strict;
  27 use Carp;
  28 use Exporter;
  29 use Fingerprints::Fingerprints;
  30 use TextUtil ();
  31 use Molecule;
  32 use AtomTypes::AtomicInvariantsAtomTypes;
  33 use AtomTypes::DREIDINGAtomTypes;
  34 use AtomTypes::EStateAtomTypes;
  35 use AtomTypes::FunctionalClassAtomTypes;
  36 use AtomTypes::MMFF94AtomTypes;
  37 use AtomTypes::SLogPAtomTypes;
  38 use AtomTypes::SYBYLAtomTypes;
  39 use AtomTypes::TPSAAtomTypes;
  40 use AtomTypes::UFFAtomTypes;
  41 
  42 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
  43 
  44 @ISA = qw(Fingerprints::Fingerprints Exporter);
  45 @EXPORT = qw();
  46 @EXPORT_OK = qw();
  47 
  48 %EXPORT_TAGS = (all  => [@EXPORT, @EXPORT_OK]);
  49 
  50 # Setup class variables...
  51 my($ClassName);
  52 _InitializeClass();
  53 
  54 # Overload Perl functions...
  55 use overload '""' => 'StringifyTopologicalAtomTripletsFingerprints';
  56 
  57 # Class constructor...
  58 sub new {
  59   my($Class, %NamesAndValues) = @_;
  60 
  61   # Initialize object...
  62   my $This = $Class->SUPER::new();
  63   bless $This, ref($Class) || $Class;
  64   $This->_InitializeTopologicalAtomTripletsFingerprints();
  65 
  66   $This->_InitializeTopologicalAtomTripletsFingerprintsProperties(%NamesAndValues);
  67 
  68   return $This;
  69 }
  70 
  71 # Initialize object data...
  72 #
  73 sub _InitializeTopologicalAtomTripletsFingerprints {
  74   my($This) = @_;
  75 
  76   # Type of fingerprint...
  77   $This->{Type} = 'TopologicalAtomTriplets';
  78 
  79   # Type of vector...
  80   $This->{VectorType} = 'FingerprintsVector';
  81 
  82   # Type of FingerprintsVector...
  83   $This->{FingerprintsVectorType} = 'NumericalValues';
  84 
  85   # Minimum and maximum bond distance between atom paris...
  86   $This->{MinDistance} = 1;
  87   $This->{MaxDistance} = 10;
  88 
  89   # Determines whether to apply triangle inequality to distance triplets...
  90   #
  91   $This->{UseTriangleInequality} = 0;
  92 
  93   # Atom identifier type to use for atom IDs in atom triplets...
  94   #
  95   # Currently supported values are: AtomicInvariantsAtomTypes, DREIDINGAtomTypes,
  96   # EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes,
  97   # SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes
  98   #
  99   $This->{AtomIdentifierType} = '';
 100 
 101   # Atom types assigned to each heavy atom...
 102   #
 103   %{$This->{AssignedAtomTypes}} = ();
 104 
 105   # All atom triplets between minimum and maximum distance...
 106   #
 107   @{$This->{AtomTripletsIDs}} = ();
 108   %{$This->{AtomTripletsCount}} = ();
 109 }
 110 
 111 # Initialize class ...
 112 sub _InitializeClass {
 113   #Class name...
 114   $ClassName = __PACKAGE__;
 115 }
 116 
 117 # Initialize object properties....
 118 sub _InitializeTopologicalAtomTripletsFingerprintsProperties {
 119   my($This, %NamesAndValues) = @_;
 120 
 121   my($Name, $Value, $MethodName);
 122   while (($Name, $Value) = each  %NamesAndValues) {
 123     $MethodName = "Set${Name}";
 124     $This->$MethodName($Value);
 125   }
 126 
 127   # Make sure molecule object was specified...
 128   if (!exists $NamesAndValues{Molecule}) {
 129     croak "Error: ${ClassName}->New: Object can't be instantiated without specifying molecule...";
 130   }
 131   if (!exists $NamesAndValues{AtomIdentifierType}) {
 132     croak "Error: ${ClassName}->New: Object can't be instantiated without specifying AtomIdentifierType...";
 133   }
 134 
 135   $This->_InitializeFingerprintsVector();
 136 
 137   return $This;
 138 }
 139 
 140 # Set minimum distance for atom triplets...
 141 #
 142 sub SetMinDistance {
 143   my($This, $Value) = @_;
 144 
 145   if (!TextUtil::IsPositiveInteger($Value)) {
 146     croak "Error: ${ClassName}->SetMinDistance: MinDistance value, $Value, is not valid:  It must be a positive integer...";
 147   }
 148   $This->{MinDistance} = $Value;
 149 
 150   return $This;
 151 }
 152 
 153 # Set maximum distance for atom triplets...
 154 #
 155 sub SetMaxDistance {
 156   my($This, $Value) = @_;
 157 
 158   if (!TextUtil::IsPositiveInteger($Value)) {
 159     croak "Error: ${ClassName}->SetMaxDistance: MaxDistance value, $Value, is not valid:  It must be a positive integer...";
 160   }
 161   $This->{MaxDistance} = $Value;
 162 
 163   return $This;
 164 }
 165 
 166 # Set atom identifier type..
 167 #
 168 sub SetAtomIdentifierType {
 169   my($This, $IdentifierType) = @_;
 170 
 171   if ($IdentifierType !~ /^(AtomicInvariantsAtomTypes|DREIDINGAtomTypes|EStateAtomTypes|FunctionalClassAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
 172     croak "Error: ${ClassName}->SetAtomIdentifierType: Specified value, $IdentifierType, for AtomIdentifierType is not vaild. Supported types in current release of MayaChemTools: AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, and UFFAtomTypes.";
 173   }
 174 
 175   if ($This->{AtomIdentifierType}) {
 176     croak "Error: ${ClassName}->SeAtomIdentifierType: Can't change intial atom identifier type:  It's already set...";
 177   }
 178 
 179   $This->{AtomIdentifierType} = $IdentifierType;
 180 
 181   # Initialize atom identifier type information...
 182   $This->_InitializeAtomIdentifierTypeInformation();
 183 
 184   return $This;
 185 }
 186 
 187 # Generate fingerprints description...
 188 #
 189 sub GetDescription {
 190   my($This) = @_;
 191 
 192   # Is description explicity set?
 193   if (exists $This->{Description}) {
 194     return $This->{Description};
 195   }
 196 
 197   # Generate fingerprints description...
 198 
 199   return "$This->{Type}:$This->{AtomIdentifierType}:MinDistance$This->{MinDistance}:MaxDistance$This->{MaxDistance}";
 200 }
 201 
 202 # Generate topological atom triplets fingerprints...
 203 #
 204 # Let:
 205 #
 206 #   AT = Any of the supported atom types
 207 #
 208 #   ATx = Atom type for  atom x
 209 #   ATy = Atom type for  atom y
 210 #   ATz = Atom type for  atom z
 211 #
 212 #   Dxy = Distance between Px and Py
 213 #   Dxz = Distance between Px and Pz
 214 #   Dyz = Distance between Py and Pz
 215 #
 216 # Then:
 217 #
 218 #   ATx-Dyz-ATy-Dxz-ATz-Dxy = Atom triplet ID for atom types ATx, ATy and Atz
 219 #
 220 # Methodology:
 221 #   . Generate a distance matrix.
 222 #   . Assign atom types to all the atoms.
 223 #   . Using distance matrix and atom types, count occurrence of unique atom triplets
 224 #     within specified distance range along with optional trinagle inequality
 225 #
 226 # Notes:
 227 #   . Hydrogen atoms are ignored during the fingerprint generation.
 228 #   . For a molecule containing N atoms with all different atom type, the total number of
 229 #     possible unique atom triplets without applying triangle inquality check corresponds to:
 230 #
 231 #     Factorial( N ) / ( Factorial( N - 3 ) * Factorial (3) )
 232 #
 233 #     However, due to similar atom types assigned to atoms in a molecule for a specific atom
 234 #     typing methodology and specified distance range used during fingerprints generation, the
 235 #     actual number of unique triplets is usually smaller than the theoretical limit.
 236 #
 237 sub GenerateFingerprints {
 238   my($This) = @_;
 239 
 240   if ($This->{MinDistance} > $This->{MaxDistance}) {
 241     croak "Error: ${ClassName}->GenerateTopologicalAtomTripletsFingerprints: No fingerpritns generated: MinDistance, $This->{MinDistance}, must be <= MaxDistance, $This->{MaxDistance}...";
 242   }
 243 
 244   # Cache appropriate molecule data...
 245   $This->_SetupMoleculeDataCache();
 246 
 247   # Generate distance matrix...
 248   if (!$This->_SetupDistanceMatrix()) {
 249     carp "Warning: ${ClassName}->GenerateFingerprints: $This->{AtomIdentifierType} fingerprints generation didn't succeed: Couldn't generate distance matrix...";
 250     return $This;
 251   }
 252 
 253   # Assign atom types to all heavy atoms...
 254   if (!$This->_AssignAtomTypes()) {
 255     carp "Warning: ${ClassName}->GenerateFingerprints: $This->{AtomIdentifierType} fingerprints generation didn't succeed: Couldn't assign valid $This->{AtomIdentifierType} to all atoms...";
 256     return $This;
 257   }
 258 
 259   # Intialize values of toplogical atom triplets...
 260   $This->_InitializeToplogicalAtomTriplets();
 261 
 262   # Count atom triplets...
 263   $This->_GenerateAndCountAtomTriplets();
 264 
 265   # Set final fingerprints...
 266   $This->_SetFinalFingerprints();
 267 
 268   # Clear cached molecule data...
 269   $This->_ClearMoleculeDataCache();
 270 
 271   return $This;
 272 }
 273 
 274 # Setup distance matrix...
 275 #
 276 sub _SetupDistanceMatrix {
 277   my($This) = @_;
 278 
 279   $This->{DistanceMatrix} = $This->GetMolecule()->GetDistanceMatrix();
 280 
 281   if (!$This->{DistanceMatrix}) {
 282     return undef;
 283   }
 284 
 285   return $This;
 286 }
 287 
 288 # Assign appropriate atom types to all heavy atoms...
 289 #
 290 sub _AssignAtomTypes {
 291   my($This) = @_;
 292   my($SpecifiedAtomTypes, $Atom, $AtomID, $IgnoreHydrogens);
 293 
 294   %{$This->{AssignedAtomTypes}} = ();
 295   $IgnoreHydrogens = 1;
 296 
 297   $SpecifiedAtomTypes = undef;
 298 
 299   IDENTIFIERTYPE: {
 300     if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
 301       $SpecifiedAtomTypes = new AtomTypes::AtomicInvariantsAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens, 'AtomicInvariantsToUse' => $This->{AtomicInvariantsToUse});
 302       last IDENTIFIERTYPE;
 303     }
 304 
 305     if ($This->{AtomIdentifierType} =~ /^DREIDINGAtomTypes$/i) {
 306       $SpecifiedAtomTypes = new AtomTypes::DREIDINGAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens);
 307       last IDENTIFIERTYPE;
 308     }
 309 
 310     if ($This->{AtomIdentifierType} =~ /^EStateAtomTypes$/i) {
 311       $SpecifiedAtomTypes = new AtomTypes::EStateAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens);
 312       last IDENTIFIERTYPE;
 313     }
 314 
 315     if ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
 316       $SpecifiedAtomTypes = new AtomTypes::FunctionalClassAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens, 'FunctionalClassesToUse' => $This->{FunctionalClassesToUse});
 317       last IDENTIFIERTYPE;
 318     }
 319 
 320     if ($This->{AtomIdentifierType} =~ /^MMFF94AtomTypes$/i) {
 321       $SpecifiedAtomTypes = new AtomTypes::MMFF94AtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens);
 322       last IDENTIFIERTYPE;
 323     }
 324 
 325     if ($This->{AtomIdentifierType} =~ /^SLogPAtomTypes$/i) {
 326       $SpecifiedAtomTypes = new AtomTypes::SLogPAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens);
 327       last IDENTIFIERTYPE;
 328     }
 329     if ($This->{AtomIdentifierType} =~ /^SYBYLAtomTypes$/i) {
 330       $SpecifiedAtomTypes = new AtomTypes::SYBYLAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens);
 331       last IDENTIFIERTYPE;
 332     }
 333 
 334     if ($This->{AtomIdentifierType} =~ /^TPSAAtomTypes$/i) {
 335       $SpecifiedAtomTypes = new AtomTypes::TPSAAtomTypes('Molecule' => $This->{Molecule}, 'IgnorePhosphorus' => 0, 'IgnoreSulfur' => 0);
 336       last IDENTIFIERTYPE;
 337     }
 338 
 339     if ($This->{AtomIdentifierType} =~ /^UFFAtomTypes$/i) {
 340       $SpecifiedAtomTypes = new AtomTypes::UFFAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens);
 341       last IDENTIFIERTYPE;
 342     }
 343 
 344     croak "Error: ${ClassName}->_AssignAtomTypes: Unknown atom indentifier type $This->{AtomIdentifierType}...";
 345   }
 346 
 347   # Assign atom types...
 348   $SpecifiedAtomTypes->AssignAtomTypes();
 349 
 350   # Make sure atom types assignment is successful...
 351   if (!$SpecifiedAtomTypes->IsAtomTypesAssignmentSuccessful()) {
 352     return undef;
 353   }
 354 
 355   # Collect assigned atom types...
 356   ATOM: for $Atom (@{$This->{Atoms}}) {
 357     if ($Atom->IsHydrogen()) {
 358       next ATOM;
 359     }
 360     $AtomID = $Atom->GetID();
 361     $This->{AssignedAtomTypes}{$AtomID} = $SpecifiedAtomTypes->GetAtomType($Atom);
 362   }
 363 
 364   return $This;
 365 }
 366 
 367 # Initialize topological atom triplets between specified distance range...
 368 #
 369 sub _InitializeToplogicalAtomTriplets {
 370   my($This) = @_;
 371   my($Distance);
 372 
 373   @{$This->{AtomTripletsIDs}} = ();
 374   %{$This->{AtomTripletsCount}} = ();
 375 
 376   return $This;
 377 }
 378 
 379 # Count atom triplets between mininum and maximum distance at each
 380 # distance using distance matrix and atom types assiged to each heavy
 381 # atom.
 382 #
 383 sub _GenerateAndCountAtomTriplets {
 384   my($This) = @_;
 385   my($NumOfAtoms, $AtomIndex1, $AtomIndex2, $AtomIndex3, $AtomID1, $AtomID2, $AtomID3, $AtomType1, $AtomType2, $AtomType3, $Distance12, $Distance13, $Distance23, $SkipIndexCheck, $DistanceMatrix, $AtomTripletID);
 386 
 387   $NumOfAtoms = @{$This->{Atoms}};
 388   $DistanceMatrix = $This->{DistanceMatrix};
 389   $SkipIndexCheck = 0;
 390 
 391   ATOMINDEX1: for $AtomIndex1 (0 .. ($NumOfAtoms - 1)) {
 392     $AtomID1 = $This->{AtomIndexToID}{$AtomIndex1};
 393     if (!exists($This->{AssignedAtomTypes}{$AtomID1})) {
 394       next ATOMINDEX1;
 395     }
 396     $AtomType1 = $This->{AssignedAtomTypes}{$AtomID1};
 397 
 398     ATOMINDEX2: for $AtomIndex2 (($AtomIndex1 + 1) .. ($NumOfAtoms - 1)) {
 399       $AtomID2 = $This->{AtomIndexToID}{$AtomIndex2};
 400       if (!exists($This->{AssignedAtomTypes}{$AtomID2})) {
 401         next ATOMINDEX2;
 402       }
 403       $AtomType2 = $This->{AssignedAtomTypes}{$AtomID2};
 404 
 405       $Distance12 = $DistanceMatrix->GetValue($AtomIndex1, $AtomIndex2, $SkipIndexCheck);
 406       if ($Distance12 < $This->{MinDistance} || $Distance12 > $This->{MaxDistance}) {
 407         next ATOMINDEX2;
 408       }
 409 
 410       ATOMINDEX3: for $AtomIndex3 (($AtomIndex2 + 1) .. ($NumOfAtoms - 1)) {
 411         $AtomID3 = $This->{AtomIndexToID}{$AtomIndex3};
 412         if (!exists($This->{AssignedAtomTypes}{$AtomID3})) {
 413           next ATOMINDEX3;
 414         }
 415         $AtomType3 = $This->{AssignedAtomTypes}{$AtomID3};
 416 
 417         $Distance13 = $DistanceMatrix->GetValue($AtomIndex1, $AtomIndex3, $SkipIndexCheck);
 418         $Distance23 = $DistanceMatrix->GetValue($AtomIndex2, $AtomIndex3, $SkipIndexCheck);
 419 
 420         if ($Distance13 < $This->{MinDistance} || $Distance13 > $This->{MaxDistance}) {
 421           next ATOMINDEX3;
 422         }
 423         if ($Distance23 < $This->{MinDistance} || $Distance23 > $This->{MaxDistance}) {
 424           next ATOMINDEX3;
 425         }
 426         if ($This->{UseTriangleInequality} && !$This->_DoDistancesSatisfyTriangleInequality($Distance12, $Distance13, $Distance23)) {
 427           next ATOMINDEX3;
 428         }
 429 
 430         $AtomTripletID = $This->_GetAtomTripletID($AtomType1, $Distance23, $AtomType2, $Distance13, $AtomType3, $Distance12);
 431         if (!exists $This->{AtomTripletsCount}{$AtomTripletID}) {
 432           $This->{AtomTripletsCount}{$AtomTripletID} = 0;
 433         }
 434         $This->{AtomTripletsCount}{$AtomTripletID} += 1;
 435       }
 436     }
 437   }
 438   return $This;
 439 }
 440 
 441 # Check triangle inequality...
 442 #
 443 sub _DoDistancesSatisfyTriangleInequality {
 444   my($This, $Distance1, $Distance2, $Distance3) = @_;
 445 
 446   if ( !($Distance1 > abs($Distance2 - $Distance3) && $Distance1 < ($Distance2 + $Distance3)) ) {
 447     return 0;
 448   }
 449   if ( !($Distance2 > abs($Distance1 - $Distance3) && $Distance2 < ($Distance1 + $Distance3)) ) {
 450     return 0;
 451   }
 452   if ( !($Distance3 > abs($Distance1 - $Distance2) && $Distance3 < ($Distance1 + $Distance2)) ) {
 453     return 0;
 454   }
 455   return 1;
 456 }
 457 
 458 # Get atom triplet ID corresponding to atom types and distances corresponding to atom triplet...
 459 #
 460 sub _GetAtomTripletID {
 461   my($This, $ATx, $Dyz, $ATy, $Dxz, $ATz, $Dxy) = @_;
 462   my($AtomTripletID, @AtomIDs);
 463 
 464   @AtomIDs = ();
 465 
 466   @AtomIDs = sort("${ATx}-D${Dyz}", "${ATy}-D${Dxz}", "${ATz}-D${Dxy}");
 467   $AtomTripletID = join "-", @AtomIDs;
 468 
 469   return $AtomTripletID;
 470 }
 471 
 472 # Set final fingerpritns vector...
 473 #
 474 sub _SetFinalFingerprints {
 475   my($This) = @_;
 476   my($AtomTripletID, $Value, @Values);
 477 
 478   # Mark successful generation of fingerprints...
 479   $This->{FingerprintsGenerated} = 1;
 480 
 481   @Values = ();
 482   @{$This->{AtomTripletsIDs}} = ();
 483 
 484   for $AtomTripletID (sort keys %{$This->{AtomTripletsCount}}) {
 485     push @{$This->{AtomTripletsIDs}}, $AtomTripletID;
 486     $Value = $This->{AtomTripletsCount}{$AtomTripletID};
 487     push @Values, $Value;
 488   }
 489 
 490   # Add AtomTripletsIDs and values to fingerprint vector...
 491   $This->{FingerprintsVector}->AddValueIDs(\@{$This->{AtomTripletsIDs}});
 492   $This->{FingerprintsVector}->AddValues(\@Values);
 493 
 494   return $This;
 495 }
 496 
 497 # Get atom triplet IDs corresponding to atom triplets count values in fingerprint
 498 # vector as an array or reference to an array...
 499 #
 500 # AtomTripletIDs list differes in molecules and is generated during finalization
 501 # of fingerprints to make sure the fingerprint vector containing count values
 502 # matches the atom triplets array.
 503 #
 504 sub GetAtomTripletIDs {
 505   my($This) = @_;
 506 
 507   return wantarray ? @{$This->{AtomTripletsIDs}} : \@{$This->{AtomTripletsIDs}};
 508 }
 509 
 510 # Cache  appropriate molecule data...
 511 #
 512 sub _SetupMoleculeDataCache {
 513   my($This) = @_;
 514 
 515   # Get all atoms including hydrogens to correctly map atom indices to atom IDs for
 516   # usage of distance matrix. The hydrogen atoms are ignored during processing...
 517   #
 518   @{$This->{Atoms}} = $This->GetMolecule()->GetAtoms();
 519 
 520   # Get all atom IDs...
 521   my(@AtomIDs);
 522   @AtomIDs = ();
 523   @AtomIDs =  map { $_->GetID() } @{$This->{Atoms}};
 524 
 525   # Set AtomIndex to AtomID hash...
 526   %{$This->{AtomIndexToID}} = ();
 527   @{$This->{AtomIndexToID}}{ (0 .. $#AtomIDs) } = @AtomIDs;
 528 
 529   return $This;
 530 }
 531 
 532 # Set atomic invariants to use for atom identifiers...
 533 #
 534 sub SetAtomicInvariantsToUse {
 535   my($This, @Values) = @_;
 536   my($FirstValue, $TypeOfFirstValue, $AtomicInvariant, $SpecifiedAtomicInvariant, $AtomicInvariantValue, @SpecifiedAtomicInvariants, @AtomicInvariantsToUse);
 537 
 538   if (!@Values) {
 539     carp "Warning: ${ClassName}->SetAtomicInvariantsToUse: No values specified...";
 540     return;
 541   }
 542 
 543   $FirstValue = $Values[0];
 544   $TypeOfFirstValue = ref $FirstValue;
 545 
 546   @SpecifiedAtomicInvariants = ();
 547   @AtomicInvariantsToUse = ();
 548 
 549   if ($TypeOfFirstValue =~ /^ARRAY/) {
 550     push @SpecifiedAtomicInvariants, @{$FirstValue};
 551   }
 552   else {
 553     push @SpecifiedAtomicInvariants, @Values;
 554   }
 555 
 556   # Make sure specified AtomicInvariants are valid...
 557   for $SpecifiedAtomicInvariant (@SpecifiedAtomicInvariants) {
 558     if (!AtomTypes::AtomicInvariantsAtomTypes::IsAtomicInvariantAvailable($SpecifiedAtomicInvariant)) {
 559       croak "Error: ${ClassName}->SetAtomicInvariantsToUse: Specified atomic invariant, $SpecifiedAtomicInvariant, is not supported...\n ";
 560     }
 561     $AtomicInvariant = $SpecifiedAtomicInvariant;
 562     push @AtomicInvariantsToUse, $AtomicInvariant;
 563   }
 564 
 565   # Set atomic invariants to use...
 566   @{$This->{AtomicInvariantsToUse}} = ();
 567   push @{$This->{AtomicInvariantsToUse}}, @AtomicInvariantsToUse;
 568 
 569   return $This;
 570 }
 571 
 572 # Set functional classes to use for atom identifiers...
 573 #
 574 sub SetFunctionalClassesToUse {
 575   my($This, @Values) = @_;
 576   my($FirstValue, $TypeOfFirstValue, $FunctionalClass, $SpecifiedFunctionalClass, @SpecifiedFunctionalClasses, @FunctionalClassesToUse);
 577 
 578   if (!@Values) {
 579     carp "Warning: ${ClassName}->SetFunctionalClassesToUse: No values specified...";
 580     return;
 581   }
 582 
 583   if ($This->{AtomIdentifierType} !~ /^FunctionalClassAtomTypes$/i) {
 584     carp "Warning: ${ClassName}->SetFunctionalClassesToUse: FunctionalClassesToUse can't be set for InitialAtomIdentifierType of $This->{AtomIdentifierType}...";
 585     return;
 586   }
 587 
 588   $FirstValue = $Values[0];
 589   $TypeOfFirstValue = ref $FirstValue;
 590 
 591   @SpecifiedFunctionalClasses = ();
 592   @FunctionalClassesToUse = ();
 593 
 594   if ($TypeOfFirstValue =~ /^ARRAY/) {
 595     push @SpecifiedFunctionalClasses, @{$FirstValue};
 596   }
 597   else {
 598     push @SpecifiedFunctionalClasses, @Values;
 599   }
 600 
 601   # Make sure specified FunctionalClasses are valid...
 602   for $SpecifiedFunctionalClass (@SpecifiedFunctionalClasses) {
 603     if (!AtomTypes::FunctionalClassAtomTypes::IsFunctionalClassAvailable($SpecifiedFunctionalClass)) {
 604       croak "Error: ${ClassName}->SetFunctionalClassesToUse: Specified functional class, $SpecifiedFunctionalClass, is not supported...\n ";
 605     }
 606     push @FunctionalClassesToUse, $SpecifiedFunctionalClass;
 607   }
 608 
 609   # Set functional classes to use...
 610   @{$This->{FunctionalClassesToUse}} = ();
 611   push @{$This->{FunctionalClassesToUse}}, @FunctionalClassesToUse;
 612 
 613   return $This;
 614 }
 615 
 616 # Initialize atom indentifier type information...
 617 #
 618 # Current supported values:
 619 #
 620 # AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes,
 621 # MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes
 622 #
 623 sub _InitializeAtomIdentifierTypeInformation {
 624   my($This) = @_;
 625 
 626   if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
 627     $This->_InitializeAtomicInvariantsAtomTypesInformation();
 628   }
 629   elsif ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
 630     $This->_InitializeFunctionalClassAtomTypesInformation();
 631   }
 632   elsif ($This->{AtomIdentifierType} =~ /^(DREIDINGAtomTypes|EStateAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
 633     # Nothing to do for now...
 634   }
 635   else {
 636     croak "Error: ${ClassName}->_InitializeAtomIdentifierTypeInformation: Unknown atom indentifier type $This->{AtomIdentifierType}...";
 637   }
 638 
 639   return $This;
 640 }
 641 
 642 # Initialize atomic invariants atom types to use for generating atom IDs in atom triplets...
 643 #
 644 # Let:
 645 #   AS = Atom symbol corresponding to element symbol
 646 #
 647 #   X<n>   = Number of non-hydrogen atom neighbors or heavy atoms attached to atom
 648 #   BO<n> = Sum of bond orders to non-hydrogen atom neighbors or heavy atoms attached to atom
 649 #   LBO<n> = Largest bond order of non-hydrogen atom neighbors or heavy atoms attached to atom
 650 #   SB<n> = Number of single bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
 651 #   DB<n> = Number of double bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
 652 #   TB<n> = Number of triple bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
 653 #   H<n>   = Number of implicit and explicit hydrogens for atom
 654 #   Ar     = Aromatic annotation indicating whether atom is aromatic
 655 #   RA     = Ring atom annotation indicating whether atom is a ring
 656 #   FC<+n/-n> = Formal charge assigned to atom
 657 #   MN<n> = Mass number indicating isotope other than most abundant isotope
 658 #   SM<n> = Spin multiplicity of atom. Possible values: 1 (singlet), 2 (doublet) or 3 (triplet)
 659 #
 660 #   ATx = Atomic invariants atom type for atom x
 661 #   ATy = Atomic invariants atom type for atom y
 662 #   ATz = Atomic invariants atom type for atom z
 663 #
 664 #   Dxy = Distance between Px and Py
 665 #   Dxz = Distance between Px and Pz
 666 #   Dyz = Distance between Py and Pz
 667 #
 668 # Then:
 669 #
 670 #   Atom triplet AtomID generated by AtomTypes::AtomicInvariantsAtomTypes class corresponds to:
 671 #
 672 #     AS.X<n>.BO<n>.LBO<n>.<SB><n>.<DB><n>.<TB><n>.H<n>.Ar.RA.FC<+n/-n>.MN<n>.SM<n>
 673 #
 674 #  Toplogical atom triplet ID between atom IDs ATx, ATy and ATz corresponds to:
 675 #
 676 #    ATx-Dyz-ATy-Dxz-ATz-Dxy
 677 #
 678 # Except for AS which is a required atomic invariant in atom triplet AtomIDs, all other atomic invariants are
 679 # optional. Default atomic invariants used for AtomID are: AS, X<n>, BO<n>, H<n>, FC<+n/-n>.
 680 # AtomID specification doesn't include atomic invariants with zero or undefined values.
 681 #
 682 # Examples of atom triplet AtomIDs:
 683 #
 684 #   O.X1.BO1.H1 - Hydroxyl oxygen in carboxylate with attached hydrogen and no explicit charge
 685 #   O.X1.BO1.FC-1 - Hydroxyl ozygen in carboxylate with explicit negative charge
 686 #   O.X1.BO2 - Carbonyl oxygen in carboxylate with double bond to carbon
 687 #   O.X2.BO2 - Hydroxyl ozygen in carboxylate attached to carbonyl carbon and another heavy atom
 688 #
 689 #   C.X2.BO3.H1.Ar - Aromatic carbon
 690 #
 691 sub _InitializeAtomicInvariantsAtomTypesInformation {
 692   my($This) = @_;
 693 
 694   # Default atomic invariants to use for generating atom triplet atom IDs: AS, X, BO, H, FC
 695   #
 696   @{$This->{AtomicInvariantsToUse}} = ();
 697   @{$This->{AtomicInvariantsToUse}} = ('AS', 'X', 'BO', 'H', 'FC');
 698 
 699   return $This;
 700 }
 701 
 702 # Initialize functional class atom types, generated by AtomTypes::FunctionalClassAtomTypes
 703 # class, to use for generating atom identifiers...
 704 #
 705 # Let:
 706 #   HBD: HydrogenBondDonor
 707 #   HBA: HydrogenBondAcceptor
 708 #   PI :  PositivelyIonizable
 709 #   NI : NegativelyIonizable
 710 #   Ar : Aromatic
 711 #   Hal : Halogen
 712 #   H : Hydrophobic
 713 #   RA : RingAtom
 714 #   CA : ChainAtom
 715 #
 716 # Then:
 717 #
 718 #   Functiononal class atom type specification for an atom corresponds to:
 719 #
 720 #     Ar.CA.H.HBA.HBD.Hal.NI.PI.RA
 721 #
 722 #   Default functional classes used are: HBD, HBA, PI, NI, Ar, Hal
 723 #
 724 #   FunctionalAtomTypes are assigned using the following definitions [ Ref 60-61, Ref 65-66 ]:
 725 #
 726 #     HydrogenBondDonor: NH, NH2, OH
 727 #     HydrogenBondAcceptor: N[!H], O
 728 #     PositivelyIonizable: +, NH2
 729 #     NegativelyIonizable: -, C(=O)OH, S(=O)OH, P(=O)OH
 730 #
 731 sub _InitializeFunctionalClassAtomTypesInformation {
 732   my($This) = @_;
 733 
 734   # Default functional class atom typess to use for generating atom identifiers
 735   # are: HBD, HBA, PI, NI, Ar, Hal
 736   #
 737   @{$This->{FunctionalClassesToUse}} = ();
 738   @{$This->{FunctionalClassesToUse}} = ('HBD', 'HBA', 'PI', 'NI', 'Ar', 'Hal');
 739 
 740   return $This;
 741 }
 742 
 743 # Clear cached molecule data...
 744 #
 745 sub _ClearMoleculeDataCache {
 746   my($This) = @_;
 747 
 748   @{$This->{Atoms}} = ();
 749 
 750   return $This;
 751 }
 752 
 753 # Return a string containg data for TopologicalAtomTripletsFingerprints object...
 754 #
 755 sub StringifyTopologicalAtomTripletsFingerprints {
 756   my($This) = @_;
 757   my($FingerprintsString);
 758 
 759   # Type of fingerprint...
 760   $FingerprintsString = "Fingerprint type: $This->{Type}; AtomIdentifierType: $This->{AtomIdentifierType}";
 761 
 762   # Min and max distance...
 763   $FingerprintsString .= "; MinDistance:  $This->{MinDistance}; MaxDistance: $This->{MaxDistance}; UseTriangleInequality: " . ($This->{UseTriangleInequality} ? "Yes" : "No");
 764 
 765   if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
 766     my($AtomicInvariant, @AtomicInvariants, @AtomicInvariantsOrder, %AvailableAtomicInvariants);
 767 
 768     @AtomicInvariantsOrder = AtomTypes::AtomicInvariantsAtomTypes::GetAtomicInvariantsOrder();
 769     %AvailableAtomicInvariants = AtomTypes::AtomicInvariantsAtomTypes::GetAvailableAtomicInvariants();
 770 
 771     for $AtomicInvariant (@AtomicInvariantsOrder) {
 772       push @AtomicInvariants, "$AtomicInvariant: $AvailableAtomicInvariants{$AtomicInvariant}";
 773     }
 774 
 775     $FingerprintsString .= "; AtomicInvariantsToUse: <" . TextUtil::JoinWords(\@{$This->{AtomicInvariantsToUse}}, ", ", 0) . ">";
 776     $FingerprintsString .= "; AtomicInvariantsOrder: <" . TextUtil::JoinWords(\@AtomicInvariantsOrder, ", ", 0) . ">";
 777     $FingerprintsString .= "; AvailableAtomicInvariants: <" . TextUtil::JoinWords(\@AtomicInvariants, ", ", 0) . ">";
 778   }
 779   elsif ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
 780     my($FunctionalClass, @FunctionalClasses, @FunctionalClassesOrder, %AvailableFunctionalClasses);
 781 
 782     @FunctionalClassesOrder = AtomTypes::FunctionalClassAtomTypes::GetFunctionalClassesOrder();
 783     %AvailableFunctionalClasses = AtomTypes::FunctionalClassAtomTypes::GetAvailableFunctionalClasses();
 784 
 785     for $FunctionalClass (@FunctionalClassesOrder) {
 786       push @FunctionalClasses, "$FunctionalClass: $AvailableFunctionalClasses{$FunctionalClass}";
 787     }
 788 
 789     $FingerprintsString .= "; FunctionalClassesToUse: <" . TextUtil::JoinWords(\@{$This->{FunctionalClassesToUse}}, ", ", 0) . ">";
 790     $FingerprintsString .= "; FunctionalClassesOrder: <" . TextUtil::JoinWords(\@FunctionalClassesOrder, ", ", 0) . ">";
 791     $FingerprintsString .= "; AvailableFunctionalClasses: <" . TextUtil::JoinWords(\@FunctionalClasses, ", ", 0) . ">";
 792   }
 793 
 794   # Total number of atom triplets...
 795   $FingerprintsString .= "; NumOfAtomTriplets: " . $This->{FingerprintsVector}->GetNumOfValues();
 796 
 797   # FingerprintsVector...
 798   $FingerprintsString .= "; FingerprintsVector: < $This->{FingerprintsVector} >";
 799 
 800   return $FingerprintsString;
 801 }
 802