MayaChemTools

   1 package Fingerprints::TopologicalAtomPairsFingerprints;
   2 #
   3 # File: TopologicalAtomPairsFingerprints.pm
   4 # Author: Manish Sud <msud@san.rr.com>
   5 #
   6 # Copyright (C) 2024 Manish Sud. All rights reserved.
   7 #
   8 # This file is part of MayaChemTools.
   9 #
  10 # MayaChemTools is free software; you can redistribute it and/or modify it under
  11 # the terms of the GNU Lesser General Public License as published by the Free
  12 # Software Foundation; either version 3 of the License, or (at your option) any
  13 # later version.
  14 #
  15 # MayaChemTools is distributed in the hope that it will be useful, but without
  16 # any warranty; without even the implied warranty of merchantability of fitness
  17 # for a particular purpose.  See the GNU Lesser General Public License for more
  18 # details.
  19 #
  20 # You should have received a copy of the GNU Lesser General Public License
  21 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  22 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  23 # Boston, MA, 02111-1307, USA.
  24 #
  25 
  26 use strict;
  27 use Carp;
  28 use Exporter;
  29 use Fingerprints::Fingerprints;
  30 use TextUtil ();
  31 use Molecule;
  32 use AtomTypes::AtomicInvariantsAtomTypes;
  33 use AtomTypes::DREIDINGAtomTypes;
  34 use AtomTypes::EStateAtomTypes;
  35 use AtomTypes::FunctionalClassAtomTypes;
  36 use AtomTypes::MMFF94AtomTypes;
  37 use AtomTypes::SLogPAtomTypes;
  38 use AtomTypes::SYBYLAtomTypes;
  39 use AtomTypes::TPSAAtomTypes;
  40 use AtomTypes::UFFAtomTypes;
  41 
  42 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
  43 
  44 @ISA = qw(Fingerprints::Fingerprints Exporter);
  45 @EXPORT = qw();
  46 @EXPORT_OK = qw();
  47 
  48 %EXPORT_TAGS = (all  => [@EXPORT, @EXPORT_OK]);
  49 
  50 # Setup class variables...
  51 my($ClassName);
  52 _InitializeClass();
  53 
  54 # Overload Perl functions...
  55 use overload '""' => 'StringifyTopologicalAtomPairsFingerprints';
  56 
  57 # Class constructor...
  58 sub new {
  59   my($Class, %NamesAndValues) = @_;
  60 
  61   # Initialize object...
  62   my $This = $Class->SUPER::new();
  63   bless $This, ref($Class) || $Class;
  64   $This->_InitializeTopologicalAtomPairsFingerprints();
  65 
  66   $This->_InitializeTopologicalAtomPairsFingerprintsProperties(%NamesAndValues);
  67 
  68   return $This;
  69 }
  70 
  71 # Initialize object data...
  72 #
  73 sub _InitializeTopologicalAtomPairsFingerprints {
  74   my($This) = @_;
  75 
  76   # Type of fingerprint...
  77   $This->{Type} = 'TopologicalAtomPairs';
  78 
  79   # Type of vector...
  80   $This->{VectorType} = 'FingerprintsVector';
  81 
  82   # Type of FingerprintsVector...
  83   $This->{FingerprintsVectorType} = 'NumericalValues';
  84 
  85   # Minimum and maximum bond distance between atom paris...
  86   $This->{MinDistance} = 1;
  87   $This->{MaxDistance} = 10;
  88 
  89   # Atom identifier type to use for atom IDs in atom pairs...
  90   #
  91   # Currently supported values are: AtomicInvariantsAtomTypes, DREIDINGAtomTypes,
  92   # EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes,
  93   # SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes
  94   #
  95   $This->{AtomIdentifierType} = '';
  96 
  97   # Atom types assigned to each heavy atom...
  98   #
  99   %{$This->{AssignedAtomTypes}} = ();
 100 
 101   # All atom pairs between minimum and maximum distance...
 102   #
 103   @{$This->{AtomPairsIDs}} = ();
 104   %{$This->{AtomPairsCount}} = ();
 105 }
 106 
 107 # Initialize class ...
 108 sub _InitializeClass {
 109   #Class name...
 110   $ClassName = __PACKAGE__;
 111 }
 112 
 113 # Initialize object properties....
 114 sub _InitializeTopologicalAtomPairsFingerprintsProperties {
 115   my($This, %NamesAndValues) = @_;
 116 
 117   my($Name, $Value, $MethodName);
 118   while (($Name, $Value) = each  %NamesAndValues) {
 119     $MethodName = "Set${Name}";
 120     $This->$MethodName($Value);
 121   }
 122 
 123   # Make sure molecule object was specified...
 124   if (!exists $NamesAndValues{Molecule}) {
 125     croak "Error: ${ClassName}->New: Object can't be instantiated without specifying molecule...";
 126   }
 127   if (!exists $NamesAndValues{AtomIdentifierType}) {
 128     croak "Error: ${ClassName}->New: Object can't be instantiated without specifying AtomIdentifierType...";
 129   }
 130 
 131   $This->_InitializeFingerprintsVector();
 132 
 133   return $This;
 134 }
 135 
 136 # Set minimum distance for atom pairs...
 137 #
 138 sub SetMinDistance {
 139   my($This, $Value) = @_;
 140 
 141   if (!TextUtil::IsPositiveInteger($Value)) {
 142     croak "Error: ${ClassName}->SetMinDistance: MinDistance value, $Value, is not valid:  It must be a positive integer...";
 143   }
 144   $This->{MinDistance} = $Value;
 145 
 146   return $This;
 147 }
 148 
 149 # Set maximum distance for atom pairs...
 150 #
 151 sub SetMaxDistance {
 152   my($This, $Value) = @_;
 153 
 154   if (!TextUtil::IsPositiveInteger($Value)) {
 155     croak "Error: ${ClassName}->SetMaxDistance: MaxDistance value, $Value, is not valid:  It must be a positive integer...";
 156   }
 157   $This->{MaxDistance} = $Value;
 158 
 159   return $This;
 160 }
 161 
 162 # Set atom identifier type..
 163 #
 164 sub SetAtomIdentifierType {
 165   my($This, $IdentifierType) = @_;
 166 
 167   if ($IdentifierType !~ /^(AtomicInvariantsAtomTypes|DREIDINGAtomTypes|EStateAtomTypes|FunctionalClassAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
 168     croak "Error: ${ClassName}->SetAtomIdentifierType: Specified value, $IdentifierType, for AtomIdentifierType is not vaild. Supported types in current release of MayaChemTools: AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, and UFFAtomTypes.";
 169   }
 170 
 171   if ($This->{AtomIdentifierType}) {
 172     croak "Error: ${ClassName}->SeAtomIdentifierType: Can't change intial atom identifier type:  It's already set...";
 173   }
 174 
 175   $This->{AtomIdentifierType} = $IdentifierType;
 176 
 177   # Initialize atom identifier type information...
 178   $This->_InitializeAtomIdentifierTypeInformation();
 179 
 180   return $This;
 181 }
 182 
 183 # Generate fingerprints description...
 184 #
 185 sub GetDescription {
 186   my($This) = @_;
 187 
 188   # Is description explicity set?
 189   if (exists $This->{Description}) {
 190     return $This->{Description};
 191   }
 192 
 193   # Generate fingerprints description...
 194 
 195   return "$This->{Type}:$This->{AtomIdentifierType}:MinDistance$This->{MinDistance}:MaxDistance$This->{MaxDistance}";
 196 }
 197 
 198 # Generate topological atom pairs [ Ref 57, Ref 59, Ref 72 ] fingerprints...
 199 #
 200 # Methodology:
 201 #   . Generate a distance matrix.
 202 #   . Assign atom types to all the atoms.
 203 #   . Using distance matrix and atom types, count occurrence of
 204 #     unique atom pairs within specified distance range - It corresponds to the
 205 #     correlation-vector for the atom pairs.
 206 #
 207 # Notes:
 208 #   . Hydrogen atoms are ignored during the fingerprint generation.
 209 #
 210 sub GenerateFingerprints {
 211   my($This) = @_;
 212 
 213   if ($This->{MinDistance} > $This->{MaxDistance}) {
 214     croak "Error: ${ClassName}->GenerateTopologicalAtomPairsFingerprints: No fingerpritns generated: MinDistance, $This->{MinDistance}, must be <= MaxDistance, $This->{MaxDistance}...";
 215   }
 216 
 217   # Cache appropriate molecule data...
 218   $This->_SetupMoleculeDataCache();
 219 
 220   # Generate distance matrix...
 221   if (!$This->_SetupDistanceMatrix()) {
 222     carp "Warning: ${ClassName}->GenerateFingerprints: $This->{AtomIdentifierType} fingerprints generation didn't succeed: Couldn't generate distance matrix...";
 223     return $This;
 224   }
 225 
 226   # Assign atom types to all heavy atoms...
 227   if (!$This->_AssignAtomTypes()) {
 228     carp "Warning: ${ClassName}->GenerateFingerprints: $This->{AtomIdentifierType} fingerprints generation didn't succeed: Couldn't assign valid $This->{AtomIdentifierType} to all atoms...";
 229     return $This;
 230   }
 231 
 232   # Intialize values of toplogical atom pairs...
 233   $This->_InitializeToplogicalAtomPairs();
 234 
 235   # Count atom pairs...
 236   $This->_GenerateAndCountAtomPairs();
 237 
 238   # Set final fingerprints...
 239   $This->_SetFinalFingerprints();
 240 
 241   # Clear cached molecule data...
 242   $This->_ClearMoleculeDataCache();
 243 
 244   return $This;
 245 }
 246 
 247 # Setup distance matrix...
 248 #
 249 sub _SetupDistanceMatrix {
 250   my($This) = @_;
 251 
 252   $This->{DistanceMatrix} = $This->GetMolecule()->GetDistanceMatrix();
 253 
 254   if (!$This->{DistanceMatrix}) {
 255     return undef;
 256   }
 257 
 258   return $This;
 259 }
 260 
 261 # Assign appropriate atom types to all heavy atoms...
 262 #
 263 sub _AssignAtomTypes {
 264   my($This) = @_;
 265   my($SpecifiedAtomTypes, $Atom, $AtomID, $IgnoreHydrogens);
 266 
 267   %{$This->{AssignedAtomTypes}} = ();
 268   $IgnoreHydrogens = 1;
 269 
 270   $SpecifiedAtomTypes = undef;
 271 
 272   IDENTIFIERTYPE: {
 273     if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
 274       $SpecifiedAtomTypes = new AtomTypes::AtomicInvariantsAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens, 'AtomicInvariantsToUse' => $This->{AtomicInvariantsToUse});
 275       last IDENTIFIERTYPE;
 276     }
 277 
 278     if ($This->{AtomIdentifierType} =~ /^DREIDINGAtomTypes$/i) {
 279       $SpecifiedAtomTypes = new AtomTypes::DREIDINGAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens);
 280       last IDENTIFIERTYPE;
 281     }
 282 
 283     if ($This->{AtomIdentifierType} =~ /^EStateAtomTypes$/i) {
 284       $SpecifiedAtomTypes = new AtomTypes::EStateAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens);
 285       last IDENTIFIERTYPE;
 286     }
 287 
 288     if ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
 289       $SpecifiedAtomTypes = new AtomTypes::FunctionalClassAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens, 'FunctionalClassesToUse' => $This->{FunctionalClassesToUse});
 290       last IDENTIFIERTYPE;
 291     }
 292 
 293     if ($This->{AtomIdentifierType} =~ /^MMFF94AtomTypes$/i) {
 294       $SpecifiedAtomTypes = new AtomTypes::MMFF94AtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens);
 295       last IDENTIFIERTYPE;
 296     }
 297 
 298     if ($This->{AtomIdentifierType} =~ /^SLogPAtomTypes$/i) {
 299       $SpecifiedAtomTypes = new AtomTypes::SLogPAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens);
 300       last IDENTIFIERTYPE;
 301     }
 302     if ($This->{AtomIdentifierType} =~ /^SYBYLAtomTypes$/i) {
 303       $SpecifiedAtomTypes = new AtomTypes::SYBYLAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens);
 304       last IDENTIFIERTYPE;
 305     }
 306 
 307     if ($This->{AtomIdentifierType} =~ /^TPSAAtomTypes$/i) {
 308       $SpecifiedAtomTypes = new AtomTypes::TPSAAtomTypes('Molecule' => $This->{Molecule}, 'IgnorePhosphorus' => 0, 'IgnoreSulfur' => 0);
 309       last IDENTIFIERTYPE;
 310     }
 311 
 312     if ($This->{AtomIdentifierType} =~ /^UFFAtomTypes$/i) {
 313       $SpecifiedAtomTypes = new AtomTypes::UFFAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens);
 314       last IDENTIFIERTYPE;
 315     }
 316 
 317     croak "Error: ${ClassName}->_AssignAtomTypes: Unknown atom indentifier type $This->{AtomIdentifierType}...";
 318   }
 319 
 320   # Assign atom types...
 321   $SpecifiedAtomTypes->AssignAtomTypes();
 322 
 323   # Make sure atom types assignment is successful...
 324   if (!$SpecifiedAtomTypes->IsAtomTypesAssignmentSuccessful()) {
 325     return undef;
 326   }
 327 
 328   # Collect assigned atom types...
 329   ATOM: for $Atom (@{$This->{Atoms}}) {
 330     if ($Atom->IsHydrogen()) {
 331       next ATOM;
 332     }
 333     $AtomID = $Atom->GetID();
 334     $This->{AssignedAtomTypes}{$AtomID} = $SpecifiedAtomTypes->GetAtomType($Atom);
 335   }
 336 
 337   return $This;
 338 }
 339 
 340 # Initialize topological atom pairs between specified distance range...
 341 #
 342 sub _InitializeToplogicalAtomPairs {
 343   my($This) = @_;
 344   my($Distance);
 345 
 346   @{$This->{AtomPairsIDs}} = ();
 347   %{$This->{AtomPairsCount}} = ();
 348 
 349   for $Distance ($This->{MinDistance} .. $This->{MaxDistance}) {
 350     %{$This->{AtomPairsCount}{$Distance}} = ();
 351   }
 352 
 353   return $This;
 354 }
 355 
 356 # Count atom pairs between mininum and maximum distance at each
 357 # distance using distance matrix and atom types assiged to each heavy
 358 # atom.
 359 #
 360 # Notes:
 361 #   . The row and column indices of distance matrix correspond to atom indices.
 362 #   . Distance value of BigNumber implies the atom is not connected to any other atom.
 363 #   . Due to symmetric nature of distance matrix, only upper or lower triangular matrix
 364 #     needs to be processed during identification and count of atom pairs.
 365 #
 366 sub _GenerateAndCountAtomPairs {
 367   my($This) = @_;
 368 
 369   my($NumOfRows, $NumOfCols, $RowIndex, $ColIndex, $DistanceMatrix, $Distance, $AtomID1, $AtomID2, $AtomType1, $AtomType2, $SkipIndexCheck, $CountIncrement);
 370 
 371   $DistanceMatrix = $This->{DistanceMatrix};
 372   ($NumOfRows, $NumOfCols) = $DistanceMatrix->GetSize();
 373   $SkipIndexCheck = 0;
 374 
 375   ROWINDEX: for $RowIndex (0 .. ($NumOfRows - 1) ) {
 376     $AtomID1 = $This->{AtomIndexToID}{$RowIndex};
 377     if ( !(exists($This->{AssignedAtomTypes}{$AtomID1})) ) {
 378       next ROWINDEX;
 379     }
 380     $AtomType1 = $This->{AssignedAtomTypes}{$AtomID1};
 381 
 382     COLINDEX: for $ColIndex (($RowIndex + 1) .. ($NumOfCols - 1) ) {
 383       $AtomID2 = $This->{AtomIndexToID}{$ColIndex};
 384       if ( !(exists($This->{AssignedAtomTypes}{$AtomID2})) ) {
 385         next COLINDEX;
 386       }
 387       $Distance = $DistanceMatrix->GetValue($RowIndex, $ColIndex, $SkipIndexCheck);
 388       if ($Distance < $This->{MinDistance} || $Distance > $This->{MaxDistance}) {
 389         next COLINDEX;
 390       }
 391       $AtomType2 = $This->{AssignedAtomTypes}{$AtomID2};
 392 
 393       if ($AtomType1 le $AtomType2) {
 394         $This->_SetAtomPairsCount($Distance, $AtomType1, $AtomType2);
 395       }
 396       else {
 397         $This->_SetAtomPairsCount($Distance, $AtomType2, $AtomType1);
 398       }
 399     }
 400   }
 401   return $This;
 402 }
 403 
 404 # Set atom paris count for a specific atom ID pair at a specific distance...
 405 #
 406 sub _SetAtomPairsCount {
 407   my($This, $Distance, $AtomType1, $AtomType2) = @_;
 408 
 409   if (! exists $This->{AtomPairsCount}{$Distance}{$AtomType1}) {
 410     %{$This->{AtomPairsCount}{$Distance}{$AtomType1}} = ();
 411     $This->{AtomPairsCount}{$Distance}{$AtomType1}{$AtomType2} = 1;
 412     return $This;
 413   }
 414 
 415   if (exists $This->{AtomPairsCount}{$Distance}{$AtomType1}{$AtomType2}) {
 416     $This->{AtomPairsCount}{$Distance}{$AtomType1}{$AtomType2} += 1;
 417   }
 418   else {
 419     $This->{AtomPairsCount}{$Distance}{$AtomType1}{$AtomType2} = 1;
 420   }
 421 
 422   return $This;
 423 }
 424 
 425 # Set final fingerpritns vector...
 426 #
 427 sub _SetFinalFingerprints {
 428   my($This) = @_;
 429   my($Distance, $AtomType1, $AtomType2, $Value, @Values);
 430 
 431   # Mark successful generation of fingerprints...
 432   $This->{FingerprintsGenerated} = 1;
 433 
 434   @Values = ();
 435   @{$This->{AtomPairsIDs}} = ();
 436 
 437   for $Distance ($This->{MinDistance} .. $This->{MaxDistance}) {
 438     for $AtomType1 (sort keys %{$This->{AtomPairsCount}{$Distance}} ) {
 439       for $AtomType2 (sort keys %{$This->{AtomPairsCount}{$Distance}{$AtomType1}} ) {
 440         push @{$This->{AtomPairsIDs}}, "${AtomType1}-D${Distance}-${AtomType2}";
 441         $Value = $This->{AtomPairsCount}{$Distance}{$AtomType1}{$AtomType2};
 442         push @Values, $Value;
 443       }
 444     }
 445   }
 446 
 447   # Add AtomPairsIDs and values to fingerprint vector...
 448   $This->{FingerprintsVector}->AddValueIDs(\@{$This->{AtomPairsIDs}});
 449   $This->{FingerprintsVector}->AddValues(\@Values);
 450 
 451   return $This;
 452 }
 453 
 454 # Get atom pair IDs corresponding to atom pairs count values in fingerprint
 455 # vector as an array or reference to an array...
 456 #
 457 # AtomPairIDs list differes in molecules and is generated during finalization
 458 # of fingerprints to make sure the fingerprint vector containing count values
 459 # matches the atom pairs array.
 460 #
 461 sub GetAtomPairIDs {
 462   my($This) = @_;
 463 
 464   return wantarray ? @{$This->{AtomPairsIDs}} : \@{$This->{AtomPairsIDs}};
 465 }
 466 
 467 # Cache  appropriate molecule data...
 468 #
 469 sub _SetupMoleculeDataCache {
 470   my($This) = @_;
 471 
 472   # Get all atoms including hydrogens to correctly map atom indices to atom IDs for
 473   # usage of distance matrix. The hydrogen atoms are ignored during processing...
 474   #
 475   @{$This->{Atoms}} = $This->GetMolecule()->GetAtoms();
 476 
 477   # Get all atom IDs...
 478   my(@AtomIDs);
 479   @AtomIDs = ();
 480   @AtomIDs =  map { $_->GetID() } @{$This->{Atoms}};
 481 
 482   # Set AtomIndex to AtomID hash...
 483   %{$This->{AtomIndexToID}} = ();
 484   @{$This->{AtomIndexToID}}{ (0 .. $#AtomIDs) } = @AtomIDs;
 485 
 486   return $This;
 487 }
 488 
 489 # Clear cached molecule data...
 490 #
 491 sub _ClearMoleculeDataCache {
 492   my($This) = @_;
 493 
 494   @{$This->{Atoms}} = ();
 495 
 496   return $This;
 497 }
 498 
 499 # Set atomic invariants to use for atom identifiers...
 500 #
 501 sub SetAtomicInvariantsToUse {
 502   my($This, @Values) = @_;
 503   my($FirstValue, $TypeOfFirstValue, $AtomicInvariant, $SpecifiedAtomicInvariant, $AtomicInvariantValue, @SpecifiedAtomicInvariants, @AtomicInvariantsToUse);
 504 
 505   if (!@Values) {
 506     carp "Warning: ${ClassName}->SetAtomicInvariantsToUse: No values specified...";
 507     return;
 508   }
 509 
 510   $FirstValue = $Values[0];
 511   $TypeOfFirstValue = ref $FirstValue;
 512 
 513   @SpecifiedAtomicInvariants = ();
 514   @AtomicInvariantsToUse = ();
 515 
 516   if ($TypeOfFirstValue =~ /^ARRAY/) {
 517     push @SpecifiedAtomicInvariants, @{$FirstValue};
 518   }
 519   else {
 520     push @SpecifiedAtomicInvariants, @Values;
 521   }
 522 
 523   # Make sure specified AtomicInvariants are valid...
 524   for $SpecifiedAtomicInvariant (@SpecifiedAtomicInvariants) {
 525     if (!AtomTypes::AtomicInvariantsAtomTypes::IsAtomicInvariantAvailable($SpecifiedAtomicInvariant)) {
 526       croak "Error: ${ClassName}->SetAtomicInvariantsToUse: Specified atomic invariant, $SpecifiedAtomicInvariant, is not supported...\n ";
 527     }
 528     $AtomicInvariant = $SpecifiedAtomicInvariant;
 529     push @AtomicInvariantsToUse, $AtomicInvariant;
 530   }
 531 
 532   # Set atomic invariants to use...
 533   @{$This->{AtomicInvariantsToUse}} = ();
 534   push @{$This->{AtomicInvariantsToUse}}, @AtomicInvariantsToUse;
 535 
 536   return $This;
 537 }
 538 
 539 # Set functional classes to use for atom identifiers...
 540 #
 541 sub SetFunctionalClassesToUse {
 542   my($This, @Values) = @_;
 543   my($FirstValue, $TypeOfFirstValue, $FunctionalClass, $SpecifiedFunctionalClass, @SpecifiedFunctionalClasses, @FunctionalClassesToUse);
 544 
 545   if (!@Values) {
 546     carp "Warning: ${ClassName}->SetFunctionalClassesToUse: No values specified...";
 547     return;
 548   }
 549 
 550   if ($This->{AtomIdentifierType} !~ /^FunctionalClassAtomTypes$/i) {
 551     carp "Warning: ${ClassName}->SetFunctionalClassesToUse: FunctionalClassesToUse can't be set for InitialAtomIdentifierType of $This->{AtomIdentifierType}...";
 552     return;
 553   }
 554 
 555   $FirstValue = $Values[0];
 556   $TypeOfFirstValue = ref $FirstValue;
 557 
 558   @SpecifiedFunctionalClasses = ();
 559   @FunctionalClassesToUse = ();
 560 
 561   if ($TypeOfFirstValue =~ /^ARRAY/) {
 562     push @SpecifiedFunctionalClasses, @{$FirstValue};
 563   }
 564   else {
 565     push @SpecifiedFunctionalClasses, @Values;
 566   }
 567 
 568   # Make sure specified FunctionalClasses are valid...
 569   for $SpecifiedFunctionalClass (@SpecifiedFunctionalClasses) {
 570     if (!AtomTypes::FunctionalClassAtomTypes::IsFunctionalClassAvailable($SpecifiedFunctionalClass)) {
 571       croak "Error: ${ClassName}->SetFunctionalClassesToUse: Specified functional class, $SpecifiedFunctionalClass, is not supported...\n ";
 572     }
 573     push @FunctionalClassesToUse, $SpecifiedFunctionalClass;
 574   }
 575 
 576   # Set functional classes to use...
 577   @{$This->{FunctionalClassesToUse}} = ();
 578   push @{$This->{FunctionalClassesToUse}}, @FunctionalClassesToUse;
 579 
 580   return $This;
 581 }
 582 
 583 # Initialize atom indentifier type information...
 584 #
 585 # Current supported values:
 586 #
 587 # AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes,
 588 # MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes
 589 #
 590 sub _InitializeAtomIdentifierTypeInformation {
 591   my($This) = @_;
 592 
 593   if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
 594     $This->_InitializeAtomicInvariantsAtomTypesInformation();
 595   }
 596   elsif ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
 597     $This->_InitializeFunctionalClassAtomTypesInformation();
 598   }
 599   elsif ($This->{AtomIdentifierType} =~ /^(DREIDINGAtomTypes|EStateAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
 600     # Nothing to do for now...
 601   }
 602   else {
 603     croak "Error: ${ClassName}->_InitializeAtomIdentifierTypeInformation: Unknown atom indentifier type $This->{AtomIdentifierType}...";
 604   }
 605 
 606   return $This;
 607 }
 608 
 609 # Initialize atomic invariants atom types to use for generating atom identifiers...
 610 #
 611 # Let:
 612 #   AS = Atom symbol corresponding to element symbol
 613 #
 614 #   X<n>   = Number of non-hydrogen atom neighbors or heavy atoms attached to atom
 615 #   BO<n> = Sum of bond orders to non-hydrogen atom neighbors or heavy atoms attached to atom
 616 #   LBO<n> = Largest bond order of non-hydrogen atom neighbors or heavy atoms attached to atom
 617 #   SB<n> = Number of single bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
 618 #   DB<n> = Number of double bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
 619 #   TB<n> = Number of triple bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
 620 #   H<n>   = Number of implicit and explicit hydrogens for atom
 621 #   Ar     = Aromatic annotation indicating whether atom is aromatic
  622 #   RA     = Ring atom annotation indicating whether atom is in a ring
 623 #   FC<+n/-n> = Formal charge assigned to atom
 624 #   MN<n> = Mass number indicating isotope other than most abundant isotope
 625 #   SM<n> = Spin multiplicity of atom. Possible values: 1 (singlet), 2 (doublet) or 3 (triplet)
 626 #
 627 #   AtomTypeIDx = Atomic invariants atom type for atom x
 628 #   AtomTypeIDy = Atomic invariants atom type for atom y
 629 #   Dn   = Topological distance between atom x and y
 630 #
 631 # Then:
 632 #
 633 #   Atom pair AtomID generated by AtomTypes::AtomicInvariantsAtomTypes class corresponds to:
 634 #
 635 #     AS.X<n>.BO<n>.LBO<n>.<SB><n>.<DB><n>.<TB><n>.H<n>.Ar.RA.FC<+n/-n>.MN<n>.SM<n>
 636 #
 637 #  AtomPairID corresponds to:
 638 #
 639 #    AtomTypeIDx-D<n>-AtomTypeIDy
 640 #
 641 # Except for AS which is a required atomic invariant in atom pair AtomIDs, all other atomic invariants are
 642 # optional. Default atomic invariants used for AtomID are: AS, X<n>, BO<n>, H<n>, FC<+n/-n>.
 643 # AtomID specification doesn't include atomic invariants with zero or undefined values.
 644 #
 645 # Examples of atom pair AtomIDs:
 646 #
 647 #   O.X1.BO1.H1 - Hydroxyl oxygen in carboxylate with attached hydrogen and no explicit charge
  648 #   O.X1.BO1.FC-1 - Hydroxyl oxygen in carboxylate with explicit negative charge
 649 #   O.X1.BO2 - Carbonyl oxygen in carboxylate with double bond to carbon
  650 #   O.X2.BO2 - Hydroxyl oxygen in carboxylate attached to carbonyl carbon and another heavy atom
 651 #
 652 #   C.X2.BO3.H1.Ar - Aromatic carbon
 653 #
 654 # Examples of AtomPairIDs:
 655 #
 656 #   C.X2.BO2.H3-D1-O.X1.BO1 - Carbon with two heavy atom neighbors attached to oxygen at bond distance 1(methanol)
 657 #
 658 #   C.X2.BO3.H1.Ar-D3-C.X2.BO3.H1.Ar  - Two aromatic carbons at bond distance 3 where each carbon has
 659 #                                       two heavy atom neighbors and bond order of 3 (benzene)
 660 #
 661 sub _InitializeAtomicInvariantsAtomTypesInformation {
 662   my($This) = @_;
 663 
 664   # Default atomic invariants to use for generating atom neighborhood atom IDs: AS, X, BO, H, FC
 665   #
 666   @{$This->{AtomicInvariantsToUse}} = ();
 667   @{$This->{AtomicInvariantsToUse}} = ('AS', 'X', 'BO', 'H', 'FC');
 668 
 669   return $This;
 670 }
 671 
 672 # Initialize functional class atom types, generated by AtomTypes::FunctionalClassAtomTypes
 673 # class, to use for generating atom identifiers...
 674 #
 675 # Let:
 676 #   HBD: HydrogenBondDonor
 677 #   HBA: HydrogenBondAcceptor
 678 #   PI :  PositivelyIonizable
 679 #   NI : NegativelyIonizable
 680 #   Ar : Aromatic
 681 #   Hal : Halogen
 682 #   H : Hydrophobic
 683 #   RA : RingAtom
 684 #   CA : ChainAtom
 685 #
 686 # Then:
 687 #
  688 #   Functional class atom type specification for an atom corresponds to:
 689 #
 690 #     Ar.CA.H.HBA.HBD.Hal.NI.PI.RA
 691 #
 692 #   Default functional classes used are: HBD, HBA, PI, NI, Ar, Hal
 693 #
 694 #   FunctionalAtomTypes are assigned using the following definitions [ Ref 60-61, Ref 65-66 ]:
 695 #
 696 #     HydrogenBondDonor: NH, NH2, OH
 697 #     HydrogenBondAcceptor: N[!H], O
 698 #     PositivelyIonizable: +, NH2
 699 #     NegativelyIonizable: -, C(=O)OH, S(=O)OH, P(=O)OH
 700 #
 701 sub _InitializeFunctionalClassAtomTypesInformation {
 702   my($This) = @_;
 703 
 704   # Default functional class atom typess to use for generating atom identifiers
 705   # are: HBD, HBA, PI, NI, Ar, Hal
 706   #
 707   @{$This->{FunctionalClassesToUse}} = ();
 708   @{$This->{FunctionalClassesToUse}} = ('HBD', 'HBA', 'PI', 'NI', 'Ar', 'Hal');
 709 
 710   return $This;
 711 }
 712 
 713 # Return a string containg data for TopologicalAtomPairsFingerprints object...
 714 #
 715 sub StringifyTopologicalAtomPairsFingerprints {
 716   my($This) = @_;
 717   my($FingerprintsString);
 718 
 719   # Type of fingerprint...
 720   $FingerprintsString = "Fingerprint type: $This->{Type}; AtomIdentifierType: $This->{AtomIdentifierType}";
 721 
 722   # Min and max distance...
 723   $FingerprintsString .= "; MinDistance:  $This->{MinDistance}; MaxDistance: $This->{MaxDistance}";
 724 
 725   if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
 726     my($AtomicInvariant, @AtomicInvariants, @AtomicInvariantsOrder, %AvailableAtomicInvariants);
 727 
 728     @AtomicInvariantsOrder = AtomTypes::AtomicInvariantsAtomTypes::GetAtomicInvariantsOrder();
 729     %AvailableAtomicInvariants = AtomTypes::AtomicInvariantsAtomTypes::GetAvailableAtomicInvariants();
 730 
 731     for $AtomicInvariant (@AtomicInvariantsOrder) {
 732       push @AtomicInvariants, "$AtomicInvariant: $AvailableAtomicInvariants{$AtomicInvariant}";
 733     }
 734 
 735     $FingerprintsString .= "; AtomicInvariantsToUse: <" . TextUtil::JoinWords(\@{$This->{AtomicInvariantsToUse}}, ", ", 0) . ">";
 736     $FingerprintsString .= "; AtomicInvariantsOrder: <" . TextUtil::JoinWords(\@AtomicInvariantsOrder, ", ", 0) . ">";
 737     $FingerprintsString .= "; AvailableAtomicInvariants: <" . TextUtil::JoinWords(\@AtomicInvariants, ", ", 0) . ">";
 738   }
 739   elsif ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
 740     my($FunctionalClass, @FunctionalClasses, @FunctionalClassesOrder, %AvailableFunctionalClasses);
 741 
 742     @FunctionalClassesOrder = AtomTypes::FunctionalClassAtomTypes::GetFunctionalClassesOrder();
 743     %AvailableFunctionalClasses = AtomTypes::FunctionalClassAtomTypes::GetAvailableFunctionalClasses();
 744 
 745     for $FunctionalClass (@FunctionalClassesOrder) {
 746       push @FunctionalClasses, "$FunctionalClass: $AvailableFunctionalClasses{$FunctionalClass}";
 747     }
 748 
 749     $FingerprintsString .= "; FunctionalClassesToUse: <" . TextUtil::JoinWords(\@{$This->{FunctionalClassesToUse}}, ", ", 0) . ">";
 750     $FingerprintsString .= "; FunctionalClassesOrder: <" . TextUtil::JoinWords(\@FunctionalClassesOrder, ", ", 0) . ">";
 751     $FingerprintsString .= "; AvailableFunctionalClasses: <" . TextUtil::JoinWords(\@FunctionalClasses, ", ", 0) . ">";
 752   }
 753 
 754   # Total number of atom pairs...
 755   $FingerprintsString .= "; NumOfAtomPairs: " . $This->{FingerprintsVector}->GetNumOfValues();
 756 
 757   # FingerprintsVector...
 758   $FingerprintsString .= "; FingerprintsVector: < $This->{FingerprintsVector} >";
 759 
 760   return $FingerprintsString;
 761 }
 762