MayaChemTools

   1 package Fingerprints::ExtendedConnectivityFingerprints;
   2 #
   3 # File: ExtendedConnectivityFingerprints.pm
   4 # Author: Manish Sud <msud@san.rr.com>
   5 #
   6 # Copyright (C) 2024 Manish Sud. All rights reserved.
   7 #
   8 # This file is part of MayaChemTools.
   9 #
  10 # MayaChemTools is free software; you can redistribute it and/or modify it under
  11 # the terms of the GNU Lesser General Public License as published by the Free
  12 # Software Foundation; either version 3 of the License, or (at your option) any
  13 # later version.
  14 #
  15 # MayaChemTools is distributed in the hope that it will be useful, but without
   16 # any warranty; without even the implied warranty of merchantability or fitness
  17 # for a particular purpose.  See the GNU Lesser General Public License for more
  18 # details.
  19 #
  20 # You should have received a copy of the GNU Lesser General Public License
  21 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  22 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  23 # Boston, MA, 02111-1307, USA.
  24 #
  25 
  26 use strict;
  27 use Carp;
  28 use Exporter;
  29 use TextUtil ();
  30 use MathUtil ();
  31 use Fingerprints::Fingerprints;
  32 use Molecule;
  33 use AtomTypes::AtomicInvariantsAtomTypes;
  34 use AtomTypes::FunctionalClassAtomTypes;
  35 use AtomTypes::DREIDINGAtomTypes;
  36 use AtomTypes::EStateAtomTypes;
  37 use AtomTypes::MMFF94AtomTypes;
  38 use AtomTypes::SLogPAtomTypes;
  39 use AtomTypes::SYBYLAtomTypes;
  40 use AtomTypes::TPSAAtomTypes;
  41 use AtomTypes::UFFAtomTypes;
  42 
  43 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
  44 
  45 @ISA = qw(Fingerprints::Fingerprints Exporter);
  46 @EXPORT = qw();
  47 @EXPORT_OK = qw();
  48 
  49 %EXPORT_TAGS = (all  => [@EXPORT, @EXPORT_OK]);
  50 
  51 # Setup class variables...
  52 my($ClassName);
  53 _InitializeClass();
  54 
  55 # Overload Perl functions...
  56 use overload '""' => 'StringifyExtendedConnectivityFingerprints';
  57 
  58 # Class constructor...
  59 sub new {
  60   my($Class, %NamesAndValues) = @_;
  61 
  62   # Initialize object...
  63   my $This = $Class->SUPER::new();
  64   bless $This, ref($Class) || $Class;
  65   $This->_InitializeExtendedConnectivityFingerprints();
  66 
  67   $This->_InitializeExtendedConnectivityFingerprintsProperties(%NamesAndValues);
  68 
  69   return $This;
  70 }
  71 
  72 # Initialize object data...
  73 #
  74 sub _InitializeExtendedConnectivityFingerprints {
  75   my($This) = @_;
  76 
  77   # Type of fingerprint to generate:
  78   #
  79   # ExtendedConnectivity - Set of integer identifiers corresponding to structurally unique features
  80   # ExtendedConnectivityCount - Set of integer identifiers corresponding to structurally unique features and their count
  81   # ExtendedConnectivityBits - A bit vector indicating presence/absence of structurally unique features
  82   #
  83   $This->{Type} = 'ExtendedConnectivity';
  84 
  85   # Atomic neighborhoods radius for extended connectivity...
  86   $This->{NeighborhoodRadius} = 2;
  87 
  88   # Size of bit bector to use during generation of ExtendedConnectivityBits fingerprints...
  89   $This->{Size} = 1024;
  90 
  91   # Min and max size of bit bector to use during generation of ExtendedConnectivityBits fingerprints...
  92   $This->{MinSize} = 32;
  93   $This->{MaxSize} = 2**32;
  94 
  95   # Type of atom attributes to use for initial identifier assignment to non-hydrogen atoms
  96   # during the calculation of extended connectivity fingerprints [ Ref 48, Ref 52 ]...
  97   #
  98   # Currently supported values are: AtomicInvariantsAtomTypes, FunctionalClassAtomTypes,
  99   # DREIDINGAtomTypes, EStateAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes,
 100   # TPSAAtomTypes, UFFAtomTypes
 101   #
 102   $This->{AtomIdentifierType} = '';
 103 
 104   # Random number generator to use during generation of fingerprints bit-vector
 105   # string: Perl CORE::rand or MayaChemTools MathUtil::random function.
 106   #
 107   # The random number generator implemented in MayaChemTools is a variant of
 108   # linear congruential generator (LCG) as described by Miller et al. [ Ref 120 ].
 109   # It is also referred to as Lehmer random number generator or Park-Miller
 110   # random number generator.
 111   #
 112   # Unlike Perl's core random number generator function rand, the random number
 113   # generator implemented in MayaChemTools, MathUtil::random,  generates consistent
 114   # random values across different platformsfor a specific random seed and leads
 115   # to generation of portable fingerprints bit-vector strings.
 116   #
 117   $This->{UsePerlCoreRandom} = 1;
 118 
 119   # Atom neighorhoods up to specified neighborhood radius...
 120   %{$This->{AtomNeighborhoods}} = ();
 121 
 122   # Atom identifiers at different neighborhoods up to specified neighborhood radius...
 123   %{$This->{AtomIdentifiers}} = ();
 124 
 125   # Structurally unique atom identifiers at different neighborhoods up to specified neighborhood radius...
 126   %{$This->{UniqueAtomIdentifiers}} = ();
 127   %{$This->{UniqueAtomIdentifiersCount}} = ();
 128 
 129   # Unique atom identifiers at different neighborhoods up to specified neighborhood radius...
 130   %{$This->{StructurallyUniqueAtomIdentifiers}} = ();
 131   %{$This->{StructurallyUniqueAtomIdentifiersCount}} = ();
 132 
 133   # Structure feature  information at different neighborhoods up to specified neighborhood
 134   # radius used during removal of atom indentifiers which are structually equivalent...
 135   %{$This->{StructureFeatures}} = ();
 136 }
 137 
 138 # Initialize class ...
 139 sub _InitializeClass {
 140   #Class name...
 141   $ClassName = __PACKAGE__;
 142 }
 143 
 144 # Initialize object properties....
 145 sub _InitializeExtendedConnectivityFingerprintsProperties {
 146   my($This, %NamesAndValues) = @_;
 147 
 148   my($Name, $Value, $MethodName);
 149   while (($Name, $Value) = each  %NamesAndValues) {
 150     $MethodName = "Set${Name}";
 151     $This->$MethodName($Value);
 152   }
 153 
 154   # Make sure molecule object was specified...
 155   if (!exists $NamesAndValues{Molecule}) {
 156     croak "Error: ${ClassName}->New: Object can't be instantiated without specifying molecule...";
 157   }
 158 
 159   # Make sure AtomIdentifierType was specified...
 160   if (!exists $NamesAndValues{AtomIdentifierType}) {
 161     croak "Error: ${ClassName}->New: Object can't be instantiated without specifying AtomIdentifierType...";
 162   }
 163 
 164   # Make sure it's power of 2...
 165   if (exists $NamesAndValues{Size}) {
 166     if (!TextUtil::IsNumberPowerOfNumber($NamesAndValues{Size}, 2)) {
 167       croak "Error: ${ClassName}->New: Specified size value, $NamesAndValues{Size}, must be power of 2...";
 168     }
 169   }
 170 
 171   if ($This->{Type} =~ /^ExtendedConnectivity$/i) {
 172     $This->_InitializeExtendedConnectivityFingerprintsVector();
 173   }
 174   elsif ($This->{Type} =~ /^ExtendedConnectivityCount$/i) {
 175     $This->_InitializeExtendedConnectivityCountFingerprintsVector();
 176   }
 177   elsif ($This->{Type} =~ /^ExtendedConnectivityBits$/i) {
 178     $This->_InitializeExtendedConnectivityBitsFingerprintsBitVector();
 179   }
 180   else {
 181     croak "Error: ${ClassName}->_InitializeExtendedConnectivityFingerprintsProperties: Unknown ExtendedConnectivity fingerprints type: $This->{Type}; Supported fingerprints types: ExtendedConnectivity, ExtendedConnectivityCount or ExtendedConnectivityBits...";
 182   }
 183 
 184   return $This;
 185 }
 186 
 187 # Initialize extended connectivity fingerprints vector...
 188 #
 189 sub _InitializeExtendedConnectivityFingerprintsVector {
 190   my($This) = @_;
 191 
 192   # Type of vector...
 193   $This->{VectorType} = 'FingerprintsVector';
 194 
 195   # Type of FingerprintsVector...
 196   $This->{FingerprintsVectorType} = 'AlphaNumericalValues';
 197 
 198   $This->_InitializeFingerprintsVector();
 199 
 200   return $This;
 201 }
 202 
 203 # Initialize extended connectivity count fingerprints vector...
 204 #
 205 sub _InitializeExtendedConnectivityCountFingerprintsVector {
 206   my($This) = @_;
 207 
 208   # Type of vector...
 209   $This->{VectorType} = 'FingerprintsVector';
 210 
 211   # Type of FingerprintsVector...
 212   $This->{FingerprintsVectorType} = 'NumericalValues';
 213 
 214   $This->_InitializeFingerprintsVector();
 215 
 216   return $This;
 217 }
 218 
 219 # Initialize extended connectivity bit fingerprints vector...
 220 #
 221 sub _InitializeExtendedConnectivityBitsFingerprintsBitVector {
 222   my($This) = @_;
 223 
 224   # Type of vector...
 225   $This->{VectorType} = 'FingerprintsBitVector';
 226 
 227   $This->_InitializeFingerprintsBitVector();
 228 
 229   return $This;
 230 }
 231 
 232 # Set type...
 233 #
 234 sub SetType {
 235   my($This, $Type) = @_;
 236 
 237   if ($Type =~ /^ExtendedConnectivity$/i) {
 238     $This->{Type} = 'ExtendedConnectivity';;
 239   }
 240   elsif ($Type =~ /^ExtendedConnectivityCount$/i) {
 241     $This->{Type} = 'ExtendedConnectivityCount';;
 242   }
 243   elsif ($Type =~ /^ExtendedConnectivityBits$/i) {
 244     $This->{Type} = 'ExtendedConnectivityBits';;
 245   }
 246   else {
 247     croak "Error: ${ClassName}->SetType: Unknown ExtendedConnectivity fingerprints type: $This->{Type}; Supported fingerprints types: ExtendedConnectivity, ExtendedConnectivityCount or ExtendedConnectivityBits...";
 248   }
 249   return $This;
 250 }
 251 
 252 # Disable vector type change...
 253 #
 254 sub SetVectorType {
 255   my($This, $Type) = @_;
 256 
 257   croak "Error: ${ClassName}->SetVectorType: Can't change vector type...";
 258 
 259   return $This;
 260 }
 261 
 262 # Disable vector type change...
 263 #
 264 sub SetFingerprintsVectorType {
 265   my($This, $Type) = @_;
 266 
 267   croak "Error: ${ClassName}->SetFingerprintsVectorType: Can't change fingerprints vector type...";
 268 
 269   return $This;
 270 }
 271 
 272 # Set intial atom identifier type..
 273 #
 274 sub SetAtomIdentifierType {
 275   my($This, $IdentifierType) = @_;
 276 
 277   if ($IdentifierType !~ /^(AtomicInvariantsAtomTypes|FunctionalClassAtomTypes|DREIDINGAtomTypes|EStateAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
 278     croak "Error: ${ClassName}->SetAtomIdentifierType: Specified value, $IdentifierType, for AtomIdentifierType is not vaild. Supported types in current release of MayaChemTools: AtomicInvariantsAtomTypes, FunctionalClassAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes and UFFAtomTypes.";
 279   }
 280 
 281   if ($This->{AtomIdentifierType}) {
 282     croak "Error: ${ClassName}->SetAtomIdentifierType: Can't change intial atom identifier type:  It's already set...";
 283   }
 284 
 285   $This->{AtomIdentifierType} = $IdentifierType;
 286 
 287   # Initialize identifier type information...
 288   $This->_InitializeAtomIdentifierTypeInformation();
 289 
 290   return $This;
 291 }
 292 
 293 # Set atom neighborhood radius...
 294 #
 295 sub SetNeighborhoodRadius {
 296   my($This, $Value) = @_;
 297 
 298   if (!TextUtil::IsInteger($Value)) {
 299     croak "Error: ${ClassName}->SetNeighborhoodRadius: NeighborhoodRadius value, $Value, is not valid:  It must be an  integer...";
 300   }
 301 
 302   if ($Value < 0 ) {
 303     croak "Error: ${ClassName}->SetNeighborhoodRadius: NeighborhoodRadius value, $Value, is not valid:  It must be >= 0...";
 304   }
 305   $This->{NeighborhoodRadius} = $Value;
 306 
 307   return $This;
 308 }
 309 
 310 # Generate fingerprints description...
 311 #
 312 sub GetDescription {
 313   my($This) = @_;
 314 
 315   # Is description explicity set?
 316   if (exists $This->{Description}) {
 317     return $This->{Description};
 318   }
 319 
 320   # Generate fingerprints description...
 321 
 322   return "$This->{Type}:$This->{AtomIdentifierType}:Radius$This->{NeighborhoodRadius}";
 323 }
 324 
 325 # Generate fingerprints...
 326 #
 327 # Methodology:
 328 #   . Assign initial atom identfiers to all non-hydrogen atoms in the molecule
 329 #
 330 #   . Remove duplicates from the initial identifiers and add them to list corresponding
 331 #     to molecule fingerprint
 332 #
 333 #   . For NeighborhoodRadius value of 0, just return the molecule fingerprint list
 334 #
 335 #   . For each NeighborhoodRadius level
 336 #      . For each non-hydrogen CentralAtom at this NeighborhoodRadius level
 337 #         . For each non-hydrogen SuccessorNeighborAtom
 338 #           . Collect (BondOrder AtomIdentifier) pair of values corresponding to
 339 #             (CentralAtom SuccessorNeighborAtom)  and add it to a list
 340 #
 341 #         . Sort list containing (BondOrder AtomIdentifier) pairs first by BondOrder followed
 342 #            by AtomIdendifiers to make these values graph invariant
 343 #         . Generate a hash code for the values in the list
 344 #         . Assign hash code as new atom identifier at the current NeighborhoodRadius level
 345 #         . Save all atoms and bonds corresponding to the substructure involved in
 346 #           generating the hash code to be used for identifying structural duplicate hash code
 347 #
 348 #         . Add the new identifier to the molecule fingerprint list making sure it's not a duplicate
 349 #           identifier
 350 #
 351 #   Hash code atom identifier deduplication:
 352 #     . Track/remove the identifier generated at higher neighborhood radius level
 353 #
 354 #  Structural atom identifier deduplication:
 355 #    . For equivalent atoms and bonds corresponding to substructure at a NeighborhoodRadius level,
 356 #      track/remove the atom identifier with largest value
 357 #
 358 #
 359 sub GenerateFingerprints {
 360   my($This) = @_;
 361 
 362   # Cache appropriate molecule data...
 363   $This->_SetupMoleculeDataCache();
 364 
 365   # Assign intial atom identifers...
 366   if (!$This->_AssignInitialAtomIdentifiers()) {
 367     carp "Warning: ${ClassName}->GenerateFingerprints: $This->{AtomIdentifierType} fingerprints generation didn't succeed: Couldn't assign valid $This->{AtomIdentifierType} to all atoms...";
 368     return $This;
 369   }
 370 
 371   # Identify atom neighborhoods up to specified radius...
 372   $This->_GetAtomNeighborhoods();
 373 
 374   # Assign atom identifiers to central atoms considering atom neighborhoods at each
 375   # radius level...
 376   $This->_AssignAtomIdentifiersToAtomNeighborhoods();
 377 
 378   # Remove duplicates identifiers...
 379   $This->_RemoveDuplicateAtomIdentifiers();
 380 
 381   # Set final fingerprints...
 382   $This->_SetFinalFingerprints();
 383 
 384   # Clear cached molecule data...
 385   $This->_ClearMoleculeDataCache();
 386 
 387   return $This;
 388 }
 389 
 390 # Assign appropriate initial atom identifiers...
 391 #
 392 #   Generation of initial identifier for a specific atom involves:
 393 #     . Values of the specified atom attributes are appended in a specific order to
 394 #       generate an initial atom identifier string
 395 #     . A 32 bit unsigned integer hash key, using TextUtil::HashCode function,  is
 396 #       generated for the atom indentifier and assigned to the atom as initial
 397 #       atom identifier.
 398 #
 399 sub _AssignInitialAtomIdentifiers {
 400   my($This) = @_;
 401   my($Atom, $AtomID, $Radius, $SpecifiedAtomTypes, $IgnoreHydrogens, $AtomType, $InitialAtomTypeString, $InitialAtomIdentifier);
 402 
 403   # Initialize atom identifiers...
 404   $This->_InitializeAtomIdentifiers();
 405 
 406   # Set up atom types...
 407   $IgnoreHydrogens = 1;
 408   $SpecifiedAtomTypes = undef;
 409 
 410   IDENTIFIERTYPE: {
 411     if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
 412       $SpecifiedAtomTypes = new AtomTypes::AtomicInvariantsAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens, 'AtomicInvariantsToUse' => $This->{AtomicInvariantsToUse});
 413       last IDENTIFIERTYPE;
 414     }
 415 
 416     if ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
 417       $SpecifiedAtomTypes = new AtomTypes::FunctionalClassAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens, 'FunctionalClassesToUse' => $This->{FunctionalClassesToUse});
 418       last IDENTIFIERTYPE;
 419     }
 420 
 421     if ($This->{AtomIdentifierType} =~ /^DREIDINGAtomTypes$/i) {
 422       $SpecifiedAtomTypes = new AtomTypes::DREIDINGAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens);
 423       last IDENTIFIERTYPE;
 424     }
 425 
 426     if ($This->{AtomIdentifierType} =~ /^EStateAtomTypes$/i) {
 427       $SpecifiedAtomTypes = new AtomTypes::EStateAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens);
 428       last IDENTIFIERTYPE;
 429     }
 430 
 431     if ($This->{AtomIdentifierType} =~ /^MMFF94AtomTypes$/i) {
 432       $SpecifiedAtomTypes = new AtomTypes::MMFF94AtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens);
 433       last IDENTIFIERTYPE;
 434     }
 435 
 436     if ($This->{AtomIdentifierType} =~ /^SLogPAtomTypes$/i) {
 437       $SpecifiedAtomTypes = new AtomTypes::SLogPAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens);
 438       last IDENTIFIERTYPE;
 439     }
 440 
 441     if ($This->{AtomIdentifierType} =~ /^SYBYLAtomTypes$/i) {
 442       $SpecifiedAtomTypes = new AtomTypes::SYBYLAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens);
 443       last IDENTIFIERTYPE;
 444     }
 445 
 446     if ($This->{AtomIdentifierType} =~ /^TPSAAtomTypes$/i) {
 447       $SpecifiedAtomTypes = new AtomTypes::TPSAAtomTypes('Molecule' => $This->{Molecule}, 'IgnorePhosphorus' => 0, 'IgnoreSulfur' => 0);
 448       last IDENTIFIERTYPE;
 449     }
 450 
 451     if ($This->{AtomIdentifierType} =~ /^UFFAtomTypes$/i) {
 452       $SpecifiedAtomTypes = new AtomTypes::UFFAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => $IgnoreHydrogens);
 453       last IDENTIFIERTYPE;
 454     }
 455 
 456     croak "Error: ${ClassName}->_AssignInitialAtomIdentifiers: Couldn't assign intial atom identifiers: InitialAtomIdentifierType $This->{AtomIdentifierType} is not supported...";
 457   }
 458 
 459   # Assign atom types...
 460   $SpecifiedAtomTypes->AssignAtomTypes();
 461 
 462   # Make sure atom types assignment is successful...
 463   if (!$SpecifiedAtomTypes->IsAtomTypesAssignmentSuccessful()) {
 464     return undef;
 465   }
 466 
 467   # Assign atom identifiers at radius 0...
 468   $Radius = 0;
 469   for $Atom (@{$This->{Atoms}}) {
 470     $AtomID = $Atom->GetID();
 471 
 472     $AtomType = $SpecifiedAtomTypes->GetAtomType($Atom);
 473     $InitialAtomTypeString = $AtomType ? $AtomType : 'None';
 474 
 475     $InitialAtomIdentifier = TextUtil::HashCode($InitialAtomTypeString);
 476     $This->{AtomIdentifiers}{$Radius}{$AtomID} = $InitialAtomIdentifier;
 477   }
 478 
 479   return $This;
 480 }
 481 
 482 # Initialize atom identifiers...
 483 #
 484 sub _InitializeAtomIdentifiers {
 485   my($This) = @_;
 486   my($Radius, $CurrentRadius);
 487 
 488   $Radius = $This->{NeighborhoodRadius};
 489 
 490   %{$This->{AtomIdentifiers}} = ();
 491   for $CurrentRadius (0 .. $Radius) {
 492     # Atom idenfiers key and value correspond to AtomID and AtomIdentifier
 493     %{$This->{AtomIdentifiers}{$CurrentRadius}} = ();
 494 
 495     # Unique and strcuturally unique idenfiers key and value correspond to AtomIdentifier and AtomID
 496     %{$This->{UniqueAtomIdentifiers}{$CurrentRadius}} = ();
 497     %{$This->{UniqueAtomIdentifiersCount}{$CurrentRadius}} = ();
 498 
 499     %{$This->{StructurallyUniqueAtomIdentifiers}{$CurrentRadius}} = ();
 500     %{$This->{StructurallyUniqueAtomIdentifiersCount}{$CurrentRadius}} = ();
 501   }
 502 
 503 }
 504 
 505 # Collect atom neighborhoods upto specified neighborhood radius...
 506 #
 507 sub _GetAtomNeighborhoods {
 508   my($This) = @_;
 509   my($Atom, $AtomID, $Radius, $CurrentRadius, $Molecule);
 510 
 511   %{$This->{AtomNeighborhoods}} = ();
 512 
 513   $Radius = $This->{NeighborhoodRadius};
 514   if ($Radius < 1) {
 515     # At radius level 0, it's just the atoms...
 516     return;
 517   }
 518 
 519   # Initialize neighborhood at different radii...
 520   for $CurrentRadius (0 .. $Radius) {
 521     %{$This->{AtomNeighborhoods}{$CurrentRadius}} = ();
 522   }
 523 
 524   $Molecule = $This->GetMolecule();
 525 
 526   # Collect available atom neighborhoods at different at different neighborhood level for each atom...
 527   my($AtomsNeighborhoodWithSuccessorAtomsRef);
 528 
 529   for $Atom (@{$This->{Atoms}}) {
 530     $AtomID = $Atom->GetID();
 531     $CurrentRadius = 0;
 532     for $AtomsNeighborhoodWithSuccessorAtomsRef ($Molecule->GetAtomNeighborhoodsWithSuccessorAtomsAndRadiusUpto($Atom, $Radius)) {
 533       $This->{AtomNeighborhoods}{$CurrentRadius}{$AtomID} = $AtomsNeighborhoodWithSuccessorAtomsRef;
 534       $CurrentRadius++;
 535     }
 536   }
 537   return $This;
 538 }
 539 
 540 # Assign atom identifiers to central atom at each neighborhood radius level...
 541 #
 542 sub _AssignAtomIdentifiersToAtomNeighborhoods {
 543   my($This) = @_;
 544   my($Radius, $NextRadius, $Atom, $AtomID, $NeighborhoodAtom, $SuccessorAtom, $SuccessorAtomID, $NeighborhoodAtomSuccessorAtomsRef, $NeighborhoodAtomsWithSuccessorAtomsRef, $Bond, $BondOrder, $SuccessorAtomCount);
 545 
 546   if ($This->{NeighborhoodRadius} < 1) {
 547     return;
 548   }
 549 
 550   # Go over the atom neighborhoods at each radius upto specified radius and assign atom
 551   # indentifiers using their connected successor atoms and their identifiers.
 552   #
 553   # For a neighborhood atom at a specified radius, the successor connected atoms correpond
 554   # to next radius level and the last set of neighorhood atoms don't have any successor connected
 555   # atoms. Additionally, radius level 0 just correspond to initial atom identifiers.
 556   #
 557   # So in order to process atom neighborhood upto specified radius level, the last atom neighborhood
 558   # doesn't need to be processed: it gets processed at previous radius level as successor connected
 559   # atoms.
 560   #
 561   RADIUS: for $Radius (0 .. ($This->{NeighborhoodRadius} - 1)) {
 562     ATOM: for $Atom (@{$This->{Atoms}}) {
 563       $AtomID = $Atom->GetID();
 564 
 565       # Are there any available atom neighborhoods at this radius?
 566       if (!exists $This->{AtomNeighborhoods}{$Radius}{$AtomID}) {
 567         next ATOM;
 568       }
 569       $NextRadius = $Radius + 1;
 570 
 571       # Go over neighborhood atoms and their successor connected atoms at this radius and collect
 572       # (BondOrder AtomIdentifier) values for bonded atom pairs. Additionally, keep track of atom and bonds
 573       # for the neighorhoods to remove identifieres generated from structurally duplicate features.
 574       #
 575       my(%BondOrdersAndAtomIdentifiers);
 576 
 577       %BondOrdersAndAtomIdentifiers = ();
 578       $SuccessorAtomCount = 0;
 579 
 580       NEIGHBORHOODS: for $NeighborhoodAtomsWithSuccessorAtomsRef (@{$This->{AtomNeighborhoods}{$Radius}{$AtomID}}) {
 581         ($NeighborhoodAtom, $NeighborhoodAtomSuccessorAtomsRef) = @{$NeighborhoodAtomsWithSuccessorAtomsRef};
 582 
 583         # Any connected successors for the NeighborhoodAtom?
 584         if (!@{$NeighborhoodAtomSuccessorAtomsRef}) {
 585           next NEIGHBORHOODS;
 586         }
 587         SUCCESSORATOM: for $SuccessorAtom (@{$NeighborhoodAtomSuccessorAtomsRef}) {
 588           if ($SuccessorAtom->IsHydrogen()) {
 589             # Skip successor hydrogen atom...
 590             next SUCCESSORATOM;
 591           }
 592           $SuccessorAtomID = $SuccessorAtom->GetID();
 593           $SuccessorAtomCount++;
 594 
 595           $Bond = $NeighborhoodAtom->GetBondToAtom($SuccessorAtom);
 596           $BondOrder = $Bond->IsAromatic() ? "1.5" : $Bond->GetBondOrder();
 597 
 598           if (!exists $BondOrdersAndAtomIdentifiers{$BondOrder}) {
 599             @{$BondOrdersAndAtomIdentifiers{$BondOrder}} = ();
 600           }
 601           push @{$BondOrdersAndAtomIdentifiers{$BondOrder}}, $This->{AtomIdentifiers}{$Radius}{$SuccessorAtomID};
 602         }
 603       }
 604       if (!$SuccessorAtomCount) {
 605         next ATOM;
 606       }
 607       # Assign a new atom identifier at the NextRadius level...
 608       $This->_AssignAtomIdentifierToAtomNeighborhood($AtomID, $Radius, \%BondOrdersAndAtomIdentifiers);
 609     }
 610  }
 611   return $This;
 612 }
 613 
 614 # Generate and assign atom indentifier for AtomID using atom neighborhood at next radius level...
 615 #
 616 sub _AssignAtomIdentifierToAtomNeighborhood {
 617   my($This, $AtomID, $Radius, $BondOrdersAndAtomIdentifiersRef) = @_;
 618   my($NextRadius, $AtomIdentifier,  $SuccessorAtomIdentifier, $BondOrder, $AtomIdentifierString, @AtomIndentifiersInfo);
 619 
 620   $NextRadius = $Radius + 1;
 621 
 622   @AtomIndentifiersInfo = ();
 623 
 624   $AtomIdentifier = $This->{AtomIdentifiers}{$Radius}{$AtomID};
 625   push @AtomIndentifiersInfo, ($NextRadius, $AtomIdentifier);
 626 
 627   # Sort out successor atom bond order and identifier pairs by bond order followed by atom identifiers
 628   # in order to make the final atom identifier graph invariant...
 629   #
 630   for $BondOrder (sort { $a <=> $b } keys %{$BondOrdersAndAtomIdentifiersRef}) {
 631     for $SuccessorAtomIdentifier (sort { $a <=> $b } @{$BondOrdersAndAtomIdentifiersRef->{$BondOrder}}) {
 632       push @AtomIndentifiersInfo, ($BondOrder, $SuccessorAtomIdentifier);
 633     }
 634   }
 635   $AtomIdentifierString = join("", @AtomIndentifiersInfo);
 636   $AtomIdentifier = TextUtil::HashCode($AtomIdentifierString);
 637 
 638   # Assign atom identifier to the atom at next radius level...
 639   $This->{AtomIdentifiers}{$NextRadius}{$AtomID} = $AtomIdentifier;
 640 
 641   return $This;
 642 }
 643 
 644 # Remove duplicates atom identifiers...
 645 #
 646 sub _RemoveDuplicateAtomIdentifiers {
 647   my($This) = @_;
 648 
 649   $This->_RemoveDuplicateIdentifiersByValue();
 650   $This->_RemoveStructurallyDuplicateIdenfiers();
 651 
 652   return $This;
 653 }
 654 
 655 # Remove duplicate identifiers at each radius level by just using their value...
 656 #
 657 sub _RemoveDuplicateIdentifiersByValue {
 658   my($This) = @_;
 659   my($Radius, $Atom, $AtomID, $AtomIdentifier);
 660 
 661   for $Radius (0 .. $This->{NeighborhoodRadius}) {
 662     ATOM: for $Atom (@{$This->{Atoms}}) {
 663       $AtomID = $Atom->GetID();
 664       if (!exists $This->{AtomIdentifiers}{$Radius}{$AtomID}) {
 665         next ATOM;
 666       }
 667       $AtomIdentifier = $This->{AtomIdentifiers}{$Radius}{$AtomID};
 668       if (exists $This->{UniqueAtomIdentifiers}{$Radius}{$AtomIdentifier}) {
 669         # It's a duplicate atom idenfier at this radius level...
 670         $This->{UniqueAtomIdentifiersCount}{$Radius}{$AtomIdentifier} += 1;
 671         next ATOM;
 672       }
 673       $This->{UniqueAtomIdentifiers}{$Radius}{$AtomIdentifier} = $AtomID;
 674       $This->{UniqueAtomIdentifiersCount}{$Radius}{$AtomIdentifier} = 1;
 675     }
 676   }
 677   return $This;
 678 }
 679 
 680 # Remove structurally duplicate identifiers at each radius level...
 681 #
 682 # Methodology:
 683 #   . For unquie atom identifiers at each radius level, assign complete structure features
 684 #     in terms all the bonds involved to generate that identifier
 685 #   . Use the complete structure features to remover atom identifiers which are
 686 #     structurally equivalent which can also be at earlier radii levels
 687 #
 688 #
 689 sub _RemoveStructurallyDuplicateIdenfiers {
 690   my($This) = @_;
 691   my($Radius, $AtomID, $AtomIdentifier, $SimilarAtomIdentifierRadius, $SimilarAtomIdentifier);
 692 
 693   # Setup structure features...
 694   $This->_SetupStructureFeaturesForAtomIDsInvolvedInUniqueIdentifiers();
 695 
 696   # Identify structurally unqiue identifiers...
 697   for $Radius (0 .. $This->{NeighborhoodRadius}) {
 698     ATOMIDENTIFIER: for $AtomIdentifier (sort { $a <=> $b } keys %{$This->{UniqueAtomIdentifiers}{$Radius}}) {
 699       $AtomID = $This->{UniqueAtomIdentifiers}{$Radius}{$AtomIdentifier};
 700 
 701       ($SimilarAtomIdentifierRadius, $SimilarAtomIdentifier) = $This->_FindStructurallySimilarAtomIdentifier($Radius, $AtomID, $AtomIdentifier);
 702       if ($SimilarAtomIdentifier) {
 703         # Current atom identifier is similar to an earlier structurally unique atom identifier...
 704         $This->{StructurallyUniqueAtomIdentifiersCount}{$SimilarAtomIdentifierRadius}{$SimilarAtomIdentifier} += $This->{UniqueAtomIdentifiersCount}{$Radius}{$AtomIdentifier};
 705         next ATOMIDENTIFIER;
 706       }
 707       $This->{StructurallyUniqueAtomIdentifiers}{$Radius}{$AtomIdentifier} = $AtomID;
 708 
 709       # Set structurally unique atom identifier count to the unique atom identifiers count...
 710       $This->{StructurallyUniqueAtomIdentifiersCount}{$Radius}{$AtomIdentifier} = $This->{UniqueAtomIdentifiersCount}{$Radius}{$AtomIdentifier};
 711     }
 712   }
 713   return $This;
 714 }
 715 
 716 # Set final fingerpritns vector...
 717 #
 718 sub _SetFinalFingerprints {
 719   my($This) = @_;
 720 
 721   # Mark successful generation of fingerprints...
 722   $This->{FingerprintsGenerated} = 1;
 723 
 724   if ($This->{Type} =~ /^ExtendedConnectivity$/i) {
 725     $This->_SetFinalExtendedConnectivityFingerprints();
 726   }
 727   elsif ($This->{Type} =~ /^ExtendedConnectivityCount$/i) {
 728     $This->_SetFinalExtendedConnectivityCountFingerprints();
 729   }
 730   elsif ($This->{Type} =~ /^ExtendedConnectivityBits$/i) {
 731     $This->_SetFinalExtendedConnectivityBitsFingerprints();
 732   }
 733 
 734   return $This;
 735 }
 736 
 737 # Set final extended connectivity fingerpritns vector...
 738 #
 739 sub _SetFinalExtendedConnectivityFingerprints {
 740   my($This) = @_;
 741   my($Radius, $AtomIdentifier, @AtomIdentifiers);
 742 
 743   @AtomIdentifiers = ();
 744 
 745   for $Radius (0 .. $This->{NeighborhoodRadius}) {
 746     for $AtomIdentifier (sort { $a <=> $b } keys %{$This->{StructurallyUniqueAtomIdentifiers}{$Radius}}) {
 747       push @AtomIdentifiers, $AtomIdentifier;
 748     }
 749   }
 750   # Add atom identifiers to fingerprint vector...
 751   $This->{FingerprintsVector}->AddValues(\@AtomIdentifiers);
 752 
 753   return $This;
 754 }
 755 
 756 # Set final extended connectivity count fingerpritns vector...
 757 #
 758 sub _SetFinalExtendedConnectivityCountFingerprints {
 759   my($This) = @_;
 760   my($Radius, $AtomIdentifier, $AtomIdentifierCount, @AtomIdentifiers, @AtomIdentifiersCount);
 761 
 762   @AtomIdentifiers = (); @AtomIdentifiersCount = ();
 763 
 764   for $Radius (0 .. $This->{NeighborhoodRadius}) {
 765     for $AtomIdentifier (sort { $a <=> $b } keys %{$This->{StructurallyUniqueAtomIdentifiers}{$Radius}}) {
 766       $AtomIdentifierCount = $This->{StructurallyUniqueAtomIdentifiersCount}{$Radius}{$AtomIdentifier};
 767       push @AtomIdentifiers, $AtomIdentifier;
 768       push @AtomIdentifiersCount, $AtomIdentifierCount;
 769     }
 770   }
 771   # Add atom identifiers to fingerprint vector as value IDs...
 772   $This->{FingerprintsVector}->AddValueIDs(\@AtomIdentifiers);
 773 
 774   # Add atom identifiers to count to fingerprint vector as values...
 775   $This->{FingerprintsVector}->AddValues(\@AtomIdentifiersCount);
 776 
 777   return $This;
 778 }
 779 
 780 # Set final extended connectivity bits fingerpritns vector...
 781 #
 782 sub _SetFinalExtendedConnectivityBitsFingerprints {
 783   my($This) = @_;
 784   my($Radius, $AtomIdentifier, $FingerprintsBitVector, $Size, $SkipBitPosCheck, $AtomIdentifierBitPos, $SetBitNum);
 785 
 786   $FingerprintsBitVector = $This->{FingerprintsBitVector};
 787 
 788   $Size = $This->{Size};
 789 
 790   $SkipBitPosCheck = 1;
 791 
 792   for $Radius (0 .. $This->{NeighborhoodRadius}) {
 793     for $AtomIdentifier (keys %{$This->{StructurallyUniqueAtomIdentifiers}{$Radius}}) {
 794       # Set random number seed...
 795       if ($This->{UsePerlCoreRandom}) {
 796         CORE::srand($AtomIdentifier);
 797       }
 798       else {
 799         MathUtil::srandom($AtomIdentifier);
 800       }
 801 
 802       # Set bit position...
 803       $AtomIdentifierBitPos = $This->{UsePerlCoreRandom} ? int(CORE::rand($Size)) : int(MathUtil::random($Size));
 804       $FingerprintsBitVector->SetBit($AtomIdentifierBitPos, $SkipBitPosCheck);
 805     }
 806   }
 807   return $This;
 808 }
 809 
 810 
 811 # Identify structurally unique identifiers by comparing structure features involved in
 812 # generating identifiear by comparing it agains all the previous structurally unique
 813 # identifiers...
 814 #
 815 sub _FindStructurallySimilarAtomIdentifier {
 816   my($This, $SpecifiedRadius, $SpecifiedAtomID, $SpecifiedAtomIdentifier) = @_;
 817   my($Radius, $AtomID, $AtomIdentifier, $FeatureAtomCount, $FeatureAtomIDsRef,  $SpecifiedFeatureAtomID, $SpecifiedFeatureAtomCount, $SpecifiedFeatureAtomIDsRef);
 818 
 819   if ($SpecifiedRadius == 0) {
 820     # After duplicate removal by value, all identifier at radius level 0 would be structurally unique...
 821     return (undef, undef);
 822   }
 823 
 824   $SpecifiedFeatureAtomCount = $This->{StructureFeatures}{AtomCount}{$SpecifiedRadius}{$SpecifiedAtomID};
 825   $SpecifiedFeatureAtomIDsRef = $This->{StructureFeatures}{AtomIDs}{$SpecifiedRadius}{$SpecifiedAtomID};
 826 
 827   # No need to compare features at radius 0...
 828   for $Radius (1 .. $SpecifiedRadius) {
 829     ATOMIDENTIFIER: for $AtomIdentifier (keys %{$This->{StructurallyUniqueAtomIdentifiers}{$Radius}}) {
 830       $AtomID = $This->{StructurallyUniqueAtomIdentifiers}{$Radius}{$AtomIdentifier};
 831 
 832       $FeatureAtomCount = $This->{StructureFeatures}{AtomCount}{$Radius}{$AtomID};
 833       $FeatureAtomIDsRef = $This->{StructureFeatures}{AtomIDs}{$Radius}{$AtomID};
 834 
 835       if ($SpecifiedFeatureAtomCount != $FeatureAtomCount) {
 836         # Couldn't be structurally equivalent...
 837         next ATOMIDENTIFIER;
 838       }
 839       for $SpecifiedFeatureAtomID (keys % {$SpecifiedFeatureAtomIDsRef}) {
 840         if (! exists $FeatureAtomIDsRef->{$SpecifiedFeatureAtomID}) {
 841           # For structural equivalency, all atom in specified feature must also be present in a previously
 842           # identified structurally unique structure feature...
 843           next ATOMIDENTIFIER;
 844         }
 845       }
 846       # Found structurally equivalent feature...
 847       return ($Radius, $AtomIdentifier);
 848     }
 849   }
 850   return (undef, undef);
 851 }
 852 
 853 # Setup structure features for atom IDs involved in unique atom identifiers at all
 854 # radii level...
 855 #
 856 sub _SetupStructureFeaturesForAtomIDsInvolvedInUniqueIdentifiers {
 857   my($This) = @_;
 858   my($Radius, $PreviousRadius, $Atom, $AtomID, $AtomIdentifier, $NeighborhoodAtomID, $NeighborhoodAtomsWithSuccessorAtomsRef, $NeighborhoodAtom, $NeighborhoodAtomSuccessorAtomsRef, %AtomIDs);
 859 
 860   $This->_InitializeStructureFeatures();
 861 
 862   # Collect atom IDs involved in unique atom identifiers...
 863   %AtomIDs = ();
 864   for $Radius (0 .. $This->{NeighborhoodRadius}) {
 865     for $AtomIdentifier (keys %{$This->{UniqueAtomIdentifiers}{$Radius}}) {
 866       $AtomID = $This->{UniqueAtomIdentifiers}{$Radius}{$AtomIdentifier};
 867       $AtomIDs{$AtomID} = $AtomID;
 868     }
 869   }
 870 
 871   # Setup structure features...
 872   for $Radius (0 .. $This->{NeighborhoodRadius}) {
 873     for $AtomID (keys %AtomIDs) {
 874       my($StructureFeatureAtomCount, %StructureFeatureAtomIDs);
 875 
 876       $StructureFeatureAtomCount = 0;
 877       %StructureFeatureAtomIDs = ();
 878 
 879       # Get partial structure features for the atom at previous radius level...
 880       $PreviousRadius = $Radius - 1;
 881       if ($PreviousRadius >= 0) {
 882         $StructureFeatureAtomCount += $This->{StructureFeatures}{AtomCount}{$PreviousRadius}{$AtomID};
 883         %StructureFeatureAtomIDs = %{$This->{StructureFeatures}{AtomIDs}{$PreviousRadius}{$AtomID}};
 884       }
 885 
 886       # Get all neighborhood atom at this radius level...
 887       if (exists($This->{AtomNeighborhoods}{$Radius}) && exists($This->{AtomNeighborhoods}{$Radius}{$AtomID})) {
 888         NEIGHBORHOODS: for $NeighborhoodAtomsWithSuccessorAtomsRef (@{$This->{AtomNeighborhoods}{$Radius}{$AtomID}}) {
 889           ($NeighborhoodAtom, $NeighborhoodAtomSuccessorAtomsRef) = @{$NeighborhoodAtomsWithSuccessorAtomsRef};
 890           if ($NeighborhoodAtom->IsHydrogen()) {
 891             next NEIGHBORHOODS;
 892           }
 893           $NeighborhoodAtomID = $NeighborhoodAtom->GetID();
 894           $StructureFeatureAtomCount++;
 895           $StructureFeatureAtomIDs{$NeighborhoodAtomID} = $NeighborhoodAtomID;
 896         }
 897       }
 898 
 899       # Assign structure features to atom at this radius level...
 900       $This->{StructureFeatures}{AtomCount}{$Radius}{$AtomID} = $StructureFeatureAtomCount;
 901       $This->{StructureFeatures}{AtomIDs}{$Radius}{$AtomID} = \%StructureFeatureAtomIDs;
 902     }
 903   }
 904   return $This;
 905 }
 906 
 907 # Intialize structure features at each radius level...
 908 #
 909 sub _InitializeStructureFeatures {
 910   my($This) = @_;
 911   my($Radius, $CurrentRadius, $Atom, $AtomID);
 912 
 913   # Initialize all structure features...
 914 
 915   %{$This->{StructureFeatures}} = ();
 916   %{$This->{StructureFeatures}{AtomCount}} = ();
 917   %{$This->{StructureFeatures}{AtomIDs}} = ();
 918 
 919   $Radius = $This->{NeighborhoodRadius};
 920   for $CurrentRadius (0 .. $Radius) {
 921     # Structure features for at specific radii accessed using atom IDs...
 922     %{$This->{StructureFeatures}{AtomCount}{$CurrentRadius}} = ();
 923     %{$This->{StructureFeatures}{AtomIDs}{$CurrentRadius}} = ();
 924   }
 925   return $This;
 926 }
 927 
 928 # Cache  appropriate molecule data...
 929 #
 930 sub _SetupMoleculeDataCache {
 931   my($This) = @_;
 932 
 933   # Get all non-hydrogen atoms...
 934   my($NegateAtomCheckMethod);
 935   $NegateAtomCheckMethod = 1;
 936   @{$This->{Atoms}} = $This->GetMolecule()->GetAtoms("IsHydrogen", $NegateAtomCheckMethod);
 937 
 938   return $This;
 939 }
 940 
 941 # Clear cached molecule data...
 942 #
 943 sub _ClearMoleculeDataCache {
 944   my($This) = @_;
 945 
 946   @{$This->{Atoms}} = ();
 947 
 948   return $This;
 949 }
 950 
 951 # Initialize atom indentifier type information...
 952 #
 953 # Current supported values:
 954 #
 955 # AtomicInvariantsAtomTypes, FunctionalClassAtomTypes, DREIDINGAtomTypes, EStateAtomTypes,
 956 # MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes
 957 #
 958 sub _InitializeAtomIdentifierTypeInformation {
 959   my($This) = @_;
 960 
 961   IDENTIFIERTYPE: {
 962     if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
 963       $This->_InitializeAtomicInvariantsAtomTypesInformation();
 964       last IDENTIFIERTYPE;
 965     }
 966     if ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
 967       $This->_InitializeFunctionalClassAtomTypesInformation();
 968       last IDENTIFIERTYPE;
 969     }
 970     if ($This->{AtomIdentifierType} =~ /^(DREIDINGAtomTypes|EStateAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
 971       # Nothing to do for now...
 972       last IDENTIFIERTYPE;
 973     }
 974     carp "Warning: ${ClassName}->_InitializeAtomIdentifierTypeInformation: Unknown atom indentifier type $This->{AtomIdentifierType}...";
 975   }
 976   return $This;
 977 }
 978 
 979 # Initialize atomic invariants atom types, generated by AtomTypes::AtomicInvariantsAtomTypes
 980 # class, to use for generating initial atom identifiers...
 981 #
 982 # Let:
 983 #   AS = Atom symbol corresponding to element symbol
 984 #
 985 #   X<n>   = Number of non-hydrogen atom neighbors or heavy atoms attached to atom
 986 #   BO<n> = Sum of bond orders to non-hydrogen atom neighbors or heavy atoms attached to atom
 987 #   LBO<n> = Largest bond order of non-hydrogen atom neighbors or heavy atoms attached to atom
 988 #   SB<n> = Number of single bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
 989 #   DB<n> = Number of double bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
 990 #   TB<n> = Number of triple bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
 991 #   H<n>   = Number of implicit and explicit hydrogens for atom
 992 #   Ar     = Aromatic annotation indicating whether atom is aromatic
 993 #   RA     = Ring atom annotation indicating whether atom is a ring
 994 #   FC<+n/-n> = Formal charge assigned to atom
 995 #   MN<n> = Mass number indicating isotope other than most abundant isotope
 996 #   SM<n> = Spin multiplicity of atom. Possible values: 1 (singlet), 2 (doublet) or 3 (triplet)
 997 #
 998 # Then:
 999 #
1000 #   Atom type generated by AtomTypes::AtomicInvariantsAtomTypes class corresponds to:
1001 #
1002 #     AS.X<n>.BO<n>.LBO<n>.<SB><n>.<DB><n>.<TB><n>.H<n>.Ar.RA.FC<+n/-n>.MN<n>.SM<n>
1003 #
1004 # Except for AS which is a required atomic invariant in atom types, all other atomic invariants are
1005 # optional.
1006 #
1007 # Default atomic invariants used for generating inital atom identifiers are [ Ref 24 ]:
1008 #
1009 #   AS, X<n>, BO<n>, H<n>, FC<+n/-n>, MN<n>
1010 #
1011 # In addition to usage of abbreviations for specifying atomic invariants, the following descriptive words
1012 # are also allowed:
1013 #
1014 # X : NumOfNonHydrogenAtomNeighbors or NumOfHeavyAtomNeighbors
1015 # BO : SumOfBondOrdersToNonHydrogenAtoms or SumOfBondOrdersToHeavyAtoms
1016 # LBO : LargestBondOrderToNonHydrogenAtoms or LargestBondOrderToHeavyAtoms
1017 # SB :  NumOfSingleBondsToNonHydrogenAtoms or NumOfSingleBondsToHeavyAtoms
1018 # DB : NumOfDoubleBondsToNonHydrogenAtoms or NumOfDoubleBondsToHeavyAtoms
1019 # TB : NumOfTripleBondsToNonHydrogenAtoms or NumOfTripleBondsToHeavyAtoms
1020 # H :  NumOfImplicitAndExplicitHydrogens
1021 # Ar : Aromatic
1022 # RA : RingAtom
1023 # FC : FormalCharge
1024 # MN : MassNumber
1025 # SM : SpinMultiplicity
1026 #
1027 sub _InitializeAtomicInvariantsAtomTypesInformation {
1028   my($This) = @_;
1029 
1030   # Default atomic invariants to use for generating initial atom identifiers are: AS, X, BO, LBO, H, FC
1031   #
1032   @{$This->{AtomicInvariantsToUse}} = ();
1033   @{$This->{AtomicInvariantsToUse}} = ('AS', 'X', 'BO', 'H', 'FC', 'MN');
1034 
1035   return $This;
1036 }
1037 
# Initialize functional class atom types, generated by AtomTypes::FunctionalClassAtomTypes
# class, to use for generating initial atom identifiers...
#
# Let:
#   HBD: HydrogenBondDonor
#   HBA: HydrogenBondAcceptor
#   PI :  PositivelyIonizable
#   NI : NegativelyIonizable
#   Ar : Aromatic
#   Hal : Halogen
#   H : Hydrophobic
#   RA : RingAtom
#   CA : ChainAtom
#
# Then:
#
#   Functional class atom type specification for an atom corresponds to:
#
#     Ar.CA.H.HBA.HBD.Hal.NI.PI.RA
#
#   Default functional classes used are: HBD, HBA, PI, NI, Ar, Hal
#
#   FunctionalAtomTypes are assigned using the following definitions [ Ref 60-61, Ref 65-66 ]:
#
#     HydrogenBondDonor: NH, NH2, OH
#     HydrogenBondAcceptor: N[!H], O
#     PositivelyIonizable: +, NH2
#     NegativelyIonizable: -, C(=O)OH, S(=O)OH, P(=O)OH
#
sub _InitializeFunctionalClassAtomTypesInformation {
  my($This) = @_;

  # Default functional class atom types to use for generating initial atom identifiers
  # are: HBD, HBA, PI, NI, Ar, Hal
  #
  @{$This->{FunctionalClassesToUse}} = qw(HBD HBA PI NI Ar Hal);

  return $This;
}
1078 
# Set atomic invariants to use for generation of initial atom identifiers...
#
# Values may be specified either as a list or as a reference to an array. Each
# specified value is validated using
# AtomTypes::AtomicInvariantsAtomTypes::IsAtomicInvariantAvailable; an unsupported
# value raises an error via croak before the current settings are modified.
#
# Returns the object on success. A warning is issued via carp, and nothing is
# returned, when no values are specified or when AtomIdentifierType is not
# AtomicInvariantsAtomTypes.
#
sub SetAtomicInvariantsToUse {
  my($This, @Values) = @_;
  my($FirstValue, $TypeOfFirstValue, $SpecifiedAtomicInvariant, @SpecifiedAtomicInvariants, @AtomicInvariantsToUse);

  if (!@Values) {
    carp "Warning: ${ClassName}->SetAtomicInvariantsToUse: No values specified...";
    return;
  }

  if ($This->{AtomIdentifierType} !~ /^AtomicInvariantsAtomTypes$/i) {
    # Report this method's own name so that the warning points to the right call...
    carp "Warning: ${ClassName}->SetAtomicInvariantsToUse: AtomicInvariantsToUse can't be set for InitialAtomIdentifierType of $This->{AtomIdentifierType}...";
    return;
  }

  $FirstValue = $Values[0];
  $TypeOfFirstValue = ref $FirstValue;

  @SpecifiedAtomicInvariants = ();
  @AtomicInvariantsToUse = ();

  # A leading array reference supplies the values; otherwise the argument list itself is used...
  if ($TypeOfFirstValue =~ /^ARRAY/) {
    push @SpecifiedAtomicInvariants, @{$FirstValue};
  }
  else {
    push @SpecifiedAtomicInvariants, @Values;
  }

  # Make sure specified AtomicInvariants are valid; collect them separately so that
  # existing settings stay intact when validation fails...
  for $SpecifiedAtomicInvariant (@SpecifiedAtomicInvariants) {
    if (!AtomTypes::AtomicInvariantsAtomTypes::IsAtomicInvariantAvailable($SpecifiedAtomicInvariant)) {
      croak "Error: ${ClassName}->SetAtomicInvariantsToUse: Specified atomic invariant, $SpecifiedAtomicInvariant, is not supported...\n ";
    }
    push @AtomicInvariantsToUse, $SpecifiedAtomicInvariant;
  }

  # Set atomic invariants to use...
  @{$This->{AtomicInvariantsToUse}} = ();
  push @{$This->{AtomicInvariantsToUse}}, @AtomicInvariantsToUse;

  return $This;
}
1123 
# Set functional classes to use for generation of initial atom identifiers...
#
# Values may be specified either as a list or as a reference to an array. Each
# specified value is validated using
# AtomTypes::FunctionalClassAtomTypes::IsFunctionalClassAvailable; an unsupported
# value raises an error via croak before the current settings are modified.
#
# Returns the object on success. A warning is issued via carp, and nothing is
# returned, when no values are specified or when AtomIdentifierType is not
# FunctionalClassAtomTypes.
#
sub SetFunctionalClassesToUse {
  my($This, @Values) = @_;
  my(@Candidates, @Accepted);

  unless (@Values) {
    carp "Warning: ${ClassName}->SetFunctionalClassesToUse: No values specified...";
    return;
  }

  unless ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
    carp "Warning: ${ClassName}->SetFunctionalClassesToUse: FunctionalClassesToUse can't be set for InitialAtomIdentifierType of $This->{AtomIdentifierType}...";
    return;
  }

  # A leading array reference supplies the values; otherwise the argument list itself is used...
  @Candidates = (ref($Values[0]) =~ /^ARRAY/) ? @{$Values[0]} : @Values;

  # Make sure specified FunctionalClasses are valid...
  @Accepted = ();
  for my $SpecifiedFunctionalClass (@Candidates) {
    unless (AtomTypes::FunctionalClassAtomTypes::IsFunctionalClassAvailable($SpecifiedFunctionalClass)) {
      croak "Error: ${ClassName}->SetFunctionalClassesToUse: Specified functional class, $SpecifiedFunctionalClass, is not supported...\n ";
    }
    push @Accepted, $SpecifiedFunctionalClass;
  }

  # Set functional classes to use, reusing the existing array...
  @{$This->{FunctionalClassesToUse}} = @Accepted;

  return $This;
}
1167 
# Return a string containing data for ExtendedConnectivityFingerprints object...
#
# The string starts with the initial atom identifier type and neighborhood radius.
# For AtomicInvariantsAtomTypes and FunctionalClassAtomTypes identifier types, the
# values in use along with their order and available values are added. It ends
# with size, bit counts and the bit vector for the ExtendedConnectivityBits type,
# or the number of identifiers and the fingerprints vector for all other types.
#
sub StringifyExtendedConnectivityFingerprints {
  my($This) = @_;
  my(@Pieces);

  @Pieces = ("InitialAtomIdentifierType: $This->{AtomIdentifierType}; NeighborhoodRadius: $This->{NeighborhoodRadius}");

  if ($This->{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
    my(@AtomicInvariantsOrder) = AtomTypes::AtomicInvariantsAtomTypes::GetAtomicInvariantsOrder();
    my(%AvailableAtomicInvariants) = AtomTypes::AtomicInvariantsAtomTypes::GetAvailableAtomicInvariants();
    my(@AtomicInvariants) = ();

    # Describe each available atomic invariant in its canonical order...
    for my $AtomicInvariant (@AtomicInvariantsOrder) {
      push @AtomicInvariants, "$AtomicInvariant: $AvailableAtomicInvariants{$AtomicInvariant}";
    }

    push @Pieces, "; AtomicInvariantsToUse: <" . TextUtil::JoinWords(\@{$This->{AtomicInvariantsToUse}}, ", ", 0) . ">";
    push @Pieces, "; AtomicInvariantsOrder: <" . TextUtil::JoinWords(\@AtomicInvariantsOrder, ", ", 0) . ">";
    push @Pieces, "; AvailableAtomicInvariants: <" . TextUtil::JoinWords(\@AtomicInvariants, ", ", 0) . ">";
  }
  elsif ($This->{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
    my(@FunctionalClassesOrder) = AtomTypes::FunctionalClassAtomTypes::GetFunctionalClassesOrder();
    my(%AvailableFunctionalClasses) = AtomTypes::FunctionalClassAtomTypes::GetAvailableFunctionalClasses();
    my(@FunctionalClasses) = ();

    # Describe each available functional class in its canonical order...
    for my $FunctionalClass (@FunctionalClassesOrder) {
      push @FunctionalClasses, "$FunctionalClass: $AvailableFunctionalClasses{$FunctionalClass}";
    }

    push @Pieces, "; FunctionalClassesToUse: <" . TextUtil::JoinWords(\@{$This->{FunctionalClassesToUse}}, ", ", 0) . ">";
    push @Pieces, "; FunctionalClassesOrder: <" . TextUtil::JoinWords(\@FunctionalClassesOrder, ", ", 0) . ">";
    push @Pieces, "; AvailableFunctionalClasses: <" . TextUtil::JoinWords(\@FunctionalClasses, ", ", 0) . ">";
  }

  if ($This->{Type} =~ /^ExtendedConnectivityBits$/i) {
    # Size...
    push @Pieces, "; Size: $This->{Size}; MinSize: $This->{MinSize}; MaxSize: $This->{MaxSize}";

    # Fingerprint bit density and num of bits set...
    my($NumOfSetBits) = $This->{FingerprintsBitVector}->GetNumOfSetBits();
    my($BitDensity) = $This->{FingerprintsBitVector}->GetFingerprintsBitDensity();
    push @Pieces, "; NumOfOnBits: $NumOfSetBits; BitDensity: $BitDensity";

    # FingerprintsBitVector...
    push @Pieces, "; FingerprintsBitVector: < $This->{FingerprintsBitVector} >";
  }
  else {
    # Number of identifiers...
    push @Pieces, "; NumOfIdentifiers: " . $This->{FingerprintsVector}->GetNumOfValues();

    # FingerprintsVector...
    push @Pieces, "; FingerprintsVector: < $This->{FingerprintsVector} >";
  }

  return join('', @Pieces);
}
1226