MayaChemTools

   1 package Fingerprints::EStateIndiciesFingerprints;
   2 #
   3 # File: EStateIndiciesFingerprints.pm
   4 # Author: Manish Sud <msud@san.rr.com>
   5 #
   6 # Copyright (C) 2024 Manish Sud. All rights reserved.
   7 #
   8 # This file is part of MayaChemTools.
   9 #
  10 # MayaChemTools is free software; you can redistribute it and/or modify it under
  11 # the terms of the GNU Lesser General Public License as published by the Free
  12 # Software Foundation; either version 3 of the License, or (at your option) any
  13 # later version.
  14 #
  15 # MayaChemTools is distributed in the hope that it will be useful, but without
  16 # any warranty; without even the implied warranty of merchantability of fitness
  17 # for a particular purpose.  See the GNU Lesser General Public License for more
  18 # details.
  19 #
  20 # You should have received a copy of the GNU Lesser General Public License
  21 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  22 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  23 # Boston, MA, 02111-1307, USA.
  24 #
  25 
  26 use strict;
  27 use Carp;
  28 use Exporter;
  29 use Text::ParseWords;
  30 use TextUtil ();
  31 use FileUtil ();
  32 use MathUtil ();
  33 use Fingerprints::Fingerprints;
  34 use Molecule;
  35 use AtomTypes::EStateAtomTypes;
  36 use AtomicDescriptors::EStateValuesDescriptors;
  37 
  38 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
  39 
  40 @ISA = qw(Fingerprints::Fingerprints Exporter);
  41 @EXPORT = qw();
  42 @EXPORT_OK = qw();
  43 
  44 %EXPORT_TAGS = (all  => [@EXPORT, @EXPORT_OK]);
  45 
  46 # Setup class variables...
  47 my($ClassName);
  48 _InitializeClass();
  49 
  50 # Overload Perl functions...
  51 use overload '""' => 'StringifyEStateIndiciesFingerprints';
  52 
  53 # Class constructor...
  54 sub new {
  55   my($Class, %NamesAndValues) = @_;
  56 
  57   # Initialize object...
  58   my $This = $Class->SUPER::new();
  59   bless $This, ref($Class) || $Class;
  60   $This->_InitializeEStateIndiciesFingerprints();
  61 
  62   $This->_InitializeEStateIndiciesFingerprintsProperties(%NamesAndValues);
  63 
  64   return $This;
  65 }
  66 
  67 # Initialize object data...
  68 #
  69 sub _InitializeEStateIndiciesFingerprints {
  70   my($This) = @_;
  71 
  72   # EStateIndicies is a vector containing sum of E-state values for E-state atom types
  73   #
  74   $This->{Type} = 'EStateIndicies';
  75 
  76   # EStateAtomTypesSetToUse for EStateIndicies:
  77   #
  78   # ArbitrarySize - Corrresponds to only E-state atom types detected in molecule
  79   # FixedSize - Corresponds to fixed number of E-state atom types previously defined [ Ref 77 ]
  80   #
  81   # The default EStateAtomTypesSetToUse value for EStateIndicies fingerprints type: ArbitrarySize.
  82   # Possible values: ArbitrarySize or FixedSize.
  83   #
  84   $This->{EStateAtomTypesSetToUse} = '';
  85 
  86   # Assigned E-state atom types...
  87   %{$This->{EStateAtomTypes}} = ();
  88 
  89   # Vector values precision for real values during E-state indicies...
  90   $This->{ValuesPrecision} = 3;
  91 
  92   # Calculated E-state values and indicies for generating E-state indicies fingerprints...
  93   %{$This->{EStateValues}} = ();
  94   %{$This->{EStateIndicies}} = ();
  95 }
  96 
  97 # Initialize class ...
  98 sub _InitializeClass {
  99   #Class name...
 100   $ClassName = __PACKAGE__;
 101 
 102 }
 103 
 104 # Initialize object properties....
 105 sub _InitializeEStateIndiciesFingerprintsProperties {
 106   my($This, %NamesAndValues) = @_;
 107 
 108   my($Name, $Value, $MethodName);
 109   while (($Name, $Value) = each  %NamesAndValues) {
 110     $MethodName = "Set${Name}";
 111     $This->$MethodName($Value);
 112   }
 113 
 114   # Make sure molecule object was specified...
 115   if (!exists $NamesAndValues{Molecule}) {
 116     croak "Error: ${ClassName}->New: Object can't be instantiated without specifying molecule...";
 117   }
 118 
 119   $This->_InitializeEstateIndicies();
 120 
 121   return $This;
 122 }
 123 
 124 # Initialize E-state indicies...
 125 #
 126 sub _InitializeEstateIndicies {
 127   my($This) = @_;
 128 
 129   # Set default EStateAtomTypesSetToUse...
 130   if (!$This->{EStateAtomTypesSetToUse}) {
 131     $This->{EStateAtomTypesSetToUse} = 'ArbitrarySize';
 132   }
 133 
 134   # Vector type...
 135   $This->{VectorType} = 'FingerprintsVector';
 136 
 137   if ($This->{EStateAtomTypesSetToUse} =~ /^FixedSize$/i) {
 138     $This->{FingerprintsVectorType} = 'OrderedNumericalValues';
 139   }
 140   else {
 141     $This->{FingerprintsVectorType} = 'NumericalValues';
 142   }
 143 
 144   $This->_InitializeFingerprintsVector();
 145 
 146   return $This;
 147 }
 148 
 149 # Disable set size method...
 150 #
 151 sub SetSize {
 152   my($This, $Type) = @_;
 153 
 154   croak "Error: ${ClassName}->SetSize: Can't change size:  It's not allowed...";
 155 }
 156 
 157 # Set E-state atom types set to use...
 158 #
 159 sub SetEStateAtomTypesSetToUse {
 160   my($This, $Value) = @_;
 161 
 162   if ($This->{EStateAtomTypesSetToUse}) {
 163     croak "Error: ${ClassName}->SetEStateAtomTypesSetToUse: Can't change size:  It's already set...";
 164   }
 165 
 166   if ($Value !~ /^(ArbitrarySize|FixedSize)/i) {
 167     croak "Error: ${ClassName}->SetEStateAtomTypesSetToUse: Unknown EStateAtomTypesSetToUse value: $Value; Supported values: ArbitrarySize or FixedSize";
 168   }
 169 
 170   $This->{EStateAtomTypesSetToUse} = $Value;
 171 
 172   return $This;
 173 }
 174 
 175 # Set vector values precision for real values for E-state indicies...
 176 #
 177 sub SetValuesPrecision {
 178   my($This, $Value) = @_;
 179 
 180   if (!TextUtil::IsPositiveInteger($Value)) {
 181     croak "Error: ${ClassName}->SetValuesPrecision: ValuesPrecision value, $Value, is not valid:  It must be a positive integer...";
 182   }
 183   $This->{ValuesPrecision} = $Value;
 184 
 185   return $This;
 186 }
 187 
 188 # Generate fingerprints description...
 189 #
 190 sub GetDescription {
 191   my($This) = @_;
 192 
 193   # Is description explicity set?
 194   if (exists $This->{Description}) {
 195     return $This->{Description};
 196   }
 197 
 198   # Generate fingerprints description...
 199 
 200   return "$This->{Type}:$This->{EStateAtomTypesSetToUse}";
 201 }
 202 
 203 # Generate electrotopological state indicies (E-state) [ Ref 75-78 ] fingerprints for
 204 # non-hydrogen atoms in a molecule...
 205 #
 206 # EStateIndicies fingerprints constitute a vector containing sum of E-state values
 207 # for E-state atom types. Two types of E-state atom types set size are allowed:
 208 #
 209 # ArbitrarySize - Corrresponds to only E-state atom types detected in molecule
 210 # FixedSize - Corresponds to fixed number of E-state atom types previously defined
 211 #
 212 # Module AtomTypes::EStateAtomTypes.pm is used to assign E-state atom types to
 213 # non-hydrogen atoms in the molecule which is able to assign atom types to any valid
 214 # atom group. However, for FixedSize value of EStateAtomTypesSetToUse, only a fixed
 215 # set of E-state atom types corresponding to specific atom groups [ Appendix III in
 216 # Ref 77 ] are used for fingerprints.
 217 #
 218 # The fixed size E-state atom type set size used during generation of fingerprints corresponding
 219 # FixedSize value of EStateAtomTypesSetToUse contains 87 E-state non-hydrogen atom types
 220 # in EStateAtomTypes.csv data file distributed with MayaChemTools.
 221 #
 222 # Combination of Type and EStateAtomTypesSetToUse allow generation of 2 different types of
 223 # E-state indicies fingerprints:
 224 #
 225 # Type                        EStateAtomTypesSetToUse
 226 #
 227 # EStateIndicies               ArbitrarySize      [ default fingerprints ]
 228 # EStateIndicies               FixedSize
 229 #
 230 # The default is generate EStateIndicies type fingeprints corresponding to ArbitrarySize as
 231 # EStateAtomTypesSetToUse value.
 232 #
 233 #
 234 sub GenerateFingerprints {
 235   my($This) = @_;
 236 
 237   # Cache appropriate molecule data...
 238   $This->_SetupMoleculeDataCache();
 239 
 240   # Assign E-state atom types...
 241   if (!$This->_AssignEStateAtomTypes()) {
 242     carp "Warning: ${ClassName}->GenerateFingerprints: $This->{Type} fingerprints generation didn't succeed: Couldn't assign valid E-state atom types to all atoms...";
 243     return $This;
 244   }
 245 
 246   # Calculate E-state indicies...
 247   if (!$This->_CalculateEStateIndicies()) {
 248     carp "Warning: ${ClassName}->GenerateFingerprints: $This->{Type} fingerprints generation didn't succeed: Couldn't calculate E-state values for all atoms...";
 249     return $This;
 250   }
 251 
 252   # Set final fingerprints...
 253   $This->_SetFinalFingerprints();
 254 
 255   # Clear cached molecule data...
 256   $This->_ClearMoleculeDataCache();
 257 
 258   return $This;
 259 }
 260 
 261 # Assign E-state atom types...
 262 #
 263 sub _AssignEStateAtomTypes {
 264   my($This) = @_;
 265   my($EStateAtomTypes, $Atom, $AtomID, $AtomType);
 266 
 267   %{$This->{EStateAtomTypes}} = ();
 268 
 269   # Assign E-state atom types...
 270   $EStateAtomTypes = new AtomTypes::EStateAtomTypes('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => 1);
 271   $EStateAtomTypes->AssignAtomTypes();
 272 
 273   # Make sure atom types assignment is successful...
 274   if (!$EStateAtomTypes->IsAtomTypesAssignmentSuccessful()) {
 275     return undef;
 276   }
 277 
 278   # Collect assigned atom types...
 279   for $Atom (@{$This->{Atoms}}) {
 280     $AtomID = $Atom->GetID();
 281 
 282     $AtomType = $EStateAtomTypes->GetAtomType($Atom);
 283     $This->{EStateAtomTypes}{$AtomID} = $AtomType;
 284   }
 285   return $This;
 286 }
 287 
 288 # Calculate E-state indicies by summing up E-state values for specific
 289 # E-state atom types...
 290 #
 291 sub _CalculateEStateIndicies {
 292   my($This) = @_;
 293   my($Atom, $AtomID, $AtomType, $EStateValue);
 294 
 295   # Calculate E-state values to generate E-state indicies...
 296   if (!$This->_CalculateEStateValuesDescriptors()) {
 297     return undef;
 298   }
 299 
 300   # Calculate E-state indicies...
 301   for $Atom (@{$This->{Atoms}}) {
 302     $AtomID = $Atom->GetID();
 303 
 304     $AtomType = $This->{EStateAtomTypes}{$AtomID};
 305     $EStateValue = $This->{EStateValues}{$AtomID};
 306 
 307     if (!exists $This->{EStateIndicies}{$AtomType}) {
 308       $This->{EStateIndicies}{$AtomType} = 0;
 309     }
 310 
 311     $This->{EStateIndicies}{$AtomType} += $EStateValue;
 312   }
 313   return $This;
 314 }
 315 
 316 # Calculate E-state values for E-state indicies...
 317 #
 318 sub _CalculateEStateValuesDescriptors {
 319   my($This) = @_;
 320   my($EStateValuesDescriptors, $Atom, $AtomID, $EStateValue);
 321 
 322   %{$This->{EStateValues}} = ();
 323 
 324   # Calculate and assign E-state values...
 325   $EStateValuesDescriptors = new AtomicDescriptors::EStateValuesDescriptors('Molecule' => $This->{Molecule});
 326   $EStateValuesDescriptors->GenerateDescriptors();
 327 
 328   # Make sure E-state values calculation is successful...
 329   if (!$EStateValuesDescriptors->IsDescriptorsGenerationSuccessful()) {
 330     return undef;
 331   }
 332 
 333   # Collect assigned E-state values...
 334   for $Atom (@{$This->{Atoms}}) {
 335     $AtomID = $Atom->GetID();
 336     $EStateValue = $EStateValuesDescriptors->GetDescriptorValue($Atom);
 337     $This->{EStateValues}{$AtomID} = $EStateValue;
 338   }
 339   return $This;
 340 }
 341 
 342 # Set final final fingerpritns for E-state indicies...
 343 #
 344 sub _SetFinalFingerprints {
 345   my($This) = @_;
 346   my($AtomType, $ValuesPrecision, $EStateAtomTypesDataRef, @Values, @IDs);
 347 
 348   # Mark successful generation of fingerprints...
 349   $This->{FingerprintsGenerated} = 1;
 350 
 351   @Values = ();
 352   @IDs = ();
 353 
 354   $ValuesPrecision = $This->{ValuesPrecision};
 355 
 356   if ($This->{EStateAtomTypesSetToUse} =~ /^FixedSize$/i) {
 357     # Use fixed size E-state atom types set for non-hydrogen atoms...
 358     for $AtomType (@{AtomTypes::EStateAtomTypes::GetAllPossibleEStateNonHydrogenAtomTypes()}) {
 359       push @IDs, "S${AtomType}";
 360       push @Values, exists($This->{EStateIndicies}{$AtomType}) ? MathUtil::round($This->{EStateIndicies}{$AtomType}, $ValuesPrecision) : 0;
 361     }
 362   }
 363   else {
 364     for $AtomType (sort keys %{$This->{EStateIndicies}}) {
 365       push @IDs, "S${AtomType}";
 366       push @Values, MathUtil::round($This->{EStateIndicies}{$AtomType}, $ValuesPrecision);
 367     }
 368   }
 369 
 370   # Add IDs and values to fingerprint vector...
 371   if (@IDs) {
 372     $This->{FingerprintsVector}->AddValueIDs(\@IDs);
 373   }
 374   $This->{FingerprintsVector}->AddValues(\@Values);
 375 
 376   return $This;
 377 }
 378 
 379 # Cache  appropriate molecule data...
 380 #
 381 sub _SetupMoleculeDataCache {
 382   my($This) = @_;
 383 
 384   # Get all non-hydrogen atoms...
 385   my($NegateAtomCheckMethod);
 386   $NegateAtomCheckMethod = 1;
 387   @{$This->{Atoms}} = $This->GetMolecule()->GetAtoms("IsHydrogen", $NegateAtomCheckMethod);
 388 
 389   return $This;
 390 }
 391 
 392 # Clear cached molecule data...
 393 #
 394 sub _ClearMoleculeDataCache {
 395   my($This) = @_;
 396 
 397   @{$This->{Atoms}} = ();
 398 
 399   return $This;
 400 }
 401 
 402 # Return a string containg data for EStateIndiciesFingerprints object...
 403 sub StringifyEStateIndiciesFingerprints {
 404   my($This) = @_;
 405   my($EStateIndiciesFingerprintsString);
 406 
 407   # Type of Keys...
 408   $EStateIndiciesFingerprintsString = "Type: $This->{Type}; EStateAtomTypesSetToUse: $This->{EStateAtomTypesSetToUse}";
 409 
 410   # Fingerprint vector...
 411   $EStateIndiciesFingerprintsString .= "; FingerprintsVector: < $This->{FingerprintsVector} >";
 412 
 413   return $EStateIndiciesFingerprintsString;
 414 }
 415