MayaChemTools

   1 package AtomicDescriptors::EStateValuesDescriptors;
   2 #
   3 # File: EStateValuesDescriptors.pm
   4 # Author: Manish Sud <msud@san.rr.com>
   5 #
   6 # Copyright (C) 2024 Manish Sud. All rights reserved.
   7 #
   8 # This file is part of MayaChemTools.
   9 #
  10 # MayaChemTools is free software; you can redistribute it and/or modify it under
  11 # the terms of the GNU Lesser General Public License as published by the Free
  12 # Software Foundation; either version 3 of the License, or (at your option) any
  13 # later version.
  14 #
  15 # MayaChemTools is distributed in the hope that it will be useful, but without
  16 # any warranty; without even the implied warranty of merchantability of fitness
  17 # for a particular purpose.  See the GNU Lesser General Public License for more
  18 # details.
  19 #
  20 # You should have received a copy of the GNU Lesser General Public License
  21 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  22 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  23 # Boston, MA, 02111-1307, USA.
  24 #
  25 
  26 use strict;
  27 use Carp;
  28 use Exporter;
  29 use Scalar::Util ();
  30 use Matrix;
  31 use Constants;
  32 use TextUtil ();
  33 use MathUtil ();
  34 use StatisticsUtil ();
  35 use Atom;
  36 use Molecule;
  37 use AtomicDescriptors::AtomicDescriptors;
  38 
  39 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
  40 
  41 @ISA = qw(AtomicDescriptors::AtomicDescriptors Exporter);
  42 @EXPORT = qw();
  43 @EXPORT_OK = qw();
  44 
  45 %EXPORT_TAGS = (all  => [@EXPORT, @EXPORT_OK]);
  46 
  47 # Setup class variables...
  48 my($ClassName);
  49 _InitializeClass();
  50 
  51 # Overload Perl functions...
  52 use overload '""' => 'StringifyEStateValuesDescriptors';
  53 
  54 # Class constructor...
  55 sub new {
  56   my($Class, %NamesAndValues) = @_;
  57 
  58   # Initialize object...
  59   my $This = $Class->SUPER::new();
  60   bless $This, ref($Class) || $Class;
  61   $This->_InitializeEStateValuesDescriptors();
  62 
  63   $This->_InitializeEStateValuesDescriptorsProperties(%NamesAndValues);
  64 
  65   return $This;
  66 }
  67 
  68 # Initialize class ...
  69 sub _InitializeClass {
  70   #Class name...
  71   $ClassName = __PACKAGE__;
  72 }
  73 
  74 
  75 # Initialize object data...
  76 #
  77 sub _InitializeEStateValuesDescriptors {
  78   my($This) = @_;
  79 
  80   # Type of AtomicDescriptor...
  81   $This->{Type} = 'EStateValue';
  82 
  83   # Intrinsic state values calculated for non-hydrogen atom...
  84   #
  85   %{$This->{IStateValues}} = ();
  86 
  87   # Calculatetion of E-state values for types for hydrogens is not supported...
  88   $This->{IgnoreHydrogens} = 1;
  89 
  90   # Perturbation to intrinsic state values of atoms corresponding to all non-hydrogen
  91   # atom pairs...
  92   #
  93   %{$This->{DeltaIStateMatrix}} = ();
  94 
  95   # E-state values calculated for non-hydrogen atoms...
  96   #
  97   %{$This->{EStateValues}} = ();
  98 
  99   return $This;
 100 }
 101 
 102 # Initialize object properties...
 103 #
 104 sub _InitializeEStateValuesDescriptorsProperties {
 105   my($This, %NamesAndValues) = @_;
 106 
 107   my($Name, $Value, $MethodName);
 108   while (($Name, $Value) = each  %NamesAndValues) {
 109     $MethodName = "Set${Name}";
 110     $This->$MethodName($Value);
 111   }
 112 
 113   # Make sure molecule object was specified...
 114   if (!exists $NamesAndValues{Molecule}) {
 115     croak "Error: ${ClassName}->New: Object can't be instantiated without specifying molecule...";
 116   }
 117 
 118   # Intialize atomic descriptor values...
 119   $This->_InitializeDescriptorValues();
 120 
 121   return $This;
 122 }
 123 
 124 # Disable change of ignore hydrogens...
 125 #
 126 sub SetIgnoreHydrogens {
 127   my($This, $IgnoreHydrogens) = @_;
 128 
 129   carp "Warning: ${ClassName}->SetIgnoreHydrogens: Ignore hydrogens value can't be changed: It's not supported...";
 130 
 131   return $This;
 132 }
 133 
 134 # Generate electrotopological state (E-state) values [ Ref 75-78 ] for all atoms
 135 # in the molecule...
 136 #
 137 # Calculation of E-state values for non-hydrogen atoms:
 138 #
 139 # Let:
 140 #
 141 #   N = Principal quantum number or period number corresponding to element symbol
 142 #
 143 #   Sigma = Number of sigma electrons involves in bonds to hydrogen and non-hydrogen atoms
 144 #           attached to atom
 145 #         = Number of sigma bonds to hydrogen and non-hydrogen atoms attached to atom
 146 #   PI = Number of PI electrons involved in bonds to non-hydrogen atoms attached to atom
 147 #       = Number of PI bonds to non-hydrogen atoms attached to atom
 148 #
 149 #   LP = Number of lone pair electrons on atom
 150 #
 151 #   Zv = Number of electrons in valence shell of atom
 152 #
 153 #   X = Number of non-hydrogen atom neighbors or heavy atoms attached to atom
 154 #   H = Number of implicit and explicit hydrogens for atom
 155 #
 156 #   Delta = Number of sigma electrons involved to bonds to non-hydrogen atoms
 157 #   DeltaV = ValenceDelta = Number of valence shell electrons not involved in bonding to hydrogen atoms
 158 #
 159 #   Ii = Intrinsic state value for atom i
 160 #
 161 #   DeltaIi = Sum of perturbations to intrinsic state value Ii of atom i by all other atoms besides atom i
 162 #
 163 #   DeltaIij = Perturbation to intrinsic state value Ii of atom i by atom j
 164 #
 165 #   Dij = Graph/bond distance between atom i and j
 166 #   Rij = Dij + 1
 167 #
 168 #   Si = E-state value for atom i
 169 #
 170 #
 171 # Then:
 172 #
 173 #   Delta = Sigma - H = X
 174 #
 175 #   DeltaV = Zv - H
 176 #      = Sigma + PI + LP - H
 177 #
 178 #   Ii = ( ( ( 2 / N ) ** 2 ) * DeltaV + 1 ) / Delta
 179 #
 180 #   Si = Ii + DeltaIi
 181 #
 182 #   DeltaIij = (Ii - Ij) / (Rij ** 2) for j not equal to i
 183 #
 184 #   DeltaIji = - DeltaIij
 185 #
 186 #   DeltaIi = SUM ( (Ii - Ij) / (Rij ** 2) ) for j = 1 to num of atoms skipping atom i
 187 #
 188 # Methodology:
 189 #   . Calculate intrinsic state values for atoms.
 190 #   . Generate a distance matrix.
 191 #   . Use distance matrix to calculate DeltaIij matrix with each row i containing
 192 #     DeltaIij values corresponding to perturbation to intrinsic state value of atom
 193 #     i by atom j.
 194 #   . Calculate E-state values using DeltaIij matrix.
 195 #   . Assign E-state values to atoms.
 196 #
 197 # Notes:
 198 #   . The current release of MayaChemTools doesn't support calculation of Hydrogen
 199 #     E-state values.
 200 #
 201 sub GenerateDescriptors {
 202   my($This) = @_;
 203 
 204   # Cache appropriate molecule data...
 205   $This->_SetupMoleculeDataCache();
 206 
 207   # Generate distance matrix...
 208   if (!$This->_SetupDistanceMatrix()) {
 209     carp "Warning: ${ClassName}->GenerateDescriptors: E-state values description generation didn't succeed: Couldn't generate a distance matrix...";
 210     return $This;
 211   }
 212 
 213   # Calculate EState values..
 214   if (!$This->_CalculateEStateValuesDescriptors()) {
 215     carp "Warning: ${ClassName}->GenerateDescriptors: E-state values description generation didn't succeed: Couldn't calculate IState values for all atoms...";
 216     return $This;
 217   }
 218 
 219   # Set final descriptor values...
 220   $This->_SetFinalValues();
 221 
 222   # Clear cached molecule data...
 223   $This->_ClearMoleculeDataCache();
 224 
 225   return $This;
 226 }
 227 
 228 # Calculate E-state values for non-hydrogen atoms...
 229 #
 230 sub _CalculateEStateValuesDescriptors {
 231   my($This) = @_;
 232   my($DeltaIStateMatrix, $NumOfRows, $NumOfCols, $RowIndex, $AtomID, $EStateValue, $IStateValue, $DeltaIStateValue, @DeltaIStateMatrixRowValues);
 233 
 234   %{$This->{EStateValues}} = ();
 235 
 236   # Calculate intrinsic state values for non-hydrogen atoms...
 237   if (!$This->_CalculateIStateValues()) {
 238     return undef;
 239   }
 240 
 241   # Calculate delta intrinsic state matrix for non-hydrogen atoms...
 242   $This->_CalculateDeltaIStateMatrix();
 243 
 244   # Get DeltaIState matrix information...
 245   $DeltaIStateMatrix = $This->{DeltaIStateMatrix};
 246   ($NumOfRows, $NumOfCols) = $DeltaIStateMatrix->GetSize();
 247 
 248   # Calculate EState values...
 249   ROWINDEX: for $RowIndex (0 .. ($NumOfRows - 1) ) {
 250     $AtomID = $This->{AtomIndexToID}{$RowIndex};
 251     if ( !(exists($This->{IStateValues}{$AtomID})) ) {
 252       next ROWINDEX;
 253     }
 254     $IStateValue = $This->{IStateValues}{$AtomID};
 255 
 256     @DeltaIStateMatrixRowValues = $DeltaIStateMatrix->GetRowValues($RowIndex);
 257     $DeltaIStateValue = StatisticsUtil::Sum(\@DeltaIStateMatrixRowValues);
 258 
 259     $EStateValue = $IStateValue + $DeltaIStateValue;
 260 
 261     $This->{EStateValues}{$AtomID} = $EStateValue;
 262   }
 263   return $This;
 264 }
 265 
 266 # Calculate intrinsic state values for non-hydrogen atoms...
 267 #
 268 sub _CalculateIStateValues {
 269   my($This) = @_;
 270   my($Atom, $AtomID);
 271 
 272   %{$This->{IStateValues}} = ();
 273 
 274   ATOM: for $Atom (@{$This->{Atoms}}) {
 275     # Irrespective of IgoreHydrogens value, just ignore hydrogens...
 276     if ($Atom->IsHydrogen()) {
 277       next ATOM;
 278     }
 279     $AtomID = $Atom->GetID();
 280     $This->{IStateValues}{$AtomID} = $This->_CalculateIStateValue($Atom);
 281     if (!defined($This->{IStateValues}{$AtomID})) {
 282       return undef;
 283     }
 284   }
 285   return $This;
 286 }
 287 
 288 # Calculation intrinsic state value for non-hydrogen...
 289 #
 290 sub _CalculateIStateValue {
 291   my($This, $Atom) = @_;
 292   my($IStateValue, $Delta, $DeltaV, $PeriodNumber);
 293 
 294   $PeriodNumber = $Atom->GetPeriodNumber();
 295   ($Delta, $DeltaV) = $This->_GetDeltaValues($Atom);
 296 
 297   if (!(defined($Delta) && defined($PeriodNumber) && defined($DeltaV))) {
 298     return undef;
 299   }
 300 
 301   $IStateValue = ($PeriodNumber && $Delta) ? (((2/$PeriodNumber)**2)*$DeltaV + 1)/$Delta : 0;
 302 
 303   return $IStateValue;
 304 }
 305 
 306 # Get Delta and DeltaV values for atom...
 307 #
 308 sub _GetDeltaValues {
 309   my($This, $Atom) = @_;
 310   my($Delta, $DeltaV, $ValenceElectrons, $NumOfHydrogens);
 311 
 312   ($Delta, $DeltaV) = (undef, undef);
 313 
 314   $ValenceElectrons = $Atom->GetValenceElectrons();
 315   $NumOfHydrogens = $Atom->GetAtomicInvariantValue('H');
 316 
 317   $Delta = $Atom->GetAtomicInvariantValue('X');
 318   if (defined($ValenceElectrons) && defined($NumOfHydrogens)) {
 319     $DeltaV = $ValenceElectrons - $NumOfHydrogens;
 320   }
 321 
 322   return ($Delta, $DeltaV);
 323 }
 324 
 325 # Calculate DeltaIState matrix for atoms with each row i containing DeltaIij values
 326 # corresponding atom atoms i and j.
 327 #
 328 # Notes:
 329 #   . Matrix elements corresponding to hydrogen atoms or unconnected
 330 #     are assigned zero value.
 331 #
 332 sub _CalculateDeltaIStateMatrix {
 333   my($This) = @_;
 334   my($DistanceMatrix, $NumOfRows, $NumOfCols, $RowIndex, $ColIndex, $AtomID1, $AtomID2, $DeltaIStateMatrix, $IStateValue1, $IStateValue2, $GraphDistance, $DeltaIState12, $DeltaIState21, $SkipIndexCheck);
 335 
 336   # Get distance matrix information...
 337   $DistanceMatrix = $This->{DistanceMatrix};
 338   ($NumOfRows, $NumOfCols) = $DistanceMatrix->GetSize();
 339 
 340   # Initialize DeltaIState matrix...
 341   $This->{DeltaIStateMatrix} = new Matrix($NumOfRows, $NumOfCols);
 342   $DeltaIStateMatrix = $This->{DeltaIStateMatrix};
 343 
 344   $SkipIndexCheck = 1;
 345 
 346   # Calculate DeltaIState matrix values...
 347   ROWINDEX: for $RowIndex (0 .. ($NumOfRows - 1) ) {
 348     $AtomID1 = $This->{AtomIndexToID}{$RowIndex};
 349     if (!exists($This->{IStateValues}{$AtomID1})) {
 350       next ROWINDEX;
 351     }
 352     $IStateValue1 = $This->{IStateValues}{$AtomID1};
 353 
 354     COLINDEX: for $ColIndex (($RowIndex + 1) .. ($NumOfCols - 1) ) {
 355       $AtomID2 = $This->{AtomIndexToID}{$ColIndex};
 356       if (!exists($This->{IStateValues}{$AtomID2})) {
 357         next COLINDEX;
 358       }
 359       $IStateValue2 = $This->{IStateValues}{$AtomID2};
 360 
 361       # Make sure it's a connected atom...
 362       $GraphDistance = $DistanceMatrix->GetValue($RowIndex, $ColIndex, $SkipIndexCheck);
 363       if ($GraphDistance >= BigNumber) {
 364         next COLINDEX;
 365       }
 366 
 367       $DeltaIState12 = ($IStateValue1 - $IStateValue2)/(($GraphDistance + 1)**2);
 368       $DeltaIState21 = -$DeltaIState12;
 369 
 370       # Set DeltaIState values...
 371       $DeltaIStateMatrix->SetValue($RowIndex, $ColIndex, $DeltaIState12, $SkipIndexCheck);
 372       $DeltaIStateMatrix->SetValue($ColIndex, $RowIndex, $DeltaIState21, $SkipIndexCheck);
 373     }
 374   }
 375 }
 376 
 377 # Setup distance matrix...
 378 #
 379 sub _SetupDistanceMatrix {
 380   my($This) = @_;
 381 
 382   $This->{DistanceMatrix} = $This->GetMolecule()->GetDistanceMatrix();
 383 
 384   if (!defined($This->{DistanceMatrix})) {
 385     return undef;
 386   }
 387 
 388   return $This;
 389 }
 390 
 391 # Setup final descriptor values...
 392 #
 393 sub _SetFinalValues {
 394   my($This) = @_;
 395   my($Atom, $AtomID, $EStateValue);
 396 
 397   ATOM: for $Atom (@{$This->{Atoms}}) {
 398     $AtomID = $Atom->GetID();
 399     if (!exists $This->{EStateValues}{$AtomID}) {
 400       next ATOM;
 401     }
 402     $EStateValue = $This->{EStateValues}{$AtomID};
 403     $This->SetDescriptorValue($Atom, $EStateValue);
 404   }
 405 
 406   return $This;
 407 }
 408 
 409 # Cache  appropriate molecule data...
 410 #
 411 sub _SetupMoleculeDataCache {
 412   my($This) = @_;
 413 
 414   # Get all atoms including hydrogens to correctly map atom indicies to atom IDs for
 415   # usage of distance matrix.
 416   #
 417   @{$This->{Atoms}} = $This->GetMolecule()->GetAtoms();
 418 
 419   # Get all atom IDs...
 420   my(@AtomIDs);
 421   @AtomIDs = ();
 422   @AtomIDs =  map { $_->GetID() } @{$This->{Atoms}};
 423 
 424   # Set AtomIndex to AtomID hash...
 425   %{$This->{AtomIndexToID}} = ();
 426   @{$This->{AtomIndexToID}}{ (0 .. $#AtomIDs) } = @AtomIDs;
 427 
 428   return $This;
 429 }
 430 
 431 # Clear cached molecule data...
 432 #
 433 sub _ClearMoleculeDataCache {
 434   my($This) = @_;
 435 
 436   @{$This->{Atoms}} = ();
 437 
 438   return $This;
 439 }
 440 
 441 # Return a string containg data for EStateValuesDescriptors object...
 442 #
 443 sub StringifyEStateValuesDescriptors {
 444   my($This) = @_;
 445   my($EStateValuesDescriptorsString);
 446 
 447   # Type of AtomicValues...
 448   $EStateValuesDescriptorsString = "AtomicDescriptorType: $This->{Type}; IgnoreHydrogens: " . ($This->{IgnoreHydrogens} ? "Yes" : "No");
 449 
 450   # Setup atomic descriptor information...
 451   my($AtomID, $DescriptorValue, @DescriptorValuesInfo, %DescriptorValues);
 452 
 453   @DescriptorValuesInfo = ();
 454   %DescriptorValues = $This->GetDescriptorValues();
 455 
 456   for $AtomID (sort { $a <=> $b } keys %DescriptorValues) {
 457     $DescriptorValue = $DescriptorValues{$AtomID};
 458     $DescriptorValue = (TextUtil::IsEmpty($DescriptorValue) || $DescriptorValue =~ /^None$/i) ? 'None' : MathUtil::round($DescriptorValue, 3) + 0;
 459     push @DescriptorValuesInfo, "$AtomID:$DescriptorValue";
 460   }
 461   $EStateValuesDescriptorsString .= "; AtomIDs:EStateValuesDescriptors: <" . TextUtil::JoinWords(\@DescriptorValuesInfo, ", ", 0) . ">";
 462 
 463   return $EStateValuesDescriptorsString;
 464 }
 465 
 466 # Is it a EStateValuesDescriptors object?
 467 sub _IsEStateValuesDescriptors {
 468   my($Object) = @_;
 469 
 470   return (Scalar::Util::blessed($Object) && $Object->isa($ClassName)) ? 1 : 0;
 471 }
 472