1 package AtomicDescriptors::EStateValuesDescriptors; 2 # 3 # File: EStateValuesDescriptors.pm 4 # Author: Manish Sud <msud@san.rr.com> 5 # 6 # Copyright (C) 2024 Manish Sud. All rights reserved. 7 # 8 # This file is part of MayaChemTools. 9 # 10 # MayaChemTools is free software; you can redistribute it and/or modify it under 11 # the terms of the GNU Lesser General Public License as published by the Free 12 # Software Foundation; either version 3 of the License, or (at your option) any 13 # later version. 14 # 15 # MayaChemTools is distributed in the hope that it will be useful, but without 16 # any warranty; without even the implied warranty of merchantability of fitness 17 # for a particular purpose. See the GNU Lesser General Public License for more 18 # details. 19 # 20 # You should have received a copy of the GNU Lesser General Public License 21 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or 22 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, 23 # Boston, MA, 02111-1307, USA. 24 # 25 26 use strict; 27 use Carp; 28 use Exporter; 29 use Scalar::Util (); 30 use Matrix; 31 use Constants; 32 use TextUtil (); 33 use MathUtil (); 34 use StatisticsUtil (); 35 use Atom; 36 use Molecule; 37 use AtomicDescriptors::AtomicDescriptors; 38 39 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); 40 41 @ISA = qw(AtomicDescriptors::AtomicDescriptors Exporter); 42 @EXPORT = qw(); 43 @EXPORT_OK = qw(); 44 45 %EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]); 46 47 # Setup class variables... 48 my($ClassName); 49 _InitializeClass(); 50 51 # Overload Perl functions... 52 use overload '""' => 'StringifyEStateValuesDescriptors'; 53 54 # Class constructor... 55 sub new { 56 my($Class, %NamesAndValues) = @_; 57 58 # Initialize object... 59 my $This = $Class->SUPER::new(); 60 bless $This, ref($Class) || $Class; 61 $This->_InitializeEStateValuesDescriptors(); 62 63 $This->_InitializeEStateValuesDescriptorsProperties(%NamesAndValues); 64 65 return $This; 66 } 67 68 # Initialize class ... 69 sub _InitializeClass { 70 #Class name... 71 $ClassName = __PACKAGE__; 72 } 73 74 75 # Initialize object data... 76 # 77 sub _InitializeEStateValuesDescriptors { 78 my($This) = @_; 79 80 # Type of AtomicDescriptor... 81 $This->{Type} = 'EStateValue'; 82 83 # Intrinsic state values calculated for non-hydrogen atom... 84 # 85 %{$This->{IStateValues}} = (); 86 87 # Calculatetion of E-state values for types for hydrogens is not supported... 88 $This->{IgnoreHydrogens} = 1; 89 90 # Perturbation to intrinsic state values of atoms corresponding to all non-hydrogen 91 # atom pairs... 92 # 93 %{$This->{DeltaIStateMatrix}} = (); 94 95 # E-state values calculated for non-hydrogen atoms... 96 # 97 %{$This->{EStateValues}} = (); 98 99 return $This; 100 } 101 102 # Initialize object properties... 103 # 104 sub _InitializeEStateValuesDescriptorsProperties { 105 my($This, %NamesAndValues) = @_; 106 107 my($Name, $Value, $MethodName); 108 while (($Name, $Value) = each %NamesAndValues) { 109 $MethodName = "Set${Name}"; 110 $This->$MethodName($Value); 111 } 112 113 # Make sure molecule object was specified... 114 if (!exists $NamesAndValues{Molecule}) { 115 croak "Error: ${ClassName}->New: Object can't be instantiated without specifying molecule..."; 116 } 117 118 # Intialize atomic descriptor values... 119 $This->_InitializeDescriptorValues(); 120 121 return $This; 122 } 123 124 # Disable change of ignore hydrogens... 125 # 126 sub SetIgnoreHydrogens { 127 my($This, $IgnoreHydrogens) = @_; 128 129 carp "Warning: ${ClassName}->SetIgnoreHydrogens: Ignore hydrogens value can't be changed: It's not supported..."; 130 131 return $This; 132 } 133 134 # Generate electrotopological state (E-state) values [ Ref 75-78 ] for all atoms 135 # in the molecule... 136 # 137 # Calculation of E-state values for non-hydrogen atoms: 138 # 139 # Let: 140 # 141 # N = Principal quantum number or period number corresponding to element symbol 142 # 143 # Sigma = Number of sigma electrons involves in bonds to hydrogen and non-hydrogen atoms 144 # attached to atom 145 # = Number of sigma bonds to hydrogen and non-hydrogen atoms attached to atom 146 # PI = Number of PI electrons involved in bonds to non-hydrogen atoms attached to atom 147 # = Number of PI bonds to non-hydrogen atoms attached to atom 148 # 149 # LP = Number of lone pair electrons on atom 150 # 151 # Zv = Number of electrons in valence shell of atom 152 # 153 # X = Number of non-hydrogen atom neighbors or heavy atoms attached to atom 154 # H = Number of implicit and explicit hydrogens for atom 155 # 156 # Delta = Number of sigma electrons involved to bonds to non-hydrogen atoms 157 # DeltaV = ValenceDelta = Number of valence shell electrons not involved in bonding to hydrogen atoms 158 # 159 # Ii = Intrinsic state value for atom i 160 # 161 # DeltaIi = Sum of perturbations to intrinsic state value Ii of atom i by all other atoms besides atom i 162 # 163 # DeltaIij = Perturbation to intrinsic state value Ii of atom i by atom j 164 # 165 # Dij = Graph/bond distance between atom i and j 166 # Rij = Dij + 1 167 # 168 # Si = E-state value for atom i 169 # 170 # 171 # Then: 172 # 173 # Delta = Sigma - H = X 174 # 175 # DeltaV = Zv - H 176 # = Sigma + PI + LP - H 177 # 178 # Ii = ( ( ( 2 / N ) ** 2 ) * DeltaV + 1 ) / Delta 179 # 180 # Si = Ii + DeltaIi 181 # 182 # DeltaIij = (Ii - Ij) / (Rij ** 2) for j not equal to i 183 # 184 # DeltaIji = - DeltaIij 185 # 186 # DeltaIi = SUM ( (Ii - Ij) / (Rij ** 2) ) for j = 1 to num of atoms skipping atom i 187 # 188 # Methodology: 189 # . Calculate intrinsic state values for atoms. 190 # . Generate a distance matrix. 191 # . Use distance matrix to calculate DeltaIij matrix with each row i containing 192 # DeltaIij values corresponding to perturbation to intrinsic state value of atom 193 # i by atom j. 194 # . Calculate E-state values using DeltaIij matrix. 195 # . Assign E-state values to atoms. 196 # 197 # Notes: 198 # . The current release of MayaChemTools doesn't support calculation of Hydrogen 199 # E-state values. 200 # 201 sub GenerateDescriptors { 202 my($This) = @_; 203 204 # Cache appropriate molecule data... 205 $This->_SetupMoleculeDataCache(); 206 207 # Generate distance matrix... 208 if (!$This->_SetupDistanceMatrix()) { 209 carp "Warning: ${ClassName}->GenerateDescriptors: E-state values description generation didn't succeed: Couldn't generate a distance matrix..."; 210 return $This; 211 } 212 213 # Calculate EState values.. 214 if (!$This->_CalculateEStateValuesDescriptors()) { 215 carp "Warning: ${ClassName}->GenerateDescriptors: E-state values description generation didn't succeed: Couldn't calculate IState values for all atoms..."; 216 return $This; 217 } 218 219 # Set final descriptor values... 220 $This->_SetFinalValues(); 221 222 # Clear cached molecule data... 223 $This->_ClearMoleculeDataCache(); 224 225 return $This; 226 } 227 228 # Calculate E-state values for non-hydrogen atoms... 229 # 230 sub _CalculateEStateValuesDescriptors { 231 my($This) = @_; 232 my($DeltaIStateMatrix, $NumOfRows, $NumOfCols, $RowIndex, $AtomID, $EStateValue, $IStateValue, $DeltaIStateValue, @DeltaIStateMatrixRowValues); 233 234 %{$This->{EStateValues}} = (); 235 236 # Calculate intrinsic state values for non-hydrogen atoms... 237 if (!$This->_CalculateIStateValues()) { 238 return undef; 239 } 240 241 # Calculate delta intrinsic state matrix for non-hydrogen atoms... 242 $This->_CalculateDeltaIStateMatrix(); 243 244 # Get DeltaIState matrix information... 245 $DeltaIStateMatrix = $This->{DeltaIStateMatrix}; 246 ($NumOfRows, $NumOfCols) = $DeltaIStateMatrix->GetSize(); 247 248 # Calculate EState values... 249 ROWINDEX: for $RowIndex (0 .. ($NumOfRows - 1) ) { 250 $AtomID = $This->{AtomIndexToID}{$RowIndex}; 251 if ( !(exists($This->{IStateValues}{$AtomID})) ) { 252 next ROWINDEX; 253 } 254 $IStateValue = $This->{IStateValues}{$AtomID}; 255 256 @DeltaIStateMatrixRowValues = $DeltaIStateMatrix->GetRowValues($RowIndex); 257 $DeltaIStateValue = StatisticsUtil::Sum(\@DeltaIStateMatrixRowValues); 258 259 $EStateValue = $IStateValue + $DeltaIStateValue; 260 261 $This->{EStateValues}{$AtomID} = $EStateValue; 262 } 263 return $This; 264 } 265 266 # Calculate intrinsic state values for non-hydrogen atoms... 267 # 268 sub _CalculateIStateValues { 269 my($This) = @_; 270 my($Atom, $AtomID); 271 272 %{$This->{IStateValues}} = (); 273 274 ATOM: for $Atom (@{$This->{Atoms}}) { 275 # Irrespective of IgoreHydrogens value, just ignore hydrogens... 276 if ($Atom->IsHydrogen()) { 277 next ATOM; 278 } 279 $AtomID = $Atom->GetID(); 280 $This->{IStateValues}{$AtomID} = $This->_CalculateIStateValue($Atom); 281 if (!defined($This->{IStateValues}{$AtomID})) { 282 return undef; 283 } 284 } 285 return $This; 286 } 287 288 # Calculation intrinsic state value for non-hydrogen... 289 # 290 sub _CalculateIStateValue { 291 my($This, $Atom) = @_; 292 my($IStateValue, $Delta, $DeltaV, $PeriodNumber); 293 294 $PeriodNumber = $Atom->GetPeriodNumber(); 295 ($Delta, $DeltaV) = $This->_GetDeltaValues($Atom); 296 297 if (!(defined($Delta) && defined($PeriodNumber) && defined($DeltaV))) { 298 return undef; 299 } 300 301 $IStateValue = ($PeriodNumber && $Delta) ? (((2/$PeriodNumber)**2)*$DeltaV + 1)/$Delta : 0; 302 303 return $IStateValue; 304 } 305 306 # Get Delta and DeltaV values for atom... 307 # 308 sub _GetDeltaValues { 309 my($This, $Atom) = @_; 310 my($Delta, $DeltaV, $ValenceElectrons, $NumOfHydrogens); 311 312 ($Delta, $DeltaV) = (undef, undef); 313 314 $ValenceElectrons = $Atom->GetValenceElectrons(); 315 $NumOfHydrogens = $Atom->GetAtomicInvariantValue('H'); 316 317 $Delta = $Atom->GetAtomicInvariantValue('X'); 318 if (defined($ValenceElectrons) && defined($NumOfHydrogens)) { 319 $DeltaV = $ValenceElectrons - $NumOfHydrogens; 320 } 321 322 return ($Delta, $DeltaV); 323 } 324 325 # Calculate DeltaIState matrix for atoms with each row i containing DeltaIij values 326 # corresponding atom atoms i and j. 327 # 328 # Notes: 329 # . Matrix elements corresponding to hydrogen atoms or unconnected 330 # are assigned zero value. 331 # 332 sub _CalculateDeltaIStateMatrix { 333 my($This) = @_; 334 my($DistanceMatrix, $NumOfRows, $NumOfCols, $RowIndex, $ColIndex, $AtomID1, $AtomID2, $DeltaIStateMatrix, $IStateValue1, $IStateValue2, $GraphDistance, $DeltaIState12, $DeltaIState21, $SkipIndexCheck); 335 336 # Get distance matrix information... 337 $DistanceMatrix = $This->{DistanceMatrix}; 338 ($NumOfRows, $NumOfCols) = $DistanceMatrix->GetSize(); 339 340 # Initialize DeltaIState matrix... 341 $This->{DeltaIStateMatrix} = new Matrix($NumOfRows, $NumOfCols); 342 $DeltaIStateMatrix = $This->{DeltaIStateMatrix}; 343 344 $SkipIndexCheck = 1; 345 346 # Calculate DeltaIState matrix values... 347 ROWINDEX: for $RowIndex (0 .. ($NumOfRows - 1) ) { 348 $AtomID1 = $This->{AtomIndexToID}{$RowIndex}; 349 if (!exists($This->{IStateValues}{$AtomID1})) { 350 next ROWINDEX; 351 } 352 $IStateValue1 = $This->{IStateValues}{$AtomID1}; 353 354 COLINDEX: for $ColIndex (($RowIndex + 1) .. ($NumOfCols - 1) ) { 355 $AtomID2 = $This->{AtomIndexToID}{$ColIndex}; 356 if (!exists($This->{IStateValues}{$AtomID2})) { 357 next COLINDEX; 358 } 359 $IStateValue2 = $This->{IStateValues}{$AtomID2}; 360 361 # Make sure it's a connected atom... 362 $GraphDistance = $DistanceMatrix->GetValue($RowIndex, $ColIndex, $SkipIndexCheck); 363 if ($GraphDistance >= BigNumber) { 364 next COLINDEX; 365 } 366 367 $DeltaIState12 = ($IStateValue1 - $IStateValue2)/(($GraphDistance + 1)**2); 368 $DeltaIState21 = -$DeltaIState12; 369 370 # Set DeltaIState values... 371 $DeltaIStateMatrix->SetValue($RowIndex, $ColIndex, $DeltaIState12, $SkipIndexCheck); 372 $DeltaIStateMatrix->SetValue($ColIndex, $RowIndex, $DeltaIState21, $SkipIndexCheck); 373 } 374 } 375 } 376 377 # Setup distance matrix... 378 # 379 sub _SetupDistanceMatrix { 380 my($This) = @_; 381 382 $This->{DistanceMatrix} = $This->GetMolecule()->GetDistanceMatrix(); 383 384 if (!defined($This->{DistanceMatrix})) { 385 return undef; 386 } 387 388 return $This; 389 } 390 391 # Setup final descriptor values... 392 # 393 sub _SetFinalValues { 394 my($This) = @_; 395 my($Atom, $AtomID, $EStateValue); 396 397 ATOM: for $Atom (@{$This->{Atoms}}) { 398 $AtomID = $Atom->GetID(); 399 if (!exists $This->{EStateValues}{$AtomID}) { 400 next ATOM; 401 } 402 $EStateValue = $This->{EStateValues}{$AtomID}; 403 $This->SetDescriptorValue($Atom, $EStateValue); 404 } 405 406 return $This; 407 } 408 409 # Cache appropriate molecule data... 410 # 411 sub _SetupMoleculeDataCache { 412 my($This) = @_; 413 414 # Get all atoms including hydrogens to correctly map atom indicies to atom IDs for 415 # usage of distance matrix. 416 # 417 @{$This->{Atoms}} = $This->GetMolecule()->GetAtoms(); 418 419 # Get all atom IDs... 420 my(@AtomIDs); 421 @AtomIDs = (); 422 @AtomIDs = map { $_->GetID() } @{$This->{Atoms}}; 423 424 # Set AtomIndex to AtomID hash... 425 %{$This->{AtomIndexToID}} = (); 426 @{$This->{AtomIndexToID}}{ (0 .. $#AtomIDs) } = @AtomIDs; 427 428 return $This; 429 } 430 431 # Clear cached molecule data... 432 # 433 sub _ClearMoleculeDataCache { 434 my($This) = @_; 435 436 @{$This->{Atoms}} = (); 437 438 return $This; 439 } 440 441 # Return a string containg data for EStateValuesDescriptors object... 442 # 443 sub StringifyEStateValuesDescriptors { 444 my($This) = @_; 445 my($EStateValuesDescriptorsString); 446 447 # Type of AtomicValues... 448 $EStateValuesDescriptorsString = "AtomicDescriptorType: $This->{Type}; IgnoreHydrogens: " . ($This->{IgnoreHydrogens} ? "Yes" : "No"); 449 450 # Setup atomic descriptor information... 451 my($AtomID, $DescriptorValue, @DescriptorValuesInfo, %DescriptorValues); 452 453 @DescriptorValuesInfo = (); 454 %DescriptorValues = $This->GetDescriptorValues(); 455 456 for $AtomID (sort { $a <=> $b } keys %DescriptorValues) { 457 $DescriptorValue = $DescriptorValues{$AtomID}; 458 $DescriptorValue = (TextUtil::IsEmpty($DescriptorValue) || $DescriptorValue =~ /^None$/i) ? 'None' : MathUtil::round($DescriptorValue, 3) + 0; 459 push @DescriptorValuesInfo, "$AtomID:$DescriptorValue"; 460 } 461 $EStateValuesDescriptorsString .= "; AtomIDs:EStateValuesDescriptors: <" . TextUtil::JoinWords(\@DescriptorValuesInfo, ", ", 0) . ">"; 462 463 return $EStateValuesDescriptorsString; 464 } 465 466 # Is it a EStateValuesDescriptors object? 467 sub _IsEStateValuesDescriptors { 468 my($Object) = @_; 469 470 return (Scalar::Util::blessed($Object) && $Object->isa($ClassName)) ? 1 : 0; 471 } 472