package Fingerprints::EStateIndiciesFingerprints;
#
# File: EStateIndiciesFingerprints.pm
# Author: Manish Sud <msud@san.rr.com>
#
# Copyright (C) 2024 Manish Sud. All rights reserved.
#
# This file is part of MayaChemTools.
#
# MayaChemTools is free software; you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation; either version 3 of the License, or (at your option) any
# later version.
#
# MayaChemTools is distributed in the hope that it will be useful, but without
# any warranty; without even the implied warranty of merchantability or fitness
# for a particular purpose. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
# write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
# Boston, MA, 02111-1307, USA.
#

use strict;
use warnings;
use Carp;
use Exporter;
use Text::ParseWords;
use TextUtil ();
use FileUtil ();
use MathUtil ();
use Fingerprints::Fingerprints;
use Molecule;
use AtomTypes::EStateAtomTypes;
use AtomicDescriptors::EStateValuesDescriptors;

use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);

@ISA = qw(Fingerprints::Fingerprints Exporter);
@EXPORT = qw();
@EXPORT_OK = qw();

%EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]);

# Setup class variables...
my($ClassName);
_InitializeClass();

# Overload Perl functions...
use overload '""' => 'StringifyEStateIndiciesFingerprints';

# Class constructor...
#
# Takes a list of name and value pairs; each name N is applied by calling the
# method SetN with the corresponding value. A Molecule name must be present or
# the constructor croaks. Returns the new object.
#
sub new {
  my($Class, %NamesAndValues) = @_;

  # Initialize object: let the parent class build it, then re-bless it into
  # this class...
  my $This = $Class->SUPER::new();
  bless $This, ref($Class) || $Class;
  $This->_InitializeEStateIndiciesFingerprints();

  $This->_InitializeEStateIndiciesFingerprintsProperties(%NamesAndValues);

  return $This;
}

# Initialize object data to its default values...
#
sub _InitializeEStateIndiciesFingerprints {
  my($This) = @_;

  # EStateIndicies is a vector containing sum of E-state values for E-state atom types
  #
  $This->{Type} = 'EStateIndicies';

  # EStateAtomTypesSetToUse for EStateIndicies:
  #
  # ArbitrarySize - Corresponds to only E-state atom types detected in molecule
  # FixedSize - Corresponds to fixed number of E-state atom types previously defined [ Ref 77 ]
  #
  # The default EStateAtomTypesSetToUse value for EStateIndicies fingerprints type: ArbitrarySize.
  # Possible values: ArbitrarySize or FixedSize.
  #
  # It is left empty here so that SetEStateAtomTypesSetToUse can detect an
  # explicit assignment; the default is applied in _InitializeEstateIndicies.
  #
  $This->{EStateAtomTypesSetToUse} = '';

  # Assigned E-state atom types, keyed by atom ID...
  %{$This->{EStateAtomTypes}} = ();

  # Vector values precision for real values during E-state indicies...
  $This->{ValuesPrecision} = 3;

  # Calculated E-state values (keyed by atom ID) and indicies (keyed by E-state
  # atom type) for generating E-state indicies fingerprints...
  %{$This->{EStateValues}} = ();
  %{$This->{EStateIndicies}} = ();

  return $This;
}

# Initialize class ...
sub _InitializeClass {
  # Class name, used as a prefix in error and warning messages...
  $ClassName = __PACKAGE__;

  return;
}

# Initialize object properties from the constructor's name and value pairs...
#
# Each pair is dispatched to the method named Set<Name>. The setters are not
# looked up beforehand because some of them may be provided by a parent class.
#
sub _InitializeEStateIndiciesFingerprintsProperties {
  my($This, %NamesAndValues) = @_;

  my($Name, $Value, $MethodName);
  while (($Name, $Value) = each %NamesAndValues) {
    $MethodName = "Set${Name}";
    $This->$MethodName($Value);
  }

  # Make sure molecule object was specified...
  if (!exists $NamesAndValues{Molecule}) {
    croak "Error: ${ClassName}->New: Object can't be instantiated without specifying molecule...";
  }

  $This->_InitializeEstateIndicies();

  return $This;
}

# Initialize E-state indicies: apply the default atom types set, choose the
# fingerprints vector type, and initialize the fingerprints vector...
#
sub _InitializeEstateIndicies {
  my($This) = @_;

  # Set default EStateAtomTypesSetToUse...
  if (!$This->{EStateAtomTypesSetToUse}) {
    $This->{EStateAtomTypesSetToUse} = 'ArbitrarySize';
  }

  # Vector type...
  $This->{VectorType} = 'FingerprintsVector';

  # A fixed atom types set yields values in a fixed order; otherwise the
  # values correspond to whatever atom types were detected.
  if ($This->{EStateAtomTypesSetToUse} =~ /^FixedSize$/i) {
    $This->{FingerprintsVectorType} = 'OrderedNumericalValues';
  }
  else {
    $This->{FingerprintsVectorType} = 'NumericalValues';
  }

  $This->_InitializeFingerprintsVector();

  return $This;
}

# Disable set size method: always croaks...
#
sub SetSize {
  croak "Error: ${ClassName}->SetSize: Can't change size: It's not allowed...";
}

# Set E-state atom types set to use...
#
# Accepts exactly ArbitrarySize or FixedSize (case insensitive). Croaks on any
# other value, including an undefined one, and when a value has already been
# set. Returns the object.
#
sub SetEStateAtomTypesSetToUse {
  my($This, $Value) = @_;

  if ($This->{EStateAtomTypesSetToUse}) {
    croak "Error: ${ClassName}->SetEStateAtomTypesSetToUse: Can't change size: It's already set...";
  }

  # The pattern is anchored at both ends so that values merely starting with a
  # supported name (e.g. FixedSizeFoo) are rejected here instead of being
  # silently handled as ArbitrarySize by the anchored checks made later on.
  if (!defined($Value) || $Value !~ /^(?:ArbitrarySize|FixedSize)$/i) {
    my $ValueText = defined($Value) ? $Value : '';
    croak "Error: ${ClassName}->SetEStateAtomTypesSetToUse: Unknown EStateAtomTypesSetToUse value: $ValueText; Supported values: ArbitrarySize or FixedSize";
  }

  $This->{EStateAtomTypesSetToUse} = $Value;

  return $This;
}

# Set vector values precision for real values for E-state indicies...
#
# Croaks unless TextUtil::IsPositiveInteger accepts the value. Returns the
# object.
#
sub SetValuesPrecision {
  my($This, $Value) = @_;

  if (!TextUtil::IsPositiveInteger($Value)) {
    croak "Error: ${ClassName}->SetValuesPrecision: ValuesPrecision value, $Value, is not valid: It must be a positive integer...";
  }
  $This->{ValuesPrecision} = $Value;

  return $This;
}

# Generate fingerprints description...
#
# Returns the explicitly set description when there is one; otherwise a string
# of the form Type:EStateAtomTypesSetToUse.
#
sub GetDescription {
  my($This) = @_;

  # Is description explicitly set?
  if (exists $This->{Description}) {
    return $This->{Description};
  }

  # Generate fingerprints description...

  return "$This->{Type}:$This->{EStateAtomTypesSetToUse}";
}

# Generate electrotopological state indicies (E-state) [ Ref 75-78 ] fingerprints for
# non-hydrogen atoms in a molecule...
#
# EStateIndicies fingerprints constitute a vector containing sum of E-state values
# for E-state atom types. Two types of E-state atom types set size are allowed:
#
# ArbitrarySize - Corresponds to only E-state atom types detected in molecule
# FixedSize - Corresponds to fixed number of E-state atom types previously defined
#
# Module AtomTypes::EStateAtomTypes.pm is used to assign E-state atom types to
# non-hydrogen atoms in the molecule which is able to assign atom types to any valid
# atom group. However, for FixedSize value of EStateAtomTypesSetToUse, only a fixed
# set of E-state atom types corresponding to specific atom groups [ Appendix III in
# Ref 77 ] are used for fingerprints.
#
# The fixed size E-state atom type set size used during generation of fingerprints corresponding
# FixedSize value of EStateAtomTypesSetToUse contains 87 E-state non-hydrogen atom types
# in EStateAtomTypes.csv data file distributed with MayaChemTools.
#
# Combination of Type and EStateAtomTypesSetToUse allow generation of 2 different types of
# E-state indicies fingerprints:
#
# Type                 EStateAtomTypesSetToUse
#
# EStateIndicies       ArbitrarySize      [ default fingerprints ]
# EStateIndicies       FixedSize
#
# The default is to generate EStateIndicies type fingerprints corresponding to ArbitrarySize as
# EStateAtomTypesSetToUse value.
#
# On failure to assign atom types or to calculate E-state values, a warning is
# issued and the object is returned without final fingerprints being set. The
# cached molecule data is cleared on every path out of this method.
#
sub GenerateFingerprints {
  my($This) = @_;

  # Cache appropriate molecule data...
  $This->_SetupMoleculeDataCache();

  # Assign E-state atom types...
  if (!$This->_AssignEStateAtomTypes()) {
    # Do not leave cached atoms behind on the failure path...
    $This->_ClearMoleculeDataCache();
    carp "Warning: ${ClassName}->GenerateFingerprints: $This->{Type} fingerprints generation didn't succeed: Couldn't assign valid E-state atom types to all atoms...";
    return $This;
  }

  # Calculate E-state indicies...
  if (!$This->_CalculateEStateIndicies()) {
    # Do not leave cached atoms behind on the failure path...
    $This->_ClearMoleculeDataCache();
    carp "Warning: ${ClassName}->GenerateFingerprints: $This->{Type} fingerprints generation didn't succeed: Couldn't calculate E-state values for all atoms...";
    return $This;
  }

  # Set final fingerprints...
  $This->_SetFinalFingerprints();

  # Clear cached molecule data...
  $This->_ClearMoleculeDataCache();

  return $This;
}

# Assign E-state atom types...
#
# Stores the atom type of each cached atom in EStateAtomTypes keyed by atom ID.
# Returns the object on success and a false value when the atom types
# assignment is reported as unsuccessful.
#
sub _AssignEStateAtomTypes {
  my($This) = @_;

  %{$This->{EStateAtomTypes}} = ();

  # Assign E-state atom types...
  my $EStateAtomTypes = AtomTypes::EStateAtomTypes->new('Molecule' => $This->{Molecule}, 'IgnoreHydrogens' => 1);
  $EStateAtomTypes->AssignAtomTypes();

  # Make sure atom types assignment is successful...
  if (!$EStateAtomTypes->IsAtomTypesAssignmentSuccessful()) {
    return;
  }

  # Collect assigned atom types...
  for my $Atom (@{$This->{Atoms}}) {
    my $AtomID = $Atom->GetID();
    $This->{EStateAtomTypes}{$AtomID} = $EStateAtomTypes->GetAtomType($Atom);
  }

  return $This;
}

# Calculate E-state indicies by summing up E-state values for specific
# E-state atom types...
#
# Returns the object on success and a false value when E-state values could not
# be calculated.
#
sub _CalculateEStateIndicies {
  my($This) = @_;

  # Start from a clean slate so that generating fingerprints again on the same
  # object doesn't add to the sums of an earlier run...
  %{$This->{EStateIndicies}} = ();

  # Calculate E-state values to generate E-state indicies...
  if (!$This->_CalculateEStateValuesDescriptors()) {
    return;
  }

  # Calculate E-state indicies...
  for my $Atom (@{$This->{Atoms}}) {
    my $AtomID = $Atom->GetID();

    my $AtomType = $This->{EStateAtomTypes}{$AtomID};
    my $EStateValue = $This->{EStateValues}{$AtomID};

    if (!exists $This->{EStateIndicies}{$AtomType}) {
      $This->{EStateIndicies}{$AtomType} = 0;
    }

    $This->{EStateIndicies}{$AtomType} += $EStateValue;
  }

  return $This;
}

# Calculate E-state values for E-state indicies...
#
# Stores the E-state value of each cached atom in EStateValues keyed by atom
# ID. Returns the object on success and a false value when descriptors
# generation is reported as unsuccessful.
#
sub _CalculateEStateValuesDescriptors {
  my($This) = @_;

  %{$This->{EStateValues}} = ();

  # Calculate and assign E-state values...
  my $EStateValuesDescriptors = AtomicDescriptors::EStateValuesDescriptors->new('Molecule' => $This->{Molecule});
  $EStateValuesDescriptors->GenerateDescriptors();

  # Make sure E-state values calculation is successful...
  if (!$EStateValuesDescriptors->IsDescriptorsGenerationSuccessful()) {
    return;
  }

  # Collect assigned E-state values...
  for my $Atom (@{$This->{Atoms}}) {
    my $AtomID = $Atom->GetID();
    $This->{EStateValues}{$AtomID} = $EStateValuesDescriptors->GetDescriptorValue($Atom);
  }

  return $This;
}

# Set final fingerprints for E-state indicies...
#
# Value IDs are the E-state atom types prefixed with S; values are the summed
# E-state values rounded to ValuesPrecision.
#
sub _SetFinalFingerprints {
  my($This) = @_;
  my(@Values, @IDs);

  # Mark successful generation of fingerprints...
  $This->{FingerprintsGenerated} = 1;

  my $ValuesPrecision = $This->{ValuesPrecision};

  if ($This->{EStateAtomTypesSetToUse} =~ /^FixedSize$/i) {
    # Use fixed size E-state atom types set for non-hydrogen atoms; atom types
    # not present in the molecule get a value of 0...
    for my $AtomType (@{AtomTypes::EStateAtomTypes::GetAllPossibleEStateNonHydrogenAtomTypes()}) {
      push @IDs, "S${AtomType}";
      push @Values, exists($This->{EStateIndicies}{$AtomType}) ? MathUtil::round($This->{EStateIndicies}{$AtomType}, $ValuesPrecision) : 0;
    }
  }
  else {
    # Use only the detected atom types, sorted by name...
    for my $AtomType (sort keys %{$This->{EStateIndicies}}) {
      push @IDs, "S${AtomType}";
      push @Values, MathUtil::round($This->{EStateIndicies}{$AtomType}, $ValuesPrecision);
    }
  }

  # Add IDs and values to fingerprint vector...
  if (@IDs) {
    $This->{FingerprintsVector}->AddValueIDs(\@IDs);
  }
  $This->{FingerprintsVector}->AddValues(\@Values);

  return $This;
}

# Cache appropriate molecule data...
#
sub _SetupMoleculeDataCache {
  my($This) = @_;

  # Get all non-hydrogen atoms: the IsHydrogen check is requested with its
  # result negated...
  my $NegateAtomCheckMethod = 1;
  @{$This->{Atoms}} = $This->GetMolecule()->GetAtoms("IsHydrogen", $NegateAtomCheckMethod);

  return $This;
}

# Clear cached molecule data...
#
sub _ClearMoleculeDataCache {
  my($This) = @_;

  @{$This->{Atoms}} = ();

  return $This;
}

# Return a string containing data for EStateIndiciesFingerprints object...
#
# This method implements the overloaded stringification operator.
#
sub StringifyEStateIndiciesFingerprints {
  my($This) = @_;

  # Type of Keys...
  my $EStateIndiciesFingerprintsString = "Type: $This->{Type}; EStateAtomTypesSetToUse: $This->{EStateAtomTypesSetToUse}";

  # Fingerprint vector...
  $EStateIndiciesFingerprintsString .= "; FingerprintsVector: < $This->{FingerprintsVector} >";

  return $EStateIndiciesFingerprintsString;
}
