package AtomTypes::AtomTypes;
#
# File: AtomTypes.pm
# Author: Manish Sud <msud@san.rr.com>
#
# Copyright (C) 2024 Manish Sud. All rights reserved.
#
# This file is part of MayaChemTools.
#
# MayaChemTools is free software; you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation; either version 3 of the License, or (at your option) any
# later version.
#
# MayaChemTools is distributed in the hope that it will be useful, but without
# any warranty; without even the implied warranty of merchantability of fitness
# for a particular purpose. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
# write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
# Boston, MA, 02111-1307, USA.
#

use strict;
use Carp;
use Exporter;
use Scalar::Util ();
use Text::ParseWords;
use ObjectProperty;
use TextUtil ();

# Package variables used for inheritance and by Exporter...
our(@ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);

@ISA = qw(ObjectProperty Exporter);
@EXPORT = qw(LoadAtomTypesData);
@EXPORT_OK = qw();

%EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]);

# Setup class variables...
my($ClassName);
_InitializeClass();

# Class constructor...
#
# Parameters:
#   $Class          - Class name, or an existing object whose class is reused.
#   %NamesAndValues - Property names and values; each pair is applied by calling
#                     the corresponding Set<Name> method on the new object.
#
# Returns: The newly created object.
#
sub new {
  my($Class, %NamesAndValues) = @_;

  # Initialize object...
  my $This = {};
  bless $This, ref($Class) || $Class;
  $This->_InitializeAtomTypes();

  $This->_InitializeAtomTypesProperties(%NamesAndValues);

  return $This;
}

# Initialize object data to its default values...
#
sub _InitializeAtomTypes {
  my($This) = @_;

  # Molecule object; an empty string indicates that it hasn't been set...
  $This->{Molecule} = '';

  # Type of AtomType; an empty string indicates that it hasn't been set...
  $This->{Type} = '';

  # By default, atom types are also assigned to hydrogens...
  $This->{IgnoreHydrogens} = 0;

}

# Initialize class ...
sub _InitializeClass {
  # Class name; it's used as a prefix in error messages...
  $ClassName = __PACKAGE__;
}


# Initialize object properties by calling Set<Name>(<Value>) for each specified
# name and value pair.
#
# Notes:
#   . Names are not validated here. How a Set<Name> call without a matching
#     method in this class is handled depends on the parent classes, which
#     are not defined in this file.
#
# Returns: The object itself.
#
sub _InitializeAtomTypesProperties {
  my($This, %NamesAndValues) = @_;

  while (my($Name, $Value) = each %NamesAndValues) {
    my $MethodName = "Set${Name}";
    $This->$MethodName($Value);
  }

  return $This;
}

# Set molecule object and make sure it's not already set...
#
# Parameters:
#   $Molecule - Reference to a molecule object.
#
# Returns: The object itself.
#
# Croaks when a molecule has already been set or when the specified value is
# defined but isn't a reference.
#
sub SetMolecule {
  my($This, $Molecule) = @_;

  if ($This->{Molecule}) {
    croak "Error: ${ClassName}->SetMolecule: Can't change molecule object: It's already set...";
  }

  # Scalar::Util::weaken croaks on a defined value which isn't a reference.
  # Validate before storing anything so that a failed call doesn't leave a
  # partially updated object behind...
  if (defined($Molecule) && !ref($Molecule)) {
    croak "Error: ${ClassName}->SetMolecule: Can't set molecule object: Specified value must be a reference to a molecule object...";
  }
  $This->{Molecule} = $Molecule;

  # Weaken the reference to disable increment of reference count...
  Scalar::Util::weaken($This->{Molecule});

  return $This;
}

# Set type and make sure it's not already set...
#
# Parameters:
#   $Type - Name of the atom types scheme; it's used by SetAtomType and
#           GetAtomType to build the names of the methods called on atoms.
#
# Returns: The object itself.
#
# Croaks when a type has already been set.
#
sub SetType {
  my($This, $Type) = @_;

  if ($This->{Type}) {
    croak "Error: ${ClassName}->SetType: Can't change AtomType type: It's already set...";
  }
  $This->{Type} = $Type;

  return $This;
}

# Set specific atom type...
#
# The atom type is stored on the atom by calling its Set<Type>AtomType method,
# where <Type> corresponds to the value set using SetType.
#
# Returns: The object itself.
#
sub SetAtomType {
  my($This, $Atom, $AtomType) = @_;

  # Assign AtomType to Atom...
  my $MethodName = "Set" . $This->{Type} . "AtomType";
  $Atom->$MethodName($AtomType);

  return $This;
}

# Get specific atom type...
#
# The atom type is retrieved from the atom by calling its Get<Type>AtomType
# method, where <Type> corresponds to the value set using SetType.
#
# Returns: The atom type, or the string None when the atom returns an
# undefined value.
#
sub GetAtomType {
  my($This, $Atom) = @_;

  my $MethodName = "Get" . $This->{Type} . "AtomType";
  my $AtomType = $Atom->$MethodName();

  return defined $AtomType ? $AtomType : 'None';
}

# Get atom types for all atoms as a hash with atom ID and atom types as
# key/value pairs.
#
# Notes:
#   . Irrespective of ignore hydrogens value, atom type for hydrogens are also
#     returned.
#     Based on value of ignore hydrogens, atom type assignment methodology
#     might ignore hydrogens and value of None is returned for the hydrogens.
#
# Returns: A hash, as a list of key/value pairs, mapping atom ID to atom type.
# It's empty when no molecule object is available.
#
sub GetAtomTypes {
  my($This) = @_;
  my(%AtomTypesMap);

  %AtomTypesMap = ();

  if (!$This->{Molecule}) {
    return %AtomTypesMap;
  }

  for my $Atom ($This->{Molecule}->GetAtoms()) {
    my $AtomID = $Atom->GetID();
    $AtomTypesMap{$AtomID} = $This->GetAtomType($Atom);
  }

  return %AtomTypesMap;
}

# Are all atoms types successfully assigned?
#
# Notes:
#   . Dynamic checking of atom types assignment for atoms eliminates the need
#     to check and synchronize valid atom types during SetAtomType.
#   . Hydrogens are skipped during the check when IgnoreHydrogens is set.
#   . An atom is considered unassigned when GetAtomType returns None.
#
# Returns: 1 when every checked atom has an atom type; otherwise, 0. A value of
# 0 is also returned when no molecule object is available.
#
sub IsAtomTypesAssignmentSuccessful {
  my($This) = @_;

  # The molecule is initialized to an empty string and SetMolecule holds it as
  # a weak reference; consequently, it may be missing here. Treat it the same
  # way as GetAtomTypes does instead of dying on a method call without an
  # object: nothing could have been assigned...
  if (!$This->{Molecule}) {
    return 0;
  }

  ATOM: for my $Atom ($This->{Molecule}->GetAtoms()) {
    if ($Atom->IsHydrogen() && $This->{IgnoreHydrogens}) {
      next ATOM;
    }
    my $AtomType = $This->GetAtomType($Atom);
    if ($AtomType =~ /^None$/i) {
      return 0;
    }
  }

  return 1;
}

# Load atom types data from the specified CSV atom type file into the specified
# hash reference.
#
# The lines starting with # are treated as comments and ignored. First line
# not starting with # must contain column labels and the number of columns in
# all other data rows must match the number of column labels.
#
# The first column is assumed to contain atom types; all other columns contain data
# as indicated in their column labels.
#
# In order to avoid dependence of data access on the specified column labels, the
# column data is loaded into hash with DataCol<Num> and AtomType as hash keys;
# however, the data for the first column which is treated as AtomTypes is also loaded
# into an array with AtomTypes as hash key.
# The format of the data structure loaded into a specified hash reference is:
#
# @{$AtomTypesDataMapRef->{AtomTypes}} - Array of all possible atom types for all atoms
# @{$AtomTypesDataMapRef->{NonHydrogenAtomTypes}} - Array of all possible atom types for non-hydrogen atoms
# @{$AtomTypesDataMapRef->{ColLabels}} - Array of column labels
# %{$AtomTypesDataMapRef->{DataCol<Num>}} - Hash keys pair: <DataCol<Num>, AtomType>
#
# Caveats:
#   . The column numbers start from 1.
#   . Column data for first column is not loaded into <DataCol<Num>, AtomType> hash keys pairs.
#   . Any existing contents of the specified hash are discarded.
#   . Data for an atom type which has already been loaded is ignored with a warning.
#
# AtomType file examples: SYBYLAtomTypes.csv, DREIDINGAtomTypes.csv,
# MMFF94AtomTypes.csv etc.
#
# This functionality can be either invoked as a class function or an
# object method.
#
# Parameters:
#   $AtomTypesDataFile   - Name of the CSV atom types data file.
#   $AtomTypesDataMapRef - Reference to the hash to load the data into.
#
# Returns: The value returned by close for the data file.
#
# Croaks when the file can't be opened or when the number of fields in a data
# row doesn't match the number of column labels.
#
sub LoadAtomTypesData {
  my($FirstParameter, @OtherParameters) = @_;
  my($AtomTypesDataFile, $AtomTypesDataMapRef);

  # Drop the object when invoked as an object method...
  if (Scalar::Util::blessed($FirstParameter)) {
    ($AtomTypesDataFile, $AtomTypesDataMapRef) = @OtherParameters;
  }
  else {
    ($AtomTypesDataFile, $AtomTypesDataMapRef) = @_;
  }

  my $InDelim = "\,";

  # Use three-argument open with a lexical file handle: the file name is never
  # interpreted as an open mode or a command, and the handle is automatically
  # closed when this function is left early through croak...
  open(my $DataFH, '<', $AtomTypesDataFile) or croak "Couldn't open $AtomTypesDataFile: $! ...";

  # Skip lines up to column labels...
  #
  # NOTE(review): TextUtil::GetTextLine is defined outside this file. The loops
  # below rely on it returning a false value at the end of the file; a line
  # which is false itself, such as 0, would also stop the processing.
  #
  my $Line;
  LINE: while ($Line = TextUtil::GetTextLine($DataFH)) {
    if ($Line !~ /^#/) {
      last LINE;
    }
  }

  # Initialize data map...
  %{$AtomTypesDataMapRef} = ();
  @{$AtomTypesDataMapRef->{AtomTypes}} = ();
  @{$AtomTypesDataMapRef->{NonHydrogenAtomTypes}} = ();
  @{$AtomTypesDataMapRef->{ColLabels}} = ();

  # Process column labels...
  my @ColLabels = quotewords($InDelim, 0, $Line);
  my $NumOfCols = @ColLabels;
  push @{$AtomTypesDataMapRef->{ColLabels}}, @ColLabels;

  # Set up column data labels for storing the data...
  my @ColDataLabels = map { "DataCol$_" } (1 .. $NumOfCols);

  # Initialize column data hash; index 0 is skipped as the first column
  # contains atom types...
  for my $ColIndex (1 .. $#ColDataLabels) {
    %{$AtomTypesDataMapRef->{$ColDataLabels[$ColIndex]}} = ();
  }

  # Track atom types which have already been loaded...
  my %AtomTypes = ();

  # Process atom types data assuming first column to be atom type..
  LINE: while ($Line = TextUtil::GetTextLine($DataFH)) {
    if ($Line =~ /^#/) {
      next LINE;
    }
    my @LineWords = quotewords($InDelim, 0, $Line);
    if (@LineWords != $NumOfCols) {
      # Report the number of fields instead of the field values...
      my $NumOfFields = @LineWords;
      croak "Error: The number of data fields, $NumOfFields, in $AtomTypesDataFile must be $NumOfCols.\nLine: $Line...";
    }
    my $AtomType = $LineWords[0];
    if (exists $AtomTypes{$AtomType}) {
      carp "Warning: Ignoring data for atom type, $AtomType, in file $AtomTypesDataFile: It has already been loaded.\nLine: $Line....";
      next LINE;
    }

    $AtomTypes{$AtomType} = $AtomType;
    push @{$AtomTypesDataMapRef->{AtomTypes}}, $AtomType;

    # Is it a non-hydrogen atom type?
    if ($AtomType !~ /^H/i || $AtomType =~ /^(HAL|HET|HEV)$/i || $AtomType =~ /^(He4|Ho6|Hf3|Hg1)/) {
      # Non-hydrogen SYBYL atom types starting with H: HAL, HET, HEV
      # Non-hydrogen UFF atom types starting with H: He4+4, Ho6+3, Hf3+4, Hg1+2
      #
      push @{$AtomTypesDataMapRef->{NonHydrogenAtomTypes}}, $AtomType;
    }

    # Track column data values...
    for my $ColIndex (1 .. $#LineWords) {
      my $ColLabel = $ColDataLabels[$ColIndex];
      $AtomTypesDataMapRef->{$ColLabel}{$AtomType} = $LineWords[$ColIndex];
    }
  }

  return close($DataFH);
}