MayaChemTools

   1 package AtomTypes::AtomTypes;
   2 #
   3 # File: AtomTypes.pm
   4 # Author: Manish Sud <msud@san.rr.com>
   5 #
   6 # Copyright (C) 2024 Manish Sud. All rights reserved.
   7 #
   8 # This file is part of MayaChemTools.
   9 #
  10 # MayaChemTools is free software; you can redistribute it and/or modify it under
  11 # the terms of the GNU Lesser General Public License as published by the Free
  12 # Software Foundation; either version 3 of the License, or (at your option) any
  13 # later version.
  14 #
  15 # MayaChemTools is distributed in the hope that it will be useful, but without
  16 # any warranty; without even the implied warranty of merchantability or fitness
  17 # for a particular purpose.  See the GNU Lesser General Public License for more
  18 # details.
  19 #
  20 # You should have received a copy of the GNU Lesser General Public License
  21 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  22 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  23 # Boston, MA, 02111-1307, USA.
  24 #
  25 
  26 use strict;
  27 use Carp;
  28 use Exporter;
  29 use Scalar::Util ();
  30 use Text::ParseWords;
  31 use ObjectProperty;
  32 use TextUtil ();
  33 
  34 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
  35 
  36 @ISA = qw(ObjectProperty Exporter);
  37 @EXPORT = qw(LoadAtomTypesData);
  38 @EXPORT_OK = qw();
  39 
  40 %EXPORT_TAGS = (all  => [@EXPORT, @EXPORT_OK]);
  41 
  42 # Setup class variables...
  43 my($ClassName);
  44 _InitializeClass();
  45 
  46 # Class constructor...
  47 sub new {
  48   my($Class, %NamesAndValues) = @_;
  49 
  50   # Initialize object...
  51   my $This = {};
  52   bless $This, ref($Class) || $Class;
  53   $This->_InitializeAtomTypes();
  54 
  55   $This->_InitializeAtomTypesProperties(%NamesAndValues);
  56 
  57   return $This;
  58 }
  59 
  60 # Initialize object data...
  61 #
  62 sub _InitializeAtomTypes {
  63   my($This) = @_;
  64 
  65   # Molecule object...
  66   $This->{Molecule} = '';
  67 
  68   # Type of AtomType...
  69   $This->{Type} = '';
  70 
  71   # By default, atom types are also assigned to hydrogens...
  72   $This->{IgnoreHydrogens} = 0;
  73 
  74 }
  75 
  76 # Initialize class ...
  77 sub _InitializeClass {
  78   #Class name...
  79   $ClassName = __PACKAGE__;
  80 }
  81 
  82 
  83 # Initialize object properties....
  84 sub _InitializeAtomTypesProperties {
  85   my($This, %NamesAndValues) = @_;
  86 
  87   my($Name, $Value, $MethodName);
  88   while (($Name, $Value) = each  %NamesAndValues) {
  89     $MethodName = "Set${Name}";
  90     $This->$MethodName($Value);
  91   }
  92 
  93   return $This;
  94 }
  95 
  96 # Set molecule object and make sure it's not already set...
  97 #
  98 sub SetMolecule {
  99   my($This, $Molecule) = @_;
 100 
 101   if ($This->{Molecule}) {
 102     croak "Error: ${ClassName}->SetMolecule: Can't change molecule object:  It's already set...";
 103   }
 104   $This->{Molecule} = $Molecule;
 105 
 106   # Weaken the reference to disable increment of reference count...
 107   Scalar::Util::weaken($This->{Molecule});
 108 
 109   return $This;
 110 }
 111 
 112 # Set type and make sure it's not already set...
 113 #
 114 sub SetType {
 115   my($This, $Type) = @_;
 116 
 117   if ($This->{Type}) {
 118     croak "Error: ${ClassName}->SetType: Can't change AtomType type:  It's already set...";
 119   }
 120   $This->{Type} = $Type;
 121 
 122   return $This;
 123 }
 124 
 125 # Set specific atom type...
 126 #
 127 sub SetAtomType {
 128   my($This, $Atom, $AtomType) = @_;
 129   my($MethodName);
 130 
 131   # Assign AtomType to Atom...
 132   $MethodName = "Set" . $This->{Type} . "AtomType";
 133   $Atom->$MethodName($AtomType);
 134 
 135   return $This;
 136 }
 137 
 138 # Get specific atom type...
 139 #
 140 sub GetAtomType {
 141   my($This, $Atom) = @_;
 142   my($MethodName, $AtomType);
 143 
 144   $MethodName = "Get" . $This->{Type} . "AtomType";
 145   $AtomType = $Atom->$MethodName();
 146 
 147   return defined $AtomType ? $AtomType : 'None';
 148 }
 149 
 150 # Get atom types for all atoms as a hash with atom ID and atom types as
 151 # key/value pairs.
 152 #
 153 # Notes:
 154 #   . Irrespective of ignore hydrogens value, atom type for hydrogens are also
 155 #     returned. Based on value of ignore hydrogens, atom type assignment methodology
 156 #     might igonore hydrogens and value of None is returned for the hydrogens.
 157 #
 158 sub GetAtomTypes {
 159   my($This) = @_;
 160   my($Atom, $AtomID,  %AtomTypesMap);
 161 
 162   %AtomTypesMap = ();
 163 
 164   if (!$This->{Molecule}) {
 165     return %AtomTypesMap;
 166   }
 167 
 168   for $Atom ($This->{Molecule}->GetAtoms()) {
 169     $AtomID = $Atom->GetID();
 170     $AtomTypesMap{$AtomID} = $This->GetAtomType($Atom);
 171   }
 172 
 173   return %AtomTypesMap;
 174 }
 175 
 176 # Are all atoms types successfully assigned?
 177 #
 178 # Notes:
 179 #   . Dynamic checking of atom types assignment for atoms eliminates the need
 180 #     to check and synchronize valid atom types during SetAtomType.
 181 #
 182 sub IsAtomTypesAssignmentSuccessful {
 183   my($This) = @_;
 184   my($Atom, $AtomType);
 185 
 186   ATOM: for $Atom ($This->{Molecule}->GetAtoms()) {
 187     if ($Atom->IsHydrogen() && $This->{IgnoreHydrogens}) {
 188       next ATOM;
 189     }
 190     $AtomType = $This->GetAtomType($Atom);
 191     if ($AtomType =~ /^None$/i) {
 192       return 0;
 193     }
 194   }
 195 
 196   return 1;
 197 }
 198 
 199 # Load atom types data from the specified CSV atom type file into the specified
 200 # hash reference.
 201 #
 202 # The lines starting with # are treated as comments and ignored. First line
 203 # not starting with # must contain column labels and the number of columns in
 204 # all other data rows must match the number of column labels.
 205 #
 206 # The first column is assumed to contain atom types; all other columns contain data
 207 # as indicated in their column labels.
 208 #
 209 # In order to avoid dependence of data access on the specified column labels, the
 210 # column data is loaded into hash with Column<Num> and AtomType as hash keys;
 211 # however, the data for the first column which is treated as AtomTypes is also loaded
 212 # into an array with AtomTypes as hash key. The format of the data structure loaded
 213 # into a specified hash reference is:
 214 #
 215 # @{$AtomTypesDataMapRef->{AtomTypes}} - Array of all possible atom types for all atoms
 216 # @{$AtomTypesDataMapRef->{NonHydrogenAtomTypes}} - Array of all possible atom types for non-hydrogen atoms
 217 # @{$AtomTypesDataMapRef->{ColLabels}} - Array of column labels
 218 # %{$AtomTypesDataMapRef->{DataCol<Num>}} - Hash keys pair: <DataCol<Num>, AtomType>
 219 #
 220 # Caveats:
 221 #   . The column number start from 1.
 222 #   . Column data for first column is not loaded into <Column<Num>, AtomType> hash keys pairs.
 223 #
 224 # AtomType file examples: SYBYLAtomTypes.csv, DREIDINGAtomTypes.csv,
 225 # MMFF94AtomTypes.csv etc.
 226 #
 227 # This functionality can be either invoked as a class function or an
 228 # object method.
 229 #
 230 sub LoadAtomTypesData {
 231   my($FirstParameter, @OtherParamaters) = @_;
 232   my($AtomTypesDataFile, $AtomTypesDataMapRef, $InDelim, $Line, $NumOfCols, $ColIndex, $ColNum, $ColLabel, $ColValue, $AtomType, %AtomTypes, @LineWords, @ColLabels, @ColDataLabels);
 233 
 234   if (Scalar::Util::blessed($FirstParameter)) {
 235     ($AtomTypesDataFile, $AtomTypesDataMapRef) = @OtherParamaters;
 236   }
 237   else {
 238     ($AtomTypesDataFile, $AtomTypesDataMapRef) = @_;
 239   }
 240 
 241   $InDelim = "\,";
 242   open ATOMTYPESDATAFILE, "$AtomTypesDataFile" or croak "Couldn't open $AtomTypesDataFile: $! ...";
 243 
 244   # Skip lines up to column labels...
 245   LINE: while ($Line = TextUtil::GetTextLine(\*ATOMTYPESDATAFILE)) {
 246     if ($Line !~ /^#/) {
 247       last LINE;
 248     }
 249   }
 250 
 251   # Initialize data map...
 252   %{$AtomTypesDataMapRef} = ();
 253   @{$AtomTypesDataMapRef->{AtomTypes}} = ();
 254   @{$AtomTypesDataMapRef->{NonHydrogenAtomTypes}} = ();
 255   @{$AtomTypesDataMapRef->{ColLabels}} = ();
 256 
 257   %AtomTypes = ();
 258 
 259   # Process column labels...
 260   @ColLabels= quotewords($InDelim, 0, $Line);
 261   $NumOfCols = @ColLabels;
 262   push @{$AtomTypesDataMapRef->{ColLabels}}, @ColLabels;
 263 
 264   # Set up column data labels for storing the data...
 265   @ColDataLabels = ();
 266   for $ColNum (1 .. $NumOfCols) {
 267     $ColLabel = "DataCol${ColNum}";
 268     push @ColDataLabels, $ColLabel;
 269   }
 270 
 271   # Initialize column data hash...
 272   for $ColIndex (1 .. $#ColDataLabels) {
 273     $ColLabel = $ColDataLabels[$ColIndex];
 274     %{$AtomTypesDataMapRef->{$ColLabel}} = ();
 275   }
 276 
 277   # Process atom types data assuming first column to be atom type..
 278   LINE: while ($Line = TextUtil::GetTextLine(\*ATOMTYPESDATAFILE)) {
 279     if ($Line =~ /^#/) {
 280       next LINE;
 281     }
 282     @LineWords = quotewords($InDelim, 0, $Line);
 283     if (@LineWords != $NumOfCols) {
 284       croak "Error: The number of data fields, @LineWords, in $AtomTypesDataFile must be $NumOfCols.\nLine: $Line...";
 285     }
 286     $AtomType = $LineWords[0];
 287     if (exists $AtomTypes{$AtomType}) {
 288       carp "Warning: Ignoring data for atom type, $AtomType, in file $AtomTypesDataFile: It has already been loaded.\nLine: $Line....";
 289       next LINE;
 290     }
 291 
 292     $AtomTypes{$AtomType} = $AtomType;
 293     push @{$AtomTypesDataMapRef->{AtomTypes}}, $AtomType;
 294 
 295     # Is it a non-hydrogen atom type?
 296     if ($AtomType !~ /^H/i || $AtomType =~ /^(HAL|HET|HEV)$/i || $AtomType =~ /^(He4|Ho6|Hf3|Hg1)/) {
 297       # Non-hydrogen SYBYL atom types starting with H: HAL, HET, HEV
 298       # Non-hydrogen UFF atom types starting with H: He4+4, Ho6+3, Hf3+4, Hg1+2
 299       #
 300       push @{$AtomTypesDataMapRef->{NonHydrogenAtomTypes}}, $AtomType;
 301     }
 302 
 303     # Track column data values...
 304     for $ColIndex (1 .. $#LineWords) {
 305       $ColLabel = $ColDataLabels[$ColIndex]; $ColValue = $LineWords[$ColIndex];
 306       $AtomTypesDataMapRef->{$ColLabel}{$AtomType} = $ColValue;
 307     }
 308   }
 309   close ATOMTYPESDATAFILE;
 310 }
 311