MayaChemTools

   1 package AtomTypes::AtomicInvariantsAtomTypes;
   2 #
   3 # File: AtomicInvariantsAtomTypes.pm
   4 # Author: Manish Sud <msud@san.rr.com>
   5 #
   6 # Copyright (C) 2024 Manish Sud. All rights reserved.
   7 #
   8 # This file is part of MayaChemTools.
   9 #
  10 # MayaChemTools is free software; you can redistribute it and/or modify it under
  11 # the terms of the GNU Lesser General Public License as published by the Free
  12 # Software Foundation; either version 3 of the License, or (at your option) any
  13 # later version.
  14 #
  15 # MayaChemTools is distributed in the hope that it will be useful, but without
  16 # any warranty; without even the implied warranty of merchantability of fitness
  17 # for a particular purpose.  See the GNU Lesser General Public License for more
  18 # details.
  19 #
  20 # You should have received a copy of the GNU Lesser General Public License
  21 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  22 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  23 # Boston, MA, 02111-1307, USA.
  24 #
  25 
  26 use strict;
  27 use Carp;
  28 use Exporter;
  29 use Scalar::Util ();
  30 use AtomTypes::AtomTypes;
  31 use Molecule;
  32 
   33 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
   34
   35 @ISA = qw(AtomTypes::AtomTypes Exporter);  # Inherit from AtomTypes::AtomTypes and Exporter
   36 @EXPORT = qw();  # Nothing is exported by default
   37 @EXPORT_OK = qw(IsAtomicInvariantAvailable GetAvailableAtomicInvariants);  # Importable on request
   38
   39 %EXPORT_TAGS = (all  => [@EXPORT, @EXPORT_OK]);  # ':all' tag covers both export lists
   40
   41 # Setup class variables: class name, atomic invariants order and the abbreviation/description maps; all are file-scoped and filled in by _InitializeClass()...
   42 my($ClassName, @AtomicInvariantsOrder, %AvailableAtomicInvariants, %AvailableAtomicInvariantsByDescription);
   43 _InitializeClass();
   44
   45 # Overload Perl functions: string conversion of an object is handled by StringifyAtomicInvariantsAtomTypes...
   46 use overload '""' => 'StringifyAtomicInvariantsAtomTypes';
  47 
  48 # Class constructor...
  49 sub new {
  50   my($Class, %NamesAndValues) = @_;
  51 
  52   # Initialize object...
  53   my $This = $Class->SUPER::new();
  54   bless $This, ref($Class) || $Class;
  55   $This->_InitializeAtomicInvariantsAtomTypes();
  56 
  57   $This->_InitializeAtomicInvariantsAtomTypesProperties(%NamesAndValues);
  58 
  59   return $This;
  60 }
  61 
  62 # Initialize class ...
  63 sub _InitializeClass {
  64   #Class name...
  65   $ClassName = __PACKAGE__;
  66 
  67   # Initialize class atomic invariants...
  68   _InitializeClassAtomicInvariants();
  69 }
  70 
  71 # Initialize class level atomic invariants information which doesn't change during
  72 # instantiations of objects...
  73 #
  74 sub _InitializeClassAtomicInvariants {
  75   # Available atomic invariants for generating atom types...
  76   #
  77   %AvailableAtomicInvariants = ();
  78   %AvailableAtomicInvariants = ('AS' => 'AtomSymbol|ElementSymbol',
  79                                 'X' => 'NumOfNonHydrogenAtomNeighbors|NumOfHeavyAtomNeighbors',
  80                                 'BO' => 'SumOfBondOrdersToNonHydrogenAtoms|SumOfBondOrdersToHeavyAtoms',
  81                                 'LBO' => 'LargestBondOrderToNonHydrogenAtoms|LargestBondOrderToHeavyAtoms',
  82                                 'SB' => 'NumOfSingleBondsToNonHydrogenAtoms|NumOfSingleBondsToHeavyAtoms',
  83                                 'DB' => 'NumOfDoubleBondsToNonHydrogenAtoms|NumOfDoubleBondsToHeavyAtoms',
  84                                 'TB' => 'NumOfTripleBondsToNonHydrogenAtoms|NumOfTripleBondsToHeavyAtoms',
  85                                 'H' => 'NumOfImplicitAndExplicitHydrogens',
  86                                 'Ar' => 'Aromatic',
  87                                 'RA' => 'RingAtom',
  88                                 'FC' => 'FormalCharge',
  89                                 'MN' => 'MassNumber',
  90                                 'SM' => 'SpinMultiplicity');
  91 
  92   # Setup available atomic invariants description to abbreviation map...
  93   #
  94   my($Key, $Value, $Description, @Descriptions);
  95   %AvailableAtomicInvariantsByDescription = ();
  96   while (($Key, $Value) = each %AvailableAtomicInvariants) {
  97     @Descriptions = ($Value =~ /|/) ? (split /\|/, $Value) : ($Value);
  98     for $Description (@Descriptions) {
  99       $AvailableAtomicInvariantsByDescription{$Description} = $Key;
 100     }
 101   }
 102 
 103   # Atomic invariants order used for generating atom types...
 104   #
 105   @AtomicInvariantsOrder = ();
 106   @AtomicInvariantsOrder = ('AS', 'X', 'BO', 'LBO', 'SB', 'DB', 'TB', 'H', 'Ar', 'RA', 'FC', 'MN', 'SM');
 107 }
 108 
 109 # Initialize object data...
 110 #
 111 sub _InitializeAtomicInvariantsAtomTypes {
 112   my($This) = @_;
 113 
 114   # Type of AtomTypes...
 115   $This->{Type} = 'AtomicInvariants';
 116 
 117   # By default hydrogens are also assigned atom types...
 118   $This->{IgnoreHydrogens} = 0;
 119 
 120   # Initialize atom types information...
 121   $This->_InitializeAtomTypesInformation();
 122 
 123   return $This;
 124 }
 125 
 126 # Inialize atomic invariants information used for generating atom types...
 127 #
 128 sub _InitializeAtomTypesInformation {
 129   my($This) = @_;
 130 
 131   # Default atomic invariants to use for generating atom types: AS, X, BO, H, FC
 132   #
 133   %{$This->{AtomicInvariantsToUse}} = ();
 134   %{$This->{AtomicInvariantsToUse}} = ('AS' => 1, 'X' => 1, 'BO' => 1, 'LBO' => 0,
 135                                        'SB' => 0, 'DB' => 0, 'TB' => 0,
 136                                        'H' => 1, 'Ar' => 0, 'RA' => 0, 'FC' => 1, 'MN' => 0, 'SM' => 0);
 137 
 138   return $This;
 139 }
 140 
 141 # Initialize object properties...
 142 #
 143 sub _InitializeAtomicInvariantsAtomTypesProperties {
 144   my($This, %NamesAndValues) = @_;
 145 
 146   my($Name, $Value, $MethodName);
 147   while (($Name, $Value) = each  %NamesAndValues) {
 148     $MethodName = "Set${Name}";
 149     $This->$MethodName($Value);
 150   }
 151 
 152   # Make sure molecule object was specified...
 153   if (!exists $NamesAndValues{Molecule}) {
 154     croak "Error: ${ClassName}->New: Object can't be instantiated without specifying molecule...";
 155   }
 156 
 157   return $This;
 158 }
 159 
 160 # Disable change of AvailableAtomicInvariants...
 161 #
 162 sub SetAvailableAtomicInvariants {
 163   my($This) = @_;
 164 
 165   carp "Warning: ${ClassName}->SetAtomicInvariantsOrder: Available atomic invariants can't be changed...";
 166 
 167   return $This;
 168 }
 169 
 170 # Disable change of atomic invariants order used for generation of atom types...
 171 #
 172 sub SetAtomicInvariantsOrder {
 173   my($This) = @_;
 174 
 175   carp "Warning: ${ClassName}->SetAtomicInvariantsOrder: Atomic invariants order can't be changed...";
 176 
 177   return $This;
 178 }
 179 
 180 # Set atom invariants to use for atom types...
 181 #
 182 sub SetAtomicInvariantsToUse {
 183   my($This, @Values) = @_;
 184   my($FirstValue, $TypeOfFirstValue, $AtomicInvariant, $SpecifiedAtomicInvariant, $AtomicInvariantValue, @SpecifiedAtomicInvariants, %AtomicInvariantsToUse);
 185 
 186   if (!@Values) {
 187     carp "Warning: ${ClassName}->SetAtomicInvariantsToUse: No values specified...";
 188     return;
 189   }
 190 
 191   $FirstValue = $Values[0];
 192   $TypeOfFirstValue = ref $FirstValue;
 193   @SpecifiedAtomicInvariants = ();
 194 
 195   if ($TypeOfFirstValue =~ /^ARRAY/) {
 196     push @SpecifiedAtomicInvariants, @{$FirstValue};
 197   }
 198   else {
 199     push @SpecifiedAtomicInvariants, @Values;
 200   }
 201 
 202   # Make sure specified AtomicInvariants are valid...
 203   for $SpecifiedAtomicInvariant (@SpecifiedAtomicInvariants) {
 204     if (exists $AvailableAtomicInvariants{$SpecifiedAtomicInvariant}) {
 205       $AtomicInvariant = $SpecifiedAtomicInvariant;
 206     }
 207     elsif ($AvailableAtomicInvariantsByDescription{$SpecifiedAtomicInvariant}) {
 208       $AtomicInvariant = $AvailableAtomicInvariantsByDescription{$SpecifiedAtomicInvariant};
 209     }
 210     else {
 211       croak "Error: ${ClassName}->SetAtomicInvariantsToUse: Specified atomic invariant, $SpecifiedAtomicInvariant, is not supported...\n ";
 212     }
 213     $AtomicInvariantsToUse{$AtomicInvariant} = 1;
 214   }
 215 
 216   # Make sure AtomSymbol is always used...
 217   if (!(exists($AtomicInvariantsToUse{AS}) && $AtomicInvariantsToUse{AS} == 1)) {
 218     croak "Error: ${ClassName}->SetAtomicInvariantsToUse: AtomicInvariant AtomSymbol must be specified...\n ";
 219   }
 220 
 221   # Set atomic invariants...
 222   for $AtomicInvariant (keys %{$This->{AtomicInvariantsToUse}}) {
 223     $This->{AtomicInvariantsToUse}{$AtomicInvariant} = 0;
 224     if (exists $AtomicInvariantsToUse{$AtomicInvariant}) {
 225       $This->{AtomicInvariantsToUse}{$AtomicInvariant} = 1;
 226     }
 227   }
 228 
 229   return $This;
 230 }
 231 
 232 # Is it an available AtomicInvariant?
 233 #
 234 sub IsAtomicInvariantAvailable {
 235   my($FirstParameter, $SecondParameter) = @_;
 236   my($This, $AtomicInvariant, $Status);
 237 
 238   if ((@_ == 2) && (_IsAtomicInvariantsAtomTypes($FirstParameter))) {
 239     ($This, $AtomicInvariant) = ($FirstParameter, $SecondParameter);
 240   }
 241   else {
 242     $AtomicInvariant = $FirstParameter;
 243   }
 244   $Status = exists($AvailableAtomicInvariants{$AtomicInvariant}) || exists($AvailableAtomicInvariantsByDescription{$AtomicInvariant}) ? 1 : 0;
 245 
 246   return $Status;
 247 }
 248 
 249 # Get a hash containing available atomic invariants and their description
 250 # as key/value pairs.
 251 #
 252 sub GetAvailableAtomicInvariants {
 253   return %AvailableAtomicInvariants;
 254 }
 255 
 256 # Get an array containing order of atomic invariants used to generate atom types...
 257 #
 258 sub GetAtomicInvariantsOrder {
 259   return @AtomicInvariantsOrder;
 260 }
 261 
 262 # Assign atom types to all atoms...
 263 #
 264 # Let:
 265 #   AS = Atom symbol corresponding to element symbol
 266 #
 267 #   X<n>   = Number of non-hydrogen atom neighbors or heavy atoms attached to atom
 268 #   BO<n> = Sum of bond orders to non-hydrogen atom neighbors or heavy atoms attached to atom
 269 #   LBO<n> = Largest bond order of non-hydrogen atom neighbors or heavy atoms attached to atom
 270 #   SB<n> = Number of single bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
 271 #   DB<n> = Number of double bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
 272 #   TB<n> = Number of triple bonds to non-hydrogen atom neighbors or heavy atoms attached to atom
 273 #   H<n>   = Number of implicit and explicit hydrogens for atom
 274 #   Ar     = Aromatic annotation indicating whether atom is aromatic
 275 #   RA     = Ring atom annotation indicating whether atom is a ring
 276 #   FC<+n/-n> = Formal charge assigned to atom
 277 #   MN<n> = Mass number indicating isotope other than most abundant isotope
 278 #   SM<n> = Spin multiplicity of atom. Possible values: 1 (singlet), 2 (doublet) or 3 (triplet)
 279 #
 280 # Then:
 281 #
 282 #   AtomType specification corresponds to:
 283 #
 284 #     AS.X<n>.BO<n>.LBO<n>.<SB><n>.<DB><n>.<TB><n>.H<n>.Ar.RA.FC<+n/-n>.MN<n>.SM<n>
 285 #
 286 # Except for AS which is a required atomic invariant in atom types, all other atomic invariants are
 287 # optional. Default atomic invariants used for AtomID are: AS, X<n>, BO<n>, H<n>, FC<+n/-n>.
 288 # AtomID specification doesn't include atomic invariants with zero or undefined values.
 289 #
 290 # Notes:
 291 #   . AtomicInvariants with zero or undefined values are not shown.
 292 #   . LBO with value of 1 is not shown. And absence of LBO in AtomTypes implies the largest
 293 #     bond order value is one.
 294 #   . SB, DB and TB with values of zero are not shown.
 295 #   . The difference in BO and X values corresponds to numbed of pi electrons [ Ref 57 ].
 296 #
 297 # Examples of atomic invariant atom types:
 298 #
 299 #   O.X1.BO1.H1 - Hydroxyl oxygen in carboxylate with attached hydrogen and no explicit charge
 300 #   O.X1.BO1.FC-1 - Hydroxyl ozygen in carboxylate with explicit negative charge
 301 #   O.X1.BO2 - Carbonyl oxygen in carboxylate with double bond to carbon
 302 #   O.X2.BO2 - Hydroxyl ozygen in carboxylate attached to carbonyl carbon and another heavy atom
 303 #
 304 #   C.X2.BO3.H1.Ar - Aromatic carbon
 305 #
 306 sub AssignAtomTypes {
 307   my($This) = @_;
 308   my($Atom, $AtomType, $AtomicInvariant, $AtomicInvariantValue, @AtomicInvariants);
 309 
 310   ATOM: for $Atom ($This->GetMolecule()->GetAtoms()) {
 311     if ($This->{IgnoreHydrogens} && $Atom->IsHydrogen()) {
 312       next ATOM;
 313     }
 314     @AtomicInvariants = ();
 315 
 316     # Go over atomic invariants...
 317     ATOMICINVARIANT: for $AtomicInvariant (@AtomicInvariantsOrder) {
 318       if (!$This->{AtomicInvariantsToUse}{$AtomicInvariant}) {
 319         next ATOMICINVARIANT;
 320       }
 321       $AtomicInvariantValue = $Atom->GetAtomicInvariantValue($AtomicInvariant);
 322       if (!(defined($AtomicInvariantValue) && $AtomicInvariantValue)) {
 323         next ATOMICINVARIANT;
 324       }
 325       if ($AtomicInvariant =~ /^AS$/i) {
 326         push @AtomicInvariants, $AtomicInvariantValue;
 327       }
 328       elsif ($AtomicInvariant =~ /^Ar$/i) {
 329         push @AtomicInvariants, "Ar";
 330       }
 331       elsif ($AtomicInvariant =~ /^RA$/i) {
 332         push @AtomicInvariants, "RA";
 333       }
 334       elsif ($AtomicInvariant =~ /^FC$/i) {
 335         push @AtomicInvariants, ($AtomicInvariantValue > 0) ? "FC+${AtomicInvariantValue}" : "FC${AtomicInvariantValue}";
 336       }
 337       elsif ($AtomicInvariant =~ /^LBO$/i) {
 338         if ($AtomicInvariantValue > 1) {
 339           push @AtomicInvariants, "${AtomicInvariant}${AtomicInvariantValue}";
 340         }
 341       }
 342       else {
 343         push @AtomicInvariants, "${AtomicInvariant}${AtomicInvariantValue}";
 344       }
 345     }
 346     # Create and assign atom type to atom...
 347     $AtomType = TextUtil::JoinWords(\@AtomicInvariants, ".", 0);
 348     $This->SetAtomType($Atom, $AtomType);
 349   }
 350   return $This;
 351 }
 352 
 353 # Are all atoms types successfully assigned?
 354 #
 355 # Notes:
 356 #   . Base class method is overridden to always return 1: An appropriate value, atomic invariant
 357 #     atom types delimited by dot, is always assigned to atoms.
 358 #
 359 sub IsAtomTypesAssignmentSuccessful {
 360   my($This) = @_;
 361 
 362   return 1;
 363 }
 364 
 365 # Return a string containg data for AtomicInvariantsAtomTypes object...
 366 #
 367 sub StringifyAtomicInvariantsAtomTypes {
 368   my($This) = @_;
 369   my($AtomTypesString);
 370 
 371   # Type of AtomTypes...
 372   $AtomTypesString = "AtomTypes: $This->{Type}; IgnoreHydrogens: " . ($This->{IgnoreHydrogens} ? "Yes" : "No");
 373 
 374   # AvailableAtomicInvariants and AtomicInvariantsToUse...
 375   my($AtomicInvariant, @AtomicInvariants, @AtomicInvariantsToUse);
 376 
 377   @AtomicInvariantsToUse = ();
 378   @AtomicInvariants = ();
 379   for $AtomicInvariant (@AtomicInvariantsOrder) {
 380     push @AtomicInvariants, "$AtomicInvariant: $AvailableAtomicInvariants{$AtomicInvariant}";
 381     if ($This->{AtomicInvariantsToUse}{$AtomicInvariant}) {
 382       push @AtomicInvariantsToUse, $AtomicInvariant;
 383     }
 384   }
 385   $AtomTypesString .= "; AtomicInvariantsToUse: <" . TextUtil::JoinWords(\@AtomicInvariantsToUse, ", ", 0) . ">";
 386   $AtomTypesString .= "; AtomicInvariantsOrder: <" . TextUtil::JoinWords(\@AtomicInvariantsOrder, ", ", 0) . ">";
 387   $AtomTypesString .= "; AvailableAtomicInvariants: <" . TextUtil::JoinWords(\@AtomicInvariants, ", ", 0) . ">";
 388 
 389   # Setup atom types information...
 390   my($AtomID, $AtomType, @AtomTypesInfo, %AssignedAtomTypes);
 391 
 392   @AtomTypesInfo = ();
 393   %AssignedAtomTypes = $This->GetAtomTypes();
 394 
 395   for $AtomID (sort { $a <=> $b } keys %AssignedAtomTypes) {
 396     $AtomType = $AssignedAtomTypes{$AtomID} ? $AssignedAtomTypes{$AtomID} : 'None';
 397     push @AtomTypesInfo, "$AtomID:$AtomType";
 398   }
 399   $AtomTypesString .= "; AtomIDs:AtomTypes: <" . TextUtil::JoinWords(\@AtomTypesInfo, ", ", 0) . ">";
 400 
 401   return $AtomTypesString;
 402 }
 403 
 404 # Is it a AtomicInvariantsAtomTypes object?
 405 sub _IsAtomicInvariantsAtomTypes {
 406   my($Object) = @_;
 407 
 408   return (Scalar::Util::blessed($Object) && $Object->isa($ClassName)) ? 1 : 0;
 409 }
 410