MayaChemTools

   1 package AtomTypes::DREIDINGAtomTypes;
   2 #
   3 # File: DREIDINGAtomTypes.pm
   4 # Author: Manish Sud <msud@san.rr.com>
   5 #
   6 # Copyright (C) 2024 Manish Sud. All rights reserved.
   7 #
   8 # This file is part of MayaChemTools.
   9 #
  10 # MayaChemTools is free software; you can redistribute it and/or modify it under
  11 # the terms of the GNU Lesser General Public License as published by the Free
  12 # Software Foundation; either version 3 of the License, or (at your option) any
  13 # later version.
  14 #
  15 # MayaChemTools is distributed in the hope that it will be useful, but without
  16 # any warranty; without even the implied warranty of merchantability or fitness
  17 # for a particular purpose.  See the GNU Lesser General Public License for more
  18 # details.
  19 #
  20 # You should have received a copy of the GNU Lesser General Public License
  21 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  22 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  23 # Boston, MA, 02111-1307, USA.
  24 #
  25 
  26 use strict;
  27 use Carp;
  28 use Exporter;
  29 use Scalar::Util ();
  30 use AtomTypes::AtomTypes;
  31 use Molecule;
  32 
# Package globals used for inheritance and for the export lists...
use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);

# This class derives from AtomTypes::AtomTypes and also from Exporter; the three
# data access functions listed below are exported by default...
@ISA = qw(AtomTypes::AtomTypes Exporter);
@EXPORT = qw(GetDREIDINGAtomTypesData GetAllPossibleDREIDINGAtomTypes GetAllPossibleDREIDINGNonHydrogenAtomTypes);
@EXPORT_OK = qw();

# The 'all' tag covers everything in @EXPORT and @EXPORT_OK...
%EXPORT_TAGS = (all  => [@EXPORT, @EXPORT_OK]);

# Setup class variables...
#
# $ClassName holds the package name used in error and warning messages;
# %DREIDINGAtomTypesDataMap holds the atom types data, which is loaded on demand.
my($ClassName, %DREIDINGAtomTypesDataMap);
_InitializeClass();

# Overload Perl functions...
#
# Using an object in string context calls StringifyDREIDINGAtomTypes.
use overload '""' => 'StringifyDREIDINGAtomTypes';
  47 
  48 # Class constructor...
  49 sub new {
  50   my($Class, %NamesAndValues) = @_;
  51 
  52   # Initialize object...
  53   my $This = $Class->SUPER::new();
  54   bless $This, ref($Class) || $Class;
  55   $This->_InitializeDREIDINGAtomTypes();
  56 
  57   $This->_InitializeDREIDINGAtomTypesProperties(%NamesAndValues);
  58 
  59   return $This;
  60 }
  61 
  62 # Initialize class ...
  63 sub _InitializeClass {
  64   #Class name...
  65   $ClassName = __PACKAGE__;
  66 
  67   # Initialize the data hash. It'll be loaded on demand later...
  68   %DREIDINGAtomTypesDataMap = ();
  69 }
  70 
  71 
  72 # Initialize object data...
  73 #
  74 sub _InitializeDREIDINGAtomTypes {
  75   my($This) = @_;
  76 
  77   # Type of AtomTypes...
  78   $This->{Type} = 'DREIDING';
  79 
  80   # By default, DREIDING atom types are also assigned to hydrogens...
  81   $This->{IgnoreHydrogens} = 0;
  82 
  83   return $This;
  84 }
  85 
  86 # Initialize object properties...
  87 #
  88 sub _InitializeDREIDINGAtomTypesProperties {
  89   my($This, %NamesAndValues) = @_;
  90 
  91   my($Name, $Value, $MethodName);
  92   while (($Name, $Value) = each  %NamesAndValues) {
  93     $MethodName = "Set${Name}";
  94     $This->$MethodName($Value);
  95   }
  96 
  97   # Make sure molecule object was specified...
  98   if (!exists $NamesAndValues{Molecule}) {
  99     croak "Error: ${ClassName}->New: Object can't be instantiated without specifying molecule...";
 100   }
 101 
 102   return $This;
 103 }
 104 
 105 # Get DREIDING atom types and associated data loaded from DREIDING data file as
 106 # a reference to hash with the following hash data format:
 107 #
 108 # @{$DREIDINGAtomTypesDataMap{AtomTypes}} - Array of all possible atom types for all atoms
 109 # @{$DREIDINGAtomTypesDataMap{NonHydrogenAtomTypes}} - Array of all possible atom types for non-hydrogen atoms
 110 # @{$DREIDINGAtomTypesDataMap->{ColLabels}} - Array of column labels
 111 # %{$DREIDINGAtomTypesDataMap->{DataCol<Num>}} - Hash keys pair: <DataCol<Num>, AtomType>
 112 #
 113 # This functionality can be either invoked as a class function or an
 114 # object method.
 115 #
 116 sub GetDREIDINGAtomTypesData {
 117 
 118   # Make sure data is loaded...
 119   _CheckAndLoadDREIDINGAtomTypesData();
 120 
 121   return \%DREIDINGAtomTypesDataMap;
 122 }
 123 
 124 # Get all possible DREIDING atom types corresponding to hydrogen and non-hydrogen
 125 # atoms as an array reference...
 126 #
 127 # This functionality can be either invoked as a class function or an
 128 # object method.
 129 #
 130 sub GetAllPossibleDREIDINGAtomTypes {
 131   return _GetAllPossibleDREIDINGAtomTypes();
 132 }
 133 
 134 # Get all possible DREIDING atom types corresponding to non-hydrogen atoms
 135 # as an array reference...
 136 #
 137 # This functionality can be either invoked as a class function or an
 138 # object method.
 139 #
 140 sub GetAllPossibleDREIDINGNonHydrogenAtomTypes {
 141   my($NonHydrogensOnly);
 142 
 143   $NonHydrogensOnly = 1;
 144   return _GetAllPossibleDREIDINGAtomTypes($NonHydrogensOnly);
 145 }
 146 
 147 # Get all possible DREIDING atom types as an array reference...
 148 #
 149 sub _GetAllPossibleDREIDINGAtomTypes {
 150   my($NonHydrogensOnly) = @_;
 151   my($DREIDINGAtomTypesDataRef);
 152 
 153   $NonHydrogensOnly = defined $NonHydrogensOnly ? $NonHydrogensOnly : 0;
 154 
 155   $DREIDINGAtomTypesDataRef = GetDREIDINGAtomTypesData();
 156 
 157   return $NonHydrogensOnly ? \@{$DREIDINGAtomTypesDataRef->{NonHydrogenAtomTypes}}: \@{$DREIDINGAtomTypesDataRef->{AtomTypes}};
 158 }
 159 
 160 # Assign DREIDING [ Ref 88 ] atom types to all atoms...
 161 #
 162 # Notes:
 163 #     o 37 DREIDING atom types are listed
 164 #     o AtomTypes::DREIDINGAtomTypes.pm module is used to assign DREIDING atom types
 165 #     o Units:
 166 #         o ValenceBondRadius and NonBondRadius: Angstroms
 167 #         o ValenceAngle: Degrees
 168 #     o Five-character mnemonic label for DREIDING atom types
 169 #         o First two characters correspond to chemical symbol with an underscore as second
 170 #           character for elements with one character symbol
 171 #         o Third character describes hybridization: 1 - linear (sp); 2 - trigonal (sp2);
 172 #           3 = tetrahedral (sp3); R - sp2 involved in resonance situation
 173 #         o Fourth character used to indicate number of implicit hydrogens
 174 #         o Fourth and fifth chracters are used as indicators of alternate parameters: formal oxidation
 175 #           state, bridging hydrogens and so on. The _HB type denotes a hydrogen atom capable
 176 #           of forming hdyrogen bonds attached to (N, O, F). The H_b is the bridging hydrogen
 177 #           of diborane.
 178 #
 179 #
 180 sub AssignAtomTypes {
 181   my($This) = @_;
 182   my($Atom, $AtomType);
 183 
 184   ATOM: for $Atom ($This->GetMolecule()->GetAtoms()) {
 185     if ($This->{IgnoreHydrogens} && $Atom->IsHydrogen()) {
 186       next ATOM;
 187     }
 188     $AtomType = $This->_GetAtomType($Atom);
 189     $This->SetAtomType($Atom, $AtomType);
 190   }
 191   return $This;
 192 }
 193 
 194 # Get DREIDING atom type for atom...
 195 #
 196 sub _GetAtomType {
 197   my($This, $Atom) = @_;
 198   my($AtomType);
 199 
 200   $AtomType = '';
 201 
 202   ATOM: {
 203     if ($Atom->IsCarbon()) {
 204       $AtomType = $This->_GetAtomTypeForCarbon($Atom);
 205       last ATOM;
 206     }
 207     if ($Atom->IsNitrogen()) {
 208       $AtomType = $This->_GetAtomTypeForNitrogen($Atom);
 209       last ATOM;
 210     }
 211     if ($Atom->IsOxygen()) {
 212       $AtomType = $This->_GetAtomTypeForOxygen($Atom);
 213       last ATOM;
 214     }
 215     if ($Atom->IsPhosphorus()) {
 216       $AtomType = $This->_GetAtomTypeForPhosphorus($Atom);
 217       last ATOM;
 218     }
 219     if ($Atom->IsSulfur()) {
 220       $AtomType = $This->_GetAtomTypeForSulfur($Atom);
 221       last ATOM;
 222     }
 223     if ($Atom->IsHydrogen()) {
 224       $AtomType = $This->_GetAtomTypeForHydrogen($Atom);
 225       last ATOM;
 226     }
 227     $AtomType = $This->_GetAtomTypeForOtherAtoms($Atom);
 228   }
 229 
 230   return $AtomType;
 231 }
 232 
 233 # Get DREIDING atom type for Carbon atom...
 234 #
 235 sub _GetAtomTypeForCarbon {
 236   my($This, $Atom) = @_;
 237   my($AtomType, $NumOfSigmaBonds, $NumOfPiBonds);
 238 
 239   $AtomType = 'None';
 240 
 241   ($NumOfSigmaBonds, $NumOfPiBonds) = ('0') x 2;
 242 
 243   ($NumOfSigmaBonds, $NumOfPiBonds) = $Atom->GetNumOfSigmaAndPiBondsToNonHydrogenAtoms();
 244   $NumOfSigmaBonds += $Atom->GetAtomicInvariantValue('H');
 245 
 246   ATOMTYPE: {
 247     if ($Atom->IsAromatic()) {
 248       $AtomType = 'C_R';
 249       last ATOMTYPE;
 250     }
 251 
 252     # Only single bonds...
 253     if ($NumOfPiBonds == 0) {
 254       $AtomType = 'C_3';
 255       last ATOMTYPE;
 256     }
 257 
 258     # One double bond...
 259     if ($NumOfPiBonds == 1) {
 260       $AtomType = 'C_2';
 261       last ATOMTYPE;
 262     }
 263 
 264     # One triple bond or two double bonds...
 265     if ($NumOfPiBonds == 2) {
 266       $AtomType = 'C_1';
 267       last ATOMTYPE;
 268     }
 269 
 270     $AtomType = 'None';
 271     carp "Warning: ${ClassName}->_GetAtomTypeForCarbon: DREIDING atom types for Carbon cann't be assigned...";
 272   }
 273 
 274   return $AtomType;
 275 }
 276 
 277 # Get DREIDING atom type for Nitrogen atom...
 278 #
 279 sub _GetAtomTypeForNitrogen {
 280   my($This, $Atom) = @_;
 281   my($AtomType, $NumOfSigmaBonds, $NumOfPiBonds);
 282 
 283   $AtomType = 'None';
 284 
 285   ($NumOfSigmaBonds, $NumOfPiBonds) = ('0') x 2;
 286 
 287   ($NumOfSigmaBonds, $NumOfPiBonds) = $Atom->GetNumOfSigmaAndPiBondsToNonHydrogenAtoms();
 288   $NumOfSigmaBonds += $Atom->GetAtomicInvariantValue('H');
 289 
 290   ATOMTYPE: {
 291     if ($Atom->IsAromatic()) {
 292       $AtomType = 'N_R';
 293       last ATOMTYPE;
 294     }
 295 
 296     # Only single bonds...
 297     if ($NumOfPiBonds == 0) {
 298       $AtomType = 'N_3';
 299       last ATOMTYPE;
 300     }
 301 
 302     # One double bond...
 303     if ($NumOfPiBonds == 1) {
 304       $AtomType = 'N_2';
 305       last ATOMTYPE;
 306     }
 307 
 308     # One triple bond or two double bonds...
 309     if ($NumOfPiBonds == 2) {
 310       $AtomType = 'N_1';
 311       last ATOMTYPE;
 312     }
 313 
 314     $AtomType = 'None';
 315     carp "Warning: ${ClassName}->_GetAtomTypeForNitrogen: DREIDING atom types for Nitrogen cann't be assigned...";
 316   }
 317 
 318   return $AtomType;
 319 }
 320 
 321 # Get DREIDING atom type for Oxygen atom...
 322 #
 323 sub _GetAtomTypeForOxygen {
 324   my($This, $Atom) = @_;
 325   my($AtomType, $NumOfSigmaBonds, $NumOfPiBonds);
 326 
 327   $AtomType = 'None';
 328 
 329   ($NumOfSigmaBonds, $NumOfPiBonds) = ('0') x 2;
 330 
 331   ($NumOfSigmaBonds, $NumOfPiBonds) = $Atom->GetNumOfSigmaAndPiBondsToNonHydrogenAtoms();
 332   $NumOfSigmaBonds += $Atom->GetAtomicInvariantValue('H');
 333 
 334   ATOMTYPE: {
 335     if ($Atom->IsAromatic()) {
 336       $AtomType = 'O_R';
 337       last ATOMTYPE;
 338     }
 339 
 340     # Only single bonds...
 341     if ($NumOfPiBonds == 0) {
 342       $AtomType = 'O_3';
 343       last ATOMTYPE;
 344     }
 345 
 346     # One double bond...
 347     if ($NumOfPiBonds == 1) {
 348       $AtomType = 'O_2';
 349       last ATOMTYPE;
 350     }
 351 
 352     # One triple bond or two double bonds...
 353     if ($NumOfPiBonds == 2) {
 354       $AtomType = 'O_1';
 355       last ATOMTYPE;
 356     }
 357 
 358     $AtomType = 'None';
 359     carp "Warning: ${ClassName}->_GetAtomTypeForOxygen: DREIDING atom types for Oxygen cann't be assigned...";
 360   }
 361 
 362   return $AtomType;
 363 }
 364 
 365 # Get DREIDING atom type for Phosphorus atom...
 366 #
 367 sub _GetAtomTypeForPhosphorus {
 368   my($This, $Atom) = @_;
 369   my($AtomType);
 370 
 371   $AtomType = 'P_3';
 372 
 373   return $AtomType;
 374 }
 375 
 376 # Get DREIDING atom type for Sulfur atom...
 377 #
 378 sub _GetAtomTypeForSulfur {
 379   my($This, $Atom) = @_;
 380   my($AtomType);
 381 
 382   $AtomType = 'S_3';
 383 
 384   return $AtomType;
 385 }
 386 
 387 # Get DREIDING atom type for Hydrogen atom...
 388 #
 389 sub _GetAtomTypeForHydrogen {
 390   my($This, $Atom) = @_;
 391   my($AtomType, $NumOfNeighbors, $NeighborAtom, @NonHydrogenAtomNeighbors);
 392 
 393   @NonHydrogenAtomNeighbors = $Atom->GetNonHydrogenAtomNeighbors();
 394 
 395   $NumOfNeighbors = scalar @NonHydrogenAtomNeighbors;
 396   $NeighborAtom = $NonHydrogenAtomNeighbors[0];
 397 
 398   ATOMTYPE: {
 399     if ($NumOfNeighbors > 1) {
 400       # Bridging hydrogen as in B2H6
 401       $AtomType = 'H___b';
 402       last ATOMTYPE;
 403     }
 404 
 405     if ($NeighborAtom->GetAtomicNumber() =~ /^(7|8|9)$/) {
 406       # Involved in hydrogen bonding due to its attachment to N, O, or F
 407       $AtomType = 'H__HB';
 408       last ATOMTYPE;
 409     }
 410     $AtomType = 'H_';
 411   }
 412 
 413   return $AtomType;
 414 }
 415 
 416 # Get DREIDING atom type for atoms other than Carbon, Nitrogen, Oxygen, Phosporus,
 417 # Sulfur and Hydrogen...
 418 #
 419 sub _GetAtomTypeForOtherAtoms {
 420   my($This, $Atom) = @_;
 421   my($AtomType, $AtomicNumber, $AtomSymbol);
 422 
 423   $AtomType = 'None';
 424 
 425   $AtomicNumber = $Atom->GetAtomicNumber();
 426   $AtomSymbol = $Atom->GetAtomSymbol();
 427 
 428   ATOMICNUMBER: {
 429     if ($AtomicNumber =~ /^(9|17|35|53)$/i) {
 430       # F, Cl, Br, I
 431       $AtomType = length($AtomSymbol) == 1 ? "${AtomSymbol}_" : $AtomSymbol;
 432       last ATOMICNUMBER;
 433     }
 434 
 435     if ($AtomicNumber =~ /^5$/i) {
 436       # B: B_2 and B_3
 437       $AtomType = (($Atom->GetNumOfNonHydrogenAtomNeighbors() + $Atom->GetAtomicInvariantValue('H')) == 4) ? "B_3" : "B_2";
 438       last ATOMICNUMBER;
 439     }
 440 
 441     if ($AtomicNumber =~ /^(13|14|31|32|33|34|49|50|51|52)$/i) {
 442       # Al, Si, Ga, Ge, As, Se, In, Sn, Sb, Te
 443       $AtomType = "${AtomSymbol}3";
 444       last ATOMICNUMBER;
 445     }
 446 
 447     if ($AtomicNumber =~ /^(11|20|26|30)$/i) {
 448       # Na, Ca, Fe, Zn
 449       $AtomType = $AtomSymbol;
 450       last ATOMICNUMBER;
 451     }
 452 
 453     $AtomType = 'None';
 454     carp "Warning: ${ClassName}->_GetAtomTypeForOtherAtoms: DREIDING atom types for atom, $AtomSymbol, with atomic number, $AtomicNumber, cann't be assigned...";
 455   }
 456 
 457   return $AtomType;
 458 }
 459 
 460 # Return a string containg data for DREIDINGAtomTypes object...
 461 #
 462 sub StringifyDREIDINGAtomTypes {
 463   my($This) = @_;
 464   my($AtomTypesString);
 465 
 466   # Type of AtomTypes...
 467   $AtomTypesString = "AtomTypes: $This->{Type}; IgnoreHydrogens: " . ($This->{IgnoreHydrogens} ? "Yes" : "No");
 468 
 469   # Setup atom types information...
 470   my($AtomID, $AtomType, @AtomTypesInfo, %AssignedAtomTypes);
 471 
 472   @AtomTypesInfo = ();
 473   %AssignedAtomTypes = $This->GetAtomTypes();
 474 
 475   for $AtomID (sort { $a <=> $b } keys %AssignedAtomTypes) {
 476     $AtomType = $AssignedAtomTypes{$AtomID} ? $AssignedAtomTypes{$AtomID} : 'None';
 477     push @AtomTypesInfo, "$AtomID:$AtomType";
 478   }
 479   $AtomTypesString .= "; AtomIDs:AtomTypes: <" . TextUtil::JoinWords(\@AtomTypesInfo, ", ", 0) . ">";
 480 
 481   return $AtomTypesString;
 482 }
 483 
 484 # Is it a DREIDINGAtomTypes object?
 485 sub _IsDREIDINGAtomTypes {
 486   my($Object) = @_;
 487 
 488   return (Scalar::Util::blessed($Object) && $Object->isa($ClassName)) ? 1 : 0;
 489 }
 490 
 491 # Check and load DREIDING atom types data...
 492 #
 493 sub _CheckAndLoadDREIDINGAtomTypesData {
 494 
 495   # Is it already loaded?
 496   if (exists $DREIDINGAtomTypesDataMap{AtomTypes}) {
 497     return;
 498   }
 499 
 500   _LoadDREIDINGAtomTypesData();
 501 }
 502 
 503 # Load DREIDING atom types data from the file assuming first column to be atom type symbol..
 504 #
 505 # Format:
 506 #
 507 # "AtomType","ValenceBondRadius","ValenceAngle"
 508 # "H_","0.330","180.0"
 509 # "C_3","0.770","109.471"
 510 # "C_R","0.700","120.0"
 511 # "C_2","0.670","120.0"
 512 # "C_1","0.602","180.0"
 513 # "N_3","0.702","106.7"
 514 #
 515 sub _LoadDREIDINGAtomTypesData {
 516   my($AtomTypesDataFile, $MayaChemToolsLibDir);
 517 
 518   $MayaChemToolsLibDir = FileUtil::GetMayaChemToolsLibDirName();
 519 
 520   $AtomTypesDataFile =  "$MayaChemToolsLibDir" . "/data/DREIDINGAtomTypes.csv";
 521   if (! -e "$AtomTypesDataFile") {
 522     croak "Error: MayaChemTools package file, $AtomTypesDataFile, is missing: Possible installation problems...";
 523   }
 524 
 525   %DREIDINGAtomTypesDataMap = ();
 526   AtomTypes::AtomTypes::LoadAtomTypesData($AtomTypesDataFile, \%DREIDINGAtomTypesDataMap);
 527 }
 528