1 #!/bin/env python 2 # 3 # File: Psi4CalculateProperties.py 4 # Author: Manish Sud <msud@san.rr.com> 5 # 6 # Copyright (C) 2024 Manish Sud. All rights reserved. 7 # 8 # The functionality available in this script is implemented using Psi4, an 9 # open source quantum chemistry software package, and RDKit, an open 10 # source toolkit for cheminformatics developed by Greg Landrum. 11 # 12 # This file is part of MayaChemTools. 13 # 14 # MayaChemTools is free software; you can redistribute it and/or modify it under 15 # the terms of the GNU Lesser General Public License as published by the Free 16 # Software Foundation; either version 3 of the License, or (at your option) any 17 # later version. 18 # 19 # MayaChemTools is distributed in the hope that it will be useful, but without 20 # any warranty; without even the implied warranty of merchantability of fitness 21 # for a particular purpose. See the GNU Lesser General Public License for more 22 # details. 23 # 24 # You should have received a copy of the GNU Lesser General Public License 25 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or 26 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, 27 # Boston, MA, 02111-1307, USA. 28 # 29 30 from __future__ import print_function 31 32 # Add local python path to the global path and import standard library modules... 33 import os 34 import sys; sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python")) 35 import time 36 import re 37 import shutil 38 import math 39 import multiprocessing as mp 40 41 # Psi4 imports... 42 if (hasattr(shutil, 'which') and shutil.which("psi4") is None): 43 sys.stderr.write("\nWarning: Failed to find 'psi4' in your PATH indicating potential issues with your\n") 44 sys.stderr.write("Psi4 environment. The 'import psi4' directive in the global scope of the script\n") 45 sys.stderr.write("interferes with the multiprocessing functionality. It is imported later in the\n") 46 sys.stderr.write("local scope during the execution of the script and may fail. Check/update your\n") 47 sys.stderr.write("Psi4 environment and try again.\n\n") 48 49 # RDKit imports... 50 try: 51 from rdkit import rdBase 52 from rdkit import Chem 53 from rdkit.Chem import AllChem 54 except ImportError as ErrMsg: 55 sys.stderr.write("\nFailed to import RDKit module/package: %s\n" % ErrMsg) 56 sys.stderr.write("Check/update your RDKit environment and try again.\n\n") 57 sys.exit(1) 58 59 # MayaChemTools imports... 60 try: 61 from docopt import docopt 62 import MiscUtil 63 import Psi4Util 64 import RDKitUtil 65 except ImportError as ErrMsg: 66 sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg) 67 sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n") 68 sys.exit(1) 69 70 ScriptName = os.path.basename(sys.argv[0]) 71 Options = {} 72 OptionsInfo = {} 73 74 PropertyNamesMap = {} 75 76 def main(): 77 """Start execution of the script.""" 78 79 MiscUtil.PrintInfo("\n%s (Psi4: Imported later; RDKit v%s; MayaChemTools v%s; %s): Starting...\n" % (ScriptName, rdBase.rdkitVersion, MiscUtil.GetMayaChemToolsVersion(), time.asctime())) 80 81 (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime() 82 83 # Retrieve command line arguments and options... 84 RetrieveOptions() 85 86 # Process and validate command line arguments and options... 87 ProcessOptions() 88 89 # Perform actions required by the script... 90 CalculateProperties() 91 92 MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName) 93 MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime)) 94 95 def CalculateProperties(): 96 """Calculate properties.""" 97 98 # Setup a molecule reader... 99 MiscUtil.PrintInfo("\nProcessing file %s..." % OptionsInfo["Infile"]) 100 Mols = RDKitUtil.ReadMolecules(OptionsInfo["Infile"], **OptionsInfo["InfileParams"]) 101 102 # Set up a molecule writer... 103 Writer = RDKitUtil.MoleculesWriter(OptionsInfo["Outfile"], **OptionsInfo["OutfileParams"]) 104 if Writer is None: 105 MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OptionsInfo["Outfile"]) 106 MiscUtil.PrintInfo("Generating file %s..." % OptionsInfo["Outfile"]) 107 108 MolCount, ValidMolCount, CalcFailedCount = ProcessMolecules(Mols, Writer) 109 110 if Writer is not None: 111 Writer.close() 112 113 MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount) 114 MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount) 115 MiscUtil.PrintInfo("Number of molecules failed during property calculation: %d" % CalcFailedCount) 116 MiscUtil.PrintInfo("Number of ignored molecules: %d" % (MolCount - ValidMolCount + CalcFailedCount)) 117 118 def ProcessMolecules(Mols, Writer): 119 """Process and calculate properties of molecules.""" 120 121 if OptionsInfo["MPMode"]: 122 return ProcessMoleculesUsingMultipleProcesses(Mols, Writer) 123 else: 124 return ProcessMoleculesUsingSingleProcess(Mols, Writer) 125 126 def ProcessMoleculesUsingSingleProcess(Mols, Writer): 127 """Process and calculate properties of molecules using a single process.""" 128 129 # Intialize Psi4... 130 MiscUtil.PrintInfo("\nInitializing Psi4...") 131 Psi4Handle = Psi4Util.InitializePsi4(Psi4RunParams = OptionsInfo["Psi4RunParams"], Psi4OptionsParams = OptionsInfo["Psi4OptionsParams"], PrintVersion = True, PrintHeader = True) 132 OptionsInfo["psi4"] = Psi4Handle 133 134 # Setup conversion factor for energy units... 135 SetupEnergyConversionFactor(Psi4Handle) 136 137 MiscUtil.PrintInfo("\nCalculating properties...") 138 139 (MolCount, ValidMolCount, CalcFailedCount) = [0] * 3 140 for Mol in Mols: 141 MolCount += 1 142 143 if not CheckAndValidateMolecule(Mol, MolCount): 144 continue 145 146 # Setup a Psi4 molecule... 147 Psi4Mol = SetupPsi4Mol(Psi4Handle, Mol, MolCount) 148 if Psi4Mol is None: 149 continue 150 151 ValidMolCount += 1 152 153 CalcStatus, CalculatedValues = CalculateMolProperties(Psi4Handle, Psi4Mol, Mol, MolCount) 154 155 if not CalcStatus: 156 if not OptionsInfo["QuietMode"]: 157 MiscUtil.PrintWarning("Failed to calculate properties for molecule %s" % RDKitUtil.GetMolName(Mol, MolCount)) 158 159 CalcFailedCount += 1 160 continue 161 162 WritePropertyValues(Writer, Mol, CalculatedValues) 163 164 return (MolCount, ValidMolCount, CalcFailedCount) 165 166 def ProcessMoleculesUsingMultipleProcesses(Mols, Writer): 167 """Process and calculate properties of molecules using process.""" 168 169 MiscUtil.PrintInfo("\nCalculating properties using multiprocessing...") 170 171 MPParams = OptionsInfo["MPParams"] 172 173 # Setup data for initializing a worker process... 174 InitializeWorkerProcessArgs = (MiscUtil.ObjectToBase64EncodedString(Options), MiscUtil.ObjectToBase64EncodedString(OptionsInfo)) 175 176 # Setup a encoded mols data iterable for a worker process... 177 WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(Mols) 178 179 # Setup process pool along with data initialization for each process... 180 if not OptionsInfo["QuietMode"]: 181 MiscUtil.PrintInfo("\nConfiguring multiprocessing using %s method..." % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()")) 182 MiscUtil.PrintInfo("NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n" % (MPParams["NumProcesses"], MPParams["InputDataMode"], ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"]))) 183 184 ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeWorkerProcess, InitializeWorkerProcessArgs) 185 186 # Start processing... 187 if re.match("^Lazy$", MPParams["InputDataMode"], re.I): 188 Results = ProcessPool.imap(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"]) 189 elif re.match("^InMemory$", MPParams["InputDataMode"], re.I): 190 Results = ProcessPool.map(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"]) 191 else: 192 MiscUtil.PrintError("The value, %s, specified for \"--inputDataMode\" is not supported." % (MPParams["InputDataMode"])) 193 194 # Print out Psi4 version in the main process... 195 MiscUtil.PrintInfo("\nInitializing Psi4...\n") 196 Psi4Handle = Psi4Util.InitializePsi4(PrintVersion = True, PrintHeader = False) 197 OptionsInfo["psi4"] = Psi4Handle 198 199 (MolCount, ValidMolCount, CalcFailedCount) = [0] * 3 200 for Result in Results: 201 MolCount += 1 202 MolIndex, EncodedMol, CalcStatus, CalculatedValues = Result 203 204 if EncodedMol is None: 205 continue 206 207 ValidMolCount += 1 208 209 Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol) 210 211 if not CalcStatus: 212 if not OptionsInfo["QuietMode"]: 213 MolName = RDKitUtil.GetMolName(Mol, MolCount) 214 MiscUtil.PrintWarning("Failed to calculate properties for molecule %s" % MolName) 215 216 CalcFailedCount += 1 217 continue 218 219 WritePropertyValues(Writer, Mol, CalculatedValues) 220 221 return (MolCount, ValidMolCount, CalcFailedCount) 222 223 def InitializeWorkerProcess(*EncodedArgs): 224 """Initialize data for a worker process.""" 225 226 global Options, OptionsInfo 227 228 if not OptionsInfo["QuietMode"]: 229 MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid()) 230 231 # Decode Options and OptionInfo... 232 Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0]) 233 OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1]) 234 235 # Psi4 is initialized in the worker process to avoid creation of redundant Psi4 236 # output files for each process... 237 OptionsInfo["Psi4Initialized"] = False 238 239 SetupPropertyNamesInfo(PrintInfo = False) 240 241 def InitializePsi4ForWorkerProcess(): 242 """Initialize Psi4 for a worker process.""" 243 244 if OptionsInfo["Psi4Initialized"]: 245 return 246 247 OptionsInfo["Psi4Initialized"] = True 248 249 # Update output file... 250 OptionsInfo["Psi4RunParams"]["OutputFile"] = Psi4Util.UpdatePsi4OutputFileUsingPID(OptionsInfo["Psi4RunParams"]["OutputFile"], os.getpid()) 251 252 # Intialize Psi4... 253 OptionsInfo["psi4"] = Psi4Util.InitializePsi4(Psi4RunParams = OptionsInfo["Psi4RunParams"], Psi4OptionsParams = OptionsInfo["Psi4OptionsParams"], PrintVersion = False, PrintHeader = True) 254 255 # Setup conversion factor for energy units... 256 SetupEnergyConversionFactor(OptionsInfo["psi4"]) 257 258 def WorkerProcess(EncodedMolInfo): 259 """Process data for a worker process.""" 260 261 if not OptionsInfo["Psi4Initialized"]: 262 InitializePsi4ForWorkerProcess() 263 264 MolIndex, EncodedMol = EncodedMolInfo 265 266 CalcStatus = False 267 CalculatedValues = None 268 269 if EncodedMol is None: 270 return [MolIndex, None, CalcStatus, CalculatedValues] 271 272 Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol) 273 MolCount = MolIndex + 1 274 275 if not CheckAndValidateMolecule(Mol, MolCount): 276 return [MolIndex, None, CalcStatus, CalculatedValues] 277 278 # Setup a Psi4 molecule... 279 Psi4Mol = SetupPsi4Mol(OptionsInfo["psi4"], Mol, MolCount) 280 if Psi4Mol is None: 281 return [MolIndex, None, CalcStatus, CalculatedValues] 282 283 CalcStatus, CalculatedValues = CalculateMolProperties(OptionsInfo["psi4"], Psi4Mol, Mol, MolCount) 284 285 return [MolIndex, RDKitUtil.MolToBase64EncodedMolString(Mol, PropertyPickleFlags = Chem.PropertyPickleOptions.MolProps | Chem.PropertyPickleOptions.PrivateProps), CalcStatus, CalculatedValues] 286 287 def CalculateMolProperties(Psi4Handle, Psi4Mol, Mol, MolNum = None): 288 """Calculate properties.""" 289 290 Status = False 291 CalculatedValues = [] 292 293 # Setup reference wave function... 294 Reference = SetupReferenceWavefunction(Mol) 295 Psi4Handle.set_options({'Reference': Reference}) 296 297 # Setup method name and basis set... 298 MethodName, BasisSet = SetupMethodNameAndBasisSet(Mol) 299 300 # Calculate single point energy to setup a wavefunction... 301 Status, Energy, WaveFunction = Psi4Util.CalculateSinglePointEnergy(Psi4Handle, Psi4Mol, MethodName, BasisSet, ReturnWaveFunction = True, Quiet = OptionsInfo["QuietMode"]) 302 303 if not Status: 304 PerformPsi4Cleanup(Psi4Handle) 305 return (False, CalculatedValues) 306 307 # Calculate properties... 308 CalculatedValues = CalculatePropertyValues(Psi4Handle, WaveFunction, Mol, MolNum) 309 310 # Clean up 311 PerformPsi4Cleanup(Psi4Handle) 312 313 return (Status, CalculatedValues) 314 315 def CalculatePropertyValues(Psi4Handle, WaveFunction, Mol, MolNum): 316 """Calculate property values.""" 317 318 return [PropertyNamesMap["CalcFunction"][Name](Name, Psi4Handle, WaveFunction, Mol, MolNum) for Name in OptionsInfo["SpecifiedPropertyNames"]] 319 320 def SetupPsi4Mol(Psi4Handle, Mol, MolCount = None): 321 """Setup a Psi4 molecule object.""" 322 323 MolGeometry = RDKitUtil.GetPsi4XYZFormatString(Mol, NoCom = True, NoReorient = True) 324 325 try: 326 Psi4Mol = Psi4Handle.geometry(MolGeometry) 327 except Exception as ErrMsg: 328 Psi4Mol = None 329 if not OptionsInfo["QuietMode"]: 330 MiscUtil.PrintWarning("Failed to create Psi4 molecule from geometry string: %s\n" % ErrMsg) 331 MolName = RDKitUtil.GetMolName(Mol, MolCount) 332 MiscUtil.PrintWarning("Ignoring molecule: %s" % MolName) 333 334 return Psi4Mol 335 336 def PerformPsi4Cleanup(Psi4Handle): 337 """Perform clean up.""" 338 339 # Clean up after Psi4 run ... 340 Psi4Handle.core.clean() 341 342 # Clean up any leftover scratch files... 343 if OptionsInfo["MPMode"]: 344 Psi4Util.RemoveScratchFiles(Psi4Handle, OptionsInfo["Psi4RunParams"]["OutputFile"]) 345 346 def CheckAndValidateMolecule(Mol, MolCount = None): 347 """Validate molecule for Psi4 calculations.""" 348 349 if Mol is None: 350 if not OptionsInfo["QuietMode"]: 351 MiscUtil.PrintInfo("\nProcessing molecule number %s..." % MolCount) 352 return False 353 354 MolName = RDKitUtil.GetMolName(Mol, MolCount) 355 if not OptionsInfo["QuietMode"]: 356 MiscUtil.PrintInfo("\nProcessing molecule %s..." % MolName) 357 358 if RDKitUtil.IsMolEmpty(Mol): 359 if not OptionsInfo["QuietMode"]: 360 MiscUtil.PrintWarning("Ignoring empty molecule: %s\n" % MolName) 361 return False 362 363 if not RDKitUtil.ValidateElementSymbols(RDKitUtil.GetAtomSymbols(Mol)): 364 if not OptionsInfo["QuietMode"]: 365 MiscUtil.PrintWarning("Ignoring molecule containing invalid element symbols: %s\n" % MolName) 366 return False 367 368 # Check for 3D flag... 369 if not Mol.GetConformer().Is3D(): 370 if not OptionsInfo["QuietMode"]: 371 MiscUtil.PrintWarning("3D tag is not set for molecule: %s\n" % MolName) 372 373 # Check for missing hydrogens... 374 if RDKitUtil.AreHydrogensMissingInMolecule(Mol): 375 if not OptionsInfo["QuietMode"]: 376 MiscUtil.PrintWarning("Missing hydrogens in molecule: %s\n" % MolName) 377 378 return True 379 380 def SetupMethodNameAndBasisSet(Mol): 381 """Setup method name and basis set.""" 382 383 MethodName = OptionsInfo["MethodName"] 384 if OptionsInfo["MethodNameAuto"]: 385 MethodName = "B3LYP" 386 387 BasisSet = OptionsInfo["BasisSet"] 388 if OptionsInfo["BasisSetAuto"]: 389 BasisSet = "6-31+G**" if RDKitUtil.IsAtomSymbolPresentInMol(Mol, "S") else "6-31G**" 390 391 return (MethodName, BasisSet) 392 393 def SetupReferenceWavefunction(Mol): 394 """Setup reference wavefunction.""" 395 396 Reference = OptionsInfo["Reference"] 397 if OptionsInfo["ReferenceAuto"]: 398 Reference = 'UHF' if (RDKitUtil.GetSpinMultiplicity(Mol) > 1) else 'RHF' 399 400 return Reference 401 402 def SetupEnergyConversionFactor(Psi4Handle): 403 """Setup converstion factor for energt units. The Psi4 energy units are Hartrees.""" 404 405 EnergyUnits = OptionsInfo["EnergyUnits"] 406 407 ApplyConversionFactor = True 408 if re.match("^kcal\/mol$", EnergyUnits, re.I): 409 ConversionFactor = Psi4Handle.constants.hartree2kcalmol 410 elif re.match("^kJ\/mol$", EnergyUnits, re.I): 411 ConversionFactor = Psi4Handle.constants.hartree2kJmol 412 elif re.match("^eV$", EnergyUnits, re.I): 413 ConversionFactor = Psi4Handle.constants.hartree2ev 414 else: 415 ApplyConversionFactor = False 416 ConversionFactor = 1.0 417 418 OptionsInfo["ApplyEnergyConversionFactor"] = ApplyConversionFactor 419 OptionsInfo["EnergyConversionFactor"] = ConversionFactor 420 421 def WritePropertyValues(Writer, Mol, CalculatedValues): 422 """Write out property values for a molecule.""" 423 424 if CalculatedValues is None: 425 return 426 427 for NameIndex, Name in enumerate(OptionsInfo["SpecifiedPropertyNames"]): 428 PropertyValues = CalculatedValues[NameIndex] 429 if re.match("^(MayerIndices|WibergLowdinIndices)$", Name, re.I): 430 # Handle multiple lines values... 431 Label = PropertyNamesMap["CalcValueLabels"][Name][0] 432 FormattedValues = [" ".join(Values) for Values in PropertyValues] 433 Mol.SetProp(Label, "\n".join(FormattedValues)) 434 else: 435 for LabelIndex, Label in enumerate(PropertyNamesMap["CalcValueLabels"][Name]): 436 Mol.SetProp(Label, PropertyValues[LabelIndex]) 437 438 Writer.write(Mol) 439 440 def CalculateDipole(PropertyName, Psi4Handle, WaveFunction, Mol, MolNum): 441 """Calculate dipole values.""" 442 443 Values = ["NA"] * 4 444 445 try: 446 Title = "OEPropDipole" 447 Psi4Handle.oeprop(WaveFunction, "DIPOLE", title = Title) 448 449 DipoleX = WaveFunction.variable("%s DIPOLE X" % Title) 450 DipoleY = WaveFunction.variable("%s DIPOLE Y" % Title) 451 DipoleZ = WaveFunction.variable("%s DIPOLE Z" % Title) 452 Dipole = math.sqrt(DipoleX*DipoleX + DipoleY*DipoleY + DipoleZ*DipoleZ) 453 454 Values = [DipoleX, DipoleY, DipoleZ, Dipole] 455 Values = ["%.*f" % (OptionsInfo["Precision"], Value) for Value in Values] 456 except Exception as ErrMsg: 457 Values = ["NA"] * 4 458 if not OptionsInfo["QuietMode"]: 459 MiscUtil.PrintWarning("Psi4 failed to calculate %s values for molecule %s:\n%s\n" % (PropertyName, RDKitUtil.GetMolName(Mol, MolNum), ErrMsg)) 460 461 return (Values) 462 463 def CalculateQuadrupole(PropertyName, Psi4Handle, WaveFunction, Mol, MolNum): 464 """Calculate quadrupole values.""" 465 466 Values = ["NA"] * 6 467 468 try: 469 Title = "OEPropQuadrupole" 470 Psi4Handle.oeprop(WaveFunction, "QUADRUPOLE", title = Title) 471 472 QuadrupoleXX = WaveFunction.variable("%s QUADRUPOLE XX" % Title) 473 QuadrupoleYY = WaveFunction.variable("%s QUADRUPOLE YY" % Title) 474 QuadrupoleZZ = WaveFunction.variable("%s QUADRUPOLE ZZ" % Title) 475 QuadrupoleXY = WaveFunction.variable("%s QUADRUPOLE XY" % Title) 476 QuadrupoleXZ = WaveFunction.variable("%s QUADRUPOLE XZ" % Title) 477 QuadrupoleYZ = WaveFunction.variable("%s QUADRUPOLE YZ" % Title) 478 479 Values = [QuadrupoleXX, QuadrupoleYY, QuadrupoleZZ, QuadrupoleXY, QuadrupoleXZ, QuadrupoleYZ] 480 Values = ["%.*f" % (OptionsInfo["Precision"], Value) for Value in Values] 481 except Exception as ErrMsg: 482 Values = ["NA"] * 6 483 if not OptionsInfo["QuietMode"]: 484 MiscUtil.PrintWarning("Psi4 failed to calculate %s values for molecule %s:\n%s\n" % (PropertyName, RDKitUtil.GetMolName(Mol, MolNum), ErrMsg)) 485 486 return (Values) 487 488 def CalculateLumoHomoEnergyGap(PropertyName, Psi4Handle, WaveFunction, Mol, MolNum): 489 """Calculate HOMO and LUMO energy values along with the energy gap.""" 490 491 Values = ["NA"] * 3 492 493 try: 494 HOMOEnergy = WaveFunction.epsilon_a_subset("AO", "ALL").np[WaveFunction.nalpha()] 495 LUMOEnergy = WaveFunction.epsilon_a_subset("AO", "ALL").np[WaveFunction.nalpha() + 1] 496 EnergyGap = LUMOEnergy - HOMOEnergy 497 498 Values = [HOMOEnergy, LUMOEnergy, EnergyGap] 499 500 if OptionsInfo["ApplyEnergyConversionFactor"]: 501 Values = [ Value * OptionsInfo["EnergyConversionFactor"] for Value in Values] 502 Values = ["%.*f" % (OptionsInfo["Precision"], Value) for Value in Values] 503 except Exception as ErrMsg: 504 Values = ["NA"] * 3 505 if not OptionsInfo["QuietMode"]: 506 MiscUtil.PrintWarning("Psi4 failed to calculate %s values for molecule %s:\n%s\n" % (PropertyName, RDKitUtil.GetMolName(Mol, MolNum), ErrMsg)) 507 508 return (Values) 509 510 def CalculateMayerIndices(PropertyName, Psi4Handle, WaveFunction, Mol, MolNum): 511 """Calculate mayer indices.""" 512 513 Values = ["NA"] 514 515 try: 516 Psi4Handle.oeprop(WaveFunction, "MAYER_INDICES") 517 MayerIndicesList = WaveFunction.array_variable("MAYER_INDICES").np.tolist() 518 519 # MayerIndicesList a list of lists corresponding to n x n matrix corresponding 520 # to number of atoms in a molecule... 521 Values = [] 522 for IndicesRow in MayerIndicesList: 523 IndicesRowFormatted = ["%.*f" % (OptionsInfo["Precision"], Value) for Value in IndicesRow] 524 Values.append(IndicesRowFormatted) 525 except Exception as ErrMsg: 526 Values = ["NA"] 527 if not OptionsInfo["QuietMode"]: 528 MiscUtil.PrintWarning("Psi4 failed to calculate %s values for molecule %s:\n%s\n" % (PropertyName, RDKitUtil.GetMolName(Mol, MolNum), ErrMsg)) 529 530 return (Values) 531 532 def CalculateWibergLowdinIndices(PropertyName, Psi4Handle, WaveFunction, Mol, MolNum): 533 """Calculate mayer indices.""" 534 535 Values = ["NA"] 536 537 try: 538 Psi4Handle.oeprop(WaveFunction, "WIBERG_LOWDIN_INDICES") 539 WibergLowdinIndices = WaveFunction.array_variable("WIBERG_LOWDIN_INDICES").np.tolist() 540 541 # WibergLowdinIndices a list of lists corresponding to n x n matrix corresponding 542 # to number of atoms in a molecule... 543 Values = [] 544 for IndicesRow in WibergLowdinIndices: 545 IndicesRowFormatted = ["%.*f" % (OptionsInfo["Precision"], Value) for Value in IndicesRow] 546 Values.append(IndicesRowFormatted) 547 except Exception as ErrMsg: 548 Values = ["NA"] 549 if not OptionsInfo["QuietMode"]: 550 MiscUtil.PrintWarning("Psi4 failed to calculate %s values for molecule %s:\n%s\n" % (PropertyName, RDKitUtil.GetMolName(Mol, MolNum), ErrMsg)) 551 552 return (Values) 553 554 def ProcessSpecifiedPropertyNames(): 555 """Process specified property names.""" 556 557 SetupPropertyNamesInfo() 558 PropertyNames = PropertyNamesMap["Names"] 559 560 OptionsInfo["SpecifiedPropertyNames"] = [] 561 562 SpecifiedNames = re.sub(" ", "", OptionsInfo["Property"]) 563 if not SpecifiedNames: 564 MiscUtil.PrintError("No valid property names specifed using \ -p, --property\" option") 565 566 if re.match("^All$", SpecifiedNames, re.I): 567 OptionsInfo["SpecifiedPropertyNames"] = PropertyNames 568 return 569 570 # Validate propery names... 571 CanonicalPropertyNamesMap = {} 572 for Name in PropertyNames: 573 CanonicalPropertyNamesMap[Name.lower()] = Name 574 575 SpecifiedNamesWords = SpecifiedNames.split(",") 576 for Name in SpecifiedNamesWords: 577 CanonicalName = Name.lower() 578 if CanonicalName not in CanonicalPropertyNamesMap: 579 MiscUtil.PrintError("The property name specified, %s, using \"-p, --property\" option is not a valid name." % Name) 580 581 PropertyName = CanonicalPropertyNamesMap[CanonicalName] 582 OptionsInfo["SpecifiedPropertyNames"].append(PropertyName) 583 584 def ProcessListPropertyNames(): 585 """List available property names.""" 586 587 SetupPropertyNamesInfo() 588 589 MiscUtil.PrintInfo("\nListing available property names...") 590 Delimiter = ", " 591 MiscUtil.PrintInfo("\n%s" % (Delimiter.join(PropertyNamesMap["Names"]))) 592 593 MiscUtil.PrintInfo("\nMultiple property values may be calculated for each propery name as shown below:\n") 594 for PropertyName in PropertyNamesMap["Names"]: 595 MiscUtil.PrintInfo("%s: %s" % (PropertyName, Delimiter.join(PropertyNamesMap["CalcValueLabels"][PropertyName]))) 596 597 MiscUtil.PrintInfo("") 598 599 def SetupPropertyNamesInfo(PrintInfo = True): 600 """Setup information for available property names.""" 601 602 if PrintInfo: 603 MiscUtil.PrintInfo("\nRetrieving information for avalible property names...") 604 605 PropertyNamesMap["Names"] = ["Dipole", "Quadrupole", "LumoHomoEnergyGap", "MayerIndices", "WibergLowdinIndices"] 606 607 PropertyNamesMap["CalcValueLabels"] = {} 608 PropertyNamesMap["CalcFunction"] = {} 609 610 Name = "Dipole" 611 PropertyNamesMap["CalcValueLabels"][Name] = ["DiopleX", "DiopleY", "DiopleZ", "Diople (Debye)"] 612 PropertyNamesMap["CalcFunction"][Name] = CalculateDipole 613 614 Name = "Quadrupole" 615 PropertyNamesMap["CalcValueLabels"][Name] = ["QuadrupoleXX", "QuadrupoleYY", "QuadrupoleZZ", "QuadrupoleXY", "QuadrupoleXZ", "QuadrupoleYZ"] 616 PropertyNamesMap["CalcFunction"][Name] = CalculateQuadrupole 617 618 Name = "LumoHomoEnergyGap" 619 PropertyNamesMap["CalcValueLabels"][Name] = ["HOMOEnergy", "LUMOEnergy", "LUMO-HOMOGap (%s)" % OptionsInfo["EnergyUnits"]] 620 PropertyNamesMap["CalcFunction"][Name] = CalculateLumoHomoEnergyGap 621 622 Name = "MayerIndices" 623 PropertyNamesMap["CalcValueLabels"]["MayerIndices"] = ["MayerIndices"] 624 PropertyNamesMap["CalcFunction"][Name] = CalculateMayerIndices 625 626 Name = "WibergLowdinIndices" 627 PropertyNamesMap["CalcValueLabels"][Name] = ["WibergLowdinIndices"] 628 PropertyNamesMap["CalcFunction"][Name] = CalculateWibergLowdinIndices 629 630 def ProcessOptions(): 631 """Process and validate command line arguments and options.""" 632 633 MiscUtil.PrintInfo("Processing options...") 634 635 # Validate options... 636 ValidateOptions() 637 638 OptionsInfo["Infile"] = Options["--infile"] 639 ParamsDefaultInfoOverride = {"RemoveHydrogens": False} 640 OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters("--infileParams", Options["--infileParams"], InfileName = Options["--infile"], ParamsDefaultInfo = ParamsDefaultInfoOverride) 641 642 OptionsInfo["Outfile"] = Options["--outfile"] 643 OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters("--outfileParams", Options["--outfileParams"]) 644 645 OptionsInfo["Overwrite"] = Options["--overwrite"] 646 647 # Method, basis set, and reference wavefunction... 648 OptionsInfo["BasisSet"] = Options["--basisSet"] 649 OptionsInfo["BasisSetAuto"] = True if re.match("^auto$", Options["--basisSet"], re.I) else False 650 651 OptionsInfo["MethodName"] = Options["--methodName"] 652 OptionsInfo["MethodNameAuto"] = True if re.match("^auto$", Options["--methodName"], re.I) else False 653 654 OptionsInfo["Reference"] = Options["--reference"] 655 OptionsInfo["ReferenceAuto"] = True if re.match("^auto$", Options["--reference"], re.I) else False 656 657 # Energy units... 658 OptionsInfo["EnergyUnits"] = Options["--energyUnits"] 659 660 # Run and options parameters... 661 OptionsInfo["Psi4OptionsParams"] = Psi4Util.ProcessPsi4OptionsParameters("--psi4OptionsParams", Options["--psi4OptionsParams"]) 662 OptionsInfo["Psi4RunParams"] = Psi4Util.ProcessPsi4RunParameters("--psi4RunParams", Options["--psi4RunParams"], InfileName = OptionsInfo["Infile"]) 663 664 OptionsInfo["MPMode"] = True if re.match("^yes$", Options["--mp"], re.I) else False 665 OptionsInfo["MPParams"] = MiscUtil.ProcessOptionMultiprocessingParameters("--mpParams", Options["--mpParams"]) 666 667 OptionsInfo["Precision"] = int(Options["--precision"]) 668 OptionsInfo["QuietMode"] = True if re.match("^yes$", Options["--quiet"], re.I) else False 669 670 OptionsInfo["Property"] = Options["--property"] 671 ProcessSpecifiedPropertyNames() 672 673 def RetrieveOptions(): 674 """Retrieve command line arguments and options.""" 675 676 # Get options... 677 global Options 678 Options = docopt(_docoptUsage_) 679 680 # Set current working directory to the specified directory... 681 WorkingDir = Options["--workingdir"] 682 if WorkingDir: 683 os.chdir(WorkingDir) 684 685 # Handle examples option... 686 if "--examples" in Options and Options["--examples"]: 687 MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_)) 688 sys.exit(0) 689 690 # Handle listing of propery names... 691 if Options["--list"]: 692 ProcessListPropertyNames() 693 sys.exit(0) 694 695 def ValidateOptions(): 696 """Validate option values.""" 697 698 MiscUtil.ValidateOptionTextValue("--energyUnits", Options["--energyUnits"], "Hartrees kcal/mol kJ/mol eV") 699 700 MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"]) 701 MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "sdf sd mol") 702 703 MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "sdf sd") 704 MiscUtil.ValidateOptionsOutputFileOverwrite("-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"]) 705 MiscUtil.ValidateOptionsDistinctFileNames("-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"]) 706 707 MiscUtil.ValidateOptionTextValue("--mp", Options["--mp"], "yes no") 708 MiscUtil.ValidateOptionIntegerValue("-p, --precision", Options["--precision"], {">": 0}) 709 710 MiscUtil.ValidateOptionTextValue("-q, --quiet", Options["--quiet"], "yes no") 711 712 # Setup a usage string for docopt... 713 _docoptUsage_ = """ 714 Psi4CalculateProperties.py - Calculate properties 715 716 Usage: 717 Psi4CalculateProperties.py [--basisSet <text>] [--energyUnits <text>] [--infileParams <Name,Value,...>] 718 [--methodName <text>] [--mp <yes or no>] [--mpParams <Name, Value,...>] 719 [--outfileParams <Name,Value,...> ] [--overwrite] [--property <All or Name1,Name2,Name3,...>] 720 [--precision <number> ] [--psi4OptionsParams <Name,Value,...>] [--psi4RunParams <Name,Value,...>] 721 [--quiet <yes or no>] [--reference <text>] [-w <dir>] -i <infile> -o <outfile> 722 Psi4CalculateProperties.py -l | --list 723 Psi4CalculateProperties.py -h | --help | -e | --examples 724 725 Description: 726 Calculate properties for molecules using a specified method name and basis 727 set. The molecules must have 3D coordinates in input file. The molecular 728 geometry is not optimized before the calculation. In addition, hydrogens must 729 be present for all molecules in input file. A single point energy calculation is 730 performed before calculating properties. The 3D coordinates are not modified 731 during the calculation. 732 733 A Psi4 XYZ format geometry string is automatically generated for each molecule 734 in input file. It contains atom symbols and 3D coordinates for each atom in a 735 molecule. In addition, the formal charge and spin multiplicity are present in the 736 the geometry string. These values are either retrieved from molecule properties 737 named 'FormalCharge' and 'SpinMultiplicty' or dynamically calculated for a 738 molecule. 739 740 The supported input file formats are: Mol (.mol), SD (.sdf, .sd) 741 742 The supported output file formats are: SD (.sdf, .sd) 743 744 Options: 745 -b, --basisSet <text> [default: auto] 746 Basis set to use for calculating single point energy before calculating 747 properties. Default: 6-31+G** for sulfur containing molecules; Otherwise, 748 6-31G** [ Ref 150 ]. The specified value must be a valid Psi4 basis set. 749 No validation is performed. 750 751 The following list shows a representative sample of basis sets available 752 in Psi4: 753 754 STO-3G, 6-31G, 6-31+G, 6-31++G, 6-31G*, 6-31+G*, 6-31++G*, 755 6-31G**, 6-31+G**, 6-31++G**, 6-311G, 6-311+G, 6-311++G, 756 6-311G*, 6-311+G*, 6-311++G*, 6-311G**, 6-311+G**, 6-311++G**, 757 cc-pVDZ, cc-pCVDZ, aug-cc-pVDZ, cc-pVDZ-DK, cc-pCVDZ-DK, def2-SVP, 758 def2-SVPD, def2-TZVP, def2-TZVPD, def2-TZVPP, def2-TZVPPD 759 760 --energyUnits <text> [default: kcal/mol] 761 Energy units for writing out LUMO and HOMO energies and their energy gap. 762 Possible values: Hartrees, kcal/mol, kJ/mol, or eV. 763 -e, --examples 764 Print examples. 765 -h, --help 766 Print this help message. 767 -i, --infile <infile> 768 Input file name. 769 --infileParams <Name,Value,...> [default: auto] 770 A comma delimited list of parameter name and value pairs for reading 771 molecules from files. The supported parameter names for different file 772 formats, along with their default values, are shown below: 773 774 SD, MOL: removeHydrogens,no,sanitize,yes,strictParsing,yes 775 776 -l, --list 777 List property names without performing any calculations. 778 -m, --methodName <text> [default: auto] 779 Method to use for calculating single point energy before calculating 780 properties. Default: B3LYP [ Ref 150 ]. The specified value must be a valid 781 Psi4 method name. No validation is performed. 782 783 The following list shows a representative sample of methods available 784 in Psi4: 785 786 B1LYP, B2PLYP, B2PLYP-D3BJ, B2PLYP-D3MBJ, B3LYP, B3LYP-D3BJ, 787 B3LYP-D3MBJ, CAM-B3LYP, CAM-B3LYP-D3BJ, HF, HF-D3BJ, HF3c, M05, 788 M06, M06-2x, M06-HF, M06-L, MN12-L, MN15, MN15-D3BJ,PBE, PBE0, 789 PBEH3c, PW6B95, PW6B95-D3BJ, WB97, WB97X, WB97X-D, WB97X-D3BJ 790 791 --mp <yes or no> [default: no] 792 Use multiprocessing. 793 794 By default, input data is retrieved in a lazy manner via mp.Pool.imap() 795 function employing lazy RDKit data iterable. This allows processing of 796 arbitrary large data sets without any additional requirements memory. 797 798 All input data may be optionally loaded into memory by mp.Pool.map() 799 before starting worker processes in a process pool by setting the value 800 of 'inputDataMode' to 'InMemory' in '--mpParams' option. 801 802 A word to the wise: The default 'chunkSize' value of 1 during 'Lazy' input 803 data mode may adversely impact the performance. The '--mpParams' section 804 provides additional information to tune the value of 'chunkSize'. 805 --mpParams <Name,Value,...> [default: auto] 806 A comma delimited list of parameter name and value pairs to configure 807 multiprocessing. 808 809 The supported parameter names along with their default and possible 810 values are shown below: 811 812 chunkSize, auto 813 inputDataMode, Lazy [ Possible values: InMemory or Lazy ] 814 numProcesses, auto [ Default: mp.cpu_count() ] 815 816 These parameters are used by the following functions to configure and 817 control the behavior of multiprocessing: mp.Pool(), mp.Pool.map(), and 818 mp.Pool.imap(). 819 820 The chunkSize determines chunks of input data passed to each worker 821 process in a process pool by mp.Pool.map() and mp.Pool.imap() functions. 822 The default value of chunkSize is dependent on the value of 'inputDataMode'. 823 824 The mp.Pool.map() function, invoked during 'InMemory' input data mode, 825 automatically converts RDKit data iterable into a list, loads all data into 826 memory, and calculates the default chunkSize using the following method 827 as shown in its code: 828 829 chunkSize, extra = divmod(len(dataIterable), len(numProcesses) * 4) 830 if extra: chunkSize += 1 831 832 For example, the default chunkSize will be 7 for a pool of 4 worker processes 833 and 100 data items. 834 835 The mp.Pool.imap() function, invoked during 'Lazy' input data mode, employs 836 'lazy' RDKit data iterable to retrieve data as needed, without loading all the 837 data into memory. Consequently, the size of input data is not known a priori. 838 It's not possible to estimate an optimal value for the chunkSize. The default 839 chunkSize is set to 1. 840 841 The default value for the chunkSize during 'Lazy' data mode may adversely 842 impact the performance due to the overhead associated with exchanging 843 small chunks of data. It is generally a good idea to explicitly set chunkSize to 844 a larger value during 'Lazy' input data mode, based on the size of your input 845 data and number of processes in the process pool. 846 847 The mp.Pool.map() function waits for all worker processes to process all 848 the data and return the results. The mp.Pool.imap() function, however, 849 returns the the results obtained from worker processes as soon as the 850 results become available for specified chunks of data. 851 852 The order of data in the results returned by both mp.Pool.map() and 853 mp.Pool.imap() functions always corresponds to the input data. 854 -o, --outfile <outfile> 855 Output file name. 856 --outfileParams <Name,Value,...> [default: auto] 857 A comma delimited list of parameter name and value pairs for writing 858 molecules to files. The supported parameter names for different file 859 formats, along with their default values, are shown below: 860 861 SD: kekulize,yes,forceV3000,no 862 863 --overwrite 864 Overwrite existing files. 865 -p, --property <All or Name1,Name2,Name3,...> [default: Dipole] 866 Comma delimited lists of properties properties to calculate. Default: 867 'Dipole'. The following properties may be calculated for molecules: 868 869 Dipole,Quadrupole,LumoHomoEnergyGap,MayerIndices, 870 WibergLowdinIndices 871 872 Multiple property values are calculated for a single property name as 873 shown below: 874 875 Dipole: DipoleX, DipoleY, DipoleZ,Dipole 876 Quadrupole: QuadrupoleXX, QuadrupoleYY, QuadrupoleZZ, 877 QuadrupoleXY, QuadrupoleXZ, QuadrupoleYZ 878 LumoHomoEnergyGap: HOMOEnergy, LUMOEnergy, LUMO-HOMOGap 879 880 --precision <number> [default: 4] 881 Floating point precision for writing values of calculated properties. 882 --psi4OptionsParams <Name,Value,...> [default: none] 883 A comma delimited list of Psi4 option name and value pairs for setting 884 global and module options. The names are 'option_name' for global options 885 and 'module_name__option_name' for options local to a module. The 886 specified option names must be valid Psi4 names. No validation is 887 performed. 888 889 The specified option name and value pairs are processed and passed to 890 psi4.set_options() as a dictionary. The supported value types are float, 891 integer, boolean, or string. The float value string is converted into a float. 892 The valid values for a boolean string are yes, no, true, false, on, or off. 893 --psi4RunParams <Name,Value,...> [default: auto] 894 A comma delimited list of parameter name and value pairs for configuring 895 Psi4 jobs. 896 897 The supported parameter names along with their default and possible 898 values are shown below: 899 900 MemoryInGB, 1 901 NumThreads, 1 902 OutputFile, auto [ Possible values: stdout, quiet, or FileName ] 903 ScratchDir, auto [ Possivle values: DirName] 904 RemoveOutputFile, yes [ Possible values: yes, no, true, or false] 905 906 These parameters control the runtime behavior of Psi4. 907 908 The default file name for 'OutputFile' is <InFileRoot>_Psi4.out. The PID 909 is appended to output file name during multiprocessing as shown: 910 <InFileRoot>_Psi4_<PIDNum>.out. The 'stdout' value for 'OutputType' 911 sends Psi4 output to stdout. The 'quiet' or 'devnull' value suppresses 912 all Psi4 output. 913 914 The default 'Yes' value of 'RemoveOutputFile' option forces the removal 915 of any existing Psi4 before creating new files to append output from 916 multiple Psi4 runs. 917 918 The option 'ScratchDir' is a directory path to the location of scratch 919 files. The default value corresponds to Psi4 default. It may be used to 920 override the deafult path. 921 -q, --quiet <yes or no> [default: no] 922 Use quiet mode. The warning and information messages will not be printed. 923 -r, --reference <text> [default: auto] 924 Reference wave function to use for energy calculation. Default: RHF or UHF. 925 The default values are Restricted Hartree-Fock (RHF) for closed-shell molecules 926 with all electrons paired and Unrestricted Hartree-Fock (UHF) for open-shell 927 molecules with unpaired electrons. 928 929 The specified value must be a valid Psi4 reference wave function. No validation 930 is performed. For example: ROHF, CUHF, RKS, etc. 931 932 The spin multiplicity determines the default value of reference wave function 933 for input molecules. It is calculated from number of free radical electrons using 934 Hund's rule of maximum multiplicity defined as 2S + 1 where S is the total 935 electron spin. The total spin is 1/2 the number of free radical electrons in a 936 molecule. The value of 'SpinMultiplicity' molecule property takes precedence 937 over the calculated value of spin multiplicity. 938 -w, --workingdir <dir> 939 Location of working directory which defaults to the current directory. 940 941 Examples: 942 To calculate dipole based on a single point energy calculation using B3LYP/6-31G** 943 and B3LYP/6-31+G** for non-sulfur and sulfur containing molecules in a SD file 944 with 3D structures, use RHF and UHF for closed-shell and open-shell molecules, 945 and write a new SD file, type: 946 947 % Psi4CalculateProperties.py -i Psi4Sample3D.sdf -o Psi4Sample3DOut.sdf 948 949 To run the first example for calculating all available properties and write out a 950 SD file, type: 951 952 % Psi4CalculateProperties.py -p All -i Psi4Sample3D.sdf 953 -o Psi4Sample3DOut.sdf 954 955 To run the first example in multiprocessing mode on all available CPUs 956 without loading all data into memory and write out a SD file, type: 957 958 % Psi4CalculateProperties.py --mp yes -i Psi4Sample3D.sdf 959 -o Psi4Sample3DOut.sdf 960 961 To run the first example in multiprocessing mode on all available CPUs 962 by loading all data into memory and write out a SD file, type: 963 964 % Psi4CalculateProperties.py --mp yes --mpParams "inputDataMode, 965 InMemory" -i Psi4Sample3D.sdf -o Psi4Sample3DOut.sdf 966 967 To run the first example in multiprocessing mode on all available CPUs 968 without loading all data into memory along with multiple threads for each 969 Psi4 run and write out a SD file, type: 970 971 % Psi4CalculateProperties.py --mp yes --psi4RunParams "NumThreads,2" 972 -i Psi4Sample3D.sdf -o Psi4Sample3DOut.sdf 973 974 To calculate a specific set of properties based on a single point energy using 975 a specific method and basis set for molecules in a SD containing 3D structures 976 and write a new SD file, type: 977 978 % Psi4CalculateProperties.py -p "Dipole,LumoHomoEnergyGap" -m SCF 979 -b aug-cc-pVDZ -i Psi4Sample3D.sdf -o Psi4Sample3DOut.sdf 980 981 Author: 982 Manish Sud(msud@san.rr.com) 983 984 See also: 985 Psi4CalculatePartialCharges.py, Psi4PerformMinimization.py, Psi4GenerateConformers.py 986 987 Copyright: 988 Copyright (C) 2024 Manish Sud. All rights reserved. 989 990 The functionality available in this script is implemented using Psi4, an 991 open source quantum chemistry software package, and RDKit, an open 992 source toolkit for cheminformatics developed by Greg Landrum. 993 994 This file is part of MayaChemTools. 995 996 MayaChemTools is free software; you can redistribute it and/or modify it under 997 the terms of the GNU Lesser General Public License as published by the Free 998 Software Foundation; either version 3 of the License, or (at your option) any 999 later version. 1000 1001 """ 1002 1003 if __name__ == "__main__": 1004 main()