1 #!/bin/env python 2 # 3 # File: Psi4CalculateEnergy.py 4 # Author: Manish Sud <msud@san.rr.com> 5 # 6 # Copyright (C) 2024 Manish Sud. All rights reserved. 7 # 8 # The functionality available in this script is implemented using Psi4, an 9 # open source quantum chemistry software package, and RDKit, an open 10 # source toolkit for cheminformatics developed by Greg Landrum. 11 # 12 # This file is part of MayaChemTools. 13 # 14 # MayaChemTools is free software; you can redistribute it and/or modify it under 15 # the terms of the GNU Lesser General Public License as published by the Free 16 # Software Foundation; either version 3 of the License, or (at your option) any 17 # later version. 18 # 19 # MayaChemTools is distributed in the hope that it will be useful, but without 20 # any warranty; without even the implied warranty of merchantability of fitness 21 # for a particular purpose. See the GNU Lesser General Public License for more 22 # details. 23 # 24 # You should have received a copy of the GNU Lesser General Public License 25 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or 26 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, 27 # Boston, MA, 02111-1307, USA. 28 # 29 30 from __future__ import print_function 31 32 # Add local python path to the global path and import standard library modules... 33 import os 34 import sys; sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python")) 35 import time 36 import re 37 import shutil 38 import multiprocessing as mp 39 40 # Psi4 imports... 41 if (hasattr(shutil, 'which') and shutil.which("psi4") is None): 42 sys.stderr.write("\nWarning: Failed to find 'psi4' in your PATH indicating potential issues with your\n") 43 sys.stderr.write("Psi4 environment. The 'import psi4' directive in the global scope of the script\n") 44 sys.stderr.write("interferes with the multiprocessing functionality. It is imported later in the\n") 45 sys.stderr.write("local scope during the execution of the script and may fail. Check/update your\n") 46 sys.stderr.write("Psi4 environment and try again.\n\n") 47 48 # RDKit imports... 49 try: 50 from rdkit import rdBase 51 from rdkit import Chem 52 from rdkit.Chem import AllChem 53 except ImportError as ErrMsg: 54 sys.stderr.write("\nFailed to import RDKit module/package: %s\n" % ErrMsg) 55 sys.stderr.write("Check/update your RDKit environment and try again.\n\n") 56 sys.exit(1) 57 58 # MayaChemTools imports... 59 try: 60 from docopt import docopt 61 import MiscUtil 62 import Psi4Util 63 import RDKitUtil 64 except ImportError as ErrMsg: 65 sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg) 66 sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n") 67 sys.exit(1) 68 69 ScriptName = os.path.basename(sys.argv[0]) 70 Options = {} 71 OptionsInfo = {} 72 73 def main(): 74 """Start execution of the script.""" 75 76 MiscUtil.PrintInfo("\n%s (Psi4: Imported later; RDKit v%s; MayaChemTools v%s; %s): Starting...\n" % (ScriptName, rdBase.rdkitVersion, MiscUtil.GetMayaChemToolsVersion(), time.asctime())) 77 78 (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime() 79 80 # Retrieve command line arguments and options... 81 RetrieveOptions() 82 83 # Process and validate command line arguments and options... 84 ProcessOptions() 85 86 # Perform actions required by the script... 87 CalculateEnergy() 88 89 MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName) 90 MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime)) 91 92 def CalculateEnergy(): 93 """Calculate single point energy.""" 94 95 # Setup a molecule reader... 96 MiscUtil.PrintInfo("\nProcessing file %s..." % OptionsInfo["Infile"]) 97 Mols = RDKitUtil.ReadMolecules(OptionsInfo["Infile"], **OptionsInfo["InfileParams"]) 98 99 # Set up a molecule writer... 100 Writer = RDKitUtil.MoleculesWriter(OptionsInfo["Outfile"], **OptionsInfo["OutfileParams"]) 101 if Writer is None: 102 MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OptionsInfo["Outfile"]) 103 MiscUtil.PrintInfo("Generating file %s..." % OptionsInfo["Outfile"]) 104 105 MolCount, ValidMolCount, EnergyFailedCount = ProcessMolecules(Mols, Writer) 106 107 if Writer is not None: 108 Writer.close() 109 110 MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount) 111 MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount) 112 MiscUtil.PrintInfo("Number of molecules failed during energy calculation: %d" % EnergyFailedCount) 113 MiscUtil.PrintInfo("Number of ignored molecules: %d" % (MolCount - ValidMolCount + EnergyFailedCount)) 114 115 def ProcessMolecules(Mols, Writer): 116 """Process and calculate energy of molecules.""" 117 118 if OptionsInfo["MPMode"]: 119 return ProcessMoleculesUsingMultipleProcesses(Mols, Writer) 120 else: 121 return ProcessMoleculesUsingSingleProcess(Mols, Writer) 122 123 def ProcessMoleculesUsingSingleProcess(Mols, Writer): 124 """Process and calculate energy of molecules using a single process.""" 125 126 # Intialize Psi4... 127 MiscUtil.PrintInfo("\nInitializing Psi4...") 128 Psi4Handle = Psi4Util.InitializePsi4(Psi4RunParams = OptionsInfo["Psi4RunParams"], Psi4OptionsParams = OptionsInfo["Psi4OptionsParams"], PrintVersion = True, PrintHeader = True) 129 if OptionsInfo["Psi4DDXSolvationMode"]: 130 Psi4Util.UpdatePsi4OptionsParameters(Psi4Handle, OptionsInfo["Psi4DDXSolvationOptions"]) 131 OptionsInfo["psi4"] = Psi4Handle 132 133 # Setup conversion factor for energy units... 134 SetupEnergyConversionFactor(Psi4Handle) 135 136 MiscUtil.PrintInfo("\nCalculating energy...") 137 138 (MolCount, ValidMolCount, EnergyFailedCount) = [0] * 3 139 for Mol in Mols: 140 MolCount += 1 141 142 if not CheckAndValidateMolecule(Mol, MolCount): 143 continue 144 145 # Setup a Psi4 molecule... 146 Psi4Mol = SetupPsi4Mol(Psi4Handle, Mol, MolCount) 147 if Psi4Mol is None: 148 continue 149 150 ValidMolCount += 1 151 152 CalcStatus, Energy, SolvationEnergy = CalculateMoleculeEnergy(Psi4Handle, Psi4Mol, Mol, MolCount) 153 154 if not CalcStatus: 155 if not OptionsInfo["QuietMode"]: 156 MiscUtil.PrintWarning("Failed to calculate energy for molecule %s" % RDKitUtil.GetMolName(Mol, MolCount)) 157 158 EnergyFailedCount += 1 159 continue 160 161 WriteMolecule(Writer, Mol, Energy, SolvationEnergy) 162 163 return (MolCount, ValidMolCount, EnergyFailedCount) 164 165 def ProcessMoleculesUsingMultipleProcesses(Mols, Writer): 166 """Process and calculate energy of molecules using process.""" 167 168 MiscUtil.PrintInfo("\nCalculating energy using multiprocessing...") 169 170 MPParams = OptionsInfo["MPParams"] 171 172 # Setup data for initializing a worker process... 173 InitializeWorkerProcessArgs = (MiscUtil.ObjectToBase64EncodedString(Options), MiscUtil.ObjectToBase64EncodedString(OptionsInfo)) 174 175 # Setup a encoded mols data iterable for a worker process... 176 WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(Mols) 177 178 # Setup process pool along with data initialization for each process... 179 if not OptionsInfo["QuietMode"]: 180 MiscUtil.PrintInfo("\nConfiguring multiprocessing using %s method..." % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()")) 181 MiscUtil.PrintInfo("NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n" % (MPParams["NumProcesses"], MPParams["InputDataMode"], ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"]))) 182 183 ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeWorkerProcess, InitializeWorkerProcessArgs) 184 185 # Start processing... 186 if re.match("^Lazy$", MPParams["InputDataMode"], re.I): 187 Results = ProcessPool.imap(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"]) 188 elif re.match("^InMemory$", MPParams["InputDataMode"], re.I): 189 Results = ProcessPool.map(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"]) 190 else: 191 MiscUtil.PrintError("The value, %s, specified for \"--inputDataMode\" is not supported." % (MPParams["InputDataMode"])) 192 193 # Print out Psi4 version in the main process... 194 MiscUtil.PrintInfo("\nInitializing Psi4...\n") 195 Psi4Handle = Psi4Util.InitializePsi4(PrintVersion = True, PrintHeader = False) 196 OptionsInfo["psi4"] = Psi4Handle 197 198 (MolCount, ValidMolCount, EnergyFailedCount) = [0] * 3 199 for Result in Results: 200 MolCount += 1 201 MolIndex, EncodedMol, CalcStatus, Energy, SolvationEnergy = Result 202 203 if EncodedMol is None: 204 continue 205 206 ValidMolCount += 1 207 208 Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol) 209 210 if not CalcStatus: 211 if not OptionsInfo["QuietMode"]: 212 MolName = RDKitUtil.GetMolName(Mol, MolCount) 213 MiscUtil.PrintWarning("Failed to calculate energy for molecule %s" % MolName) 214 215 EnergyFailedCount += 1 216 continue 217 218 WriteMolecule(Writer, Mol, Energy, SolvationEnergy) 219 220 return (MolCount, ValidMolCount, EnergyFailedCount) 221 222 def InitializeWorkerProcess(*EncodedArgs): 223 """Initialize data for a worker process.""" 224 225 global Options, OptionsInfo 226 227 if not OptionsInfo["QuietMode"]: 228 MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid()) 229 230 # Decode Options and OptionInfo... 231 Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0]) 232 OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1]) 233 234 # Psi4 is initialized in the worker process to avoid creation of redundant Psi4 235 # output files for each process... 236 OptionsInfo["Psi4Initialized"] = False 237 238 def InitializePsi4ForWorkerProcess(): 239 """Initialize Psi4 for a worker process.""" 240 241 if OptionsInfo["Psi4Initialized"]: 242 return 243 244 OptionsInfo["Psi4Initialized"] = True 245 246 # Update output file... 247 OptionsInfo["Psi4RunParams"]["OutputFile"] = Psi4Util.UpdatePsi4OutputFileUsingPID(OptionsInfo["Psi4RunParams"]["OutputFile"], os.getpid()) 248 249 # Intialize Psi4... 250 OptionsInfo["psi4"] = Psi4Util.InitializePsi4(Psi4RunParams = OptionsInfo["Psi4RunParams"], Psi4OptionsParams = OptionsInfo["Psi4OptionsParams"], PrintVersion = False, PrintHeader = True) 251 if OptionsInfo["Psi4DDXSolvationMode"]: 252 Psi4Util.UpdatePsi4OptionsParameters(OptionsInfo["psi4"], OptionsInfo["Psi4DDXSolvationOptions"]) 253 254 # Setup conversion factor for energy units... 255 SetupEnergyConversionFactor(OptionsInfo["psi4"]) 256 257 def WorkerProcess(EncodedMolInfo): 258 """Process data for a worker process.""" 259 260 if not OptionsInfo["Psi4Initialized"]: 261 InitializePsi4ForWorkerProcess() 262 263 MolIndex, EncodedMol = EncodedMolInfo 264 265 CalcStatus = False 266 Energy = None 267 268 if EncodedMol is None: 269 return [MolIndex, None, CalcStatus, Energy] 270 271 Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol) 272 MolCount = MolIndex + 1 273 274 if not CheckAndValidateMolecule(Mol, MolCount): 275 return [MolIndex, None, CalcStatus, Energy] 276 277 # Setup a Psi4 molecule... 278 Psi4Mol = SetupPsi4Mol(OptionsInfo["psi4"], Mol, MolCount) 279 if Psi4Mol is None: 280 return [MolIndex, None, CalcStatus, Energy] 281 282 CalcStatus, Energy, SolvationEnergy = CalculateMoleculeEnergy(OptionsInfo["psi4"], Psi4Mol, Mol, MolCount) 283 284 return [MolIndex, RDKitUtil.MolToBase64EncodedMolString(Mol, PropertyPickleFlags = Chem.PropertyPickleOptions.MolProps | Chem.PropertyPickleOptions.PrivateProps), CalcStatus, Energy, SolvationEnergy] 285 286 def CalculateMoleculeEnergy(Psi4Handle, Psi4Mol, Mol, MolNum = None): 287 """Calculate energy.""" 288 289 Status = False 290 Energy = None 291 SolvationEnergy = None 292 293 # Setup reference wave function... 294 Reference = SetupReferenceWavefunction(Mol) 295 Psi4Handle.set_options({'Reference': Reference}) 296 297 # Setup method name and basis set... 298 MethodName, BasisSet = SetupMethodNameAndBasisSet(Mol) 299 300 if OptionsInfo["Psi4DDXSolvationMode"]: 301 Status, Energy, WaveFunction = Psi4Util.CalculateSinglePointEnergy(Psi4Handle, Psi4Mol, MethodName, BasisSet, ReturnWaveFunction = True, Quiet = OptionsInfo["QuietMode"]) 302 SolvationEnergy = RetrievePsi4DDXSolvationEnergy(WaveFunction) 303 else: 304 Status, Energy = Psi4Util.CalculateSinglePointEnergy(Psi4Handle, Psi4Mol, MethodName, BasisSet, Quiet = OptionsInfo["QuietMode"]) 305 306 # Convert energy units... 307 if Status: 308 if OptionsInfo["ApplyEnergyConversionFactor"]: 309 Energy = Energy * OptionsInfo["EnergyConversionFactor"] 310 if SolvationEnergy is not None: 311 SolvationEnergy = SolvationEnergy * OptionsInfo["EnergyConversionFactor"] 312 313 # Clean up 314 PerformPsi4Cleanup(Psi4Handle) 315 316 return (Status, Energy, SolvationEnergy) 317 318 def RetrievePsi4DDXSolvationEnergy(WaveFunction): 319 """Retrieve DDX solvation energy.""" 320 321 SolvationEnergy = None 322 try: 323 SolvationEnergy = WaveFunction.variable('DD Solvation Energy') 324 except Exception as ErrMsg: 325 SolvationEnergy = None 326 if not OptionsInfo["QuietMode"]: 327 MiscUtil.PrintWarning("Failed to retrieve Psi4 DD Solvation Energy using wave function: %s" % ErrMsg) 328 329 return SolvationEnergy 330 331 def SetupPsi4Mol(Psi4Handle, Mol, MolCount = None): 332 """Setup a Psi4 molecule object.""" 333 334 MolGeometry = RDKitUtil.GetPsi4XYZFormatString(Mol, NoCom = True, NoReorient = True) 335 336 try: 337 Psi4Mol = Psi4Handle.geometry(MolGeometry) 338 except Exception as ErrMsg: 339 Psi4Mol = None 340 if not OptionsInfo["QuietMode"]: 341 MiscUtil.PrintWarning("Failed to create Psi4 molecule from geometry string: %s\n" % ErrMsg) 342 MolName = RDKitUtil.GetMolName(Mol, MolCount) 343 MiscUtil.PrintWarning("Ignoring molecule: %s" % MolName) 344 345 return Psi4Mol 346 347 def PerformPsi4Cleanup(Psi4Handle): 348 """Perform clean up.""" 349 350 # Clean up after Psi4 run... 351 Psi4Handle.core.clean() 352 353 # Clean up any leftover scratch files... 354 if OptionsInfo["MPMode"]: 355 Psi4Util.RemoveScratchFiles(Psi4Handle, OptionsInfo["Psi4RunParams"]["OutputFile"]) 356 357 def CheckAndValidateMolecule(Mol, MolCount = None): 358 """Validate molecule for Psi4 calculations.""" 359 360 if Mol is None: 361 if not OptionsInfo["QuietMode"]: 362 MiscUtil.PrintInfo("\nProcessing molecule number %s..." % MolCount) 363 return False 364 365 MolName = RDKitUtil.GetMolName(Mol, MolCount) 366 if not OptionsInfo["QuietMode"]: 367 MiscUtil.PrintInfo("\nProcessing molecule %s..." % MolName) 368 369 if RDKitUtil.IsMolEmpty(Mol): 370 if not OptionsInfo["QuietMode"]: 371 MiscUtil.PrintWarning("Ignoring empty molecule: %s\n" % MolName) 372 return False 373 374 if not RDKitUtil.ValidateElementSymbols(RDKitUtil.GetAtomSymbols(Mol)): 375 if not OptionsInfo["QuietMode"]: 376 MiscUtil.PrintWarning("Ignoring molecule containing invalid element symbols: %s\n" % MolName) 377 return False 378 379 # Check for 3D flag... 380 if not Mol.GetConformer().Is3D(): 381 if not OptionsInfo["QuietMode"]: 382 MiscUtil.PrintWarning("3D tag is not set for molecule: %s\n" % MolName) 383 384 # Check for missing hydrogens... 385 if RDKitUtil.AreHydrogensMissingInMolecule(Mol): 386 if not OptionsInfo["QuietMode"]: 387 MiscUtil.PrintWarning("Missing hydrogens in molecule: %s\n" % MolName) 388 389 return True 390 391 def SetupMethodNameAndBasisSet(Mol): 392 """Setup method name and basis set.""" 393 394 MethodName = OptionsInfo["MethodName"] 395 if OptionsInfo["MethodNameAuto"]: 396 MethodName = "B3LYP" 397 398 BasisSet = OptionsInfo["BasisSet"] 399 if OptionsInfo["BasisSetAuto"]: 400 BasisSet = "6-31+G**" if RDKitUtil.IsAtomSymbolPresentInMol(Mol, "S") else "6-31G**" 401 402 return (MethodName, BasisSet) 403 404 def SetupReferenceWavefunction(Mol): 405 """Setup reference wavefunction.""" 406 407 Reference = OptionsInfo["Reference"] 408 if OptionsInfo["ReferenceAuto"]: 409 Reference = 'UHF' if (RDKitUtil.GetSpinMultiplicity(Mol) > 1) else 'RHF' 410 411 return Reference 412 413 def SetupEnergyConversionFactor(Psi4Handle): 414 """Setup converstion factor for energt units. The Psi4 energy units are Hartrees.""" 415 416 EnergyUnits = OptionsInfo["EnergyUnits"] 417 418 ApplyConversionFactor = True 419 if re.match("^kcal\/mol$", EnergyUnits, re.I): 420 ConversionFactor = Psi4Handle.constants.hartree2kcalmol 421 elif re.match("^kJ\/mol$", EnergyUnits, re.I): 422 ConversionFactor = Psi4Handle.constants.hartree2kJmol 423 elif re.match("^eV$", EnergyUnits, re.I): 424 ConversionFactor = Psi4Handle.constants.hartree2ev 425 else: 426 ApplyConversionFactor = False 427 ConversionFactor = 1.0 428 429 OptionsInfo["ApplyEnergyConversionFactor"] = ApplyConversionFactor 430 OptionsInfo["EnergyConversionFactor"] = ConversionFactor 431 432 def WriteMolecule(Writer, Mol, Energy, SolvationEnergy): 433 """Write molecule.""" 434 435 Energy = "%.*f" % (OptionsInfo["Precision"], Energy) 436 Mol.SetProp(OptionsInfo["EnergyDataFieldLabel"], Energy) 437 438 if OptionsInfo["Psi4DDXSolvationMode"] and SolvationEnergy is not None: 439 SolvationEnergy = "%.*f" % (OptionsInfo["Precision"], SolvationEnergy) 440 Mol.SetProp(OptionsInfo["SolvationEnergyDataFieldLabel"], SolvationEnergy) 441 442 Writer.write(Mol) 443 444 def CheckAndSetupDDX(): 445 """Load pyddx and return its handle.""" 446 447 try: 448 import pyddx 449 except ImportError as ErrMsg: 450 sys.stderr.write("\nFailed to import module/package pyddx for solvation: %s\n" % ErrMsg) 451 sys.stderr.write("Psi4 relies on pyddx module for DDX solvation calculation.\n") 452 sys.stderr.write("You may need to install it from pypi or conda-forge.\n") 453 sys.stderr.write("Check/update your environment and try again.\n\n") 454 sys.exit(1) 455 456 return pyddx 457 458 def ProcessDDXListSolventsOption(): 459 """List solvent information provided by tge DDX module for the calculation 460 of solvation energy.""" 461 462 PyDDXHandle = CheckAndSetupDDX() 463 464 MiscUtil.PrintInfo("Solvent name and dielectric constant information avaibale in the DDX module:\n") 465 MiscUtil.PrintInfo("Solvent\tEpsilon") 466 Count = 0 467 for Solvent in sorted(PyDDXHandle.data.solvent_epsilon.keys()): 468 Count += 1 469 Epsilon = PyDDXHandle.data.solvent_epsilon[Solvent] 470 MiscUtil.PrintInfo("%s\t%s" % (Solvent, Epsilon)) 471 472 MiscUtil.PrintInfo("\nTotal number of solvents: %s\n" % Count) 473 474 def ProcessOptions(): 475 """Process and validate command line arguments and options.""" 476 477 MiscUtil.PrintInfo("Processing options...") 478 479 # Validate options... 480 ValidateOptions() 481 482 OptionsInfo["Infile"] = Options["--infile"] 483 ParamsDefaultInfoOverride = {"RemoveHydrogens": False} 484 OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters("--infileParams", Options["--infileParams"], InfileName = Options["--infile"], ParamsDefaultInfo = ParamsDefaultInfoOverride) 485 486 OptionsInfo["Outfile"] = Options["--outfile"] 487 OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters("--outfileParams", Options["--outfileParams"]) 488 489 OptionsInfo["Overwrite"] = Options["--overwrite"] 490 491 # Method, basis set, and reference wavefunction... 492 OptionsInfo["BasisSet"] = Options["--basisSet"] 493 OptionsInfo["BasisSetAuto"] = True if re.match("^auto$", Options["--basisSet"], re.I) else False 494 495 OptionsInfo["MethodName"] = Options["--methodName"] 496 OptionsInfo["MethodNameAuto"] = True if re.match("^auto$", Options["--methodName"], re.I) else False 497 498 OptionsInfo["Reference"] = Options["--reference"] 499 OptionsInfo["ReferenceAuto"] = True if re.match("^auto$", Options["--reference"], re.I) else False 500 501 # Run and options parameters... 502 OptionsInfo["Psi4OptionsParams"] = Psi4Util.ProcessPsi4OptionsParameters("--psi4OptionsParams", Options["--psi4OptionsParams"]) 503 OptionsInfo["Psi4RunParams"] = Psi4Util.ProcessPsi4RunParameters("--psi4RunParams", Options["--psi4RunParams"], InfileName = OptionsInfo["Infile"]) 504 505 # DDX solvation parameters... 506 OptionsInfo["Psi4DDXSolvationMode"] = True if re.match("^yes$", Options["--psi4DDXSolvation"], re.I) else False 507 Psi4DDXSolvationParams = Psi4Util.ProcessPsi4DDXSolvationParameters("--psi4DDXSolvationParams", Options["--psi4DDXSolvationParams"]) 508 Psi4DDXSolvationOptions = Psi4Util.SetupPsi4DDXSolvationOptions(OptionsInfo["Psi4DDXSolvationMode"], Psi4DDXSolvationParams) 509 OptionsInfo["Psi4DDXSolvationParams"] = Psi4DDXSolvationParams 510 OptionsInfo["Psi4DDXSolvationOptions"] = Psi4DDXSolvationOptions 511 512 if OptionsInfo["Psi4DDXSolvationMode"]: 513 CheckAndSetupDDX() 514 515 # Energy units and label... 516 OptionsInfo["EnergyUnits"] = Options["--energyUnits"] 517 EnergyDataFieldLabel = Options["--energyDataFieldLabel"] 518 if re.match("^auto$", EnergyDataFieldLabel, re.I): 519 EnergyDataFieldLabel = "Psi4_Energy (%s)" % Options["--energyUnits"] 520 OptionsInfo["EnergyDataFieldLabel"] = EnergyDataFieldLabel 521 522 SolvationEnergyDataFieldLabel = "Psi4_DDX_Solvation_Energy (%s)" % Options["--energyUnits"] 523 OptionsInfo["SolvationEnergyDataFieldLabel"] = SolvationEnergyDataFieldLabel 524 525 OptionsInfo["MPMode"] = True if re.match("^yes$", Options["--mp"], re.I) else False 526 OptionsInfo["MPParams"] = MiscUtil.ProcessOptionMultiprocessingParameters("--mpParams", Options["--mpParams"]) 527 528 OptionsInfo["Precision"] = int(Options["--precision"]) 529 OptionsInfo["QuietMode"] = True if re.match("^yes$", Options["--quiet"], re.I) else False 530 531 def RetrieveOptions(): 532 """Retrieve command line arguments and options.""" 533 534 # Get options... 535 global Options 536 Options = docopt(_docoptUsage_) 537 538 # Set current working directory to the specified directory... 539 WorkingDir = Options["--workingdir"] 540 if WorkingDir: 541 os.chdir(WorkingDir) 542 543 # Handle examples option... 544 if "--examples" in Options and Options["--examples"]: 545 MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_)) 546 sys.exit(0) 547 548 # Handle listing of solvent information... 549 if Options and Options["--psi4DDXListSolvents"]: 550 ProcessDDXListSolventsOption() 551 sys.exit(0) 552 553 def ValidateOptions(): 554 """Validate option values.""" 555 556 MiscUtil.ValidateOptionTextValue("--energyUnits", Options["--energyUnits"], "Hartrees kcal/mol kJ/mol eV") 557 558 MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"]) 559 MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "sdf sd mol") 560 561 MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "sdf sd") 562 MiscUtil.ValidateOptionsOutputFileOverwrite("-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"]) 563 MiscUtil.ValidateOptionsDistinctFileNames("-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"]) 564 565 MiscUtil.ValidateOptionTextValue("--mp", Options["--mp"], "yes no") 566 MiscUtil.ValidateOptionIntegerValue("-p, --precision", Options["--precision"], {">": 0}) 567 568 MiscUtil.ValidateOptionTextValue("--psi4DDXSolvation", Options["--psi4DDXSolvation"], "yes no") 569 570 MiscUtil.ValidateOptionTextValue("-q, --quiet", Options["--quiet"], "yes no") 571 572 # Setup a usage string for docopt... 573 _docoptUsage_ = """ 574 Psi4CalculateEnergy.py - Calculate single point energy 575 576 Usage: 577 Psi4CalculateEnergy.py [--basisSet <text>] [--energyDataFieldLabel <text>] [--energyUnits <text>] 578 [--infileParams <Name,Value,...>] [--methodName <text>] [--mp <yes or no>] 579 [--mpParams <Name, Value,...>] [ --outfileParams <Name,Value,...> ] [--overwrite] 580 [--precision <number>] [--psi4OptionsParams <Name,Value,...>] [--psi4RunParams <Name,Value,...>] 581 [--psi4DDXSolvation <yes or no>] [--psi4DDXSolvationParams <Name,Value,...>] 582 [--quiet <yes or no>] [--reference <text>] [-w <dir>] -i <infile> -o <outfile> 583 Psi4CalculateEnergy.py --psi4DDXListSolvents 584 Psi4CalculateEnergy.py -h | --help | -e | --examples 585 586 Description: 587 Calculate single point energy for molecules using a specified method name and 588 basis set. The molecules must have 3D coordinates in input file. The molecular 589 geometry is not optimized before the calculation. In addition, the hydrogens 590 must be present for all molecules in input file. The 3D coordinates are not 591 modified during the calculation. 592 593 A Psi4 XYZ format geometry string is automatically generated for each molecule 594 in input file. It contains atom symbols and 3D coordinates for each atom in a 595 molecule. In addition, the formal charge and spin multiplicity are present in the 596 the geometry string. These values are either retrieved from molecule properties 597 named 'FormalCharge' and 'SpinMultiplicty' or dynamically calculated for a 598 molecule. 599 600 The supported input file formats are: Mol (.mol), SD (.sdf, .sd) 601 602 The supported output file formats are: SD (.sdf, .sd) 603 604 Options: 605 -b, --basisSet <text> [default: auto] 606 Basis set to use for energy calculation. Default: 6-31+G** for sulfur 607 containing molecules; Otherwise, 6-31G** [ Ref 150 ]. The specified 608 value must be a valid Psi4 basis set. No validation is performed. 609 610 The following list shows a representative sample of basis sets available 611 in Psi4: 612 613 STO-3G, 6-31G, 6-31+G, 6-31++G, 6-31G*, 6-31+G*, 6-31++G*, 614 6-31G**, 6-31+G**, 6-31++G**, 6-311G, 6-311+G, 6-311++G, 615 6-311G*, 6-311+G*, 6-311++G*, 6-311G**, 6-311+G**, 6-311++G**, 616 cc-pVDZ, cc-pCVDZ, aug-cc-pVDZ, cc-pVDZ-DK, cc-pCVDZ-DK, def2-SVP, 617 def2-SVPD, def2-TZVP, def2-TZVPD, def2-TZVPP, def2-TZVPPD 618 619 --energyDataFieldLabel <text> [default: auto] 620 Energy data field label for writing energy values. Default: Psi4_Energy (<Units>). 621 --energyUnits <text> [default: kcal/mol] 622 Energy units. Possible values: Hartrees, kcal/mol, kJ/mol, or eV. 623 -e, --examples 624 Print examples. 625 -h, --help 626 Print this help message. 627 -i, --infile <infile> 628 Input file name. 629 --infileParams <Name,Value,...> [default: auto] 630 A comma delimited list of parameter name and value pairs for reading 631 molecules from files. The supported parameter names for different file 632 formats, along with their default values, are shown below: 633 634 SD, MOL: removeHydrogens,no,sanitize,yes,strictParsing,yes 635 636 -m, --methodName <text> [default: auto] 637 Method to use for energy calculation. Default: B3LYP [ Ref 150 ]. The 638 specified value must be a valid Psi4 method name. No validation is 639 performed. 640 641 The following list shows a representative sample of methods available 642 in Psi4: 643 644 B1LYP, B2PLYP, B2PLYP-D3BJ, B2PLYP-D3MBJ, B3LYP, B3LYP-D3BJ, 645 B3LYP-D3MBJ, CAM-B3LYP, CAM-B3LYP-D3BJ, HF, HF-D3BJ, HF3c, M05, 646 M06, M06-2x, M06-HF, M06-L, MN12-L, MN15, MN15-D3BJ,PBE, PBE0, 647 PBEH3c, PW6B95, PW6B95-D3BJ, WB97, WB97X, WB97X-D, WB97X-D3BJ 648 649 --mp <yes or no> [default: no] 650 Use multiprocessing. 651 652 By default, input data is retrieved in a lazy manner via mp.Pool.imap() 653 function employing lazy RDKit data iterable. This allows processing of 654 arbitrary large data sets without any additional requirements memory. 655 656 All input data may be optionally loaded into memory by mp.Pool.map() 657 before starting worker processes in a process pool by setting the value 658 of 'inputDataMode' to 'InMemory' in '--mpParams' option. 659 660 A word to the wise: The default 'chunkSize' value of 1 during 'Lazy' input 661 data mode may adversely impact the performance. The '--mpParams' section 662 provides additional information to tune the value of 'chunkSize'. 663 --mpParams <Name,Value,...> [default: auto] 664 A comma delimited list of parameter name and value pairs to configure 665 multiprocessing. 666 667 The supported parameter names along with their default and possible 668 values are shown below: 669 670 chunkSize, auto 671 inputDataMode, Lazy [ Possible values: InMemory or Lazy ] 672 numProcesses, auto [ Default: mp.cpu_count() ] 673 674 These parameters are used by the following functions to configure and 675 control the behavior of multiprocessing: mp.Pool(), mp.Pool.map(), and 676 mp.Pool.imap(). 677 678 The chunkSize determines chunks of input data passed to each worker 679 process in a process pool by mp.Pool.map() and mp.Pool.imap() functions. 680 The default value of chunkSize is dependent on the value of 'inputDataMode'. 681 682 The mp.Pool.map() function, invoked during 'InMemory' input data mode, 683 automatically converts RDKit data iterable into a list, loads all data into 684 memory, and calculates the default chunkSize using the following method 685 as shown in its code: 686 687 chunkSize, extra = divmod(len(dataIterable), len(numProcesses) * 4) 688 if extra: chunkSize += 1 689 690 For example, the default chunkSize will be 7 for a pool of 4 worker processes 691 and 100 data items. 692 693 The mp.Pool.imap() function, invoked during 'Lazy' input data mode, employs 694 'lazy' RDKit data iterable to retrieve data as needed, without loading all the 695 data into memory. Consequently, the size of input data is not known a priori. 696 It's not possible to estimate an optimal value for the chunkSize. The default 697 chunkSize is set to 1. 698 699 The default value for the chunkSize during 'Lazy' data mode may adversely 700 impact the performance due to the overhead associated with exchanging 701 small chunks of data. It is generally a good idea to explicitly set chunkSize to 702 a larger value during 'Lazy' input data mode, based on the size of your input 703 data and number of processes in the process pool. 704 705 The mp.Pool.map() function waits for all worker processes to process all 706 the data and return the results. The mp.Pool.imap() function, however, 707 returns the the results obtained from worker processes as soon as the 708 results become available for specified chunks of data. 709 710 The order of data in the results returned by both mp.Pool.map() and 711 mp.Pool.imap() functions always corresponds to the input data. 712 -o, --outfile <outfile> 713 Output file name. 714 --outfileParams <Name,Value,...> [default: auto] 715 A comma delimited list of parameter name and value pairs for writing 716 molecules to files. The supported parameter names for different file 717 formats, along with their default values, are shown below: 718 719 SD: kekulize,yes,forceV3000,no 720 721 --overwrite 722 Overwrite existing files. 723 --precision <number> [default: 6] 724 Floating point precision for writing energy values. 725 --psi4OptionsParams <Name,Value,...> [default: none] 726 A comma delimited list of Psi4 option name and value pairs for setting 727 global and module options. The names are 'option_name' for global options 728 and 'module_name__option_name' for options local to a module. The 729 specified option names must be valid Psi4 names. No validation is 730 performed. 731 732 The specified option name and value pairs are processed and passed to 733 psi4.set_options() as a dictionary. The supported value types are float, 734 integer, boolean, or string. The float value string is converted into a float. 735 The valid values for a boolean string are yes, no, true, false, on, or off. 736 --psi4RunParams <Name,Value,...> [default: auto] 737 A comma delimited list of parameter name and value pairs for configuring 738 Psi4 jobs. 739 740 The supported parameter names along with their default and possible 741 values are shown below: 742 743 MemoryInGB, 1 744 NumThreads, 1 745 OutputFile, auto [ Possible values: stdout, quiet, or FileName ] 746 ScratchDir, auto [ Possivle values: DirName] 747 RemoveOutputFile, yes [ Possible values: yes, no, true, or false] 748 749 These parameters control the runtime behavior of Psi4. 750 751 The default file name for 'OutputFile' is <InFileRoot>_Psi4.out. The PID 752 is appended to output file name during multiprocessing as shown: 753 <InFileRoot>_Psi4_<PIDNum>.out. The 'stdout' value for 'OutputType' 754 sends Psi4 output to stdout. The 'quiet' or 'devnull' value suppresses 755 all Psi4 output. 756 757 The default 'Yes' value of 'RemoveOutputFile' option forces the removal 758 of any existing Psi4 before creating new files to append output from 759 multiple Psi4 runs. 760 761 The option 'ScratchDir' is a directory path to the location of scratch 762 files. The default value corresponds to Psi4 default. It may be used to 763 override the deafult path. 764 --psi4DDXSolvation <yes or no> [default: no] 765 Perform energy calculation in solution using domain-decomposition-based 766 continuum solvation models [ Ref 160-161 ] The script relies on Psi4 767 interface to the DDX module to perform these calculations. The DDX library 768 provides a linear-scaling implementation of standard continuum solvation 769 models using a domain- decomposition ansatz. Two solvation models are 770 supported: COnductor-like Screening MOdel (COSMO) [ Ref 162-163] and 771 Polarizable Continuum Model (PCM) [ Ref 164-165 ]. 772 773 The solvation energy is included in the value of the total energy written to 774 the output SD file. In addition, the value of solvation energy is written to the 775 output file under its own data field label. 776 777 Psi4 relies on Python module PYDDX to calculate solvation energy. It must be 778 present in your environment. 779 --psi4DDXSolvationParams <Name,Value,...> [default: auto] 780 A space delimited list of parameter name and value pairs for calculating 781 solvation energy using the DDX module. The supported parameter names, 782 along with their default values, are shown below: 783 784 solvationModel PCM [ Possible values: COSMO or PCM] 785 solvent water [ Possible values: A valid solvent name] 786 solventEpsilon None 787 radiiSet UFF [ Possible values: Bondi or UFF] 788 radiiScaling auto [ Default: 1.2 for Bondi; 1.1 for UFF] 789 790 solvationModel: Solvation model for calculating solvation energy. 791 The corresponding Psi4 option is DDX_MODEL. 792 793 solvent: Solvent to use. The corresponding Ps4 option is 794 DDX_SOLVENT 795 796 solventEpsilon: Dielectric constant of the solvent. The 797 corresponding Psi4 option is DDX_SOLVENT_EPSILON. 798 799 radiiSet: Radius set for cavity spheres. The corresponding Psi4 800 option is DDX_RADII_SET. 801 802 radiiScaling: Scaling factor for cavity spheres. The default value 803 depends on radiiSet: 1.2 for Bondi; 1.1 for UFF. The corresponding 804 Psi4 option is DDX_RADII_SCALING. 805 806 These parameter names are automatically mapped to appropriate DDX keywords. 807 808 You may specify the solvent either by directly providing a dielectric constant using 809 'solventEpsilon' parameter or a solvent name corresponding to 'solvent' parameter. 810 The solvent name must be a valid named supported by the DDX module. For example: 811 water, ethanol, dimethylsulfoxide, cis-1,2-dimethylcyclohexane , etc. The 'solvent' 812 parameter is ignored for non-zero value of 'solvent' option. 813 814 The DDX module contains a variety of addtional options to confgure the calculation 815 of solvation energy. For example: DDX_MAXITER, DDX_RADII_SCALING, etc. You may 816 use '--psi4OptionsParams' to modify values for additional DDX options. 817 --psi4DDXListSolvents 818 List solvent names, along with dielectric values, supported by the DDX module 819 for the calculation of solvent energy without performing any calculation. 820 -q, --quiet <yes or no> [default: no] 821 Use quiet mode. The warning and information messages will not be printed. 822 -r, --reference <text> [default: auto] 823 Reference wave function to use for energy calculation. Default: RHF or UHF. 824 The default values are Restricted Hartree-Fock (RHF) for closed-shell molecules 825 with all electrons paired and Unrestricted Hartree-Fock (UHF) for open-shell 826 molecules with unpaired electrons. 827 828 The specified value must be a valid Psi4 reference wave function. No validation 829 is performed. For example: ROHF, CUHF, RKS, etc. 830 831 The spin multiplicity determines the default value of reference wave function 832 for input molecules. It is calculated from number of free radical electrons using 833 Hund's rule of maximum multiplicity defined as 2S + 1 where S is the total 834 electron spin. The total spin is 1/2 the number of free radical electrons in a 835 molecule. The value of 'SpinMultiplicity' molecule property takes precedence 836 over the calculated value of spin multiplicity. 837 -w, --workingdir <dir> 838 Location of working directory which defaults to the current directory. 839 840 Examples: 841 To calculate single point energy using B3LYP/6-31G** and B3LYP/6-31+G** for 842 non-sulfur and sulfur containing molecules in a SD file with 3D structures, use 843 RHF and UHF for closed-shell and open-shell molecules, and write a new SD file, 844 type: 845 846 % Psi4CalculateEnergy.py -i Psi4Sample3D.sdf -o Psi4Sample3DOut.sdf 847 848 To run the first example in multiprocessing mode on all available CPUs 849 without loading all data into memory and write out a SD file, type: 850 851 % Psi4CalculateEnergy.py --mp yes -i Psi4Sample3D.sdf 852 -o Psi4Sample3DOut.sdf 853 854 To run the first example in multiprocessing mode on all available CPUs 855 by loading all data into memory and write out a SD file, type: 856 857 % Psi4CalculateEnergy.py --mp yes --mpParams "inputDataMode, 858 InMemory" -i Psi4Sample3D.sdf -o Psi4Sample3DOut.sdf 859 860 To run the first example in multiprocessing mode on all available CPUs 861 without loading all data into memory along with multiple threads for each 862 Psi4 run and write out a SD file, type: 863 864 % Psi4CalculateEnergy.py --mp yes --psi4RunParams "NumThreads,2" 865 -i Psi4Sample3D.sdf -o Psi4Sample3DOut.sdf 866 867 To run the first example along with calculating DDX solvation energy using 868 default solvent parameters for water and write out a SD file, type: 869 870 % Psi4CalculateEnergy.py --mp yes -i Psi4Sample3D.sdf 871 -o Psi4Sample3DOut.sdf --psi4DDXSolvation yes 872 873 To run the first example along with calculating DDX solvation energy using 874 specific solvartion parameters and write out a SD file, type: 875 876 % Psi4CalculateEnergy.py --mp yes -i Psi4Sample3D.sdf 877 -o Psi4Sample3DOut.sdf --psi4DDXSolvation yes --psi4DDXSolvationParams 878 "solvationModel COSMO solvent water radiiSet UFF" 879 880 To calculate single point energy using a specific method and basis set for 881 molecules in a SD containing 3D structures and write a new SD file, type: 882 883 % Psi4CalculateEnergy.py -m SCF -b aug-cc-pVDZ -i Psi4Sample3D.sdf 884 -o Psi4Sample3DOut.sdf 885 886 Author: 887 Manish Sud(msud@san.rr.com) 888 889 See also: 890 Psi4CalculatePartialCharges.py, Psi4PerformMinimization.py, Psi4GenerateConformers.py 891 892 Copyright: 893 Copyright (C) 2024 Manish Sud. All rights reserved. 894 895 The functionality available in this script is implemented using Psi4, an 896 open source quantum chemistry software package, and RDKit, an open 897 source toolkit for cheminformatics developed by Greg Landrum. 898 899 This file is part of MayaChemTools. 900 901 MayaChemTools is free software; you can redistribute it and/or modify it under 902 the terms of the GNU Lesser General Public License as published by the Free 903 Software Foundation; either version 3 of the License, or (at your option) any 904 later version. 905 906 """ 907 908 if __name__ == "__main__": 909 main()