1 #!/bin/env python 2 # 3 # File: Psi4CalculateEnergy.py 4 # Author: Manish Sud <msud@san.rr.com> 5 # 6 # Copyright (C) 2026 Manish Sud. All rights reserved. 7 # 8 # The functionality available in this script is implemented using Psi4, an 9 # open source quantum chemistry software package, and RDKit, an open 10 # source toolkit for cheminformatics developed by Greg Landrum. 11 # 12 # This file is part of MayaChemTools. 13 # 14 # MayaChemTools is free software; you can redistribute it and/or modify it under 15 # the terms of the GNU Lesser General Public License as published by the Free 16 # Software Foundation; either version 3 of the License, or (at your option) any 17 # later version. 18 # 19 # MayaChemTools is distributed in the hope that it will be useful, but without 20 # any warranty; without even the implied warranty of merchantability of fitness 21 # for a particular purpose. See the GNU Lesser General Public License for more 22 # details. 23 # 24 # You should have received a copy of the GNU Lesser General Public License 25 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or 26 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, 27 # Boston, MA, 02111-1307, USA. 28 # 29 30 from __future__ import print_function 31 32 import os 33 import sys 34 import time 35 import re 36 import shutil 37 import multiprocessing as mp 38 39 # Psi4 imports... 40 if hasattr(shutil, "which") and shutil.which("psi4") is None: 41 sys.stderr.write("\nWarning: Failed to find 'psi4' in your PATH indicating potential issues with your\n") 42 sys.stderr.write("Psi4 environment. The 'import psi4' directive in the global scope of the script\n") 43 sys.stderr.write("interferes with the multiprocessing functionality. It is imported later in the\n") 44 sys.stderr.write("local scope during the execution of the script and may fail. Check/update your\n") 45 sys.stderr.write("Psi4 environment and try again.\n\n") 46 47 # RDKit imports... 48 try: 49 from rdkit import rdBase 50 from rdkit import Chem 51 except ImportError as ErrMsg: 52 sys.stderr.write("\nFailed to import RDKit module/package: %s\n" % ErrMsg) 53 sys.stderr.write("Check/update your RDKit environment and try again.\n\n") 54 sys.exit(1) 55 56 # MayaChemTools imports... 57 sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python")) 58 try: 59 from docopt import docopt 60 import MiscUtil 61 import Psi4Util 62 import RDKitUtil 63 except ImportError as ErrMsg: 64 sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg) 65 sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n") 66 sys.exit(1) 67 68 ScriptName = os.path.basename(sys.argv[0]) 69 Options = {} 70 OptionsInfo = {} 71 72 73 def main(): 74 """Start execution of the script.""" 75 76 MiscUtil.PrintInfo( 77 "\n%s (Psi4: Imported later; RDKit v%s; MayaChemTools v%s; %s): Starting...\n" 78 % (ScriptName, rdBase.rdkitVersion, MiscUtil.GetMayaChemToolsVersion(), time.asctime()) 79 ) 80 81 (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime() 82 83 # Retrieve command line arguments and options... 84 RetrieveOptions() 85 86 # Process and validate command line arguments and options... 87 ProcessOptions() 88 89 # Perform actions required by the script... 90 CalculateEnergy() 91 92 MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName) 93 MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime)) 94 95 96 def CalculateEnergy(): 97 """Calculate single point energy.""" 98 99 # Setup a molecule reader... 100 MiscUtil.PrintInfo("\nProcessing file %s..." % OptionsInfo["Infile"]) 101 Mols = RDKitUtil.ReadMolecules(OptionsInfo["Infile"], **OptionsInfo["InfileParams"]) 102 103 # Set up a molecule writer... 104 Writer = RDKitUtil.MoleculesWriter(OptionsInfo["Outfile"], **OptionsInfo["OutfileParams"]) 105 if Writer is None: 106 MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OptionsInfo["Outfile"]) 107 MiscUtil.PrintInfo("Generating file %s..." % OptionsInfo["Outfile"]) 108 109 MolCount, ValidMolCount, EnergyFailedCount = ProcessMolecules(Mols, Writer) 110 111 if Writer is not None: 112 Writer.close() 113 114 MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount) 115 MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount) 116 MiscUtil.PrintInfo("Number of molecules failed during energy calculation: %d" % EnergyFailedCount) 117 MiscUtil.PrintInfo("Number of ignored molecules: %d" % (MolCount - ValidMolCount + EnergyFailedCount)) 118 119 120 def ProcessMolecules(Mols, Writer): 121 """Process and calculate energy of molecules.""" 122 123 if OptionsInfo["MPMode"]: 124 return ProcessMoleculesUsingMultipleProcesses(Mols, Writer) 125 else: 126 return ProcessMoleculesUsingSingleProcess(Mols, Writer) 127 128 129 def ProcessMoleculesUsingSingleProcess(Mols, Writer): 130 """Process and calculate energy of molecules using a single process.""" 131 132 # Intialize Psi4... 133 MiscUtil.PrintInfo("\nInitializing Psi4...") 134 Psi4Handle = Psi4Util.InitializePsi4( 135 Psi4RunParams=OptionsInfo["Psi4RunParams"], 136 Psi4OptionsParams=OptionsInfo["Psi4OptionsParams"], 137 PrintVersion=True, 138 PrintHeader=True, 139 ) 140 if OptionsInfo["Psi4DDXSolvationMode"]: 141 Psi4Util.UpdatePsi4OptionsParameters(Psi4Handle, OptionsInfo["Psi4DDXSolvationOptions"]) 142 OptionsInfo["psi4"] = Psi4Handle 143 144 # Setup conversion factor for energy units... 145 SetupEnergyConversionFactor(Psi4Handle) 146 147 MiscUtil.PrintInfo("\nCalculating energy...") 148 149 (MolCount, ValidMolCount, EnergyFailedCount) = [0] * 3 150 for Mol in Mols: 151 MolCount += 1 152 153 if not CheckAndValidateMolecule(Mol, MolCount): 154 continue 155 156 # Setup a Psi4 molecule... 157 Psi4Mol = SetupPsi4Mol(Psi4Handle, Mol, MolCount) 158 if Psi4Mol is None: 159 continue 160 161 ValidMolCount += 1 162 163 CalcStatus, Energy, SolvationEnergy = CalculateMoleculeEnergy(Psi4Handle, Psi4Mol, Mol, MolCount) 164 165 if not CalcStatus: 166 if not OptionsInfo["QuietMode"]: 167 MiscUtil.PrintWarning( 168 "Failed to calculate energy for molecule %s" % RDKitUtil.GetMolName(Mol, MolCount) 169 ) 170 171 EnergyFailedCount += 1 172 continue 173 174 WriteMolecule(Writer, Mol, Energy, SolvationEnergy) 175 176 return (MolCount, ValidMolCount, EnergyFailedCount) 177 178 179 def ProcessMoleculesUsingMultipleProcesses(Mols, Writer): 180 """Process and calculate energy of molecules using process.""" 181 182 MiscUtil.PrintInfo("\nCalculating energy using multiprocessing...") 183 184 MPParams = OptionsInfo["MPParams"] 185 186 # Setup data for initializing a worker process... 187 InitializeWorkerProcessArgs = ( 188 MiscUtil.ObjectToBase64EncodedString(Options), 189 MiscUtil.ObjectToBase64EncodedString(OptionsInfo), 190 ) 191 192 # Setup a encoded mols data iterable for a worker process... 193 WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(Mols) 194 195 # Setup process pool along with data initialization for each process... 196 if not OptionsInfo["QuietMode"]: 197 MiscUtil.PrintInfo( 198 "\nConfiguring multiprocessing using %s method..." 199 % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()") 200 ) 201 MiscUtil.PrintInfo( 202 "NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n" 203 % ( 204 MPParams["NumProcesses"], 205 MPParams["InputDataMode"], 206 ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"]), 207 ) 208 ) 209 210 ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeWorkerProcess, InitializeWorkerProcessArgs) 211 212 # Start processing... 213 if re.match("^Lazy$", MPParams["InputDataMode"], re.I): 214 Results = ProcessPool.imap(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"]) 215 elif re.match("^InMemory$", MPParams["InputDataMode"], re.I): 216 Results = ProcessPool.map(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"]) 217 else: 218 MiscUtil.PrintError( 219 'The value, %s, specified for "--inputDataMode" is not supported.' % (MPParams["InputDataMode"]) 220 ) 221 222 # Print out Psi4 version in the main process... 223 MiscUtil.PrintInfo("\nInitializing Psi4...\n") 224 Psi4Handle = Psi4Util.InitializePsi4(PrintVersion=True, PrintHeader=False) 225 OptionsInfo["psi4"] = Psi4Handle 226 227 (MolCount, ValidMolCount, EnergyFailedCount) = [0] * 3 228 for Result in Results: 229 MolCount += 1 230 MolIndex, EncodedMol, CalcStatus, Energy, SolvationEnergy = Result 231 232 if EncodedMol is None: 233 continue 234 235 ValidMolCount += 1 236 237 Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol) 238 239 if not CalcStatus: 240 if not OptionsInfo["QuietMode"]: 241 MolName = RDKitUtil.GetMolName(Mol, MolCount) 242 MiscUtil.PrintWarning("Failed to calculate energy for molecule %s" % MolName) 243 244 EnergyFailedCount += 1 245 continue 246 247 WriteMolecule(Writer, Mol, Energy, SolvationEnergy) 248 249 return (MolCount, ValidMolCount, EnergyFailedCount) 250 251 252 def InitializeWorkerProcess(*EncodedArgs): 253 """Initialize data for a worker process.""" 254 255 global Options, OptionsInfo 256 257 if not OptionsInfo["QuietMode"]: 258 MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid()) 259 260 # Decode Options and OptionInfo... 261 Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0]) 262 OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1]) 263 264 # Psi4 is initialized in the worker process to avoid creation of redundant Psi4 265 # output files for each process... 266 OptionsInfo["Psi4Initialized"] = False 267 268 269 def InitializePsi4ForWorkerProcess(): 270 """Initialize Psi4 for a worker process.""" 271 272 if OptionsInfo["Psi4Initialized"]: 273 return 274 275 OptionsInfo["Psi4Initialized"] = True 276 277 # Update output file... 278 OptionsInfo["Psi4RunParams"]["OutputFile"] = Psi4Util.UpdatePsi4OutputFileUsingPID( 279 OptionsInfo["Psi4RunParams"]["OutputFile"], os.getpid() 280 ) 281 282 # Intialize Psi4... 283 OptionsInfo["psi4"] = Psi4Util.InitializePsi4( 284 Psi4RunParams=OptionsInfo["Psi4RunParams"], 285 Psi4OptionsParams=OptionsInfo["Psi4OptionsParams"], 286 PrintVersion=False, 287 PrintHeader=True, 288 ) 289 if OptionsInfo["Psi4DDXSolvationMode"]: 290 Psi4Util.UpdatePsi4OptionsParameters(OptionsInfo["psi4"], OptionsInfo["Psi4DDXSolvationOptions"]) 291 292 # Setup conversion factor for energy units... 293 SetupEnergyConversionFactor(OptionsInfo["psi4"]) 294 295 296 def WorkerProcess(EncodedMolInfo): 297 """Process data for a worker process.""" 298 299 if not OptionsInfo["Psi4Initialized"]: 300 InitializePsi4ForWorkerProcess() 301 302 MolIndex, EncodedMol = EncodedMolInfo 303 304 CalcStatus = False 305 Energy = None 306 307 if EncodedMol is None: 308 return [MolIndex, None, CalcStatus, Energy] 309 310 Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol) 311 MolCount = MolIndex + 1 312 313 if not CheckAndValidateMolecule(Mol, MolCount): 314 return [MolIndex, None, CalcStatus, Energy] 315 316 # Setup a Psi4 molecule... 317 Psi4Mol = SetupPsi4Mol(OptionsInfo["psi4"], Mol, MolCount) 318 if Psi4Mol is None: 319 return [MolIndex, None, CalcStatus, Energy] 320 321 CalcStatus, Energy, SolvationEnergy = CalculateMoleculeEnergy(OptionsInfo["psi4"], Psi4Mol, Mol, MolCount) 322 323 return [ 324 MolIndex, 325 RDKitUtil.MolToBase64EncodedMolString( 326 Mol, PropertyPickleFlags=Chem.PropertyPickleOptions.MolProps | Chem.PropertyPickleOptions.PrivateProps 327 ), 328 CalcStatus, 329 Energy, 330 SolvationEnergy, 331 ] 332 333 334 def CalculateMoleculeEnergy(Psi4Handle, Psi4Mol, Mol, MolNum=None): 335 """Calculate energy.""" 336 337 Status = False 338 Energy = None 339 SolvationEnergy = None 340 341 # Setup reference wave function... 342 Reference = SetupReferenceWavefunction(Mol) 343 Psi4Handle.set_options({"Reference": Reference}) 344 345 # Setup method name and basis set... 346 MethodName, BasisSet = SetupMethodNameAndBasisSet(Mol) 347 348 if OptionsInfo["Psi4DDXSolvationMode"]: 349 Status, Energy, WaveFunction = Psi4Util.CalculateSinglePointEnergy( 350 Psi4Handle, Psi4Mol, MethodName, BasisSet, ReturnWaveFunction=True, Quiet=OptionsInfo["QuietMode"] 351 ) 352 SolvationEnergy = RetrievePsi4DDXSolvationEnergy(WaveFunction) 353 else: 354 Status, Energy = Psi4Util.CalculateSinglePointEnergy( 355 Psi4Handle, Psi4Mol, MethodName, BasisSet, Quiet=OptionsInfo["QuietMode"] 356 ) 357 358 # Convert energy units... 359 if Status: 360 if OptionsInfo["ApplyEnergyConversionFactor"]: 361 Energy = Energy * OptionsInfo["EnergyConversionFactor"] 362 if SolvationEnergy is not None: 363 SolvationEnergy = SolvationEnergy * OptionsInfo["EnergyConversionFactor"] 364 365 # Clean up 366 PerformPsi4Cleanup(Psi4Handle) 367 368 return (Status, Energy, SolvationEnergy) 369 370 371 def RetrievePsi4DDXSolvationEnergy(WaveFunction): 372 """Retrieve DDX solvation energy.""" 373 374 SolvationEnergy = None 375 try: 376 SolvationEnergy = WaveFunction.variable("DD Solvation Energy") 377 except Exception as ErrMsg: 378 SolvationEnergy = None 379 if not OptionsInfo["QuietMode"]: 380 MiscUtil.PrintWarning("Failed to retrieve Psi4 DD Solvation Energy using wave function: %s" % ErrMsg) 381 382 return SolvationEnergy 383 384 385 def SetupPsi4Mol(Psi4Handle, Mol, MolCount=None): 386 """Setup a Psi4 molecule object.""" 387 388 MolGeometry = RDKitUtil.GetPsi4XYZFormatString(Mol, NoCom=True, NoReorient=True) 389 390 try: 391 Psi4Mol = Psi4Handle.geometry(MolGeometry) 392 except Exception as ErrMsg: 393 Psi4Mol = None 394 if not OptionsInfo["QuietMode"]: 395 MiscUtil.PrintWarning("Failed to create Psi4 molecule from geometry string: %s\n" % ErrMsg) 396 MolName = RDKitUtil.GetMolName(Mol, MolCount) 397 MiscUtil.PrintWarning("Ignoring molecule: %s" % MolName) 398 399 return Psi4Mol 400 401 402 def PerformPsi4Cleanup(Psi4Handle): 403 """Perform clean up.""" 404 405 # Clean up after Psi4 run... 406 Psi4Handle.core.clean() 407 408 # Clean up any leftover scratch files... 409 if OptionsInfo["MPMode"]: 410 Psi4Util.RemoveScratchFiles(Psi4Handle, OptionsInfo["Psi4RunParams"]["OutputFile"]) 411 412 413 def CheckAndValidateMolecule(Mol, MolCount=None): 414 """Validate molecule for Psi4 calculations.""" 415 416 if Mol is None: 417 if not OptionsInfo["QuietMode"]: 418 MiscUtil.PrintInfo("\nProcessing molecule number %s..." % MolCount) 419 return False 420 421 MolName = RDKitUtil.GetMolName(Mol, MolCount) 422 if not OptionsInfo["QuietMode"]: 423 MiscUtil.PrintInfo("\nProcessing molecule %s..." % MolName) 424 425 if RDKitUtil.IsMolEmpty(Mol): 426 if not OptionsInfo["QuietMode"]: 427 MiscUtil.PrintWarning("Ignoring empty molecule: %s\n" % MolName) 428 return False 429 430 if not RDKitUtil.ValidateElementSymbols(RDKitUtil.GetAtomSymbols(Mol)): 431 if not OptionsInfo["QuietMode"]: 432 MiscUtil.PrintWarning("Ignoring molecule containing invalid element symbols: %s\n" % MolName) 433 return False 434 435 # Check for 3D flag... 436 if not Mol.GetConformer().Is3D(): 437 if not OptionsInfo["QuietMode"]: 438 MiscUtil.PrintWarning("3D tag is not set for molecule: %s\n" % MolName) 439 440 # Check for missing hydrogens... 441 if RDKitUtil.AreHydrogensMissingInMolecule(Mol): 442 if not OptionsInfo["QuietMode"]: 443 MiscUtil.PrintWarning("Missing hydrogens in molecule: %s\n" % MolName) 444 445 return True 446 447 448 def SetupMethodNameAndBasisSet(Mol): 449 """Setup method name and basis set.""" 450 451 MethodName = OptionsInfo["MethodName"] 452 if OptionsInfo["MethodNameAuto"]: 453 MethodName = "B3LYP" 454 455 BasisSet = OptionsInfo["BasisSet"] 456 if OptionsInfo["BasisSetAuto"]: 457 BasisSet = "6-31+G**" if RDKitUtil.IsAtomSymbolPresentInMol(Mol, "S") else "6-31G**" 458 459 return (MethodName, BasisSet) 460 461 462 def SetupReferenceWavefunction(Mol): 463 """Setup reference wavefunction.""" 464 465 Reference = OptionsInfo["Reference"] 466 if OptionsInfo["ReferenceAuto"]: 467 Reference = "UHF" if (RDKitUtil.GetSpinMultiplicity(Mol) > 1) else "RHF" 468 469 return Reference 470 471 472 def SetupEnergyConversionFactor(Psi4Handle): 473 """Setup converstion factor for energt units. The Psi4 energy units are Hartrees.""" 474 475 EnergyUnits = OptionsInfo["EnergyUnits"] 476 477 ApplyConversionFactor = True 478 if re.match(r"^kcal\/mol$", EnergyUnits, re.I): 479 ConversionFactor = Psi4Handle.constants.hartree2kcalmol 480 elif re.match(r"^kJ\/mol$", EnergyUnits, re.I): 481 ConversionFactor = Psi4Handle.constants.hartree2kJmol 482 elif re.match("^eV$", EnergyUnits, re.I): 483 ConversionFactor = Psi4Handle.constants.hartree2ev 484 else: 485 ApplyConversionFactor = False 486 ConversionFactor = 1.0 487 488 OptionsInfo["ApplyEnergyConversionFactor"] = ApplyConversionFactor 489 OptionsInfo["EnergyConversionFactor"] = ConversionFactor 490 491 492 def WriteMolecule(Writer, Mol, Energy, SolvationEnergy): 493 """Write molecule.""" 494 495 Energy = "%.*f" % (OptionsInfo["Precision"], Energy) 496 Mol.SetProp(OptionsInfo["EnergyDataFieldLabel"], Energy) 497 498 if OptionsInfo["Psi4DDXSolvationMode"] and SolvationEnergy is not None: 499 SolvationEnergy = "%.*f" % (OptionsInfo["Precision"], SolvationEnergy) 500 Mol.SetProp(OptionsInfo["SolvationEnergyDataFieldLabel"], SolvationEnergy) 501 502 Writer.write(Mol) 503 504 505 def CheckAndSetupDDX(): 506 """Load pyddx and return its handle.""" 507 508 try: 509 import pyddx 510 except ImportError as ErrMsg: 511 sys.stderr.write("\nFailed to import module/package pyddx for solvation: %s\n" % ErrMsg) 512 sys.stderr.write("Psi4 relies on pyddx module for DDX solvation calculation.\n") 513 sys.stderr.write("You may need to install it from pypi or conda-forge.\n") 514 sys.stderr.write("Check/update your environment and try again.\n\n") 515 sys.exit(1) 516 517 return pyddx 518 519 520 def ProcessDDXListSolventsOption(): 521 """List solvent information provided by tge DDX module for the calculation 522 of solvation energy.""" 523 524 PyDDXHandle = CheckAndSetupDDX() 525 526 MiscUtil.PrintInfo("Solvent name and dielectric constant information avaibale in the DDX module:\n") 527 MiscUtil.PrintInfo("Solvent\tEpsilon") 528 Count = 0 529 for Solvent in sorted(PyDDXHandle.data.solvent_epsilon.keys()): 530 Count += 1 531 Epsilon = PyDDXHandle.data.solvent_epsilon[Solvent] 532 MiscUtil.PrintInfo("%s\t%s" % (Solvent, Epsilon)) 533 534 MiscUtil.PrintInfo("\nTotal number of solvents: %s\n" % Count) 535 536 537 def ProcessOptions(): 538 """Process and validate command line arguments and options.""" 539 540 MiscUtil.PrintInfo("Processing options...") 541 542 # Validate options... 543 ValidateOptions() 544 545 OptionsInfo["Infile"] = Options["--infile"] 546 ParamsDefaultInfoOverride = {"RemoveHydrogens": False} 547 OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters( 548 "--infileParams", 549 Options["--infileParams"], 550 InfileName=Options["--infile"], 551 ParamsDefaultInfo=ParamsDefaultInfoOverride, 552 ) 553 554 OptionsInfo["Outfile"] = Options["--outfile"] 555 OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters( 556 "--outfileParams", Options["--outfileParams"] 557 ) 558 559 OptionsInfo["Overwrite"] = Options["--overwrite"] 560 561 # Method, basis set, and reference wavefunction... 562 OptionsInfo["BasisSet"] = Options["--basisSet"] 563 OptionsInfo["BasisSetAuto"] = True if re.match("^auto$", Options["--basisSet"], re.I) else False 564 565 OptionsInfo["MethodName"] = Options["--methodName"] 566 OptionsInfo["MethodNameAuto"] = True if re.match("^auto$", Options["--methodName"], re.I) else False 567 568 OptionsInfo["Reference"] = Options["--reference"] 569 OptionsInfo["ReferenceAuto"] = True if re.match("^auto$", Options["--reference"], re.I) else False 570 571 # Run and options parameters... 572 OptionsInfo["Psi4OptionsParams"] = Psi4Util.ProcessPsi4OptionsParameters( 573 "--psi4OptionsParams", Options["--psi4OptionsParams"] 574 ) 575 OptionsInfo["Psi4RunParams"] = Psi4Util.ProcessPsi4RunParameters( 576 "--psi4RunParams", Options["--psi4RunParams"], InfileName=OptionsInfo["Infile"] 577 ) 578 579 # DDX solvation parameters... 580 OptionsInfo["Psi4DDXSolvationMode"] = True if re.match("^yes$", Options["--psi4DDXSolvation"], re.I) else False 581 Psi4DDXSolvationParams = Psi4Util.ProcessPsi4DDXSolvationParameters( 582 "--psi4DDXSolvationParams", Options["--psi4DDXSolvationParams"] 583 ) 584 Psi4DDXSolvationOptions = Psi4Util.SetupPsi4DDXSolvationOptions( 585 OptionsInfo["Psi4DDXSolvationMode"], Psi4DDXSolvationParams 586 ) 587 OptionsInfo["Psi4DDXSolvationParams"] = Psi4DDXSolvationParams 588 OptionsInfo["Psi4DDXSolvationOptions"] = Psi4DDXSolvationOptions 589 590 if OptionsInfo["Psi4DDXSolvationMode"]: 591 CheckAndSetupDDX() 592 593 # Energy units and label... 594 OptionsInfo["EnergyUnits"] = Options["--energyUnits"] 595 EnergyDataFieldLabel = Options["--energyDataFieldLabel"] 596 if re.match("^auto$", EnergyDataFieldLabel, re.I): 597 EnergyDataFieldLabel = "Psi4_Energy (%s)" % Options["--energyUnits"] 598 OptionsInfo["EnergyDataFieldLabel"] = EnergyDataFieldLabel 599 600 SolvationEnergyDataFieldLabel = "Psi4_DDX_Solvation_Energy (%s)" % Options["--energyUnits"] 601 OptionsInfo["SolvationEnergyDataFieldLabel"] = SolvationEnergyDataFieldLabel 602 603 OptionsInfo["MPMode"] = True if re.match("^yes$", Options["--mp"], re.I) else False 604 OptionsInfo["MPParams"] = MiscUtil.ProcessOptionMultiprocessingParameters("--mpParams", Options["--mpParams"]) 605 606 OptionsInfo["Precision"] = int(Options["--precision"]) 607 OptionsInfo["QuietMode"] = True if re.match("^yes$", Options["--quiet"], re.I) else False 608 609 610 def RetrieveOptions(): 611 """Retrieve command line arguments and options.""" 612 613 # Get options... 614 global Options 615 Options = docopt(_docoptUsage_) 616 617 # Set current working directory to the specified directory... 618 WorkingDir = Options["--workingdir"] 619 if WorkingDir: 620 os.chdir(WorkingDir) 621 622 # Handle examples option... 623 if "--examples" in Options and Options["--examples"]: 624 MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_)) 625 sys.exit(0) 626 627 # Handle listing of solvent information... 628 if Options and Options["--psi4DDXListSolvents"]: 629 ProcessDDXListSolventsOption() 630 sys.exit(0) 631 632 633 def ValidateOptions(): 634 """Validate option values.""" 635 636 MiscUtil.ValidateOptionTextValue("--energyUnits", Options["--energyUnits"], "Hartrees kcal/mol kJ/mol eV") 637 638 MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"]) 639 MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "sdf sd mol") 640 641 MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "sdf sd") 642 MiscUtil.ValidateOptionsOutputFileOverwrite( 643 "-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"] 644 ) 645 MiscUtil.ValidateOptionsDistinctFileNames( 646 "-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"] 647 ) 648 649 MiscUtil.ValidateOptionTextValue("--mp", Options["--mp"], "yes no") 650 MiscUtil.ValidateOptionIntegerValue("-p, --precision", Options["--precision"], {">": 0}) 651 652 MiscUtil.ValidateOptionTextValue("--psi4DDXSolvation", Options["--psi4DDXSolvation"], "yes no") 653 654 MiscUtil.ValidateOptionTextValue("-q, --quiet", Options["--quiet"], "yes no") 655 656 657 # Setup a usage string for docopt... 658 _docoptUsage_ = """ 659 Psi4CalculateEnergy.py - Calculate single point energy 660 661 Usage: 662 Psi4CalculateEnergy.py [--basisSet <text>] [--energyDataFieldLabel <text>] [--energyUnits <text>] 663 [--infileParams <Name,Value,...>] [--methodName <text>] [--mp <yes or no>] 664 [--mpParams <Name, Value,...>] [ --outfileParams <Name,Value,...> ] [--overwrite] 665 [--precision <number>] [--psi4OptionsParams <Name,Value,...>] [--psi4RunParams <Name,Value,...>] 666 [--psi4DDXSolvation <yes or no>] [--psi4DDXSolvationParams <Name,Value,...>] 667 [--quiet <yes or no>] [--reference <text>] [-w <dir>] -i <infile> -o <outfile> 668 Psi4CalculateEnergy.py --psi4DDXListSolvents 669 Psi4CalculateEnergy.py -h | --help | -e | --examples 670 671 Description: 672 Calculate single point energy for molecules using a specified method name and 673 basis set. The molecules must have 3D coordinates in input file. The molecular 674 geometry is not optimized before the calculation. In addition, the hydrogens 675 must be present for all molecules in input file. The 3D coordinates are not 676 modified during the calculation. 677 678 A Psi4 XYZ format geometry string is automatically generated for each molecule 679 in input file. It contains atom symbols and 3D coordinates for each atom in a 680 molecule. In addition, the formal charge and spin multiplicity are present in the 681 the geometry string. These values are either retrieved from molecule properties 682 named 'FormalCharge' and 'SpinMultiplicty' or dynamically calculated for a 683 molecule. 684 685 The supported input file formats are: Mol (.mol), SD (.sdf, .sd) 686 687 The supported output file formats are: SD (.sdf, .sd) 688 689 Options: 690 -b, --basisSet <text> [default: auto] 691 Basis set to use for energy calculation. Default: 6-31+G** for sulfur 692 containing molecules; Otherwise, 6-31G** [ Ref 150 ]. The specified 693 value must be a valid Psi4 basis set. No validation is performed. 694 695 The following list shows a representative sample of basis sets available 696 in Psi4: 697 698 STO-3G, 6-31G, 6-31+G, 6-31++G, 6-31G*, 6-31+G*, 6-31++G*, 699 6-31G**, 6-31+G**, 6-31++G**, 6-311G, 6-311+G, 6-311++G, 700 6-311G*, 6-311+G*, 6-311++G*, 6-311G**, 6-311+G**, 6-311++G**, 701 cc-pVDZ, cc-pCVDZ, aug-cc-pVDZ, cc-pVDZ-DK, cc-pCVDZ-DK, def2-SVP, 702 def2-SVPD, def2-TZVP, def2-TZVPD, def2-TZVPP, def2-TZVPPD 703 704 --energyDataFieldLabel <text> [default: auto] 705 Energy data field label for writing energy values. Default: Psi4_Energy (<Units>). 706 --energyUnits <text> [default: kcal/mol] 707 Energy units. Possible values: Hartrees, kcal/mol, kJ/mol, or eV. 708 -e, --examples 709 Print examples. 710 -h, --help 711 Print this help message. 712 -i, --infile <infile> 713 Input file name. 714 --infileParams <Name,Value,...> [default: auto] 715 A comma delimited list of parameter name and value pairs for reading 716 molecules from files. The supported parameter names for different file 717 formats, along with their default values, are shown below: 718 719 SD, MOL: removeHydrogens,no,sanitize,yes,strictParsing,yes 720 721 -m, --methodName <text> [default: auto] 722 Method to use for energy calculation. Default: B3LYP [ Ref 150 ]. The 723 specified value must be a valid Psi4 method name. No validation is 724 performed. 725 726 The following list shows a representative sample of methods available 727 in Psi4: 728 729 B1LYP, B2PLYP, B2PLYP-D3BJ, B2PLYP-D3MBJ, B3LYP, B3LYP-D3BJ, 730 B3LYP-D3MBJ, CAM-B3LYP, CAM-B3LYP-D3BJ, HF, HF-D3BJ, HF3c, M05, 731 M06, M06-2x, M06-HF, M06-L, MN12-L, MN15, MN15-D3BJ,PBE, PBE0, 732 PBEH3c, PW6B95, PW6B95-D3BJ, WB97, WB97X, WB97X-D, WB97X-D3BJ 733 734 --mp <yes or no> [default: no] 735 Use multiprocessing. 736 737 By default, input data is retrieved in a lazy manner via mp.Pool.imap() 738 function employing lazy RDKit data iterable. This allows processing of 739 arbitrary large data sets without any additional requirements memory. 740 741 All input data may be optionally loaded into memory by mp.Pool.map() 742 before starting worker processes in a process pool by setting the value 743 of 'inputDataMode' to 'InMemory' in '--mpParams' option. 744 745 A word to the wise: The default 'chunkSize' value of 1 during 'Lazy' input 746 data mode may adversely impact the performance. The '--mpParams' section 747 provides additional information to tune the value of 'chunkSize'. 748 --mpParams <Name,Value,...> [default: auto] 749 A comma delimited list of parameter name and value pairs to configure 750 multiprocessing. 751 752 The supported parameter names along with their default and possible 753 values are shown below: 754 755 chunkSize, auto 756 inputDataMode, Lazy [ Possible values: InMemory or Lazy ] 757 numProcesses, auto [ Default: mp.cpu_count() ] 758 759 These parameters are used by the following functions to configure and 760 control the behavior of multiprocessing: mp.Pool(), mp.Pool.map(), and 761 mp.Pool.imap(). 762 763 The chunkSize determines chunks of input data passed to each worker 764 process in a process pool by mp.Pool.map() and mp.Pool.imap() functions. 765 The default value of chunkSize is dependent on the value of 'inputDataMode'. 766 767 The mp.Pool.map() function, invoked during 'InMemory' input data mode, 768 automatically converts RDKit data iterable into a list, loads all data into 769 memory, and calculates the default chunkSize using the following method 770 as shown in its code: 771 772 chunkSize, extra = divmod(len(dataIterable), len(numProcesses) * 4) 773 if extra: chunkSize += 1 774 775 For example, the default chunkSize will be 7 for a pool of 4 worker processes 776 and 100 data items. 777 778 The mp.Pool.imap() function, invoked during 'Lazy' input data mode, employs 779 'lazy' RDKit data iterable to retrieve data as needed, without loading all the 780 data into memory. Consequently, the size of input data is not known a priori. 781 It's not possible to estimate an optimal value for the chunkSize. The default 782 chunkSize is set to 1. 783 784 The default value for the chunkSize during 'Lazy' data mode may adversely 785 impact the performance due to the overhead associated with exchanging 786 small chunks of data. It is generally a good idea to explicitly set chunkSize to 787 a larger value during 'Lazy' input data mode, based on the size of your input 788 data and number of processes in the process pool. 789 790 The mp.Pool.map() function waits for all worker processes to process all 791 the data and return the results. The mp.Pool.imap() function, however, 792 returns the the results obtained from worker processes as soon as the 793 results become available for specified chunks of data. 794 795 The order of data in the results returned by both mp.Pool.map() and 796 mp.Pool.imap() functions always corresponds to the input data. 797 -o, --outfile <outfile> 798 Output file name. 799 --outfileParams <Name,Value,...> [default: auto] 800 A comma delimited list of parameter name and value pairs for writing 801 molecules to files. The supported parameter names for different file 802 formats, along with their default values, are shown below: 803 804 SD: kekulize,yes,forceV3000,no 805 806 --overwrite 807 Overwrite existing files. 808 --precision <number> [default: 6] 809 Floating point precision for writing energy values. 810 --psi4OptionsParams <Name,Value,...> [default: none] 811 A comma delimited list of Psi4 option name and value pairs for setting 812 global and module options. The names are 'option_name' for global options 813 and 'module_name__option_name' for options local to a module. The 814 specified option names must be valid Psi4 names. No validation is 815 performed. 816 817 The specified option name and value pairs are processed and passed to 818 psi4.set_options() as a dictionary. The supported value types are float, 819 integer, boolean, or string. The float value string is converted into a float. 820 The valid values for a boolean string are yes, no, true, false, on, or off. 821 --psi4RunParams <Name,Value,...> [default: auto] 822 A comma delimited list of parameter name and value pairs for configuring 823 Psi4 jobs. 824 825 The supported parameter names along with their default and possible 826 values are shown below: 827 828 MemoryInGB, 1 829 NumThreads, 1 830 OutputFile, auto [ Possible values: stdout, quiet, or FileName ] 831 ScratchDir, auto [ Possivle values: DirName] 832 RemoveOutputFile, yes [ Possible values: yes, no, true, or false] 833 834 These parameters control the runtime behavior of Psi4. 835 836 The default file name for 'OutputFile' is <InFileRoot>_Psi4.out. The PID 837 is appended to output file name during multiprocessing as shown: 838 <InFileRoot>_Psi4_<PIDNum>.out. The 'stdout' value for 'OutputType' 839 sends Psi4 output to stdout. The 'quiet' or 'devnull' value suppresses 840 all Psi4 output. 841 842 The default 'Yes' value of 'RemoveOutputFile' option forces the removal 843 of any existing Psi4 before creating new files to append output from 844 multiple Psi4 runs. 845 846 The option 'ScratchDir' is a directory path to the location of scratch 847 files. The default value corresponds to Psi4 default. It may be used to 848 override the deafult path. 849 --psi4DDXSolvation <yes or no> [default: no] 850 Perform energy calculation in solution using domain-decomposition-based 851 continuum solvation models [ Ref 160-161 ] The script relies on Psi4 852 interface to the DDX module to perform these calculations. The DDX library 853 provides a linear-scaling implementation of standard continuum solvation 854 models using a domain- decomposition ansatz. Two solvation models are 855 supported: COnductor-like Screening MOdel (COSMO) [ Ref 162-163] and 856 Polarizable Continuum Model (PCM) [ Ref 164-165 ]. 857 858 The solvation energy is included in the value of the total energy written to 859 the output SD file. In addition, the value of solvation energy is written to the 860 output file under its own data field label. 861 862 Psi4 relies on Python module PYDDX to calculate solvation energy. It must be 863 present in your environment. 864 --psi4DDXSolvationParams <Name,Value,...> [default: auto] 865 A space delimited list of parameter name and value pairs for calculating 866 solvation energy using the DDX module. The supported parameter names, 867 along with their default values, are shown below: 868 869 solvationModel PCM [ Possible values: COSMO or PCM] 870 solvent water [ Possible values: A valid solvent name] 871 solventEpsilon None 872 radiiSet UFF [ Possible values: Bondi or UFF] 873 radiiScaling auto [ Default: 1.2 for Bondi; 1.1 for UFF] 874 875 solvationModel: Solvation model for calculating solvation energy. 876 The corresponding Psi4 option is DDX_MODEL. 877 878 solvent: Solvent to use. The corresponding Ps4 option is 879 DDX_SOLVENT 880 881 solventEpsilon: Dielectric constant of the solvent. The 882 corresponding Psi4 option is DDX_SOLVENT_EPSILON. 883 884 radiiSet: Radius set for cavity spheres. The corresponding Psi4 885 option is DDX_RADII_SET. 886 887 radiiScaling: Scaling factor for cavity spheres. The default value 888 depends on radiiSet: 1.2 for Bondi; 1.1 for UFF. The corresponding 889 Psi4 option is DDX_RADII_SCALING. 890 891 These parameter names are automatically mapped to appropriate DDX keywords. 892 893 You may specify the solvent either by directly providing a dielectric constant using 894 'solventEpsilon' parameter or a solvent name corresponding to 'solvent' parameter. 895 The solvent name must be a valid named supported by the DDX module. For example: 896 water, ethanol, dimethylsulfoxide, cis-1,2-dimethylcyclohexane , etc. The 'solvent' 897 parameter is ignored for non-zero value of 'solvent' option. 898 899 The DDX module contains a variety of addtional options to confgure the calculation 900 of solvation energy. For example: DDX_MAXITER, DDX_RADII_SCALING, etc. You may 901 use '--psi4OptionsParams' to modify values for additional DDX options. 902 --psi4DDXListSolvents 903 List solvent names, along with dielectric values, supported by the DDX module 904 for the calculation of solvent energy without performing any calculation. 905 -q, --quiet <yes or no> [default: no] 906 Use quiet mode. The warning and information messages will not be printed. 907 -r, --reference <text> [default: auto] 908 Reference wave function to use for energy calculation. Default: RHF or UHF. 909 The default values are Restricted Hartree-Fock (RHF) for closed-shell molecules 910 with all electrons paired and Unrestricted Hartree-Fock (UHF) for open-shell 911 molecules with unpaired electrons. 912 913 The specified value must be a valid Psi4 reference wave function. No validation 914 is performed. For example: ROHF, CUHF, RKS, etc. 915 916 The spin multiplicity determines the default value of reference wave function 917 for input molecules. It is calculated from number of free radical electrons using 918 Hund's rule of maximum multiplicity defined as 2S + 1 where S is the total 919 electron spin. The total spin is 1/2 the number of free radical electrons in a 920 molecule. The value of 'SpinMultiplicity' molecule property takes precedence 921 over the calculated value of spin multiplicity. 922 -w, --workingdir <dir> 923 Location of working directory which defaults to the current directory. 924 925 Examples: 926 To calculate single point energy using B3LYP/6-31G** and B3LYP/6-31+G** for 927 non-sulfur and sulfur containing molecules in a SD file with 3D structures, use 928 RHF and UHF for closed-shell and open-shell molecules, and write a new SD file, 929 type: 930 931 % Psi4CalculateEnergy.py -i Psi4Sample3D.sdf -o Psi4Sample3DOut.sdf 932 933 To run the first example in multiprocessing mode on all available CPUs 934 without loading all data into memory and write out a SD file, type: 935 936 % Psi4CalculateEnergy.py --mp yes -i Psi4Sample3D.sdf 937 -o Psi4Sample3DOut.sdf 938 939 To run the first example in multiprocessing mode on all available CPUs 940 by loading all data into memory and write out a SD file, type: 941 942 % Psi4CalculateEnergy.py --mp yes --mpParams "inputDataMode, 943 InMemory" -i Psi4Sample3D.sdf -o Psi4Sample3DOut.sdf 944 945 To run the first example in multiprocessing mode on all available CPUs 946 without loading all data into memory along with multiple threads for each 947 Psi4 run and write out a SD file, type: 948 949 % Psi4CalculateEnergy.py --mp yes --psi4RunParams "NumThreads,2" 950 -i Psi4Sample3D.sdf -o Psi4Sample3DOut.sdf 951 952 To run the first example along with calculating DDX solvation energy using 953 default solvent parameters for water and write out a SD file, type: 954 955 % Psi4CalculateEnergy.py --mp yes -i Psi4Sample3D.sdf 956 -o Psi4Sample3DOut.sdf --psi4DDXSolvation yes 957 958 To run the first example along with calculating DDX solvation energy using 959 specific solvartion parameters and write out a SD file, type: 960 961 % Psi4CalculateEnergy.py --mp yes -i Psi4Sample3D.sdf 962 -o Psi4Sample3DOut.sdf --psi4DDXSolvation yes --psi4DDXSolvationParams 963 "solvationModel COSMO solvent water radiiSet UFF" 964 965 To calculate single point energy using a specific method and basis set for 966 molecules in a SD containing 3D structures and write a new SD file, type: 967 968 % Psi4CalculateEnergy.py -m SCF -b aug-cc-pVDZ -i Psi4Sample3D.sdf 969 -o Psi4Sample3DOut.sdf 970 971 Author: 972 Manish Sud(msud@san.rr.com) 973 974 See also: 975 Psi4CalculatePartialCharges.py, Psi4PerformMinimization.py, Psi4GenerateConformers.py 976 977 Copyright: 978 Copyright (C) 2026 Manish Sud. All rights reserved. 979 980 The functionality available in this script is implemented using Psi4, an 981 open source quantum chemistry software package, and RDKit, an open 982 source toolkit for cheminformatics developed by Greg Landrum. 983 984 This file is part of MayaChemTools. 985 986 MayaChemTools is free software; you can redistribute it and/or modify it under 987 the terms of the GNU Lesser General Public License as published by the Free 988 Software Foundation; either version 3 of the License, or (at your option) any 989 later version. 990 991 """ 992 993 if __name__ == "__main__": 994 main()