MayaChemTools

   1 #!/bin/env python
   2 #
   3 # File: Psi4CalculateEnergy.py
   4 # Author: Manish Sud <msud@san.rr.com>
   5 #
   6 # Copyright (C) 2026 Manish Sud. All rights reserved.
   7 #
   8 # The functionality available in this script is implemented using Psi4, an
   9 # open source quantum chemistry software package, and RDKit, an open
  10 # source toolkit for cheminformatics developed by Greg Landrum.
  11 #
  12 # This file is part of MayaChemTools.
  13 #
  14 # MayaChemTools is free software; you can redistribute it and/or modify it under
  15 # the terms of the GNU Lesser General Public License as published by the Free
  16 # Software Foundation; either version 3 of the License, or (at your option) any
  17 # later version.
  18 #
  19 # MayaChemTools is distributed in the hope that it will be useful, but without
  20 # any warranty; without even the implied warranty of merchantability of fitness
  21 # for a particular purpose.  See the GNU Lesser General Public License for more
  22 # details.
  23 #
  24 # You should have received a copy of the GNU Lesser General Public License
  25 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  26 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  27 # Boston, MA, 02111-1307, USA.
  28 #
  29 
  30 from __future__ import print_function
  31 
  32 import os
  33 import sys
  34 import time
  35 import re
  36 import shutil
  37 import multiprocessing as mp
  38 
  39 # Psi4 imports...
  40 if hasattr(shutil, "which") and shutil.which("psi4") is None:
  41     sys.stderr.write("\nWarning: Failed to find 'psi4' in your PATH indicating potential issues with your\n")
  42     sys.stderr.write("Psi4 environment. The 'import psi4' directive in the global scope of the script\n")
  43     sys.stderr.write("interferes with the multiprocessing functionality. It is imported later in the\n")
  44     sys.stderr.write("local scope during the execution of the script and may fail. Check/update your\n")
  45     sys.stderr.write("Psi4 environment and try again.\n\n")
  46 
  47 # RDKit imports...
  48 try:
  49     from rdkit import rdBase
  50     from rdkit import Chem
  51 except ImportError as ErrMsg:
  52     sys.stderr.write("\nFailed to import RDKit module/package: %s\n" % ErrMsg)
  53     sys.stderr.write("Check/update your RDKit environment and try again.\n\n")
  54     sys.exit(1)
  55 
  56 # MayaChemTools imports...
  57 sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python"))
  58 try:
  59     from docopt import docopt
  60     import MiscUtil
  61     import Psi4Util
  62     import RDKitUtil
  63 except ImportError as ErrMsg:
  64     sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg)
  65     sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n")
  66     sys.exit(1)
  67 
  68 ScriptName = os.path.basename(sys.argv[0])
  69 Options = {}
  70 OptionsInfo = {}
  71 
  72 
  73 def main():
  74     """Start execution of the script."""
  75 
  76     MiscUtil.PrintInfo(
  77         "\n%s (Psi4: Imported later; RDKit v%s; MayaChemTools v%s; %s): Starting...\n"
  78         % (ScriptName, rdBase.rdkitVersion, MiscUtil.GetMayaChemToolsVersion(), time.asctime())
  79     )
  80 
  81     (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime()
  82 
  83     # Retrieve command line arguments and options...
  84     RetrieveOptions()
  85 
  86     # Process and validate command line arguments and options...
  87     ProcessOptions()
  88 
  89     # Perform actions required by the script...
  90     CalculateEnergy()
  91 
  92     MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName)
  93     MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime))
  94 
  95 
  96 def CalculateEnergy():
  97     """Calculate single point energy."""
  98 
  99     # Setup a molecule reader...
 100     MiscUtil.PrintInfo("\nProcessing file %s..." % OptionsInfo["Infile"])
 101     Mols = RDKitUtil.ReadMolecules(OptionsInfo["Infile"], **OptionsInfo["InfileParams"])
 102 
 103     # Set up a molecule writer...
 104     Writer = RDKitUtil.MoleculesWriter(OptionsInfo["Outfile"], **OptionsInfo["OutfileParams"])
 105     if Writer is None:
 106         MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OptionsInfo["Outfile"])
 107     MiscUtil.PrintInfo("Generating file %s..." % OptionsInfo["Outfile"])
 108 
 109     MolCount, ValidMolCount, EnergyFailedCount = ProcessMolecules(Mols, Writer)
 110 
 111     if Writer is not None:
 112         Writer.close()
 113 
 114     MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount)
 115     MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount)
 116     MiscUtil.PrintInfo("Number of molecules failed during energy calculation: %d" % EnergyFailedCount)
 117     MiscUtil.PrintInfo("Number of ignored molecules: %d" % (MolCount - ValidMolCount + EnergyFailedCount))
 118 
 119 
 120 def ProcessMolecules(Mols, Writer):
 121     """Process and calculate energy of molecules."""
 122 
 123     if OptionsInfo["MPMode"]:
 124         return ProcessMoleculesUsingMultipleProcesses(Mols, Writer)
 125     else:
 126         return ProcessMoleculesUsingSingleProcess(Mols, Writer)
 127 
 128 
 129 def ProcessMoleculesUsingSingleProcess(Mols, Writer):
 130     """Process and calculate energy of molecules using a single process."""
 131 
 132     # Intialize Psi4...
 133     MiscUtil.PrintInfo("\nInitializing Psi4...")
 134     Psi4Handle = Psi4Util.InitializePsi4(
 135         Psi4RunParams=OptionsInfo["Psi4RunParams"],
 136         Psi4OptionsParams=OptionsInfo["Psi4OptionsParams"],
 137         PrintVersion=True,
 138         PrintHeader=True,
 139     )
 140     if OptionsInfo["Psi4DDXSolvationMode"]:
 141         Psi4Util.UpdatePsi4OptionsParameters(Psi4Handle, OptionsInfo["Psi4DDXSolvationOptions"])
 142     OptionsInfo["psi4"] = Psi4Handle
 143 
 144     # Setup conversion factor for energy units...
 145     SetupEnergyConversionFactor(Psi4Handle)
 146 
 147     MiscUtil.PrintInfo("\nCalculating energy...")
 148 
 149     (MolCount, ValidMolCount, EnergyFailedCount) = [0] * 3
 150     for Mol in Mols:
 151         MolCount += 1
 152 
 153         if not CheckAndValidateMolecule(Mol, MolCount):
 154             continue
 155 
 156         # Setup a Psi4 molecule...
 157         Psi4Mol = SetupPsi4Mol(Psi4Handle, Mol, MolCount)
 158         if Psi4Mol is None:
 159             continue
 160 
 161         ValidMolCount += 1
 162 
 163         CalcStatus, Energy, SolvationEnergy = CalculateMoleculeEnergy(Psi4Handle, Psi4Mol, Mol, MolCount)
 164 
 165         if not CalcStatus:
 166             if not OptionsInfo["QuietMode"]:
 167                 MiscUtil.PrintWarning(
 168                     "Failed to calculate energy for molecule %s" % RDKitUtil.GetMolName(Mol, MolCount)
 169                 )
 170 
 171             EnergyFailedCount += 1
 172             continue
 173 
 174         WriteMolecule(Writer, Mol, Energy, SolvationEnergy)
 175 
 176     return (MolCount, ValidMolCount, EnergyFailedCount)
 177 
 178 
 179 def ProcessMoleculesUsingMultipleProcesses(Mols, Writer):
 180     """Process and calculate energy of molecules using  process."""
 181 
 182     MiscUtil.PrintInfo("\nCalculating energy using multiprocessing...")
 183 
 184     MPParams = OptionsInfo["MPParams"]
 185 
 186     # Setup data for initializing a worker process...
 187     InitializeWorkerProcessArgs = (
 188         MiscUtil.ObjectToBase64EncodedString(Options),
 189         MiscUtil.ObjectToBase64EncodedString(OptionsInfo),
 190     )
 191 
 192     # Setup a encoded mols data iterable for a worker process...
 193     WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(Mols)
 194 
 195     # Setup process pool along with data initialization for each process...
 196     if not OptionsInfo["QuietMode"]:
 197         MiscUtil.PrintInfo(
 198             "\nConfiguring multiprocessing using %s method..."
 199             % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()")
 200         )
 201         MiscUtil.PrintInfo(
 202             "NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n"
 203             % (
 204                 MPParams["NumProcesses"],
 205                 MPParams["InputDataMode"],
 206                 ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"]),
 207             )
 208         )
 209 
 210     ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeWorkerProcess, InitializeWorkerProcessArgs)
 211 
 212     # Start processing...
 213     if re.match("^Lazy$", MPParams["InputDataMode"], re.I):
 214         Results = ProcessPool.imap(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
 215     elif re.match("^InMemory$", MPParams["InputDataMode"], re.I):
 216         Results = ProcessPool.map(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
 217     else:
 218         MiscUtil.PrintError(
 219             'The value, %s, specified for "--inputDataMode" is not supported.' % (MPParams["InputDataMode"])
 220         )
 221 
 222     # Print out Psi4 version in the main process...
 223     MiscUtil.PrintInfo("\nInitializing Psi4...\n")
 224     Psi4Handle = Psi4Util.InitializePsi4(PrintVersion=True, PrintHeader=False)
 225     OptionsInfo["psi4"] = Psi4Handle
 226 
 227     (MolCount, ValidMolCount, EnergyFailedCount) = [0] * 3
 228     for Result in Results:
 229         MolCount += 1
 230         MolIndex, EncodedMol, CalcStatus, Energy, SolvationEnergy = Result
 231 
 232         if EncodedMol is None:
 233             continue
 234 
 235         ValidMolCount += 1
 236 
 237         Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
 238 
 239         if not CalcStatus:
 240             if not OptionsInfo["QuietMode"]:
 241                 MolName = RDKitUtil.GetMolName(Mol, MolCount)
 242                 MiscUtil.PrintWarning("Failed to calculate energy for molecule %s" % MolName)
 243 
 244             EnergyFailedCount += 1
 245             continue
 246 
 247         WriteMolecule(Writer, Mol, Energy, SolvationEnergy)
 248 
 249     return (MolCount, ValidMolCount, EnergyFailedCount)
 250 
 251 
 252 def InitializeWorkerProcess(*EncodedArgs):
 253     """Initialize data for a worker process."""
 254 
 255     global Options, OptionsInfo
 256 
 257     if not OptionsInfo["QuietMode"]:
 258         MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid())
 259 
 260     # Decode Options and OptionInfo...
 261     Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0])
 262     OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1])
 263 
 264     # Psi4 is initialized in the worker process to avoid creation of redundant Psi4
 265     # output files for each process...
 266     OptionsInfo["Psi4Initialized"] = False
 267 
 268 
 269 def InitializePsi4ForWorkerProcess():
 270     """Initialize Psi4 for a worker process."""
 271 
 272     if OptionsInfo["Psi4Initialized"]:
 273         return
 274 
 275     OptionsInfo["Psi4Initialized"] = True
 276 
 277     # Update output file...
 278     OptionsInfo["Psi4RunParams"]["OutputFile"] = Psi4Util.UpdatePsi4OutputFileUsingPID(
 279         OptionsInfo["Psi4RunParams"]["OutputFile"], os.getpid()
 280     )
 281 
 282     # Intialize Psi4...
 283     OptionsInfo["psi4"] = Psi4Util.InitializePsi4(
 284         Psi4RunParams=OptionsInfo["Psi4RunParams"],
 285         Psi4OptionsParams=OptionsInfo["Psi4OptionsParams"],
 286         PrintVersion=False,
 287         PrintHeader=True,
 288     )
 289     if OptionsInfo["Psi4DDXSolvationMode"]:
 290         Psi4Util.UpdatePsi4OptionsParameters(OptionsInfo["psi4"], OptionsInfo["Psi4DDXSolvationOptions"])
 291 
 292     # Setup conversion factor for energy units...
 293     SetupEnergyConversionFactor(OptionsInfo["psi4"])
 294 
 295 
 296 def WorkerProcess(EncodedMolInfo):
 297     """Process data for a worker process."""
 298 
 299     if not OptionsInfo["Psi4Initialized"]:
 300         InitializePsi4ForWorkerProcess()
 301 
 302     MolIndex, EncodedMol = EncodedMolInfo
 303 
 304     CalcStatus = False
 305     Energy = None
 306 
 307     if EncodedMol is None:
 308         return [MolIndex, None, CalcStatus, Energy]
 309 
 310     Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
 311     MolCount = MolIndex + 1
 312 
 313     if not CheckAndValidateMolecule(Mol, MolCount):
 314         return [MolIndex, None, CalcStatus, Energy]
 315 
 316     # Setup a Psi4 molecule...
 317     Psi4Mol = SetupPsi4Mol(OptionsInfo["psi4"], Mol, MolCount)
 318     if Psi4Mol is None:
 319         return [MolIndex, None, CalcStatus, Energy]
 320 
 321     CalcStatus, Energy, SolvationEnergy = CalculateMoleculeEnergy(OptionsInfo["psi4"], Psi4Mol, Mol, MolCount)
 322 
 323     return [
 324         MolIndex,
 325         RDKitUtil.MolToBase64EncodedMolString(
 326             Mol, PropertyPickleFlags=Chem.PropertyPickleOptions.MolProps | Chem.PropertyPickleOptions.PrivateProps
 327         ),
 328         CalcStatus,
 329         Energy,
 330         SolvationEnergy,
 331     ]
 332 
 333 
 334 def CalculateMoleculeEnergy(Psi4Handle, Psi4Mol, Mol, MolNum=None):
 335     """Calculate energy."""
 336 
 337     Status = False
 338     Energy = None
 339     SolvationEnergy = None
 340 
 341     #  Setup reference wave function...
 342     Reference = SetupReferenceWavefunction(Mol)
 343     Psi4Handle.set_options({"Reference": Reference})
 344 
 345     # Setup method name and basis set...
 346     MethodName, BasisSet = SetupMethodNameAndBasisSet(Mol)
 347 
 348     if OptionsInfo["Psi4DDXSolvationMode"]:
 349         Status, Energy, WaveFunction = Psi4Util.CalculateSinglePointEnergy(
 350             Psi4Handle, Psi4Mol, MethodName, BasisSet, ReturnWaveFunction=True, Quiet=OptionsInfo["QuietMode"]
 351         )
 352         SolvationEnergy = RetrievePsi4DDXSolvationEnergy(WaveFunction)
 353     else:
 354         Status, Energy = Psi4Util.CalculateSinglePointEnergy(
 355             Psi4Handle, Psi4Mol, MethodName, BasisSet, Quiet=OptionsInfo["QuietMode"]
 356         )
 357 
 358     # Convert energy units...
 359     if Status:
 360         if OptionsInfo["ApplyEnergyConversionFactor"]:
 361             Energy = Energy * OptionsInfo["EnergyConversionFactor"]
 362             if SolvationEnergy is not None:
 363                 SolvationEnergy = SolvationEnergy * OptionsInfo["EnergyConversionFactor"]
 364 
 365     # Clean up
 366     PerformPsi4Cleanup(Psi4Handle)
 367 
 368     return (Status, Energy, SolvationEnergy)
 369 
 370 
 371 def RetrievePsi4DDXSolvationEnergy(WaveFunction):
 372     """Retrieve DDX solvation energy."""
 373 
 374     SolvationEnergy = None
 375     try:
 376         SolvationEnergy = WaveFunction.variable("DD Solvation Energy")
 377     except Exception as ErrMsg:
 378         SolvationEnergy = None
 379         if not OptionsInfo["QuietMode"]:
 380             MiscUtil.PrintWarning("Failed to retrieve Psi4 DD Solvation Energy using wave function: %s" % ErrMsg)
 381 
 382     return SolvationEnergy
 383 
 384 
 385 def SetupPsi4Mol(Psi4Handle, Mol, MolCount=None):
 386     """Setup a Psi4 molecule object."""
 387 
 388     MolGeometry = RDKitUtil.GetPsi4XYZFormatString(Mol, NoCom=True, NoReorient=True)
 389 
 390     try:
 391         Psi4Mol = Psi4Handle.geometry(MolGeometry)
 392     except Exception as ErrMsg:
 393         Psi4Mol = None
 394         if not OptionsInfo["QuietMode"]:
 395             MiscUtil.PrintWarning("Failed to create Psi4 molecule from geometry string: %s\n" % ErrMsg)
 396             MolName = RDKitUtil.GetMolName(Mol, MolCount)
 397             MiscUtil.PrintWarning("Ignoring molecule: %s" % MolName)
 398 
 399     return Psi4Mol
 400 
 401 
 402 def PerformPsi4Cleanup(Psi4Handle):
 403     """Perform clean up."""
 404 
 405     # Clean up after Psi4 run...
 406     Psi4Handle.core.clean()
 407 
 408     # Clean up any leftover scratch files...
 409     if OptionsInfo["MPMode"]:
 410         Psi4Util.RemoveScratchFiles(Psi4Handle, OptionsInfo["Psi4RunParams"]["OutputFile"])
 411 
 412 
 413 def CheckAndValidateMolecule(Mol, MolCount=None):
 414     """Validate molecule for Psi4 calculations."""
 415 
 416     if Mol is None:
 417         if not OptionsInfo["QuietMode"]:
 418             MiscUtil.PrintInfo("\nProcessing molecule number %s..." % MolCount)
 419         return False
 420 
 421     MolName = RDKitUtil.GetMolName(Mol, MolCount)
 422     if not OptionsInfo["QuietMode"]:
 423         MiscUtil.PrintInfo("\nProcessing molecule %s..." % MolName)
 424 
 425     if RDKitUtil.IsMolEmpty(Mol):
 426         if not OptionsInfo["QuietMode"]:
 427             MiscUtil.PrintWarning("Ignoring empty molecule: %s\n" % MolName)
 428         return False
 429 
 430     if not RDKitUtil.ValidateElementSymbols(RDKitUtil.GetAtomSymbols(Mol)):
 431         if not OptionsInfo["QuietMode"]:
 432             MiscUtil.PrintWarning("Ignoring molecule containing invalid element symbols: %s\n" % MolName)
 433         return False
 434 
 435     # Check for 3D flag...
 436     if not Mol.GetConformer().Is3D():
 437         if not OptionsInfo["QuietMode"]:
 438             MiscUtil.PrintWarning("3D tag is not set for molecule: %s\n" % MolName)
 439 
 440     # Check for missing hydrogens...
 441     if RDKitUtil.AreHydrogensMissingInMolecule(Mol):
 442         if not OptionsInfo["QuietMode"]:
 443             MiscUtil.PrintWarning("Missing hydrogens in molecule: %s\n" % MolName)
 444 
 445     return True
 446 
 447 
 448 def SetupMethodNameAndBasisSet(Mol):
 449     """Setup method name and basis set."""
 450 
 451     MethodName = OptionsInfo["MethodName"]
 452     if OptionsInfo["MethodNameAuto"]:
 453         MethodName = "B3LYP"
 454 
 455     BasisSet = OptionsInfo["BasisSet"]
 456     if OptionsInfo["BasisSetAuto"]:
 457         BasisSet = "6-31+G**" if RDKitUtil.IsAtomSymbolPresentInMol(Mol, "S") else "6-31G**"
 458 
 459     return (MethodName, BasisSet)
 460 
 461 
 462 def SetupReferenceWavefunction(Mol):
 463     """Setup reference wavefunction."""
 464 
 465     Reference = OptionsInfo["Reference"]
 466     if OptionsInfo["ReferenceAuto"]:
 467         Reference = "UHF" if (RDKitUtil.GetSpinMultiplicity(Mol) > 1) else "RHF"
 468 
 469     return Reference
 470 
 471 
 472 def SetupEnergyConversionFactor(Psi4Handle):
 473     """Setup converstion factor for energt units. The Psi4 energy units are Hartrees."""
 474 
 475     EnergyUnits = OptionsInfo["EnergyUnits"]
 476 
 477     ApplyConversionFactor = True
 478     if re.match(r"^kcal\/mol$", EnergyUnits, re.I):
 479         ConversionFactor = Psi4Handle.constants.hartree2kcalmol
 480     elif re.match(r"^kJ\/mol$", EnergyUnits, re.I):
 481         ConversionFactor = Psi4Handle.constants.hartree2kJmol
 482     elif re.match("^eV$", EnergyUnits, re.I):
 483         ConversionFactor = Psi4Handle.constants.hartree2ev
 484     else:
 485         ApplyConversionFactor = False
 486         ConversionFactor = 1.0
 487 
 488     OptionsInfo["ApplyEnergyConversionFactor"] = ApplyConversionFactor
 489     OptionsInfo["EnergyConversionFactor"] = ConversionFactor
 490 
 491 
 492 def WriteMolecule(Writer, Mol, Energy, SolvationEnergy):
 493     """Write molecule."""
 494 
 495     Energy = "%.*f" % (OptionsInfo["Precision"], Energy)
 496     Mol.SetProp(OptionsInfo["EnergyDataFieldLabel"], Energy)
 497 
 498     if OptionsInfo["Psi4DDXSolvationMode"] and SolvationEnergy is not None:
 499         SolvationEnergy = "%.*f" % (OptionsInfo["Precision"], SolvationEnergy)
 500         Mol.SetProp(OptionsInfo["SolvationEnergyDataFieldLabel"], SolvationEnergy)
 501 
 502     Writer.write(Mol)
 503 
 504 
 505 def CheckAndSetupDDX():
 506     """Load pyddx and return its handle."""
 507 
 508     try:
 509         import pyddx
 510     except ImportError as ErrMsg:
 511         sys.stderr.write("\nFailed to import module/package pyddx for solvation: %s\n" % ErrMsg)
 512         sys.stderr.write("Psi4 relies on pyddx module for DDX solvation calculation.\n")
 513         sys.stderr.write("You may need to install it from pypi or conda-forge.\n")
 514         sys.stderr.write("Check/update your environment and try again.\n\n")
 515         sys.exit(1)
 516 
 517     return pyddx
 518 
 519 
 520 def ProcessDDXListSolventsOption():
 521     """List solvent information provided by tge DDX module for the calculation
 522     of solvation energy."""
 523 
 524     PyDDXHandle = CheckAndSetupDDX()
 525 
 526     MiscUtil.PrintInfo("Solvent name and dielectric constant information avaibale in the DDX module:\n")
 527     MiscUtil.PrintInfo("Solvent\tEpsilon")
 528     Count = 0
 529     for Solvent in sorted(PyDDXHandle.data.solvent_epsilon.keys()):
 530         Count += 1
 531         Epsilon = PyDDXHandle.data.solvent_epsilon[Solvent]
 532         MiscUtil.PrintInfo("%s\t%s" % (Solvent, Epsilon))
 533 
 534     MiscUtil.PrintInfo("\nTotal number of solvents: %s\n" % Count)
 535 
 536 
 537 def ProcessOptions():
 538     """Process and validate command line arguments and options."""
 539 
 540     MiscUtil.PrintInfo("Processing options...")
 541 
 542     # Validate options...
 543     ValidateOptions()
 544 
 545     OptionsInfo["Infile"] = Options["--infile"]
 546     ParamsDefaultInfoOverride = {"RemoveHydrogens": False}
 547     OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters(
 548         "--infileParams",
 549         Options["--infileParams"],
 550         InfileName=Options["--infile"],
 551         ParamsDefaultInfo=ParamsDefaultInfoOverride,
 552     )
 553 
 554     OptionsInfo["Outfile"] = Options["--outfile"]
 555     OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters(
 556         "--outfileParams", Options["--outfileParams"]
 557     )
 558 
 559     OptionsInfo["Overwrite"] = Options["--overwrite"]
 560 
 561     # Method, basis set, and reference wavefunction...
 562     OptionsInfo["BasisSet"] = Options["--basisSet"]
 563     OptionsInfo["BasisSetAuto"] = True if re.match("^auto$", Options["--basisSet"], re.I) else False
 564 
 565     OptionsInfo["MethodName"] = Options["--methodName"]
 566     OptionsInfo["MethodNameAuto"] = True if re.match("^auto$", Options["--methodName"], re.I) else False
 567 
 568     OptionsInfo["Reference"] = Options["--reference"]
 569     OptionsInfo["ReferenceAuto"] = True if re.match("^auto$", Options["--reference"], re.I) else False
 570 
 571     # Run and options parameters...
 572     OptionsInfo["Psi4OptionsParams"] = Psi4Util.ProcessPsi4OptionsParameters(
 573         "--psi4OptionsParams", Options["--psi4OptionsParams"]
 574     )
 575     OptionsInfo["Psi4RunParams"] = Psi4Util.ProcessPsi4RunParameters(
 576         "--psi4RunParams", Options["--psi4RunParams"], InfileName=OptionsInfo["Infile"]
 577     )
 578 
 579     # DDX solvation parameters...
 580     OptionsInfo["Psi4DDXSolvationMode"] = True if re.match("^yes$", Options["--psi4DDXSolvation"], re.I) else False
 581     Psi4DDXSolvationParams = Psi4Util.ProcessPsi4DDXSolvationParameters(
 582         "--psi4DDXSolvationParams", Options["--psi4DDXSolvationParams"]
 583     )
 584     Psi4DDXSolvationOptions = Psi4Util.SetupPsi4DDXSolvationOptions(
 585         OptionsInfo["Psi4DDXSolvationMode"], Psi4DDXSolvationParams
 586     )
 587     OptionsInfo["Psi4DDXSolvationParams"] = Psi4DDXSolvationParams
 588     OptionsInfo["Psi4DDXSolvationOptions"] = Psi4DDXSolvationOptions
 589 
 590     if OptionsInfo["Psi4DDXSolvationMode"]:
 591         CheckAndSetupDDX()
 592 
 593     # Energy units and label...
 594     OptionsInfo["EnergyUnits"] = Options["--energyUnits"]
 595     EnergyDataFieldLabel = Options["--energyDataFieldLabel"]
 596     if re.match("^auto$", EnergyDataFieldLabel, re.I):
 597         EnergyDataFieldLabel = "Psi4_Energy (%s)" % Options["--energyUnits"]
 598     OptionsInfo["EnergyDataFieldLabel"] = EnergyDataFieldLabel
 599 
 600     SolvationEnergyDataFieldLabel = "Psi4_DDX_Solvation_Energy (%s)" % Options["--energyUnits"]
 601     OptionsInfo["SolvationEnergyDataFieldLabel"] = SolvationEnergyDataFieldLabel
 602 
 603     OptionsInfo["MPMode"] = True if re.match("^yes$", Options["--mp"], re.I) else False
 604     OptionsInfo["MPParams"] = MiscUtil.ProcessOptionMultiprocessingParameters("--mpParams", Options["--mpParams"])
 605 
 606     OptionsInfo["Precision"] = int(Options["--precision"])
 607     OptionsInfo["QuietMode"] = True if re.match("^yes$", Options["--quiet"], re.I) else False
 608 
 609 
 610 def RetrieveOptions():
 611     """Retrieve command line arguments and options."""
 612 
 613     # Get options...
 614     global Options
 615     Options = docopt(_docoptUsage_)
 616 
 617     # Set current working directory to the specified directory...
 618     WorkingDir = Options["--workingdir"]
 619     if WorkingDir:
 620         os.chdir(WorkingDir)
 621 
 622     # Handle examples option...
 623     if "--examples" in Options and Options["--examples"]:
 624         MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_))
 625         sys.exit(0)
 626 
 627     # Handle listing of solvent information...
 628     if Options and Options["--psi4DDXListSolvents"]:
 629         ProcessDDXListSolventsOption()
 630         sys.exit(0)
 631 
 632 
 633 def ValidateOptions():
 634     """Validate option values."""
 635 
 636     MiscUtil.ValidateOptionTextValue("--energyUnits", Options["--energyUnits"], "Hartrees kcal/mol kJ/mol eV")
 637 
 638     MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"])
 639     MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "sdf sd mol")
 640 
 641     MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "sdf sd")
 642     MiscUtil.ValidateOptionsOutputFileOverwrite(
 643         "-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"]
 644     )
 645     MiscUtil.ValidateOptionsDistinctFileNames(
 646         "-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"]
 647     )
 648 
 649     MiscUtil.ValidateOptionTextValue("--mp", Options["--mp"], "yes no")
 650     MiscUtil.ValidateOptionIntegerValue("-p, --precision", Options["--precision"], {">": 0})
 651 
 652     MiscUtil.ValidateOptionTextValue("--psi4DDXSolvation", Options["--psi4DDXSolvation"], "yes no")
 653 
 654     MiscUtil.ValidateOptionTextValue("-q, --quiet", Options["--quiet"], "yes no")
 655 
 656 
 657 # Setup a usage string for docopt...
 658 _docoptUsage_ = """
 659 Psi4CalculateEnergy.py - Calculate single point energy
 660 
 661 Usage:
 662     Psi4CalculateEnergy.py [--basisSet <text>] [--energyDataFieldLabel <text>] [--energyUnits <text>]
 663                            [--infileParams <Name,Value,...>] [--methodName <text>] [--mp <yes or no>]
 664                            [--mpParams <Name, Value,...>] [ --outfileParams <Name,Value,...> ] [--overwrite]
 665                            [--precision <number>] [--psi4OptionsParams <Name,Value,...>] [--psi4RunParams <Name,Value,...>]
 666                            [--psi4DDXSolvation <yes or no>] [--psi4DDXSolvationParams <Name,Value,...>]
 667                            [--quiet <yes or no>] [--reference <text>] [-w <dir>] -i <infile> -o <outfile> 
 668     Psi4CalculateEnergy.py --psi4DDXListSolvents
 669     Psi4CalculateEnergy.py -h | --help | -e | --examples
 670 
 671 Description:
 672     Calculate single point energy for molecules using a specified method name and
 673     basis set. The molecules must have 3D coordinates in input file. The molecular
 674     geometry is not optimized before the calculation. In addition, the hydrogens
 675     must be present for all molecules in input file. The 3D coordinates are not
 676     modified during the calculation.
 677 
 678     A Psi4 XYZ format geometry string is automatically generated for each molecule
 679     in input file. It contains atom symbols and 3D coordinates for each atom in a
 680     molecule. In addition, the formal charge and spin multiplicity are present in the
 681     the geometry string. These values are either retrieved from molecule properties
 682     named 'FormalCharge' and 'SpinMultiplicty' or dynamically calculated for a
 683     molecule.
 684 
 685     The supported input file formats are: Mol (.mol), SD (.sdf, .sd)
 686 
 687     The supported output file formats are: SD (.sdf, .sd)
 688 
 689 Options:
 690     -b, --basisSet <text>  [default: auto]
 691         Basis set to use for energy calculation. Default: 6-31+G** for sulfur
 692         containing molecules; Otherwise, 6-31G** [ Ref 150 ]. The specified 
 693         value must be a valid Psi4 basis set. No validation is performed.
 694         
 695         The following list shows a representative sample of basis sets available
 696         in Psi4:
 697             
 698             STO-3G, 6-31G, 6-31+G, 6-31++G, 6-31G*, 6-31+G*,  6-31++G*, 
 699             6-31G**, 6-31+G**, 6-31++G**, 6-311G, 6-311+G, 6-311++G,
 700             6-311G*, 6-311+G*, 6-311++G*, 6-311G**, 6-311+G**, 6-311++G**,
 701             cc-pVDZ, cc-pCVDZ, aug-cc-pVDZ, cc-pVDZ-DK, cc-pCVDZ-DK, def2-SVP,
 702             def2-SVPD, def2-TZVP, def2-TZVPD, def2-TZVPP, def2-TZVPPD
 703             
 704     --energyDataFieldLabel <text>  [default: auto]
 705         Energy data field label for writing energy values. Default: Psi4_Energy (<Units>). 
 706     --energyUnits <text>  [default: kcal/mol]
 707         Energy units. Possible values: Hartrees, kcal/mol, kJ/mol, or eV.
 708     -e, --examples
 709         Print examples.
 710     -h, --help
 711         Print this help message.
 712     -i, --infile <infile>
 713         Input file name.
 714     --infileParams <Name,Value,...>  [default: auto]
 715         A comma delimited list of parameter name and value pairs for reading
 716         molecules from files. The supported parameter names for different file
 717         formats, along with their default values, are shown below:
 718             
 719             SD, MOL: removeHydrogens,no,sanitize,yes,strictParsing,yes
 720             
 721     -m, --methodName <text>  [default: auto]
 722         Method to use for energy calculation. Default: B3LYP [ Ref 150 ]. The
 723         specified value must be a valid Psi4 method name. No validation is
 724         performed.
 725         
 726         The following list shows a representative sample of methods available
 727         in Psi4:
 728             
 729             B1LYP, B2PLYP, B2PLYP-D3BJ, B2PLYP-D3MBJ, B3LYP, B3LYP-D3BJ,
 730             B3LYP-D3MBJ, CAM-B3LYP, CAM-B3LYP-D3BJ, HF, HF-D3BJ,  HF3c, M05,
 731             M06, M06-2x, M06-HF, M06-L, MN12-L, MN15, MN15-D3BJ,PBE, PBE0,
 732             PBEH3c, PW6B95, PW6B95-D3BJ, WB97, WB97X, WB97X-D, WB97X-D3BJ
 733             
 734     --mp <yes or no>  [default: no]
 735         Use multiprocessing.
 736          
 737         By default, input data is retrieved in a lazy manner via mp.Pool.imap()
 738         function employing lazy RDKit data iterable. This allows processing of
 739         arbitrary large data sets without any additional requirements memory.
 740         
 741         All input data may be optionally loaded into memory by mp.Pool.map()
 742         before starting worker processes in a process pool by setting the value
 743         of 'inputDataMode' to 'InMemory' in '--mpParams' option.
 744         
 745         A word to the wise: The default 'chunkSize' value of 1 during 'Lazy' input
 746         data mode may adversely impact the performance. The '--mpParams' section
 747         provides additional information to tune the value of 'chunkSize'.
 748     --mpParams <Name,Value,...>  [default: auto]
 749         A comma delimited list of parameter name and value pairs to configure
 750         multiprocessing.
 751         
 752         The supported parameter names along with their default and possible
 753         values are shown below:
 754         
 755             chunkSize, auto
 756             inputDataMode, Lazy   [ Possible values: InMemory or Lazy ]
 757             numProcesses, auto   [ Default: mp.cpu_count() ]
 758         
 759         These parameters are used by the following functions to configure and
 760         control the behavior of multiprocessing: mp.Pool(), mp.Pool.map(), and
 761         mp.Pool.imap().
 762         
 763         The chunkSize determines chunks of input data passed to each worker
 764         process in a process pool by mp.Pool.map() and mp.Pool.imap() functions.
 765         The default value of chunkSize is dependent on the value of 'inputDataMode'.
 766         
 767         The mp.Pool.map() function, invoked during 'InMemory' input data mode,
 768         automatically converts RDKit data iterable into a list, loads all data into
 769         memory, and calculates the default chunkSize using the following method
 770         as shown in its code:
 771         
 772             chunkSize, extra = divmod(len(dataIterable), len(numProcesses) * 4)
 773             if extra: chunkSize += 1
 774         
 775         For example, the default chunkSize will be 7 for a pool of 4 worker processes
 776         and 100 data items.
 777         
 778         The mp.Pool.imap() function, invoked during 'Lazy' input data mode, employs
 779         'lazy' RDKit data iterable to retrieve data as needed, without loading all the
 780         data into memory. Consequently, the size of input data is not known a priori.
 781         It's not possible to estimate an optimal value for the chunkSize. The default 
 782         chunkSize is set to 1.
 783         
 784         The default value for the chunkSize during 'Lazy' data mode may adversely
 785         impact the performance due to the overhead associated with exchanging
 786         small chunks of data. It is generally a good idea to explicitly set chunkSize to
 787         a larger value during 'Lazy' input data mode, based on the size of your input
 788         data and number of processes in the process pool.
 789         
 790         The mp.Pool.map() function waits for all worker processes to process all
 791         the data and return the results. The mp.Pool.imap() function, however,
 792         returns the the results obtained from worker processes as soon as the
 793         results become available for specified chunks of data.
 794         
 795         The order of data in the results returned by both mp.Pool.map() and 
 796         mp.Pool.imap() functions always corresponds to the input data.
 797     -o, --outfile <outfile>
 798         Output file name.
 799     --outfileParams <Name,Value,...>  [default: auto]
 800         A comma delimited list of parameter name and value pairs for writing
 801         molecules to files. The supported parameter names for different file
 802         formats, along with their default values, are shown below:
 803             
 804             SD: kekulize,yes,forceV3000,no
 805             
 806     --overwrite
 807         Overwrite existing files.
 808     --precision <number>  [default: 6]
 809         Floating point precision for writing energy values.
 810     --psi4OptionsParams <Name,Value,...>  [default: none]
 811         A comma delimited list of Psi4 option name and value pairs for setting
 812         global and module options. The names are 'option_name' for global options
 813         and 'module_name__option_name' for options local to a module. The
 814         specified option names must be valid Psi4 names. No validation is
 815         performed.
 816         
 817         The specified option name and  value pairs are processed and passed to
 818         psi4.set_options() as a dictionary. The supported value types are float,
 819         integer, boolean, or string. The float value string is converted into a float.
 820         The valid values for a boolean string are yes, no, true, false, on, or off. 
 821     --psi4RunParams <Name,Value,...>  [default: auto]
 822         A comma delimited list of parameter name and value pairs for configuring
 823         Psi4 jobs.
 824         
 825         The supported parameter names along with their default and possible
 826         values are shown below:
 827              
 828             MemoryInGB, 1
 829             NumThreads, 1
 830             OutputFile, auto   [ Possible  values: stdout, quiet, or FileName ]
 831             ScratchDir, auto   [ Possivle values: DirName]
 832             RemoveOutputFile, yes   [ Possible values: yes, no, true, or false]
 833             
 834         These parameters control the runtime behavior of Psi4.
 835         
 836         The default file name for 'OutputFile' is <InFileRoot>_Psi4.out. The PID
 837         is appended to output file name during multiprocessing as shown:
 838         <InFileRoot>_Psi4_<PIDNum>.out. The 'stdout' value for 'OutputType'
 839         sends Psi4 output to stdout. The 'quiet' or 'devnull' value suppresses
 840         all Psi4 output.
 841         
 842         The default 'Yes' value of 'RemoveOutputFile' option forces the removal
 843         of any existing Psi4 before creating new files to append output from
 844         multiple Psi4 runs.
 845         
 846         The option 'ScratchDir' is a directory path to the location of scratch
 847         files. The default value corresponds to Psi4 default. It may be used to
 848         override the deafult path.
 849     --psi4DDXSolvation <yes or no>  [default: no]
 850         Perform energy calculation in solution using domain-decomposition-based
 851         continuum solvation models [ Ref 160-161 ] The script relies on Psi4
 852         interface to the DDX module to perform these calculations. The DDX library
 853         provides a linear-scaling implementation of standard continuum solvation
 854         models using a domain- decomposition ansatz. Two solvation models are
 855         supported: COnductor-like Screening MOdel (COSMO) [ Ref 162-163] and
 856         Polarizable Continuum Model (PCM) [ Ref 164-165 ].
 857         
 858         The solvation energy is included in the value of the total energy written to 
 859         the output SD file. In addition, the value of solvation energy is written to the
 860         output file under its own data field label.
 861         
 862         Psi4 relies on Python module PYDDX to calculate solvation energy. It must be
 863         present in your environment.
 864     --psi4DDXSolvationParams <Name,Value,...>  [default: auto]
 865         A space delimited list of parameter name and value  pairs for calculating
 866         solvation energy using the DDX module. The supported parameter names,
 867         along with their default values, are shown below:
 868             
 869             solvationModel PCM   [ Possible  values: COSMO or PCM]
 870             solvent water   [ Possible  values: A valid solvent name]
 871             solventEpsilon None
 872             radiiSet UFF   [ Possible  values: Bondi or UFF]
 873             radiiScaling auto   [ Default: 1.2 for Bondi; 1.1 for UFF]
 874             
 875             solvationModel: Solvation model for calculating solvation energy.
 876             The corresponding Psi4 option is DDX_MODEL.
 877             
 878             solvent: Solvent to use. The corresponding Ps4 option is
 879             DDX_SOLVENT
 880             
 881             solventEpsilon: Dielectric constant of the solvent. The
 882             corresponding Psi4 option is DDX_SOLVENT_EPSILON.
 883             
 884             radiiSet: Radius set for cavity spheres. The corresponding Psi4
 885             option is DDX_RADII_SET.
 886             
 887             radiiScaling: Scaling factor for cavity spheres. The default value
 888             depends on radiiSet: 1.2 for Bondi; 1.1 for UFF. The corresponding
 889             Psi4 option is DDX_RADII_SCALING.
 890             
 891         These parameter names are automatically mapped to appropriate DDX keywords.
 892         
 893         You may specify the solvent either by directly providing a dielectric constant using
 894         'solventEpsilon' parameter or a solvent name corresponding to 'solvent' parameter.
 895         The solvent name must be a valid named supported by the DDX module. For example:
 896         water, ethanol, dimethylsulfoxide, cis-1,2-dimethylcyclohexane , etc. The 'solvent'
 897         parameter is ignored for non-zero value of 'solvent' option.
 898         
 899         The DDX module contains a variety of addtional options to confgure the calculation
 900         of solvation energy. For example: DDX_MAXITER, DDX_RADII_SCALING, etc. You may
 901         use '--psi4OptionsParams' to modify values for additional DDX options.
 902     --psi4DDXListSolvents
 903         List solvent names, along with dielectric values, supported by the DDX module
 904         for the calculation of solvent energy without performing any calculation.
 905     -q, --quiet <yes or no>  [default: no]
 906         Use quiet mode. The warning and information messages will not be printed.
 907     -r, --reference <text>  [default: auto]
 908         Reference wave function to use for energy calculation. Default: RHF or UHF.
 909         The default values are Restricted Hartree-Fock (RHF) for closed-shell molecules
 910         with all electrons paired and Unrestricted Hartree-Fock (UHF) for open-shell
 911         molecules with unpaired electrons.
 912         
 913         The specified value must be a valid Psi4 reference wave function. No validation
 914         is performed. For example: ROHF, CUHF, RKS, etc.
 915         
 916         The spin multiplicity determines the default value of reference wave function
 917         for input molecules. It is calculated from number of free radical electrons using
 918         Hund's rule of maximum multiplicity defined as 2S + 1 where S is the total
 919         electron spin. The total spin is 1/2 the number of free radical electrons in a 
 920         molecule. The value of 'SpinMultiplicity' molecule property takes precedence
 921         over the calculated value of spin multiplicity.
 922     -w, --workingdir <dir>
 923         Location of working directory which defaults to the current directory.
 924 
 925 Examples:
 926     To calculate single point energy using  B3LYP/6-31G** and B3LYP/6-31+G** for
 927     non-sulfur and sulfur containing molecules in a SD file with 3D structures, use
 928     RHF and UHF for closed-shell and open-shell molecules, and write a new SD file,
 929     type:
 930 
 931         % Psi4CalculateEnergy.py  -i Psi4Sample3D.sdf  -o Psi4Sample3DOut.sdf
 932 
 933     To run the first example in multiprocessing mode on all available CPUs
 934     without loading all data into memory and write out a SD file, type:
 935 
 936         % Psi4CalculateEnergy.py --mp yes -i Psi4Sample3D.sdf
 937           -o Psi4Sample3DOut.sdf
 938 
 939     To run the first example in multiprocessing mode on all available CPUs
 940     by loading all data into memory and write out a SD file, type:
 941 
 942         % Psi4CalculateEnergy.py  --mp yes --mpParams "inputDataMode,
 943           InMemory" -i Psi4Sample3D.sdf  -o Psi4Sample3DOut.sdf
 944 
 945     To run the first example in multiprocessing mode on all available CPUs
 946     without loading all data into memory along with multiple threads for each
 947     Psi4 run and write out a SD file, type:
 948 
 949         % Psi4CalculateEnergy.py --mp yes --psi4RunParams "NumThreads,2"
 950            -i Psi4Sample3D.sdf -o Psi4Sample3DOut.sdf
 951 
 952     To run the first example along with calculating DDX solvation energy using
 953     default solvent parameters for water and write out a SD file, type:
 954 
 955         % Psi4CalculateEnergy.py --mp yes -i Psi4Sample3D.sdf
 956           -o Psi4Sample3DOut.sdf --psi4DDXSolvation yes
 957 
 958     To run the first example along with calculating DDX solvation energy using
 959     specific solvartion parameters and write out a SD file, type:
 960 
 961         % Psi4CalculateEnergy.py --mp yes -i Psi4Sample3D.sdf
 962           -o Psi4Sample3DOut.sdf --psi4DDXSolvation yes --psi4DDXSolvationParams
 963           "solvationModel COSMO solvent water radiiSet UFF"
 964 
 965     To calculate single point energy using a specific method and basis set for
 966     molecules in a SD containing 3D structures and write a new SD file, type:
 967 
 968         % Psi4CalculateEnergy.py  -m SCF -b aug-cc-pVDZ -i Psi4Sample3D.sdf
 969           -o Psi4Sample3DOut.sdf
 970 
 971 Author:
 972     Manish Sud(msud@san.rr.com)
 973 
 974 See also:
 975     Psi4CalculatePartialCharges.py, Psi4PerformMinimization.py, Psi4GenerateConformers.py
 976 
 977 Copyright:
 978     Copyright (C) 2026 Manish Sud. All rights reserved.
 979 
 980     The functionality available in this script is implemented using Psi4, an
 981     open source quantum chemistry software package, and RDKit, an open
 982     source toolkit for cheminformatics developed by Greg Landrum.
 983 
 984     This file is part of MayaChemTools.
 985 
 986     MayaChemTools is free software; you can redistribute it and/or modify it under
 987     the terms of the GNU Lesser General Public License as published by the Free
 988     Software Foundation; either version 3 of the License, or (at your option) any
 989     later version.
 990 
 991 """
 992 
 993 if __name__ == "__main__":
 994     main()