MayaChemTools

   1 #!/bin/env python
   2 #
   3 # File: Psi4CalculateEnergy.py
   4 # Author: Manish Sud <msud@san.rr.com>
   5 #
   6 # Copyright (C) 2024 Manish Sud. All rights reserved.
   7 #
   8 # The functionality available in this script is implemented using Psi4, an
   9 # open source quantum chemistry software package, and RDKit, an open
  10 # source toolkit for cheminformatics developed by Greg Landrum.
  11 #
  12 # This file is part of MayaChemTools.
  13 #
  14 # MayaChemTools is free software; you can redistribute it and/or modify it under
  15 # the terms of the GNU Lesser General Public License as published by the Free
  16 # Software Foundation; either version 3 of the License, or (at your option) any
  17 # later version.
  18 #
  19 # MayaChemTools is distributed in the hope that it will be useful, but without
  20 # any warranty; without even the implied warranty of merchantability of fitness
  21 # for a particular purpose.  See the GNU Lesser General Public License for more
  22 # details.
  23 #
  24 # You should have received a copy of the GNU Lesser General Public License
  25 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  26 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  27 # Boston, MA, 02111-1307, USA.
  28 #
  29 
  30 from __future__ import print_function
  31 
  32 # Add local python path to the global path and import standard library modules...
  33 import os
  34 import sys;  sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python"))
  35 import time
  36 import re
  37 import shutil
  38 import multiprocessing as mp
  39 
  40 # Psi4 imports...
  41 if (hasattr(shutil, 'which') and shutil.which("psi4") is None):
  42     sys.stderr.write("\nWarning: Failed to find 'psi4' in your PATH indicating potential issues with your\n")
  43     sys.stderr.write("Psi4 environment. The 'import psi4' directive in the global scope of the script\n")
  44     sys.stderr.write("interferes with the multiprocessing functionality. It is imported later in the\n")
  45     sys.stderr.write("local scope during the execution of the script and may fail. Check/update your\n")
  46     sys.stderr.write("Psi4 environment and try again.\n\n")
  47 
  48 # RDKit imports...
  49 try:
  50     from rdkit import rdBase
  51     from rdkit import Chem
  52     from rdkit.Chem import AllChem
  53 except ImportError as ErrMsg:
  54     sys.stderr.write("\nFailed to import RDKit module/package: %s\n" % ErrMsg)
  55     sys.stderr.write("Check/update your RDKit environment and try again.\n\n")
  56     sys.exit(1)
  57 
  58 # MayaChemTools imports...
  59 try:
  60     from docopt import docopt
  61     import MiscUtil
  62     import Psi4Util
  63     import RDKitUtil
  64 except ImportError as ErrMsg:
  65     sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg)
  66     sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n")
  67     sys.exit(1)
  68 
  69 ScriptName = os.path.basename(sys.argv[0])
  70 Options = {}
  71 OptionsInfo = {}
  72 
  73 def main():
  74     """Start execution of the script."""
  75     
  76     MiscUtil.PrintInfo("\n%s (Psi4: Imported later; RDKit v%s; MayaChemTools v%s; %s): Starting...\n" % (ScriptName, rdBase.rdkitVersion, MiscUtil.GetMayaChemToolsVersion(), time.asctime()))
  77     
  78     (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime()
  79     
  80     # Retrieve command line arguments and options...
  81     RetrieveOptions()
  82     
  83     # Process and validate command line arguments and options...
  84     ProcessOptions()
  85     
  86     # Perform actions required by the script...
  87     CalculateEnergy()
  88     
  89     MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName)
  90     MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime))
  91 
  92 def CalculateEnergy():
  93     """Calculate single point energy."""
  94     
  95     # Setup a molecule reader...
  96     MiscUtil.PrintInfo("\nProcessing file %s..." % OptionsInfo["Infile"])
  97     Mols  = RDKitUtil.ReadMolecules(OptionsInfo["Infile"], **OptionsInfo["InfileParams"])
  98     
  99     # Set up a molecule writer...
 100     Writer = RDKitUtil.MoleculesWriter(OptionsInfo["Outfile"], **OptionsInfo["OutfileParams"])
 101     if Writer is None:
 102         MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OptionsInfo["Outfile"])
 103     MiscUtil.PrintInfo("Generating file %s..." % OptionsInfo["Outfile"])
 104 
 105     MolCount, ValidMolCount, EnergyFailedCount = ProcessMolecules(Mols, Writer)
 106 
 107     if Writer is not None:
 108         Writer.close()
 109     
 110     MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount)
 111     MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount)
 112     MiscUtil.PrintInfo("Number of molecules failed during energy calculation: %d" % EnergyFailedCount)
 113     MiscUtil.PrintInfo("Number of ignored molecules: %d" % (MolCount - ValidMolCount + EnergyFailedCount))
 114 
 115 def ProcessMolecules(Mols, Writer):
 116     """Process and calculate energy of molecules."""
 117     
 118     if OptionsInfo["MPMode"]:
 119         return ProcessMoleculesUsingMultipleProcesses(Mols, Writer)
 120     else:
 121         return ProcessMoleculesUsingSingleProcess(Mols, Writer)
 122 
 123 def ProcessMoleculesUsingSingleProcess(Mols, Writer):
 124     """Process and calculate energy of molecules using a single process."""
 125     
 126     # Intialize Psi4...
 127     MiscUtil.PrintInfo("\nInitializing Psi4...")
 128     Psi4Handle = Psi4Util.InitializePsi4(Psi4RunParams = OptionsInfo["Psi4RunParams"], Psi4OptionsParams = OptionsInfo["Psi4OptionsParams"], PrintVersion = True, PrintHeader = True)
 129     if OptionsInfo["Psi4DDXSolvationMode"]:
 130         Psi4Util.UpdatePsi4OptionsParameters(Psi4Handle, OptionsInfo["Psi4DDXSolvationOptions"])
 131     OptionsInfo["psi4"] = Psi4Handle
 132     
 133     # Setup conversion factor for energy units...
 134     SetupEnergyConversionFactor(Psi4Handle)
 135     
 136     MiscUtil.PrintInfo("\nCalculating energy...")
 137     
 138     (MolCount, ValidMolCount, EnergyFailedCount) = [0] * 3
 139     for Mol in Mols:
 140         MolCount += 1
 141         
 142         if not CheckAndValidateMolecule(Mol, MolCount):
 143             continue
 144         
 145         # Setup a Psi4 molecule...
 146         Psi4Mol = SetupPsi4Mol(Psi4Handle, Mol, MolCount)
 147         if Psi4Mol is None:
 148             continue
 149         
 150         ValidMolCount += 1
 151         
 152         CalcStatus, Energy, SolvationEnergy = CalculateMoleculeEnergy(Psi4Handle, Psi4Mol, Mol, MolCount)
 153 
 154         if not CalcStatus:
 155             if not OptionsInfo["QuietMode"]:
 156                 MiscUtil.PrintWarning("Failed to calculate energy for molecule %s" % RDKitUtil.GetMolName(Mol, MolCount))
 157             
 158             EnergyFailedCount += 1
 159             continue
 160         
 161         WriteMolecule(Writer, Mol, Energy, SolvationEnergy)
 162     
 163     return (MolCount, ValidMolCount, EnergyFailedCount)
 164 
 165 def ProcessMoleculesUsingMultipleProcesses(Mols, Writer):
 166     """Process and calculate energy of molecules using  process."""
 167     
 168     MiscUtil.PrintInfo("\nCalculating energy using multiprocessing...")
 169     
 170     MPParams = OptionsInfo["MPParams"]
 171     
 172     # Setup data for initializing a worker process...
 173     InitializeWorkerProcessArgs = (MiscUtil.ObjectToBase64EncodedString(Options), MiscUtil.ObjectToBase64EncodedString(OptionsInfo))
 174     
 175     # Setup a encoded mols data iterable for a worker process...
 176     WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(Mols)
 177     
 178     # Setup process pool along with data initialization for each process...
 179     if not OptionsInfo["QuietMode"]:
 180         MiscUtil.PrintInfo("\nConfiguring multiprocessing using %s method..." % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()"))
 181         MiscUtil.PrintInfo("NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n" % (MPParams["NumProcesses"], MPParams["InputDataMode"], ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"])))
 182     
 183     ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeWorkerProcess, InitializeWorkerProcessArgs)
 184     
 185     # Start processing...
 186     if re.match("^Lazy$", MPParams["InputDataMode"], re.I):
 187         Results = ProcessPool.imap(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
 188     elif re.match("^InMemory$", MPParams["InputDataMode"], re.I):
 189         Results = ProcessPool.map(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
 190     else:
 191         MiscUtil.PrintError("The value, %s, specified for \"--inputDataMode\" is not supported." % (MPParams["InputDataMode"]))
 192 
 193     # Print out Psi4 version in the main process...
 194     MiscUtil.PrintInfo("\nInitializing Psi4...\n")
 195     Psi4Handle  = Psi4Util.InitializePsi4(PrintVersion = True, PrintHeader = False)
 196     OptionsInfo["psi4"] = Psi4Handle
 197     
 198     (MolCount, ValidMolCount, EnergyFailedCount) = [0] * 3
 199     for Result in Results:
 200         MolCount += 1
 201         MolIndex, EncodedMol, CalcStatus, Energy, SolvationEnergy = Result
 202         
 203         if EncodedMol is None:
 204             continue
 205         
 206         ValidMolCount += 1
 207 
 208         Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
 209         
 210         if not CalcStatus:
 211             if not OptionsInfo["QuietMode"]:
 212                 MolName = RDKitUtil.GetMolName(Mol, MolCount)
 213                 MiscUtil.PrintWarning("Failed to calculate energy for molecule %s" % MolName)
 214             
 215             EnergyFailedCount += 1
 216             continue
 217         
 218         WriteMolecule(Writer, Mol, Energy, SolvationEnergy)
 219     
 220     return (MolCount, ValidMolCount, EnergyFailedCount)
 221 
 222 def InitializeWorkerProcess(*EncodedArgs):
 223     """Initialize data for a worker process."""
 224     
 225     global Options, OptionsInfo
 226     
 227     if not OptionsInfo["QuietMode"]:
 228         MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid())
 229     
 230     # Decode Options and OptionInfo...
 231     Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0])
 232     OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1])
 233 
 234     # Psi4 is initialized in the worker process to avoid creation of redundant Psi4
 235     # output files for each process...
 236     OptionsInfo["Psi4Initialized"]  = False
 237 
 238 def InitializePsi4ForWorkerProcess():
 239     """Initialize Psi4 for a worker process."""
 240     
 241     if OptionsInfo["Psi4Initialized"]:
 242         return
 243 
 244     OptionsInfo["Psi4Initialized"] = True
 245     
 246     # Update output file...
 247     OptionsInfo["Psi4RunParams"]["OutputFile"] = Psi4Util.UpdatePsi4OutputFileUsingPID(OptionsInfo["Psi4RunParams"]["OutputFile"], os.getpid())
 248     
 249     # Intialize Psi4...
 250     OptionsInfo["psi4"] = Psi4Util.InitializePsi4(Psi4RunParams = OptionsInfo["Psi4RunParams"], Psi4OptionsParams = OptionsInfo["Psi4OptionsParams"], PrintVersion = False, PrintHeader = True)
 251     if OptionsInfo["Psi4DDXSolvationMode"]:
 252         Psi4Util.UpdatePsi4OptionsParameters(OptionsInfo["psi4"], OptionsInfo["Psi4DDXSolvationOptions"])
 253     
 254     # Setup conversion factor for energy units...
 255     SetupEnergyConversionFactor(OptionsInfo["psi4"])
 256     
 257 def WorkerProcess(EncodedMolInfo):
 258     """Process data for a worker process."""
 259     
 260     if not OptionsInfo["Psi4Initialized"]:
 261         InitializePsi4ForWorkerProcess()
 262     
 263     MolIndex, EncodedMol = EncodedMolInfo
 264     
 265     CalcStatus = False
 266     Energy = None
 267     
 268     if EncodedMol is None:
 269         return [MolIndex, None, CalcStatus, Energy]
 270     
 271     Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
 272     MolCount = MolIndex + 1
 273     
 274     if not CheckAndValidateMolecule(Mol, MolCount):
 275         return [MolIndex, None, CalcStatus, Energy]
 276     
 277     # Setup a Psi4 molecule...
 278     Psi4Mol = SetupPsi4Mol(OptionsInfo["psi4"], Mol, MolCount)
 279     if Psi4Mol is None:
 280         return [MolIndex, None, CalcStatus, Energy]
 281     
 282     CalcStatus, Energy, SolvationEnergy = CalculateMoleculeEnergy(OptionsInfo["psi4"], Psi4Mol, Mol, MolCount)
 283     
 284     return [MolIndex, RDKitUtil.MolToBase64EncodedMolString(Mol, PropertyPickleFlags = Chem.PropertyPickleOptions.MolProps | Chem.PropertyPickleOptions.PrivateProps), CalcStatus, Energy, SolvationEnergy]
 285 
 286 def CalculateMoleculeEnergy(Psi4Handle, Psi4Mol, Mol, MolNum = None):
 287     """Calculate energy."""
 288     
 289     Status = False
 290     Energy = None
 291     SolvationEnergy = None
 292     
 293     #  Setup reference wave function...
 294     Reference = SetupReferenceWavefunction(Mol)
 295     Psi4Handle.set_options({'Reference': Reference})
 296     
 297     # Setup method name and basis set...
 298     MethodName, BasisSet = SetupMethodNameAndBasisSet(Mol)
 299 
 300     if OptionsInfo["Psi4DDXSolvationMode"]:
 301         Status, Energy, WaveFunction = Psi4Util.CalculateSinglePointEnergy(Psi4Handle, Psi4Mol, MethodName, BasisSet, ReturnWaveFunction = True, Quiet = OptionsInfo["QuietMode"])
 302         SolvationEnergy = RetrievePsi4DDXSolvationEnergy(WaveFunction)
 303     else:
 304         Status, Energy = Psi4Util.CalculateSinglePointEnergy(Psi4Handle, Psi4Mol, MethodName, BasisSet, Quiet = OptionsInfo["QuietMode"])
 305     
 306     # Convert energy units...
 307     if Status:
 308         if OptionsInfo["ApplyEnergyConversionFactor"]:
 309             Energy = Energy * OptionsInfo["EnergyConversionFactor"]
 310             if SolvationEnergy is not None:
 311                 SolvationEnergy = SolvationEnergy * OptionsInfo["EnergyConversionFactor"]
 312 
 313     # Clean up
 314     PerformPsi4Cleanup(Psi4Handle)
 315 
 316     return (Status, Energy, SolvationEnergy)
 317 
 318 def RetrievePsi4DDXSolvationEnergy(WaveFunction):
 319     """Retrieve DDX solvation energy."""
 320     
 321     SolvationEnergy = None
 322     try:
 323         SolvationEnergy = WaveFunction.variable('DD Solvation Energy')
 324     except Exception as ErrMsg:
 325         SolvationEnergy = None
 326         if not OptionsInfo["QuietMode"]:
 327             MiscUtil.PrintWarning("Failed to retrieve Psi4 DD Solvation Energy using wave function: %s" % ErrMsg)
 328 
 329     return SolvationEnergy
 330 
 331 def SetupPsi4Mol(Psi4Handle, Mol, MolCount = None):
 332     """Setup a Psi4 molecule object."""
 333     
 334     MolGeometry = RDKitUtil.GetPsi4XYZFormatString(Mol, NoCom = True, NoReorient = True)
 335     
 336     try:
 337         Psi4Mol = Psi4Handle.geometry(MolGeometry)
 338     except Exception as ErrMsg:
 339         Psi4Mol = None
 340         if not OptionsInfo["QuietMode"]:
 341             MiscUtil.PrintWarning("Failed to create Psi4 molecule from geometry string: %s\n" % ErrMsg)
 342             MolName = RDKitUtil.GetMolName(Mol, MolCount)
 343             MiscUtil.PrintWarning("Ignoring molecule: %s" % MolName)
 344     
 345     return Psi4Mol
 346 
 347 def PerformPsi4Cleanup(Psi4Handle):
 348     """Perform clean up."""
 349     
 350     # Clean up after Psi4 run...
 351     Psi4Handle.core.clean()
 352     
 353     # Clean up any leftover scratch files...
 354     if OptionsInfo["MPMode"]:
 355         Psi4Util.RemoveScratchFiles(Psi4Handle, OptionsInfo["Psi4RunParams"]["OutputFile"])
 356 
 357 def CheckAndValidateMolecule(Mol, MolCount = None):
 358     """Validate molecule for Psi4 calculations."""
 359     
 360     if Mol is None:
 361         if not OptionsInfo["QuietMode"]:
 362             MiscUtil.PrintInfo("\nProcessing molecule number %s..." % MolCount)
 363         return False
 364     
 365     MolName = RDKitUtil.GetMolName(Mol, MolCount)
 366     if not OptionsInfo["QuietMode"]:
 367         MiscUtil.PrintInfo("\nProcessing molecule %s..." % MolName)
 368     
 369     if RDKitUtil.IsMolEmpty(Mol):
 370         if not OptionsInfo["QuietMode"]:
 371             MiscUtil.PrintWarning("Ignoring empty molecule: %s\n" % MolName)
 372         return False
 373     
 374     if not RDKitUtil.ValidateElementSymbols(RDKitUtil.GetAtomSymbols(Mol)):
 375         if not OptionsInfo["QuietMode"]:
 376             MiscUtil.PrintWarning("Ignoring molecule containing invalid element symbols: %s\n" % MolName)
 377         return False
 378     
 379     # Check for 3D flag...
 380     if not Mol.GetConformer().Is3D():
 381         if not OptionsInfo["QuietMode"]:
 382             MiscUtil.PrintWarning("3D tag is not set for molecule: %s\n" % MolName)
 383     
 384     # Check for missing hydrogens...
 385     if RDKitUtil.AreHydrogensMissingInMolecule(Mol):
 386         if not OptionsInfo["QuietMode"]:
 387             MiscUtil.PrintWarning("Missing hydrogens in molecule: %s\n" % MolName)
 388     
 389     return True
 390 
 391 def SetupMethodNameAndBasisSet(Mol):
 392     """Setup method name and basis set."""
 393     
 394     MethodName = OptionsInfo["MethodName"]
 395     if OptionsInfo["MethodNameAuto"]:
 396         MethodName = "B3LYP"
 397     
 398     BasisSet = OptionsInfo["BasisSet"]
 399     if OptionsInfo["BasisSetAuto"]:
 400         BasisSet = "6-31+G**" if RDKitUtil.IsAtomSymbolPresentInMol(Mol, "S") else "6-31G**"
 401     
 402     return (MethodName, BasisSet)
 403 
 404 def SetupReferenceWavefunction(Mol):
 405     """Setup reference wavefunction."""
 406     
 407     Reference = OptionsInfo["Reference"]
 408     if OptionsInfo["ReferenceAuto"]:
 409         Reference = 'UHF' if (RDKitUtil.GetSpinMultiplicity(Mol) > 1) else 'RHF'
 410     
 411     return Reference
 412 
 413 def SetupEnergyConversionFactor(Psi4Handle):
 414     """Setup converstion factor for energt units. The Psi4 energy units are Hartrees."""
 415     
 416     EnergyUnits = OptionsInfo["EnergyUnits"]
 417     
 418     ApplyConversionFactor = True
 419     if re.match("^kcal\/mol$", EnergyUnits, re.I):
 420         ConversionFactor = Psi4Handle.constants.hartree2kcalmol
 421     elif re.match("^kJ\/mol$", EnergyUnits, re.I):
 422         ConversionFactor = Psi4Handle.constants.hartree2kJmol
 423     elif re.match("^eV$", EnergyUnits, re.I):
 424         ConversionFactor = Psi4Handle.constants.hartree2ev
 425     else:
 426         ApplyConversionFactor = False
 427         ConversionFactor = 1.0
 428     
 429     OptionsInfo["ApplyEnergyConversionFactor"] = ApplyConversionFactor
 430     OptionsInfo["EnergyConversionFactor"] = ConversionFactor
 431 
 432 def WriteMolecule(Writer, Mol, Energy, SolvationEnergy):
 433     """Write molecule."""
 434     
 435     Energy = "%.*f" % (OptionsInfo["Precision"], Energy)
 436     Mol.SetProp(OptionsInfo["EnergyDataFieldLabel"], Energy)
 437     
 438     if OptionsInfo["Psi4DDXSolvationMode"] and SolvationEnergy is not None:
 439         SolvationEnergy = "%.*f" % (OptionsInfo["Precision"], SolvationEnergy)
 440         Mol.SetProp(OptionsInfo["SolvationEnergyDataFieldLabel"], SolvationEnergy)
 441         
 442     Writer.write(Mol)
 443 
 444 def CheckAndSetupDDX():
 445     """Load pyddx and return its handle."""
 446     
 447     try:
 448         import pyddx
 449     except ImportError as ErrMsg:
 450         sys.stderr.write("\nFailed to import module/package pyddx for solvation: %s\n" % ErrMsg)
 451         sys.stderr.write("Psi4 relies on pyddx module for DDX solvation calculation.\n")
 452         sys.stderr.write("You may need to install it from pypi or conda-forge.\n")
 453         sys.stderr.write("Check/update your environment and try again.\n\n")
 454         sys.exit(1)
 455 
 456     return pyddx
 457 
 458 def ProcessDDXListSolventsOption():
 459     """List solvent information provided by tge DDX module for the calculation
 460     of solvation energy."""
 461 
 462     PyDDXHandle = CheckAndSetupDDX()
 463     
 464     MiscUtil.PrintInfo("Solvent name and dielectric constant information avaibale in the DDX module:\n")
 465     MiscUtil.PrintInfo("Solvent\tEpsilon")
 466     Count = 0
 467     for Solvent in sorted(PyDDXHandle.data.solvent_epsilon.keys()):
 468         Count += 1
 469         Epsilon = PyDDXHandle.data.solvent_epsilon[Solvent]
 470         MiscUtil.PrintInfo("%s\t%s" % (Solvent, Epsilon))
 471     
 472     MiscUtil.PrintInfo("\nTotal number of solvents: %s\n" % Count)
 473 
 474 def ProcessOptions():
 475     """Process and validate command line arguments and options."""
 476     
 477     MiscUtil.PrintInfo("Processing options...")
 478     
 479     # Validate options...
 480     ValidateOptions()
 481     
 482     OptionsInfo["Infile"] = Options["--infile"]
 483     ParamsDefaultInfoOverride = {"RemoveHydrogens": False}
 484     OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters("--infileParams", Options["--infileParams"], InfileName = Options["--infile"], ParamsDefaultInfo = ParamsDefaultInfoOverride)
 485     
 486     OptionsInfo["Outfile"] = Options["--outfile"]
 487     OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters("--outfileParams", Options["--outfileParams"])
 488     
 489     OptionsInfo["Overwrite"] = Options["--overwrite"]
 490     
 491     # Method, basis set, and reference wavefunction...
 492     OptionsInfo["BasisSet"] = Options["--basisSet"]
 493     OptionsInfo["BasisSetAuto"] = True if re.match("^auto$", Options["--basisSet"], re.I) else False
 494     
 495     OptionsInfo["MethodName"] = Options["--methodName"]
 496     OptionsInfo["MethodNameAuto"] = True if re.match("^auto$", Options["--methodName"], re.I) else False
 497     
 498     OptionsInfo["Reference"] = Options["--reference"]
 499     OptionsInfo["ReferenceAuto"] = True if re.match("^auto$", Options["--reference"], re.I) else False
 500 
 501     # Run and options parameters...
 502     OptionsInfo["Psi4OptionsParams"] = Psi4Util.ProcessPsi4OptionsParameters("--psi4OptionsParams", Options["--psi4OptionsParams"])
 503     OptionsInfo["Psi4RunParams"] = Psi4Util.ProcessPsi4RunParameters("--psi4RunParams", Options["--psi4RunParams"], InfileName = OptionsInfo["Infile"])
 504     
 505     # DDX solvation parameters...
 506     OptionsInfo["Psi4DDXSolvationMode"] = True if re.match("^yes$", Options["--psi4DDXSolvation"], re.I) else False
 507     Psi4DDXSolvationParams = Psi4Util.ProcessPsi4DDXSolvationParameters("--psi4DDXSolvationParams", Options["--psi4DDXSolvationParams"])
 508     Psi4DDXSolvationOptions = Psi4Util.SetupPsi4DDXSolvationOptions(OptionsInfo["Psi4DDXSolvationMode"], Psi4DDXSolvationParams)
 509     OptionsInfo["Psi4DDXSolvationParams"] = Psi4DDXSolvationParams
 510     OptionsInfo["Psi4DDXSolvationOptions"] = Psi4DDXSolvationOptions
 511     
 512     if OptionsInfo["Psi4DDXSolvationMode"]:
 513         CheckAndSetupDDX()
 514     
 515     # Energy units and label...
 516     OptionsInfo["EnergyUnits"] = Options["--energyUnits"]
 517     EnergyDataFieldLabel = Options["--energyDataFieldLabel"]
 518     if re.match("^auto$", EnergyDataFieldLabel, re.I):
 519         EnergyDataFieldLabel = "Psi4_Energy (%s)" % Options["--energyUnits"]
 520     OptionsInfo["EnergyDataFieldLabel"] = EnergyDataFieldLabel
 521     
 522     SolvationEnergyDataFieldLabel = "Psi4_DDX_Solvation_Energy (%s)" % Options["--energyUnits"]
 523     OptionsInfo["SolvationEnergyDataFieldLabel"] = SolvationEnergyDataFieldLabel
 524 
 525     OptionsInfo["MPMode"] = True if re.match("^yes$", Options["--mp"], re.I) else False
 526     OptionsInfo["MPParams"] = MiscUtil.ProcessOptionMultiprocessingParameters("--mpParams", Options["--mpParams"])
 527     
 528     OptionsInfo["Precision"] = int(Options["--precision"])
 529     OptionsInfo["QuietMode"] = True if re.match("^yes$", Options["--quiet"], re.I) else False
 530 
 531 def RetrieveOptions():
 532     """Retrieve command line arguments and options."""
 533     
 534     # Get options...
 535     global Options
 536     Options = docopt(_docoptUsage_)
 537     
 538     # Set current working directory to the specified directory...
 539     WorkingDir = Options["--workingdir"]
 540     if WorkingDir:
 541         os.chdir(WorkingDir)
 542     
 543     # Handle examples option...
 544     if "--examples" in Options and Options["--examples"]:
 545         MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_))
 546         sys.exit(0)
 547     
 548     # Handle listing of solvent information...
 549     if  Options and Options["--psi4DDXListSolvents"]:
 550         ProcessDDXListSolventsOption()
 551         sys.exit(0)
 552 
 553 def ValidateOptions():
 554     """Validate option values."""
 555     
 556     MiscUtil.ValidateOptionTextValue("--energyUnits", Options["--energyUnits"], "Hartrees kcal/mol kJ/mol eV")
 557     
 558     MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"])
 559     MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "sdf sd mol")
 560     
 561     MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "sdf sd")
 562     MiscUtil.ValidateOptionsOutputFileOverwrite("-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"])
 563     MiscUtil.ValidateOptionsDistinctFileNames("-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"])
 564     
 565     MiscUtil.ValidateOptionTextValue("--mp", Options["--mp"], "yes no")
 566     MiscUtil.ValidateOptionIntegerValue("-p, --precision", Options["--precision"], {">": 0})
 567     
 568     MiscUtil.ValidateOptionTextValue("--psi4DDXSolvation", Options["--psi4DDXSolvation"], "yes no")
 569     
 570     MiscUtil.ValidateOptionTextValue("-q, --quiet", Options["--quiet"], "yes no")
 571 
 572 # Setup a usage string for docopt...
 573 _docoptUsage_ = """
 574 Psi4CalculateEnergy.py - Calculate single point energy
 575 
 576 Usage:
 577     Psi4CalculateEnergy.py [--basisSet <text>] [--energyDataFieldLabel <text>] [--energyUnits <text>]
 578                            [--infileParams <Name,Value,...>] [--methodName <text>] [--mp <yes or no>]
 579                            [--mpParams <Name, Value,...>] [ --outfileParams <Name,Value,...> ] [--overwrite]
 580                            [--precision <number>] [--psi4OptionsParams <Name,Value,...>] [--psi4RunParams <Name,Value,...>]
 581                            [--psi4DDXSolvation <yes or no>] [--psi4DDXSolvationParams <Name,Value,...>]
 582                            [--quiet <yes or no>] [--reference <text>] [-w <dir>] -i <infile> -o <outfile> 
 583     Psi4CalculateEnergy.py --psi4DDXListSolvents
 584     Psi4CalculateEnergy.py -h | --help | -e | --examples
 585 
 586 Description:
 587     Calculate single point energy for molecules using a specified method name and
 588     basis set. The molecules must have 3D coordinates in input file. The molecular
 589     geometry is not optimized before the calculation. In addition, the hydrogens
 590     must be present for all molecules in input file. The 3D coordinates are not
 591     modified during the calculation.
 592 
 593     A Psi4 XYZ format geometry string is automatically generated for each molecule
 594     in input file. It contains atom symbols and 3D coordinates for each atom in a
 595     molecule. In addition, the formal charge and spin multiplicity are present in the
 596     the geometry string. These values are either retrieved from molecule properties
 597     named 'FormalCharge' and 'SpinMultiplicty' or dynamically calculated for a
 598     molecule.
 599 
 600     The supported input file formats are: Mol (.mol), SD (.sdf, .sd)
 601 
 602     The supported output file formats are: SD (.sdf, .sd)
 603 
 604 Options:
 605     -b, --basisSet <text>  [default: auto]
 606         Basis set to use for energy calculation. Default: 6-31+G** for sulfur
 607         containing molecules; Otherwise, 6-31G** [ Ref 150 ]. The specified 
 608         value must be a valid Psi4 basis set. No validation is performed.
 609         
 610         The following list shows a representative sample of basis sets available
 611         in Psi4:
 612             
 613             STO-3G, 6-31G, 6-31+G, 6-31++G, 6-31G*, 6-31+G*,  6-31++G*, 
 614             6-31G**, 6-31+G**, 6-31++G**, 6-311G, 6-311+G, 6-311++G,
 615             6-311G*, 6-311+G*, 6-311++G*, 6-311G**, 6-311+G**, 6-311++G**,
 616             cc-pVDZ, cc-pCVDZ, aug-cc-pVDZ, cc-pVDZ-DK, cc-pCVDZ-DK, def2-SVP,
 617             def2-SVPD, def2-TZVP, def2-TZVPD, def2-TZVPP, def2-TZVPPD
 618             
 619     --energyDataFieldLabel <text>  [default: auto]
 620         Energy data field label for writing energy values. Default: Psi4_Energy (<Units>). 
 621     --energyUnits <text>  [default: kcal/mol]
 622         Energy units. Possible values: Hartrees, kcal/mol, kJ/mol, or eV.
 623     -e, --examples
 624         Print examples.
 625     -h, --help
 626         Print this help message.
 627     -i, --infile <infile>
 628         Input file name.
 629     --infileParams <Name,Value,...>  [default: auto]
 630         A comma delimited list of parameter name and value pairs for reading
 631         molecules from files. The supported parameter names for different file
 632         formats, along with their default values, are shown below:
 633             
 634             SD, MOL: removeHydrogens,no,sanitize,yes,strictParsing,yes
 635             
 636     -m, --methodName <text>  [default: auto]
 637         Method to use for energy calculation. Default: B3LYP [ Ref 150 ]. The
 638         specified value must be a valid Psi4 method name. No validation is
 639         performed.
 640         
 641         The following list shows a representative sample of methods available
 642         in Psi4:
 643             
 644             B1LYP, B2PLYP, B2PLYP-D3BJ, B2PLYP-D3MBJ, B3LYP, B3LYP-D3BJ,
 645             B3LYP-D3MBJ, CAM-B3LYP, CAM-B3LYP-D3BJ, HF, HF-D3BJ,  HF3c, M05,
 646             M06, M06-2x, M06-HF, M06-L, MN12-L, MN15, MN15-D3BJ,PBE, PBE0,
 647             PBEH3c, PW6B95, PW6B95-D3BJ, WB97, WB97X, WB97X-D, WB97X-D3BJ
 648             
 649     --mp <yes or no>  [default: no]
 650         Use multiprocessing.
 651          
 652         By default, input data is retrieved in a lazy manner via mp.Pool.imap()
 653         function employing lazy RDKit data iterable. This allows processing of
 654         arbitrary large data sets without any additional requirements memory.
 655         
 656         All input data may be optionally loaded into memory by mp.Pool.map()
 657         before starting worker processes in a process pool by setting the value
 658         of 'inputDataMode' to 'InMemory' in '--mpParams' option.
 659         
 660         A word to the wise: The default 'chunkSize' value of 1 during 'Lazy' input
 661         data mode may adversely impact the performance. The '--mpParams' section
 662         provides additional information to tune the value of 'chunkSize'.
 663     --mpParams <Name,Value,...>  [default: auto]
 664         A comma delimited list of parameter name and value pairs to configure
 665         multiprocessing.
 666         
 667         The supported parameter names along with their default and possible
 668         values are shown below:
 669         
 670             chunkSize, auto
 671             inputDataMode, Lazy   [ Possible values: InMemory or Lazy ]
 672             numProcesses, auto   [ Default: mp.cpu_count() ]
 673         
 674         These parameters are used by the following functions to configure and
 675         control the behavior of multiprocessing: mp.Pool(), mp.Pool.map(), and
 676         mp.Pool.imap().
 677         
 678         The chunkSize determines chunks of input data passed to each worker
 679         process in a process pool by mp.Pool.map() and mp.Pool.imap() functions.
 680         The default value of chunkSize is dependent on the value of 'inputDataMode'.
 681         
 682         The mp.Pool.map() function, invoked during 'InMemory' input data mode,
 683         automatically converts RDKit data iterable into a list, loads all data into
 684         memory, and calculates the default chunkSize using the following method
 685         as shown in its code:
 686         
 687             chunkSize, extra = divmod(len(dataIterable), len(numProcesses) * 4)
 688             if extra: chunkSize += 1
 689         
 690         For example, the default chunkSize will be 7 for a pool of 4 worker processes
 691         and 100 data items.
 692         
 693         The mp.Pool.imap() function, invoked during 'Lazy' input data mode, employs
 694         'lazy' RDKit data iterable to retrieve data as needed, without loading all the
 695         data into memory. Consequently, the size of input data is not known a priori.
 696         It's not possible to estimate an optimal value for the chunkSize. The default 
 697         chunkSize is set to 1.
 698         
 699         The default value for the chunkSize during 'Lazy' data mode may adversely
 700         impact the performance due to the overhead associated with exchanging
 701         small chunks of data. It is generally a good idea to explicitly set chunkSize to
 702         a larger value during 'Lazy' input data mode, based on the size of your input
 703         data and number of processes in the process pool.
 704         
 705         The mp.Pool.map() function waits for all worker processes to process all
 706         the data and return the results. The mp.Pool.imap() function, however,
 707         returns the the results obtained from worker processes as soon as the
 708         results become available for specified chunks of data.
 709         
 710         The order of data in the results returned by both mp.Pool.map() and 
 711         mp.Pool.imap() functions always corresponds to the input data.
 712     -o, --outfile <outfile>
 713         Output file name.
 714     --outfileParams <Name,Value,...>  [default: auto]
 715         A comma delimited list of parameter name and value pairs for writing
 716         molecules to files. The supported parameter names for different file
 717         formats, along with their default values, are shown below:
 718             
 719             SD: kekulize,yes,forceV3000,no
 720             
 721     --overwrite
 722         Overwrite existing files.
 723     --precision <number>  [default: 6]
 724         Floating point precision for writing energy values.
 725     --psi4OptionsParams <Name,Value,...>  [default: none]
 726         A comma delimited list of Psi4 option name and value pairs for setting
 727         global and module options. The names are 'option_name' for global options
 728         and 'module_name__option_name' for options local to a module. The
 729         specified option names must be valid Psi4 names. No validation is
 730         performed.
 731         
 732         The specified option name and  value pairs are processed and passed to
 733         psi4.set_options() as a dictionary. The supported value types are float,
 734         integer, boolean, or string. The float value string is converted into a float.
 735         The valid values for a boolean string are yes, no, true, false, on, or off. 
 736     --psi4RunParams <Name,Value,...>  [default: auto]
 737         A comma delimited list of parameter name and value pairs for configuring
 738         Psi4 jobs.
 739         
 740         The supported parameter names along with their default and possible
 741         values are shown below:
 742              
 743             MemoryInGB, 1
 744             NumThreads, 1
 745             OutputFile, auto   [ Possible  values: stdout, quiet, or FileName ]
 746             ScratchDir, auto   [ Possivle values: DirName]
 747             RemoveOutputFile, yes   [ Possible values: yes, no, true, or false]
 748             
 749         These parameters control the runtime behavior of Psi4.
 750         
 751         The default file name for 'OutputFile' is <InFileRoot>_Psi4.out. The PID
 752         is appended to output file name during multiprocessing as shown:
 753         <InFileRoot>_Psi4_<PIDNum>.out. The 'stdout' value for 'OutputType'
 754         sends Psi4 output to stdout. The 'quiet' or 'devnull' value suppresses
 755         all Psi4 output.
 756         
 757         The default 'Yes' value of 'RemoveOutputFile' option forces the removal
 758         of any existing Psi4 before creating new files to append output from
 759         multiple Psi4 runs.
 760         
 761         The option 'ScratchDir' is a directory path to the location of scratch
 762         files. The default value corresponds to Psi4 default. It may be used to
 763         override the deafult path.
 764     --psi4DDXSolvation <yes or no>  [default: no]
 765         Perform energy calculation in solution using domain-decomposition-based
 766         continuum solvation models [ Ref 160-161 ] The script relies on Psi4
 767         interface to the DDX module to perform these calculations. The DDX library
 768         provides a linear-scaling implementation of standard continuum solvation
 769         models using a domain- decomposition ansatz. Two solvation models are
 770         supported: COnductor-like Screening MOdel (COSMO) [ Ref 162-163] and
 771         Polarizable Continuum Model (PCM) [ Ref 164-165 ].
 772         
 773         The solvation energy is included in the value of the total energy written to 
 774         the output SD file. In addition, the value of solvation energy is written to the
 775         output file under its own data field label.
 776         
 777         Psi4 relies on Python module PYDDX to calculate solvation energy. It must be
 778         present in your environment.
 779     --psi4DDXSolvationParams <Name,Value,...>  [default: auto]
 780         A space delimited list of parameter name and value  pairs for calculating
 781         solvation energy using the DDX module. The supported parameter names,
 782         along with their default values, are shown below:
 783             
 784             solvationModel PCM   [ Possible  values: COSMO or PCM]
 785             solvent water   [ Possible  values: A valid solvent name]
 786             solventEpsilon None
 787             radiiSet UFF   [ Possible  values: Bondi or UFF]
 788             radiiScaling auto   [ Default: 1.2 for Bondi; 1.1 for UFF]
 789             
 790             solvationModel: Solvation model for calculating solvation energy.
 791             The corresponding Psi4 option is DDX_MODEL.
 792             
 793             solvent: Solvent to use. The corresponding Ps4 option is
 794             DDX_SOLVENT
 795             
 796             solventEpsilon: Dielectric constant of the solvent. The
 797             corresponding Psi4 option is DDX_SOLVENT_EPSILON.
 798             
 799             radiiSet: Radius set for cavity spheres. The corresponding Psi4
 800             option is DDX_RADII_SET.
 801             
 802             radiiScaling: Scaling factor for cavity spheres. The default value
 803             depends on radiiSet: 1.2 for Bondi; 1.1 for UFF. The corresponding
 804             Psi4 option is DDX_RADII_SCALING.
 805             
 806         These parameter names are automatically mapped to appropriate DDX keywords.
 807         
 808         You may specify the solvent either by directly providing a dielectric constant using
 809         'solventEpsilon' parameter or a solvent name corresponding to 'solvent' parameter.
 810         The solvent name must be a valid named supported by the DDX module. For example:
 811         water, ethanol, dimethylsulfoxide, cis-1,2-dimethylcyclohexane , etc. The 'solvent'
 812         parameter is ignored for non-zero value of 'solvent' option.
 813         
 814         The DDX module contains a variety of addtional options to confgure the calculation
 815         of solvation energy. For example: DDX_MAXITER, DDX_RADII_SCALING, etc. You may
 816         use '--psi4OptionsParams' to modify values for additional DDX options.
 817     --psi4DDXListSolvents
 818         List solvent names, along with dielectric values, supported by the DDX module
 819         for the calculation of solvent energy without performing any calculation.
 820     -q, --quiet <yes or no>  [default: no]
 821         Use quiet mode. The warning and information messages will not be printed.
 822     -r, --reference <text>  [default: auto]
 823         Reference wave function to use for energy calculation. Default: RHF or UHF.
 824         The default values are Restricted Hartree-Fock (RHF) for closed-shell molecules
 825         with all electrons paired and Unrestricted Hartree-Fock (UHF) for open-shell
 826         molecules with unpaired electrons.
 827         
 828         The specified value must be a valid Psi4 reference wave function. No validation
 829         is performed. For example: ROHF, CUHF, RKS, etc.
 830         
 831         The spin multiplicity determines the default value of reference wave function
 832         for input molecules. It is calculated from number of free radical electrons using
 833         Hund's rule of maximum multiplicity defined as 2S + 1 where S is the total
 834         electron spin. The total spin is 1/2 the number of free radical electrons in a 
 835         molecule. The value of 'SpinMultiplicity' molecule property takes precedence
 836         over the calculated value of spin multiplicity.
 837     -w, --workingdir <dir>
 838         Location of working directory which defaults to the current directory.
 839 
 840 Examples:
 841     To calculate single point energy using  B3LYP/6-31G** and B3LYP/6-31+G** for
 842     non-sulfur and sulfur containing molecules in a SD file with 3D structures, use
 843     RHF and UHF for closed-shell and open-shell molecules, and write a new SD file,
 844     type:
 845 
 846         % Psi4CalculateEnergy.py  -i Psi4Sample3D.sdf  -o Psi4Sample3DOut.sdf
 847 
 848     To run the first example in multiprocessing mode on all available CPUs
 849     without loading all data into memory and write out a SD file, type:
 850 
 851         % Psi4CalculateEnergy.py --mp yes -i Psi4Sample3D.sdf
 852           -o Psi4Sample3DOut.sdf
 853 
 854     To run the first example in multiprocessing mode on all available CPUs
 855     by loading all data into memory and write out a SD file, type:
 856 
 857         % Psi4CalculateEnergy.py  --mp yes --mpParams "inputDataMode,
 858           InMemory" -i Psi4Sample3D.sdf  -o Psi4Sample3DOut.sdf
 859 
 860     To run the first example in multiprocessing mode on all available CPUs
 861     without loading all data into memory along with multiple threads for each
 862     Psi4 run and write out a SD file, type:
 863 
 864         % Psi4CalculateEnergy.py --mp yes --psi4RunParams "NumThreads,2"
 865            -i Psi4Sample3D.sdf -o Psi4Sample3DOut.sdf
 866 
 867     To run the first example along with calculating DDX solvation energy using
 868     default solvent parameters for water and write out a SD file, type:
 869 
 870         % Psi4CalculateEnergy.py --mp yes -i Psi4Sample3D.sdf
 871           -o Psi4Sample3DOut.sdf --psi4DDXSolvation yes
 872 
 873     To run the first example along with calculating DDX solvation energy using
 874     specific solvartion parameters and write out a SD file, type:
 875 
 876         % Psi4CalculateEnergy.py --mp yes -i Psi4Sample3D.sdf
 877           -o Psi4Sample3DOut.sdf --psi4DDXSolvation yes --psi4DDXSolvationParams
 878           "solvationModel COSMO solvent water radiiSet UFF"
 879 
 880     To calculate single point energy using a specific method and basis set for
 881     molecules in a SD containing 3D structures and write a new SD file, type:
 882 
 883         % Psi4CalculateEnergy.py  -m SCF -b aug-cc-pVDZ -i Psi4Sample3D.sdf
 884           -o Psi4Sample3DOut.sdf
 885 
 886 Author:
 887     Manish Sud(msud@san.rr.com)
 888 
 889 See also:
 890     Psi4CalculatePartialCharges.py, Psi4PerformMinimization.py, Psi4GenerateConformers.py
 891 
 892 Copyright:
 893     Copyright (C) 2024 Manish Sud. All rights reserved.
 894 
 895     The functionality available in this script is implemented using Psi4, an
 896     open source quantum chemistry software package, and RDKit, an open
 897     source toolkit for cheminformatics developed by Greg Landrum.
 898 
 899     This file is part of MayaChemTools.
 900 
 901     MayaChemTools is free software; you can redistribute it and/or modify it under
 902     the terms of the GNU Lesser General Public License as published by the Free
 903     Software Foundation; either version 3 of the License, or (at your option) any
 904     later version.
 905 
 906 """
 907 
 908 if __name__ == "__main__":
 909     main()