MayaChemTools

    1 #!/bin/env python
    2 #
    3 # File: Psi4PerformMinimization.py
    4 # Author: Manish Sud <msud@san.rr.com>
    5 #
    6 # Contributor: Francois Berenger
    7 #
    8 # Copyright (C) 2024 Manish Sud. All rights reserved.
    9 #
   10 # The functionality available in this script is implemented using Psi4, an
   11 # open source quantum chemistry software package, and RDKit, an open
   12 # source toolkit for cheminformatics developed by Greg Landrum.
   13 #
   14 # This file is part of MayaChemTools.
   15 #
   16 # MayaChemTools is free software; you can redistribute it and/or modify it under
   17 # the terms of the GNU Lesser General Public License as published by the Free
   18 # Software Foundation; either version 3 of the License, or (at your option) any
   19 # later version.
   20 #
   21 # MayaChemTools is distributed in the hope that it will be useful, but without
   22 # any warranty; without even the implied warranty of merchantability of fitness
   23 # for a particular purpose.  See the GNU Lesser General Public License for more
   24 # details.
   25 #
   26 # You should have received a copy of the GNU Lesser General Public License
   27 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
   28 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
   29 # Boston, MA, 02111-1307, USA.
   30 #
   31 
   32 from __future__ import print_function
   33 
   34 # Add local python path to the global path and import standard library modules...
   35 import os
   36 import sys;  sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python"))
   37 import time
   38 import re
   39 import shutil
   40 import multiprocessing as mp
   41 
   42 # Psi4 imports...
   43 if (hasattr(shutil, 'which') and shutil.which("psi4") is None):
   44     sys.stderr.write("\nWarning: Failed to find 'psi4' in your PATH indicating potential issues with your\n")
   45     sys.stderr.write("Psi4 environment. The 'import psi4' directive in the global scope of the script\n")
   46     sys.stderr.write("interferes with the multiprocessing functionality. It is imported later in the\n")
   47     sys.stderr.write("local scope during the execution of the script and may fail. Check/update your\n")
   48     sys.stderr.write("Psi4 environment and try again.\n\n")
   49 
   50 # RDKit imports...
   51 try:
   52     from rdkit import rdBase
   53     from rdkit import Chem
   54     from rdkit.Chem import AllChem
   55 except ImportError as ErrMsg:
   56     sys.stderr.write("\nFailed to import RDKit module/package: %s\n" % ErrMsg)
   57     sys.stderr.write("Check/update your RDKit environment and try again.\n\n")
   58     sys.exit(1)
   59 
   60 # MayaChemTools imports...
   61 try:
   62     from docopt import docopt
   63     import MiscUtil
   64     import Psi4Util
   65     import RDKitUtil
   66 except ImportError as ErrMsg:
   67     sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg)
   68     sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n")
   69     sys.exit(1)
   70 
   71 ScriptName = os.path.basename(sys.argv[0])
   72 Options = {}
   73 OptionsInfo = {}
   74 
   75 def main():
   76     """Start execution of the script."""
   77     
   78     MiscUtil.PrintInfo("\n%s (Psi4: Imported later; RDKit v%s; MayaChemTools v%s; %s): Starting...\n" % (ScriptName, rdBase.rdkitVersion, MiscUtil.GetMayaChemToolsVersion(), time.asctime()))
   79     
   80     (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime()
   81     
   82     # Retrieve command line arguments and options...
   83     RetrieveOptions()
   84     
   85     # Process and validate command line arguments and options...
   86     ProcessOptions()
   87     
   88     # Perform actions required by the script...
   89     PerformMinimization()
   90     
   91     MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName)
   92     MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime))
   93 
   94 def PerformMinimization():
   95     """Perform minimization."""
   96     
   97     # Setup a molecule reader...
   98     MiscUtil.PrintInfo("\nProcessing file %s..." % OptionsInfo["Infile"])
   99     Mols  = RDKitUtil.ReadMolecules(OptionsInfo["Infile"], **OptionsInfo["InfileParams"])
  100     
  101     # Set up a molecule writer...
  102     Writer = RDKitUtil.MoleculesWriter(OptionsInfo["Outfile"], **OptionsInfo["OutfileParams"])
  103     if Writer is None:
  104         MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OptionsInfo["Outfile"])
  105     MiscUtil.PrintInfo("Generating file %s..." % OptionsInfo["Outfile"])
  106 
  107     MolCount, ValidMolCount, MinimizationFailedCount, WriteFailedCount = ProcessMolecules(Mols, Writer)
  108 
  109     if Writer is not None:
  110         Writer.close()
  111     
  112     MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount)
  113     MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount)
  114     MiscUtil.PrintInfo("Number of molecules failed during conformation generation or minimization: %d" % MinimizationFailedCount)
  115     MiscUtil.PrintInfo("Number of molecules failed during writing: %d" % WriteFailedCount)
  116     MiscUtil.PrintInfo("Number of ignored molecules: %d" % (MolCount - ValidMolCount + MinimizationFailedCount + WriteFailedCount))
  117 
  118 def ProcessMolecules(Mols, Writer):
  119     """Process and minimize molecules."""
  120     
  121     if OptionsInfo["MPMode"]:
  122         return ProcessMoleculesUsingMultipleProcesses(Mols, Writer)
  123     else:
  124         return ProcessMoleculesUsingSingleProcess(Mols, Writer)
  125 
  126 def ProcessMoleculesUsingSingleProcess(Mols, Writer):
  127     """Process and minimize molecules using a single process."""
  128     
  129     # Intialize Psi4...
  130     MiscUtil.PrintInfo("\nInitializing Psi4...")
  131     Psi4Handle = Psi4Util.InitializePsi4(Psi4RunParams = OptionsInfo["Psi4RunParams"], Psi4OptionsParams = OptionsInfo["Psi4OptionsParams"], PrintVersion = True, PrintHeader = True)
  132     if OptionsInfo["Psi4DDXSolvationMode"]:
  133         Psi4Util.UpdatePsi4OptionsParameters(Psi4Handle, OptionsInfo["Psi4DDXSolvationOptions"])
  134     OptionsInfo["psi4"] = Psi4Handle
  135     
  136     # Setup max iterations global variable...
  137     Psi4Util.UpdatePsi4OptionsParameters(Psi4Handle, {'GEOM_MAXITER': OptionsInfo["MaxIters"]})
  138     
  139     # Setup conversion factor for energy units...
  140     SetupEnergyConversionFactor(Psi4Handle)
  141     
  142     if OptionsInfo["SkipConformerGeneration"]:
  143         if OptionsInfo["Psi4ConstrainTorsionsMode"]:
  144             MiscUtil.PrintInfo("\nPerforming minimization without generation of conformers and constrained torsions...")
  145         else:
  146             MiscUtil.PrintInfo("\nPerforming minimization without generation of conformers...")
  147     else:
  148         MiscUtil.PrintInfo("\nPerforming minimization with generation of conformers...")
  149 
  150     (MolCount, ValidMolCount, MinimizationFailedCount, WriteFailedCount) = [0] * 4
  151     for Mol in Mols:
  152         MolCount += 1
  153 
  154         if not CheckAndValidateMolecule(Mol, MolCount):
  155             continue
  156 
  157         # Setup 2D coordinates for SMILES input file...
  158         if OptionsInfo["SMILESInfileStatus"]:
  159             AllChem.Compute2DCoords(Mol)
  160         
  161         ValidMolCount += 1
  162         
  163         Mol, CalcStatus, ConfID, Energy, SolvationEnergy = MinimizeMoleculeOrConformers(Psi4Handle, Mol, MolCount)
  164 
  165         if not CalcStatus:
  166             if not OptionsInfo["QuietMode"]:
  167                 MolName = RDKitUtil.GetMolName(Mol, MolCount)
  168                 MiscUtil.PrintWarning("Failed to calculate energy for molecule %s" % MolName)
  169             
  170             MinimizationFailedCount += 1
  171             continue
  172         
  173         WriteStatus = WriteMolecule(Writer, Mol, MolCount, ConfID, Energy, SolvationEnergy)
  174         
  175         if not WriteStatus:
  176             WriteFailedCount += 1
  177     
  178     return (MolCount, ValidMolCount, MinimizationFailedCount, WriteFailedCount)
  179 
  180 def ProcessMoleculesUsingMultipleProcesses(Mols, Writer):
  181     """Process and minimize molecules using multiprocessing."""
  182     
  183     if OptionsInfo["MPLevelConformersMode"]:
  184         return ProcessMoleculesUsingMultipleProcessesAtConformersLevel(Mols, Writer)
  185     elif OptionsInfo["MPLevelMoleculesMode"]:
  186         return ProcessMoleculesUsingMultipleProcessesAtMoleculesLevel(Mols, Writer)
  187     else:
  188         MiscUtil.PrintError("The value, %s,  option \"--mpLevel\" is not supported." % (OptionsInfo["MPLevel"]))
  189         
  190 def ProcessMoleculesUsingMultipleProcessesAtMoleculesLevel(Mols, Writer):
  191     """Process and minimize molecules using multiprocessing at molecules level."""
  192 
  193     if OptionsInfo["SkipConformerGeneration"]:
  194         if OptionsInfo["Psi4ConstrainTorsionsMode"]:
  195             MiscUtil.PrintInfo("\nPerforming minimization without generation of conformers and constrained torsions using multiprocessing at molecules level...")
  196         else:
  197             MiscUtil.PrintInfo("\nPerforming minimization without generation of conformers using multiprocessing at molecules level...")
  198     else:
  199         MiscUtil.PrintInfo("\nPerforming minimization with generation of conformers using multiprocessing at molecules level...")
  200 
  201     MPParams = OptionsInfo["MPParams"]
  202     
  203     # Setup data for initializing a worker process...
  204     InitializeWorkerProcessArgs = (MiscUtil.ObjectToBase64EncodedString(Options), MiscUtil.ObjectToBase64EncodedString(OptionsInfo))
  205     
  206     # Setup a encoded mols data iterable for a worker process...
  207     WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(Mols)
  208     
  209     # Setup process pool along with data initialization for each process...
  210     if not OptionsInfo["QuietMode"]:
  211         MiscUtil.PrintInfo("\nConfiguring multiprocessing using %s method..." % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()"))
  212         MiscUtil.PrintInfo("NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n" % (MPParams["NumProcesses"], MPParams["InputDataMode"], ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"])))
  213     
  214     ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeWorkerProcess, InitializeWorkerProcessArgs)
  215     
  216     # Start processing...
  217     if re.match("^Lazy$", MPParams["InputDataMode"], re.I):
  218         Results = ProcessPool.imap(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
  219     elif re.match("^InMemory$", MPParams["InputDataMode"], re.I):
  220         Results = ProcessPool.map(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
  221     else:
  222         MiscUtil.PrintError("The value, %s, specified for \"--inputDataMode\" is not supported." % (MPParams["InputDataMode"]))
  223     
  224     # Print out Psi4 version in the main process...
  225     MiscUtil.PrintInfo("\nInitializing Psi4...\n")
  226     Psi4Handle  = Psi4Util.InitializePsi4(PrintVersion = True, PrintHeader = False)
  227     OptionsInfo["psi4"] = Psi4Handle
  228     
  229     (MolCount, ValidMolCount, MinimizationFailedCount, WriteFailedCount) = [0] * 4
  230     for Result in Results:
  231         MolCount += 1
  232         MolIndex, EncodedMol, CalcStatus, ConfID, Energy, SolvationEnergy = Result
  233         
  234         if EncodedMol is None:
  235             continue
  236         ValidMolCount += 1
  237 
  238         if not CalcStatus:
  239             MinimizationFailedCount += 1
  240             continue
  241             
  242         Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
  243         WriteStatus = WriteMolecule(Writer, Mol, MolCount, ConfID, Energy, SolvationEnergy)
  244         
  245         if not WriteStatus:
  246             WriteFailedCount += 1
  247     
  248     return (MolCount, ValidMolCount, MinimizationFailedCount, WriteFailedCount)
  249 
  250 def InitializeWorkerProcess(*EncodedArgs):
  251     """Initialize data for a worker process."""
  252     
  253     global Options, OptionsInfo
  254 
  255     if not OptionsInfo["QuietMode"]:
  256         MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid())
  257     
  258     # Decode Options and OptionInfo...
  259     Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0])
  260     OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1])
  261 
  262     # Psi4 is initialized in the worker process to avoid creation of redundant Psi4
  263     # output files for each process...
  264     OptionsInfo["Psi4Initialized"]  = False
  265 
  266 def WorkerProcess(EncodedMolInfo):
  267     """Process data for a worker process."""
  268 
  269     if not OptionsInfo["Psi4Initialized"]:
  270         InitializePsi4ForWorkerProcess()
  271     
  272     MolIndex, EncodedMol = EncodedMolInfo
  273 
  274     MolNum = MolIndex + 1
  275     CalcStatus = False
  276     ConfID = None
  277     Energy = None
  278     SolvationEnergy = None
  279     
  280     if EncodedMol is None:
  281         return [MolIndex, None, CalcStatus, ConfID, Energy, SolvationEnergy]
  282 
  283     Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
  284     
  285     if not CheckAndValidateMolecule(Mol, MolNum):
  286         return [MolIndex, None, CalcStatus, ConfID, Energy, SolvationEnergy]
  287     
  288     # Setup 2D coordinates for SMILES input file...
  289     if OptionsInfo["SMILESInfileStatus"]:
  290         AllChem.Compute2DCoords(Mol)
  291         
  292     Mol, CalcStatus, ConfID, Energy, SolvationEnergy = MinimizeMoleculeOrConformers(OptionsInfo["psi4"], Mol, MolNum)
  293 
  294     return [MolIndex, RDKitUtil.MolToBase64EncodedMolString(Mol, PropertyPickleFlags = Chem.PropertyPickleOptions.MolProps | Chem.PropertyPickleOptions.PrivateProps), CalcStatus, ConfID, Energy, SolvationEnergy]
  295 
  296 def ProcessMoleculesUsingMultipleProcessesAtConformersLevel(Mols, Writer):
  297     """Process and minimize molecules using multiprocessing at conformers level."""
  298 
  299     MiscUtil.PrintInfo("\nPerforming minimization with generation of conformers using multiprocessing at conformers level...")
  300 
  301     (MolCount, ValidMolCount, MinimizationFailedCount, WriteFailedCount) = [0] * 4
  302     for Mol in Mols:
  303         MolCount += 1
  304         
  305         if not CheckAndValidateMolecule(Mol, MolCount):
  306             continue
  307 
  308         # Setup 2D coordinates for SMILES input file...
  309         if OptionsInfo["SMILESInfileStatus"]:
  310             AllChem.Compute2DCoords(Mol)
  311 
  312         ValidMolCount += 1
  313         
  314         Mol, CalcStatus, ConfID, Energy, SolvationEnergy = ProcessConformersUsingMultipleProcesses(Mol, MolCount)
  315 
  316         if not CalcStatus:
  317             if not OptionsInfo["QuietMode"]:
  318                 MolName = RDKitUtil.GetMolName(Mol, MolCount)
  319                 MiscUtil.PrintWarning("Failed to calculate energy for molecule %s" % MolName)
  320             
  321             MinimizationFailedCount += 1
  322             continue
  323         
  324         WriteStatus = WriteMolecule(Writer, Mol, MolCount, ConfID, Energy, SolvationEnergy)
  325         
  326         if not WriteStatus:
  327             WriteFailedCount += 1
  328     
  329     return (MolCount, ValidMolCount, MinimizationFailedCount, WriteFailedCount)
  330 
  331 def ProcessConformersUsingMultipleProcesses(Mol, MolNum):
  332     """Generate conformers and minimize them using multiple processes. """
  333 
  334     # Add hydrogens...
  335     Mol = Chem.AddHs(Mol)
  336 
  337     # Setup conformers...
  338     ConfIDs = EmbedMolecule(Mol, MolNum)
  339     if not len(ConfIDs):
  340         if not OptionsInfo["QuietMode"]:
  341             MolName = RDKitUtil.GetMolName(Mol, MolNum)
  342             MiscUtil.PrintWarning("Minimization couldn't be performed for molecule %s: Embedding failed...\n" % MolName)
  343         return (Mol, False, None, None, None)
  344 
  345     MPParams = OptionsInfo["MPParams"]
  346     
  347     # Setup data for initializing a worker process...
  348     InitializeWorkerProcessArgs = (MiscUtil.ObjectToBase64EncodedString(Options), MiscUtil.ObjectToBase64EncodedString(OptionsInfo))
  349 
  350     # Setup a encoded mols data iterable for a worker process...
  351     MolIndex = MolNum - 1
  352     WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStringWithConfIDs(Mol, MolIndex, ConfIDs)
  353 
  354     # Setup process pool along with data initialization for each process...
  355     if not OptionsInfo["QuietMode"]:
  356         MiscUtil.PrintInfo("\nConfiguring multiprocessing using %s method..." % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()"))
  357         MiscUtil.PrintInfo("NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n" % (MPParams["NumProcesses"], MPParams["InputDataMode"], ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"])))
  358     
  359     ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeConformerWorkerProcess, InitializeWorkerProcessArgs)
  360     
  361     # Start processing...
  362     if re.match("^Lazy$", MPParams["InputDataMode"], re.I):
  363         Results = ProcessPool.imap(ConformerWorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
  364     elif re.match("^InMemory$", MPParams["InputDataMode"], re.I):
  365         Results = ProcessPool.map(ConformerWorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
  366     else:
  367         MiscUtil.PrintError("The value, %s, specified for \"--inputDataMode\" is not supported." % (MPParams["InputDataMode"]))
  368     
  369     CalcEnergyMap = {}
  370     CalcSolvationEnergyMap = {}
  371     CalcFailedCount = 0
  372     for Result in Results:
  373         MolIndex, EncodedMol, CalcStatus, ConfID, Energy, SolvationEnergy = Result
  374 
  375         if EncodedMol is None:
  376             CalcFailedCount += 1
  377             continue
  378         
  379         if not CalcStatus:
  380             CalcFailedCount += 1
  381             continue
  382         
  383         # Retrieve minimized atom positions...
  384         MinimizedMol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
  385         AtomPositions = RDKitUtil.GetAtomPositions(MinimizedMol, ConfID = ConfID)
  386 
  387         # Update atom positions...
  388         RDKitUtil.SetAtomPositions(Mol, AtomPositions, ConfID = ConfID)
  389         
  390         CalcEnergyMap[ConfID] = Energy
  391         CalcSolvationEnergyMap[ConfID] = SolvationEnergy
  392 
  393     if CalcFailedCount:
  394         return (Mol, False, None, None, None)
  395     
  396     SortedConfIDs = sorted(ConfIDs, key = lambda ConfID: CalcEnergyMap[ConfID])
  397     MinEnergyConfID = SortedConfIDs[0]
  398     MinEnergy = CalcEnergyMap[MinEnergyConfID]
  399     SolvationEnergy = CalcSolvationEnergyMap[MinEnergyConfID]
  400     
  401     return (Mol, True, MinEnergyConfID, MinEnergy, SolvationEnergy)
  402     
  403 def InitializeConformerWorkerProcess(*EncodedArgs):
  404     """Initialize data for a conformer worker process."""
  405     
  406     global Options, OptionsInfo
  407 
  408     if not OptionsInfo["QuietMode"]:
  409         MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid())
  410     
  411     # Decode Options and OptionInfo...
  412     Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0])
  413     OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1])
  414     
  415     # Psi4 is initialized in the worker process to avoid creation of redundant Psi4
  416     # output files for each process...
  417     OptionsInfo["Psi4Initialized"]  = False
  418 
  419 def ConformerWorkerProcess(EncodedMolInfo):
  420     """Process data for a conformer worker process."""
  421 
  422     if not OptionsInfo["Psi4Initialized"]:
  423         InitializePsi4ForWorkerProcess()
  424     
  425     MolIndex, EncodedMol, ConfID = EncodedMolInfo
  426 
  427     MolNum = MolIndex + 1
  428     
  429     CalcStatus = False
  430     Energy = None
  431     SolvationEnergy = None
  432     
  433     if EncodedMol is None:
  434         return [MolIndex, None, CalcStatus, ConfID, Energy, SolvationEnergy]
  435     
  436     Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
  437     MolName = RDKitUtil.GetMolName(Mol, MolNum)
  438     
  439     if not OptionsInfo["QuietMode"]:
  440         MiscUtil.PrintInfo("Processing molecule %s conformer ID %s..." % (MolName, ConfID))
  441     
  442     Status, ConvergeStatus = MinimizeMoleculeUsingForceField(Mol, MolNum, ConfID)
  443     if not Status:
  444         return [MolIndex, EncodedMol, CalcStatus, ConfID, Energy, SolvationEnergy]
  445     
  446     if ConvergeStatus != 0:
  447         if not OptionsInfo["QuietMode"]:
  448             MolName = RDKitUtil.GetMolName(Mol, (MolIndex + 1))
  449             MiscUtil.PrintWarning("Minimization using forcefield failed to converge for molecule %s in %d steps. Try using higher value for \"maxIters\" in \"--confParams\" option...\n" % (MolName, OptionsInfo["ConfGenerationParams"]["MaxIters"]))
  450     
  451     # Perform Psi4 minimization...
  452     CalcStatus, Energy, SolvationEnergy = MinimizeMoleculeUsingPsi4(OptionsInfo["psi4"], Mol, MolNum, ConfID)
  453     if not CalcStatus:
  454         if not OptionsInfo["QuietMode"]:
  455             MiscUtil.PrintWarning("Minimization couldn't be performed for molecule %s\n" % (MolName))
  456             return [MolIndex, EncodedMol, False, ConfID, Energy, SolvationEnergy]
  457 
  458     return [MolIndex, RDKitUtil.MolToBase64EncodedMolString(Mol, PropertyPickleFlags = Chem.PropertyPickleOptions.MolProps | Chem.PropertyPickleOptions.PrivateProps), CalcStatus, ConfID, Energy, SolvationEnergy]
  459 
  460 def InitializePsi4ForWorkerProcess():
  461     """Initialize Psi4 for a worker process."""
  462     
  463     if OptionsInfo["Psi4Initialized"]:
  464         return
  465 
  466     OptionsInfo["Psi4Initialized"] = True
  467 
  468     if OptionsInfo["MPLevelConformersMode"] and re.match("auto", OptionsInfo["Psi4RunParams"]["OutputFileSpecified"], re.I):
  469         # Run Psi4 in quiet mode during multiprocessing at Conformers level for 'auto' OutputFile...
  470         OptionsInfo["Psi4RunParams"]["OutputFile"] = "quiet"
  471     else:
  472         # Update output file...
  473         OptionsInfo["Psi4RunParams"]["OutputFile"] = Psi4Util.UpdatePsi4OutputFileUsingPID(OptionsInfo["Psi4RunParams"]["OutputFile"], os.getpid())
  474             
  475     # Intialize Psi4...
  476     OptionsInfo["psi4"] = Psi4Util.InitializePsi4(Psi4RunParams = OptionsInfo["Psi4RunParams"], Psi4OptionsParams = OptionsInfo["Psi4OptionsParams"], PrintVersion = False, PrintHeader = True)
  477     
  478     if OptionsInfo["Psi4DDXSolvationMode"]:
  479         Psi4Util.UpdatePsi4OptionsParameters(OptionsInfo["psi4"], OptionsInfo["Psi4DDXSolvationOptions"])
  480     
  481     # Setup max iterations global variable...
  482     Psi4Util.UpdatePsi4OptionsParameters(OptionsInfo["psi4"], {'GEOM_MAXITER': OptionsInfo["MaxIters"]})
  483     
  484     # Setup conversion factor for energy units...
  485     SetupEnergyConversionFactor(OptionsInfo["psi4"])
  486 
  487 def MinimizeMoleculeOrConformers(Psi4Handle, Mol, MolNum):
  488     """Minimize molecule or conformers."""
  489 
  490     ConfID = None
  491     if OptionsInfo["SkipConformerGeneration"]:
  492         Mol, CalcStatus, Energy, SolvationEnergy = MinimizeMolecule(Psi4Handle, Mol, MolNum)
  493     else:
  494         Mol, CalcStatus, ConfID, Energy, SolvationEnergy = GenerateAndMinimizeConformers(Psi4Handle, Mol, MolNum)
  495 
  496     return (Mol, CalcStatus, ConfID, Energy, SolvationEnergy)
  497 
  498 def GenerateAndMinimizeConformers(Psi4Handle, Mol, MolNum = None):
  499     """Generate and minimize conformers for a molecule to get the lowest energy conformer."""
  500 
  501     # Add hydrogens..
  502     Mol = Chem.AddHs(Mol)
  503     
  504     MolName = RDKitUtil.GetMolName(Mol, MolNum)
  505 
  506     # Setup conformers...
  507     ConfIDs = EmbedMolecule(Mol, MolNum)
  508     if not len(ConfIDs):
  509         if not OptionsInfo["QuietMode"]:
  510             MiscUtil.PrintWarning("Minimization couldn't be performed for molecule %s: Embedding failed...\n" % MolName)
  511         return (Mol, False, None, None, None)
  512     
  513     # Minimize conformers...
  514     CalcEnergyMap = {}
  515     CalcSolvationEnergyMap = {}
  516     for ConfID in ConfIDs:
  517         if not OptionsInfo["QuietMode"]:
  518             MiscUtil.PrintInfo("Processing molecule %s conformer ID %s..." % (MolName, ConfID))
  519             
  520         # Perform forcefield minimization...
  521         Status, ConvergeStatus = MinimizeMoleculeUsingForceField(Mol, MolNum, ConfID)
  522         if not Status:
  523             return (Mol, False, None, None, None)
  524         
  525         if ConvergeStatus != 0:
  526             if not OptionsInfo["QuietMode"]:
  527                 MiscUtil.PrintWarning("Minimization using forcefield failed to converge for molecule %s in %d steps. Try using higher value for \"maxIters\" in \"--confParams\" option...\n" % (MolName, OptionsInfo["ConfGenerationParams"]["MaxIters"]))
  528         
  529         # Perform Psi4 minimization...
  530         CalcStatus, Energy, SolvationEnergy = MinimizeMoleculeUsingPsi4(Psi4Handle, Mol, MolNum, ConfID)
  531         if not CalcStatus:
  532             if not OptionsInfo["QuietMode"]:
  533                 MiscUtil.PrintWarning("Minimization couldn't be performed for molecule %s\n" % (MolName))
  534             return (Mol, False, None, None, None)
  535 
  536         CalcEnergyMap[ConfID] = Energy
  537         CalcSolvationEnergyMap[ConfID] = SolvationEnergy
  538 
  539     # Select the lowest energy conformer..
  540     SortedConfIDs = sorted(ConfIDs, key = lambda ConfID: CalcEnergyMap[ConfID])
  541     MinEnergyConfID = SortedConfIDs[0]
  542     Energy = CalcEnergyMap[MinEnergyConfID]
  543     SolvationEnergy = CalcSolvationEnergyMap[MinEnergyConfID]
  544 
  545     return (Mol, True, MinEnergyConfID, Energy, SolvationEnergy)
  546 
  547 def MinimizeMolecule(Psi4Handle, Mol, MolNum):
  548     """Minimize molecule."""
  549 
  550     # Skip conformation generation and forcefield minimization...
  551     CalcStatus, Energy, SolvationEnergy = MinimizeMoleculeUsingPsi4(Psi4Handle, Mol, MolNum)
  552     
  553     return (Mol, CalcStatus, Energy, SolvationEnergy)
  554     
  555 def MinimizeMoleculeUsingPsi4(Psi4Handle, Mol, MolNum, ConfID = -1):
  556     """Minimize molecule using Psi4."""
  557 
  558     # Setup a Psi4Mol...
  559     Psi4Mol = SetupPsi4Mol(Psi4Handle, Mol, MolNum, ConfID)
  560     if Psi4Mol is None:
  561         return (False, None, None)
  562         
  563     #  Setup reference wave function...
  564     Reference = SetupReferenceWavefunction(Mol)
  565     Psi4Handle.set_options({'Reference': Reference})
  566     
  567     # Setup method name and basis set...
  568     MethodName, BasisSet = SetupMethodNameAndBasisSet(Mol)
  569 
  570     # Setup constrained torsions...
  571     if OptionsInfo["Psi4ConstrainTorsionsMode"]:
  572         SetupConstrainedTorsionsAroundRotatableBonds(Psi4Handle, Mol)
  573     
  574     # Optimize geometry...
  575     Status, Energy, WaveFunction = Psi4Util.PerformGeometryOptimization(Psi4Handle, Psi4Mol, MethodName, BasisSet, ReturnWaveFunction = True, Quiet = OptionsInfo["QuietMode"])
  576     
  577     if not Status:
  578         PerformPsi4Cleanup(Psi4Handle)
  579         return (False, None, None)
  580 
  581     SolvationEnergy = None
  582     if OptionsInfo["Psi4DDXSolvationMode"]:
  583         SolvationEnergy = RetrievePsi4DDXSolvationEnergy(WaveFunction)
  584         
  585     # Update atom positions...
  586     AtomPositions = Psi4Util.GetAtomPositions(Psi4Handle, WaveFunction, InAngstroms = True)
  587     RDKitUtil.SetAtomPositions(Mol, AtomPositions, ConfID = ConfID)
  588 
  589     # Convert energy units...
  590     if OptionsInfo["ApplyEnergyConversionFactor"]:
  591         Energy = Energy * OptionsInfo["EnergyConversionFactor"]
  592         if SolvationEnergy is not None:
  593             SolvationEnergy = SolvationEnergy * OptionsInfo["EnergyConversionFactor"]
  594     
  595     # Clean up
  596     PerformPsi4Cleanup(Psi4Handle)
  597     
  598     return (True, Energy, SolvationEnergy)
  599 
  600 def RetrievePsi4DDXSolvationEnergy(WaveFunction):
  601     """Retrieve DDX solvation energy."""
  602     
  603     SolvationEnergy = None
  604     try:
  605         SolvationEnergy = WaveFunction.variable('DD Solvation Energy')
  606     except Exception as ErrMsg:
  607         SolvationEnergy = None
  608         if not OptionsInfo["QuietMode"]:
  609             MiscUtil.PrintWarning("Failed to retrieve Psi4 DD Solvation Energy using wave function: %s" % ErrMsg)
  610 
  611     return SolvationEnergy
  612 
  613 def MinimizeMoleculeUsingForceField(Mol, MolNum, ConfID = -1):
  614     """Minimize molecule using forcefield available in RDKit."""
  615     
  616     try:
  617         if OptionsInfo["ConfGenerationParams"]["UseUFF"]:
  618             ConvergeStatus = AllChem.UFFOptimizeMolecule(Mol, confId = ConfID, maxIters = OptionsInfo["ConfGenerationParams"]["MaxIters"])
  619         elif OptionsInfo["ConfGenerationParams"]["UseMMFF"]:
  620             ConvergeStatus = AllChem.MMFFOptimizeMolecule(Mol, confId = ConfID, maxIters = OptionsInfo["ConfGenerationParams"]["MaxIters"], mmffVariant = OptionsInfo["ConfGenerationParams"]["MMFFVariant"])
  621         else:
  622             MiscUtil.PrintError("Minimization couldn't be performed: Specified forcefield, %s, is not supported" % OptionsInfo["ConfGenerationParams"]["ForceField"])
  623     except (ValueError, RuntimeError, Chem.rdchem.KekulizeException) as ErrMsg:
  624         if not OptionsInfo["QuietMode"]:
  625             MolName = RDKitUtil.GetMolName(Mol, MolNum)
  626             MiscUtil.PrintWarning("Minimization using forcefield couldn't be performed for molecule %s:\n%s\n" % (MolName, ErrMsg))
  627         return (False, None)
  628     
  629     return (True, ConvergeStatus)
  630 
  631 def EmbedMolecule(Mol, MolNum = None):
  632     """Embed conformations."""
  633 
  634     ConfIDs = []
  635     
  636     MaxConfs = OptionsInfo["ConfGenerationParams"]["MaxConfs"]
  637     RandomSeed = OptionsInfo["ConfGenerationParams"]["RandomSeed"]
  638     EnforceChirality = OptionsInfo["ConfGenerationParams"]["EnforceChirality"]
  639     UseExpTorsionAnglePrefs = OptionsInfo["ConfGenerationParams"]["UseExpTorsionAnglePrefs"]
  640     ETVersion = OptionsInfo["ConfGenerationParams"]["ETVersion"]
  641     UseBasicKnowledge = OptionsInfo["ConfGenerationParams"]["UseBasicKnowledge"]
  642     EmbedRMSDCutoff = OptionsInfo["ConfGenerationParams"]["EmbedRMSDCutoff"]
  643 
  644     try:
  645         ConfIDs = AllChem.EmbedMultipleConfs(Mol, numConfs = MaxConfs, randomSeed = RandomSeed, pruneRmsThresh = EmbedRMSDCutoff, enforceChirality = EnforceChirality, useExpTorsionAnglePrefs = UseExpTorsionAnglePrefs, useBasicKnowledge = UseBasicKnowledge, ETversion = ETVersion)
  646     except ValueError as ErrMsg:
  647         if not OptionsInfo["QuietMode"]:
  648             MolName = RDKitUtil.GetMolName(Mol, MolNum)
  649             MiscUtil.PrintWarning("Embedding failed  for molecule %s:\n%s\n" % (MolName, ErrMsg))
  650         ConfIDs = []
  651 
  652     if not OptionsInfo["QuietMode"]:
  653         if EmbedRMSDCutoff > 0:
  654             MiscUtil.PrintInfo("Generating initial conformation ensemble by distance geometry for %s - EmbedRMSDCutoff: %s; Size: %s; Size after RMSD filtering: %s" % (RDKitUtil.GetMolName(Mol, MolNum), EmbedRMSDCutoff, MaxConfs, len(ConfIDs)))
  655         else:
  656             MiscUtil.PrintInfo("Generating initial conformation ensemble by distance geometry for %s - EmbedRMSDCutoff: None; Size: %s" % (RDKitUtil.GetMolName(Mol, MolNum), len(ConfIDs)))
  657     
  658     return ConfIDs
  659 
  660 def SetupPsi4Mol(Psi4Handle, Mol, MolNum, ConfID = -1):
  661     """Setup a Psi4 molecule object."""
  662 
  663     if OptionsInfo["RecenterAndReorient"]:
  664         MolGeometry = RDKitUtil.GetPsi4XYZFormatString(Mol, ConfID = ConfID, NoCom = False, NoReorient = False)
  665     else:
  666         MolGeometry = RDKitUtil.GetPsi4XYZFormatString(Mol, ConfID = ConfID, NoCom = True, NoReorient = True)
  667     
  668     try:
  669         Psi4Mol = Psi4Handle.geometry(MolGeometry)
  670     except Exception as ErrMsg:
  671         Psi4Mol = None
  672         if not OptionsInfo["QuietMode"]:
  673             MiscUtil.PrintWarning("Failed to create Psi4 molecule from geometry string: %s\n" % ErrMsg)
  674             MolName = RDKitUtil.GetMolName(Mol, MolNum)
  675             MiscUtil.PrintWarning("Ignoring molecule: %s" % MolName)
  676     
  677     if OptionsInfo["Symmetrize"]:
  678         Psi4Mol.symmetrize(OptionsInfo["SymmetrizeTolerance"])
  679     
  680     return Psi4Mol
  681 
  682 def PerformPsi4Cleanup(Psi4Handle):
  683     """Perform clean up."""
  684 
  685     # Clean up after Psi4 run...
  686     Psi4Handle.core.clean()
  687     
  688     # Clean up any leftover scratch files...
  689     if OptionsInfo["MPMode"]:
  690         Psi4Util.RemoveScratchFiles(Psi4Handle, OptionsInfo["Psi4RunParams"]["OutputFile"])
  691 
  692 def CheckAndValidateMolecule(Mol, MolCount = None):
  693     """Validate molecule for Psi4 calculations."""
  694     
  695     if Mol is None:
  696         if not OptionsInfo["QuietMode"]:
  697             MiscUtil.PrintInfo("\nProcessing molecule number %s..." % MolCount)
  698         return False
  699     
  700     MolName = RDKitUtil.GetMolName(Mol, MolCount)
  701     if not OptionsInfo["QuietMode"]:
  702         MiscUtil.PrintInfo("\nProcessing molecule %s..." % MolName)
  703     
  704     if RDKitUtil.IsMolEmpty(Mol):
  705         if not OptionsInfo["QuietMode"]:
  706             MiscUtil.PrintWarning("Ignoring empty molecule: %s\n" % MolName)
  707         return False
  708     
  709     if not RDKitUtil.ValidateElementSymbols(RDKitUtil.GetAtomSymbols(Mol)):
  710         if not OptionsInfo["QuietMode"]:
  711             MiscUtil.PrintWarning("Ignoring molecule containing invalid element symbols: %s\n" % MolName)
  712         return False
  713     
  714     # Check for 3D flag...
  715     if OptionsInfo["SkipConformerGeneration"]:
  716         if not Mol.GetConformer().Is3D():
  717             if not OptionsInfo["QuietMode"]:
  718                 MiscUtil.PrintWarning("3D tag is not set for molecule: %s\n" % MolName)
  719     
  720     # Check for missing hydrogens...
  721     if OptionsInfo["SkipConformerGeneration"]:
  722         # Otherwise, Hydrogens are always added...
  723         if RDKitUtil.AreHydrogensMissingInMolecule(Mol):
  724             if not OptionsInfo["QuietMode"]:
  725                 MiscUtil.PrintWarning("Missing hydrogens in molecule: %s\n" % MolName)
  726     
  727     return True
  728 
  729 def SetupMethodNameAndBasisSet(Mol):
  730     """Setup method name and basis set."""
  731     
  732     MethodName = OptionsInfo["MethodName"]
  733     if OptionsInfo["MethodNameAuto"]:
  734         MethodName = "B3LYP"
  735     
  736     BasisSet = OptionsInfo["BasisSet"]
  737     if OptionsInfo["BasisSetAuto"]:
  738         BasisSet = "6-31+G**" if RDKitUtil.IsAtomSymbolPresentInMol(Mol, "S") else "6-31G**"
  739     
  740     return (MethodName, BasisSet)
  741 
  742 def SetupReferenceWavefunction(Mol):
  743     """Setup reference wavefunction."""
  744     
  745     Reference = OptionsInfo["Reference"]
  746     if OptionsInfo["ReferenceAuto"]:
  747         Reference = 'UHF' if (RDKitUtil.GetSpinMultiplicity(Mol) > 1) else 'RHF'
  748     
  749     return Reference
  750 
  751 def SetupConstrainedTorsionsAroundRotatableBonds(Psi4Handle, Mol):
  752     """Setup constrained torsions around rotatable bonds. """
  753 
  754     if not OptionsInfo["Psi4ConstrainTorsionsMode"]:
  755         return
  756 
  757     # Get torsions around rotatable bonds...
  758     TorsionAtomsIndicesList = RDKitUtil.GetTorsionsAroundRotatableBonds(Mol, GetConstrainTorsionsRotBondsPatternMol(), OptionsInfo["Psi4ConstrainTorsionsParams"]["IgnoreHydrogens"])
  759 
  760     # Freeze dihedrals corresponding to torsions around rotatable bonds...
  761     FreezeDihedralsString = SetupFreezeDihedralsStringForPsi4(TorsionAtomsIndicesList)
  762     if FreezeDihedralsString is None:
  763         # Clear any frozen dihedral values
  764         Psi4Handle.set_options({"OPTKING__frozen_dihedral": ""})
  765     else:
  766         # Setup frozen dihedral values...
  767         Psi4Handle.set_options({"OPTKING__frozen_dihedral": FreezeDihedralsString})
  768 
  769 def SetupFreezeDihedralsStringForPsi4(TorsionAtomsIndicesList):
  770     """Setup freeze dihedrals string for Psi4."""
  771 
  772     FreezeDihedralsString = None
  773     if TorsionAtomsIndicesList is None or len(TorsionAtomsIndicesList) == 0:
  774         return FreezeDihedralsString
  775 
  776     TorsionsList = []
  777     for TorsionAtomIndices in TorsionAtomsIndicesList:
  778         AtomIndex1, AtomIndex2, AtomIndex3, AtomIndex4 = TorsionAtomIndices
  779         
  780         # Atom indices start from 1 for Psi4 instead 0 for RDKit...
  781         TorsionString = "%s %s %s %s" % (AtomIndex1 +1, AtomIndex2 + 1, AtomIndex3 + 1, AtomIndex4 + 1)
  782         TorsionsList.append(TorsionString)
  783 
  784     if len(TorsionsList):
  785         FreezeDihedralsString = "%s" % "\n".join(TorsionsList)
  786         
  787     return FreezeDihedralsString
  788     
  789 def GetConstrainTorsionsRotBondsPatternMol():
  790     """Get constrain torsions rotatable bonds pattern molecule."""
  791 
  792     if "RotBondsSMARTSPatternMol" not in OptionsInfo["Psi4ConstrainTorsionsParams"]:
  793         OptionsInfo["Psi4ConstrainTorsionsParams"]["RotBondsSMARTSPatternMol"] = Chem.MolFromSmarts(OptionsInfo["Psi4ConstrainTorsionsParams"]["RotBondsSMARTSPattern"])
  794     
  795     return OptionsInfo["Psi4ConstrainTorsionsParams"]["RotBondsSMARTSPatternMol"]
  796 
  797 def SetupEnergyConversionFactor(Psi4Handle):
  798     """Setup converstion factor for energt units. The Psi4 energy units are Hartrees."""
  799     
  800     EnergyUnits = OptionsInfo["EnergyUnits"]
  801     
  802     ApplyConversionFactor = True
  803     if re.match("^kcal\/mol$", EnergyUnits, re.I):
  804         ConversionFactor = Psi4Handle.constants.hartree2kcalmol
  805     elif re.match("^kJ\/mol$", EnergyUnits, re.I):
  806         ConversionFactor = Psi4Handle.constants.hartree2kJmol
  807     elif re.match("^eV$", EnergyUnits, re.I):
  808         ConversionFactor = Psi4Handle.constants.hartree2ev
  809     else:
  810         ApplyConversionFactor = False
  811         ConversionFactor = 1.0
  812     
  813     OptionsInfo["ApplyEnergyConversionFactor"] = ApplyConversionFactor
  814     OptionsInfo["EnergyConversionFactor"] = ConversionFactor
  815 
  816 def WriteMolecule(Writer, Mol, MolNum = None, ConfID = None, Energy = None, SolvationEnergy = None):
  817     """Write molecule."""
  818 
  819     if OptionsInfo["EnergyOut"]  and Energy is not None:
  820         Energy = "%.*f" % (OptionsInfo["Precision"], Energy)
  821         Mol.SetProp(OptionsInfo["EnergyDataFieldLabel"], Energy)
  822         
  823         if OptionsInfo["Psi4DDXSolvationMode"] and SolvationEnergy is not None:
  824             SolvationEnergy = "%.*f" % (OptionsInfo["Precision"], SolvationEnergy)
  825             Mol.SetProp(OptionsInfo["SolvationEnergyDataFieldLabel"], SolvationEnergy)
  826 
  827     try:
  828         if ConfID is None:
  829             Writer.write(Mol)
  830         else:
  831             Writer.write(Mol, confId = ConfID)
  832     except (ValueError, RuntimeError) as ErrMsg:
  833         if not OptionsInfo["QuietMode"]:
  834             MolName = RDKitUtil.GetMolName(Mol, MolNum)
  835             MiscUtil.PrintWarning("Failed to write molecule %s:\n%s\n" % (MolName, ErrMsg))
  836         return False
  837 
  838     return True
  839 
  840 def CheckAndSetupDDX():
  841     """Load pyddx and return its handle."""
  842     
  843     try:
  844         import pyddx
  845     except ImportError as ErrMsg:
  846         sys.stderr.write("\nFailed to import module/package pyddx for solvation: %s\n" % ErrMsg)
  847         sys.stderr.write("Psi4 relies on pyddx module for DDX solvation calculation.\n")
  848         sys.stderr.write("You may need to install it from pypi or conda-forge.\n")
  849         sys.stderr.write("Check/update your environment and try again.\n\n")
  850         sys.exit(1)
  851 
  852     return pyddx
  853 
  854 def ProcessDDXListSolventsOption():
  855     """List solvent information provided by tge DDX module for the calculation
  856     of solvation energy."""
  857 
  858     PyDDXHandle = CheckAndSetupDDX()
  859     
  860     MiscUtil.PrintInfo("Solvent name and dielectric constant information avaibale in the DDX module:\n")
  861     MiscUtil.PrintInfo("Solvent\tEpsilon")
  862     Count = 0
  863     for Solvent in sorted(PyDDXHandle.data.solvent_epsilon.keys()):
  864         Count += 1
  865         Epsilon = PyDDXHandle.data.solvent_epsilon[Solvent]
  866         MiscUtil.PrintInfo("%s\t%s" % (Solvent, Epsilon))
  867     
  868     MiscUtil.PrintInfo("\nTotal number of solvents: %s\n" % Count)
  869 
  870 def ProcessOptions():
  871     """Process and validate command line arguments and options."""
  872     
  873     MiscUtil.PrintInfo("Processing options...")
  874     
  875     # Validate options...
  876     ValidateOptions()
  877     
  878     OptionsInfo["Infile"] = Options["--infile"]
  879     OptionsInfo["SMILESInfileStatus"] = True if  MiscUtil.CheckFileExt(Options["--infile"], "smi csv tsv txt") else False
  880     ParamsDefaultInfoOverride = {"RemoveHydrogens": False}
  881     OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters("--infileParams", Options["--infileParams"], InfileName = Options["--infile"], ParamsDefaultInfo = ParamsDefaultInfoOverride)
  882     
  883     OptionsInfo["Outfile"] = Options["--outfile"]
  884     OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters("--outfileParams", Options["--outfileParams"])
  885     
  886     OptionsInfo["Overwrite"] = Options["--overwrite"]
  887     
  888     # Method, basis set, and reference wavefunction...
  889     OptionsInfo["BasisSet"] = Options["--basisSet"]
  890     OptionsInfo["BasisSetAuto"] = True if re.match("^auto$", Options["--basisSet"], re.I) else False
  891     
  892     OptionsInfo["MethodName"] = Options["--methodName"]
  893     OptionsInfo["MethodNameAuto"] = True if re.match("^auto$", Options["--methodName"], re.I) else False
  894     
  895     OptionsInfo["Reference"] = Options["--reference"]
  896     OptionsInfo["ReferenceAuto"] = True if re.match("^auto$", Options["--reference"], re.I) else False
  897     
  898     # Run and options parameters...
  899     OptionsInfo["Psi4OptionsParams"] = Psi4Util.ProcessPsi4OptionsParameters("--psi4OptionsParams", Options["--psi4OptionsParams"])
  900     OptionsInfo["Psi4RunParams"] = Psi4Util.ProcessPsi4RunParameters("--psi4RunParams", Options["--psi4RunParams"], InfileName = OptionsInfo["Infile"])
  901 
  902     # Psi4 constrain torsions paramaters...
  903     OptionsInfo["Psi4ConstrainTorsionsMode"] = True if re.match("^yes$", Options["--psi4ConstrainTorsions"], re.I) else False
  904     ParamsDefaultInfoOverride = {"RotBondsSMARTSMode": "NonStrict"}
  905     Psi4ConstrainTorsionsParams = Psi4Util.ProcessPsi4ConstrainTorrionsParameters("--psi4ConstrainTorsionsParams", Options["--psi4ConstrainTorsionsParams"], ParamsDefaultInfoOverride)
  906     OptionsInfo["Psi4ConstrainTorsionsParams"] = Psi4ConstrainTorsionsParams
  907 
  908     if OptionsInfo["Psi4ConstrainTorsionsMode"]:
  909         RotBondsSMARTSPattern = OptionsInfo["Psi4ConstrainTorsionsParams"]["RotBondsSMARTSPattern"]
  910         RotBondsPatternMol = Chem.MolFromSmarts(RotBondsSMARTSPattern)
  911         if RotBondsPatternMol is None:
  912             MiscUtil.PrintError("Failed to create rotatable bonds pattern molecule. The rotatable bonds SMARTS pattern, \"%s\", specified for parameter name \"--rotBondsSMARTSPattern\" using \"--psi4ConstrainTorsions\"option is not valid." % (RotBondsSMARTSPattern))
  913     
  914     # DDX solvation parameters...
  915     OptionsInfo["Psi4DDXSolvationMode"] = True if re.match("^yes$", Options["--psi4DDXSolvation"], re.I) else False
  916     Psi4DDXSolvationParams = Psi4Util.ProcessPsi4DDXSolvationParameters("--psi4DDXSolvationParams", Options["--psi4DDXSolvationParams"])
  917     Psi4DDXSolvationOptions = Psi4Util.SetupPsi4DDXSolvationOptions(OptionsInfo["Psi4DDXSolvationMode"], Psi4DDXSolvationParams)
  918     OptionsInfo["Psi4DDXSolvationParams"] = Psi4DDXSolvationParams
  919     OptionsInfo["Psi4DDXSolvationOptions"] = Psi4DDXSolvationOptions
  920     
  921     if OptionsInfo["Psi4DDXSolvationMode"]:
  922         CheckAndSetupDDX()
  923     
  924     # Conformer generation paramaters...
  925     OptionsInfo["ConfGeneration"] = Options["--confGeneration"]
  926     OptionsInfo["SkipConformerGeneration"] = False if re.match("^yes$", Options["--confGeneration"], re.I) else True
  927 
  928     if OptionsInfo["SkipConformerGeneration"]:
  929         if OptionsInfo["SMILESInfileStatus"]:
  930             MiscUtil.PrintError("The value, %s, specified for option \"-c --confGeneration\" is not allowed during, %s, value for option \"-i, --infile\" . The input file must be a 3D SD or MOL file. " % (Options["--confGeneration"], Options["--infile"]))
  931 
  932     if not OptionsInfo["SkipConformerGeneration"]:
  933         if OptionsInfo["Psi4ConstrainTorsionsMode"]:
  934             MiscUtil.PrintError("The value, %s, specified for option \"-c --confGeneration\" is not allowed during, %s, value for option \"--psi4ConstrainTorsions\"." % (Options["--confGeneration"], Options["--psi4ConstrainTorsions"]))
  935 
  936     ParamsDefaultInfoOverride = {"MaxConfs": 50, "MaxIters": 250}
  937     OptionsInfo["ConfGenerationParams"] = MiscUtil.ProcessOptionConformerParameters("--confParams", Options["--confParams"], ParamsDefaultInfoOverride)
  938     
  939     # Write energy parameters...
  940     OptionsInfo["EnergyOut"] = True if re.match("^yes$", Options["--energyOut"], re.I) else False
  941     OptionsInfo["EnergyUnits"] = Options["--energyUnits"]
  942     
  943     EnergyDataFieldLabel = Options["--energyDataFieldLabel"]
  944     if re.match("^auto$", EnergyDataFieldLabel, re.I):
  945         EnergyDataFieldLabel = "Psi4_Energy (%s)" % Options["--energyUnits"]
  946     OptionsInfo["EnergyDataFieldLabel"] = EnergyDataFieldLabel
  947     
  948     SolvationEnergyDataFieldLabel = "Psi4_DDX_Solvation_Energy (%s)" % Options["--energyUnits"]
  949     OptionsInfo["SolvationEnergyDataFieldLabel"] = SolvationEnergyDataFieldLabel
  950     
  951     OptionsInfo["MaxIters"] = int(Options["--maxIters"])
  952 
  953     OptionsInfo["MPMode"] = True if re.match("^yes$", Options["--mp"], re.I) else False
  954     OptionsInfo["MPParams"] = MiscUtil.ProcessOptionMultiprocessingParameters("--mpParams", Options["--mpParams"])
  955 
  956     # Multiprocessing level...
  957     MPLevelMoleculesMode = False
  958     MPLevelConformersMode = False
  959     MPLevel = Options["--mpLevel"]
  960     if re.match("^Molecules$", MPLevel, re.I):
  961         MPLevelMoleculesMode = True
  962     elif re.match("^Conformers$", MPLevel, re.I):
  963         if  OptionsInfo["SkipConformerGeneration"]:
  964             MiscUtil.PrintError("The value, %s, specified for option \"--mpLevel\" is not allowed during, %s, value of option \"--confGeneration\"  . " % (MPLevel, Options["--confGeneration"]))
  965         MPLevelConformersMode = True
  966     else:
  967         MiscUtil.PrintError("The value, %s, specified for option \"--mpLevel\" is not valid. " % MPLevel)
  968     OptionsInfo["MPLevel"] = MPLevel
  969     OptionsInfo["MPLevelMoleculesMode"] = MPLevelMoleculesMode
  970     OptionsInfo["MPLevelConformersMode"] = MPLevelConformersMode
  971     
  972     OptionsInfo["Precision"] = int(Options["--precision"])
  973     OptionsInfo["QuietMode"] = True if re.match("^yes$", Options["--quiet"], re.I) else False
  974 
  975     RecenterAndReorient = Options["--recenterAndReorient"]
  976     if re.match("^auto$", RecenterAndReorient, re.I):
  977         RecenterAndReorient  = False if  OptionsInfo["SkipConformerGeneration"] else True
  978     else:
  979         RecenterAndReorient  = True if re.match("^yes$", RecenterAndReorient, re.I) else False
  980     OptionsInfo["RecenterAndReorient"] = RecenterAndReorient
  981     
  982     Symmetrize = Options["--symmetrize"]
  983     if re.match("^auto$", Symmetrize, re.I):
  984         Symmetrize  = True if  RecenterAndReorient else False
  985     else:
  986         Symmetrize  = True if re.match("^yes$", Symmetrize, re.I) else False
  987     OptionsInfo["Symmetrize"] = Symmetrize
  988     
  989     OptionsInfo["SymmetrizeTolerance"] = float(Options["--symmetrizeTolerance"])
  990 
  991 def RetrieveOptions():
  992     """Retrieve command line arguments and options."""
  993     
  994     # Get options...
  995     global Options
  996     Options = docopt(_docoptUsage_)
  997     
  998     # Set current working directory to the specified directory...
  999     WorkingDir = Options["--workingdir"]
 1000     if WorkingDir:
 1001         os.chdir(WorkingDir)
 1002     
 1003     # Handle examples option...
 1004     if "--examples" in Options and Options["--examples"]:
 1005         MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_))
 1006         sys.exit(0)
 1007     
 1008     # Handle listing of solvent information...
 1009     if  Options and Options["--psi4DDXListSolvents"]:
 1010         ProcessDDXListSolventsOption()
 1011         sys.exit(0)
 1012 
 1013 def ValidateOptions():
 1014     """Validate option values."""
 1015     
 1016     MiscUtil.ValidateOptionTextValue("-c, --confGeneration", Options["--confGeneration"], "yes no")
 1017     
 1018     MiscUtil.ValidateOptionTextValue("--energyOut", Options["--energyOut"], "yes no")
 1019     MiscUtil.ValidateOptionTextValue("--energyUnits", Options["--energyUnits"], "Hartrees kcal/mol kJ/mol eV")
 1020     
 1021     MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"])
 1022     MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "sdf sd mol smi txt csv tsv")
 1023     
 1024     MiscUtil.ValidateOptionIntegerValue("--maxIters", Options["--maxIters"], {">": 0})
 1025     
 1026     MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "sdf sd")
 1027     MiscUtil.ValidateOptionsOutputFileOverwrite("-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"])
 1028     MiscUtil.ValidateOptionsDistinctFileNames("-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"])
 1029     
 1030     MiscUtil.ValidateOptionTextValue("--mp", Options["--mp"], "yes no")
 1031     MiscUtil.ValidateOptionTextValue("--mpLevel", Options["--mpLevel"], "Molecules Conformers")
 1032     
 1033     MiscUtil.ValidateOptionIntegerValue("-p, --precision", Options["--precision"], {">": 0})
 1034     
 1035     MiscUtil.ValidateOptionTextValue("--psi4ConstrainTorsions", Options["--psi4ConstrainTorsions"], "yes no")
 1036     
 1037     MiscUtil.ValidateOptionTextValue("--psi4DDXSolvation", Options["--psi4DDXSolvation"], "yes no")
 1038     
 1039     MiscUtil.ValidateOptionTextValue("-q, --quiet", Options["--quiet"], "yes no")
 1040     
 1041     MiscUtil.ValidateOptionTextValue("--recenterAndReorient", Options["--recenterAndReorient"], "yes no auto")
 1042     MiscUtil.ValidateOptionTextValue("--symmetrize", Options["--symmetrize"], "yes no auto")
 1043     MiscUtil.ValidateOptionFloatValue("--symmetrizeTolerance", Options["--symmetrizeTolerance"], {">": 0})
 1044 
 1045 # Setup a usage string for docopt...
 1046 _docoptUsage_ = """
 1047 Psi4PerformMinimization.py - Perform structure minimization
 1048 
 1049 Usage:
 1050     Psi4PerformMinimization.py [--basisSet <text>] [--confGeneration <yes or no> ] [--confParams <Name,Value,...>]
 1051                                [--energyOut <yes or no>] [--energyDataFieldLabel <text>] [--energyUnits <text>]
 1052                                [--infileParams <Name,Value,...>] [--maxIters <number>] [--methodName <text>]
 1053                                [--mp <yes or no>] [--mpLevel <Molecules or Conformers>] [--mpParams <Name, Value,...>]
 1054                                [ --outfileParams <Name,Value,...> ] [--overwrite] [--precision <number> ]
 1055                                [--psi4OptionsParams <Name,Value,...>] [--psi4RunParams <Name,Value,...>]
 1056                                [--psi4ConstrainTorsions <yes or no>] [--psi4ConstrainTorsionsParams <Name,Value,...>]
 1057                                [--psi4DDXSolvation <yes or no>] [--psi4DDXSolvationParams <Name,Value,...>]
 1058                                [--quiet <yes or no>] [--reference <text>] [--recenterAndReorient <yes or no>]
 1059                                [--symmetrize <yes or no>] [--symmetrizeTolerance <number>] [-w <dir>] -i <infile> -o <outfile> 
 1060     Psi4PerformMinimization.py --psi4DDXListSolvents
 1061     Psi4PerformMinimization.py -h | --help | -e | --examples
 1062 
 1063 Description:
 1064     Generate 3D structures for molecules using a combination of distance geometry
 1065     and forcefield minimization followed by geometry optimization using a quantum
 1066     chemistry method. A set of initial 3D structures are generated for a molecule 
 1067     employing distance geometry. The 3D structures in the conformation ensemble
 1068     are sequentially minimized using forcefield and a quantum chemistry method.
 1069     The conformation with lowest energy is selected to represent the final structure.
 1070     An option is available to skip the generation of the conformation ensemble along
 1071     with forcefield minimization and simply perform minimization on the initial 3D
 1072     structure using a quantum chemistry method.
 1073 
 1074     A Psi4 XYZ format geometry string is automatically generated for each molecule
 1075     in input file. It contains atom symbols and 3D coordinates for each atom in a
 1076     molecule. In addition, the formal charge and spin multiplicity are present in the
 1077     the geometry string. These values are either retrieved from molecule properties
 1078     named 'FormalCharge' and 'SpinMultiplicty' or dynamically calculated for a
 1079     molecule.
 1080 
 1081     The supported input file formats are: Mol (.mol), SD (.sdf, .sd), SMILES (.smi,
 1082     .csv, .tsv, .txt)
 1083 
 1084     The supported output file formats are: SD (.sdf, .sd)
 1085 
 1086 Options:
 1087     -b, --basisSet <text>  [default: auto]
 1088         Basis set to use for energy minimization. Default: 6-31+G** for sulfur
 1089         containing molecules; Otherwise, 6-31G** [ Ref 150 ]. The specified 
 1090         value must be a valid Psi4 basis set. No validation is performed.
 1091         
 1092         The following list shows a representative sample of basis sets available
 1093         in Psi4:
 1094             
 1095             STO-3G, 6-31G, 6-31+G, 6-31++G, 6-31G*, 6-31+G*,  6-31++G*, 
 1096             6-31G**, 6-31+G**, 6-31++G**, 6-311G, 6-311+G, 6-311++G,
 1097             6-311G*, 6-311+G*, 6-311++G*, 6-311G**, 6-311+G**, 6-311++G**,
 1098             cc-pVDZ, cc-pCVDZ, aug-cc-pVDZ, cc-pVDZ-DK, cc-pCVDZ-DK, def2-SVP,
 1099             def2-SVPD, def2-TZVP, def2-TZVPD, def2-TZVPP, def2-TZVPPD
 1100             
 1101     -c, --confGeneration <yes or no>  [default: yes]
 1102         Generate an initial 3D conformation ensemble using distance geometry and
 1103         forcefield minimization before final geometry optimization by a specified
 1104         method name and basis set. Possible values: yes or no.
 1105         
 1106         The 'No' value skips the generation of conformations employing distance
 1107         geometry and forcefield minimization. The 3D structures in input file are
 1108         minimized by a quantum method. It is not allowed for SMILES input file.
 1109     --confParams <Name,Value,...>  [default: auto]
 1110         A comma delimited list of parameter name and value pairs for generating
 1111         initial sets of 3D conformations for molecules. The 3D conformation ensemble
 1112         is generated using distance geometry and forcefield functionality available
 1113         in RDKit. The 3D structures in the conformation ensemble are subsequently
 1114         minimized by a quantum chemistry method available in Psi4.
 1115        
 1116         The supported parameter names along with their default values are shown
 1117         below:
 1118             
 1119             confMethod,ETKDGv2,
 1120             forceField,MMFF, forceFieldMMFFVariant,MMFF94,
 1121             enforceChirality,yes,embedRMSDCutoff,0.5,maxConfs,50,
 1122             maxIters,250,randomSeed,auto
 1123             
 1124             confMethod,ETKDGv2   [ Possible values: SDG, KDG, ETDG,
 1125                 ETKDG , or ETKDGv2]
 1126             forceField, MMFF   [ Possible values: UFF or MMFF ]
 1127             forceFieldMMFFVariant,MMFF94   [ Possible values: MMFF94 or MMFF94s ]
 1128             enforceChirality,yes   [ Possible values: yes or no ]
 1129             embedRMSDCutoff,0.5   [ Possible values: number or None]
 1130             
 1131         confMethod: Conformation generation methodology for generating initial 3D
 1132         coordinates. Possible values: Standard Distance Geometry (SDG), Experimental
 1133         Torsion-angle preference with Distance Geometry (ETDG), basic Knowledge-terms
 1134         with Distance Geometry (KDG) and Experimental Torsion-angle preference
 1135         along with basic Knowledge-terms and Distance Geometry (ETKDG or
 1136         ETKDGv2) [Ref 129, 167] .
 1137         
 1138         forceField: Forcefield method to use for energy minimization. Possible
 1139         values: Universal Force Field (UFF) [ Ref 81 ] or Merck Molecular Mechanics
 1140         Force Field [ Ref 83-87 ] .
 1141         
 1142         enforceChirality: Enforce chirality for defined chiral centers during
 1143         forcefield minimization.
 1144         
 1145         maxConfs: Maximum number of conformations to generate for each molecule
 1146         during the generation of an initial 3D conformation ensemble using 
 1147         conformation generation methodology. The conformations are minimized
 1148         using the specified forcefield and a quantum chemistry method. The lowest
 1149         energy conformation is written to the output file.
 1150         
 1151         embedRMSDCutoff: RMSD cutoff for retaining initial set conformers embedded
 1152         using distance geometry and before forcefield minimization. All embedded
 1153         conformers are kept for 'None' value. Otherwise, only those conformers which
 1154         are different from each other by the specified RMSD cutoff, 0.5 by default,
 1155         are kept. The first embedded conformer is always retained.
 1156         
 1157         maxIters: Maximum number of iterations to perform for each conformation
 1158         during forcefield minimization.
 1159         
 1160         randomSeed: Seed for the random number generator for reproducing initial
 1161         3D coordinates in a conformation ensemble. Default is to use a random seed.
 1162     --energyOut <yes or no>  [default: yes]
 1163         Write out energy values.
 1164     --energyDataFieldLabel <text>  [default: auto]
 1165         Energy data field label for writing energy values. Default: Psi4_Energy (<Units>). 
 1166     --energyUnits <text>  [default: kcal/mol]
 1167         Energy units. Possible values: Hartrees, kcal/mol, kJ/mol, or eV.
 1168     -e, --examples
 1169         Print examples.
 1170     -h, --help
 1171         Print this help message.
 1172     -i, --infile <infile>
 1173         Input file name.
 1174     --infileParams <Name,Value,...>  [default: auto]
 1175         A comma delimited list of parameter name and value pairs for reading
 1176         molecules from files. The supported parameter names for different file
 1177         formats, along with their default values, are shown below:
 1178             
 1179             SD, MOL: removeHydrogens,no,sanitize,yes,strictParsing,yes
 1180             SMILES: smilesColumn,1,smilesNameColumn,2,smilesDelimiter,space,
 1181                 smilesTitleLine,auto,sanitize,yes
 1182             
 1183         Possible values for smilesDelimiter: space, comma or tab.
 1184     --maxIters <number>  [default: 50]
 1185         Maximum number of iterations to perform for each molecule or conformer
 1186         during energy minimization by a quantum chemistry method.
 1187     -m, --methodName <text>  [default: auto]
 1188         Method to use for energy minimization. Default: B3LYP [ Ref 150 ]. The
 1189         specified value must be a valid Psi4 method name. No validation is
 1190         performed.
 1191         
 1192         The following list shows a representative sample of methods available
 1193         in Psi4:
 1194             
 1195             B1LYP, B2PLYP, B2PLYP-D3BJ, B2PLYP-D3MBJ, B3LYP, B3LYP-D3BJ,
 1196             B3LYP-D3MBJ, CAM-B3LYP, CAM-B3LYP-D3BJ, HF, HF-D3BJ,  HF3c, M05,
 1197             M06, M06-2x, M06-HF, M06-L, MN12-L, MN15, MN15-D3BJ,PBE, PBE0,
 1198             PBEH3c, PW6B95, PW6B95-D3BJ, WB97, WB97X, WB97X-D, WB97X-D3BJ
 1199             
 1200     --mp <yes or no>  [default: no]
 1201         Use multiprocessing.
 1202          
 1203         By default, input data is retrieved in a lazy manner via mp.Pool.imap()
 1204         function employing lazy RDKit data iterable. This allows processing of
 1205         arbitrary large data sets without any additional requirements memory.
 1206         
 1207         All input data may be optionally loaded into memory by mp.Pool.map()
 1208         before starting worker processes in a process pool by setting the value
 1209         of 'inputDataMode' to 'InMemory' in '--mpParams' option.
 1210         
 1211         A word to the wise: The default 'chunkSize' value of 1 during 'Lazy' input
 1212         data mode may adversely impact the performance. The '--mpParams' section
 1213         provides additional information to tune the value of 'chunkSize'.
 1214     --mpLevel <Molecules or Conformers>  [default: Molecules]
 1215         Perform multiprocessing at molecules or conformers level. Possible values:
 1216         Molecules or Conformers. The 'Molecules' value starts a process pool at the
 1217         molecules level. All conformers of a molecule are processed in a single
 1218         process. The 'Conformers' value, however, starts a process pool at the 
 1219         conformers level. Each conformer of a molecule is processed in an individual
 1220         process in the process pool. The default Psi4 'OutputFile' is set to 'quiet'
 1221         using '--psi4RunParams' for 'Conformers' level. Otherwise, it may generate
 1222         a large number of Psi4 output files.
 1223     --mpParams <Name,Value,...>  [default: auto]
 1224         A comma delimited list of parameter name and value pairs to configure
 1225         multiprocessing.
 1226         
 1227         The supported parameter names along with their default and possible
 1228         values are shown below:
 1229         
 1230             chunkSize, auto
 1231             inputDataMode, Lazy   [ Possible values: InMemory or Lazy ]
 1232             numProcesses, auto   [ Default: mp.cpu_count() ]
 1233         
 1234         These parameters are used by the following functions to configure and
 1235         control the behavior of multiprocessing: mp.Pool(), mp.Pool.map(), and
 1236         mp.Pool.imap().
 1237         
 1238         The chunkSize determines chunks of input data passed to each worker
 1239         process in a process pool by mp.Pool.map() and mp.Pool.imap() functions.
 1240         The default value of chunkSize is dependent on the value of 'inputDataMode'.
 1241         
 1242         The mp.Pool.map() function, invoked during 'InMemory' input data mode,
 1243         automatically converts RDKit data iterable into a list, loads all data into
 1244         memory, and calculates the default chunkSize using the following method
 1245         as shown in its code:
 1246         
 1247             chunkSize, extra = divmod(len(dataIterable), len(numProcesses) * 4)
 1248             if extra: chunkSize += 1
 1249         
 1250         For example, the default chunkSize will be 7 for a pool of 4 worker processes
 1251         and 100 data items.
 1252         
 1253         The mp.Pool.imap() function, invoked during 'Lazy' input data mode, employs
 1254         'lazy' RDKit data iterable to retrieve data as needed, without loading all the
 1255         data into memory. Consequently, the size of input data is not known a priori.
 1256         It's not possible to estimate an optimal value for the chunkSize. The default 
 1257         chunkSize is set to 1.
 1258         
 1259         The default value for the chunkSize during 'Lazy' data mode may adversely
 1260         impact the performance due to the overhead associated with exchanging
 1261         small chunks of data. It is generally a good idea to explicitly set chunkSize to
 1262         a larger value during 'Lazy' input data mode, based on the size of your input
 1263         data and number of processes in the process pool.
 1264         
 1265         The mp.Pool.map() function waits for all worker processes to process all
 1266         the data and return the results. The mp.Pool.imap() function, however,
 1267         returns the the results obtained from worker processes as soon as the
 1268         results become available for specified chunks of data.
 1269         
 1270         The order of data in the results returned by both mp.Pool.map() and 
 1271         mp.Pool.imap() functions always corresponds to the input data.
 1272     -o, --outfile <outfile>
 1273         Output file name.
 1274     --outfileParams <Name,Value,...>  [default: auto]
 1275         A comma delimited list of parameter name and value pairs for writing
 1276         molecules to files. The supported parameter names for different file
 1277         formats, along with their default values, are shown below:
 1278             
 1279             SD: kekulize,yes,forceV3000,no
 1280             
 1281     --overwrite
 1282         Overwrite existing files.
 1283     --precision <number>  [default: 6]
 1284         Floating point precision for writing energy values.
 1285     --psi4OptionsParams <Name,Value,...>  [default: none]
 1286         A comma delimited list of Psi4 option name and value pairs for setting
 1287         global and module options. The names are 'option_name' for global options
 1288         and 'module_name__option_name' for options local to a module. The
 1289         specified option names must be valid Psi4 names. No validation is
 1290         performed.
 1291         
 1292         The specified option name and  value pairs are processed and passed to
 1293         psi4.set_options() as a dictionary. The supported value types are float,
 1294         integer, boolean, or string. The float value string is converted into a float.
 1295         The valid values for a boolean string are yes, no, true, false, on, or off. 
 1296     --psi4RunParams <Name,Value,...>  [default: auto]
 1297         A comma delimited list of parameter name and value pairs for configuring
 1298         Psi4 jobs.
 1299         
 1300         The supported parameter names along with their default and possible
 1301         values are shown below:
 1302              
 1303             MemoryInGB, 1
 1304             NumThreads, 1
 1305             OutputFile, auto   [ Possible  values: stdout, quiet, or FileName ]
 1306             ScratchDir, auto   [ Possivle values: DirName]
 1307             RemoveOutputFile, yes   [ Possible values: yes, no, true, or false]
 1308             
 1309         These parameters control the runtime behavior of Psi4.
 1310         
 1311         The default file name for 'OutputFile' is <InFileRoot>_Psi4.out. The PID
 1312         is appended to output file name during multiprocessing as shown:
 1313         <InFileRoot>_Psi4_<PIDNum>.out. The 'stdout' value for 'OutputType'
 1314         sends Psi4 output to stdout. The 'quiet' or 'devnull' value suppresses
 1315         all Psi4 output. The 'OutputFile' is set to 'quiet' for 'auto' value during 
 1316         'Conformers' of '--mpLevel' option.
 1317         
 1318         The default 'Yes' value of 'RemoveOutputFile' option forces the removal
 1319         of any existing Psi4 before creating new files to append output from
 1320         multiple Psi4 runs.
 1321         
 1322         The option 'ScratchDir' is a directory path to the location of scratch
 1323         files. The default value corresponds to Psi4 default. It may be used to
 1324         override the deafult path.
 1325     --psi4ConstrainTorsions <yes or no>  [default: no]
 1326         Constrain torsions around rotatable bonds during Psi4 minimization.
 1327     --psi4ConstrainTorsionsParams <Name,Value,...>  [default: auto]
 1328         A space delimited list of parameter name and value  pairs for constraining
 1329         torsions around rotatable bonds during Psi4 minimization. The supported
 1330         parameter names, along with their default values, are shown below:
 1331             
 1332             ignoreHydrogens, yes,  [ Possible values: yes or no ]
 1333             rotBondsSMARTSMode, NonStrict,   [ Possible  values: NonStrict,
 1334                 SemiStrict, Strict, Specify ]
 1335             rotBondsSMARTSPattern, auto  [ Possible values: Valid SMARTS ]
 1336         
 1337         ignoreHydrogens: Ignore torsions around rotatable bonds containing
 1338         hydrogens during Psi4 minimization.
 1339         
 1340         rotBondsSMARTSMode: SMARTS pattern to use for identifying rotatable bonds
 1341         for constraining torsions during Psi4 minimization. Possible values: NonStrict,
 1342         SemiStrict, Strict or Specify. The following SMARTS patterns are used to identify
 1343         rotatable bonds for different modes:
 1344             
 1345             NonStrict: [!$(*#*)&!D1]-&!@[!$(*#*)&!D1]
 1346             
 1347             SemiStrict:
 1348             [!$(*#*)&!D1&!$(C(F)(F)F)&!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)
 1349             &!$(C([CH3])([CH3])[CH3])]-!@[!$(*#*)&!D1&!$(C(F)(F)F)
 1350             &!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)&!$(C([CH3])([CH3])[CH3])]
 1351             
 1352             Strict:
 1353             [!$(*#*)&!D1&!$(C(F)(F)F)&!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)
 1354             &!$(C([CH3])([CH3])[CH3])&!$([CD3](=[N,O,S])-!@[#7,O,S!D1])
 1355             &!$([#7,O,S!D1]-!@[CD3]=[N,O,S])&!$([CD3](=[N+])-!@[#7!D1])
 1356             &!$([#7!D1]-!@[CD3]=[N+])]-!@[!$(*#*)&!D1&!$(C(F)(F)F)
 1357             &!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)&!$(C([CH3])([CH3])[CH3])]
 1358             
 1359         The 'NonStrict' and 'Strict' SMARTS patterns are available in RDKit. The 
 1360         'NonStrict' SMARTS pattern corresponds to original Daylight SMARTS
 1361          specification for rotatable bonds. The 'SemiStrict' SMARTS pattern is 
 1362          derived from 'Strict' SMARTS patterns for its usage in this script.
 1363         
 1364         You may use any arbitrary SMARTS pattern to identify rotatable bonds by
 1365         choosing 'Specify' value for 'rotBondsSMARTSMode' parameter and providing
 1366         its value via '--rotBondsSMARTSPattern'.
 1367         
 1368         rotBondsSMARTSPattern: SMARTS pattern for identifying rotatable bonds for
 1369         constraining torsions during Psi4 minimization. This option is only valid for
 1370         'Specify' value of 'rotBondsSMARTSMode' parameter.
 1371     --psi4DDXSolvation <yes or no>  [default: no]
 1372         Perform minimization in solution using domain-decomposition-based
 1373         continuum solvation models [ Ref 160-161 ]. The script relies on Psi4
 1374         interface to the DDX module to perform these calculations. The DDX library
 1375         provides a linear-scaling implementation of standard continuum solvation
 1376         models using a domain- decomposition ansatz. Two solvation models are
 1377         supported: COnductor-like Screening MOdel (COSMO) [ Ref 162-163] and
 1378         Polarizable Continuum Model (PCM) [ Ref 164-165 ].
 1379         
 1380         The solvation energy is included in the value of the total energy written to 
 1381         the output SD file. In addition, the value of solvation energy is written to the
 1382         output file under its own data field label.
 1383         
 1384         Psi4 relies on Python module PYDDX to calculate solvation energy. It must be
 1385         present in your environment.
 1386     --psi4DDXSolvationParams <Name,Value,...>  [default: auto]
 1387         A space delimited list of parameter name and value  pairs for calculating
 1388         solvation energy using the DDX module. The supported parameter names,
 1389         along with their default values, are shown below:
 1390             
 1391             solvationModel PCM   [ Possible  values: COSMO or PCM]
 1392             solvent water   [ Possible  values: A valid solvent name]
 1393             solventEpsilon None
 1394             radiiSet UFF   [ Possible  values: Bondi or UFF]
 1395             radiiScaling auto   [ Default: 1.2 for Bondi; 1.1 for UFF]
 1396             
 1397             solvationModel: Solvation model for calculating solvation energy.
 1398             The corresponding Psi4 option is DDX_MODEL.
 1399             
 1400             solvent: Solvent to use. The corresponding Ps4 option is
 1401             DDX_SOLVENT
 1402             
 1403             solventEpsilon: Dielectric constant of the solvent. The
 1404             corresponding Psi4 option is DDX_SOLVENT_EPSILON.
 1405             
 1406             radiiSet: Radius set for cavity spheres. The corresponding Psi4
 1407             option is DDX_RADII_SET.
 1408             
 1409             radiiScaling: Scaling factor for cavity spheres. The default value
 1410             depends on radiiSet: 1.2 for Bondi; 1.1 for UFF. The corresponding
 1411             Psi4 option is DDX_RADII_SCALING.
 1412             
 1413         These parameter names are automatically mapped to appropriate DDX keywords.
 1414         
 1415         You may specify the solvent either by directly providing a dielectric constant using
 1416         'solventEpsilon' parameter or a solvent name corresponding to 'solvent' parameter.
 1417         The solvent name must be a valid named supported by the DDX module. For example:
 1418         water, ethanol, dimethylsulfoxide, cis-1,2-dimethylcyclohexane , etc. The 'solvent'
 1419         parameter is ignored for non-zero value of 'solvent' option.
 1420         
 1421         The DDX module contains a variety of addtional options to confgure the calculation
 1422         of solvation energy. For example: DDX_MAXITER, DDX_RADII_SCALING, etc. You may
 1423         use '--psi4OptionsParams' to modify values for additional DDX options.
 1424     --psi4DDXListSolvents
 1425         List solvent names, along with dielectric values, supported by the DDX module
 1426         for the calculation of solvent energy without performing any calculation.
 1427     -q, --quiet <yes or no>  [default: no]
 1428         Use quiet mode. The warning and information messages will not be printed.
 1429     -r, --reference <text>  [default: auto]
 1430         Reference wave function to use for energy calculation. Default: RHF or UHF.
 1431         The default values are Restricted Hartree-Fock (RHF) for closed-shell molecules
 1432         with all electrons paired and Unrestricted Hartree-Fock (UHF) for open-shell
 1433         molecules with unpaired electrons.
 1434         
 1435         The specified value must be a valid Psi4 reference wave function. No validation
 1436         is performed. For example: ROHF, CUHF, RKS, etc.
 1437         
 1438         The spin multiplicity determines the default value of reference wave function
 1439         for input molecules. It is calculated from number of free radical electrons using
 1440         Hund's rule of maximum multiplicity defined as 2S + 1 where S is the total
 1441         electron spin. The total spin is 1/2 the number of free radical electrons in a 
 1442         molecule. The value of 'SpinMultiplicity' molecule property takes precedence
 1443         over the calculated value of spin multiplicity.
 1444     --recenterAndReorient <yes or no>  [default: auto]
 1445         Recenter and reorient a molecule during creation of a Psi4 molecule from
 1446         a geometry string. Default: 'No' during 'No' value of '--confGeneration';
 1447         Otherwise, 'Yes'.
 1448         
 1449         The 'No' values allows the minimization of a molecule in its initial 3D
 1450         coordinate space in input file or conformer ensemble.
 1451     --symmetrize <yes or no>  [default: auto]
 1452         Symmetrize molecules before energy minimization. Default: 'Yes' during
 1453         'Yes' value of '--recenterAndReorient'; Otherwise, 'No'. The psi4 function,
 1454         psi4mol.symmetrize( SymmetrizeTolerance), is called to symmetrize
 1455         the molecule before calling psi4.optimize().
 1456         
 1457         The 'No' value of '--symmetrize' during 'Yes' value of '--recenterAndReorient'
 1458         may cause psi4.optimize() to fail with a 'Point group changed...' error
 1459         message.
 1460     --symmetrizeTolerance <number>  [default: 0.01]
 1461         Symmetry tolerance for '--symmetrize'.
 1462     -w, --workingdir <dir>
 1463         Location of working directory which defaults to the current directory.
 1464 
 1465 Examples:
 1466     To generate an initial conformer ensemble of up to 50 conformations using a
 1467     combination of ETKDGv2 distance geometry methodology, applying RMSD cutoff
 1468     of 0.5 and MMFF forcefield minimization, followed by energy minimization
 1469     using B3LYP/6-31G** and B3LYP/6-31+G** for non-sulfur and sulfur containing
 1470     molecules in a SMILES file and  write out a SD file containing minimum energy
 1471     structure corresponding to each molecule, type:
 1472 
 1473         % Psi4PerformMinimization.py -i Psi4Sample.smi -o Psi4SampleOut.sdf
 1474 
 1475     To run the first example in a quiet mode and write out a SD file, type:
 1476 
 1477         % Psi4PerformMinimization.py -q yes -i Psi4Sample.smi -o
 1478           Psi4SampleOut.sdf
 1479 
 1480     To run the first example along with performing energy mimimization in solution
 1481     using default DDX solvation parameters for water and write out a SD file, type:
 1482 
 1483         % Psi4PerformMinimization.py -i Psi4Sample.smi -o Psi4SampleOut.sdf
 1484           --psi4DDXSolvation yes
 1485 
 1486     To run the first example in multiprocessing mode at molecules level on all
 1487     available CPUs without loading all data into memory and write out a SD file,
 1488     type:
 1489 
 1490         % Psi4PerformMinimization.py --mp yes -i Psi4Sample.smi -o
 1491           Psi4SampleOut.sdf
 1492 
 1493     To run the first example in multiprocessing mode at conformers level on all
 1494     available CPUs without loading all data into memory and write out a SD file,
 1495     type:
 1496 
 1497         % Psi4PerformMinimization.py --mp yes --mpLevel Conformers
 1498           -i Psi4Sample.smi -o Psi4SampleOut.sdf
 1499 
 1500     To run the first example in multiprocessing mode at molecules level on all
 1501     available CPUs by loading all data into memory and write out a SD file, type:
 1502 
 1503         % Psi4PerformMinimization.py  --mp yes --mpParams "inputDataMode,
 1504           InMemory" -i Psi4Sample.smi -o Psi4SampleOut.sdf
 1505 
 1506     To run the first example in multiprocessing mode at molecules level on specific
 1507     number of CPUs and chunk size without loading all data into memory and write
 1508     out a SD file, type:
 1509 
 1510         % Psi4PerformMinimization.py  --mp yes --mpParams "inputDataMode,Lazy,
 1511           numProcesses,4,chunkSize,8" -i Psi4Sample.smi -o Psi4SampleOut.sdf
 1512 
 1513     To generate an initial conformer ensemble of up to 20 conformations using a
 1514     combination of ETKDGv2 distance geometry methodology and MMFF94s forcefield
 1515     minimization followed by energy minimization for a maxium of 20 iterations
 1516     using B3LYP/6-31+G** molecules in a SMILES file and  write out a SD file
 1517     containing minimum energy structure along with energy in specific units,
 1518     type:
 1519 
 1520         % Psi4PerformMinimization.py --confGeneration yes --confParams
 1521           "confMethod,ETKDGv2,forceField,MMFF, forceFieldMMFFVariant,MMFF94s,
 1522           maxConfs,20,embedRMSDCutoff,0.5" --energyUnits "kJ/mol" -m B3LYP
 1523           -b "6-31+G**" --maxIters 20 -i Psi4Sample.smi -o Psi4SampleOut.sdf
 1524 
 1525     To minimize molecules in a 3D files using B3LYP/6-31G** and B3LYP/6-31+G**
 1526     for non-sulfur and sulfur containing molecules for a maximum of 25 iterations
 1527     without generating any conformations and write out a SD file containing 
 1528     minimum energy structures corresponding to each molecule, type:
 1529 
 1530         % Psi4PerformMinimization.py --confGeneration no --maxIters 25
 1531           -i Psi4Sample3D.sdf -o Psi4Sample3DOut.sdf
 1532 
 1533     To run the first example for molecules in a CSV SMILES file, SMILES strings
 1534     in column 1, name column 2, and write out a SD file, type:
 1535 
 1536         % Psi4PerformMinimization.py --infileParams "smilesDelimiter,comma,
 1537           smilesTitleLine,yes,smilesColumn,1,smilesNameColumn,2"
 1538           -i Psi4Sample.csv -o Psi4SampleOut.sdf
 1539 
 1540 Author:
 1541 
 1542     Manish Sud(msud@san.rr.com)
 1543 
 1544 Contributor:
 1545     Francois Berenger
 1546 
 1547 See also:
 1548     Psi4CalculateEnergy.py, Psi4CalculatePartialCharges.py, Psi4GenerateConformers.py
 1549 
 1550 Copyright:
 1551     Copyright (C) 2024 Manish Sud. All rights reserved.
 1552 
 1553     The functionality available in this script is implemented using Psi4, an
 1554     open source quantum chemistry software package, and RDKit, an open
 1555     source toolkit for cheminformatics developed by Greg Landrum.
 1556 
 1557     This file is part of MayaChemTools.
 1558 
 1559     MayaChemTools is free software; you can redistribute it and/or modify it under
 1560     the terms of the GNU Lesser General Public License as published by the Free
 1561     Software Foundation; either version 3 of the License, or (at your option) any
 1562     later version.
 1563 
 1564 """
 1565 
 1566 if __name__ == "__main__":
 1567     main()