MayaChemTools

    1 #!/bin/env python
    2 #
    3 # File: Psi4PerformMinimization.py
    4 # Author: Manish Sud <msud@san.rr.com>
    5 #
    6 # Copyright (C) 2024 Manish Sud. All rights reserved.
    7 #
    8 # The functionality available in this script is implemented using Psi4, an
    9 # open source quantum chemistry software package, and RDKit, an open
   10 # source toolkit for cheminformatics developed by Greg Landrum.
   11 #
   12 # This file is part of MayaChemTools.
   13 #
   14 # MayaChemTools is free software; you can redistribute it and/or modify it under
   15 # the terms of the GNU Lesser General Public License as published by the Free
   16 # Software Foundation; either version 3 of the License, or (at your option) any
   17 # later version.
   18 #
   19 # MayaChemTools is distributed in the hope that it will be useful, but without
   20 # any warranty; without even the implied warranty of merchantability of fitness
   21 # for a particular purpose.  See the GNU Lesser General Public License for more
   22 # details.
   23 #
   24 # You should have received a copy of the GNU Lesser General Public License
   25 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
   26 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
   27 # Boston, MA, 02111-1307, USA.
   28 #
   29 
   30 from __future__ import print_function
   31 
   32 # Add local python path to the global path and import standard library modules...
   33 import os
   34 import sys;  sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python"))
   35 import time
   36 import re
   37 import shutil
   38 import multiprocessing as mp
   39 
   40 # Psi4 imports...
   41 if (hasattr(shutil, 'which') and shutil.which("psi4") is None):
   42     sys.stderr.write("\nWarning: Failed to find 'psi4' in your PATH indicating potential issues with your\n")
   43     sys.stderr.write("Psi4 environment. The 'import psi4' directive in the global scope of the script\n")
   44     sys.stderr.write("interferes with the multiprocessing functionality. It is imported later in the\n")
   45     sys.stderr.write("local scope during the execution of the script and may fail. Check/update your\n")
   46     sys.stderr.write("Psi4 environment and try again.\n\n")
   47 
   48 # RDKit imports...
   49 try:
   50     from rdkit import rdBase
   51     from rdkit import Chem
   52     from rdkit.Chem import AllChem
   53 except ImportError as ErrMsg:
   54     sys.stderr.write("\nFailed to import RDKit module/package: %s\n" % ErrMsg)
   55     sys.stderr.write("Check/update your RDKit environment and try again.\n\n")
   56     sys.exit(1)
   57 
   58 # MayaChemTools imports...
   59 try:
   60     from docopt import docopt
   61     import MiscUtil
   62     import Psi4Util
   63     import RDKitUtil
   64 except ImportError as ErrMsg:
   65     sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg)
   66     sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n")
   67     sys.exit(1)
   68 
   69 ScriptName = os.path.basename(sys.argv[0])
   70 Options = {}
   71 OptionsInfo = {}
   72 
   73 def main():
   74     """Start execution of the script."""
   75     
   76     MiscUtil.PrintInfo("\n%s (Psi4: Imported later; RDKit v%s; MayaChemTools v%s; %s): Starting...\n" % (ScriptName, rdBase.rdkitVersion, MiscUtil.GetMayaChemToolsVersion(), time.asctime()))
   77     
   78     (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime()
   79     
   80     # Retrieve command line arguments and options...
   81     RetrieveOptions()
   82     
   83     # Process and validate command line arguments and options...
   84     ProcessOptions()
   85     
   86     # Perform actions required by the script...
   87     PerformMinimization()
   88     
   89     MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName)
   90     MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime))
   91 
   92 def PerformMinimization():
   93     """Perform minimization."""
   94     
   95     # Setup a molecule reader...
   96     MiscUtil.PrintInfo("\nProcessing file %s..." % OptionsInfo["Infile"])
   97     Mols  = RDKitUtil.ReadMolecules(OptionsInfo["Infile"], **OptionsInfo["InfileParams"])
   98     
   99     # Set up a molecule writer...
  100     Writer = RDKitUtil.MoleculesWriter(OptionsInfo["Outfile"], **OptionsInfo["OutfileParams"])
  101     if Writer is None:
  102         MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OptionsInfo["Outfile"])
  103     MiscUtil.PrintInfo("Generating file %s..." % OptionsInfo["Outfile"])
  104 
  105     MolCount, ValidMolCount, MinimizationFailedCount, WriteFailedCount = ProcessMolecules(Mols, Writer)
  106 
  107     if Writer is not None:
  108         Writer.close()
  109     
  110     MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount)
  111     MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount)
  112     MiscUtil.PrintInfo("Number of molecules failed during conformation generation or minimization: %d" % MinimizationFailedCount)
  113     MiscUtil.PrintInfo("Number of molecules failed during writing: %d" % WriteFailedCount)
  114     MiscUtil.PrintInfo("Number of ignored molecules: %d" % (MolCount - ValidMolCount + MinimizationFailedCount + WriteFailedCount))
  115 
  116 def ProcessMolecules(Mols, Writer):
  117     """Process and minimize molecules."""
  118     
  119     if OptionsInfo["MPMode"]:
  120         return ProcessMoleculesUsingMultipleProcesses(Mols, Writer)
  121     else:
  122         return ProcessMoleculesUsingSingleProcess(Mols, Writer)
  123 
  124 def ProcessMoleculesUsingSingleProcess(Mols, Writer):
  125     """Process and minimize molecules using a single process."""
  126     
  127     # Intialize Psi4...
  128     MiscUtil.PrintInfo("\nInitializing Psi4...")
  129     Psi4Handle = Psi4Util.InitializePsi4(Psi4RunParams = OptionsInfo["Psi4RunParams"], Psi4OptionsParams = OptionsInfo["Psi4OptionsParams"], PrintVersion = True, PrintHeader = True)
  130     if OptionsInfo["Psi4DDXSolvationMode"]:
  131         Psi4Util.UpdatePsi4OptionsParameters(Psi4Handle, OptionsInfo["Psi4DDXSolvationOptions"])
  132     OptionsInfo["psi4"] = Psi4Handle
  133     
  134     # Setup max iterations global variable...
  135     Psi4Util.UpdatePsi4OptionsParameters(Psi4Handle, {'GEOM_MAXITER': OptionsInfo["MaxIters"]})
  136     
  137     # Setup conversion factor for energy units...
  138     SetupEnergyConversionFactor(Psi4Handle)
  139     
  140     if OptionsInfo["SkipConformerGeneration"]:
  141         MiscUtil.PrintInfo("\nPerforming minimization without generation of conformers...")
  142     else:
  143         MiscUtil.PrintInfo("\nPerforming minimization with generation of conformers...")
  144 
  145     (MolCount, ValidMolCount, MinimizationFailedCount, WriteFailedCount) = [0] * 4
  146     for Mol in Mols:
  147         MolCount += 1
  148 
  149         if not CheckAndValidateMolecule(Mol, MolCount):
  150             continue
  151 
  152         # Setup 2D coordinates for SMILES input file...
  153         if OptionsInfo["SMILESInfileStatus"]:
  154             AllChem.Compute2DCoords(Mol)
  155         
  156         ValidMolCount += 1
  157         
  158         Mol, CalcStatus, ConfID, Energy, SolvationEnergy = MinimizeMoleculeOrConformers(Psi4Handle, Mol, MolCount)
  159 
  160         if not CalcStatus:
  161             if not OptionsInfo["QuietMode"]:
  162                 MolName = RDKitUtil.GetMolName(Mol, MolCount)
  163                 MiscUtil.PrintWarning("Failed to calculate energy for molecule %s" % MolName)
  164             
  165             MinimizationFailedCount += 1
  166             continue
  167         
  168         WriteStatus = WriteMolecule(Writer, Mol, MolCount, ConfID, Energy, SolvationEnergy)
  169         
  170         if not WriteStatus:
  171             WriteFailedCount += 1
  172     
  173     return (MolCount, ValidMolCount, MinimizationFailedCount, WriteFailedCount)
  174 
  175 def ProcessMoleculesUsingMultipleProcesses(Mols, Writer):
  176     """Process and minimize molecules using multiprocessing."""
  177     
  178     if OptionsInfo["MPLevelConformersMode"]:
  179         return ProcessMoleculesUsingMultipleProcessesAtConformersLevel(Mols, Writer)
  180     elif OptionsInfo["MPLevelMoleculesMode"]:
  181         return ProcessMoleculesUsingMultipleProcessesAtMoleculesLevel(Mols, Writer)
  182     else:
  183         MiscUtil.PrintError("The value, %s,  option \"--mpLevel\" is not supported." % (OptionsInfo["MPLevel"]))
  184         
  185 def ProcessMoleculesUsingMultipleProcessesAtMoleculesLevel(Mols, Writer):
  186     """Process and minimize molecules using multiprocessing at molecules level."""
  187 
  188     if OptionsInfo["SkipConformerGeneration"]:
  189         MiscUtil.PrintInfo("\nPerforming minimization without generation of conformers using multiprocessing at molecules level...")
  190     else:
  191         MiscUtil.PrintInfo("\nPerforming minimization with generation of conformers using multiprocessing at molecules level...")
  192 
  193     MPParams = OptionsInfo["MPParams"]
  194     
  195     # Setup data for initializing a worker process...
  196     InitializeWorkerProcessArgs = (MiscUtil.ObjectToBase64EncodedString(Options), MiscUtil.ObjectToBase64EncodedString(OptionsInfo))
  197     
  198     # Setup a encoded mols data iterable for a worker process...
  199     WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(Mols)
  200     
  201     # Setup process pool along with data initialization for each process...
  202     if not OptionsInfo["QuietMode"]:
  203         MiscUtil.PrintInfo("\nConfiguring multiprocessing using %s method..." % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()"))
  204         MiscUtil.PrintInfo("NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n" % (MPParams["NumProcesses"], MPParams["InputDataMode"], ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"])))
  205     
  206     ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeWorkerProcess, InitializeWorkerProcessArgs)
  207     
  208     # Start processing...
  209     if re.match("^Lazy$", MPParams["InputDataMode"], re.I):
  210         Results = ProcessPool.imap(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
  211     elif re.match("^InMemory$", MPParams["InputDataMode"], re.I):
  212         Results = ProcessPool.map(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
  213     else:
  214         MiscUtil.PrintError("The value, %s, specified for \"--inputDataMode\" is not supported." % (MPParams["InputDataMode"]))
  215     
  216     # Print out Psi4 version in the main process...
  217     MiscUtil.PrintInfo("\nInitializing Psi4...\n")
  218     Psi4Handle  = Psi4Util.InitializePsi4(PrintVersion = True, PrintHeader = False)
  219     OptionsInfo["psi4"] = Psi4Handle
  220     
  221     (MolCount, ValidMolCount, MinimizationFailedCount, WriteFailedCount) = [0] * 4
  222     for Result in Results:
  223         MolCount += 1
  224         MolIndex, EncodedMol, CalcStatus, ConfID, Energy, SolvationEnergy = Result
  225         
  226         if EncodedMol is None:
  227             continue
  228         ValidMolCount += 1
  229 
  230         if not CalcStatus:
  231             MinimizationFailedCount += 1
  232             continue
  233             
  234         Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
  235         WriteStatus = WriteMolecule(Writer, Mol, MolCount, ConfID, Energy, SolvationEnergy)
  236         
  237         if not WriteStatus:
  238             WriteFailedCount += 1
  239     
  240     return (MolCount, ValidMolCount, MinimizationFailedCount, WriteFailedCount)
  241 
  242 def InitializeWorkerProcess(*EncodedArgs):
  243     """Initialize data for a worker process."""
  244     
  245     global Options, OptionsInfo
  246 
  247     if not OptionsInfo["QuietMode"]:
  248         MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid())
  249     
  250     # Decode Options and OptionInfo...
  251     Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0])
  252     OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1])
  253     
  254     # Psi4 is initialized in the worker process to avoid creation of redundant Psi4
  255     # output files for each process...
  256     OptionsInfo["Psi4Initialized"]  = False
  257 
  258 def WorkerProcess(EncodedMolInfo):
  259     """Process data for a worker process."""
  260 
  261     if not OptionsInfo["Psi4Initialized"]:
  262         InitializePsi4ForWorkerProcess()
  263     
  264     MolIndex, EncodedMol = EncodedMolInfo
  265 
  266     MolNum = MolIndex + 1
  267     CalcStatus = False
  268     ConfID = None
  269     Energy = None
  270     SolvationEnergy = None
  271     
  272     if EncodedMol is None:
  273         return [MolIndex, None, CalcStatus, ConfID, Energy, SolvationEnergy]
  274 
  275     Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
  276     
  277     if not CheckAndValidateMolecule(Mol, MolNum):
  278         return [MolIndex, None, CalcStatus, ConfID, Energy, SolvationEnergy]
  279     
  280     # Setup 2D coordinates for SMILES input file...
  281     if OptionsInfo["SMILESInfileStatus"]:
  282         AllChem.Compute2DCoords(Mol)
  283         
  284     Mol, CalcStatus, ConfID, Energy, SolvationEnergy = MinimizeMoleculeOrConformers(OptionsInfo["psi4"], Mol, MolNum)
  285 
  286     return [MolIndex, RDKitUtil.MolToBase64EncodedMolString(Mol, PropertyPickleFlags = Chem.PropertyPickleOptions.MolProps | Chem.PropertyPickleOptions.PrivateProps), CalcStatus, ConfID, Energy, SolvationEnergy]
  287 
  288 def ProcessMoleculesUsingMultipleProcessesAtConformersLevel(Mols, Writer):
  289     """Process and minimize molecules using multiprocessing at conformers level."""
  290 
  291     MiscUtil.PrintInfo("\nPerforming minimization with generation of conformers using multiprocessing at conformers level...")
  292 
  293     (MolCount, ValidMolCount, MinimizationFailedCount, WriteFailedCount) = [0] * 4
  294     for Mol in Mols:
  295         MolCount += 1
  296         
  297         if not CheckAndValidateMolecule(Mol, MolCount):
  298             continue
  299 
  300         # Setup 2D coordinates for SMILES input file...
  301         if OptionsInfo["SMILESInfileStatus"]:
  302             AllChem.Compute2DCoords(Mol)
  303 
  304         ValidMolCount += 1
  305         
  306         Mol, CalcStatus, ConfID, Energy, SolvationEnergy = ProcessConformersUsingMultipleProcesses(Mol, MolCount)
  307 
  308         if not CalcStatus:
  309             if not OptionsInfo["QuietMode"]:
  310                 MolName = RDKitUtil.GetMolName(Mol, MolCount)
  311                 MiscUtil.PrintWarning("Failed to calculate energy for molecule %s" % MolName)
  312             
  313             MinimizationFailedCount += 1
  314             continue
  315         
  316         WriteStatus = WriteMolecule(Writer, Mol, MolCount, ConfID, Energy, SolvationEnergy)
  317         
  318         if not WriteStatus:
  319             WriteFailedCount += 1
  320     
  321     return (MolCount, ValidMolCount, MinimizationFailedCount, WriteFailedCount)
  322 
  323 def ProcessConformersUsingMultipleProcesses(Mol, MolNum):
  324     """Generate conformers and minimize them using multiple processes. """
  325 
  326     # Add hydrogens...
  327     Mol = Chem.AddHs(Mol)
  328 
  329     # Setup conformers...
  330     ConfIDs = EmbedMolecule(Mol, MolNum)
  331     if not len(ConfIDs):
  332         if not OptionsInfo["QuietMode"]:
  333             MolName = RDKitUtil.GetMolName(Mol, MolNum)
  334             MiscUtil.PrintWarning("Minimization couldn't be performed for molecule %s: Embedding failed...\n" % MolName)
  335         return (Mol, False, None, None, None)
  336 
  337     MPParams = OptionsInfo["MPParams"]
  338     
  339     # Setup data for initializing a worker process...
  340     InitializeWorkerProcessArgs = (MiscUtil.ObjectToBase64EncodedString(Options), MiscUtil.ObjectToBase64EncodedString(OptionsInfo))
  341 
  342     # Setup a encoded mols data iterable for a worker process...
  343     MolIndex = MolNum - 1
  344     WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStringWithConfIDs(Mol, MolIndex, ConfIDs)
  345 
  346     # Setup process pool along with data initialization for each process...
  347     if not OptionsInfo["QuietMode"]:
  348         MiscUtil.PrintInfo("\nConfiguring multiprocessing using %s method..." % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()"))
  349         MiscUtil.PrintInfo("NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n" % (MPParams["NumProcesses"], MPParams["InputDataMode"], ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"])))
  350     
  351     ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeConformerWorkerProcess, InitializeWorkerProcessArgs)
  352     
  353     # Start processing...
  354     if re.match("^Lazy$", MPParams["InputDataMode"], re.I):
  355         Results = ProcessPool.imap(ConformerWorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
  356     elif re.match("^InMemory$", MPParams["InputDataMode"], re.I):
  357         Results = ProcessPool.map(ConformerWorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
  358     else:
  359         MiscUtil.PrintError("The value, %s, specified for \"--inputDataMode\" is not supported." % (MPParams["InputDataMode"]))
  360     
  361     CalcEnergyMap = {}
  362     CalcSolvationEnergyMap = {}
  363     CalcFailedCount = 0
  364     for Result in Results:
  365         MolIndex, EncodedMol, CalcStatus, ConfID, Energy, SolvationEnergy = Result
  366 
  367         if EncodedMol is None:
  368             CalcFailedCount += 1
  369             continue
  370         
  371         if not CalcStatus:
  372             CalcFailedCount += 1
  373             continue
  374         
  375         # Retrieve minimized atom positions...
  376         MinimizedMol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
  377         AtomPositions = RDKitUtil.GetAtomPositions(MinimizedMol, ConfID = ConfID)
  378 
  379         # Update atom positions...
  380         RDKitUtil.SetAtomPositions(Mol, AtomPositions, ConfID = ConfID)
  381         
  382         CalcEnergyMap[ConfID] = Energy
  383         CalcSolvationEnergyMap[ConfID] = SolvationEnergy
  384 
  385     if CalcFailedCount:
  386         return (Mol, False, None, None, None)
  387     
  388     SortedConfIDs = sorted(ConfIDs, key = lambda ConfID: CalcEnergyMap[ConfID])
  389     MinEnergyConfID = SortedConfIDs[0]
  390     MinEnergy = CalcEnergyMap[MinEnergyConfID]
  391     SolvationEnergy = CalcSolvationEnergyMap[MinEnergyConfID]
  392     
  393     return (Mol, True, MinEnergyConfID, MinEnergy, SolvationEnergy)
  394     
  395 def InitializeConformerWorkerProcess(*EncodedArgs):
  396     """Initialize data for a conformer worker process."""
  397     
  398     global Options, OptionsInfo
  399 
  400     if not OptionsInfo["QuietMode"]:
  401         MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid())
  402     
  403     # Decode Options and OptionInfo...
  404     Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0])
  405     OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1])
  406     
  407     # Psi4 is initialized in the worker process to avoid creation of redundant Psi4
  408     # output files for each process...
  409     OptionsInfo["Psi4Initialized"]  = False
  410 
  411 def ConformerWorkerProcess(EncodedMolInfo):
  412     """Process data for a conformer worker process."""
  413 
  414     if not OptionsInfo["Psi4Initialized"]:
  415         InitializePsi4ForWorkerProcess()
  416     
  417     MolIndex, EncodedMol, ConfID = EncodedMolInfo
  418 
  419     MolNum = MolIndex + 1
  420     
  421     CalcStatus = False
  422     Energy = None
  423     SolvationEnergy = None
  424     
  425     if EncodedMol is None:
  426         return [MolIndex, None, CalcStatus, ConfID, Energy, SolvationEnergy]
  427     
  428     Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
  429     MolName = RDKitUtil.GetMolName(Mol, MolNum)
  430     
  431     if not OptionsInfo["QuietMode"]:
  432         MiscUtil.PrintInfo("Processing molecule %s conformer ID %s..." % (MolName, ConfID))
  433     
  434     Status, ConvergeStatus = MinimizeMoleculeUsingForceField(Mol, MolNum, ConfID)
  435     if not Status:
  436         return [MolIndex, EncodedMol, CalcStatus, ConfID, Energy, SolvationEnergy]
  437     
  438     if ConvergeStatus != 0:
  439         if not OptionsInfo["QuietMode"]:
  440             MolName = RDKitUtil.GetMolName(Mol, (MolIndex + 1))
  441             MiscUtil.PrintWarning("Minimization using forcefield failed to converge for molecule %s in %d steps. Try using higher value for \"maxIters\" in \"--confParams\" option...\n" % (MolName, OptionsInfo["ConfGenerationParams"]["MaxIters"]))
  442     
  443     # Perform Psi4 minimization...
  444     CalcStatus, Energy, SolvationEnergy = MinimizeMoleculeUsingPsi4(OptionsInfo["psi4"], Mol, MolNum, ConfID)
  445     if not CalcStatus:
  446         if not OptionsInfo["QuietMode"]:
  447             MiscUtil.PrintWarning("Minimization couldn't be performed for molecule %s\n" % (MolName))
  448             return [MolIndex, EncodedMol, False, ConfID, Energy, SolvationEnergy]
  449 
  450     return [MolIndex, RDKitUtil.MolToBase64EncodedMolString(Mol, PropertyPickleFlags = Chem.PropertyPickleOptions.MolProps | Chem.PropertyPickleOptions.PrivateProps), CalcStatus, ConfID, Energy, SolvationEnergy]
  451 
  452 def InitializePsi4ForWorkerProcess():
  453     """Initialize Psi4 for a worker process."""
  454     
  455     if OptionsInfo["Psi4Initialized"]:
  456         return
  457 
  458     OptionsInfo["Psi4Initialized"] = True
  459 
  460     if OptionsInfo["MPLevelConformersMode"] and re.match("auto", OptionsInfo["Psi4RunParams"]["OutputFileSpecified"], re.I):
  461         # Run Psi4 in quiet mode during multiprocessing at Conformers level for 'auto' OutputFile...
  462         OptionsInfo["Psi4RunParams"]["OutputFile"] = "quiet"
  463     else:
  464         # Update output file...
  465         OptionsInfo["Psi4RunParams"]["OutputFile"] = Psi4Util.UpdatePsi4OutputFileUsingPID(OptionsInfo["Psi4RunParams"]["OutputFile"], os.getpid())
  466             
  467     # Intialize Psi4...
  468     OptionsInfo["psi4"] = Psi4Util.InitializePsi4(Psi4RunParams = OptionsInfo["Psi4RunParams"], Psi4OptionsParams = OptionsInfo["Psi4OptionsParams"], PrintVersion = False, PrintHeader = True)
  469     
  470     if OptionsInfo["Psi4DDXSolvationMode"]:
  471         Psi4Util.UpdatePsi4OptionsParameters(OptionsInfo["psi4"], OptionsInfo["Psi4DDXSolvationOptions"])
  472     
  473     # Setup max iterations global variable...
  474     Psi4Util.UpdatePsi4OptionsParameters(OptionsInfo["psi4"], {'GEOM_MAXITER': OptionsInfo["MaxIters"]})
  475     
  476     # Setup conversion factor for energy units...
  477     SetupEnergyConversionFactor(OptionsInfo["psi4"])
  478 
  479 def MinimizeMoleculeOrConformers(Psi4Handle, Mol, MolNum):
  480     """Minimize molecule or conformers."""
  481 
  482     ConfID = None
  483     if OptionsInfo["SkipConformerGeneration"]:
  484         Mol, CalcStatus, Energy, SolvationEnergy = MinimizeMolecule(Psi4Handle, Mol, MolNum)
  485     else:
  486         Mol, CalcStatus, ConfID, Energy, SolvationEnergy = GenerateAndMinimizeConformers(Psi4Handle, Mol, MolNum)
  487 
  488     return (Mol, CalcStatus, ConfID, Energy, SolvationEnergy)
  489 
  490 def GenerateAndMinimizeConformers(Psi4Handle, Mol, MolNum = None):
  491     """Generate and minimize conformers for a molecule to get the lowest energy conformer."""
  492 
  493     # Add hydrogens..
  494     Mol = Chem.AddHs(Mol)
  495     
  496     MolName = RDKitUtil.GetMolName(Mol, MolNum)
  497 
  498     # Setup conformers...
  499     ConfIDs = EmbedMolecule(Mol, MolNum)
  500     if not len(ConfIDs):
  501         if not OptionsInfo["QuietMode"]:
  502             MiscUtil.PrintWarning("Minimization couldn't be performed for molecule %s: Embedding failed...\n" % MolName)
  503         return (Mol, False, None, None, None)
  504     
  505     # Minimize conformers...
  506     CalcEnergyMap = {}
  507     CalcSolvationEnergyMap = {}
  508     for ConfID in ConfIDs:
  509         if not OptionsInfo["QuietMode"]:
  510             MiscUtil.PrintInfo("Processing molecule %s conformer ID %s..." % (MolName, ConfID))
  511             
  512         # Perform forcefield minimization...
  513         Status, ConvergeStatus = MinimizeMoleculeUsingForceField(Mol, MolNum, ConfID)
  514         if not Status:
  515             return (Mol, False, None, None, None)
  516         
  517         if ConvergeStatus != 0:
  518             if not OptionsInfo["QuietMode"]:
  519                 MiscUtil.PrintWarning("Minimization using forcefield failed to converge for molecule %s in %d steps. Try using higher value for \"maxIters\" in \"--confParams\" option...\n" % (MolName, OptionsInfo["ConfGenerationParams"]["MaxIters"]))
  520         
  521         # Perform Psi4 minimization...
  522         CalcStatus, Energy, SolvationEnergy = MinimizeMoleculeUsingPsi4(Psi4Handle, Mol, MolNum, ConfID)
  523         if not CalcStatus:
  524             if not OptionsInfo["QuietMode"]:
  525                 MiscUtil.PrintWarning("Minimization couldn't be performed for molecule %s\n" % (MolName))
  526             return (Mol, False, None, None, None)
  527 
  528         CalcEnergyMap[ConfID] = Energy
  529         CalcSolvationEnergyMap[ConfID] = SolvationEnergy
  530 
  531     # Select the lowest energy conformer..
  532     SortedConfIDs = sorted(ConfIDs, key = lambda ConfID: CalcEnergyMap[ConfID])
  533     MinEnergyConfID = SortedConfIDs[0]
  534     Energy = CalcEnergyMap[MinEnergyConfID]
  535     SolvationEnergy = CalcSolvationEnergyMap[MinEnergyConfID]
  536 
  537     return (Mol, True, MinEnergyConfID, Energy, SolvationEnergy)
  538 
  539 def MinimizeMolecule(Psi4Handle, Mol, MolNum):
  540     """Minimize molecule."""
  541 
  542     # Skip conformation generation and forcefield minimization...
  543     CalcStatus, Energy, SolvationEnergy = MinimizeMoleculeUsingPsi4(Psi4Handle, Mol, MolNum)
  544     
  545     return (Mol, CalcStatus, Energy, SolvationEnergy)
  546     
  547 def MinimizeMoleculeUsingPsi4(Psi4Handle, Mol, MolNum, ConfID = -1):
  548     """Minimize molecule using Psi4."""
  549 
  550     # Setup a Psi4Mol...
  551     Psi4Mol = SetupPsi4Mol(Psi4Handle, Mol, MolNum, ConfID)
  552     if Psi4Mol is None:
  553         return (False, None, None)
  554         
  555     #  Setup reference wave function...
  556     Reference = SetupReferenceWavefunction(Mol)
  557     Psi4Handle.set_options({'Reference': Reference})
  558     
  559     # Setup method name and basis set...
  560     MethodName, BasisSet = SetupMethodNameAndBasisSet(Mol)
  561 
  562     # Optimize geometry...
  563     Status, Energy, WaveFunction = Psi4Util.PerformGeometryOptimization(Psi4Handle, Psi4Mol, MethodName, BasisSet, ReturnWaveFunction = True, Quiet = OptionsInfo["QuietMode"])
  564     
  565     if not Status:
  566         PerformPsi4Cleanup(Psi4Handle)
  567         return (False, None, None)
  568 
  569     SolvationEnergy = None
  570     if OptionsInfo["Psi4DDXSolvationMode"]:
  571         SolvationEnergy = RetrievePsi4DDXSolvationEnergy(WaveFunction)
  572         
  573     # Update atom positions...
  574     AtomPositions = Psi4Util.GetAtomPositions(Psi4Handle, WaveFunction, InAngstroms = True)
  575     RDKitUtil.SetAtomPositions(Mol, AtomPositions, ConfID = ConfID)
  576 
  577     # Convert energy units...
  578     if OptionsInfo["ApplyEnergyConversionFactor"]:
  579         Energy = Energy * OptionsInfo["EnergyConversionFactor"]
  580         if SolvationEnergy is not None:
  581             SolvationEnergy = SolvationEnergy * OptionsInfo["EnergyConversionFactor"]
  582     
  583     # Clean up
  584     PerformPsi4Cleanup(Psi4Handle)
  585     
  586     return (True, Energy, SolvationEnergy)
  587 
  588 def RetrievePsi4DDXSolvationEnergy(WaveFunction):
  589     """Retrieve DDX solvation energy."""
  590     
  591     SolvationEnergy = None
  592     try:
  593         SolvationEnergy = WaveFunction.variable('DD Solvation Energy')
  594     except Exception as ErrMsg:
  595         SolvationEnergy = None
  596         if not OptionsInfo["QuietMode"]:
  597             MiscUtil.PrintWarning("Failed to retrieve Psi4 DD Solvation Energy using wave function: %s" % ErrMsg)
  598 
  599     return SolvationEnergy
  600 
  601 def MinimizeMoleculeUsingForceField(Mol, MolNum, ConfID = -1):
  602     """Minimize molecule using forcefield available in RDKit."""
  603     
  604     try:
  605         if OptionsInfo["ConfGenerationParams"]["UseUFF"]:
  606             ConvergeStatus = AllChem.UFFOptimizeMolecule(Mol, confId = ConfID, maxIters = OptionsInfo["ConfGenerationParams"]["MaxIters"])
  607         elif OptionsInfo["ConfGenerationParams"]["UseMMFF"]:
  608             ConvergeStatus = AllChem.MMFFOptimizeMolecule(Mol, confId = ConfID, maxIters = OptionsInfo["ConfGenerationParams"]["MaxIters"], mmffVariant = OptionsInfo["ConfGenerationParams"]["MMFFVariant"])
  609         else:
  610             MiscUtil.PrintError("Minimization couldn't be performed: Specified forcefield, %s, is not supported" % OptionsInfo["ConfGenerationParams"]["ForceField"])
  611     except (ValueError, RuntimeError, Chem.rdchem.KekulizeException) as ErrMsg:
  612         if not OptionsInfo["QuietMode"]:
  613             MolName = RDKitUtil.GetMolName(Mol, MolNum)
  614             MiscUtil.PrintWarning("Minimization using forcefield couldn't be performed for molecule %s:\n%s\n" % (MolName, ErrMsg))
  615         return (False, None)
  616     
  617     return (True, ConvergeStatus)
  618 
  619 def EmbedMolecule(Mol, MolNum = None):
  620     """Embed conformations."""
  621 
  622     ConfIDs = []
  623     
  624     MaxConfs = OptionsInfo["ConfGenerationParams"]["MaxConfs"]
  625     RandomSeed = OptionsInfo["ConfGenerationParams"]["RandomSeed"]
  626     EnforceChirality = OptionsInfo["ConfGenerationParams"]["EnforceChirality"]
  627     UseExpTorsionAnglePrefs = OptionsInfo["ConfGenerationParams"]["UseExpTorsionAnglePrefs"]
  628     ETVersion = OptionsInfo["ConfGenerationParams"]["ETVersion"]
  629     UseBasicKnowledge = OptionsInfo["ConfGenerationParams"]["UseBasicKnowledge"]
  630     EmbedRMSDCutoff = OptionsInfo["ConfGenerationParams"]["EmbedRMSDCutoff"]
  631 
  632     try:
  633         ConfIDs = AllChem.EmbedMultipleConfs(Mol, numConfs = MaxConfs, randomSeed = RandomSeed, pruneRmsThresh = EmbedRMSDCutoff, enforceChirality = EnforceChirality, useExpTorsionAnglePrefs = UseExpTorsionAnglePrefs, useBasicKnowledge = UseBasicKnowledge, ETversion = ETVersion)
  634     except ValueError as ErrMsg:
  635         if not OptionsInfo["QuietMode"]:
  636             MolName = RDKitUtil.GetMolName(Mol, MolNum)
  637             MiscUtil.PrintWarning("Embedding failed  for molecule %s:\n%s\n" % (MolName, ErrMsg))
  638         ConfIDs = []
  639 
  640     if not OptionsInfo["QuietMode"]:
  641         if EmbedRMSDCutoff > 0:
  642             MiscUtil.PrintInfo("Generating initial conformation ensemble by distance geometry for %s - EmbedRMSDCutoff: %s; Size: %s; Size after RMSD filtering: %s" % (RDKitUtil.GetMolName(Mol, MolNum), EmbedRMSDCutoff, MaxConfs, len(ConfIDs)))
  643         else:
  644             MiscUtil.PrintInfo("Generating initial conformation ensemble by distance geometry for %s - EmbedRMSDCutoff: None; Size: %s" % (RDKitUtil.GetMolName(Mol, MolNum), len(ConfIDs)))
  645     
  646     return ConfIDs
  647 
  648 def SetupPsi4Mol(Psi4Handle, Mol, MolNum, ConfID = -1):
  649     """Setup a Psi4 molecule object."""
  650 
  651     if OptionsInfo["RecenterAndReorient"]:
  652         MolGeometry = RDKitUtil.GetPsi4XYZFormatString(Mol, ConfID = ConfID, NoCom = False, NoReorient = False)
  653     else:
  654         MolGeometry = RDKitUtil.GetPsi4XYZFormatString(Mol, ConfID = ConfID, NoCom = True, NoReorient = True)
  655     
  656     try:
  657         Psi4Mol = Psi4Handle.geometry(MolGeometry)
  658     except Exception as ErrMsg:
  659         Psi4Mol = None
  660         if not OptionsInfo["QuietMode"]:
  661             MiscUtil.PrintWarning("Failed to create Psi4 molecule from geometry string: %s\n" % ErrMsg)
  662             MolName = RDKitUtil.GetMolName(Mol, MolNum)
  663             MiscUtil.PrintWarning("Ignoring molecule: %s" % MolName)
  664     
  665     if OptionsInfo["Symmetrize"]:
  666         Psi4Mol.symmetrize(OptionsInfo["SymmetrizeTolerance"])
  667     
  668     return Psi4Mol
  669 
  670 def PerformPsi4Cleanup(Psi4Handle):
  671     """Perform clean up."""
  672 
  673     # Clean up after Psi4 run...
  674     Psi4Handle.core.clean()
  675     
  676     # Clean up any leftover scratch files...
  677     if OptionsInfo["MPMode"]:
  678         Psi4Util.RemoveScratchFiles(Psi4Handle, OptionsInfo["Psi4RunParams"]["OutputFile"])
  679 
  680 def CheckAndValidateMolecule(Mol, MolCount = None):
  681     """Validate molecule for Psi4 calculations."""
  682     
  683     if Mol is None:
  684         if not OptionsInfo["QuietMode"]:
  685             MiscUtil.PrintInfo("\nProcessing molecule number %s..." % MolCount)
  686         return False
  687     
  688     MolName = RDKitUtil.GetMolName(Mol, MolCount)
  689     if not OptionsInfo["QuietMode"]:
  690         MiscUtil.PrintInfo("\nProcessing molecule %s..." % MolName)
  691     
  692     if RDKitUtil.IsMolEmpty(Mol):
  693         if not OptionsInfo["QuietMode"]:
  694             MiscUtil.PrintWarning("Ignoring empty molecule: %s\n" % MolName)
  695         return False
  696     
  697     if not RDKitUtil.ValidateElementSymbols(RDKitUtil.GetAtomSymbols(Mol)):
  698         if not OptionsInfo["QuietMode"]:
  699             MiscUtil.PrintWarning("Ignoring molecule containing invalid element symbols: %s\n" % MolName)
  700         return False
  701     
  702     # Check for 3D flag...
  703     if OptionsInfo["SkipConformerGeneration"]:
  704         if not Mol.GetConformer().Is3D():
  705             if not OptionsInfo["QuietMode"]:
  706                 MiscUtil.PrintWarning("3D tag is not set for molecule: %s\n" % MolName)
  707     
  708     # Check for missing hydrogens...
  709     if OptionsInfo["SkipConformerGeneration"]:
  710         # Otherwise, Hydrogens are always added...
  711         if RDKitUtil.AreHydrogensMissingInMolecule(Mol):
  712             if not OptionsInfo["QuietMode"]:
  713                 MiscUtil.PrintWarning("Missing hydrogens in molecule: %s\n" % MolName)
  714     
  715     return True
  716 
  717 def SetupMethodNameAndBasisSet(Mol):
  718     """Setup method name and basis set."""
  719     
  720     MethodName = OptionsInfo["MethodName"]
  721     if OptionsInfo["MethodNameAuto"]:
  722         MethodName = "B3LYP"
  723     
  724     BasisSet = OptionsInfo["BasisSet"]
  725     if OptionsInfo["BasisSetAuto"]:
  726         BasisSet = "6-31+G**" if RDKitUtil.IsAtomSymbolPresentInMol(Mol, "S") else "6-31G**"
  727     
  728     return (MethodName, BasisSet)
  729 
  730 def SetupReferenceWavefunction(Mol):
  731     """Setup reference wavefunction."""
  732     
  733     Reference = OptionsInfo["Reference"]
  734     if OptionsInfo["ReferenceAuto"]:
  735         Reference = 'UHF' if (RDKitUtil.GetSpinMultiplicity(Mol) > 1) else 'RHF'
  736     
  737     return Reference
  738 
  739 def SetupEnergyConversionFactor(Psi4Handle):
  740     """Setup converstion factor for energt units. The Psi4 energy units are Hartrees."""
  741     
  742     EnergyUnits = OptionsInfo["EnergyUnits"]
  743     
  744     ApplyConversionFactor = True
  745     if re.match("^kcal\/mol$", EnergyUnits, re.I):
  746         ConversionFactor = Psi4Handle.constants.hartree2kcalmol
  747     elif re.match("^kJ\/mol$", EnergyUnits, re.I):
  748         ConversionFactor = Psi4Handle.constants.hartree2kJmol
  749     elif re.match("^eV$", EnergyUnits, re.I):
  750         ConversionFactor = Psi4Handle.constants.hartree2ev
  751     else:
  752         ApplyConversionFactor = False
  753         ConversionFactor = 1.0
  754     
  755     OptionsInfo["ApplyEnergyConversionFactor"] = ApplyConversionFactor
  756     OptionsInfo["EnergyConversionFactor"] = ConversionFactor
  757 
  758 def WriteMolecule(Writer, Mol, MolNum = None, ConfID = None, Energy = None, SolvationEnergy = None):
  759     """Write molecule."""
  760 
  761     if OptionsInfo["EnergyOut"]  and Energy is not None:
  762         Energy = "%.*f" % (OptionsInfo["Precision"], Energy)
  763         Mol.SetProp(OptionsInfo["EnergyDataFieldLabel"], Energy)
  764         
  765         if OptionsInfo["Psi4DDXSolvationMode"] and SolvationEnergy is not None:
  766             SolvationEnergy = "%.*f" % (OptionsInfo["Precision"], SolvationEnergy)
  767             Mol.SetProp(OptionsInfo["SolvationEnergyDataFieldLabel"], SolvationEnergy)
  768 
  769     try:
  770         if ConfID is None:
  771             Writer.write(Mol)
  772         else:
  773             Writer.write(Mol, confId = ConfID)
  774     except (ValueError, RuntimeError) as ErrMsg:
  775         if not OptionsInfo["QuietMode"]:
  776             MolName = RDKitUtil.GetMolName(Mol, MolNum)
  777             MiscUtil.PrintWarning("Failed to write molecule %s:\n%s\n" % (MolName, ErrMsg))
  778         return False
  779 
  780     return True
  781 
  782 def CheckAndSetupDDX():
  783     """Load pyddx and return its handle."""
  784     
  785     try:
  786         import pyddx
  787     except ImportError as ErrMsg:
  788         sys.stderr.write("\nFailed to import module/package pyddx for solvation: %s\n" % ErrMsg)
  789         sys.stderr.write("Psi4 relies on pyddx module for DDX solvation calculation.\n")
  790         sys.stderr.write("You may need to install it from pypi or conda-forge.\n")
  791         sys.stderr.write("Check/update your environment and try again.\n\n")
  792         sys.exit(1)
  793 
  794     return pyddx
  795 
  796 def ProcessDDXListSolventsOption():
  797     """List solvent information provided by tge DDX module for the calculation
  798     of solvation energy."""
  799 
  800     PyDDXHandle = CheckAndSetupDDX()
  801     
  802     MiscUtil.PrintInfo("Solvent name and dielectric constant information avaibale in the DDX module:\n")
  803     MiscUtil.PrintInfo("Solvent\tEpsilon")
  804     Count = 0
  805     for Solvent in sorted(PyDDXHandle.data.solvent_epsilon.keys()):
  806         Count += 1
  807         Epsilon = PyDDXHandle.data.solvent_epsilon[Solvent]
  808         MiscUtil.PrintInfo("%s\t%s" % (Solvent, Epsilon))
  809     
  810     MiscUtil.PrintInfo("\nTotal number of solvents: %s\n" % Count)
  811 
  812 def ProcessOptions():
  813     """Process and validate command line arguments and options."""
  814     
  815     MiscUtil.PrintInfo("Processing options...")
  816     
  817     # Validate options...
  818     ValidateOptions()
  819     
  820     OptionsInfo["Infile"] = Options["--infile"]
  821     OptionsInfo["SMILESInfileStatus"] = True if  MiscUtil.CheckFileExt(Options["--infile"], "smi csv tsv txt") else False
  822     ParamsDefaultInfoOverride = {"RemoveHydrogens": False}
  823     OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters("--infileParams", Options["--infileParams"], InfileName = Options["--infile"], ParamsDefaultInfo = ParamsDefaultInfoOverride)
  824     
  825     OptionsInfo["Outfile"] = Options["--outfile"]
  826     OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters("--outfileParams", Options["--outfileParams"])
  827     
  828     OptionsInfo["Overwrite"] = Options["--overwrite"]
  829     
  830     # Method, basis set, and reference wavefunction...
  831     OptionsInfo["BasisSet"] = Options["--basisSet"]
  832     OptionsInfo["BasisSetAuto"] = True if re.match("^auto$", Options["--basisSet"], re.I) else False
  833     
  834     OptionsInfo["MethodName"] = Options["--methodName"]
  835     OptionsInfo["MethodNameAuto"] = True if re.match("^auto$", Options["--methodName"], re.I) else False
  836     
  837     OptionsInfo["Reference"] = Options["--reference"]
  838     OptionsInfo["ReferenceAuto"] = True if re.match("^auto$", Options["--reference"], re.I) else False
  839     
  840     # Run and options parameters...
  841     OptionsInfo["Psi4OptionsParams"] = Psi4Util.ProcessPsi4OptionsParameters("--psi4OptionsParams", Options["--psi4OptionsParams"])
  842     OptionsInfo["Psi4RunParams"] = Psi4Util.ProcessPsi4RunParameters("--psi4RunParams", Options["--psi4RunParams"], InfileName = OptionsInfo["Infile"])
  843     
  844     # DDX solvation parameters...
  845     OptionsInfo["Psi4DDXSolvationMode"] = True if re.match("^yes$", Options["--psi4DDXSolvation"], re.I) else False
  846     Psi4DDXSolvationParams = Psi4Util.ProcessPsi4DDXSolvationParameters("--psi4DDXSolvationParams", Options["--psi4DDXSolvationParams"])
  847     Psi4DDXSolvationOptions = Psi4Util.SetupPsi4DDXSolvationOptions(OptionsInfo["Psi4DDXSolvationMode"], Psi4DDXSolvationParams)
  848     OptionsInfo["Psi4DDXSolvationParams"] = Psi4DDXSolvationParams
  849     OptionsInfo["Psi4DDXSolvationOptions"] = Psi4DDXSolvationOptions
  850     
  851     if OptionsInfo["Psi4DDXSolvationMode"]:
  852         CheckAndSetupDDX()
  853     
  854     # Conformer generation paramaters...
  855     OptionsInfo["ConfGeneration"] = Options["--confGeneration"]
  856     OptionsInfo["SkipConformerGeneration"] = False if re.match("^yes$", Options["--confGeneration"], re.I) else True
  857 
  858     if OptionsInfo["SkipConformerGeneration"]:
  859         if OptionsInfo["SMILESInfileStatus"]:
  860             MiscUtil.PrintError("The value, %s, specified for option \"-c --confGeneration\" is not allowed during, %s, value for option \"-i, --infile\" . The input file must be a 3D SD or MOL file. " % (Options["--confGeneration"], Options["--infile"]))
  861     
  862     ParamsDefaultInfoOverride = {"MaxConfs": 50, "MaxIters": 250}
  863     OptionsInfo["ConfGenerationParams"] = MiscUtil.ProcessOptionConformerParameters("--confParams", Options["--confParams"], ParamsDefaultInfoOverride)
  864     
  865     # Write energy parameters...
  866     OptionsInfo["EnergyOut"] = True if re.match("^yes$", Options["--energyOut"], re.I) else False
  867     OptionsInfo["EnergyUnits"] = Options["--energyUnits"]
  868     
  869     EnergyDataFieldLabel = Options["--energyDataFieldLabel"]
  870     if re.match("^auto$", EnergyDataFieldLabel, re.I):
  871         EnergyDataFieldLabel = "Psi4_Energy (%s)" % Options["--energyUnits"]
  872     OptionsInfo["EnergyDataFieldLabel"] = EnergyDataFieldLabel
  873     
  874     SolvationEnergyDataFieldLabel = "Psi4_DDX_Solvation_Energy (%s)" % Options["--energyUnits"]
  875     OptionsInfo["SolvationEnergyDataFieldLabel"] = SolvationEnergyDataFieldLabel
  876     
  877     OptionsInfo["MaxIters"] = int(Options["--maxIters"])
  878 
  879     OptionsInfo["MPMode"] = True if re.match("^yes$", Options["--mp"], re.I) else False
  880     OptionsInfo["MPParams"] = MiscUtil.ProcessOptionMultiprocessingParameters("--mpParams", Options["--mpParams"])
  881 
  882     # Multiprocessing level...
  883     MPLevelMoleculesMode = False
  884     MPLevelConformersMode = False
  885     MPLevel = Options["--mpLevel"]
  886     if re.match("^Molecules$", MPLevel, re.I):
  887         MPLevelMoleculesMode = True
  888     elif re.match("^Conformers$", MPLevel, re.I):
  889         if  OptionsInfo["SkipConformerGeneration"]:
  890             MiscUtil.PrintError("The value, %s, specified for option \"--mpLevel\" is not allowed during, %s, value of option \"--confGeneration\"  . " % (MPLevel, Options["--confGeneration"]))
  891         MPLevelConformersMode = True
  892     else:
  893         MiscUtil.PrintError("The value, %s, specified for option \"--mpLevel\" is not valid. " % MPLevel)
  894     OptionsInfo["MPLevel"] = MPLevel
  895     OptionsInfo["MPLevelMoleculesMode"] = MPLevelMoleculesMode
  896     OptionsInfo["MPLevelConformersMode"] = MPLevelConformersMode
  897     
  898     OptionsInfo["Precision"] = int(Options["--precision"])
  899     OptionsInfo["QuietMode"] = True if re.match("^yes$", Options["--quiet"], re.I) else False
  900 
  901     RecenterAndReorient = Options["--recenterAndReorient"]
  902     if re.match("^auto$", RecenterAndReorient, re.I):
  903         RecenterAndReorient  = False if  OptionsInfo["SkipConformerGeneration"] else True
  904     else:
  905         RecenterAndReorient  = True if re.match("^yes$", RecenterAndReorient, re.I) else False
  906     OptionsInfo["RecenterAndReorient"] = RecenterAndReorient
  907     
  908     Symmetrize = Options["--symmetrize"]
  909     if re.match("^auto$", Symmetrize, re.I):
  910         Symmetrize  = True if  RecenterAndReorient else False
  911     else:
  912         Symmetrize  = True if re.match("^yes$", Symmetrize, re.I) else False
  913     OptionsInfo["Symmetrize"] = Symmetrize
  914     
  915     OptionsInfo["SymmetrizeTolerance"] = float(Options["--symmetrizeTolerance"])
  916 
  917 def RetrieveOptions():
  918     """Retrieve command line arguments and options."""
  919     
  920     # Get options...
  921     global Options
  922     Options = docopt(_docoptUsage_)
  923     
  924     # Set current working directory to the specified directory...
  925     WorkingDir = Options["--workingdir"]
  926     if WorkingDir:
  927         os.chdir(WorkingDir)
  928     
  929     # Handle examples option...
  930     if "--examples" in Options and Options["--examples"]:
  931         MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_))
  932         sys.exit(0)
  933     
  934     # Handle listing of solvent information...
  935     if  Options and Options["--psi4DDXListSolvents"]:
  936         ProcessDDXListSolventsOption()
  937         sys.exit(0)
  938 
  939 def ValidateOptions():
  940     """Validate option values."""
  941     
  942     MiscUtil.ValidateOptionTextValue("-c, --confGeneration", Options["--confGeneration"], "yes no")
  943     
  944     MiscUtil.ValidateOptionTextValue("--energyOut", Options["--energyOut"], "yes no")
  945     MiscUtil.ValidateOptionTextValue("--energyUnits", Options["--energyUnits"], "Hartrees kcal/mol kJ/mol eV")
  946     
  947     MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"])
  948     MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "sdf sd mol smi txt csv tsv")
  949     
  950     MiscUtil.ValidateOptionIntegerValue("--maxIters", Options["--maxIters"], {">": 0})
  951     
  952     MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "sdf sd")
  953     MiscUtil.ValidateOptionsOutputFileOverwrite("-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"])
  954     MiscUtil.ValidateOptionsDistinctFileNames("-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"])
  955     
  956     MiscUtil.ValidateOptionTextValue("--mp", Options["--mp"], "yes no")
  957     MiscUtil.ValidateOptionTextValue("--mpLevel", Options["--mpLevel"], "Molecules Conformers")
  958     
  959     MiscUtil.ValidateOptionIntegerValue("-p, --precision", Options["--precision"], {">": 0})
  960     
  961     MiscUtil.ValidateOptionTextValue("--psi4DDXSolvation", Options["--psi4DDXSolvation"], "yes no")
  962     
  963     MiscUtil.ValidateOptionTextValue("-q, --quiet", Options["--quiet"], "yes no")
  964     
  965     MiscUtil.ValidateOptionTextValue("--recenterAndReorient", Options["--recenterAndReorient"], "yes no auto")
  966     MiscUtil.ValidateOptionTextValue("--symmetrize", Options["--symmetrize"], "yes no auto")
  967     MiscUtil.ValidateOptionFloatValue("--symmetrizeTolerance", Options["--symmetrizeTolerance"], {">": 0})
  968 
  969 # Setup a usage string for docopt...
  970 _docoptUsage_ = """
  971 Psi4PerformMinimization.py - Perform structure minimization
  972 
  973 Usage:
  974     Psi4PerformMinimization.py [--basisSet <text>] [--confGeneration <yes or no> ] [--confParams <Name,Value,...>]
  975                                [--energyOut <yes or no>] [--energyDataFieldLabel <text>] [--energyUnits <text>]
  976                                [--infileParams <Name,Value,...>] [--maxIters <number>] [--methodName <text>]
  977                                [--mp <yes or no>] [--mpLevel <Molecules or Conformers>] [--mpParams <Name, Value,...>]
  978                                [ --outfileParams <Name,Value,...> ] [--overwrite] [--precision <number> ]
  979                                [--psi4OptionsParams <Name,Value,...>] [--psi4RunParams <Name,Value,...>]
  980                                [--psi4DDXSolvation <yes or no>] [--psi4DDXSolvationParams <Name,Value,...>]
  981                                [--quiet <yes or no>] [--reference <text>] [--recenterAndReorient <yes or no>]
  982                                [--symmetrize <yes or no>] [--symmetrizeTolerance <number>] [-w <dir>] -i <infile> -o <outfile> 
  983     Psi4PerformMinimization.py --psi4DDXListSolvents
  984     Psi4PerformMinimization.py -h | --help | -e | --examples
  985 
  986 Description:
  987     Generate 3D structures for molecules using a combination of distance geometry
  988     and forcefield minimization followed by geometry optimization using a quantum
  989     chemistry method. A set of initial 3D structures are generated for a molecule 
  990     employing distance geometry. The 3D structures in the conformation ensemble
  991     are sequentially minimized using forcefield and a quantum chemistry method.
  992     The conformation with lowest energy is selected to represent the final structure.
  993     An option is available to skip the generation of the conformation ensemble along
  994     with forcefield minimization and simply perform minimization on the initial 3D
  995     structure using a quantum chemistry method.
  996 
  997     A Psi4 XYZ format geometry string is automatically generated for each molecule
  998     in input file. It contains atom symbols and 3D coordinates for each atom in a
  999     molecule. In addition, the formal charge and spin multiplicity are present in the
 1000     the geometry string. These values are either retrieved from molecule properties
 1001     named 'FormalCharge' and 'SpinMultiplicty' or dynamically calculated for a
 1002     molecule.
 1003 
 1004     The supported input file formats are: Mol (.mol), SD (.sdf, .sd), SMILES (.smi,
 1005     .csv, .tsv, .txt)
 1006 
 1007     The supported output file formats are: SD (.sdf, .sd)
 1008 
 1009 Options:
 1010     -b, --basisSet <text>  [default: auto]
 1011         Basis set to use for energy minimization. Default: 6-31+G** for sulfur
 1012         containing molecules; Otherwise, 6-31G** [ Ref 150 ]. The specified 
 1013         value must be a valid Psi4 basis set. No validation is performed.
 1014         
 1015         The following list shows a representative sample of basis sets available
 1016         in Psi4:
 1017             
 1018             STO-3G, 6-31G, 6-31+G, 6-31++G, 6-31G*, 6-31+G*,  6-31++G*, 
 1019             6-31G**, 6-31+G**, 6-31++G**, 6-311G, 6-311+G, 6-311++G,
 1020             6-311G*, 6-311+G*, 6-311++G*, 6-311G**, 6-311+G**, 6-311++G**,
 1021             cc-pVDZ, cc-pCVDZ, aug-cc-pVDZ, cc-pVDZ-DK, cc-pCVDZ-DK, def2-SVP,
 1022             def2-SVPD, def2-TZVP, def2-TZVPD, def2-TZVPP, def2-TZVPPD
 1023             
 1024     -c, --confGeneration <yes or no>  [default: yes]
 1025         Generate an initial 3D conformation ensemble using distance geometry and
 1026         forcefield minimization before final geometry optimization by a specified
 1027         method name and basis set. Possible values: yes or no.
 1028         
 1029         The 'No' value skips the generation of conformations employing distance
 1030         geometry and forcefield minimization. The 3D structures in input file are
 1031         minimized by a quantum method. It is not allowed for SMILES input file.
 1032     --confParams <Name,Value,...>  [default: auto]
 1033         A comma delimited list of parameter name and value pairs for generating
 1034         initial sets of 3D conformations for molecules. The 3D conformation ensemble
 1035         is generated using distance geometry and forcefield functionality available
 1036         in RDKit. The 3D structures in the conformation ensemble are subsequently
 1037         minimized by a quantum chemistry method available in Psi4.
 1038        
 1039         The supported parameter names along with their default values are shown
 1040         below:
 1041             
 1042             confMethod,ETKDGv2,
 1043             forceField,MMFF, forceFieldMMFFVariant,MMFF94,
 1044             enforceChirality,yes,embedRMSDCutoff,0.5,maxConfs,50,
 1045             maxIters,250,randomSeed,auto
 1046             
 1047             confMethod,ETKDGv2   [ Possible values: SDG, KDG, ETDG,
 1048                 ETKDG , or ETKDGv2]
 1049             forceField, MMFF   [ Possible values: UFF or MMFF ]
 1050             forceFieldMMFFVariant,MMFF94   [ Possible values: MMFF94 or MMFF94s ]
 1051             enforceChirality,yes   [ Possible values: yes or no ]
 1052             embedRMSDCutoff,0.5   [ Possible values: number or None]
 1053             
 1054         confMethod: Conformation generation methodology for generating initial 3D
 1055         coordinates. Possible values: Standard Distance Geometry (SDG), Experimental
 1056         Torsion-angle preference with Distance Geometry (ETDG), basic Knowledge-terms
 1057         with Distance Geometry (KDG) and Experimental Torsion-angle preference
 1058         along with basic Knowledge-terms and Distance Geometry (ETKDG or
 1059         ETKDGv2) [Ref 129, 167] .
 1060         
 1061         forceField: Forcefield method to use for energy minimization. Possible
 1062         values: Universal Force Field (UFF) [ Ref 81 ] or Merck Molecular Mechanics
 1063         Force Field [ Ref 83-87 ] .
 1064         
 1065         enforceChirality: Enforce chirality for defined chiral centers during
 1066         forcefield minimization.
 1067         
 1068         maxConfs: Maximum number of conformations to generate for each molecule
 1069         during the generation of an initial 3D conformation ensemble using 
 1070         conformation generation methodology. The conformations are minimized
 1071         using the specified forcefield and a quantum chemistry method. The lowest
 1072         energy conformation is written to the output file.
 1073         
 1074         embedRMSDCutoff: RMSD cutoff for retaining initial set conformers embedded
 1075         using distance geometry and before forcefield minimization. All embedded
 1076         conformers are kept for 'None' value. Otherwise, only those conformers which
 1077         are different from each other by the specified RMSD cutoff, 0.5 by default,
 1078         are kept. The first embedded conformer is always retained.
 1079         
 1080         maxIters: Maximum number of iterations to perform for each conformation
 1081         during forcefield minimization.
 1082         
 1083         randomSeed: Seed for the random number generator for reproducing initial
 1084         3D coordinates in a conformation ensemble. Default is to use a random seed.
 1085     --energyOut <yes or no>  [default: yes]
 1086         Write out energy values.
 1087     --energyDataFieldLabel <text>  [default: auto]
 1088         Energy data field label for writing energy values. Default: Psi4_Energy (<Units>). 
 1089     --energyUnits <text>  [default: kcal/mol]
 1090         Energy units. Possible values: Hartrees, kcal/mol, kJ/mol, or eV.
 1091     -e, --examples
 1092         Print examples.
 1093     -h, --help
 1094         Print this help message.
 1095     -i, --infile <infile>
 1096         Input file name.
 1097     --infileParams <Name,Value,...>  [default: auto]
 1098         A comma delimited list of parameter name and value pairs for reading
 1099         molecules from files. The supported parameter names for different file
 1100         formats, along with their default values, are shown below:
 1101             
 1102             SD, MOL: removeHydrogens,no,sanitize,yes,strictParsing,yes
 1103             SMILES: smilesColumn,1,smilesNameColumn,2,smilesDelimiter,space,
 1104                 smilesTitleLine,auto,sanitize,yes
 1105             
 1106         Possible values for smilesDelimiter: space, comma or tab.
 1107     --maxIters <number>  [default: 50]
 1108         Maximum number of iterations to perform for each molecule or conformer
 1109         during energy minimization by a quantum chemistry method.
 1110     -m, --methodName <text>  [default: auto]
 1111         Method to use for energy minimization. Default: B3LYP [ Ref 150 ]. The
 1112         specified value must be a valid Psi4 method name. No validation is
 1113         performed.
 1114         
 1115         The following list shows a representative sample of methods available
 1116         in Psi4:
 1117             
 1118             B1LYP, B2PLYP, B2PLYP-D3BJ, B2PLYP-D3MBJ, B3LYP, B3LYP-D3BJ,
 1119             B3LYP-D3MBJ, CAM-B3LYP, CAM-B3LYP-D3BJ, HF, HF-D3BJ,  HF3c, M05,
 1120             M06, M06-2x, M06-HF, M06-L, MN12-L, MN15, MN15-D3BJ,PBE, PBE0,
 1121             PBEH3c, PW6B95, PW6B95-D3BJ, WB97, WB97X, WB97X-D, WB97X-D3BJ
 1122             
 1123     --mp <yes or no>  [default: no]
 1124         Use multiprocessing.
 1125          
 1126         By default, input data is retrieved in a lazy manner via mp.Pool.imap()
 1127         function employing lazy RDKit data iterable. This allows processing of
 1128         arbitrary large data sets without any additional requirements memory.
 1129         
 1130         All input data may be optionally loaded into memory by mp.Pool.map()
 1131         before starting worker processes in a process pool by setting the value
 1132         of 'inputDataMode' to 'InMemory' in '--mpParams' option.
 1133         
 1134         A word to the wise: The default 'chunkSize' value of 1 during 'Lazy' input
 1135         data mode may adversely impact the performance. The '--mpParams' section
 1136         provides additional information to tune the value of 'chunkSize'.
 1137     --mpLevel <Molecules or Conformers>  [default: Molecules]
 1138         Perform multiprocessing at molecules or conformers level. Possible values:
 1139         Molecules or Conformers. The 'Molecules' value starts a process pool at the
 1140         molecules level. All conformers of a molecule are processed in a single
 1141         process. The 'Conformers' value, however, starts a process pool at the 
 1142         conformers level. Each conformer of a molecule is processed in an individual
 1143         process in the process pool. The default Psi4 'OutputFile' is set to 'quiet'
 1144         using '--psi4RunParams' for 'Conformers' level. Otherwise, it may generate
 1145         a large number of Psi4 output files.
 1146     --mpParams <Name,Value,...>  [default: auto]
 1147         A comma delimited list of parameter name and value pairs to configure
 1148         multiprocessing.
 1149         
 1150         The supported parameter names along with their default and possible
 1151         values are shown below:
 1152         
 1153             chunkSize, auto
 1154             inputDataMode, Lazy   [ Possible values: InMemory or Lazy ]
 1155             numProcesses, auto   [ Default: mp.cpu_count() ]
 1156         
 1157         These parameters are used by the following functions to configure and
 1158         control the behavior of multiprocessing: mp.Pool(), mp.Pool.map(), and
 1159         mp.Pool.imap().
 1160         
 1161         The chunkSize determines chunks of input data passed to each worker
 1162         process in a process pool by mp.Pool.map() and mp.Pool.imap() functions.
 1163         The default value of chunkSize is dependent on the value of 'inputDataMode'.
 1164         
 1165         The mp.Pool.map() function, invoked during 'InMemory' input data mode,
 1166         automatically converts RDKit data iterable into a list, loads all data into
 1167         memory, and calculates the default chunkSize using the following method
 1168         as shown in its code:
 1169         
 1170             chunkSize, extra = divmod(len(dataIterable), len(numProcesses) * 4)
 1171             if extra: chunkSize += 1
 1172         
 1173         For example, the default chunkSize will be 7 for a pool of 4 worker processes
 1174         and 100 data items.
 1175         
 1176         The mp.Pool.imap() function, invoked during 'Lazy' input data mode, employs
 1177         'lazy' RDKit data iterable to retrieve data as needed, without loading all the
 1178         data into memory. Consequently, the size of input data is not known a priori.
 1179         It's not possible to estimate an optimal value for the chunkSize. The default 
 1180         chunkSize is set to 1.
 1181         
 1182         The default value for the chunkSize during 'Lazy' data mode may adversely
 1183         impact the performance due to the overhead associated with exchanging
 1184         small chunks of data. It is generally a good idea to explicitly set chunkSize to
 1185         a larger value during 'Lazy' input data mode, based on the size of your input
 1186         data and number of processes in the process pool.
 1187         
 1188         The mp.Pool.map() function waits for all worker processes to process all
 1189         the data and return the results. The mp.Pool.imap() function, however,
 1190         returns the the results obtained from worker processes as soon as the
 1191         results become available for specified chunks of data.
 1192         
 1193         The order of data in the results returned by both mp.Pool.map() and 
 1194         mp.Pool.imap() functions always corresponds to the input data.
 1195     -o, --outfile <outfile>
 1196         Output file name.
 1197     --outfileParams <Name,Value,...>  [default: auto]
 1198         A comma delimited list of parameter name and value pairs for writing
 1199         molecules to files. The supported parameter names for different file
 1200         formats, along with their default values, are shown below:
 1201             
 1202             SD: kekulize,yes,forceV3000,no
 1203             
 1204     --overwrite
 1205         Overwrite existing files.
 1206     --precision <number>  [default: 6]
 1207         Floating point precision for writing energy values.
 1208     --psi4OptionsParams <Name,Value,...>  [default: none]
 1209         A comma delimited list of Psi4 option name and value pairs for setting
 1210         global and module options. The names are 'option_name' for global options
 1211         and 'module_name__option_name' for options local to a module. The
 1212         specified option names must be valid Psi4 names. No validation is
 1213         performed.
 1214         
 1215         The specified option name and  value pairs are processed and passed to
 1216         psi4.set_options() as a dictionary. The supported value types are float,
 1217         integer, boolean, or string. The float value string is converted into a float.
 1218         The valid values for a boolean string are yes, no, true, false, on, or off. 
 1219     --psi4RunParams <Name,Value,...>  [default: auto]
 1220         A comma delimited list of parameter name and value pairs for configuring
 1221         Psi4 jobs.
 1222         
 1223         The supported parameter names along with their default and possible
 1224         values are shown below:
 1225              
 1226             MemoryInGB, 1
 1227             NumThreads, 1
 1228             OutputFile, auto   [ Possible  values: stdout, quiet, or FileName ]
 1229             ScratchDir, auto   [ Possivle values: DirName]
 1230             RemoveOutputFile, yes   [ Possible values: yes, no, true, or false]
 1231             
 1232         These parameters control the runtime behavior of Psi4.
 1233         
 1234         The default file name for 'OutputFile' is <InFileRoot>_Psi4.out. The PID
 1235         is appended to output file name during multiprocessing as shown:
 1236         <InFileRoot>_Psi4_<PIDNum>.out. The 'stdout' value for 'OutputType'
 1237         sends Psi4 output to stdout. The 'quiet' or 'devnull' value suppresses
 1238         all Psi4 output. The 'OutputFile' is set to 'quiet' for 'auto' value during 
 1239         'Conformers' of '--mpLevel' option.
 1240         
 1241         The default 'Yes' value of 'RemoveOutputFile' option forces the removal
 1242         of any existing Psi4 before creating new files to append output from
 1243         multiple Psi4 runs.
 1244         
 1245         The option 'ScratchDir' is a directory path to the location of scratch
 1246         files. The default value corresponds to Psi4 default. It may be used to
 1247         override the deafult path.
 1248     --psi4DDXSolvation <yes or no>  [default: no]
 1249         Perform minimization in solution using domain-decomposition-based
 1250         continuum solvation models [ Ref 160-161 ]. The script relies on Psi4
 1251         interface to the DDX module to perform these calculations. The DDX library
 1252         provides a linear-scaling implementation of standard continuum solvation
 1253         models using a domain- decomposition ansatz. Two solvation models are
 1254         supported: COnductor-like Screening MOdel (COSMO) [ Ref 162-163] and
 1255         Polarizable Continuum Model (PCM) [ Ref 164-165 ].
 1256         
 1257         The solvation energy is included in the value of the total energy written to 
 1258         the output SD file. In addition, the value of solvation energy is written to the
 1259         output file under its own data field label.
 1260         
 1261         Psi4 relies on Python module PYDDX to calculate solvation energy. It must be
 1262         present in your environment.
 1263     --psi4DDXSolvationParams <Name,Value,...>  [default: auto]
 1264         A space delimited list of parameter name and value  pairs for calculating
 1265         solvation energy using the DDX module. The supported parameter names,
 1266         along with their default values, are shown below:
 1267             
 1268             solvationModel PCM   [ Possible  values: COSMO or PCM]
 1269             solvent water   [ Possible  values: A valid solvent name]
 1270             solventEpsilon None
 1271             radiiSet UFF   [ Possible  values: Bondi or UFF]
 1272             radiiScaling auto   [ Default: 1.2 for Bondi; 1.1 for UFF]
 1273             
 1274             solvationModel: Solvation model for calculating solvation energy.
 1275             The corresponding Psi4 option is DDX_MODEL.
 1276             
 1277             solvent: Solvent to use. The corresponding Ps4 option is
 1278             DDX_SOLVENT
 1279             
 1280             solventEpsilon: Dielectric constant of the solvent. The
 1281             corresponding Psi4 option is DDX_SOLVENT_EPSILON.
 1282             
 1283             radiiSet: Radius set for cavity spheres. The corresponding Psi4
 1284             option is DDX_RADII_SET.
 1285             
 1286             radiiScaling: Scaling factor for cavity spheres. The default value
 1287             depends on radiiSet: 1.2 for Bondi; 1.1 for UFF. The corresponding
 1288             Psi4 option is DDX_RADII_SCALING.
 1289             
 1290         These parameter names are automatically mapped to appropriate DDX keywords.
 1291         
 1292         You may specify the solvent either by directly providing a dielectric constant using
 1293         'solventEpsilon' parameter or a solvent name corresponding to 'solvent' parameter.
 1294         The solvent name must be a valid named supported by the DDX module. For example:
 1295         water, ethanol, dimethylsulfoxide, cis-1,2-dimethylcyclohexane , etc. The 'solvent'
 1296         parameter is ignored for non-zero value of 'solvent' option.
 1297         
 1298         The DDX module contains a variety of addtional options to confgure the calculation
 1299         of solvation energy. For example: DDX_MAXITER, DDX_RADII_SCALING, etc. You may
 1300         use '--psi4OptionsParams' to modify values for additional DDX options.
 1301     --psi4DDXListSolvents
 1302         List solvent names, along with dielectric values, supported by the DDX module
 1303         for the calculation of solvent energy without performing any calculation.
 1304     -q, --quiet <yes or no>  [default: no]
 1305         Use quiet mode. The warning and information messages will not be printed.
 1306     -r, --reference <text>  [default: auto]
 1307         Reference wave function to use for energy calculation. Default: RHF or UHF.
 1308         The default values are Restricted Hartree-Fock (RHF) for closed-shell molecules
 1309         with all electrons paired and Unrestricted Hartree-Fock (UHF) for open-shell
 1310         molecules with unpaired electrons.
 1311         
 1312         The specified value must be a valid Psi4 reference wave function. No validation
 1313         is performed. For example: ROHF, CUHF, RKS, etc.
 1314         
 1315         The spin multiplicity determines the default value of reference wave function
 1316         for input molecules. It is calculated from number of free radical electrons using
 1317         Hund's rule of maximum multiplicity defined as 2S + 1 where S is the total
 1318         electron spin. The total spin is 1/2 the number of free radical electrons in a 
 1319         molecule. The value of 'SpinMultiplicity' molecule property takes precedence
 1320         over the calculated value of spin multiplicity.
 1321     --recenterAndReorient <yes or no>  [default: auto]
 1322         Recenter and reorient a molecule during creation of a Psi4 molecule from
 1323         a geometry string. Default: 'No' during 'No' value of '--confGeneration';
 1324         Otherwise, 'Yes'.
 1325         
 1326         The 'No' values allows the minimization of a molecule in its initial 3D
 1327         coordinate space in input file or conformer ensemble.
 1328     --symmetrize <yes or no>  [default: auto]
 1329         Symmetrize molecules before energy minimization. Default: 'Yes' during
 1330         'Yes' value of '--recenterAndReorient'; Otherwise, 'No'. The psi4 function,
 1331         psi4mol.symmetrize( SymmetrizeTolerance), is called to symmetrize
 1332         the molecule before calling psi4.optimize().
 1333         
 1334         The 'No' value of '--symmetrize' during 'Yes' value of '--recenterAndReorient'
 1335         may cause psi4.optimize() to fail with a 'Point group changed...' error
 1336         message.
 1337     --symmetrizeTolerance <number>  [default: 0.01]
 1338         Symmetry tolerance for '--symmetrize'.
 1339     -w, --workingdir <dir>
 1340         Location of working directory which defaults to the current directory.
 1341 
 1342 Examples:
 1343     To generate an initial conformer ensemble of up to 50 conformations using a
 1344     combination of ETKDGv2 distance geometry methodology, applying RMSD cutoff
 1345     of 0.5 and MMFF forcefield minimization, followed by energy minimization
 1346     using B3LYP/6-31G** and B3LYP/6-31+G** for non-sulfur and sulfur containing
 1347     molecules in a SMILES file and  write out a SD file containing minimum energy
 1348     structure corresponding to each molecule, type:
 1349 
 1350         % Psi4PerformMinimization.py -i Psi4Sample.smi -o Psi4SampleOut.sdf
 1351 
 1352     To run the first example in a quiet mode and write out a SD file, type:
 1353 
 1354         % Psi4PerformMinimization.py -q yes -i Psi4Sample.smi -o
 1355           Psi4SampleOut.sdf --psi4DDXSolvation yes
 1356 
 1357     To run the first example along with performing energy mimimization in solution
 1358     using default DDX solvation parameters for water abd write oyt a SD file, type:
 1359 
 1360         % Psi4PerformMinimization.py -i Psi4Sample.smi -o Psi4SampleOut.sdf
 1361 
 1362     To run the first example in multiprocessing mode at molecules level on all
 1363     available CPUs without loading all data into memory and write out a SD file,
 1364     type:
 1365 
 1366         % Psi4PerformMinimization.py --mp yes -i Psi4Sample.smi -o
 1367           Psi4SampleOut.sdf
 1368 
 1369     To run the first example in multiprocessing mode at conformers level on all
 1370     available CPUs without loading all data into memory and write out a SD file,
 1371     type:
 1372 
 1373         % Psi4PerformMinimization.py --mp yes --mpLevel Conformers
 1374           -i Psi4Sample.smi -o Psi4SampleOut.sdf
 1375 
 1376     To run the first example in multiprocessing mode at molecules level on all
 1377     available CPUs by loading all data into memory and write out a SD file, type:
 1378 
 1379         % Psi4PerformMinimization.py  --mp yes --mpParams "inputDataMode,
 1380           InMemory" -i Psi4Sample.smi -o Psi4SampleOut.sdf
 1381 
 1382     To run the first example in multiprocessing mode at molecules level on specific
 1383     number of CPUs and chunk size without loading all data into memory and write
 1384     out a SD file, type:
 1385 
 1386         % Psi4PerformMinimization.py  --mp yes --mpParams "inputDataMode,Lazy,
 1387           numProcesses,4,chunkSize,8" -i Psi4Sample.smi -o Psi4SampleOut.sdf
 1388 
 1389     To generate an initial conformer ensemble of up to 20 conformations using a
 1390     combination of ETKDGv2 distance geometry methodology and MMFF94s forcefield
 1391     minimization followed by energy minimization for a maxium of 20 iterations
 1392     using B3LYP/6-31+G** molecules in a SMILES file and  write out a SD file
 1393     containing minimum energy structure along with energy in specific units,
 1394     type:
 1395 
 1396         % Psi4PerformMinimization.py --confGeneration yes --confParams
 1397           "confMethod,ETKDGv2,forceField,MMFF, forceFieldMMFFVariant,MMFF94s,
 1398           maxConfs,20,embedRMSDCutoff,0.5" --energyUnits "kJ/mol" -m B3LYP
 1399           -b "6-31+G**" --maxIters 20 -i Psi4Sample.smi -o Psi4SampleOut.sdf
 1400 
 1401     To minimize molecules in a 3D files using B3LYP/6-31G** and B3LYP/6-31+G**
 1402     for non-sulfur and sulfur containing molecules for a maximum of 25 iterations
 1403     without generating any conformations and write out a SD file containing 
 1404     minimum energy structures corresponding to each molecule, type:
 1405 
 1406         % Psi4PerformMinimization.py --confGeneration no --maxIters 25
 1407           -i Psi4Sample3D.sdf -o Psi4Sample3DOut.sdf
 1408 
 1409     To run the first example for molecules in a CSV SMILES file, SMILES strings
 1410     in column 1, name column 2, and write out a SD file, type:
 1411 
 1412         % Psi4PerformMinimization.py --infileParams "smilesDelimiter,comma,
 1413           smilesTitleLine,yes,smilesColumn,1,smilesNameColumn,2"
 1414           -i Psi4Sample.csv -o Psi4SampleOut.sdf
 1415 
 1416 Author:
 1417 
 1418     Manish Sud(msud@san.rr.com)
 1419 
 1420 See also:
 1421     Psi4CalculateEnergy.py, Psi4CalculatePartialCharges.py, Psi4GenerateConformers.py
 1422 
 1423 Copyright:
 1424     Copyright (C) 2024 Manish Sud. All rights reserved.
 1425 
 1426     The functionality available in this script is implemented using Psi4, an
 1427     open source quantum chemistry software package, and RDKit, an open
 1428     source toolkit for cheminformatics developed by Greg Landrum.
 1429 
 1430     This file is part of MayaChemTools.
 1431 
 1432     MayaChemTools is free software; you can redistribute it and/or modify it under
 1433     the terms of the GNU Lesser General Public License as published by the Free
 1434     Software Foundation; either version 3 of the License, or (at your option) any
 1435     later version.
 1436 
 1437 """
 1438 
 1439 if __name__ == "__main__":
 1440     main()