MayaChemTools

    1 #!/bin/env python
    2 #
    3 # File: Psi4PerformMinimization.py
    4 # Author: Manish Sud <msud@san.rr.com>
    5 #
    6 # Contributor: Francois Berenger
    7 #
    8 # Copyright (C) 2025 Manish Sud. All rights reserved.
    9 #
   10 # The functionality available in this script is implemented using Psi4, an
   11 # open source quantum chemistry software package, and RDKit, an open
   12 # source toolkit for cheminformatics developed by Greg Landrum.
   13 #
   14 # This file is part of MayaChemTools.
   15 #
   16 # MayaChemTools is free software; you can redistribute it and/or modify it under
   17 # the terms of the GNU Lesser General Public License as published by the Free
   18 # Software Foundation; either version 3 of the License, or (at your option) any
   19 # later version.
   20 #
   21 # MayaChemTools is distributed in the hope that it will be useful, but without
   22 # any warranty; without even the implied warranty of merchantability of fitness
   23 # for a particular purpose.  See the GNU Lesser General Public License for more
   24 # details.
   25 #
   26 # You should have received a copy of the GNU Lesser General Public License
   27 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
   28 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
   29 # Boston, MA, 02111-1307, USA.
   30 #
   31 
   32 from __future__ import print_function
   33 
   34 import os
   35 import sys
   36 import time
   37 import re
   38 import shutil
   39 import multiprocessing as mp
   40 
   41 # Psi4 imports...
   42 if hasattr(shutil, "which") and shutil.which("psi4") is None:
   43     sys.stderr.write("\nWarning: Failed to find 'psi4' in your PATH indicating potential issues with your\n")
   44     sys.stderr.write("Psi4 environment. The 'import psi4' directive in the global scope of the script\n")
   45     sys.stderr.write("interferes with the multiprocessing functionality. It is imported later in the\n")
   46     sys.stderr.write("local scope during the execution of the script and may fail. Check/update your\n")
   47     sys.stderr.write("Psi4 environment and try again.\n\n")
   48 
   49 # RDKit imports...
   50 try:
   51     from rdkit import rdBase
   52     from rdkit import Chem
   53     from rdkit.Chem import AllChem
   54 except ImportError as ErrMsg:
   55     sys.stderr.write("\nFailed to import RDKit module/package: %s\n" % ErrMsg)
   56     sys.stderr.write("Check/update your RDKit environment and try again.\n\n")
   57     sys.exit(1)
   58 
   59 # MayaChemTools imports...
   60 sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python"))
   61 try:
   62     from docopt import docopt
   63     import MiscUtil
   64     import Psi4Util
   65     import RDKitUtil
   66 except ImportError as ErrMsg:
   67     sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg)
   68     sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n")
   69     sys.exit(1)
   70 
   71 ScriptName = os.path.basename(sys.argv[0])
   72 Options = {}
   73 OptionsInfo = {}
   74 
   75 
   76 def main():
   77     """Start execution of the script."""
   78 
   79     MiscUtil.PrintInfo(
   80         "\n%s (Psi4: Imported later; RDKit v%s; MayaChemTools v%s; %s): Starting...\n"
   81         % (ScriptName, rdBase.rdkitVersion, MiscUtil.GetMayaChemToolsVersion(), time.asctime())
   82     )
   83 
   84     (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime()
   85 
   86     # Retrieve command line arguments and options...
   87     RetrieveOptions()
   88 
   89     # Process and validate command line arguments and options...
   90     ProcessOptions()
   91 
   92     # Perform actions required by the script...
   93     PerformMinimization()
   94 
   95     MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName)
   96     MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime))
   97 
   98 
   99 def PerformMinimization():
  100     """Perform minimization."""
  101 
  102     # Setup a molecule reader...
  103     MiscUtil.PrintInfo("\nProcessing file %s..." % OptionsInfo["Infile"])
  104     Mols = RDKitUtil.ReadMolecules(OptionsInfo["Infile"], **OptionsInfo["InfileParams"])
  105 
  106     # Set up a molecule writer...
  107     Writer = RDKitUtil.MoleculesWriter(OptionsInfo["Outfile"], **OptionsInfo["OutfileParams"])
  108     if Writer is None:
  109         MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OptionsInfo["Outfile"])
  110     MiscUtil.PrintInfo("Generating file %s..." % OptionsInfo["Outfile"])
  111 
  112     MolCount, ValidMolCount, MinimizationFailedCount, WriteFailedCount = ProcessMolecules(Mols, Writer)
  113 
  114     if Writer is not None:
  115         Writer.close()
  116 
  117     MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount)
  118     MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount)
  119     MiscUtil.PrintInfo(
  120         "Number of molecules failed during conformation generation or minimization: %d" % MinimizationFailedCount
  121     )
  122     MiscUtil.PrintInfo("Number of molecules failed during writing: %d" % WriteFailedCount)
  123     MiscUtil.PrintInfo(
  124         "Number of ignored molecules: %d" % (MolCount - ValidMolCount + MinimizationFailedCount + WriteFailedCount)
  125     )
  126 
  127 
  128 def ProcessMolecules(Mols, Writer):
  129     """Process and minimize molecules."""
  130 
  131     if OptionsInfo["MPMode"]:
  132         return ProcessMoleculesUsingMultipleProcesses(Mols, Writer)
  133     else:
  134         return ProcessMoleculesUsingSingleProcess(Mols, Writer)
  135 
  136 
  137 def ProcessMoleculesUsingSingleProcess(Mols, Writer):
  138     """Process and minimize molecules using a single process."""
  139 
  140     # Intialize Psi4...
  141     MiscUtil.PrintInfo("\nInitializing Psi4...")
  142     Psi4Handle = Psi4Util.InitializePsi4(
  143         Psi4RunParams=OptionsInfo["Psi4RunParams"],
  144         Psi4OptionsParams=OptionsInfo["Psi4OptionsParams"],
  145         PrintVersion=True,
  146         PrintHeader=True,
  147     )
  148     if OptionsInfo["Psi4DDXSolvationMode"]:
  149         Psi4Util.UpdatePsi4OptionsParameters(Psi4Handle, OptionsInfo["Psi4DDXSolvationOptions"])
  150     OptionsInfo["psi4"] = Psi4Handle
  151 
  152     # Setup max iterations global variable...
  153     Psi4Util.UpdatePsi4OptionsParameters(Psi4Handle, {"GEOM_MAXITER": OptionsInfo["MaxIters"]})
  154 
  155     # Setup conversion factor for energy units...
  156     SetupEnergyConversionFactor(Psi4Handle)
  157 
  158     if OptionsInfo["SkipConformerGeneration"]:
  159         if OptionsInfo["Psi4ConstrainTorsionsMode"]:
  160             MiscUtil.PrintInfo("\nPerforming minimization without generation of conformers and constrained torsions...")
  161         else:
  162             MiscUtil.PrintInfo("\nPerforming minimization without generation of conformers...")
  163     else:
  164         MiscUtil.PrintInfo("\nPerforming minimization with generation of conformers...")
  165 
  166     (MolCount, ValidMolCount, MinimizationFailedCount, WriteFailedCount) = [0] * 4
  167     for Mol in Mols:
  168         MolCount += 1
  169 
  170         if not CheckAndValidateMolecule(Mol, MolCount):
  171             continue
  172 
  173         # Setup 2D coordinates for SMILES input file...
  174         if OptionsInfo["SMILESInfileStatus"]:
  175             AllChem.Compute2DCoords(Mol)
  176 
  177         ValidMolCount += 1
  178 
  179         Mol, CalcStatus, ConfID, Energy, SolvationEnergy = MinimizeMoleculeOrConformers(Psi4Handle, Mol, MolCount)
  180 
  181         if not CalcStatus:
  182             if not OptionsInfo["QuietMode"]:
  183                 MolName = RDKitUtil.GetMolName(Mol, MolCount)
  184                 MiscUtil.PrintWarning("Failed to calculate energy for molecule %s" % MolName)
  185 
  186             MinimizationFailedCount += 1
  187             continue
  188 
  189         WriteStatus = WriteMolecule(Writer, Mol, MolCount, ConfID, Energy, SolvationEnergy)
  190 
  191         if not WriteStatus:
  192             WriteFailedCount += 1
  193 
  194     return (MolCount, ValidMolCount, MinimizationFailedCount, WriteFailedCount)
  195 
  196 
  197 def ProcessMoleculesUsingMultipleProcesses(Mols, Writer):
  198     """Process and minimize molecules using multiprocessing."""
  199 
  200     if OptionsInfo["MPLevelConformersMode"]:
  201         return ProcessMoleculesUsingMultipleProcessesAtConformersLevel(Mols, Writer)
  202     elif OptionsInfo["MPLevelMoleculesMode"]:
  203         return ProcessMoleculesUsingMultipleProcessesAtMoleculesLevel(Mols, Writer)
  204     else:
  205         MiscUtil.PrintError('The value, %s,  option "--mpLevel" is not supported.' % (OptionsInfo["MPLevel"]))
  206 
  207 
  208 def ProcessMoleculesUsingMultipleProcessesAtMoleculesLevel(Mols, Writer):
  209     """Process and minimize molecules using multiprocessing at molecules level."""
  210 
  211     if OptionsInfo["SkipConformerGeneration"]:
  212         if OptionsInfo["Psi4ConstrainTorsionsMode"]:
  213             MiscUtil.PrintInfo(
  214                 "\nPerforming minimization without generation of conformers and constrained torsions using multiprocessing at molecules level..."
  215             )
  216         else:
  217             MiscUtil.PrintInfo(
  218                 "\nPerforming minimization without generation of conformers using multiprocessing at molecules level..."
  219             )
  220     else:
  221         MiscUtil.PrintInfo(
  222             "\nPerforming minimization with generation of conformers using multiprocessing at molecules level..."
  223         )
  224 
  225     MPParams = OptionsInfo["MPParams"]
  226 
  227     # Setup data for initializing a worker process...
  228     InitializeWorkerProcessArgs = (
  229         MiscUtil.ObjectToBase64EncodedString(Options),
  230         MiscUtil.ObjectToBase64EncodedString(OptionsInfo),
  231     )
  232 
  233     # Setup a encoded mols data iterable for a worker process...
  234     WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(Mols)
  235 
  236     # Setup process pool along with data initialization for each process...
  237     if not OptionsInfo["QuietMode"]:
  238         MiscUtil.PrintInfo(
  239             "\nConfiguring multiprocessing using %s method..."
  240             % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()")
  241         )
  242         MiscUtil.PrintInfo(
  243             "NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n"
  244             % (
  245                 MPParams["NumProcesses"],
  246                 MPParams["InputDataMode"],
  247                 ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"]),
  248             )
  249         )
  250 
  251     ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeWorkerProcess, InitializeWorkerProcessArgs)
  252 
  253     # Start processing...
  254     if re.match("^Lazy$", MPParams["InputDataMode"], re.I):
  255         Results = ProcessPool.imap(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
  256     elif re.match("^InMemory$", MPParams["InputDataMode"], re.I):
  257         Results = ProcessPool.map(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
  258     else:
  259         MiscUtil.PrintError(
  260             'The value, %s, specified for "--inputDataMode" is not supported.' % (MPParams["InputDataMode"])
  261         )
  262 
  263     # Print out Psi4 version in the main process...
  264     MiscUtil.PrintInfo("\nInitializing Psi4...\n")
  265     Psi4Handle = Psi4Util.InitializePsi4(PrintVersion=True, PrintHeader=False)
  266     OptionsInfo["psi4"] = Psi4Handle
  267 
  268     (MolCount, ValidMolCount, MinimizationFailedCount, WriteFailedCount) = [0] * 4
  269     for Result in Results:
  270         MolCount += 1
  271         MolIndex, EncodedMol, CalcStatus, ConfID, Energy, SolvationEnergy = Result
  272 
  273         if EncodedMol is None:
  274             continue
  275         ValidMolCount += 1
  276 
  277         if not CalcStatus:
  278             MinimizationFailedCount += 1
  279             continue
  280 
  281         Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
  282         WriteStatus = WriteMolecule(Writer, Mol, MolCount, ConfID, Energy, SolvationEnergy)
  283 
  284         if not WriteStatus:
  285             WriteFailedCount += 1
  286 
  287     return (MolCount, ValidMolCount, MinimizationFailedCount, WriteFailedCount)
  288 
  289 
  290 def InitializeWorkerProcess(*EncodedArgs):
  291     """Initialize data for a worker process."""
  292 
  293     global Options, OptionsInfo
  294 
  295     if not OptionsInfo["QuietMode"]:
  296         MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid())
  297 
  298     # Decode Options and OptionInfo...
  299     Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0])
  300     OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1])
  301 
  302     # Psi4 is initialized in the worker process to avoid creation of redundant Psi4
  303     # output files for each process...
  304     OptionsInfo["Psi4Initialized"] = False
  305 
  306 
  307 def WorkerProcess(EncodedMolInfo):
  308     """Process data for a worker process."""
  309 
  310     if not OptionsInfo["Psi4Initialized"]:
  311         InitializePsi4ForWorkerProcess()
  312 
  313     MolIndex, EncodedMol = EncodedMolInfo
  314 
  315     MolNum = MolIndex + 1
  316     CalcStatus = False
  317     ConfID = None
  318     Energy = None
  319     SolvationEnergy = None
  320 
  321     if EncodedMol is None:
  322         return [MolIndex, None, CalcStatus, ConfID, Energy, SolvationEnergy]
  323 
  324     Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
  325 
  326     if not CheckAndValidateMolecule(Mol, MolNum):
  327         return [MolIndex, None, CalcStatus, ConfID, Energy, SolvationEnergy]
  328 
  329     # Setup 2D coordinates for SMILES input file...
  330     if OptionsInfo["SMILESInfileStatus"]:
  331         AllChem.Compute2DCoords(Mol)
  332 
  333     Mol, CalcStatus, ConfID, Energy, SolvationEnergy = MinimizeMoleculeOrConformers(OptionsInfo["psi4"], Mol, MolNum)
  334 
  335     return [
  336         MolIndex,
  337         RDKitUtil.MolToBase64EncodedMolString(
  338             Mol, PropertyPickleFlags=Chem.PropertyPickleOptions.MolProps | Chem.PropertyPickleOptions.PrivateProps
  339         ),
  340         CalcStatus,
  341         ConfID,
  342         Energy,
  343         SolvationEnergy,
  344     ]
  345 
  346 
  347 def ProcessMoleculesUsingMultipleProcessesAtConformersLevel(Mols, Writer):
  348     """Process and minimize molecules using multiprocessing at conformers level."""
  349 
  350     MiscUtil.PrintInfo(
  351         "\nPerforming minimization with generation of conformers using multiprocessing at conformers level..."
  352     )
  353 
  354     (MolCount, ValidMolCount, MinimizationFailedCount, WriteFailedCount) = [0] * 4
  355     for Mol in Mols:
  356         MolCount += 1
  357 
  358         if not CheckAndValidateMolecule(Mol, MolCount):
  359             continue
  360 
  361         # Setup 2D coordinates for SMILES input file...
  362         if OptionsInfo["SMILESInfileStatus"]:
  363             AllChem.Compute2DCoords(Mol)
  364 
  365         ValidMolCount += 1
  366 
  367         Mol, CalcStatus, ConfID, Energy, SolvationEnergy = ProcessConformersUsingMultipleProcesses(Mol, MolCount)
  368 
  369         if not CalcStatus:
  370             if not OptionsInfo["QuietMode"]:
  371                 MolName = RDKitUtil.GetMolName(Mol, MolCount)
  372                 MiscUtil.PrintWarning("Failed to calculate energy for molecule %s" % MolName)
  373 
  374             MinimizationFailedCount += 1
  375             continue
  376 
  377         WriteStatus = WriteMolecule(Writer, Mol, MolCount, ConfID, Energy, SolvationEnergy)
  378 
  379         if not WriteStatus:
  380             WriteFailedCount += 1
  381 
  382     return (MolCount, ValidMolCount, MinimizationFailedCount, WriteFailedCount)
  383 
  384 
  385 def ProcessConformersUsingMultipleProcesses(Mol, MolNum):
  386     """Generate conformers and minimize them using multiple processes."""
  387 
  388     # Add hydrogens...
  389     Mol = Chem.AddHs(Mol)
  390 
  391     # Setup conformers...
  392     ConfIDs = EmbedMolecule(Mol, MolNum)
  393     if not len(ConfIDs):
  394         if not OptionsInfo["QuietMode"]:
  395             MolName = RDKitUtil.GetMolName(Mol, MolNum)
  396             MiscUtil.PrintWarning("Minimization couldn't be performed for molecule %s: Embedding failed...\n" % MolName)
  397         return (Mol, False, None, None, None)
  398 
  399     MPParams = OptionsInfo["MPParams"]
  400 
  401     # Setup data for initializing a worker process...
  402     InitializeWorkerProcessArgs = (
  403         MiscUtil.ObjectToBase64EncodedString(Options),
  404         MiscUtil.ObjectToBase64EncodedString(OptionsInfo),
  405     )
  406 
  407     # Setup a encoded mols data iterable for a worker process...
  408     MolIndex = MolNum - 1
  409     WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStringWithConfIDs(Mol, MolIndex, ConfIDs)
  410 
  411     # Setup process pool along with data initialization for each process...
  412     if not OptionsInfo["QuietMode"]:
  413         MiscUtil.PrintInfo(
  414             "\nConfiguring multiprocessing using %s method..."
  415             % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()")
  416         )
  417         MiscUtil.PrintInfo(
  418             "NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n"
  419             % (
  420                 MPParams["NumProcesses"],
  421                 MPParams["InputDataMode"],
  422                 ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"]),
  423             )
  424         )
  425 
  426     ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeConformerWorkerProcess, InitializeWorkerProcessArgs)
  427 
  428     # Start processing...
  429     if re.match("^Lazy$", MPParams["InputDataMode"], re.I):
  430         Results = ProcessPool.imap(ConformerWorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
  431     elif re.match("^InMemory$", MPParams["InputDataMode"], re.I):
  432         Results = ProcessPool.map(ConformerWorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
  433     else:
  434         MiscUtil.PrintError(
  435             'The value, %s, specified for "--inputDataMode" is not supported.' % (MPParams["InputDataMode"])
  436         )
  437 
  438     CalcEnergyMap = {}
  439     CalcSolvationEnergyMap = {}
  440     CalcFailedCount = 0
  441     for Result in Results:
  442         MolIndex, EncodedMol, CalcStatus, ConfID, Energy, SolvationEnergy = Result
  443 
  444         if EncodedMol is None:
  445             CalcFailedCount += 1
  446             continue
  447 
  448         if not CalcStatus:
  449             CalcFailedCount += 1
  450             continue
  451 
  452         # Retrieve minimized atom positions...
  453         MinimizedMol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
  454         AtomPositions = RDKitUtil.GetAtomPositions(MinimizedMol, ConfID=ConfID)
  455 
  456         # Update atom positions...
  457         RDKitUtil.SetAtomPositions(Mol, AtomPositions, ConfID=ConfID)
  458 
  459         CalcEnergyMap[ConfID] = Energy
  460         CalcSolvationEnergyMap[ConfID] = SolvationEnergy
  461 
  462     if CalcFailedCount:
  463         return (Mol, False, None, None, None)
  464 
  465     SortedConfIDs = sorted(ConfIDs, key=lambda ConfID: CalcEnergyMap[ConfID])
  466     MinEnergyConfID = SortedConfIDs[0]
  467     MinEnergy = CalcEnergyMap[MinEnergyConfID]
  468     SolvationEnergy = CalcSolvationEnergyMap[MinEnergyConfID]
  469 
  470     return (Mol, True, MinEnergyConfID, MinEnergy, SolvationEnergy)
  471 
  472 
  473 def InitializeConformerWorkerProcess(*EncodedArgs):
  474     """Initialize data for a conformer worker process."""
  475 
  476     global Options, OptionsInfo
  477 
  478     if not OptionsInfo["QuietMode"]:
  479         MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid())
  480 
  481     # Decode Options and OptionInfo...
  482     Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0])
  483     OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1])
  484 
  485     # Psi4 is initialized in the worker process to avoid creation of redundant Psi4
  486     # output files for each process...
  487     OptionsInfo["Psi4Initialized"] = False
  488 
  489 
  490 def ConformerWorkerProcess(EncodedMolInfo):
  491     """Process data for a conformer worker process."""
  492 
  493     if not OptionsInfo["Psi4Initialized"]:
  494         InitializePsi4ForWorkerProcess()
  495 
  496     MolIndex, EncodedMol, ConfID = EncodedMolInfo
  497 
  498     MolNum = MolIndex + 1
  499 
  500     CalcStatus = False
  501     Energy = None
  502     SolvationEnergy = None
  503 
  504     if EncodedMol is None:
  505         return [MolIndex, None, CalcStatus, ConfID, Energy, SolvationEnergy]
  506 
  507     Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
  508     MolName = RDKitUtil.GetMolName(Mol, MolNum)
  509 
  510     if not OptionsInfo["QuietMode"]:
  511         MiscUtil.PrintInfo("Processing molecule %s conformer ID %s..." % (MolName, ConfID))
  512 
  513     Status, ConvergeStatus = MinimizeMoleculeUsingForceField(Mol, MolNum, ConfID)
  514     if not Status:
  515         return [MolIndex, EncodedMol, CalcStatus, ConfID, Energy, SolvationEnergy]
  516 
  517     if ConvergeStatus != 0:
  518         if not OptionsInfo["QuietMode"]:
  519             MolName = RDKitUtil.GetMolName(Mol, (MolIndex + 1))
  520             MiscUtil.PrintWarning(
  521                 'Minimization using forcefield failed to converge for molecule %s in %d steps. Try using higher value for "maxIters" in "--confParams" option...\n'
  522                 % (MolName, OptionsInfo["ConfGenerationParams"]["MaxIters"])
  523             )
  524 
  525     # Perform Psi4 minimization...
  526     CalcStatus, Energy, SolvationEnergy = MinimizeMoleculeUsingPsi4(OptionsInfo["psi4"], Mol, MolNum, ConfID)
  527     if not CalcStatus:
  528         if not OptionsInfo["QuietMode"]:
  529             MiscUtil.PrintWarning("Minimization couldn't be performed for molecule %s\n" % (MolName))
  530             return [MolIndex, EncodedMol, False, ConfID, Energy, SolvationEnergy]
  531 
  532     return [
  533         MolIndex,
  534         RDKitUtil.MolToBase64EncodedMolString(
  535             Mol, PropertyPickleFlags=Chem.PropertyPickleOptions.MolProps | Chem.PropertyPickleOptions.PrivateProps
  536         ),
  537         CalcStatus,
  538         ConfID,
  539         Energy,
  540         SolvationEnergy,
  541     ]
  542 
  543 
  544 def InitializePsi4ForWorkerProcess():
  545     """Initialize Psi4 for a worker process."""
  546 
  547     if OptionsInfo["Psi4Initialized"]:
  548         return
  549 
  550     OptionsInfo["Psi4Initialized"] = True
  551 
  552     if OptionsInfo["MPLevelConformersMode"] and re.match(
  553         "auto", OptionsInfo["Psi4RunParams"]["OutputFileSpecified"], re.I
  554     ):
  555         # Run Psi4 in quiet mode during multiprocessing at Conformers level for 'auto' OutputFile...
  556         OptionsInfo["Psi4RunParams"]["OutputFile"] = "quiet"
  557     else:
  558         # Update output file...
  559         OptionsInfo["Psi4RunParams"]["OutputFile"] = Psi4Util.UpdatePsi4OutputFileUsingPID(
  560             OptionsInfo["Psi4RunParams"]["OutputFile"], os.getpid()
  561         )
  562 
  563     # Intialize Psi4...
  564     OptionsInfo["psi4"] = Psi4Util.InitializePsi4(
  565         Psi4RunParams=OptionsInfo["Psi4RunParams"],
  566         Psi4OptionsParams=OptionsInfo["Psi4OptionsParams"],
  567         PrintVersion=False,
  568         PrintHeader=True,
  569     )
  570 
  571     if OptionsInfo["Psi4DDXSolvationMode"]:
  572         Psi4Util.UpdatePsi4OptionsParameters(OptionsInfo["psi4"], OptionsInfo["Psi4DDXSolvationOptions"])
  573 
  574     # Setup max iterations global variable...
  575     Psi4Util.UpdatePsi4OptionsParameters(OptionsInfo["psi4"], {"GEOM_MAXITER": OptionsInfo["MaxIters"]})
  576 
  577     # Setup conversion factor for energy units...
  578     SetupEnergyConversionFactor(OptionsInfo["psi4"])
  579 
  580 
  581 def MinimizeMoleculeOrConformers(Psi4Handle, Mol, MolNum):
  582     """Minimize molecule or conformers."""
  583 
  584     ConfID = None
  585     if OptionsInfo["SkipConformerGeneration"]:
  586         Mol, CalcStatus, Energy, SolvationEnergy = MinimizeMolecule(Psi4Handle, Mol, MolNum)
  587     else:
  588         Mol, CalcStatus, ConfID, Energy, SolvationEnergy = GenerateAndMinimizeConformers(Psi4Handle, Mol, MolNum)
  589 
  590     return (Mol, CalcStatus, ConfID, Energy, SolvationEnergy)
  591 
  592 
  593 def GenerateAndMinimizeConformers(Psi4Handle, Mol, MolNum=None):
  594     """Generate and minimize conformers for a molecule to get the lowest energy conformer."""
  595 
  596     # Add hydrogens..
  597     Mol = Chem.AddHs(Mol)
  598 
  599     MolName = RDKitUtil.GetMolName(Mol, MolNum)
  600 
  601     # Setup conformers...
  602     ConfIDs = EmbedMolecule(Mol, MolNum)
  603     if not len(ConfIDs):
  604         if not OptionsInfo["QuietMode"]:
  605             MiscUtil.PrintWarning("Minimization couldn't be performed for molecule %s: Embedding failed...\n" % MolName)
  606         return (Mol, False, None, None, None)
  607 
  608     # Minimize conformers...
  609     CalcEnergyMap = {}
  610     CalcSolvationEnergyMap = {}
  611     for ConfID in ConfIDs:
  612         if not OptionsInfo["QuietMode"]:
  613             MiscUtil.PrintInfo("Processing molecule %s conformer ID %s..." % (MolName, ConfID))
  614 
  615         # Perform forcefield minimization...
  616         Status, ConvergeStatus = MinimizeMoleculeUsingForceField(Mol, MolNum, ConfID)
  617         if not Status:
  618             return (Mol, False, None, None, None)
  619 
  620         if ConvergeStatus != 0:
  621             if not OptionsInfo["QuietMode"]:
  622                 MiscUtil.PrintWarning(
  623                     'Minimization using forcefield failed to converge for molecule %s in %d steps. Try using higher value for "maxIters" in "--confParams" option...\n'
  624                     % (MolName, OptionsInfo["ConfGenerationParams"]["MaxIters"])
  625                 )
  626 
  627         # Perform Psi4 minimization...
  628         CalcStatus, Energy, SolvationEnergy = MinimizeMoleculeUsingPsi4(Psi4Handle, Mol, MolNum, ConfID)
  629         if not CalcStatus:
  630             if not OptionsInfo["QuietMode"]:
  631                 MiscUtil.PrintWarning("Minimization couldn't be performed for molecule %s\n" % (MolName))
  632             return (Mol, False, None, None, None)
  633 
  634         CalcEnergyMap[ConfID] = Energy
  635         CalcSolvationEnergyMap[ConfID] = SolvationEnergy
  636 
  637     # Select the lowest energy conformer..
  638     SortedConfIDs = sorted(ConfIDs, key=lambda ConfID: CalcEnergyMap[ConfID])
  639     MinEnergyConfID = SortedConfIDs[0]
  640     Energy = CalcEnergyMap[MinEnergyConfID]
  641     SolvationEnergy = CalcSolvationEnergyMap[MinEnergyConfID]
  642 
  643     return (Mol, True, MinEnergyConfID, Energy, SolvationEnergy)
  644 
  645 
  646 def MinimizeMolecule(Psi4Handle, Mol, MolNum):
  647     """Minimize molecule."""
  648 
  649     # Skip conformation generation and forcefield minimization...
  650     CalcStatus, Energy, SolvationEnergy = MinimizeMoleculeUsingPsi4(Psi4Handle, Mol, MolNum)
  651 
  652     return (Mol, CalcStatus, Energy, SolvationEnergy)
  653 
  654 
  655 def MinimizeMoleculeUsingPsi4(Psi4Handle, Mol, MolNum, ConfID=-1):
  656     """Minimize molecule using Psi4."""
  657 
  658     # Setup a Psi4Mol...
  659     Psi4Mol = SetupPsi4Mol(Psi4Handle, Mol, MolNum, ConfID)
  660     if Psi4Mol is None:
  661         return (False, None, None)
  662 
  663     #  Setup reference wave function...
  664     Reference = SetupReferenceWavefunction(Mol)
  665     Psi4Handle.set_options({"Reference": Reference})
  666 
  667     # Setup method name and basis set...
  668     MethodName, BasisSet = SetupMethodNameAndBasisSet(Mol)
  669 
  670     # Setup constrained torsions...
  671     if OptionsInfo["Psi4ConstrainTorsionsMode"]:
  672         SetupConstrainedTorsionsAroundRotatableBonds(Psi4Handle, Mol)
  673 
  674     # Optimize geometry...
  675     Status, Energy, WaveFunction = Psi4Util.PerformGeometryOptimization(
  676         Psi4Handle, Psi4Mol, MethodName, BasisSet, ReturnWaveFunction=True, Quiet=OptionsInfo["QuietMode"]
  677     )
  678 
  679     if not Status:
  680         PerformPsi4Cleanup(Psi4Handle)
  681         return (False, None, None)
  682 
  683     SolvationEnergy = None
  684     if OptionsInfo["Psi4DDXSolvationMode"]:
  685         SolvationEnergy = RetrievePsi4DDXSolvationEnergy(WaveFunction)
  686 
  687     # Update atom positions...
  688     AtomPositions = Psi4Util.GetAtomPositions(Psi4Handle, WaveFunction, InAngstroms=True)
  689     RDKitUtil.SetAtomPositions(Mol, AtomPositions, ConfID=ConfID)
  690 
  691     # Convert energy units...
  692     if OptionsInfo["ApplyEnergyConversionFactor"]:
  693         Energy = Energy * OptionsInfo["EnergyConversionFactor"]
  694         if SolvationEnergy is not None:
  695             SolvationEnergy = SolvationEnergy * OptionsInfo["EnergyConversionFactor"]
  696 
  697     # Clean up
  698     PerformPsi4Cleanup(Psi4Handle)
  699 
  700     return (True, Energy, SolvationEnergy)
  701 
  702 
  703 def RetrievePsi4DDXSolvationEnergy(WaveFunction):
  704     """Retrieve DDX solvation energy."""
  705 
  706     SolvationEnergy = None
  707     try:
  708         SolvationEnergy = WaveFunction.variable("DD Solvation Energy")
  709     except Exception as ErrMsg:
  710         SolvationEnergy = None
  711         if not OptionsInfo["QuietMode"]:
  712             MiscUtil.PrintWarning("Failed to retrieve Psi4 DD Solvation Energy using wave function: %s" % ErrMsg)
  713 
  714     return SolvationEnergy
  715 
  716 
  717 def MinimizeMoleculeUsingForceField(Mol, MolNum, ConfID=-1):
  718     """Minimize molecule using forcefield available in RDKit."""
  719 
  720     try:
  721         if OptionsInfo["ConfGenerationParams"]["UseUFF"]:
  722             ConvergeStatus = AllChem.UFFOptimizeMolecule(
  723                 Mol, confId=ConfID, maxIters=OptionsInfo["ConfGenerationParams"]["MaxIters"]
  724             )
  725         elif OptionsInfo["ConfGenerationParams"]["UseMMFF"]:
  726             ConvergeStatus = AllChem.MMFFOptimizeMolecule(
  727                 Mol,
  728                 confId=ConfID,
  729                 maxIters=OptionsInfo["ConfGenerationParams"]["MaxIters"],
  730                 mmffVariant=OptionsInfo["ConfGenerationParams"]["MMFFVariant"],
  731             )
  732         else:
  733             MiscUtil.PrintError(
  734                 "Minimization couldn't be performed: Specified forcefield, %s, is not supported"
  735                 % OptionsInfo["ConfGenerationParams"]["ForceField"]
  736             )
  737     except (ValueError, RuntimeError, Chem.rdchem.KekulizeException) as ErrMsg:
  738         if not OptionsInfo["QuietMode"]:
  739             MolName = RDKitUtil.GetMolName(Mol, MolNum)
  740             MiscUtil.PrintWarning(
  741                 "Minimization using forcefield couldn't be performed for molecule %s:\n%s\n" % (MolName, ErrMsg)
  742             )
  743         return (False, None)
  744 
  745     return (True, ConvergeStatus)
  746 
  747 
  748 def EmbedMolecule(Mol, MolNum=None):
  749     """Embed conformations."""
  750 
  751     ConfIDs = []
  752 
  753     MaxConfs = OptionsInfo["ConfGenerationParams"]["MaxConfs"]
  754     RandomSeed = OptionsInfo["ConfGenerationParams"]["RandomSeed"]
  755     EnforceChirality = OptionsInfo["ConfGenerationParams"]["EnforceChirality"]
  756     UseExpTorsionAnglePrefs = OptionsInfo["ConfGenerationParams"]["UseExpTorsionAnglePrefs"]
  757     ETVersion = OptionsInfo["ConfGenerationParams"]["ETVersion"]
  758     UseBasicKnowledge = OptionsInfo["ConfGenerationParams"]["UseBasicKnowledge"]
  759     EmbedRMSDCutoff = OptionsInfo["ConfGenerationParams"]["EmbedRMSDCutoff"]
  760 
  761     try:
  762         ConfIDs = AllChem.EmbedMultipleConfs(
  763             Mol,
  764             numConfs=MaxConfs,
  765             randomSeed=RandomSeed,
  766             pruneRmsThresh=EmbedRMSDCutoff,
  767             enforceChirality=EnforceChirality,
  768             useExpTorsionAnglePrefs=UseExpTorsionAnglePrefs,
  769             useBasicKnowledge=UseBasicKnowledge,
  770             ETversion=ETVersion,
  771         )
  772     except ValueError as ErrMsg:
  773         if not OptionsInfo["QuietMode"]:
  774             MolName = RDKitUtil.GetMolName(Mol, MolNum)
  775             MiscUtil.PrintWarning("Embedding failed  for molecule %s:\n%s\n" % (MolName, ErrMsg))
  776         ConfIDs = []
  777 
  778     if not OptionsInfo["QuietMode"]:
  779         if EmbedRMSDCutoff > 0:
  780             MiscUtil.PrintInfo(
  781                 "Generating initial conformation ensemble by distance geometry for %s - EmbedRMSDCutoff: %s; Size: %s; Size after RMSD filtering: %s"
  782                 % (RDKitUtil.GetMolName(Mol, MolNum), EmbedRMSDCutoff, MaxConfs, len(ConfIDs))
  783             )
  784         else:
  785             MiscUtil.PrintInfo(
  786                 "Generating initial conformation ensemble by distance geometry for %s - EmbedRMSDCutoff: None; Size: %s"
  787                 % (RDKitUtil.GetMolName(Mol, MolNum), len(ConfIDs))
  788             )
  789 
  790     return ConfIDs
  791 
  792 
  793 def SetupPsi4Mol(Psi4Handle, Mol, MolNum, ConfID=-1):
  794     """Setup a Psi4 molecule object."""
  795 
  796     if OptionsInfo["RecenterAndReorient"]:
  797         MolGeometry = RDKitUtil.GetPsi4XYZFormatString(Mol, ConfID=ConfID, NoCom=False, NoReorient=False)
  798     else:
  799         MolGeometry = RDKitUtil.GetPsi4XYZFormatString(Mol, ConfID=ConfID, NoCom=True, NoReorient=True)
  800 
  801     try:
  802         Psi4Mol = Psi4Handle.geometry(MolGeometry)
  803     except Exception as ErrMsg:
  804         Psi4Mol = None
  805         if not OptionsInfo["QuietMode"]:
  806             MiscUtil.PrintWarning("Failed to create Psi4 molecule from geometry string: %s\n" % ErrMsg)
  807             MolName = RDKitUtil.GetMolName(Mol, MolNum)
  808             MiscUtil.PrintWarning("Ignoring molecule: %s" % MolName)
  809 
  810     if OptionsInfo["Symmetrize"]:
  811         Psi4Mol.symmetrize(OptionsInfo["SymmetrizeTolerance"])
  812 
  813     return Psi4Mol
  814 
  815 
  816 def PerformPsi4Cleanup(Psi4Handle):
  817     """Perform clean up."""
  818 
  819     # Clean up after Psi4 run...
  820     Psi4Handle.core.clean()
  821 
  822     # Clean up any leftover scratch files...
  823     if OptionsInfo["MPMode"]:
  824         Psi4Util.RemoveScratchFiles(Psi4Handle, OptionsInfo["Psi4RunParams"]["OutputFile"])
  825 
  826 
  827 def CheckAndValidateMolecule(Mol, MolCount=None):
  828     """Validate molecule for Psi4 calculations."""
  829 
  830     if Mol is None:
  831         if not OptionsInfo["QuietMode"]:
  832             MiscUtil.PrintInfo("\nProcessing molecule number %s..." % MolCount)
  833         return False
  834 
  835     MolName = RDKitUtil.GetMolName(Mol, MolCount)
  836     if not OptionsInfo["QuietMode"]:
  837         MiscUtil.PrintInfo("\nProcessing molecule %s..." % MolName)
  838 
  839     if RDKitUtil.IsMolEmpty(Mol):
  840         if not OptionsInfo["QuietMode"]:
  841             MiscUtil.PrintWarning("Ignoring empty molecule: %s\n" % MolName)
  842         return False
  843 
  844     if not RDKitUtil.ValidateElementSymbols(RDKitUtil.GetAtomSymbols(Mol)):
  845         if not OptionsInfo["QuietMode"]:
  846             MiscUtil.PrintWarning("Ignoring molecule containing invalid element symbols: %s\n" % MolName)
  847         return False
  848 
  849     # Check for 3D flag...
  850     if OptionsInfo["SkipConformerGeneration"]:
  851         if not Mol.GetConformer().Is3D():
  852             if not OptionsInfo["QuietMode"]:
  853                 MiscUtil.PrintWarning("3D tag is not set for molecule: %s\n" % MolName)
  854 
  855     # Check for missing hydrogens...
  856     if OptionsInfo["SkipConformerGeneration"]:
  857         # Otherwise, Hydrogens are always added...
  858         if RDKitUtil.AreHydrogensMissingInMolecule(Mol):
  859             if not OptionsInfo["QuietMode"]:
  860                 MiscUtil.PrintWarning("Missing hydrogens in molecule: %s\n" % MolName)
  861 
  862     return True
  863 
  864 
  865 def SetupMethodNameAndBasisSet(Mol):
  866     """Setup method name and basis set."""
  867 
  868     MethodName = OptionsInfo["MethodName"]
  869     if OptionsInfo["MethodNameAuto"]:
  870         MethodName = "B3LYP"
  871 
  872     BasisSet = OptionsInfo["BasisSet"]
  873     if OptionsInfo["BasisSetAuto"]:
  874         BasisSet = "6-31+G**" if RDKitUtil.IsAtomSymbolPresentInMol(Mol, "S") else "6-31G**"
  875 
  876     return (MethodName, BasisSet)
  877 
  878 
  879 def SetupReferenceWavefunction(Mol):
  880     """Setup reference wavefunction."""
  881 
  882     Reference = OptionsInfo["Reference"]
  883     if OptionsInfo["ReferenceAuto"]:
  884         Reference = "UHF" if (RDKitUtil.GetSpinMultiplicity(Mol) > 1) else "RHF"
  885 
  886     return Reference
  887 
  888 
  889 def SetupConstrainedTorsionsAroundRotatableBonds(Psi4Handle, Mol):
  890     """Setup constrained torsions around rotatable bonds."""
  891 
  892     if not OptionsInfo["Psi4ConstrainTorsionsMode"]:
  893         return
  894 
  895     # Get torsions around rotatable bonds...
  896     TorsionAtomsIndicesList = RDKitUtil.GetTorsionsAroundRotatableBonds(
  897         Mol, GetConstrainTorsionsRotBondsPatternMol(), OptionsInfo["Psi4ConstrainTorsionsParams"]["IgnoreHydrogens"]
  898     )
  899 
  900     # Freeze dihedrals corresponding to torsions around rotatable bonds...
  901     FreezeDihedralsString = SetupFreezeDihedralsStringForPsi4(TorsionAtomsIndicesList)
  902     if FreezeDihedralsString is None:
  903         # Clear any frozen dihedral values
  904         Psi4Handle.set_options({"OPTKING__frozen_dihedral": ""})
  905     else:
  906         # Setup frozen dihedral values...
  907         Psi4Handle.set_options({"OPTKING__frozen_dihedral": FreezeDihedralsString})
  908 
  909 
  910 def SetupFreezeDihedralsStringForPsi4(TorsionAtomsIndicesList):
  911     """Setup freeze dihedrals string for Psi4."""
  912 
  913     FreezeDihedralsString = None
  914     if TorsionAtomsIndicesList is None or len(TorsionAtomsIndicesList) == 0:
  915         return FreezeDihedralsString
  916 
  917     TorsionsList = []
  918     for TorsionAtomIndices in TorsionAtomsIndicesList:
  919         AtomIndex1, AtomIndex2, AtomIndex3, AtomIndex4 = TorsionAtomIndices
  920 
  921         # Atom indices start from 1 for Psi4 instead 0 for RDKit...
  922         TorsionString = "%s %s %s %s" % (AtomIndex1 + 1, AtomIndex2 + 1, AtomIndex3 + 1, AtomIndex4 + 1)
  923         TorsionsList.append(TorsionString)
  924 
  925     if len(TorsionsList):
  926         FreezeDihedralsString = "%s" % "\n".join(TorsionsList)
  927 
  928     return FreezeDihedralsString
  929 
  930 
  931 def GetConstrainTorsionsRotBondsPatternMol():
  932     """Get constrain torsions rotatable bonds pattern molecule."""
  933 
  934     if "RotBondsSMARTSPatternMol" not in OptionsInfo["Psi4ConstrainTorsionsParams"]:
  935         OptionsInfo["Psi4ConstrainTorsionsParams"]["RotBondsSMARTSPatternMol"] = Chem.MolFromSmarts(
  936             OptionsInfo["Psi4ConstrainTorsionsParams"]["RotBondsSMARTSPattern"]
  937         )
  938 
  939     return OptionsInfo["Psi4ConstrainTorsionsParams"]["RotBondsSMARTSPatternMol"]
  940 
  941 
  942 def SetupEnergyConversionFactor(Psi4Handle):
  943     """Setup converstion factor for energt units. The Psi4 energy units are Hartrees."""
  944 
  945     EnergyUnits = OptionsInfo["EnergyUnits"]
  946 
  947     ApplyConversionFactor = True
  948     if re.match(r"^kcal\/mol$", EnergyUnits, re.I):
  949         ConversionFactor = Psi4Handle.constants.hartree2kcalmol
  950     elif re.match(r"^kJ\/mol$", EnergyUnits, re.I):
  951         ConversionFactor = Psi4Handle.constants.hartree2kJmol
  952     elif re.match("^eV$", EnergyUnits, re.I):
  953         ConversionFactor = Psi4Handle.constants.hartree2ev
  954     else:
  955         ApplyConversionFactor = False
  956         ConversionFactor = 1.0
  957 
  958     OptionsInfo["ApplyEnergyConversionFactor"] = ApplyConversionFactor
  959     OptionsInfo["EnergyConversionFactor"] = ConversionFactor
  960 
  961 
  962 def WriteMolecule(Writer, Mol, MolNum=None, ConfID=None, Energy=None, SolvationEnergy=None):
  963     """Write molecule."""
  964 
  965     if OptionsInfo["EnergyOut"] and Energy is not None:
  966         Energy = "%.*f" % (OptionsInfo["Precision"], Energy)
  967         Mol.SetProp(OptionsInfo["EnergyDataFieldLabel"], Energy)
  968 
  969         if OptionsInfo["Psi4DDXSolvationMode"] and SolvationEnergy is not None:
  970             SolvationEnergy = "%.*f" % (OptionsInfo["Precision"], SolvationEnergy)
  971             Mol.SetProp(OptionsInfo["SolvationEnergyDataFieldLabel"], SolvationEnergy)
  972 
  973     try:
  974         if ConfID is None:
  975             Writer.write(Mol)
  976         else:
  977             Writer.write(Mol, confId=ConfID)
  978     except (ValueError, RuntimeError) as ErrMsg:
  979         if not OptionsInfo["QuietMode"]:
  980             MolName = RDKitUtil.GetMolName(Mol, MolNum)
  981             MiscUtil.PrintWarning("Failed to write molecule %s:\n%s\n" % (MolName, ErrMsg))
  982         return False
  983 
  984     return True
  985 
  986 
  987 def CheckAndSetupDDX():
  988     """Load pyddx and return its handle."""
  989 
  990     try:
  991         import pyddx
  992     except ImportError as ErrMsg:
  993         sys.stderr.write("\nFailed to import module/package pyddx for solvation: %s\n" % ErrMsg)
  994         sys.stderr.write("Psi4 relies on pyddx module for DDX solvation calculation.\n")
  995         sys.stderr.write("You may need to install it from pypi or conda-forge.\n")
  996         sys.stderr.write("Check/update your environment and try again.\n\n")
  997         sys.exit(1)
  998 
  999     return pyddx
 1000 
 1001 
 1002 def ProcessDDXListSolventsOption():
 1003     """List solvent information provided by tge DDX module for the calculation
 1004     of solvation energy."""
 1005 
 1006     PyDDXHandle = CheckAndSetupDDX()
 1007 
 1008     MiscUtil.PrintInfo("Solvent name and dielectric constant information avaibale in the DDX module:\n")
 1009     MiscUtil.PrintInfo("Solvent\tEpsilon")
 1010     Count = 0
 1011     for Solvent in sorted(PyDDXHandle.data.solvent_epsilon.keys()):
 1012         Count += 1
 1013         Epsilon = PyDDXHandle.data.solvent_epsilon[Solvent]
 1014         MiscUtil.PrintInfo("%s\t%s" % (Solvent, Epsilon))
 1015 
 1016     MiscUtil.PrintInfo("\nTotal number of solvents: %s\n" % Count)
 1017 
 1018 
 1019 def ProcessOptions():
 1020     """Process and validate command line arguments and options."""
 1021 
 1022     MiscUtil.PrintInfo("Processing options...")
 1023 
 1024     # Validate options...
 1025     ValidateOptions()
 1026 
 1027     OptionsInfo["Infile"] = Options["--infile"]
 1028     OptionsInfo["SMILESInfileStatus"] = True if MiscUtil.CheckFileExt(Options["--infile"], "smi csv tsv txt") else False
 1029     ParamsDefaultInfoOverride = {"RemoveHydrogens": False}
 1030     OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters(
 1031         "--infileParams",
 1032         Options["--infileParams"],
 1033         InfileName=Options["--infile"],
 1034         ParamsDefaultInfo=ParamsDefaultInfoOverride,
 1035     )
 1036 
 1037     OptionsInfo["Outfile"] = Options["--outfile"]
 1038     OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters(
 1039         "--outfileParams", Options["--outfileParams"]
 1040     )
 1041 
 1042     OptionsInfo["Overwrite"] = Options["--overwrite"]
 1043 
 1044     # Method, basis set, and reference wavefunction...
 1045     OptionsInfo["BasisSet"] = Options["--basisSet"]
 1046     OptionsInfo["BasisSetAuto"] = True if re.match("^auto$", Options["--basisSet"], re.I) else False
 1047 
 1048     OptionsInfo["MethodName"] = Options["--methodName"]
 1049     OptionsInfo["MethodNameAuto"] = True if re.match("^auto$", Options["--methodName"], re.I) else False
 1050 
 1051     OptionsInfo["Reference"] = Options["--reference"]
 1052     OptionsInfo["ReferenceAuto"] = True if re.match("^auto$", Options["--reference"], re.I) else False
 1053 
 1054     # Run and options parameters...
 1055     OptionsInfo["Psi4OptionsParams"] = Psi4Util.ProcessPsi4OptionsParameters(
 1056         "--psi4OptionsParams", Options["--psi4OptionsParams"]
 1057     )
 1058     OptionsInfo["Psi4RunParams"] = Psi4Util.ProcessPsi4RunParameters(
 1059         "--psi4RunParams", Options["--psi4RunParams"], InfileName=OptionsInfo["Infile"]
 1060     )
 1061 
 1062     # Psi4 constrain torsions paramaters...
 1063     OptionsInfo["Psi4ConstrainTorsionsMode"] = (
 1064         True if re.match("^yes$", Options["--psi4ConstrainTorsions"], re.I) else False
 1065     )
 1066     ParamsDefaultInfoOverride = {"RotBondsSMARTSMode": "NonStrict"}
 1067     Psi4ConstrainTorsionsParams = Psi4Util.ProcessPsi4ConstrainTorsionsParameters(
 1068         "--psi4ConstrainTorsionsParams", Options["--psi4ConstrainTorsionsParams"], ParamsDefaultInfoOverride
 1069     )
 1070     OptionsInfo["Psi4ConstrainTorsionsParams"] = Psi4ConstrainTorsionsParams
 1071 
 1072     if OptionsInfo["Psi4ConstrainTorsionsMode"]:
 1073         RotBondsSMARTSPattern = OptionsInfo["Psi4ConstrainTorsionsParams"]["RotBondsSMARTSPattern"]
 1074         RotBondsPatternMol = Chem.MolFromSmarts(RotBondsSMARTSPattern)
 1075         if RotBondsPatternMol is None:
 1076             MiscUtil.PrintError(
 1077                 'Failed to create rotatable bonds pattern molecule. The rotatable bonds SMARTS pattern, "%s", specified for parameter name "--rotBondsSMARTSPattern" using "--psi4ConstrainTorsions"option is not valid.'
 1078                 % (RotBondsSMARTSPattern)
 1079             )
 1080 
 1081     # DDX solvation parameters...
 1082     OptionsInfo["Psi4DDXSolvationMode"] = True if re.match("^yes$", Options["--psi4DDXSolvation"], re.I) else False
 1083     Psi4DDXSolvationParams = Psi4Util.ProcessPsi4DDXSolvationParameters(
 1084         "--psi4DDXSolvationParams", Options["--psi4DDXSolvationParams"]
 1085     )
 1086     Psi4DDXSolvationOptions = Psi4Util.SetupPsi4DDXSolvationOptions(
 1087         OptionsInfo["Psi4DDXSolvationMode"], Psi4DDXSolvationParams
 1088     )
 1089     OptionsInfo["Psi4DDXSolvationParams"] = Psi4DDXSolvationParams
 1090     OptionsInfo["Psi4DDXSolvationOptions"] = Psi4DDXSolvationOptions
 1091 
 1092     if OptionsInfo["Psi4DDXSolvationMode"]:
 1093         CheckAndSetupDDX()
 1094 
 1095     # Conformer generation paramaters...
 1096     OptionsInfo["ConfGeneration"] = Options["--confGeneration"]
 1097     OptionsInfo["SkipConformerGeneration"] = False if re.match("^yes$", Options["--confGeneration"], re.I) else True
 1098 
 1099     if OptionsInfo["SkipConformerGeneration"]:
 1100         if OptionsInfo["SMILESInfileStatus"]:
 1101             MiscUtil.PrintError(
 1102                 'The value, %s, specified for option "-c --confGeneration" is not allowed during, %s, value for option "-i, --infile" . The input file must be a 3D SD or MOL file. '
 1103                 % (Options["--confGeneration"], Options["--infile"])
 1104             )
 1105 
 1106     if not OptionsInfo["SkipConformerGeneration"]:
 1107         if OptionsInfo["Psi4ConstrainTorsionsMode"]:
 1108             MiscUtil.PrintError(
 1109                 'The value, %s, specified for option "-c --confGeneration" is not allowed during, %s, value for option "--psi4ConstrainTorsions".'
 1110                 % (Options["--confGeneration"], Options["--psi4ConstrainTorsions"])
 1111             )
 1112 
 1113     ParamsDefaultInfoOverride = {"MaxConfs": 50, "MaxIters": 250}
 1114     OptionsInfo["ConfGenerationParams"] = MiscUtil.ProcessOptionConformerParameters(
 1115         "--confParams", Options["--confParams"], ParamsDefaultInfoOverride
 1116     )
 1117 
 1118     # Write energy parameters...
 1119     OptionsInfo["EnergyOut"] = True if re.match("^yes$", Options["--energyOut"], re.I) else False
 1120     OptionsInfo["EnergyUnits"] = Options["--energyUnits"]
 1121 
 1122     EnergyDataFieldLabel = Options["--energyDataFieldLabel"]
 1123     if re.match("^auto$", EnergyDataFieldLabel, re.I):
 1124         EnergyDataFieldLabel = "Psi4_Energy (%s)" % Options["--energyUnits"]
 1125     OptionsInfo["EnergyDataFieldLabel"] = EnergyDataFieldLabel
 1126 
 1127     SolvationEnergyDataFieldLabel = "Psi4_DDX_Solvation_Energy (%s)" % Options["--energyUnits"]
 1128     OptionsInfo["SolvationEnergyDataFieldLabel"] = SolvationEnergyDataFieldLabel
 1129 
 1130     OptionsInfo["MaxIters"] = int(Options["--maxIters"])
 1131 
 1132     OptionsInfo["MPMode"] = True if re.match("^yes$", Options["--mp"], re.I) else False
 1133     OptionsInfo["MPParams"] = MiscUtil.ProcessOptionMultiprocessingParameters("--mpParams", Options["--mpParams"])
 1134 
 1135     # Multiprocessing level...
 1136     MPLevelMoleculesMode = False
 1137     MPLevelConformersMode = False
 1138     MPLevel = Options["--mpLevel"]
 1139     if re.match("^Molecules$", MPLevel, re.I):
 1140         MPLevelMoleculesMode = True
 1141     elif re.match("^Conformers$", MPLevel, re.I):
 1142         if OptionsInfo["SkipConformerGeneration"]:
 1143             MiscUtil.PrintError(
 1144                 'The value, %s, specified for option "--mpLevel" is not allowed during, %s, value of option "--confGeneration"  . '
 1145                 % (MPLevel, Options["--confGeneration"])
 1146             )
 1147         MPLevelConformersMode = True
 1148     else:
 1149         MiscUtil.PrintError('The value, %s, specified for option "--mpLevel" is not valid. ' % MPLevel)
 1150     OptionsInfo["MPLevel"] = MPLevel
 1151     OptionsInfo["MPLevelMoleculesMode"] = MPLevelMoleculesMode
 1152     OptionsInfo["MPLevelConformersMode"] = MPLevelConformersMode
 1153 
 1154     OptionsInfo["Precision"] = int(Options["--precision"])
 1155     OptionsInfo["QuietMode"] = True if re.match("^yes$", Options["--quiet"], re.I) else False
 1156 
 1157     RecenterAndReorient = Options["--recenterAndReorient"]
 1158     if re.match("^auto$", RecenterAndReorient, re.I):
 1159         RecenterAndReorient = False if OptionsInfo["SkipConformerGeneration"] else True
 1160     else:
 1161         RecenterAndReorient = True if re.match("^yes$", RecenterAndReorient, re.I) else False
 1162     OptionsInfo["RecenterAndReorient"] = RecenterAndReorient
 1163 
 1164     Symmetrize = Options["--symmetrize"]
 1165     if re.match("^auto$", Symmetrize, re.I):
 1166         Symmetrize = True if RecenterAndReorient else False
 1167     else:
 1168         Symmetrize = True if re.match("^yes$", Symmetrize, re.I) else False
 1169     OptionsInfo["Symmetrize"] = Symmetrize
 1170 
 1171     OptionsInfo["SymmetrizeTolerance"] = float(Options["--symmetrizeTolerance"])
 1172 
 1173 
 1174 def RetrieveOptions():
 1175     """Retrieve command line arguments and options."""
 1176 
 1177     # Get options...
 1178     global Options
 1179     Options = docopt(_docoptUsage_)
 1180 
 1181     # Set current working directory to the specified directory...
 1182     WorkingDir = Options["--workingdir"]
 1183     if WorkingDir:
 1184         os.chdir(WorkingDir)
 1185 
 1186     # Handle examples option...
 1187     if "--examples" in Options and Options["--examples"]:
 1188         MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_))
 1189         sys.exit(0)
 1190 
 1191     # Handle listing of solvent information...
 1192     if Options and Options["--psi4DDXListSolvents"]:
 1193         ProcessDDXListSolventsOption()
 1194         sys.exit(0)
 1195 
 1196 
 1197 def ValidateOptions():
 1198     """Validate option values."""
 1199 
 1200     MiscUtil.ValidateOptionTextValue("-c, --confGeneration", Options["--confGeneration"], "yes no")
 1201 
 1202     MiscUtil.ValidateOptionTextValue("--energyOut", Options["--energyOut"], "yes no")
 1203     MiscUtil.ValidateOptionTextValue("--energyUnits", Options["--energyUnits"], "Hartrees kcal/mol kJ/mol eV")
 1204 
 1205     MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"])
 1206     MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "sdf sd mol smi txt csv tsv")
 1207 
 1208     MiscUtil.ValidateOptionIntegerValue("--maxIters", Options["--maxIters"], {">": 0})
 1209 
 1210     MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "sdf sd")
 1211     MiscUtil.ValidateOptionsOutputFileOverwrite(
 1212         "-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"]
 1213     )
 1214     MiscUtil.ValidateOptionsDistinctFileNames(
 1215         "-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"]
 1216     )
 1217 
 1218     MiscUtil.ValidateOptionTextValue("--mp", Options["--mp"], "yes no")
 1219     MiscUtil.ValidateOptionTextValue("--mpLevel", Options["--mpLevel"], "Molecules Conformers")
 1220 
 1221     MiscUtil.ValidateOptionIntegerValue("-p, --precision", Options["--precision"], {">": 0})
 1222 
 1223     MiscUtil.ValidateOptionTextValue("--psi4ConstrainTorsions", Options["--psi4ConstrainTorsions"], "yes no")
 1224 
 1225     MiscUtil.ValidateOptionTextValue("--psi4DDXSolvation", Options["--psi4DDXSolvation"], "yes no")
 1226 
 1227     MiscUtil.ValidateOptionTextValue("-q, --quiet", Options["--quiet"], "yes no")
 1228 
 1229     MiscUtil.ValidateOptionTextValue("--recenterAndReorient", Options["--recenterAndReorient"], "yes no auto")
 1230     MiscUtil.ValidateOptionTextValue("--symmetrize", Options["--symmetrize"], "yes no auto")
 1231     MiscUtil.ValidateOptionFloatValue("--symmetrizeTolerance", Options["--symmetrizeTolerance"], {">": 0})
 1232 
 1233 
 1234 # Setup a usage string for docopt...
 1235 _docoptUsage_ = """
 1236 Psi4PerformMinimization.py - Perform structure minimization
 1237 
 1238 Usage:
 1239     Psi4PerformMinimization.py [--basisSet <text>] [--confGeneration <yes or no> ] [--confParams <Name,Value,...>]
 1240                                [--energyOut <yes or no>] [--energyDataFieldLabel <text>] [--energyUnits <text>]
 1241                                [--infileParams <Name,Value,...>] [--maxIters <number>] [--methodName <text>]
 1242                                [--mp <yes or no>] [--mpLevel <Molecules or Conformers>] [--mpParams <Name, Value,...>]
 1243                                [ --outfileParams <Name,Value,...> ] [--overwrite] [--precision <number> ]
 1244                                [--psi4OptionsParams <Name,Value,...>] [--psi4RunParams <Name,Value,...>]
 1245                                [--psi4ConstrainTorsions <yes or no>] [--psi4ConstrainTorsionsParams <Name,Value,...>]
 1246                                [--psi4DDXSolvation <yes or no>] [--psi4DDXSolvationParams <Name,Value,...>]
 1247                                [--quiet <yes or no>] [--reference <text>] [--recenterAndReorient <yes or no>]
 1248                                [--symmetrize <yes or no>] [--symmetrizeTolerance <number>] [-w <dir>] -i <infile> -o <outfile> 
 1249     Psi4PerformMinimization.py --psi4DDXListSolvents
 1250     Psi4PerformMinimization.py -h | --help | -e | --examples
 1251 
 1252 Description:
 1253     Generate 3D structures for molecules using a combination of distance geometry
 1254     and forcefield minimization followed by geometry optimization using a quantum
 1255     chemistry method. A set of initial 3D structures are generated for a molecule 
 1256     employing distance geometry. The 3D structures in the conformation ensemble
 1257     are sequentially minimized using forcefield and a quantum chemistry method.
 1258     The conformation with lowest energy is selected to represent the final structure.
 1259     An option is available to skip the generation of the conformation ensemble along
 1260     with forcefield minimization and simply perform minimization on the initial 3D
 1261     structure using a quantum chemistry method.
 1262 
 1263     A Psi4 XYZ format geometry string is automatically generated for each molecule
 1264     in input file. It contains atom symbols and 3D coordinates for each atom in a
 1265     molecule. In addition, the formal charge and spin multiplicity are present in the
 1266     the geometry string. These values are either retrieved from molecule properties
 1267     named 'FormalCharge' and 'SpinMultiplicty' or dynamically calculated for a
 1268     molecule.
 1269 
 1270     The supported input file formats are: Mol (.mol), SD (.sdf, .sd), SMILES (.smi,
 1271     .csv, .tsv, .txt)
 1272 
 1273     The supported output file formats are: SD (.sdf, .sd)
 1274 
 1275 Options:
 1276     -b, --basisSet <text>  [default: auto]
 1277         Basis set to use for energy minimization. Default: 6-31+G** for sulfur
 1278         containing molecules; Otherwise, 6-31G** [ Ref 150 ]. The specified 
 1279         value must be a valid Psi4 basis set. No validation is performed.
 1280         
 1281         The following list shows a representative sample of basis sets available
 1282         in Psi4:
 1283             
 1284             STO-3G, 6-31G, 6-31+G, 6-31++G, 6-31G*, 6-31+G*,  6-31++G*, 
 1285             6-31G**, 6-31+G**, 6-31++G**, 6-311G, 6-311+G, 6-311++G,
 1286             6-311G*, 6-311+G*, 6-311++G*, 6-311G**, 6-311+G**, 6-311++G**,
 1287             cc-pVDZ, cc-pCVDZ, aug-cc-pVDZ, cc-pVDZ-DK, cc-pCVDZ-DK, def2-SVP,
 1288             def2-SVPD, def2-TZVP, def2-TZVPD, def2-TZVPP, def2-TZVPPD
 1289             
 1290     -c, --confGeneration <yes or no>  [default: yes]
 1291         Generate an initial 3D conformation ensemble using distance geometry and
 1292         forcefield minimization before final geometry optimization by a specified
 1293         method name and basis set. Possible values: yes or no.
 1294         
 1295         The 'No' value skips the generation of conformations employing distance
 1296         geometry and forcefield minimization. The 3D structures in input file are
 1297         minimized by a quantum method. It is not allowed for SMILES input file.
 1298     --confParams <Name,Value,...>  [default: auto]
 1299         A comma delimited list of parameter name and value pairs for generating
 1300         initial sets of 3D conformations for molecules. The 3D conformation ensemble
 1301         is generated using distance geometry and forcefield functionality available
 1302         in RDKit. The 3D structures in the conformation ensemble are subsequently
 1303         minimized by a quantum chemistry method available in Psi4.
 1304        
 1305         The supported parameter names along with their default values are shown
 1306         below:
 1307             
 1308             confMethod,ETKDGv2,
 1309             forceField,MMFF, forceFieldMMFFVariant,MMFF94,
 1310             enforceChirality,yes,embedRMSDCutoff,0.5,maxConfs,50,
 1311             maxIters,250,randomSeed,auto
 1312             
 1313             confMethod,ETKDGv2   [ Possible values: SDG, KDG, ETDG,
 1314                 ETKDG , or ETKDGv2]
 1315             forceField, MMFF   [ Possible values: UFF or MMFF ]
 1316             forceFieldMMFFVariant,MMFF94   [ Possible values: MMFF94 or MMFF94s ]
 1317             enforceChirality,yes   [ Possible values: yes or no ]
 1318             embedRMSDCutoff,0.5   [ Possible values: number or None]
 1319             
 1320         confMethod: Conformation generation methodology for generating initial 3D
 1321         coordinates. Possible values: Standard Distance Geometry (SDG), Experimental
 1322         Torsion-angle preference with Distance Geometry (ETDG), basic Knowledge-terms
 1323         with Distance Geometry (KDG) and Experimental Torsion-angle preference
 1324         along with basic Knowledge-terms and Distance Geometry (ETKDG or
 1325         ETKDGv2) [Ref 129, 167] .
 1326         
 1327         forceField: Forcefield method to use for energy minimization. Possible
 1328         values: Universal Force Field (UFF) [ Ref 81 ] or Merck Molecular Mechanics
 1329         Force Field [ Ref 83-87 ] .
 1330         
 1331         enforceChirality: Enforce chirality for defined chiral centers during
 1332         forcefield minimization.
 1333         
 1334         maxConfs: Maximum number of conformations to generate for each molecule
 1335         during the generation of an initial 3D conformation ensemble using 
 1336         conformation generation methodology. The conformations are minimized
 1337         using the specified forcefield and a quantum chemistry method. The lowest
 1338         energy conformation is written to the output file.
 1339         
 1340         embedRMSDCutoff: RMSD cutoff for retaining initial set conformers embedded
 1341         using distance geometry and before forcefield minimization. All embedded
 1342         conformers are kept for 'None' value. Otherwise, only those conformers which
 1343         are different from each other by the specified RMSD cutoff, 0.5 by default,
 1344         are kept. The first embedded conformer is always retained.
 1345         
 1346         maxIters: Maximum number of iterations to perform for each conformation
 1347         during forcefield minimization.
 1348         
 1349         randomSeed: Seed for the random number generator for reproducing initial
 1350         3D coordinates in a conformation ensemble. Default is to use a random seed.
 1351     --energyOut <yes or no>  [default: yes]
 1352         Write out energy values.
 1353     --energyDataFieldLabel <text>  [default: auto]
 1354         Energy data field label for writing energy values. Default: Psi4_Energy (<Units>). 
 1355     --energyUnits <text>  [default: kcal/mol]
 1356         Energy units. Possible values: Hartrees, kcal/mol, kJ/mol, or eV.
 1357     -e, --examples
 1358         Print examples.
 1359     -h, --help
 1360         Print this help message.
 1361     -i, --infile <infile>
 1362         Input file name.
 1363     --infileParams <Name,Value,...>  [default: auto]
 1364         A comma delimited list of parameter name and value pairs for reading
 1365         molecules from files. The supported parameter names for different file
 1366         formats, along with their default values, are shown below:
 1367             
 1368             SD, MOL: removeHydrogens,no,sanitize,yes,strictParsing,yes
 1369             SMILES: smilesColumn,1,smilesNameColumn,2,smilesDelimiter,space,
 1370                 smilesTitleLine,auto,sanitize,yes
 1371             
 1372         Possible values for smilesDelimiter: space, comma or tab.
 1373     --maxIters <number>  [default: 50]
 1374         Maximum number of iterations to perform for each molecule or conformer
 1375         during energy minimization by a quantum chemistry method.
 1376     -m, --methodName <text>  [default: auto]
 1377         Method to use for energy minimization. Default: B3LYP [ Ref 150 ]. The
 1378         specified value must be a valid Psi4 method name. No validation is
 1379         performed.
 1380         
 1381         The following list shows a representative sample of methods available
 1382         in Psi4:
 1383             
 1384             B1LYP, B2PLYP, B2PLYP-D3BJ, B2PLYP-D3MBJ, B3LYP, B3LYP-D3BJ,
 1385             B3LYP-D3MBJ, CAM-B3LYP, CAM-B3LYP-D3BJ, HF, HF-D3BJ,  HF3c, M05,
 1386             M06, M06-2x, M06-HF, M06-L, MN12-L, MN15, MN15-D3BJ,PBE, PBE0,
 1387             PBEH3c, PW6B95, PW6B95-D3BJ, WB97, WB97X, WB97X-D, WB97X-D3BJ
 1388             
 1389     --mp <yes or no>  [default: no]
 1390         Use multiprocessing.
 1391          
 1392         By default, input data is retrieved in a lazy manner via mp.Pool.imap()
 1393         function employing lazy RDKit data iterable. This allows processing of
 1394         arbitrary large data sets without any additional requirements memory.
 1395         
 1396         All input data may be optionally loaded into memory by mp.Pool.map()
 1397         before starting worker processes in a process pool by setting the value
 1398         of 'inputDataMode' to 'InMemory' in '--mpParams' option.
 1399         
 1400         A word to the wise: The default 'chunkSize' value of 1 during 'Lazy' input
 1401         data mode may adversely impact the performance. The '--mpParams' section
 1402         provides additional information to tune the value of 'chunkSize'.
 1403     --mpLevel <Molecules or Conformers>  [default: Molecules]
 1404         Perform multiprocessing at molecules or conformers level. Possible values:
 1405         Molecules or Conformers. The 'Molecules' value starts a process pool at the
 1406         molecules level. All conformers of a molecule are processed in a single
 1407         process. The 'Conformers' value, however, starts a process pool at the 
 1408         conformers level. Each conformer of a molecule is processed in an individual
 1409         process in the process pool. The default Psi4 'OutputFile' is set to 'quiet'
 1410         using '--psi4RunParams' for 'Conformers' level. Otherwise, it may generate
 1411         a large number of Psi4 output files.
 1412     --mpParams <Name,Value,...>  [default: auto]
 1413         A comma delimited list of parameter name and value pairs to configure
 1414         multiprocessing.
 1415         
 1416         The supported parameter names along with their default and possible
 1417         values are shown below:
 1418         
 1419             chunkSize, auto
 1420             inputDataMode, Lazy   [ Possible values: InMemory or Lazy ]
 1421             numProcesses, auto   [ Default: mp.cpu_count() ]
 1422         
 1423         These parameters are used by the following functions to configure and
 1424         control the behavior of multiprocessing: mp.Pool(), mp.Pool.map(), and
 1425         mp.Pool.imap().
 1426         
 1427         The chunkSize determines chunks of input data passed to each worker
 1428         process in a process pool by mp.Pool.map() and mp.Pool.imap() functions.
 1429         The default value of chunkSize is dependent on the value of 'inputDataMode'.
 1430         
 1431         The mp.Pool.map() function, invoked during 'InMemory' input data mode,
 1432         automatically converts RDKit data iterable into a list, loads all data into
 1433         memory, and calculates the default chunkSize using the following method
 1434         as shown in its code:
 1435         
 1436             chunkSize, extra = divmod(len(dataIterable), len(numProcesses) * 4)
 1437             if extra: chunkSize += 1
 1438         
 1439         For example, the default chunkSize will be 7 for a pool of 4 worker processes
 1440         and 100 data items.
 1441         
 1442         The mp.Pool.imap() function, invoked during 'Lazy' input data mode, employs
 1443         'lazy' RDKit data iterable to retrieve data as needed, without loading all the
 1444         data into memory. Consequently, the size of input data is not known a priori.
 1445         It's not possible to estimate an optimal value for the chunkSize. The default 
 1446         chunkSize is set to 1.
 1447         
 1448         The default value for the chunkSize during 'Lazy' data mode may adversely
 1449         impact the performance due to the overhead associated with exchanging
 1450         small chunks of data. It is generally a good idea to explicitly set chunkSize to
 1451         a larger value during 'Lazy' input data mode, based on the size of your input
 1452         data and number of processes in the process pool.
 1453         
 1454         The mp.Pool.map() function waits for all worker processes to process all
 1455         the data and return the results. The mp.Pool.imap() function, however,
 1456         returns the the results obtained from worker processes as soon as the
 1457         results become available for specified chunks of data.
 1458         
 1459         The order of data in the results returned by both mp.Pool.map() and 
 1460         mp.Pool.imap() functions always corresponds to the input data.
 1461     -o, --outfile <outfile>
 1462         Output file name.
 1463     --outfileParams <Name,Value,...>  [default: auto]
 1464         A comma delimited list of parameter name and value pairs for writing
 1465         molecules to files. The supported parameter names for different file
 1466         formats, along with their default values, are shown below:
 1467             
 1468             SD: kekulize,yes,forceV3000,no
 1469             
 1470     --overwrite
 1471         Overwrite existing files.
 1472     --precision <number>  [default: 6]
 1473         Floating point precision for writing energy values.
 1474     --psi4OptionsParams <Name,Value,...>  [default: none]
 1475         A comma delimited list of Psi4 option name and value pairs for setting
 1476         global and module options. The names are 'option_name' for global options
 1477         and 'module_name__option_name' for options local to a module. The
 1478         specified option names must be valid Psi4 names. No validation is
 1479         performed.
 1480         
 1481         The specified option name and  value pairs are processed and passed to
 1482         psi4.set_options() as a dictionary. The supported value types are float,
 1483         integer, boolean, or string. The float value string is converted into a float.
 1484         The valid values for a boolean string are yes, no, true, false, on, or off. 
 1485     --psi4RunParams <Name,Value,...>  [default: auto]
 1486         A comma delimited list of parameter name and value pairs for configuring
 1487         Psi4 jobs.
 1488         
 1489         The supported parameter names along with their default and possible
 1490         values are shown below:
 1491              
 1492             MemoryInGB, 1
 1493             NumThreads, 1
 1494             OutputFile, auto   [ Possible  values: stdout, quiet, or FileName ]
 1495             ScratchDir, auto   [ Possivle values: DirName]
 1496             RemoveOutputFile, yes   [ Possible values: yes, no, true, or false]
 1497             
 1498         These parameters control the runtime behavior of Psi4.
 1499         
 1500         The default file name for 'OutputFile' is <InFileRoot>_Psi4.out. The PID
 1501         is appended to output file name during multiprocessing as shown:
 1502         <InFileRoot>_Psi4_<PIDNum>.out. The 'stdout' value for 'OutputType'
 1503         sends Psi4 output to stdout. The 'quiet' or 'devnull' value suppresses
 1504         all Psi4 output. The 'OutputFile' is set to 'quiet' for 'auto' value during 
 1505         'Conformers' of '--mpLevel' option.
 1506         
 1507         The default 'Yes' value of 'RemoveOutputFile' option forces the removal
 1508         of any existing Psi4 before creating new files to append output from
 1509         multiple Psi4 runs.
 1510         
 1511         The option 'ScratchDir' is a directory path to the location of scratch
 1512         files. The default value corresponds to Psi4 default. It may be used to
 1513         override the deafult path.
 1514     --psi4ConstrainTorsions <yes or no>  [default: no]
 1515         Constrain torsions around rotatable bonds during Psi4 minimization.
 1516     --psi4ConstrainTorsionsParams <Name,Value,...>  [default: auto]
 1517         A space delimited list of parameter name and value  pairs for constraining
 1518         torsions around rotatable bonds during Psi4 minimization. The supported
 1519         parameter names, along with their default values, are shown below:
 1520             
 1521             ignoreHydrogens, yes,  [ Possible values: yes or no ]
 1522             rotBondsSMARTSMode, NonStrict,   [ Possible  values: NonStrict,
 1523                 SemiStrict, Strict, Specify ]
 1524             rotBondsSMARTSPattern, auto  [ Possible values: Valid SMARTS ]
 1525         
 1526         ignoreHydrogens: Ignore torsions around rotatable bonds containing
 1527         hydrogens during Psi4 minimization.
 1528         
 1529         rotBondsSMARTSMode: SMARTS pattern to use for identifying rotatable bonds
 1530         for constraining torsions during Psi4 minimization. Possible values: NonStrict,
 1531         SemiStrict, Strict or Specify. The following SMARTS patterns are used to identify
 1532         rotatable bonds for different modes:
 1533             
 1534             NonStrict: [!$(*#*)&!D1]-&!@[!$(*#*)&!D1]
 1535             
 1536             SemiStrict:
 1537             [!$(*#*)&!D1&!$(C(F)(F)F)&!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)
 1538             &!$(C([CH3])([CH3])[CH3])]-!@[!$(*#*)&!D1&!$(C(F)(F)F)
 1539             &!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)&!$(C([CH3])([CH3])[CH3])]
 1540             
 1541             Strict:
 1542             [!$(*#*)&!D1&!$(C(F)(F)F)&!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)
 1543             &!$(C([CH3])([CH3])[CH3])&!$([CD3](=[N,O,S])-!@[#7,O,S!D1])
 1544             &!$([#7,O,S!D1]-!@[CD3]=[N,O,S])&!$([CD3](=[N+])-!@[#7!D1])
 1545             &!$([#7!D1]-!@[CD3]=[N+])]-!@[!$(*#*)&!D1&!$(C(F)(F)F)
 1546             &!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)&!$(C([CH3])([CH3])[CH3])]
 1547             
 1548         The 'NonStrict' and 'Strict' SMARTS patterns are available in RDKit. The 
 1549         'NonStrict' SMARTS pattern corresponds to original Daylight SMARTS
 1550          specification for rotatable bonds. The 'SemiStrict' SMARTS pattern is 
 1551          derived from 'Strict' SMARTS patterns for its usage in this script.
 1552         
 1553         You may use any arbitrary SMARTS pattern to identify rotatable bonds by
 1554         choosing 'Specify' value for 'rotBondsSMARTSMode' parameter and providing
 1555         its value via '--rotBondsSMARTSPattern'.
 1556         
 1557         rotBondsSMARTSPattern: SMARTS pattern for identifying rotatable bonds for
 1558         constraining torsions during Psi4 minimization. This option is only valid for
 1559         'Specify' value of 'rotBondsSMARTSMode' parameter.
 1560     --psi4DDXSolvation <yes or no>  [default: no]
 1561         Perform minimization in solution using domain-decomposition-based
 1562         continuum solvation models [ Ref 160-161 ]. The script relies on Psi4
 1563         interface to the DDX module to perform these calculations. The DDX library
 1564         provides a linear-scaling implementation of standard continuum solvation
 1565         models using a domain- decomposition ansatz. Two solvation models are
 1566         supported: COnductor-like Screening MOdel (COSMO) [ Ref 162-163] and
 1567         Polarizable Continuum Model (PCM) [ Ref 164-165 ].
 1568         
 1569         The solvation energy is included in the value of the total energy written to 
 1570         the output SD file. In addition, the value of solvation energy is written to the
 1571         output file under its own data field label.
 1572         
 1573         Psi4 relies on Python module PYDDX to calculate solvation energy. It must be
 1574         present in your environment.
 1575     --psi4DDXSolvationParams <Name,Value,...>  [default: auto]
 1576         A space delimited list of parameter name and value  pairs for calculating
 1577         solvation energy using the DDX module. The supported parameter names,
 1578         along with their default values, are shown below:
 1579             
 1580             solvationModel PCM   [ Possible  values: COSMO or PCM]
 1581             solvent water   [ Possible  values: A valid solvent name]
 1582             solventEpsilon None
 1583             radiiSet UFF   [ Possible  values: Bondi or UFF]
 1584             radiiScaling auto   [ Default: 1.2 for Bondi; 1.1 for UFF]
 1585             
 1586             solvationModel: Solvation model for calculating solvation energy.
 1587             The corresponding Psi4 option is DDX_MODEL.
 1588             
 1589             solvent: Solvent to use. The corresponding Ps4 option is
 1590             DDX_SOLVENT
 1591             
 1592             solventEpsilon: Dielectric constant of the solvent. The
 1593             corresponding Psi4 option is DDX_SOLVENT_EPSILON.
 1594             
 1595             radiiSet: Radius set for cavity spheres. The corresponding Psi4
 1596             option is DDX_RADII_SET.
 1597             
 1598             radiiScaling: Scaling factor for cavity spheres. The default value
 1599             depends on radiiSet: 1.2 for Bondi; 1.1 for UFF. The corresponding
 1600             Psi4 option is DDX_RADII_SCALING.
 1601             
 1602         These parameter names are automatically mapped to appropriate DDX keywords.
 1603         
 1604         You may specify the solvent either by directly providing a dielectric constant using
 1605         'solventEpsilon' parameter or a solvent name corresponding to 'solvent' parameter.
 1606         The solvent name must be a valid named supported by the DDX module. For example:
 1607         water, ethanol, dimethylsulfoxide, cis-1,2-dimethylcyclohexane , etc. The 'solvent'
 1608         parameter is ignored for non-zero value of 'solvent' option.
 1609         
 1610         The DDX module contains a variety of addtional options to confgure the calculation
 1611         of solvation energy. For example: DDX_MAXITER, DDX_RADII_SCALING, etc. You may
 1612         use '--psi4OptionsParams' to modify values for additional DDX options.
 1613     --psi4DDXListSolvents
 1614         List solvent names, along with dielectric values, supported by the DDX module
 1615         for the calculation of solvent energy without performing any calculation.
 1616     -q, --quiet <yes or no>  [default: no]
 1617         Use quiet mode. The warning and information messages will not be printed.
 1618     -r, --reference <text>  [default: auto]
 1619         Reference wave function to use for energy calculation. Default: RHF or UHF.
 1620         The default values are Restricted Hartree-Fock (RHF) for closed-shell molecules
 1621         with all electrons paired and Unrestricted Hartree-Fock (UHF) for open-shell
 1622         molecules with unpaired electrons.
 1623         
 1624         The specified value must be a valid Psi4 reference wave function. No validation
 1625         is performed. For example: ROHF, CUHF, RKS, etc.
 1626         
 1627         The spin multiplicity determines the default value of reference wave function
 1628         for input molecules. It is calculated from number of free radical electrons using
 1629         Hund's rule of maximum multiplicity defined as 2S + 1 where S is the total
 1630         electron spin. The total spin is 1/2 the number of free radical electrons in a 
 1631         molecule. The value of 'SpinMultiplicity' molecule property takes precedence
 1632         over the calculated value of spin multiplicity.
 1633     --recenterAndReorient <yes or no>  [default: auto]
 1634         Recenter and reorient a molecule during creation of a Psi4 molecule from
 1635         a geometry string. Default: 'No' during 'No' value of '--confGeneration';
 1636         Otherwise, 'Yes'.
 1637         
 1638         The 'No' values allows the minimization of a molecule in its initial 3D
 1639         coordinate space in input file or conformer ensemble.
 1640     --symmetrize <yes or no>  [default: auto]
 1641         Symmetrize molecules before energy minimization. Default: 'Yes' during
 1642         'Yes' value of '--recenterAndReorient'; Otherwise, 'No'. The psi4 function,
 1643         psi4mol.symmetrize( SymmetrizeTolerance), is called to symmetrize
 1644         the molecule before calling psi4.optimize().
 1645         
 1646         The 'No' value of '--symmetrize' during 'Yes' value of '--recenterAndReorient'
 1647         may cause psi4.optimize() to fail with a 'Point group changed...' error
 1648         message.
 1649     --symmetrizeTolerance <number>  [default: 0.01]
 1650         Symmetry tolerance for '--symmetrize'.
 1651     -w, --workingdir <dir>
 1652         Location of working directory which defaults to the current directory.
 1653 
 1654 Examples:
 1655     To generate an initial conformer ensemble of up to 50 conformations using a
 1656     combination of ETKDGv2 distance geometry methodology, applying RMSD cutoff
 1657     of 0.5 and MMFF forcefield minimization, followed by energy minimization
 1658     using B3LYP/6-31G** and B3LYP/6-31+G** for non-sulfur and sulfur containing
 1659     molecules in a SMILES file and  write out a SD file containing minimum energy
 1660     structure corresponding to each molecule, type:
 1661 
 1662         % Psi4PerformMinimization.py -i Psi4Sample.smi -o Psi4SampleOut.sdf
 1663 
 1664     To run the first example in a quiet mode and write out a SD file, type:
 1665 
 1666         % Psi4PerformMinimization.py -q yes -i Psi4Sample.smi -o
 1667           Psi4SampleOut.sdf
 1668 
 1669     To run the first example along with performing energy mimimization in solution
 1670     using default DDX solvation parameters for water and write out a SD file, type:
 1671 
 1672         % Psi4PerformMinimization.py -i Psi4Sample.smi -o Psi4SampleOut.sdf
 1673           --psi4DDXSolvation yes
 1674 
 1675     To run the first example in multiprocessing mode at molecules level on all
 1676     available CPUs without loading all data into memory and write out a SD file,
 1677     type:
 1678 
 1679         % Psi4PerformMinimization.py --mp yes -i Psi4Sample.smi -o
 1680           Psi4SampleOut.sdf
 1681 
 1682     To run the first example in multiprocessing mode at conformers level on all
 1683     available CPUs without loading all data into memory and write out a SD file,
 1684     type:
 1685 
 1686         % Psi4PerformMinimization.py --mp yes --mpLevel Conformers
 1687           -i Psi4Sample.smi -o Psi4SampleOut.sdf
 1688 
 1689     To run the first example in multiprocessing mode at molecules level on all
 1690     available CPUs by loading all data into memory and write out a SD file, type:
 1691 
 1692         % Psi4PerformMinimization.py  --mp yes --mpParams "inputDataMode,
 1693           InMemory" -i Psi4Sample.smi -o Psi4SampleOut.sdf
 1694 
 1695     To run the first example in multiprocessing mode at molecules level on specific
 1696     number of CPUs and chunk size without loading all data into memory and write
 1697     out a SD file, type:
 1698 
 1699         % Psi4PerformMinimization.py  --mp yes --mpParams "inputDataMode,Lazy,
 1700           numProcesses,4,chunkSize,8" -i Psi4Sample.smi -o Psi4SampleOut.sdf
 1701 
 1702     To generate an initial conformer ensemble of up to 20 conformations using a
 1703     combination of ETKDGv2 distance geometry methodology and MMFF94s forcefield
 1704     minimization followed by energy minimization for a maxium of 20 iterations
 1705     using B3LYP/6-31+G** molecules in a SMILES file and  write out a SD file
 1706     containing minimum energy structure along with energy in specific units,
 1707     type:
 1708 
 1709         % Psi4PerformMinimization.py --confGeneration yes --confParams
 1710           "confMethod,ETKDGv2,forceField,MMFF, forceFieldMMFFVariant,MMFF94s,
 1711           maxConfs,20,embedRMSDCutoff,0.5" --energyUnits "kJ/mol" -m B3LYP
 1712           -b "6-31+G**" --maxIters 20 -i Psi4Sample.smi -o Psi4SampleOut.sdf
 1713 
 1714     To minimize molecules in a 3D files using B3LYP/6-31G** and B3LYP/6-31+G**
 1715     for non-sulfur and sulfur containing molecules for a maximum of 25 iterations
 1716     without generating any conformations and write out a SD file containing 
 1717     minimum energy structures corresponding to each molecule, type:
 1718 
 1719         % Psi4PerformMinimization.py --confGeneration no --maxIters 25
 1720           -i Psi4Sample3D.sdf -o Psi4Sample3DOut.sdf
 1721 
 1722     To run the first example for molecules in a CSV SMILES file, SMILES strings
 1723     in column 1, name column 2, and write out a SD file, type:
 1724 
 1725         % Psi4PerformMinimization.py --infileParams "smilesDelimiter,comma,
 1726           smilesTitleLine,yes,smilesColumn,1,smilesNameColumn,2"
 1727           -i Psi4Sample.csv -o Psi4SampleOut.sdf
 1728 
 1729 Author:
 1730 
 1731     Manish Sud(msud@san.rr.com)
 1732 
 1733 Contributor:
 1734     Francois Berenger
 1735 
 1736 See also:
 1737     Psi4CalculateEnergy.py, Psi4CalculatePartialCharges.py, Psi4GenerateConformers.py
 1738 
 1739 Copyright:
 1740     Copyright (C) 2025 Manish Sud. All rights reserved.
 1741 
 1742     The functionality available in this script is implemented using Psi4, an
 1743     open source quantum chemistry software package, and RDKit, an open
 1744     source toolkit for cheminformatics developed by Greg Landrum.
 1745 
 1746     This file is part of MayaChemTools.
 1747 
 1748     MayaChemTools is free software; you can redistribute it and/or modify it under
 1749     the terms of the GNU Lesser General Public License as published by the Free
 1750     Software Foundation; either version 3 of the License, or (at your option) any
 1751     later version.
 1752 
 1753 """
 1754 
 1755 if __name__ == "__main__":
 1756     main()