MayaChemTools

    1 #!/bin/env python
    2 #
    3 # File: Psi4PerformConstrainedMinimization.py
    4 # Author: Manish Sud <msud@san.rr.com>
    5 #
    6 # Copyright (C) 2024 Manish Sud. All rights reserved.
    7 #
    8 # The functionality available in this script is implemented using Psi4, an
    9 # open source quantum chemistry software package, and RDKit, an open
   10 # source toolkit for cheminformatics developed by Greg Landrum.
   11 #
   12 # This file is part of MayaChemTools.
   13 #
   14 # MayaChemTools is free software; you can redistribute it and/or modify it under
   15 # the terms of the GNU Lesser General Public License as published by the Free
   16 # Software Foundation; either version 3 of the License, or (at your option) any
   17 # later version.
   18 #
   19 # MayaChemTools is distributed in the hope that it will be useful, but without
   20 # any warranty; without even the implied warranty of merchantability of fitness
   21 # for a particular purpose.  See the GNU Lesser General Public License for more
   22 # details.
   23 #
   24 # You should have received a copy of the GNU Lesser General Public License
   25 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
   26 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
   27 # Boston, MA, 02111-1307, USA.
   28 #
   29 
   30 from __future__ import print_function
   31 
   32 # Add local python path to the global path and import standard library modules...
   33 import os
   34 import sys;  sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python"))
   35 import time
   36 import re
   37 import shutil
   38 import multiprocessing as mp
   39 
   40 # Psi4 imports...
   41 if (hasattr(shutil, 'which') and shutil.which("psi4") is None):
   42     sys.stderr.write("\nWarning: Failed to find 'psi4' in your PATH indicating potential issues with your\n")
   43     sys.stderr.write("Psi4 environment. The 'import psi4' directive in the global scope of the script\n")
   44     sys.stderr.write("interferes with the multiprocessing functionality. It is imported later in the\n")
   45     sys.stderr.write("local scope during the execution of the script and may fail. Check/update your\n")
   46     sys.stderr.write("Psi4 environment and try again.\n\n")
   47 
   48 # RDKit imports...
   49 try:
   50     from rdkit import rdBase
   51     from rdkit import Chem
   52     from rdkit.Chem import AllChem, rdMolAlign
   53     from rdkit.Chem import rdFMCS
   54 except ImportError as ErrMsg:
   55     sys.stderr.write("\nFailed to import RDKit module/package: %s\n" % ErrMsg)
   56     sys.stderr.write("Check/update your RDKit environment and try again.\n\n")
   57     sys.exit(1)
   58 
   59 # MayaChemTools imports...
   60 try:
   61     from docopt import docopt
   62     import MiscUtil
   63     import Psi4Util
   64     import RDKitUtil
   65 except ImportError as ErrMsg:
   66     sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg)
   67     sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n")
   68     sys.exit(1)
   69 
   70 ScriptName = os.path.basename(sys.argv[0])
   71 Options = {}
   72 OptionsInfo = {}
   73 
   74 def main():
   75     """Start execution of the script."""
   76     
   77     MiscUtil.PrintInfo("\n%s (Psi4: Imported later; RDKit v%s; MayaChemTools v%s; %s): Starting...\n" % (ScriptName, rdBase.rdkitVersion, MiscUtil.GetMayaChemToolsVersion(), time.asctime()))
   78     
   79     (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime()
   80     
   81     # Retrieve command line arguments and options...
   82     RetrieveOptions()
   83     
   84     # Process and validate command line arguments and options...
   85     ProcessOptions()
   86     
   87     # Perform actions required by the script...
   88     PerformConstrainedMinimization()
   89     
   90     MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName)
   91     MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime))
   92 
   93 def PerformConstrainedMinimization():
   94     """Perform constrained minimization."""
   95     
   96     # Read and validate reference molecule...
   97     RefMol = RetrieveReferenceMolecule()
   98     
   99     # Setup a molecule reader for input file...
  100     MiscUtil.PrintInfo("\nProcessing file %s..." % OptionsInfo["Infile"])
  101     Mols  = RDKitUtil.ReadMolecules(OptionsInfo["Infile"], **OptionsInfo["InfileParams"])
  102     
  103     # Set up a molecule writer...
  104     Writer = RDKitUtil.MoleculesWriter(OptionsInfo["Outfile"], **OptionsInfo["OutfileParams"])
  105     if Writer is None:
  106         MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OptionsInfo["Outfile"])
  107     MiscUtil.PrintInfo("Generating file %s..." % OptionsInfo["Outfile"])
  108 
  109     MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount, WriteFailedCount = ProcessMolecules(RefMol, Mols, Writer)
  110 
  111     if Writer is not None:
  112         Writer.close()
  113     
  114     MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount)
  115     MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount)
  116     MiscUtil.PrintInfo("Number of molecules with missing core scaffold: %d" % CoreScaffoldMissingCount)
  117     MiscUtil.PrintInfo("Number of molecules failed during conformation generation or minimization: %d" % MinimizationFailedCount)
  118     MiscUtil.PrintInfo("Number of molecules failed during writing: %d" % WriteFailedCount)
  119     MiscUtil.PrintInfo("Number of ignored molecules: %d" % (MolCount - ValidMolCount + CoreScaffoldMissingCount + MinimizationFailedCount + WriteFailedCount))
  120 
  121 def ProcessMolecules(RefMol, Mols, Writer):
  122     """Process and minimize molecules."""
  123     
  124     if OptionsInfo["MPMode"]:
  125         return ProcessMoleculesUsingMultipleProcesses(RefMol, Mols, Writer)
  126     else:
  127         return ProcessMoleculesUsingSingleProcess(RefMol, Mols, Writer)
  128 
  129 def ProcessMoleculesUsingSingleProcess(RefMol, Mols, Writer):
  130     """Process and minimize molecules using a single process."""
  131 
  132     # Intialize Psi4...
  133     MiscUtil.PrintInfo("\nInitializing Psi4...")
  134     Psi4Handle = Psi4Util.InitializePsi4(Psi4RunParams = OptionsInfo["Psi4RunParams"], Psi4OptionsParams = OptionsInfo["Psi4OptionsParams"], PrintVersion = True, PrintHeader = True)
  135     OptionsInfo["psi4"] = Psi4Handle
  136 
  137     # Setup max iterations global variable...
  138     Psi4Util.UpdatePsi4OptionsParameters(Psi4Handle, {'GEOM_MAXITER': OptionsInfo["MaxIters"]})
  139     
  140     # Setup conversion factor for energy units...
  141     SetupEnergyConversionFactor(Psi4Handle)
  142     
  143     (MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount, WriteFailedCount) = [0] * 5
  144     
  145     for Mol in Mols:
  146         MolCount += 1
  147 
  148         if not CheckAndValidateMolecule(Mol, MolCount):
  149             continue
  150 
  151         # Setup 2D coordinates for SMILES input file...
  152         if OptionsInfo["SMILESInfileStatus"]:
  153             AllChem.Compute2DCoords(Mol)
  154         
  155         ValidMolCount += 1
  156         
  157         # Setup a reference molecule core containing common scaffold atoms...
  158         RefMolCore = SetupCoreScaffold(RefMol, Mol, MolCount)
  159         if RefMolCore is None:
  160             CoreScaffoldMissingCount += 1
  161             continue
  162         
  163         Mol, CalcStatus, Energy, ScaffoldEmbedRMSD = ConstrainAndMinimizeMolecule(Psi4Handle, Mol, RefMolCore, MolCount)
  164         
  165         if not CalcStatus:
  166             MinimizationFailedCount += 1
  167             continue
  168         
  169         WriteStatus = WriteMolecule(Writer, Mol, MolCount, Energy, ScaffoldEmbedRMSD)
  170         
  171         if not WriteStatus:
  172             WriteFailedCount += 1
  173         
  174     return (MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount, WriteFailedCount)
  175 
  176 def ProcessMoleculesUsingMultipleProcesses(RefMol, Mols, Writer):
  177     """Process and minimize molecules using multiprocessing."""
  178     
  179     if OptionsInfo["MPLevelConformersMode"]:
  180         return ProcessMoleculesUsingMultipleProcessesAtConformersLevel(RefMol, Mols, Writer)
  181     elif OptionsInfo["MPLevelMoleculesMode"]:
  182         return ProcessMoleculesUsingMultipleProcessesAtMoleculesLevel(RefMol, Mols, Writer)
  183     else:
  184         MiscUtil.PrintError("The value, %s,  option \"--mpLevel\" is not supported." % (OptionsInfo["MPLevel"]))
  185         
  186 def ProcessMoleculesUsingMultipleProcessesAtMoleculesLevel(RefMol, Mols, Writer):
  187     """Process and minimize molecules using multiprocessing at molecules level."""
  188 
  189     MiscUtil.PrintInfo("\nPerforming constrained minimization with generation of conformers using multiprocessing at molecules level...")
  190     
  191     MPParams = OptionsInfo["MPParams"]
  192 
  193     # Setup data for initializing a worker process...
  194     OptionsInfo["EncodedRefMol"] = RDKitUtil.MolToBase64EncodedMolString(RefMol)
  195     InitializeWorkerProcessArgs = (MiscUtil.ObjectToBase64EncodedString(Options), MiscUtil.ObjectToBase64EncodedString(OptionsInfo))
  196     
  197     # Setup a encoded mols data iterable for a worker process...
  198     WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(Mols)
  199     
  200     # Setup process pool along with data initialization for each process...
  201     if not OptionsInfo["QuietMode"]:
  202         MiscUtil.PrintInfo("\nConfiguring multiprocessing using %s method..." % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()"))
  203         MiscUtil.PrintInfo("NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n" % (MPParams["NumProcesses"], MPParams["InputDataMode"], ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"])))
  204     
  205     ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeWorkerProcess, InitializeWorkerProcessArgs)
  206     
  207     # Start processing...
  208     if re.match("^Lazy$", MPParams["InputDataMode"], re.I):
  209         Results = ProcessPool.imap(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
  210     elif re.match("^InMemory$", MPParams["InputDataMode"], re.I):
  211         Results = ProcessPool.map(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
  212     else:
  213         MiscUtil.PrintError("The value, %s, specified for \"--inputDataMode\" is not supported." % (MPParams["InputDataMode"]))
  214     
  215     # Print out Psi4 version in the main process...
  216     MiscUtil.PrintInfo("\nInitializing Psi4...\n")
  217     Psi4Handle  = Psi4Util.InitializePsi4(PrintVersion = True, PrintHeader = False)
  218     OptionsInfo["psi4"] = Psi4Handle
  219 
  220     (MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount, WriteFailedCount) = [0] * 5
  221 
  222     for Result in Results:
  223         MolCount += 1
  224         MolIndex, EncodedMol, CoreScaffoldMissingStatus, CalcStatus, Energy, ScaffoldEmbedRMSD  = Result
  225         
  226         if EncodedMol is None:
  227             continue
  228         ValidMolCount += 1
  229 
  230         if CoreScaffoldMissingStatus:
  231             CoreScaffoldMissingStatus += 1
  232             continue
  233         
  234         if not CalcStatus:
  235             MinimizationFailedCount += 1
  236             continue
  237             
  238         Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
  239         WriteStatus = WriteMolecule(Writer, Mol, MolCount, Energy, ScaffoldEmbedRMSD)
  240         
  241         if not WriteStatus:
  242             WriteFailedCount += 1
  243     
  244     return (MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount, WriteFailedCount)
  245     
  246 def InitializeWorkerProcess(*EncodedArgs):
  247     """Initialize data for a worker process."""
  248     
  249     global Options, OptionsInfo
  250 
  251     if not OptionsInfo["QuietMode"]:
  252         MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid())
  253     
  254     # Decode Options and OptionInfo...
  255     Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0])
  256     OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1])
  257     
  258     # Decode RefMol...
  259     OptionsInfo["RefMol"] = RDKitUtil.MolFromBase64EncodedMolString(OptionsInfo["EncodedRefMol"])
  260     
  261     # Psi4 is initialized in the worker process to avoid creation of redundant Psi4
  262     # output files for each process...
  263     OptionsInfo["Psi4Initialized"]  = False
  264 
  265 def WorkerProcess(EncodedMolInfo):
  266     """Process data for a worker process."""
  267 
  268     if not OptionsInfo["Psi4Initialized"]:
  269         InitializePsi4ForWorkerProcess()
  270     
  271     MolIndex, EncodedMol = EncodedMolInfo
  272 
  273     MolNum = MolIndex + 1
  274     CoreScaffoldMissingStatus = False
  275     CalcStatus = False
  276     Energy = None
  277     ScaffoldEmbedRMSD = None
  278 
  279     if EncodedMol is None:
  280         return [MolIndex, None, CoreScaffoldMissingStatus, CalcStatus, Energy, ScaffoldEmbedRMSD]
  281 
  282     Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
  283     if not CheckAndValidateMolecule(Mol, MolNum):
  284         return [MolIndex, None, CoreScaffoldMissingStatus, CalcStatus, Energy, ScaffoldEmbedRMSD]
  285     
  286     # Setup 2D coordinates for SMILES input file...
  287     if OptionsInfo["SMILESInfileStatus"]:
  288         AllChem.Compute2DCoords(Mol)
  289         
  290     # Setup a reference molecule core containing common scaffold atoms...
  291     RefMolCore = SetupCoreScaffold(OptionsInfo["RefMol"], Mol, MolNum)
  292     if RefMolCore is None:
  293         CoreScaffoldMissingStatus = True
  294         return [MolIndex, EncodedMol, CalcStatus, CoreScaffoldMissingStatus, Energy, ScaffoldEmbedRMSD]
  295     
  296     Mol, CalcStatus, Energy, ScaffoldEmbedRMSD = ConstrainAndMinimizeMolecule(OptionsInfo["psi4"], Mol, RefMolCore, MolNum)
  297     
  298     return [MolIndex, RDKitUtil.MolToBase64EncodedMolString(Mol, PropertyPickleFlags = Chem.PropertyPickleOptions.MolProps | Chem.PropertyPickleOptions.PrivateProps), CoreScaffoldMissingStatus, CalcStatus, Energy, ScaffoldEmbedRMSD]
  299 
  300 def ProcessMoleculesUsingMultipleProcessesAtConformersLevel(RefMol, Mols, Writer):
  301     """Process and minimize molecules using multiprocessing at conformers level."""
  302 
  303     MiscUtil.PrintInfo("\nPerforming constrained minimization with generation of conformers using multiprocessing at conformers level...")
  304     
  305     (MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount, WriteFailedCount) = [0] * 5
  306     
  307     for Mol in Mols:
  308         MolCount += 1
  309 
  310         if not CheckAndValidateMolecule(Mol, MolCount):
  311             continue
  312 
  313         # Setup 2D coordinates for SMILES input file...
  314         if OptionsInfo["SMILESInfileStatus"]:
  315             AllChem.Compute2DCoords(Mol)
  316         
  317         ValidMolCount += 1
  318         
  319         # Setup a reference molecule core containing common scaffold atoms...
  320         RefMolCore = SetupCoreScaffold(RefMol, Mol, MolCount)
  321         if RefMolCore is None:
  322             CoreScaffoldMissingCount += 1
  323             continue
  324 
  325         Mol, CalcStatus, Energy, ScaffoldEmbedRMSD = ProcessConformersUsingMultipleProcesses(Mol, RefMolCore, MolCount)
  326         
  327         if not CalcStatus:
  328             MinimizationFailedCount += 1
  329             continue
  330         
  331         WriteStatus = WriteMolecule(Writer, Mol, MolCount, Energy, ScaffoldEmbedRMSD)
  332         
  333         if not WriteStatus:
  334             WriteFailedCount += 1
  335         
  336     return (MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount, WriteFailedCount)
  337 
  338 def ProcessConformersUsingMultipleProcesses(Mol, RefMolCore, MolNum):
  339     """Generate conformers and minimize them using multiple processes. """
  340     
  341     # Add hydrogens...
  342     Mol = Chem.AddHs(Mol, addCoords = True)
  343 
  344     MolName = RDKitUtil.GetMolName(Mol, MolNum)
  345     
  346     # Setup constrained conformers...
  347     MolConfs, MolConfsStatus = ConstrainEmbedAndMinimizeMoleculeUsingRDKit(Mol, RefMolCore, MolNum)
  348     if not MolConfsStatus:
  349         if not OptionsInfo["QuietMode"]:
  350             MiscUtil.PrintWarning("Conformation generation couldn't be performed for molecule %s: Constrained embedding failed...\n" % MolName)
  351         return (Mol, False, None, None)
  352     
  353     MPParams = OptionsInfo["MPParams"]
  354 
  355     # Setup data for initializing a worker process...
  356     OptionsInfo["EncodedRefMolCore"] = RDKitUtil.MolToBase64EncodedMolString(RefMolCore)
  357     InitializeWorkerProcessArgs = (MiscUtil.ObjectToBase64EncodedString(Options), MiscUtil.ObjectToBase64EncodedString(OptionsInfo))
  358 
  359     # Setup a encoded mols data iterable for a worker process...
  360     WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(MolConfs)
  361     
  362     # Setup process pool along with data initialization for each process...
  363     if not OptionsInfo["QuietMode"]:
  364         MiscUtil.PrintInfo("\nConfiguring multiprocessing using %s method..." % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()"))
  365         MiscUtil.PrintInfo("NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n" % (MPParams["NumProcesses"], MPParams["InputDataMode"], ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"])))
  366     
  367     ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeConformerWorkerProcess, InitializeWorkerProcessArgs)
  368     
  369     # Start processing...
  370     if re.match("^Lazy$", MPParams["InputDataMode"], re.I):
  371         Results = ProcessPool.imap(ConformerWorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
  372     elif re.match("^InMemory$", MPParams["InputDataMode"], re.I):
  373         Results = ProcessPool.map(ConformerWorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
  374     else:
  375         MiscUtil.PrintError("The value, %s, specified for \"--inputDataMode\" is not supported." % (MPParams["InputDataMode"]))
  376 
  377     ConfNums = []
  378     MolConfsMap = {}
  379     CalcEnergyMap = {}
  380     CalcFailedCount = 0
  381     
  382     for Result in Results:
  383         ConfNum, EncodedMolConf, CalcStatus, Energy = Result
  384 
  385         if EncodedMolConf is None:
  386             CalcFailedCount += 1
  387             continue
  388         
  389         if not CalcStatus:
  390             CalcFailedCount += 1
  391             continue
  392         
  393         MolConf = RDKitUtil.MolFromBase64EncodedMolString(EncodedMolConf)
  394         
  395         ConfNums.append(ConfNum)
  396         CalcEnergyMap[ConfNum] = Energy
  397         MolConfsMap[ConfNum] = MolConf
  398     
  399     if CalcFailedCount:
  400         return (Mol, False, None, None)
  401     
  402     SortedConfNums = sorted(ConfNums, key = lambda ConfNum: CalcEnergyMap[ConfNum])
  403     MinEnergyConfNum = SortedConfNums[0]
  404     
  405     MinEnergy = "%.*f" % (OptionsInfo["Precision"], CalcEnergyMap[MinEnergyConfNum])  if OptionsInfo["EnergyOut"] else None
  406     MinEnergyMolConf = MolConfsMap[MinEnergyConfNum]
  407     
  408     ScaffoldEmbedRMSD = "%.4f" % float(MinEnergyMolConf.GetProp('EmbedRMS')) if OptionsInfo["ScaffoldRMSDOut"] else None
  409     MinEnergyMolConf.ClearProp('EmbedRMS')
  410 
  411     return (MinEnergyMolConf, True, MinEnergy, ScaffoldEmbedRMSD)
  412 
  413 def InitializeConformerWorkerProcess(*EncodedArgs):
  414     """Initialize data for a conformer worker process."""
  415     
  416     global Options, OptionsInfo
  417 
  418     if not OptionsInfo["QuietMode"]:
  419         MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid())
  420     
  421     # Decode Options and OptionInfo...
  422     Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0])
  423     OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1])
  424     
  425     # Decode RefMol...
  426     OptionsInfo["RefMolCore"] = RDKitUtil.MolFromBase64EncodedMolString(OptionsInfo["EncodedRefMolCore"])
  427     
  428     # Psi4 is initialized in the worker process to avoid creation of redundant Psi4
  429     # output files for each process...
  430     OptionsInfo["Psi4Initialized"]  = False
  431 
  432 def ConformerWorkerProcess(EncodedMolInfo):
  433     """Process data for a conformer worker process."""
  434 
  435     if not OptionsInfo["Psi4Initialized"]:
  436         InitializePsi4ForWorkerProcess()
  437     
  438     MolConfIndex, EncodedMolConf = EncodedMolInfo
  439 
  440     MolConfNum = MolConfIndex
  441     CalcStatus = False
  442     Energy = None
  443     
  444     if EncodedMolConf is None:
  445         return [MolConfIndex, None, CalcStatus, Energy]
  446     
  447     MolConf = RDKitUtil.MolFromBase64EncodedMolString(EncodedMolConf)
  448     MolConfName = RDKitUtil.GetMolName(MolConf, MolConfNum)
  449 
  450     if not OptionsInfo["QuietMode"]:
  451         MiscUtil.PrintInfo("Processing molecule %s conformer number %s..." % (MolConfName, MolConfNum))
  452     
  453     # Perform Psi4 constrained minimization...
  454     CalcStatus, Energy = ConstrainAndMinimizeMoleculeUsingPsi4(OptionsInfo["psi4"], MolConf, OptionsInfo["RefMolCore"], MolConfNum)
  455     if not CalcStatus:
  456         if not OptionsInfo["QuietMode"]:
  457             MiscUtil.PrintWarning("Minimization couldn't be performed for molecule %s\n" % (MolName))
  458         return [MolConfIndex, EncodedMolConf, CalcStatus, Energy]
  459 
  460     return [MolConfIndex, RDKitUtil.MolToBase64EncodedMolString(MolConf, PropertyPickleFlags = Chem.PropertyPickleOptions.MolProps | Chem.PropertyPickleOptions.PrivateProps), CalcStatus, Energy]
  461     
  462 def InitializePsi4ForWorkerProcess():
  463     """Initialize Psi4 for a worker process."""
  464     
  465     if OptionsInfo["Psi4Initialized"]:
  466         return
  467 
  468     OptionsInfo["Psi4Initialized"] = True
  469 
  470     if OptionsInfo["MPLevelConformersMode"] and re.match("auto", OptionsInfo["Psi4RunParams"]["OutputFileSpecified"], re.I):
  471         # Run Psi4 in quiet mode during multiprocessing at Conformers level for 'auto' OutputFile...
  472         OptionsInfo["Psi4RunParams"]["OutputFile"] = "quiet"
  473     else:
  474         # Update output file...
  475         OptionsInfo["Psi4RunParams"]["OutputFile"] = Psi4Util.UpdatePsi4OutputFileUsingPID(OptionsInfo["Psi4RunParams"]["OutputFile"], os.getpid())
  476             
  477     # Intialize Psi4...
  478     OptionsInfo["psi4"] = Psi4Util.InitializePsi4(Psi4RunParams = OptionsInfo["Psi4RunParams"], Psi4OptionsParams = OptionsInfo["Psi4OptionsParams"], PrintVersion = False, PrintHeader = True)
  479     
  480     # Setup max iterations global variable...
  481     Psi4Util.UpdatePsi4OptionsParameters(OptionsInfo["psi4"], {'GEOM_MAXITER': OptionsInfo["MaxIters"]})
  482     
  483     # Setup conversion factor for energy units...
  484     SetupEnergyConversionFactor(OptionsInfo["psi4"])
  485 
  486 def RetrieveReferenceMolecule():
  487     """Retrieve and validate reference molecule."""
  488     
  489     RefFile = OptionsInfo["RefFile"]
  490     
  491     MiscUtil.PrintInfo("\nProcessing file %s..." % (RefFile))
  492     OptionsInfo["InfileParams"]["AllowEmptyMols"] = False
  493     ValidRefMols, RefMolCount, ValidRefMolCount  = RDKitUtil.ReadAndValidateMolecules(RefFile, **OptionsInfo["InfileParams"])
  494     
  495     if ValidRefMolCount == 0:
  496         MiscUtil.PrintError("The reference file, %s, contains no valid molecules." % RefFile)
  497     elif ValidRefMolCount > 1:
  498         MiscUtil.PrintWarning("The reference file, %s, contains, %d, valid molecules. Using first molecule as the reference molecule..." % (RefFile, ValidRefMolCount))
  499     
  500     RefMol = ValidRefMols[0]
  501 
  502     if OptionsInfo["UseScaffoldSMARTS"]:
  503         ScaffoldPatternMol = Chem.MolFromSmarts(OptionsInfo["ScaffoldSMARTS"])
  504         if ScaffoldPatternMol is None:
  505             MiscUtil.PrintError("Failed to create scaffold pattern molecule. The scaffold SMARTS pattern, %s, specified using \"-s, --scaffold\" option is not valid." % (OptionsInfo["ScaffoldSMARTS"]))
  506         
  507         if not RefMol.HasSubstructMatch(ScaffoldPatternMol):
  508             MiscUtil.PrintError("The scaffold SMARTS pattern, %s, specified using \"-s, --scaffold\" option, is missing in the first valid reference molecule." % (OptionsInfo["ScaffoldSMARTS"]))
  509             
  510     return RefMol
  511 
  512 def SetupCoreScaffold(RefMol, Mol, MolCount):
  513     """Setup a reference molecule core containing common scaffold atoms between
  514     a pair of molecules."""
  515 
  516     if OptionsInfo["UseScaffoldMCS"]:
  517         return SetupCoreScaffoldByMCS(RefMol, Mol, MolCount)
  518     elif OptionsInfo["UseScaffoldSMARTS"]:
  519         return SetupCoreScaffoldBySMARTS(RefMol, Mol, MolCount)
  520     else:
  521         MiscUtil.PrintError("The  value, %s, specified for  \"-s, --scaffold\" option is not supported." % (OptionsInfo["Scaffold"]))
  522         
  523 def SetupCoreScaffoldByMCS(RefMol, Mol, MolCount):
  524     """Setup a reference molecule core containing common scaffold atoms between
  525     a pair of molecules using MCS."""
  526     
  527     MCSParams = OptionsInfo["MCSParams"]
  528     Mols = [RefMol, Mol]
  529 
  530     MCSResultObject = rdFMCS.FindMCS(Mols, maximizeBonds = MCSParams["MaximizeBonds"], threshold = MCSParams["Threshold"], timeout = MCSParams["TimeOut"], verbose = MCSParams["Verbose"], matchValences = MCSParams["MatchValences"], ringMatchesRingOnly = MCSParams["RingMatchesRingOnly"], completeRingsOnly = MCSParams["CompleteRingsOnly"], matchChiralTag = MCSParams["MatchChiralTag"], atomCompare = MCSParams["AtomCompare"], bondCompare = MCSParams["BondCompare"], seedSmarts = MCSParams["SeedSMARTS"]) 
  531     
  532     if MCSResultObject.canceled:
  533         if not OptionsInfo["QuietMode"]:
  534             MiscUtil.PrintWarning("MCS failed to identify a common core scaffold between reference moecule and input molecule %s. Specify a different set of parameters using \"-m, --mcsParams\" option and try again." % (RDKitUtil.GetMolName(Mol, MolCount)))
  535         return None
  536     
  537     CoreNumAtoms = MCSResultObject.numAtoms
  538     CoreNumBonds = MCSResultObject.numBonds
  539     
  540     SMARTSCore = MCSResultObject.smartsString
  541     
  542     if not len(SMARTSCore):
  543         if not OptionsInfo["QuietMode"]:
  544             MiscUtil.PrintWarning("MCS failed to identify a common core scaffold between reference moecule and input molecule %s. Specify a different set of parameters using \"-m, --mcsParams\" option and try again." % (RDKitUtil.GetMolName(Mol, MolCount)))
  545         return None
  546         
  547     if CoreNumAtoms < MCSParams["MinNumAtoms"]:
  548         if not OptionsInfo["QuietMode"]:
  549             MiscUtil.PrintWarning("Number of atoms, %d, in core scaffold identified by MCS is less than, %d, as specified by \"minNumAtoms\" parameter in  \"-m, --mcsParams\" option." % (CoreNumAtoms, MCSParams["MinNumAtoms"]))
  550         return None
  551     
  552     if CoreNumBonds < MCSParams["MinNumBonds"]:
  553         if not OptionsInfo["QuietMode"]:
  554             MiscUtil.PrintWarning("Number of bonds, %d, in core scaffold identified by MCS is less than, %d, as specified by \"minNumBonds\" parameter in  \"-m, --mcsParams\" option." % (CoreNumBonds, MCSParams["MinNumBonds"]))
  555         return None
  556 
  557     return GenerateCoreMol(RefMol, SMARTSCore)
  558     
  559 def SetupCoreScaffoldBySMARTS(RefMol, Mol, MolCount):
  560     """Setup a reference molecule core containing common scaffold atoms between
  561     a pair of molecules using specified SMARTS."""
  562     
  563     if OptionsInfo["ScaffoldPatternMol"] is None:
  564         OptionsInfo["ScaffoldPatternMol"] = Chem.MolFromSmarts(OptionsInfo["ScaffoldSMARTS"])
  565         
  566     if not Mol.HasSubstructMatch(OptionsInfo["ScaffoldPatternMol"]):
  567         if not OptionsInfo["QuietMode"]:
  568             MiscUtil.PrintWarning("The scaffold SMARTS pattern, %s, specified using \"-s, --scaffold\" option is missing in input molecule,  %s." % (OptionsInfo["ScaffoldSMARTS"], RDKitUtil.GetMolName(Mol, MolCount)))
  569         return None
  570 
  571     return GenerateCoreMol(RefMol, OptionsInfo["ScaffoldSMARTS"])
  572 
  573 def GenerateCoreMol(RefMol, SMARTSCore):
  574     """Generate core molecule for embedding. """
  575 
  576     # Create a molecule corresponding to core atoms...
  577     SMARTSCoreMol = Chem.MolFromSmarts(SMARTSCore)
  578 
  579     # Setup a ref molecule containing core atoms with dummy atoms as
  580     # attachment points for atoms around the core atoms...
  581     Core = AllChem.ReplaceSidechains(Chem.RemoveHs(RefMol), SMARTSCoreMol)
  582 
  583     # Delete any substructures containing dummy atoms..
  584     RefMolCore = AllChem.DeleteSubstructs(Core, Chem.MolFromSmiles('*'))
  585     RefMolCore.UpdatePropertyCache()
  586     
  587     return RefMolCore
  588 
  589 def ConstrainAndMinimizeMolecule(Psi4Handle, Mol, RefMolCore, MolNum = None):
  590     """Constrain and minimize molecule."""
  591 
  592     # Add hydrogens...
  593     Mol = Chem.AddHs(Mol, addCoords = True)
  594 
  595     MolName = RDKitUtil.GetMolName(Mol, MolNum)
  596     
  597     # Setup constrained conformers...
  598     MolConfs, MolConfsStatus = ConstrainEmbedAndMinimizeMoleculeUsingRDKit(Mol, RefMolCore, MolNum)
  599     if not MolConfsStatus:
  600         if not OptionsInfo["QuietMode"]:
  601             MiscUtil.PrintWarning("Conformation generation couldn't be performed for molecule %s: Constrained embedding failed...\n" % MolName)
  602         return (Mol, False, None, None)
  603 
  604     # Minimize conformers...
  605     ConfNums = []
  606     CalcEnergyMap = {}
  607     MolConfsMap = {}
  608     
  609     for ConfNum, MolConf in enumerate(MolConfs):
  610         if not OptionsInfo["QuietMode"]:
  611             MiscUtil.PrintInfo("\nProcessing molecule %s conformer number %s..." % (MolName, ConfNum))
  612             
  613         CalcStatus, Energy = ConstrainAndMinimizeMoleculeUsingPsi4(Psi4Handle, MolConf, RefMolCore, MolNum)
  614         if not CalcStatus:
  615             if not OptionsInfo["QuietMode"]:
  616                 MiscUtil.PrintWarning("Minimization couldn't be performed for molecule %s\n" % (MolName))
  617             return (Mol, False, None, None)
  618         
  619         ConfNums.append(ConfNum)
  620         CalcEnergyMap[ConfNum] = Energy
  621         MolConfsMap[ConfNum] = MolConf
  622     
  623     SortedConfNums = sorted(ConfNums, key = lambda ConfNum: CalcEnergyMap[ConfNum])
  624     MinEnergyConfNum = SortedConfNums[0]
  625     
  626     MinEnergy = "%.*f" % (OptionsInfo["Precision"], CalcEnergyMap[MinEnergyConfNum])  if OptionsInfo["EnergyOut"] else None
  627     MinEnergyMolConf = MolConfsMap[MinEnergyConfNum]
  628     
  629     ScaffoldEmbedRMSD = "%.4f" % float(MinEnergyMolConf.GetProp('EmbedRMS')) if OptionsInfo["ScaffoldRMSDOut"] else None
  630     MinEnergyMolConf.ClearProp('EmbedRMS')
  631     
  632     return (MinEnergyMolConf, True, MinEnergy, ScaffoldEmbedRMSD)
  633 
  634 def ConstrainAndMinimizeMoleculeUsingPsi4(Psi4Handle, Mol, RefMolCore, MolNum, ConfID = -1):
  635     """Minimize molecule using Psi4."""
  636 
  637     # Setup a list for constrained atoms...
  638     ConstrainedAtomIndices = SetupConstrainedAtomIndicesForPsi4(Mol, RefMolCore)
  639     if len(ConstrainedAtomIndices) == 0:
  640         return (False, None)
  641 
  642     # Setup a Psi4Mol...
  643     Psi4Mol = SetupPsi4Mol(Psi4Handle, Mol, MolNum, ConfID)
  644     if Psi4Mol is None:
  645         return (False, None)
  646         
  647     #  Setup reference wave function...
  648     Reference = SetupReferenceWavefunction(Mol)
  649     Psi4Handle.set_options({'Reference': Reference})
  650     
  651     # Setup method name and basis set...
  652     MethodName, BasisSet = SetupMethodNameAndBasisSet(Mol)
  653 
  654     # Setup freeze list for constrained atoms...
  655     FreezeXYZList = [("%s xyz" % AtomIdex) for AtomIdex in ConstrainedAtomIndices]
  656     FreezeXYZString = " %s " % " ".join(FreezeXYZList)
  657     Psi4Handle.set_options({"OPTKING__frozen_cartesian": FreezeXYZString})
  658     
  659     # Optimize geometry...
  660     Status, Energy, WaveFunction = Psi4Util.PerformGeometryOptimization(Psi4Handle, Psi4Mol, MethodName, BasisSet, ReturnWaveFunction = True, Quiet = OptionsInfo["QuietMode"])
  661     
  662     if not Status:
  663         PerformPsi4Cleanup(Psi4Handle)
  664         return (False, None)
  665 
  666     # Update atom positions...
  667     AtomPositions = Psi4Util.GetAtomPositions(Psi4Handle, WaveFunction, InAngstroms = True)
  668     RDKitUtil.SetAtomPositions(Mol, AtomPositions, ConfID = ConfID)
  669 
  670     # Convert energy units...
  671     if OptionsInfo["ApplyEnergyConversionFactor"]:
  672         Energy = Energy * OptionsInfo["EnergyConversionFactor"]
  673     
  674     # Clean up
  675     PerformPsi4Cleanup(Psi4Handle)
  676     
  677     return (True, Energy)
  678 
  679 def ConstrainEmbedAndMinimizeMoleculeUsingRDKit(Mol, RefMolCore, MolNum = None):
  680     """Constrain, embed, and minimize molecule."""
  681 
  682     # Setup forcefield function to use for constrained minimization...
  683     ForceFieldFunction = None
  684     ForceFieldName = None
  685     if OptionsInfo["ConfGenerationParams"]["UseUFF"]:
  686         ForceFieldFunction = lambda mol, confId = -1 : AllChem.UFFGetMoleculeForceField(mol, confId = confId)
  687         ForeceFieldName = "UFF"
  688     else:
  689         ForceFieldFunction = lambda mol, confId = -1 : AllChem.MMFFGetMoleculeForceField(mol, AllChem.MMFFGetMoleculeProperties(mol, mmffVariant = OptionsInfo["ConfGenerationParams"]["MMFFVariant"]) , confId = confId)
  690         ForeceFieldName = "MMFF"
  691 
  692     if ForceFieldFunction is None:
  693         if not OptionsInfo["QuietMode"]:
  694             MiscUtil.PrintWarning("Failed to setup forcefield %s for molecule: %s\n" % (ForceFieldName, RDKitUtil.GetMolName(Mol, MolNum)))
  695         return (None, False)
  696     
  697     MaxConfs = OptionsInfo["ConfGenerationParams"]["MaxConfs"]
  698     EnforceChirality = OptionsInfo["ConfGenerationParams"]["EnforceChirality"]
  699     UseExpTorsionAnglePrefs = OptionsInfo["ConfGenerationParams"]["UseExpTorsionAnglePrefs"]
  700     ETVersion = OptionsInfo["ConfGenerationParams"]["ETVersion"]
  701     UseBasicKnowledge = OptionsInfo["ConfGenerationParams"]["UseBasicKnowledge"]
  702     UseTethers = OptionsInfo["ConfGenerationParams"]["UseTethers"]
  703 
  704     MolConfs = []
  705     ConfIDs = [ConfID for ConfID in range(0, MaxConfs)]
  706     
  707     for ConfID in ConfIDs:
  708         try:
  709             MolConf = Chem.Mol(Mol)
  710             AllChem.ConstrainedEmbed(MolConf, RefMolCore, useTethers = UseTethers, coreConfId = -1, randomseed = ConfID, getForceField = ForceFieldFunction, enforceChirality = EnforceChirality, useExpTorsionAnglePrefs = UseExpTorsionAnglePrefs, useBasicKnowledge = UseBasicKnowledge, ETversion = ETVersion)
  711         except (ValueError, RuntimeError, Chem.rdchem.KekulizeException)  as ErrMsg:
  712             if not OptionsInfo["QuietMode"]:
  713                 MolName = RDKitUtil.GetMolName(Mol, MolNum)
  714                 MiscUtil.PrintWarning("Constrained embedding couldn't  be performed for molecule %s:\n%s\n" % (RDKitUtil.GetMolName(Mol, MolNum), ErrMsg))
  715             return (None, False)
  716         
  717         MolConfs.append(MolConf)
  718 
  719     return FilterConstrainedConformationsByRMSD(Mol, MolConfs, MolNum)
  720 
  721 def FilterConstrainedConformationsByRMSD(Mol, MolConfs, MolNum = None):
  722     """Filter contarained conformations by RMSD."""
  723     
  724     EmbedRMSDCutoff = OptionsInfo["ConfGenerationParams"]["EmbedRMSDCutoff"]
  725     if EmbedRMSDCutoff is None or EmbedRMSDCutoff <= 0:
  726         if not OptionsInfo["QuietMode"]:
  727             MiscUtil.PrintInfo("\nGenerating initial ensemble of  constrained conformations by distance geometry  and forcefield for %s - EmbedRMSDCutoff: None; Size: %s" % (RDKitUtil.GetMolName(Mol, MolNum), len(MolConfs)))
  728         return (MolConfs, True)
  729 
  730     FirstMolConf = True
  731     SelectedMolConfs = []
  732     for MolConfIndex, MolConf in enumerate(MolConfs):
  733         if FirstMolConf:
  734             FirstMolConf = False
  735             SelectedMolConfs.append(MolConf)
  736             continue
  737         
  738         # Compare RMSD against all selected conformers...
  739         ProbeMolConf = Chem.RemoveHs(Chem.Mol(MolConf))
  740         IgnoreConf = False
  741         for SelectedMolConfIndex, SelectedMolConf in enumerate(SelectedMolConfs):
  742             RefMolConf = Chem.RemoveHs(Chem.Mol(SelectedMolConf))
  743             CalcRMSD = rdMolAlign.AlignMol(ProbeMolConf, RefMolConf)
  744             
  745             if CalcRMSD < EmbedRMSDCutoff:
  746                 IgnoreConf = True
  747                 break
  748         
  749         if IgnoreConf:
  750             continue
  751         
  752         SelectedMolConfs.append(MolConf)
  753         
  754     if not OptionsInfo["QuietMode"]:
  755         MiscUtil.PrintInfo("\nGenerating initial ensemble of constrained conformations by distance geometry and forcefield for %s - EmbedRMSDCutoff: %s; Size: %s; Size after RMSD filtering: %s" % (RDKitUtil.GetMolName(Mol, MolNum), EmbedRMSDCutoff, len(MolConfs), len(SelectedMolConfs)))
  756     
  757     return (SelectedMolConfs, True)
  758 
  759 def SetupConstrainedAtomIndicesForPsi4(Mol, RefMolCore, ConstrainHydrogens = False):
  760     """Setup a list of atom indices to be constrained during Psi4 minimizaiton."""
  761 
  762     AtomIndices = []
  763     
  764     # Collect matched heavy atoms along with attached hydrogens...
  765     for AtomIndex in Mol.GetSubstructMatch(RefMolCore):
  766         Atom = Mol.GetAtomWithIdx(AtomIndex)
  767         if Atom.GetAtomicNum() == 1:
  768             continue
  769         
  770         AtomIndices.append(AtomIndex)
  771         for AtomNbr in Atom.GetNeighbors():
  772             if AtomNbr.GetAtomicNum() == 1:
  773                 if ConstrainHydrogens:
  774                     AtomNbrIndex = AtomNbr.GetIdx()
  775                     AtomIndices.append(AtomNbrIndex)
  776     
  777     AtomIndices = sorted(AtomIndices)
  778 
  779     # Atom indices start from 1 for Psi4 instead 0 for RDKit...
  780     AtomIndices = [ AtomIndex + 1 for AtomIndex in AtomIndices]
  781     
  782     return AtomIndices
  783     
  784 def SetupPsi4Mol(Psi4Handle, Mol, MolNum, ConfID = -1):
  785     """Setup a Psi4 molecule object."""
  786 
  787     # Turn off recentering and reorientation to perform optimization in the
  788     # constrained coordinate space...
  789     MolGeometry = RDKitUtil.GetPsi4XYZFormatString(Mol, ConfID = ConfID, NoCom = True, NoReorient = True)
  790 
  791     try:
  792         Psi4Mol = Psi4Handle.geometry(MolGeometry)
  793     except Exception as ErrMsg:
  794         Psi4Mol = None
  795         if not OptionsInfo["QuietMode"]:
  796             MiscUtil.PrintWarning("Failed to create Psi4 molecule from geometry string: %s\n" % ErrMsg)
  797             MolName = RDKitUtil.GetMolName(Mol, MolNum)
  798             MiscUtil.PrintWarning("Ignoring molecule: %s" % MolName)
  799 
  800     return Psi4Mol
  801 
  802 def PerformPsi4Cleanup(Psi4Handle):
  803     """Perform clean up."""
  804 
  805     # Clean up after Psi4 run...
  806     Psi4Handle.core.clean()
  807     
  808     # Clean up any leftover scratch files...
  809     if OptionsInfo["MPMode"]:
  810         Psi4Util.RemoveScratchFiles(Psi4Handle, OptionsInfo["Psi4RunParams"]["OutputFile"])
  811 
  812 def CheckAndValidateMolecule(Mol, MolCount = None):
  813     """Validate molecule for Psi4 calculations."""
  814     
  815     if Mol is None:
  816         if not OptionsInfo["QuietMode"]:
  817             MiscUtil.PrintInfo("\nProcessing molecule number %s..." % MolCount)
  818         return False
  819     
  820     MolName = RDKitUtil.GetMolName(Mol, MolCount)
  821     if not OptionsInfo["QuietMode"]:
  822         MiscUtil.PrintInfo("\nProcessing molecule %s..." % MolName)
  823     
  824     if RDKitUtil.IsMolEmpty(Mol):
  825         if not OptionsInfo["QuietMode"]:
  826             MiscUtil.PrintWarning("Ignoring empty molecule: %s\n" % MolName)
  827         return False
  828     
  829     if not RDKitUtil.ValidateElementSymbols(RDKitUtil.GetAtomSymbols(Mol)):
  830         if not OptionsInfo["QuietMode"]:
  831             MiscUtil.PrintWarning("Ignoring molecule containing invalid element symbols: %s\n" % MolName)
  832         return False
  833     
  834     return True
  835 
  836 def SetupMethodNameAndBasisSet(Mol):
  837     """Setup method name and basis set."""
  838     
  839     MethodName = OptionsInfo["MethodName"]
  840     if OptionsInfo["MethodNameAuto"]:
  841         MethodName = "B3LYP"
  842     
  843     BasisSet = OptionsInfo["BasisSet"]
  844     if OptionsInfo["BasisSetAuto"]:
  845         BasisSet = "6-31+G**" if RDKitUtil.IsAtomSymbolPresentInMol(Mol, "S") else "6-31G**"
  846     
  847     return (MethodName, BasisSet)
  848 
  849 def SetupReferenceWavefunction(Mol):
  850     """Setup reference wavefunction."""
  851     
  852     Reference = OptionsInfo["Reference"]
  853     if OptionsInfo["ReferenceAuto"]:
  854         Reference = 'UHF' if (RDKitUtil.GetSpinMultiplicity(Mol) > 1) else 'RHF'
  855     
  856     return Reference
  857 
  858 def SetupEnergyConversionFactor(Psi4Handle):
  859     """Setup converstion factor for energt units. The Psi4 energy units are Hartrees."""
  860     
  861     EnergyUnits = OptionsInfo["EnergyUnits"]
  862     
  863     ApplyConversionFactor = True
  864     if re.match("^kcal\/mol$", EnergyUnits, re.I):
  865         ConversionFactor = Psi4Handle.constants.hartree2kcalmol
  866     elif re.match("^kJ\/mol$", EnergyUnits, re.I):
  867         ConversionFactor = Psi4Handle.constants.hartree2kJmol
  868     elif re.match("^eV$", EnergyUnits, re.I):
  869         ConversionFactor = Psi4Handle.constants.hartree2ev
  870     else:
  871         ApplyConversionFactor = False
  872         ConversionFactor = 1.0
  873     
  874     OptionsInfo["ApplyEnergyConversionFactor"] = ApplyConversionFactor
  875     OptionsInfo["EnergyConversionFactor"] = ConversionFactor
  876 
  877 def WriteMolecule(Writer, Mol, MolNum = None, Energy = None, ScaffoldEmbedRMSD = None, ConfID = None,):
  878     """Write molecule."""
  879 
  880     if ScaffoldEmbedRMSD is not None:
  881         Mol.SetProp("CoreScaffoldEmbedRMSD", ScaffoldEmbedRMSD)
  882             
  883     if OptionsInfo["EnergyOut"]  and Energy is not None:
  884         Mol.SetProp(OptionsInfo["EnergyDataFieldLabel"], Energy)
  885             
  886     try:
  887         if ConfID is None:
  888             Writer.write(Mol)
  889         else:
  890             Writer.write(Mol, confId = ConfID)
  891     except (ValueError, RuntimeError) as ErrMsg:
  892         if not OptionsInfo["QuietMode"]:
  893             MolName = RDKitUtil.GetMolName(Mol, MolNum)
  894             MiscUtil.PrintWarning("Failed to write molecule %s:\n%s\n" % (MolName, ErrMsg))
  895         return False
  896 
  897     return True
  898 
  899 def ProcessMCSParameters():
  900     """Set up and process MCS parameters."""
  901 
  902     SetupMCSParameters()
  903     ProcessSpecifiedMCSParameters()
  904 
  905 def SetupMCSParameters():
  906     """Set up default MCS parameters."""
  907     
  908     OptionsInfo["MCSParams"] = {"MaximizeBonds": True, "Threshold": 0.9, "TimeOut": 3600, "Verbose": False, "MatchValences": True, "MatchChiralTag": False, "RingMatchesRingOnly": True, "CompleteRingsOnly": True, "AtomCompare": rdFMCS.AtomCompare.CompareElements, "BondCompare": rdFMCS.BondCompare.CompareOrder, "SeedSMARTS": "", "MinNumAtoms": 1, "MinNumBonds": 0}
  909     
  910 def ProcessSpecifiedMCSParameters():
  911     """Process specified MCS parameters."""
  912 
  913     if re.match("^auto$", OptionsInfo["SpecifiedMCSParams"], re.I):
  914         # Nothing to process...
  915         return
  916     
  917     # Parse specified parameters...
  918     MCSParams = re.sub(" ", "", OptionsInfo["SpecifiedMCSParams"])
  919     if not MCSParams:
  920         MiscUtil.PrintError("No valid parameter name and value pairs specified using \"-m, --mcsParams\" option.")
  921 
  922     MCSParamsWords = MCSParams.split(",")
  923     if len(MCSParamsWords) % 2:
  924         MiscUtil.PrintError("The number of comma delimited paramater names and values, %d, specified using \"-m, --mcsParams\" option must be an even number." % (len(MCSParamsWords)))
  925     
  926     # Setup  canonical parameter names...
  927     ValidParamNames = []
  928     CanonicalParamNamesMap = {}
  929     for ParamName in sorted(OptionsInfo["MCSParams"]):
  930         ValidParamNames.append(ParamName)
  931         CanonicalParamNamesMap[ParamName.lower()] = ParamName
  932 
  933     # Validate and set paramater names and value...
  934     for Index in range(0, len(MCSParamsWords), 2):
  935         Name = MCSParamsWords[Index]
  936         Value = MCSParamsWords[Index + 1]
  937 
  938         CanonicalName = Name.lower()
  939         if  not CanonicalName in CanonicalParamNamesMap:
  940             MiscUtil.PrintError("The parameter name, %s, specified using \"-m, --mcsParams\" option is not a valid name. Supported parameter names: %s" % (Name,  " ".join(ValidParamNames)))
  941 
  942         ParamName = CanonicalParamNamesMap[CanonicalName]
  943         if re.match("^Threshold$", ParamName, re.I):
  944             Value = float(Value)
  945             if Value <= 0.0 or Value > 1.0 :
  946                 MiscUtil.PrintError("The parameter value, %s, specified using \"-m, --mcsParams\" option  for parameter, %s, is not a valid value. Supported values: > 0 and <= 1.0" % (Value, Name))
  947             ParamValue = Value
  948         elif re.match("^Timeout$", ParamName, re.I):
  949             Value = int(Value)
  950             if Value <= 0:
  951                 MiscUtil.PrintError("The parameter value, %s, specified using \"-m, --mcsParams\" option  for parameter, %s, is not a valid value. Supported values: > 0" % (Value, Name))
  952             ParamValue = Value
  953         elif re.match("^MinNumAtoms$", ParamName, re.I):
  954             Value = int(Value)
  955             if Value < 1:
  956                 MiscUtil.PrintError("The parameter value, %s, specified using \"-m, --mcsParams\" option  for parameter, %s, is not a valid value. Supported values: >= 1" % (Value, Name))
  957             ParamValue = Value
  958         elif re.match("^MinNumBonds$", ParamName, re.I):
  959             Value = int(Value)
  960             if Value < 0:
  961                 MiscUtil.PrintError("The parameter value, %s, specified using \"-m, --mcsParams\" option  for parameter, %s, is not a valid value. Supported values: >=0 " % (Value, Name))
  962             ParamValue = Value
  963         elif re.match("^AtomCompare$", ParamName, re.I):
  964             if re.match("^CompareAny$", Value, re.I):
  965                 ParamValue = rdFMCS.AtomCompare.CompareAny
  966             elif re.match("^CompareElements$", Value, re.I):
  967                 ParamValue = Chem.rdFMCS.AtomCompare.CompareElements
  968             elif re.match("^CompareIsotopes$", Value, re.I):
  969                 ParamValue = Chem.rdFMCS.AtomCompare.CompareIsotopes
  970             else:
  971                 MiscUtil.PrintError("The parameter value, %s, specified using \"-m, --mcsParams\" option  for parameter, %s, is not a valid value. Supported values: CompareAny CompareElements CompareIsotopes" % (Value, Name))
  972         elif re.match("^BondCompare$", ParamName, re.I):
  973             if re.match("^CompareAny$", Value, re.I):
  974                 ParamValue = Chem.rdFMCS.BondCompare.CompareAny
  975             elif re.match("^CompareOrder$", Value, re.I):
  976                 ParamValue = rdFMCS.BondCompare.CompareOrder
  977             elif re.match("^CompareOrderExact$", Value, re.I):
  978                 ParamValue = rdFMCS.BondCompare.CompareOrderExact
  979             else:
  980                 MiscUtil.PrintError("The parameter value, %s, specified using \"-m, --mcsParams\" option  for parameter, %s, is not a valid value. Supported values: CompareAny CompareOrder CompareOrderExact" % (Value, Name))
  981         elif re.match("^SeedSMARTS$", ParamName, re.I):
  982             if not len(Value):
  983                 MiscUtil.PrintError("The parameter value specified using \"-m, --mcsParams\" option  for parameter, %s, is empty. " % (Name))
  984             ParamValue = Value
  985         else:
  986             if not re.match("^(Yes|No|True|False)$", Value, re.I):
  987                 MiscUtil.PrintError("The parameter value, %s, specified using \"-m, --mcsParams\" option  for parameter, %s, is not a valid value. Supported values: Yes No True False" % (Value, Name))
  988             ParamValue = False
  989             if re.match("^(Yes|True)$", Value, re.I):
  990                 ParamValue = True
  991         
  992         # Set value...
  993         OptionsInfo["MCSParams"][ParamName] = ParamValue
  994 
  995 def ProcessOptions():
  996     """Process and validate command line arguments and options."""
  997     
  998     MiscUtil.PrintInfo("Processing options...")
  999     
 1000     # Validate options...
 1001     ValidateOptions()
 1002 
 1003     OptionsInfo["Infile"] = Options["--infile"]
 1004     OptionsInfo["SMILESInfileStatus"] = True if  MiscUtil.CheckFileExt(Options["--infile"], "smi csv tsv txt") else False
 1005     ParamsDefaultInfoOverride = {"RemoveHydrogens": False}
 1006     OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters("--infileParams", Options["--infileParams"], InfileName = Options["--infile"], ParamsDefaultInfo = ParamsDefaultInfoOverride)
 1007 
 1008     OptionsInfo["RefFile"] = Options["--reffile"]
 1009 
 1010     OptionsInfo["Outfile"] = Options["--outfile"]
 1011     OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters("--outfileParams", Options["--outfileParams"])
 1012     
 1013     OptionsInfo["Overwrite"] = Options["--overwrite"]
 1014     
 1015     OptionsInfo["Scaffold"] = Options["--scaffold"]
 1016     if re.match("^auto$", Options["--scaffold"], re.I):
 1017         UseScaffoldMCS = True
 1018         UseScaffoldSMARTS = False
 1019         ScaffoldSMARTS = None
 1020     else:
 1021         UseScaffoldMCS = False
 1022         UseScaffoldSMARTS = True
 1023         ScaffoldSMARTS = OptionsInfo["Scaffold"]
 1024     
 1025     OptionsInfo["UseScaffoldMCS"] = UseScaffoldMCS
 1026     OptionsInfo["UseScaffoldSMARTS"] = UseScaffoldSMARTS
 1027     OptionsInfo["ScaffoldSMARTS"] = ScaffoldSMARTS
 1028     OptionsInfo["ScaffoldPatternMol"] = None
 1029 
 1030     OptionsInfo["SpecifiedMCSParams"] = Options["--mcsParams"]
 1031     ProcessMCSParameters()
 1032     
 1033     OptionsInfo["ScaffoldRMSDOut"] = True if re.match("^yes$", Options["--scaffoldRMSDOut"], re.I) else False
 1034     
 1035     # Method, basis set, and reference wavefunction...
 1036     OptionsInfo["BasisSet"] = Options["--basisSet"]
 1037     OptionsInfo["BasisSetAuto"] = True if re.match("^auto$", Options["--basisSet"], re.I) else False
 1038     
 1039     OptionsInfo["MethodName"] = Options["--methodName"]
 1040     OptionsInfo["MethodNameAuto"] = True if re.match("^auto$", Options["--methodName"], re.I) else False
 1041     
 1042     OptionsInfo["Reference"] = Options["--reference"]
 1043     OptionsInfo["ReferenceAuto"] = True if re.match("^auto$", Options["--reference"], re.I) else False
 1044     
 1045     # Run and options parameters...
 1046     OptionsInfo["Psi4OptionsParams"] = Psi4Util.ProcessPsi4OptionsParameters("--psi4OptionsParams", Options["--psi4OptionsParams"])
 1047     OptionsInfo["Psi4RunParams"] = Psi4Util.ProcessPsi4RunParameters("--psi4RunParams", Options["--psi4RunParams"], InfileName = OptionsInfo["Infile"])
 1048 
 1049     # Conformer generation paramaters...
 1050     ParamsDefaultInfoOverride = {"MaxConfs": 50}
 1051     OptionsInfo["ConfGenerationParams"] = MiscUtil.ProcessOptionConformerParameters("--confParams", Options["--confParams"], ParamsDefaultInfoOverride)
 1052     
 1053     # Write energy parameters...
 1054     OptionsInfo["EnergyOut"] = True if re.match("^yes$", Options["--energyOut"], re.I) else False
 1055     OptionsInfo["EnergyUnits"] = Options["--energyUnits"]
 1056     
 1057     EnergyDataFieldLabel = Options["--energyDataFieldLabel"]
 1058     if re.match("^auto$", EnergyDataFieldLabel, re.I):
 1059         EnergyDataFieldLabel = "Psi4_Energy (%s)" % Options["--energyUnits"]
 1060     OptionsInfo["EnergyDataFieldLabel"] = EnergyDataFieldLabel
 1061     
 1062     OptionsInfo["MaxIters"] = int(Options["--maxIters"])
 1063 
 1064     OptionsInfo["MPMode"] = True if re.match("^yes$", Options["--mp"], re.I) else False
 1065     OptionsInfo["MPParams"] = MiscUtil.ProcessOptionMultiprocessingParameters("--mpParams", Options["--mpParams"])
 1066 
 1067     # Multiprocessing level...
 1068     MPLevelMoleculesMode = False
 1069     MPLevelConformersMode = False
 1070     MPLevel = Options["--mpLevel"]
 1071     if re.match("^Molecules$", MPLevel, re.I):
 1072         MPLevelMoleculesMode = True
 1073     elif re.match("^Conformers$", MPLevel, re.I):
 1074         MPLevelConformersMode = True
 1075     else:
 1076         MiscUtil.PrintError("The value, %s, specified for option \"--mpLevel\" is not valid. " % MPLevel)
 1077     OptionsInfo["MPLevel"] = MPLevel
 1078     OptionsInfo["MPLevelMoleculesMode"] = MPLevelMoleculesMode
 1079     OptionsInfo["MPLevelConformersMode"] = MPLevelConformersMode
 1080     
 1081     OptionsInfo["Precision"] = int(Options["--precision"])
 1082     OptionsInfo["QuietMode"] = True if re.match("^yes$", Options["--quiet"], re.I) else False
 1083 
 1084 def RetrieveOptions():
 1085     """Retrieve command line arguments and options."""
 1086     
 1087     # Get options...
 1088     global Options
 1089     Options = docopt(_docoptUsage_)
 1090 
 1091     # Set current working directory to the specified directory...
 1092     WorkingDir = Options["--workingdir"]
 1093     if WorkingDir:
 1094         os.chdir(WorkingDir)
 1095     
 1096     # Handle examples option...
 1097     if "--examples" in Options and Options["--examples"]:
 1098         MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_))
 1099         sys.exit(0)
 1100 
 1101 def ValidateOptions():
 1102     """Validate option values."""
 1103 
 1104     MiscUtil.ValidateOptionTextValue("--energyOut", Options["--energyOut"], "yes no")
 1105     MiscUtil.ValidateOptionTextValue("--energyUnits", Options["--energyUnits"], "Hartrees kcal/mol kJ/mol eV")
 1106     
 1107     MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"])
 1108     MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "sdf sd mol smi txt csv tsv")
 1109     
 1110     MiscUtil.ValidateOptionFilePath("-r, --reffile", Options["--reffile"])
 1111     MiscUtil.ValidateOptionFileExt("-r, --reffile", Options["--reffile"], "sdf sd mol")
 1112     
 1113     MiscUtil.ValidateOptionIntegerValue("--maxIters", Options["--maxIters"], {">": 0})
 1114     
 1115     MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "sdf sd")
 1116     MiscUtil.ValidateOptionsOutputFileOverwrite("-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"])
 1117     MiscUtil.ValidateOptionsDistinctFileNames("-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"])
 1118     
 1119     MiscUtil.ValidateOptionTextValue("--mp", Options["--mp"], "yes no")
 1120     MiscUtil.ValidateOptionTextValue("--mpLevel", Options["--mpLevel"], "Molecules Conformers")
 1121     
 1122     MiscUtil.ValidateOptionIntegerValue("-p, --precision", Options["--precision"], {">": 0})
 1123     MiscUtil.ValidateOptionTextValue("-q, --quiet", Options["--quiet"], "yes no")
 1124     
 1125     MiscUtil.ValidateOptionTextValue("--scaffoldRMSDOut", Options["--scaffoldRMSDOut"], "yes no")
 1126     
 1127 # Setup a usage string for docopt...
 1128 _docoptUsage_ = """
 1129 Psi4PerformConstrainedMinimization.py - Perform constrained minimization
 1130 
 1131 Usage:
 1132     Psi4PerformConstrainedMinimization.py [--basisSet <text>] [--confParams <Name,Value,...>] [--energyOut <yes or no>]
 1133                                           [--energyDataFieldLabel <text>] [--energyUnits <text>] [--infileParams <Name,Value,...>]
 1134                                           [--maxIters <number>] [--methodName <text>] [--mcsParams <Name,Value,...>]
 1135                                           [--mp <yes or no>] [--mpLevel <Molecules or Conformers>] [--mpParams <Name, Value,...>]
 1136                                           [ --outfileParams <Name,Value,...> ] [--overwrite] [--precision <number> ]
 1137                                           [--psi4OptionsParams <Name,Value,...>] [--psi4RunParams <Name,Value,...>]
 1138                                           [--quiet <yes or no>]  [--reference <text>] [--scaffold <auto or SMARTS>]
 1139                                           [--scaffoldRMSDOut <yes or no>] [-w <dir>] -i <infile> -r <reffile> -o <outfile> 
 1140     Psi4PerformConstrainedMinimization.py -h | --help | -e | --examples
 1141 
 1142 Description:
 1143     Generate 3D structures for molecules by performing a constrained energy
 1144     minimization against a reference molecule. The 3D structures for molecules
 1145     are generated using a combination of distance geometry and forcefield
 1146     minimization followed by geometry optimization using a quantum chemistry
 1147     method.
 1148 
 1149     An initial set of 3D conformers are generated for input molecules using
 1150     distance geometry. A common core scaffold, corresponding to a Maximum
 1151     Common Substructure (MCS) or an explicit SMARTS pattern,  is identified
 1152     between a pair of input and reference molecules. The core scaffold atoms in
 1153     input molecules are aligned against the same atoms in the reference molecule.
 1154     The energy of aligned structures are sequentially minimized using the forcefield
 1155     and a quantum chemistry method. The conformation with the lowest energy is
 1156     selected to represent the final structure.
 1157 
 1158     A Psi4 XYZ format geometry string is automatically generated for each molecule
 1159     in input file. It contains atom symbols and 3D coordinates for each atom in a
 1160     molecule. In addition, the formal charge and spin multiplicity are present in the
 1161     the geometry string. These values are either retrieved from molecule properties
 1162     named 'FormalCharge' and 'SpinMultiplicty' or dynamically calculated for a
 1163     molecule.
 1164 
 1165     The supported input file formats are: Mol (.mol), SD (.sdf, .sd), SMILES (.smi,
 1166     .csv, .tsv, .txt)
 1167 
 1168     The supported output file formats are: SD (.sdf, .sd)
 1169 
 1170 Options:
 1171     -b, --basisSet <text>  [default: auto]
 1172         Basis set to use for constrained energy minimization. Default: 6-31+G**
 1173         for sulfur containing molecules; Otherwise, 6-31G** [ Ref 150 ]. The specified 
 1174         value must be a valid Psi4 basis set. No validation is performed.
 1175         
 1176         The following list shows a representative sample of basis sets available
 1177         in Psi4:
 1178             
 1179             STO-3G, 6-31G, 6-31+G, 6-31++G, 6-31G*, 6-31+G*,  6-31++G*, 
 1180             6-31G**, 6-31+G**, 6-31++G**, 6-311G, 6-311+G, 6-311++G,
 1181             6-311G*, 6-311+G*, 6-311++G*, 6-311G**, 6-311+G**, 6-311++G**,
 1182             cc-pVDZ, cc-pCVDZ, aug-cc-pVDZ, cc-pVDZ-DK, cc-pCVDZ-DK, def2-SVP,
 1183             def2-SVPD, def2-TZVP, def2-TZVPD, def2-TZVPP, def2-TZVPPD
 1184             
 1185     --confParams <Name,Value,...>  [default: auto]
 1186         A comma delimited list of parameter name and value pairs for generating
 1187         initial 3D coordinates for molecules in input file. A common core scaffold is
 1188         identified between a pair of input and reference molecules. The atoms in
 1189         common core scaffold of input molecules are aligned against the reference
 1190         molecule followed by constrained energy minimization using forcefield
 1191         available in RDKit. The 3D structures are subsequently constrained and 
 1192         minimized by a quantum chemistry method available in Psi4.
 1193         
 1194         The supported parameter names along with their default values are shown
 1195         below:
 1196             
 1197             confMethod,ETKDGv2,
 1198             forceField,MMFF, forceFieldMMFFVariant,MMFF94,
 1199             enforceChirality,yes,embedRMSDCutoff,0.5,maxConfs,50,
 1200             useTethers,yes
 1201             
 1202             confMethod,ETKDGv2   [ Possible values: SDG, KDG, ETDG,
 1203                 ETKDG , or ETKDGv2]
 1204             forceField, MMFF   [ Possible values: UFF or MMFF ]
 1205             forceFieldMMFFVariant,MMFF94   [ Possible values: MMFF94 or MMFF94s ]
 1206             enforceChirality,yes   [ Possible values: yes or no ]
 1207             useTethers,yes   [ Possible values: yes or no ]
 1208             
 1209         confMethod: Conformation generation methodology for generating initial 3D
 1210         coordinates. Possible values: Standard Distance Geometry (SDG), Experimental
 1211         Torsion-angle preference with Distance Geometry (ETDG), basic Knowledge-terms
 1212         with Distance Geometry (KDG) and Experimental Torsion-angle preference
 1213         along with basic Knowledge-terms with Distance Geometry (ETKDG or
 1214         ETKDGv2) [Ref 129, 167] .
 1215         
 1216         forceField: Forcefield method to use for constrained energy minimization.
 1217         Possible values: Universal Force Field (UFF) [ Ref 81 ] or Merck Molecular
 1218         Mechanics Force Field [ Ref 83-87 ] .
 1219         
 1220         enforceChirality: Enforce chirality for defined chiral centers during
 1221         forcefield minimization.
 1222         
 1223         maxConfs: Maximum number of conformations to generate for each molecule
 1224         during the generation of an initial 3D conformation ensemble using conformation
 1225         generation methodology. The conformations are constrained and minimized using
 1226         the specified forcefield and a quantum chemistry method. The lowest energy
 1227         conformation is written to the output file.
 1228         
 1229         embedRMSDCutoff: RMSD cutoff for retaining initial set of conformers embedded
 1230         using distance geometry and forcefield minimization. All embedded conformers
 1231         are kept for 'None' value. Otherwise, only those conformers which are different
 1232         from each other by the specified RMSD cutoff, 0.5 by default, are kept. The first
 1233         embedded conformer is always retained.
 1234         
 1235         useTethers: Use tethers to optimize the final embedded conformation by
 1236         applying a series of extra forces to align matching atoms to the positions of
 1237         the core atoms. Otherwise, use simple distance constraints during the
 1238         optimization.
 1239     --energyOut <yes or no>  [default: yes]
 1240         Write out energy values.
 1241     --energyDataFieldLabel <text>  [default: auto]
 1242         Energy data field label for writing energy values. Default: Psi4_Energy (<Units>). 
 1243     --energyUnits <text>  [default: kcal/mol]
 1244         Energy units. Possible values: Hartrees, kcal/mol, kJ/mol, or eV.
 1245     -e, --examples
 1246         Print examples.
 1247     -h, --help
 1248         Print this help message.
 1249     -i, --infile <infile>
 1250         Input file name.
 1251     --infileParams <Name,Value,...>  [default: auto]
 1252         A comma delimited list of parameter name and value pairs for reading
 1253         molecules from files. The supported parameter names for different file
 1254         formats, along with their default values, are shown below:
 1255             
 1256             SD, MOL: removeHydrogens,no,sanitize,yes,strictParsing,yes
 1257             
 1258             SMILES: smilesColumn,1,smilesNameColumn,2,smilesDelimiter,space,
 1259                 smilesTitleLine,auto,sanitize,yes
 1260             
 1261         Possible values for smilesDelimiter: space, comma or tab.
 1262     --maxIters <number>  [default: 50]
 1263         Maximum number of iterations to perform for each molecule or conformer
 1264         during energy minimization by a quantum chemistry method.
 1265     -m, --methodName <text>  [default: auto]
 1266         Method to use for constrained energy minimization. Default: B3LYP [ Ref 150 ].
 1267         The specified value must be a valid Psi4 method name. No validation is
 1268         performed.
 1269         
 1270         The following list shows a representative sample of methods available
 1271         in Psi4:
 1272             
 1273             B1LYP, B2PLYP, B2PLYP-D3BJ, B2PLYP-D3MBJ, B3LYP, B3LYP-D3BJ,
 1274             B3LYP-D3MBJ, CAM-B3LYP, CAM-B3LYP-D3BJ, HF, HF-D3BJ,  HF3c, M05,
 1275             M06, M06-2x, M06-HF, M06-L, MN12-L, MN15, MN15-D3BJ,PBE, PBE0,
 1276             PBEH3c, PW6B95, PW6B95-D3BJ, WB97, WB97X, WB97X-D, WB97X-D3BJ
 1277             
 1278     --mcsParams <Name,Value,...>  [default: auto]
 1279         Parameter values to use for identifying a maximum common substructure
 1280         (MCS) in between a pair of reference and input molecules.In general, it is a
 1281         comma delimited list of parameter name and value pairs. The supported
 1282         parameter names along with their default values are shown below:
 1283             
 1284             atomCompare,CompareElements,bondCompare,CompareOrder,
 1285             maximizeBonds,yes,matchValences,yes,matchChiralTag,no,
 1286             minNumAtoms,1,minNumBonds,0,ringMatchesRingOnly,yes,
 1287             completeRingsOnly,yes,threshold,1.0,timeOut,3600,seedSMARTS,none
 1288             
 1289         Possible values for atomCompare: CompareAny, CompareElements,
 1290         CompareIsotopes. Possible values for bondCompare: CompareAny,
 1291         CompareOrder, CompareOrderExact.
 1292         
 1293         A brief description of MCS parameters taken from RDKit documentation is
 1294         as follows:
 1295             
 1296             atomCompare - Controls match between two atoms
 1297             bondCompare - Controls match between two bonds
 1298             maximizeBonds - Maximize number of bonds instead of atoms
 1299             matchValences - Include atom valences in the MCS match
 1300             matchChiralTag - Include atom chirality in the MCS match
 1301             minNumAtoms - Minimum number of atoms in the MCS match
 1302             minNumBonds - Minimum number of bonds in the MCS match
 1303             ringMatchesRingOnly - Ring bonds only match other ring bonds
 1304             completeRingsOnly - Partial rings not allowed during the match
 1305             threshold - Fraction of the dataset that must contain the MCS
 1306             seedSMARTS - SMARTS string as the seed of the MCS
 1307             timeout - Timeout for the MCS calculation in seconds
 1308             
 1309     --mp <yes or no>  [default: no]
 1310         Use multiprocessing.
 1311          
 1312         By default, input data is retrieved in a lazy manner via mp.Pool.imap()
 1313         function employing lazy RDKit data iterable. This allows processing of
 1314         arbitrary large data sets without any additional requirements memory.
 1315         
 1316         All input data may be optionally loaded into memory by mp.Pool.map()
 1317         before starting worker processes in a process pool by setting the value
 1318         of 'inputDataMode' to 'InMemory' in '--mpParams' option.
 1319         
 1320         A word to the wise: The default 'chunkSize' value of 1 during 'Lazy' input
 1321         data mode may adversely impact the performance. The '--mpParams' section
 1322         provides additional information to tune the value of 'chunkSize'.
 1323     --mpLevel <Molecules or Conformers>  [default: Molecules]
 1324         Perform multiprocessing at molecules or conformers level. Possible values:
 1325         Molecules or Conformers. The 'Molecules' value starts a process pool at the
 1326         molecules level. All conformers of a molecule are processed in a single
 1327         process. The 'Conformers' value, however, starts a process pool at the 
 1328         conformers level. Each conformer of a molecule is processed in an individual
 1329         process in the process pool. The default Psi4 'OutputFile' is set to 'quiet'
 1330         using '--psi4RunParams' for 'Conformers' level. Otherwise, it may generate
 1331         a large number of Psi4 output files.
 1332     --mpParams <Name,Value,...>  [default: auto]
 1333         A comma delimited list of parameter name and value pairs to configure
 1334         multiprocessing.
 1335         
 1336         The supported parameter names along with their default and possible
 1337         values are shown below:
 1338         
 1339             chunkSize, auto
 1340             inputDataMode, Lazy   [ Possible values: InMemory or Lazy ]
 1341             numProcesses, auto   [ Default: mp.cpu_count() ]
 1342         
 1343         These parameters are used by the following functions to configure and
 1344         control the behavior of multiprocessing: mp.Pool(), mp.Pool.map(), and
 1345         mp.Pool.imap().
 1346         
 1347         The chunkSize determines chunks of input data passed to each worker
 1348         process in a process pool by mp.Pool.map() and mp.Pool.imap() functions.
 1349         The default value of chunkSize is dependent on the value of 'inputDataMode'.
 1350         
 1351         The mp.Pool.map() function, invoked during 'InMemory' input data mode,
 1352         automatically converts RDKit data iterable into a list, loads all data into
 1353         memory, and calculates the default chunkSize using the following method
 1354         as shown in its code:
 1355         
 1356             chunkSize, extra = divmod(len(dataIterable), len(numProcesses) * 4)
 1357             if extra: chunkSize += 1
 1358         
 1359         For example, the default chunkSize will be 7 for a pool of 4 worker processes
 1360         and 100 data items.
 1361         
 1362         The mp.Pool.imap() function, invoked during 'Lazy' input data mode, employs
 1363         'lazy' RDKit data iterable to retrieve data as needed, without loading all the
 1364         data into memory. Consequently, the size of input data is not known a priori.
 1365         It's not possible to estimate an optimal value for the chunkSize. The default 
 1366         chunkSize is set to 1.
 1367         
 1368         The default value for the chunkSize during 'Lazy' data mode may adversely
 1369         impact the performance due to the overhead associated with exchanging
 1370         small chunks of data. It is generally a good idea to explicitly set chunkSize to
 1371         a larger value during 'Lazy' input data mode, based on the size of your input
 1372         data and number of processes in the process pool.
 1373         
 1374         The mp.Pool.map() function waits for all worker processes to process all
 1375         the data and return the results. The mp.Pool.imap() function, however,
 1376         returns the the results obtained from worker processes as soon as the
 1377         results become available for specified chunks of data.
 1378         
 1379         The order of data in the results returned by both mp.Pool.map() and 
 1380         mp.Pool.imap() functions always corresponds to the input data.
 1381     -o, --outfile <outfile>
 1382         Output file name.
 1383     --outfileParams <Name,Value,...>  [default: auto]
 1384         A comma delimited list of parameter name and value pairs for writing
 1385         molecules to files. The supported parameter names for different file
 1386         formats, along with their default values, are shown below:
 1387             
 1388             SD: kekulize,yes,forceV3000,no
 1389             
 1390     --overwrite
 1391         Overwrite existing files.
 1392     --precision <number>  [default: 6]
 1393         Floating point precision for writing energy values.
 1394     --psi4OptionsParams <Name,Value,...>  [default: none]
 1395         A comma delimited list of Psi4 option name and value pairs for setting
 1396         global and module options. The names are 'option_name' for global options
 1397         and 'module_name__option_name' for options local to a module. The
 1398         specified option names must be valid Psi4 names. No validation is
 1399         performed.
 1400         
 1401         The specified option name and  value pairs are processed and passed to
 1402         psi4.set_options() as a dictionary. The supported value types are float,
 1403         integer, boolean, or string. The float value string is converted into a float.
 1404         The valid values for a boolean string are yes, no, true, false, on, or off. 
 1405     --psi4RunParams <Name,Value,...>  [default: auto]
 1406         A comma delimited list of parameter name and value pairs for configuring
 1407         Psi4 jobs.
 1408         
 1409         The supported parameter names along with their default and possible
 1410         values are shown below:
 1411              
 1412             MemoryInGB, 1
 1413             NumThreads, 1
 1414             OutputFile, auto   [ Possible  values: stdout, quiet, or FileName ]
 1415             ScratchDir, auto   [ Possivle values: DirName]
 1416             RemoveOutputFile, yes   [ Possible values: yes, no, true, or false]
 1417             
 1418         These parameters control the runtime behavior of Psi4.
 1419         
 1420         The default file name for 'OutputFile' is <InFileRoot>_Psi4.out. The PID
 1421         is appended to output file name during multiprocessing as shown:
 1422         <InFileRoot>_Psi4_<PIDNum>.out. The 'stdout' value for 'OutputType'
 1423         sends Psi4 output to stdout. The 'quiet' or 'devnull' value suppresses
 1424         all Psi4 output. The 'OutputFile' is set to 'quiet' for 'auto' value during 
 1425         'Conformers' of '--mpLevel' option.
 1426         
 1427         The default 'Yes' value of 'RemoveOutputFile' option forces the removal
 1428         of any existing Psi4 before creating new files to append output from
 1429         multiple Psi4 runs.
 1430         
 1431         The option 'ScratchDir' is a directory path to the location of scratch
 1432         files. The default value corresponds to Psi4 default. It may be used to
 1433         override the deafult path.
 1434     -q, --quiet <yes or no>  [default: no]
 1435         Use quiet mode. The warning and information messages will not be printed.
 1436     -r, --reffile <reffile>
 1437         Reference input file name containing a 3D reference molecule. A common
 1438         core scaffold must be present in a pair of an input and reference molecules.
 1439         Otherwise, no constrained minimization is performed on the input molecule.
 1440     --reference <text>  [default: auto]
 1441         Reference wave function to use for energy calculation. Default: RHF or UHF.
 1442         The default values are Restricted Hartree-Fock (RHF) for closed-shell molecules
 1443         with all electrons paired and Unrestricted Hartree-Fock (UHF) for open-shell
 1444         molecules with unpaired electrons.
 1445         
 1446         The specified value must be a valid Psi4 reference wave function. No validation
 1447         is performed. For example: ROHF, CUHF, RKS, etc.
 1448         
 1449         The spin multiplicity determines the default value of reference wave function
 1450         for input molecules. It is calculated from number of free radical electrons using
 1451         Hund's rule of maximum multiplicity defined as 2S + 1 where S is the total
 1452         electron spin. The total spin is 1/2 the number of free radical electrons in a 
 1453         molecule. The value of 'SpinMultiplicity' molecule property takes precedence
 1454         over the calculated value of spin multiplicity.
 1455     -s, --scaffold <auto or SMARTS>  [default: auto]
 1456         Common core scaffold between a pair of input and reference molecules used for
 1457         constrained minimization of molecules in input file. Possible values: Auto or a
 1458         valid SMARTS pattern. The common core scaffold is automatically detected
 1459         corresponding to the Maximum Common Substructure (MCS) between a pair of
 1460         reference and input molecules. A valid SMARTS pattern may be optionally specified
 1461         for the common core scaffold.
 1462     --scaffoldRMSDOut <yes or no>  [default: No]
 1463         Write out RMSD value for common core alignment between a pair of input and
 1464         reference molecules.
 1465     -w, --workingdir <dir>
 1466         Location of working directory which defaults to the current directory.
 1467 
 1468 Examples:
 1469     To perform constrained energy minimization for molecules in a SMILES file against
 1470     a reference 3D molecule in a SD file using a common core scaffold between pairs of
 1471     input and reference molecules identified using MCS, generating up to 50 conformations
 1472     using ETKDGv2 methodology followed by initial MMFF forcefield minimization and final
 1473     energy minimization using B3LYP/6-31G** and B3LYP/6-31+G** for non-sulfur and
 1474     sulfur containing molecules, and write out a SD file containing minimum energy
 1475     structure corresponding to each constrained molecule, type:
 1476 
 1477        %  Psi4PerformConstrainedMinimization.py  -i Psi4SampleAlkanes.smi
 1478           -r Psi4SampleEthane3D.sdf  -o Psi4SampleAlkanesOut.sdf
 1479 
 1480     To run the first example in a quiet mode and write out a SD file, type:
 1481 
 1482        %  Psi4PerformConstrainedMinimization.py  -q yes
 1483           -i Psi4SampleAlkanes.smi -r Psi4SampleEthane3D.sdf
 1484           -o Psi4SampleAlkanesOut.sdf
 1485 
 1486     To run the first example in multiprocessing mode at molecules level on all
 1487     available CPUs without loading all data into memory and write out a SD file,
 1488     type:
 1489 
 1490        %  Psi4PerformConstrainedMinimization.py  --mp yes
 1491           -i Psi4SampleAlkanes.smi -r Psi4SampleEthane3D.sdf
 1492           -o Psi4SampleAlkanesOut.sdf
 1493 
 1494     To run the first example in multiprocessing mode at conformers level on all
 1495     available CPUs without loading all data into memory and write out a SD file,
 1496     type:
 1497 
 1498        %  Psi4PerformConstrainedMinimization.py  --mp yes --mpLevel Conformers
 1499           -i Psi4SampleAlkanes.smi -r Psi4SampleEthane3D.sdf
 1500           -o Psi4SampleAlkanesOut.sdf
 1501 
 1502     To run the first example in multiprocessing mode at molecules level on all
 1503     available CPUs by loading all data into memory and write out a SD file, type:
 1504 
 1505        %  Psi4PerformConstrainedMinimization.py  --mp yes --mpParams
 1506           "inputDataMode,Lazy,numProcesses,4,chunkSize,8"
 1507           -i Psi4SampleAlkanes.smi -r Psi4SampleEthane3D.sdf
 1508           -o Psi4SampleAlkanesOut.sdf
 1509 
 1510     To rerun the first example using an explicit SMARTS string for a common core
 1511     scaffold and write out a SD file, type:
 1512 
 1513        %  Psi4PerformConstrainedMinimization.py  --scaffold "CC"
 1514           -i Psi4SampleAlkanes.smi -r Psi4SampleEthane3D.sdf
 1515           -o Psi4SampleAlkanesOut.sdf
 1516 
 1517     To run the first example using a specific set of parameters for generating
 1518     an initial set of conformers followed by energy minimization using forcefield
 1519     and a quantum chemistry method and write out a SD file type:
 1520 
 1521        %  Psi4PerformConstrainedMinimization.py  --confParams "
 1522           confMethod,ETKDGv2,forceField,MMFF, forceFieldMMFFVariant,MMFF94s,
 1523           maxConfs,20,embedRMSDCutoff,0.5" --energyUnits "kJ/mol" -m B3LYP
 1524           -b "6-31+G**" --maxIters 20 -i Psi4SampleAlkanes.smi -r Psi4SampleEthane3D.sdf
 1525           -o Psi4SampleAlkanesOut.sdf
 1526 
 1527     To run the first example for molecules in a CSV SMILES file, SMILES strings
 1528     in column 1, name column 2, and write out a SD file, type:
 1529 
 1530        %  Psi4PerformConstrainedMinimization.py  --infileParams
 1531           "smilesDelimiter,comma,smilesTitleLine,yes,smilesColumn,1,
 1532            smilesNameColumn,2" -i Psi4SampleAlkanes.csv
 1533            -r Psi4SampleEthane3D.sdf -o Psi4SampleAlkanesOut.sdf
 1534 
 1535 Author:
 1536 
 1537     Manish Sud(msud@san.rr.com)
 1538 
 1539 See also:
 1540     Psi4CalculateEnergy.py, Psi4CalculatePartialCharges.py, Psi4GenerateConformers.py,
 1541     Psi4GenerateConstrainedConformers.py, Psi4PerformMinimization.py
 1542 
 1543 Copyright:
 1544     Copyright (C) 2024 Manish Sud. All rights reserved.
 1545 
 1546     The functionality available in this script is implemented using Psi4, an
 1547     open source quantum chemistry software package, and RDKit, an open
 1548     source toolkit for cheminformatics developed by Greg Landrum.
 1549 
 1550     This file is part of MayaChemTools.
 1551 
 1552     MayaChemTools is free software; you can redistribute it and/or modify it under
 1553     the terms of the GNU Lesser General Public License as published by the Free
 1554     Software Foundation; either version 3 of the License, or (at your option) any
 1555     later version.
 1556 
 1557 """
 1558 
 1559 if __name__ == "__main__":
 1560     main()