MayaChemTools

    1 #!/bin/env python
    2 #
    3 # File: Psi4PerformConstrainedMinimization.py
    4 # Author: Manish Sud <msud@san.rr.com>
    5 #
    6 # Copyright (C) 2026 Manish Sud. All rights reserved.
    7 #
    8 # The functionality available in this script is implemented using Psi4, an
    9 # open source quantum chemistry software package, and RDKit, an open
   10 # source toolkit for cheminformatics developed by Greg Landrum.
   11 #
   12 # This file is part of MayaChemTools.
   13 #
   14 # MayaChemTools is free software; you can redistribute it and/or modify it under
   15 # the terms of the GNU Lesser General Public License as published by the Free
   16 # Software Foundation; either version 3 of the License, or (at your option) any
   17 # later version.
   18 #
   19 # MayaChemTools is distributed in the hope that it will be useful, but without
   20 # any warranty; without even the implied warranty of merchantability of fitness
   21 # for a particular purpose.  See the GNU Lesser General Public License for more
   22 # details.
   23 #
   24 # You should have received a copy of the GNU Lesser General Public License
   25 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
   26 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
   27 # Boston, MA, 02111-1307, USA.
   28 #
   29 
   30 from __future__ import print_function
   31 
   32 import os
   33 import sys
   34 import time
   35 import re
   36 import shutil
   37 import multiprocessing as mp
   38 
   39 # Psi4 imports...
   40 if hasattr(shutil, "which") and shutil.which("psi4") is None:
   41     sys.stderr.write("\nWarning: Failed to find 'psi4' in your PATH indicating potential issues with your\n")
   42     sys.stderr.write("Psi4 environment. The 'import psi4' directive in the global scope of the script\n")
   43     sys.stderr.write("interferes with the multiprocessing functionality. It is imported later in the\n")
   44     sys.stderr.write("local scope during the execution of the script and may fail. Check/update your\n")
   45     sys.stderr.write("Psi4 environment and try again.\n\n")
   46 
   47 # RDKit imports...
   48 try:
   49     from rdkit import rdBase
   50     from rdkit import Chem
   51     from rdkit.Chem import AllChem, rdMolAlign
   52     from rdkit.Chem import rdFMCS
   53 except ImportError as ErrMsg:
   54     sys.stderr.write("\nFailed to import RDKit module/package: %s\n" % ErrMsg)
   55     sys.stderr.write("Check/update your RDKit environment and try again.\n\n")
   56     sys.exit(1)
   57 
   58 # MayaChemTools imports...
   59 sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python"))
   60 try:
   61     from docopt import docopt
   62     import MiscUtil
   63     import Psi4Util
   64     import RDKitUtil
   65 except ImportError as ErrMsg:
   66     sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg)
   67     sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n")
   68     sys.exit(1)
   69 
   70 ScriptName = os.path.basename(sys.argv[0])
   71 Options = {}
   72 OptionsInfo = {}
   73 
   74 
   75 def main():
   76     """Start execution of the script."""
   77 
   78     MiscUtil.PrintInfo(
   79         "\n%s (Psi4: Imported later; RDKit v%s; MayaChemTools v%s; %s): Starting...\n"
   80         % (ScriptName, rdBase.rdkitVersion, MiscUtil.GetMayaChemToolsVersion(), time.asctime())
   81     )
   82 
   83     (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime()
   84 
   85     # Retrieve command line arguments and options...
   86     RetrieveOptions()
   87 
   88     # Process and validate command line arguments and options...
   89     ProcessOptions()
   90 
   91     # Perform actions required by the script...
   92     PerformConstrainedMinimization()
   93 
   94     MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName)
   95     MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime))
   96 
   97 
   98 def PerformConstrainedMinimization():
   99     """Perform constrained minimization."""
  100 
  101     # Read and validate reference molecule...
  102     RefMol = RetrieveReferenceMolecule()
  103 
  104     # Setup a molecule reader for input file...
  105     MiscUtil.PrintInfo("\nProcessing file %s..." % OptionsInfo["Infile"])
  106     Mols = RDKitUtil.ReadMolecules(OptionsInfo["Infile"], **OptionsInfo["InfileParams"])
  107 
  108     # Set up a molecule writer...
  109     Writer = RDKitUtil.MoleculesWriter(OptionsInfo["Outfile"], **OptionsInfo["OutfileParams"])
  110     if Writer is None:
  111         MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OptionsInfo["Outfile"])
  112     MiscUtil.PrintInfo("Generating file %s..." % OptionsInfo["Outfile"])
  113 
  114     MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount, WriteFailedCount = ProcessMolecules(
  115         RefMol, Mols, Writer
  116     )
  117 
  118     if Writer is not None:
  119         Writer.close()
  120 
  121     MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount)
  122     MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount)
  123     MiscUtil.PrintInfo("Number of molecules with missing core scaffold: %d" % CoreScaffoldMissingCount)
  124     MiscUtil.PrintInfo(
  125         "Number of molecules failed during conformation generation or minimization: %d" % MinimizationFailedCount
  126     )
  127     MiscUtil.PrintInfo("Number of molecules failed during writing: %d" % WriteFailedCount)
  128     MiscUtil.PrintInfo(
  129         "Number of ignored molecules: %d"
  130         % (MolCount - ValidMolCount + CoreScaffoldMissingCount + MinimizationFailedCount + WriteFailedCount)
  131     )
  132 
  133 
  134 def ProcessMolecules(RefMol, Mols, Writer):
  135     """Process and minimize molecules."""
  136 
  137     if OptionsInfo["MPMode"]:
  138         return ProcessMoleculesUsingMultipleProcesses(RefMol, Mols, Writer)
  139     else:
  140         return ProcessMoleculesUsingSingleProcess(RefMol, Mols, Writer)
  141 
  142 
  143 def ProcessMoleculesUsingSingleProcess(RefMol, Mols, Writer):
  144     """Process and minimize molecules using a single process."""
  145 
  146     # Intialize Psi4...
  147     MiscUtil.PrintInfo("\nInitializing Psi4...")
  148     Psi4Handle = Psi4Util.InitializePsi4(
  149         Psi4RunParams=OptionsInfo["Psi4RunParams"],
  150         Psi4OptionsParams=OptionsInfo["Psi4OptionsParams"],
  151         PrintVersion=True,
  152         PrintHeader=True,
  153     )
  154     OptionsInfo["psi4"] = Psi4Handle
  155 
  156     # Setup max iterations global variable...
  157     Psi4Util.UpdatePsi4OptionsParameters(Psi4Handle, {"GEOM_MAXITER": OptionsInfo["MaxIters"]})
  158 
  159     # Setup conversion factor for energy units...
  160     SetupEnergyConversionFactor(Psi4Handle)
  161 
  162     (MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount, WriteFailedCount) = [0] * 5
  163 
  164     for Mol in Mols:
  165         MolCount += 1
  166 
  167         if not CheckAndValidateMolecule(Mol, MolCount):
  168             continue
  169 
  170         # Setup 2D coordinates for SMILES input file...
  171         if OptionsInfo["SMILESInfileStatus"]:
  172             AllChem.Compute2DCoords(Mol)
  173 
  174         ValidMolCount += 1
  175 
  176         # Setup a reference molecule core containing common scaffold atoms...
  177         RefMolCore = SetupCoreScaffold(RefMol, Mol, MolCount)
  178         if RefMolCore is None:
  179             CoreScaffoldMissingCount += 1
  180             continue
  181 
  182         Mol, CalcStatus, Energy, ScaffoldEmbedRMSD = ConstrainAndMinimizeMolecule(Psi4Handle, Mol, RefMolCore, MolCount)
  183 
  184         if not CalcStatus:
  185             MinimizationFailedCount += 1
  186             continue
  187 
  188         WriteStatus = WriteMolecule(Writer, Mol, MolCount, Energy, ScaffoldEmbedRMSD)
  189 
  190         if not WriteStatus:
  191             WriteFailedCount += 1
  192 
  193     return (MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount, WriteFailedCount)
  194 
  195 
  196 def ProcessMoleculesUsingMultipleProcesses(RefMol, Mols, Writer):
  197     """Process and minimize molecules using multiprocessing."""
  198 
  199     if OptionsInfo["MPLevelConformersMode"]:
  200         return ProcessMoleculesUsingMultipleProcessesAtConformersLevel(RefMol, Mols, Writer)
  201     elif OptionsInfo["MPLevelMoleculesMode"]:
  202         return ProcessMoleculesUsingMultipleProcessesAtMoleculesLevel(RefMol, Mols, Writer)
  203     else:
  204         MiscUtil.PrintError('The value, %s,  option "--mpLevel" is not supported.' % (OptionsInfo["MPLevel"]))
  205 
  206 
  207 def ProcessMoleculesUsingMultipleProcessesAtMoleculesLevel(RefMol, Mols, Writer):
  208     """Process and minimize molecules using multiprocessing at molecules level."""
  209 
  210     MiscUtil.PrintInfo(
  211         "\nPerforming constrained minimization with generation of conformers using multiprocessing at molecules level..."
  212     )
  213 
  214     MPParams = OptionsInfo["MPParams"]
  215 
  216     # Setup data for initializing a worker process...
  217     OptionsInfo["EncodedRefMol"] = RDKitUtil.MolToBase64EncodedMolString(RefMol)
  218     InitializeWorkerProcessArgs = (
  219         MiscUtil.ObjectToBase64EncodedString(Options),
  220         MiscUtil.ObjectToBase64EncodedString(OptionsInfo),
  221     )
  222 
  223     # Setup a encoded mols data iterable for a worker process...
  224     WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(Mols)
  225 
  226     # Setup process pool along with data initialization for each process...
  227     if not OptionsInfo["QuietMode"]:
  228         MiscUtil.PrintInfo(
  229             "\nConfiguring multiprocessing using %s method..."
  230             % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()")
  231         )
  232         MiscUtil.PrintInfo(
  233             "NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n"
  234             % (
  235                 MPParams["NumProcesses"],
  236                 MPParams["InputDataMode"],
  237                 ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"]),
  238             )
  239         )
  240 
  241     ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeWorkerProcess, InitializeWorkerProcessArgs)
  242 
  243     # Start processing...
  244     if re.match("^Lazy$", MPParams["InputDataMode"], re.I):
  245         Results = ProcessPool.imap(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
  246     elif re.match("^InMemory$", MPParams["InputDataMode"], re.I):
  247         Results = ProcessPool.map(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
  248     else:
  249         MiscUtil.PrintError(
  250             'The value, %s, specified for "--inputDataMode" is not supported.' % (MPParams["InputDataMode"])
  251         )
  252 
  253     # Print out Psi4 version in the main process...
  254     MiscUtil.PrintInfo("\nInitializing Psi4...\n")
  255     Psi4Handle = Psi4Util.InitializePsi4(PrintVersion=True, PrintHeader=False)
  256     OptionsInfo["psi4"] = Psi4Handle
  257 
  258     (MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount, WriteFailedCount) = [0] * 5
  259 
  260     for Result in Results:
  261         MolCount += 1
  262         MolIndex, EncodedMol, CoreScaffoldMissingStatus, CalcStatus, Energy, ScaffoldEmbedRMSD = Result
  263 
  264         if EncodedMol is None:
  265             continue
  266         ValidMolCount += 1
  267 
  268         if CoreScaffoldMissingStatus:
  269             CoreScaffoldMissingStatus += 1
  270             continue
  271 
  272         if not CalcStatus:
  273             MinimizationFailedCount += 1
  274             continue
  275 
  276         Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
  277         WriteStatus = WriteMolecule(Writer, Mol, MolCount, Energy, ScaffoldEmbedRMSD)
  278 
  279         if not WriteStatus:
  280             WriteFailedCount += 1
  281 
  282     return (MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount, WriteFailedCount)
  283 
  284 
  285 def InitializeWorkerProcess(*EncodedArgs):
  286     """Initialize data for a worker process."""
  287 
  288     global Options, OptionsInfo
  289 
  290     if not OptionsInfo["QuietMode"]:
  291         MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid())
  292 
  293     # Decode Options and OptionInfo...
  294     Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0])
  295     OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1])
  296 
  297     # Decode RefMol...
  298     OptionsInfo["RefMol"] = RDKitUtil.MolFromBase64EncodedMolString(OptionsInfo["EncodedRefMol"])
  299 
  300     # Psi4 is initialized in the worker process to avoid creation of redundant Psi4
  301     # output files for each process...
  302     OptionsInfo["Psi4Initialized"] = False
  303 
  304 
  305 def WorkerProcess(EncodedMolInfo):
  306     """Process data for a worker process."""
  307 
  308     if not OptionsInfo["Psi4Initialized"]:
  309         InitializePsi4ForWorkerProcess()
  310 
  311     MolIndex, EncodedMol = EncodedMolInfo
  312 
  313     MolNum = MolIndex + 1
  314     CoreScaffoldMissingStatus = False
  315     CalcStatus = False
  316     Energy = None
  317     ScaffoldEmbedRMSD = None
  318 
  319     if EncodedMol is None:
  320         return [MolIndex, None, CoreScaffoldMissingStatus, CalcStatus, Energy, ScaffoldEmbedRMSD]
  321 
  322     Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
  323     if not CheckAndValidateMolecule(Mol, MolNum):
  324         return [MolIndex, None, CoreScaffoldMissingStatus, CalcStatus, Energy, ScaffoldEmbedRMSD]
  325 
  326     # Setup 2D coordinates for SMILES input file...
  327     if OptionsInfo["SMILESInfileStatus"]:
  328         AllChem.Compute2DCoords(Mol)
  329 
  330     # Setup a reference molecule core containing common scaffold atoms...
  331     RefMolCore = SetupCoreScaffold(OptionsInfo["RefMol"], Mol, MolNum)
  332     if RefMolCore is None:
  333         CoreScaffoldMissingStatus = True
  334         return [MolIndex, EncodedMol, CalcStatus, CoreScaffoldMissingStatus, Energy, ScaffoldEmbedRMSD]
  335 
  336     Mol, CalcStatus, Energy, ScaffoldEmbedRMSD = ConstrainAndMinimizeMolecule(
  337         OptionsInfo["psi4"], Mol, RefMolCore, MolNum
  338     )
  339 
  340     return [
  341         MolIndex,
  342         RDKitUtil.MolToBase64EncodedMolString(
  343             Mol, PropertyPickleFlags=Chem.PropertyPickleOptions.MolProps | Chem.PropertyPickleOptions.PrivateProps
  344         ),
  345         CoreScaffoldMissingStatus,
  346         CalcStatus,
  347         Energy,
  348         ScaffoldEmbedRMSD,
  349     ]
  350 
  351 
  352 def ProcessMoleculesUsingMultipleProcessesAtConformersLevel(RefMol, Mols, Writer):
  353     """Process and minimize molecules using multiprocessing at conformers level."""
  354 
  355     MiscUtil.PrintInfo(
  356         "\nPerforming constrained minimization with generation of conformers using multiprocessing at conformers level..."
  357     )
  358 
  359     (MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount, WriteFailedCount) = [0] * 5
  360 
  361     for Mol in Mols:
  362         MolCount += 1
  363 
  364         if not CheckAndValidateMolecule(Mol, MolCount):
  365             continue
  366 
  367         # Setup 2D coordinates for SMILES input file...
  368         if OptionsInfo["SMILESInfileStatus"]:
  369             AllChem.Compute2DCoords(Mol)
  370 
  371         ValidMolCount += 1
  372 
  373         # Setup a reference molecule core containing common scaffold atoms...
  374         RefMolCore = SetupCoreScaffold(RefMol, Mol, MolCount)
  375         if RefMolCore is None:
  376             CoreScaffoldMissingCount += 1
  377             continue
  378 
  379         Mol, CalcStatus, Energy, ScaffoldEmbedRMSD = ProcessConformersUsingMultipleProcesses(Mol, RefMolCore, MolCount)
  380 
  381         if not CalcStatus:
  382             MinimizationFailedCount += 1
  383             continue
  384 
  385         WriteStatus = WriteMolecule(Writer, Mol, MolCount, Energy, ScaffoldEmbedRMSD)
  386 
  387         if not WriteStatus:
  388             WriteFailedCount += 1
  389 
  390     return (MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount, WriteFailedCount)
  391 
  392 
  393 def ProcessConformersUsingMultipleProcesses(Mol, RefMolCore, MolNum):
  394     """Generate conformers and minimize them using multiple processes."""
  395 
  396     # Add hydrogens...
  397     Mol = Chem.AddHs(Mol, addCoords=True)
  398 
  399     MolName = RDKitUtil.GetMolName(Mol, MolNum)
  400 
  401     # Setup constrained conformers...
  402     MolConfs, MolConfsStatus = ConstrainEmbedAndMinimizeMoleculeUsingRDKit(Mol, RefMolCore, MolNum)
  403     if not MolConfsStatus:
  404         if not OptionsInfo["QuietMode"]:
  405             MiscUtil.PrintWarning(
  406                 "Conformation generation couldn't be performed for molecule %s: Constrained embedding failed...\n"
  407                 % MolName
  408             )
  409         return (Mol, False, None, None)
  410 
  411     MPParams = OptionsInfo["MPParams"]
  412 
  413     # Setup data for initializing a worker process...
  414     OptionsInfo["EncodedRefMolCore"] = RDKitUtil.MolToBase64EncodedMolString(RefMolCore)
  415     InitializeWorkerProcessArgs = (
  416         MiscUtil.ObjectToBase64EncodedString(Options),
  417         MiscUtil.ObjectToBase64EncodedString(OptionsInfo),
  418     )
  419 
  420     # Setup a encoded mols data iterable for a worker process...
  421     WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(MolConfs)
  422 
  423     # Setup process pool along with data initialization for each process...
  424     if not OptionsInfo["QuietMode"]:
  425         MiscUtil.PrintInfo(
  426             "\nConfiguring multiprocessing using %s method..."
  427             % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()")
  428         )
  429         MiscUtil.PrintInfo(
  430             "NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n"
  431             % (
  432                 MPParams["NumProcesses"],
  433                 MPParams["InputDataMode"],
  434                 ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"]),
  435             )
  436         )
  437 
  438     ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeConformerWorkerProcess, InitializeWorkerProcessArgs)
  439 
  440     # Start processing...
  441     if re.match("^Lazy$", MPParams["InputDataMode"], re.I):
  442         Results = ProcessPool.imap(ConformerWorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
  443     elif re.match("^InMemory$", MPParams["InputDataMode"], re.I):
  444         Results = ProcessPool.map(ConformerWorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
  445     else:
  446         MiscUtil.PrintError(
  447             'The value, %s, specified for "--inputDataMode" is not supported.' % (MPParams["InputDataMode"])
  448         )
  449 
  450     ConfNums = []
  451     MolConfsMap = {}
  452     CalcEnergyMap = {}
  453     CalcFailedCount = 0
  454 
  455     for Result in Results:
  456         ConfNum, EncodedMolConf, CalcStatus, Energy = Result
  457 
  458         if EncodedMolConf is None:
  459             CalcFailedCount += 1
  460             continue
  461 
  462         if not CalcStatus:
  463             CalcFailedCount += 1
  464             continue
  465 
  466         MolConf = RDKitUtil.MolFromBase64EncodedMolString(EncodedMolConf)
  467 
  468         ConfNums.append(ConfNum)
  469         CalcEnergyMap[ConfNum] = Energy
  470         MolConfsMap[ConfNum] = MolConf
  471 
  472     if CalcFailedCount:
  473         return (Mol, False, None, None)
  474 
  475     SortedConfNums = sorted(ConfNums, key=lambda ConfNum: CalcEnergyMap[ConfNum])
  476     MinEnergyConfNum = SortedConfNums[0]
  477 
  478     MinEnergy = (
  479         "%.*f" % (OptionsInfo["Precision"], CalcEnergyMap[MinEnergyConfNum]) if OptionsInfo["EnergyOut"] else None
  480     )
  481     MinEnergyMolConf = MolConfsMap[MinEnergyConfNum]
  482 
  483     ScaffoldEmbedRMSD = "%.4f" % float(MinEnergyMolConf.GetProp("EmbedRMS")) if OptionsInfo["ScaffoldRMSDOut"] else None
  484     MinEnergyMolConf.ClearProp("EmbedRMS")
  485 
  486     return (MinEnergyMolConf, True, MinEnergy, ScaffoldEmbedRMSD)
  487 
  488 
  489 def InitializeConformerWorkerProcess(*EncodedArgs):
  490     """Initialize data for a conformer worker process."""
  491 
  492     global Options, OptionsInfo
  493 
  494     if not OptionsInfo["QuietMode"]:
  495         MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid())
  496 
  497     # Decode Options and OptionInfo...
  498     Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0])
  499     OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1])
  500 
  501     # Decode RefMol...
  502     OptionsInfo["RefMolCore"] = RDKitUtil.MolFromBase64EncodedMolString(OptionsInfo["EncodedRefMolCore"])
  503 
  504     # Psi4 is initialized in the worker process to avoid creation of redundant Psi4
  505     # output files for each process...
  506     OptionsInfo["Psi4Initialized"] = False
  507 
  508 
  509 def ConformerWorkerProcess(EncodedMolInfo):
  510     """Process data for a conformer worker process."""
  511 
  512     if not OptionsInfo["Psi4Initialized"]:
  513         InitializePsi4ForWorkerProcess()
  514 
  515     MolConfIndex, EncodedMolConf = EncodedMolInfo
  516 
  517     MolConfNum = MolConfIndex
  518     CalcStatus = False
  519     Energy = None
  520 
  521     if EncodedMolConf is None:
  522         return [MolConfIndex, None, CalcStatus, Energy]
  523 
  524     MolConf = RDKitUtil.MolFromBase64EncodedMolString(EncodedMolConf)
  525     MolConfName = RDKitUtil.GetMolName(MolConf, MolConfNum)
  526 
  527     if not OptionsInfo["QuietMode"]:
  528         MiscUtil.PrintInfo("Processing molecule %s conformer number %s..." % (MolConfName, MolConfNum))
  529 
  530     # Perform Psi4 constrained minimization...
  531     CalcStatus, Energy = ConstrainAndMinimizeMoleculeUsingPsi4(
  532         OptionsInfo["psi4"], MolConf, OptionsInfo["RefMolCore"], MolConfNum
  533     )
  534     if not CalcStatus:
  535         if not OptionsInfo["QuietMode"]:
  536             MiscUtil.PrintWarning("Minimization couldn't be performed for molecule %s\n" % (MolConfName))
  537         return [MolConfIndex, EncodedMolConf, CalcStatus, Energy]
  538 
  539     return [
  540         MolConfIndex,
  541         RDKitUtil.MolToBase64EncodedMolString(
  542             MolConf, PropertyPickleFlags=Chem.PropertyPickleOptions.MolProps | Chem.PropertyPickleOptions.PrivateProps
  543         ),
  544         CalcStatus,
  545         Energy,
  546     ]
  547 
  548 
  549 def InitializePsi4ForWorkerProcess():
  550     """Initialize Psi4 for a worker process."""
  551 
  552     if OptionsInfo["Psi4Initialized"]:
  553         return
  554 
  555     OptionsInfo["Psi4Initialized"] = True
  556 
  557     if OptionsInfo["MPLevelConformersMode"] and re.match(
  558         "auto", OptionsInfo["Psi4RunParams"]["OutputFileSpecified"], re.I
  559     ):
  560         # Run Psi4 in quiet mode during multiprocessing at Conformers level for 'auto' OutputFile...
  561         OptionsInfo["Psi4RunParams"]["OutputFile"] = "quiet"
  562     else:
  563         # Update output file...
  564         OptionsInfo["Psi4RunParams"]["OutputFile"] = Psi4Util.UpdatePsi4OutputFileUsingPID(
  565             OptionsInfo["Psi4RunParams"]["OutputFile"], os.getpid()
  566         )
  567 
  568     # Intialize Psi4...
  569     OptionsInfo["psi4"] = Psi4Util.InitializePsi4(
  570         Psi4RunParams=OptionsInfo["Psi4RunParams"],
  571         Psi4OptionsParams=OptionsInfo["Psi4OptionsParams"],
  572         PrintVersion=False,
  573         PrintHeader=True,
  574     )
  575 
  576     # Setup max iterations global variable...
  577     Psi4Util.UpdatePsi4OptionsParameters(OptionsInfo["psi4"], {"GEOM_MAXITER": OptionsInfo["MaxIters"]})
  578 
  579     # Setup conversion factor for energy units...
  580     SetupEnergyConversionFactor(OptionsInfo["psi4"])
  581 
  582 
  583 def RetrieveReferenceMolecule():
  584     """Retrieve and validate reference molecule."""
  585 
  586     RefFile = OptionsInfo["RefFile"]
  587 
  588     MiscUtil.PrintInfo("\nProcessing file %s..." % (RefFile))
  589     OptionsInfo["InfileParams"]["AllowEmptyMols"] = False
  590     ValidRefMols, RefMolCount, ValidRefMolCount = RDKitUtil.ReadAndValidateMolecules(
  591         RefFile, **OptionsInfo["InfileParams"]
  592     )
  593 
  594     if ValidRefMolCount == 0:
  595         MiscUtil.PrintError("The reference file, %s, contains no valid molecules." % RefFile)
  596     elif ValidRefMolCount > 1:
  597         MiscUtil.PrintWarning(
  598             "The reference file, %s, contains, %d, valid molecules. Using first molecule as the reference molecule..."
  599             % (RefFile, ValidRefMolCount)
  600         )
  601 
  602     RefMol = ValidRefMols[0]
  603 
  604     if OptionsInfo["UseScaffoldSMARTS"]:
  605         ScaffoldPatternMol = Chem.MolFromSmarts(OptionsInfo["ScaffoldSMARTS"])
  606         if ScaffoldPatternMol is None:
  607             MiscUtil.PrintError(
  608                 'Failed to create scaffold pattern molecule. The scaffold SMARTS pattern, %s, specified using "-s, --scaffold" option is not valid.'
  609                 % (OptionsInfo["ScaffoldSMARTS"])
  610             )
  611 
  612         if not RefMol.HasSubstructMatch(ScaffoldPatternMol):
  613             MiscUtil.PrintError(
  614                 'The scaffold SMARTS pattern, %s, specified using "-s, --scaffold" option, is missing in the first valid reference molecule.'
  615                 % (OptionsInfo["ScaffoldSMARTS"])
  616             )
  617 
  618     return RefMol
  619 
  620 
  621 def SetupCoreScaffold(RefMol, Mol, MolCount):
  622     """Setup a reference molecule core containing common scaffold atoms between
  623     a pair of molecules."""
  624 
  625     if OptionsInfo["UseScaffoldMCS"]:
  626         return SetupCoreScaffoldByMCS(RefMol, Mol, MolCount)
  627     elif OptionsInfo["UseScaffoldSMARTS"]:
  628         return SetupCoreScaffoldBySMARTS(RefMol, Mol, MolCount)
  629     else:
  630         MiscUtil.PrintError(
  631             'The  value, %s, specified for  "-s, --scaffold" option is not supported.' % (OptionsInfo["Scaffold"])
  632         )
  633 
  634 
  635 def SetupCoreScaffoldByMCS(RefMol, Mol, MolCount):
  636     """Setup a reference molecule core containing common scaffold atoms between
  637     a pair of molecules using MCS."""
  638 
  639     MCSParams = OptionsInfo["MCSParams"]
  640     Mols = [RefMol, Mol]
  641 
  642     MCSResultObject = rdFMCS.FindMCS(
  643         Mols,
  644         maximizeBonds=MCSParams["MaximizeBonds"],
  645         threshold=MCSParams["Threshold"],
  646         timeout=MCSParams["TimeOut"],
  647         verbose=MCSParams["Verbose"],
  648         matchValences=MCSParams["MatchValences"],
  649         ringMatchesRingOnly=MCSParams["RingMatchesRingOnly"],
  650         completeRingsOnly=MCSParams["CompleteRingsOnly"],
  651         matchChiralTag=MCSParams["MatchChiralTag"],
  652         atomCompare=MCSParams["AtomCompare"],
  653         bondCompare=MCSParams["BondCompare"],
  654         seedSmarts=MCSParams["SeedSMARTS"],
  655     )
  656 
  657     if MCSResultObject.canceled:
  658         if not OptionsInfo["QuietMode"]:
  659             MiscUtil.PrintWarning(
  660                 'MCS failed to identify a common core scaffold between reference moecule and input molecule %s. Specify a different set of parameters using "-m, --mcsParams" option and try again.'
  661                 % (RDKitUtil.GetMolName(Mol, MolCount))
  662             )
  663         return None
  664 
  665     CoreNumAtoms = MCSResultObject.numAtoms
  666     CoreNumBonds = MCSResultObject.numBonds
  667 
  668     SMARTSCore = MCSResultObject.smartsString
  669 
  670     if not len(SMARTSCore):
  671         if not OptionsInfo["QuietMode"]:
  672             MiscUtil.PrintWarning(
  673                 'MCS failed to identify a common core scaffold between reference moecule and input molecule %s. Specify a different set of parameters using "-m, --mcsParams" option and try again.'
  674                 % (RDKitUtil.GetMolName(Mol, MolCount))
  675             )
  676         return None
  677 
  678     if CoreNumAtoms < MCSParams["MinNumAtoms"]:
  679         if not OptionsInfo["QuietMode"]:
  680             MiscUtil.PrintWarning(
  681                 'Number of atoms, %d, in core scaffold identified by MCS is less than, %d, as specified by "minNumAtoms" parameter in  "-m, --mcsParams" option.'
  682                 % (CoreNumAtoms, MCSParams["MinNumAtoms"])
  683             )
  684         return None
  685 
  686     if CoreNumBonds < MCSParams["MinNumBonds"]:
  687         if not OptionsInfo["QuietMode"]:
  688             MiscUtil.PrintWarning(
  689                 'Number of bonds, %d, in core scaffold identified by MCS is less than, %d, as specified by "minNumBonds" parameter in  "-m, --mcsParams" option.'
  690                 % (CoreNumBonds, MCSParams["MinNumBonds"])
  691             )
  692         return None
  693 
  694     return GenerateCoreMol(RefMol, SMARTSCore)
  695 
  696 
  697 def SetupCoreScaffoldBySMARTS(RefMol, Mol, MolCount):
  698     """Setup a reference molecule core containing common scaffold atoms between
  699     a pair of molecules using specified SMARTS."""
  700 
  701     if OptionsInfo["ScaffoldPatternMol"] is None:
  702         OptionsInfo["ScaffoldPatternMol"] = Chem.MolFromSmarts(OptionsInfo["ScaffoldSMARTS"])
  703 
  704     if not Mol.HasSubstructMatch(OptionsInfo["ScaffoldPatternMol"]):
  705         if not OptionsInfo["QuietMode"]:
  706             MiscUtil.PrintWarning(
  707                 'The scaffold SMARTS pattern, %s, specified using "-s, --scaffold" option is missing in input molecule,  %s.'
  708                 % (OptionsInfo["ScaffoldSMARTS"], RDKitUtil.GetMolName(Mol, MolCount))
  709             )
  710         return None
  711 
  712     return GenerateCoreMol(RefMol, OptionsInfo["ScaffoldSMARTS"])
  713 
  714 
  715 def GenerateCoreMol(RefMol, SMARTSCore):
  716     """Generate core molecule for embedding."""
  717 
  718     # Create a molecule corresponding to core atoms...
  719     SMARTSCoreMol = Chem.MolFromSmarts(SMARTSCore)
  720 
  721     # Setup a ref molecule containing core atoms with dummy atoms as
  722     # attachment points for atoms around the core atoms...
  723     Core = AllChem.ReplaceSidechains(Chem.RemoveHs(RefMol), SMARTSCoreMol)
  724 
  725     # Delete any substructures containing dummy atoms..
  726     RefMolCore = AllChem.DeleteSubstructs(Core, Chem.MolFromSmiles("*"))
  727     RefMolCore.UpdatePropertyCache()
  728 
  729     return RefMolCore
  730 
  731 
  732 def ConstrainAndMinimizeMolecule(Psi4Handle, Mol, RefMolCore, MolNum=None):
  733     """Constrain and minimize molecule."""
  734 
  735     # Add hydrogens...
  736     Mol = Chem.AddHs(Mol, addCoords=True)
  737 
  738     MolName = RDKitUtil.GetMolName(Mol, MolNum)
  739 
  740     # Setup constrained conformers...
  741     MolConfs, MolConfsStatus = ConstrainEmbedAndMinimizeMoleculeUsingRDKit(Mol, RefMolCore, MolNum)
  742     if not MolConfsStatus:
  743         if not OptionsInfo["QuietMode"]:
  744             MiscUtil.PrintWarning(
  745                 "Conformation generation couldn't be performed for molecule %s: Constrained embedding failed...\n"
  746                 % MolName
  747             )
  748         return (Mol, False, None, None)
  749 
  750     # Minimize conformers...
  751     ConfNums = []
  752     CalcEnergyMap = {}
  753     MolConfsMap = {}
  754 
  755     for ConfNum, MolConf in enumerate(MolConfs):
  756         if not OptionsInfo["QuietMode"]:
  757             MiscUtil.PrintInfo("\nProcessing molecule %s conformer number %s..." % (MolName, ConfNum))
  758 
  759         CalcStatus, Energy = ConstrainAndMinimizeMoleculeUsingPsi4(Psi4Handle, MolConf, RefMolCore, MolNum)
  760         if not CalcStatus:
  761             if not OptionsInfo["QuietMode"]:
  762                 MiscUtil.PrintWarning("Minimization couldn't be performed for molecule %s\n" % (MolName))
  763             return (Mol, False, None, None)
  764 
  765         ConfNums.append(ConfNum)
  766         CalcEnergyMap[ConfNum] = Energy
  767         MolConfsMap[ConfNum] = MolConf
  768 
  769     SortedConfNums = sorted(ConfNums, key=lambda ConfNum: CalcEnergyMap[ConfNum])
  770     MinEnergyConfNum = SortedConfNums[0]
  771 
  772     MinEnergy = (
  773         "%.*f" % (OptionsInfo["Precision"], CalcEnergyMap[MinEnergyConfNum]) if OptionsInfo["EnergyOut"] else None
  774     )
  775     MinEnergyMolConf = MolConfsMap[MinEnergyConfNum]
  776 
  777     ScaffoldEmbedRMSD = "%.4f" % float(MinEnergyMolConf.GetProp("EmbedRMS")) if OptionsInfo["ScaffoldRMSDOut"] else None
  778     MinEnergyMolConf.ClearProp("EmbedRMS")
  779 
  780     return (MinEnergyMolConf, True, MinEnergy, ScaffoldEmbedRMSD)
  781 
  782 
  783 def ConstrainAndMinimizeMoleculeUsingPsi4(Psi4Handle, Mol, RefMolCore, MolNum, ConfID=-1):
  784     """Minimize molecule using Psi4."""
  785 
  786     # Setup a list for constrained atoms...
  787     ConstrainedAtomIndices = SetupConstrainedAtomIndicesForPsi4(Mol, RefMolCore)
  788     if len(ConstrainedAtomIndices) == 0:
  789         return (False, None)
  790 
  791     # Setup a Psi4Mol...
  792     Psi4Mol = SetupPsi4Mol(Psi4Handle, Mol, MolNum, ConfID)
  793     if Psi4Mol is None:
  794         return (False, None)
  795 
  796     #  Setup reference wave function...
  797     Reference = SetupReferenceWavefunction(Mol)
  798     Psi4Handle.set_options({"Reference": Reference})
  799 
  800     # Setup method name and basis set...
  801     MethodName, BasisSet = SetupMethodNameAndBasisSet(Mol)
  802 
  803     # Setup freeze list for constrained atoms...
  804     FreezeXYZList = [("%s xyz" % AtomIdex) for AtomIdex in ConstrainedAtomIndices]
  805     FreezeXYZString = " %s " % " ".join(FreezeXYZList)
  806     Psi4Handle.set_options({"OPTKING__frozen_cartesian": FreezeXYZString})
  807 
  808     # Optimize geometry...
  809     Status, Energy, WaveFunction = Psi4Util.PerformGeometryOptimization(
  810         Psi4Handle, Psi4Mol, MethodName, BasisSet, ReturnWaveFunction=True, Quiet=OptionsInfo["QuietMode"]
  811     )
  812 
  813     if not Status:
  814         PerformPsi4Cleanup(Psi4Handle)
  815         return (False, None)
  816 
  817     # Update atom positions...
  818     AtomPositions = Psi4Util.GetAtomPositions(Psi4Handle, WaveFunction, InAngstroms=True)
  819     RDKitUtil.SetAtomPositions(Mol, AtomPositions, ConfID=ConfID)
  820 
  821     # Convert energy units...
  822     if OptionsInfo["ApplyEnergyConversionFactor"]:
  823         Energy = Energy * OptionsInfo["EnergyConversionFactor"]
  824 
  825     # Clean up
  826     PerformPsi4Cleanup(Psi4Handle)
  827 
  828     return (True, Energy)
  829 
  830 
  831 def ConstrainEmbedAndMinimizeMoleculeUsingRDKit(Mol, RefMolCore, MolNum=None):
  832     """Constrain, embed, and minimize molecule."""
  833 
  834     # Setup forcefield function to use for constrained minimization...
  835     ForceFieldFunction = None
  836     ForceFieldName = None
  837     if OptionsInfo["ConfGenerationParams"]["UseUFF"]:
  838         ForceFieldFunction = lambda mol, confId=-1: AllChem.UFFGetMoleculeForceField(mol, confId=confId)
  839         ForceFieldName = "UFF"
  840     else:
  841         ForceFieldFunction = lambda mol, confId=-1: AllChem.MMFFGetMoleculeForceField(
  842             mol,
  843             AllChem.MMFFGetMoleculeProperties(mol, mmffVariant=OptionsInfo["ConfGenerationParams"]["MMFFVariant"]),
  844             confId=confId,
  845         )
  846         ForceFieldName = "MMFF"
  847 
  848     if ForceFieldFunction is None:
  849         if not OptionsInfo["QuietMode"]:
  850             MiscUtil.PrintWarning(
  851                 "Failed to setup forcefield %s for molecule: %s\n" % (ForceFieldName, RDKitUtil.GetMolName(Mol, MolNum))
  852             )
  853         return (None, False)
  854 
  855     MaxConfs = OptionsInfo["ConfGenerationParams"]["MaxConfs"]
  856     EnforceChirality = OptionsInfo["ConfGenerationParams"]["EnforceChirality"]
  857     UseExpTorsionAnglePrefs = OptionsInfo["ConfGenerationParams"]["UseExpTorsionAnglePrefs"]
  858     ETVersion = OptionsInfo["ConfGenerationParams"]["ETVersion"]
  859     UseBasicKnowledge = OptionsInfo["ConfGenerationParams"]["UseBasicKnowledge"]
  860     UseTethers = OptionsInfo["ConfGenerationParams"]["UseTethers"]
  861 
  862     MolConfs = []
  863     ConfIDs = [ConfID for ConfID in range(0, MaxConfs)]
  864 
  865     for ConfID in ConfIDs:
  866         try:
  867             MolConf = Chem.Mol(Mol)
  868             AllChem.ConstrainedEmbed(
  869                 MolConf,
  870                 RefMolCore,
  871                 useTethers=UseTethers,
  872                 coreConfId=-1,
  873                 randomseed=ConfID,
  874                 getForceField=ForceFieldFunction,
  875                 enforceChirality=EnforceChirality,
  876                 useExpTorsionAnglePrefs=UseExpTorsionAnglePrefs,
  877                 useBasicKnowledge=UseBasicKnowledge,
  878                 ETversion=ETVersion,
  879             )
  880         except (ValueError, RuntimeError, Chem.rdchem.KekulizeException) as ErrMsg:
  881             if not OptionsInfo["QuietMode"]:
  882                 MiscUtil.PrintWarning(
  883                     "Constrained embedding couldn't  be performed for molecule %s:\n%s\n"
  884                     % (RDKitUtil.GetMolName(Mol, MolNum), ErrMsg)
  885                 )
  886             return (None, False)
  887 
  888         MolConfs.append(MolConf)
  889 
  890     return FilterConstrainedConformationsByRMSD(Mol, MolConfs, MolNum)
  891 
  892 
  893 def FilterConstrainedConformationsByRMSD(Mol, MolConfs, MolNum=None):
  894     """Filter contarained conformations by RMSD."""
  895 
  896     EmbedRMSDCutoff = OptionsInfo["ConfGenerationParams"]["EmbedRMSDCutoff"]
  897     if EmbedRMSDCutoff is None or EmbedRMSDCutoff <= 0:
  898         if not OptionsInfo["QuietMode"]:
  899             MiscUtil.PrintInfo(
  900                 "\nGenerating initial ensemble of  constrained conformations by distance geometry  and forcefield for %s - EmbedRMSDCutoff: None; Size: %s"
  901                 % (RDKitUtil.GetMolName(Mol, MolNum), len(MolConfs))
  902             )
  903         return (MolConfs, True)
  904 
  905     FirstMolConf = True
  906     SelectedMolConfs = []
  907     for MolConfIndex, MolConf in enumerate(MolConfs):
  908         if FirstMolConf:
  909             FirstMolConf = False
  910             SelectedMolConfs.append(MolConf)
  911             continue
  912 
  913         # Compare RMSD against all selected conformers...
  914         ProbeMolConf = Chem.RemoveHs(Chem.Mol(MolConf))
  915         IgnoreConf = False
  916         for SelectedMolConfIndex, SelectedMolConf in enumerate(SelectedMolConfs):
  917             RefMolConf = Chem.RemoveHs(Chem.Mol(SelectedMolConf))
  918             CalcRMSD = rdMolAlign.AlignMol(ProbeMolConf, RefMolConf)
  919 
  920             if CalcRMSD < EmbedRMSDCutoff:
  921                 IgnoreConf = True
  922                 break
  923 
  924         if IgnoreConf:
  925             continue
  926 
  927         SelectedMolConfs.append(MolConf)
  928 
  929     if not OptionsInfo["QuietMode"]:
  930         MiscUtil.PrintInfo(
  931             "\nGenerating initial ensemble of constrained conformations by distance geometry and forcefield for %s - EmbedRMSDCutoff: %s; Size: %s; Size after RMSD filtering: %s"
  932             % (RDKitUtil.GetMolName(Mol, MolNum), EmbedRMSDCutoff, len(MolConfs), len(SelectedMolConfs))
  933         )
  934 
  935     return (SelectedMolConfs, True)
  936 
  937 
  938 def SetupConstrainedAtomIndicesForPsi4(Mol, RefMolCore, ConstrainHydrogens=False):
  939     """Setup a list of atom indices to be constrained during Psi4 minimizaiton."""
  940 
  941     AtomIndices = []
  942 
  943     # Collect matched heavy atoms along with attached hydrogens...
  944     for AtomIndex in Mol.GetSubstructMatch(RefMolCore):
  945         Atom = Mol.GetAtomWithIdx(AtomIndex)
  946         if Atom.GetAtomicNum() == 1:
  947             continue
  948 
  949         AtomIndices.append(AtomIndex)
  950         for AtomNbr in Atom.GetNeighbors():
  951             if AtomNbr.GetAtomicNum() == 1:
  952                 if ConstrainHydrogens:
  953                     AtomNbrIndex = AtomNbr.GetIdx()
  954                     AtomIndices.append(AtomNbrIndex)
  955 
  956     AtomIndices = sorted(AtomIndices)
  957 
  958     # Atom indices start from 1 for Psi4 instead 0 for RDKit...
  959     AtomIndices = [AtomIndex + 1 for AtomIndex in AtomIndices]
  960 
  961     return AtomIndices
  962 
  963 
  964 def SetupPsi4Mol(Psi4Handle, Mol, MolNum, ConfID=-1):
  965     """Setup a Psi4 molecule object."""
  966 
  967     # Turn off recentering and reorientation to perform optimization in the
  968     # constrained coordinate space...
  969     MolGeometry = RDKitUtil.GetPsi4XYZFormatString(Mol, ConfID=ConfID, NoCom=True, NoReorient=True)
  970 
  971     try:
  972         Psi4Mol = Psi4Handle.geometry(MolGeometry)
  973     except Exception as ErrMsg:
  974         Psi4Mol = None
  975         if not OptionsInfo["QuietMode"]:
  976             MiscUtil.PrintWarning("Failed to create Psi4 molecule from geometry string: %s\n" % ErrMsg)
  977             MolName = RDKitUtil.GetMolName(Mol, MolNum)
  978             MiscUtil.PrintWarning("Ignoring molecule: %s" % MolName)
  979 
  980     return Psi4Mol
  981 
  982 
  983 def PerformPsi4Cleanup(Psi4Handle):
  984     """Perform clean up."""
  985 
  986     # Clean up after Psi4 run...
  987     Psi4Handle.core.clean()
  988 
  989     # Clean up any leftover scratch files...
  990     if OptionsInfo["MPMode"]:
  991         Psi4Util.RemoveScratchFiles(Psi4Handle, OptionsInfo["Psi4RunParams"]["OutputFile"])
  992 
  993 
  994 def CheckAndValidateMolecule(Mol, MolCount=None):
  995     """Validate molecule for Psi4 calculations."""
  996 
  997     if Mol is None:
  998         if not OptionsInfo["QuietMode"]:
  999             MiscUtil.PrintInfo("\nProcessing molecule number %s..." % MolCount)
 1000         return False
 1001 
 1002     MolName = RDKitUtil.GetMolName(Mol, MolCount)
 1003     if not OptionsInfo["QuietMode"]:
 1004         MiscUtil.PrintInfo("\nProcessing molecule %s..." % MolName)
 1005 
 1006     if RDKitUtil.IsMolEmpty(Mol):
 1007         if not OptionsInfo["QuietMode"]:
 1008             MiscUtil.PrintWarning("Ignoring empty molecule: %s\n" % MolName)
 1009         return False
 1010 
 1011     if not RDKitUtil.ValidateElementSymbols(RDKitUtil.GetAtomSymbols(Mol)):
 1012         if not OptionsInfo["QuietMode"]:
 1013             MiscUtil.PrintWarning("Ignoring molecule containing invalid element symbols: %s\n" % MolName)
 1014         return False
 1015 
 1016     return True
 1017 
 1018 
 1019 def SetupMethodNameAndBasisSet(Mol):
 1020     """Setup method name and basis set."""
 1021 
 1022     MethodName = OptionsInfo["MethodName"]
 1023     if OptionsInfo["MethodNameAuto"]:
 1024         MethodName = "B3LYP"
 1025 
 1026     BasisSet = OptionsInfo["BasisSet"]
 1027     if OptionsInfo["BasisSetAuto"]:
 1028         BasisSet = "6-31+G**" if RDKitUtil.IsAtomSymbolPresentInMol(Mol, "S") else "6-31G**"
 1029 
 1030     return (MethodName, BasisSet)
 1031 
 1032 
 1033 def SetupReferenceWavefunction(Mol):
 1034     """Setup reference wavefunction."""
 1035 
 1036     Reference = OptionsInfo["Reference"]
 1037     if OptionsInfo["ReferenceAuto"]:
 1038         Reference = "UHF" if (RDKitUtil.GetSpinMultiplicity(Mol) > 1) else "RHF"
 1039 
 1040     return Reference
 1041 
 1042 
 1043 def SetupEnergyConversionFactor(Psi4Handle):
 1044     """Setup converstion factor for energt units. The Psi4 energy units are Hartrees."""
 1045 
 1046     EnergyUnits = OptionsInfo["EnergyUnits"]
 1047 
 1048     ApplyConversionFactor = True
 1049     if re.match(r"^kcal\/mol$", EnergyUnits, re.I):
 1050         ConversionFactor = Psi4Handle.constants.hartree2kcalmol
 1051     elif re.match(r"^kJ\/mol$", EnergyUnits, re.I):
 1052         ConversionFactor = Psi4Handle.constants.hartree2kJmol
 1053     elif re.match("^eV$", EnergyUnits, re.I):
 1054         ConversionFactor = Psi4Handle.constants.hartree2ev
 1055     else:
 1056         ApplyConversionFactor = False
 1057         ConversionFactor = 1.0
 1058 
 1059     OptionsInfo["ApplyEnergyConversionFactor"] = ApplyConversionFactor
 1060     OptionsInfo["EnergyConversionFactor"] = ConversionFactor
 1061 
 1062 
 1063 def WriteMolecule(
 1064     Writer,
 1065     Mol,
 1066     MolNum=None,
 1067     Energy=None,
 1068     ScaffoldEmbedRMSD=None,
 1069     ConfID=None,
 1070 ):
 1071     """Write molecule."""
 1072 
 1073     if ScaffoldEmbedRMSD is not None:
 1074         Mol.SetProp("CoreScaffoldEmbedRMSD", ScaffoldEmbedRMSD)
 1075 
 1076     if OptionsInfo["EnergyOut"] and Energy is not None:
 1077         Mol.SetProp(OptionsInfo["EnergyDataFieldLabel"], Energy)
 1078 
 1079     try:
 1080         if ConfID is None:
 1081             Writer.write(Mol)
 1082         else:
 1083             Writer.write(Mol, confId=ConfID)
 1084     except (ValueError, RuntimeError) as ErrMsg:
 1085         if not OptionsInfo["QuietMode"]:
 1086             MolName = RDKitUtil.GetMolName(Mol, MolNum)
 1087             MiscUtil.PrintWarning("Failed to write molecule %s:\n%s\n" % (MolName, ErrMsg))
 1088         return False
 1089 
 1090     return True
 1091 
 1092 
 1093 def ProcessMCSParameters():
 1094     """Set up and process MCS parameters."""
 1095 
 1096     SetupMCSParameters()
 1097     ProcessSpecifiedMCSParameters()
 1098 
 1099 
 1100 def SetupMCSParameters():
 1101     """Set up default MCS parameters."""
 1102 
 1103     OptionsInfo["MCSParams"] = {
 1104         "MaximizeBonds": True,
 1105         "Threshold": 0.9,
 1106         "TimeOut": 3600,
 1107         "Verbose": False,
 1108         "MatchValences": True,
 1109         "MatchChiralTag": False,
 1110         "RingMatchesRingOnly": True,
 1111         "CompleteRingsOnly": True,
 1112         "AtomCompare": rdFMCS.AtomCompare.CompareElements,
 1113         "BondCompare": rdFMCS.BondCompare.CompareOrder,
 1114         "SeedSMARTS": "",
 1115         "MinNumAtoms": 1,
 1116         "MinNumBonds": 0,
 1117     }
 1118 
 1119 
 1120 def ProcessSpecifiedMCSParameters():
 1121     """Process specified MCS parameters."""
 1122 
 1123     if re.match("^auto$", OptionsInfo["SpecifiedMCSParams"], re.I):
 1124         # Nothing to process...
 1125         return
 1126 
 1127     # Parse specified parameters...
 1128     MCSParams = re.sub(" ", "", OptionsInfo["SpecifiedMCSParams"])
 1129     if not MCSParams:
 1130         MiscUtil.PrintError('No valid parameter name and value pairs specified using "-m, --mcsParams" option.')
 1131 
 1132     MCSParamsWords = MCSParams.split(",")
 1133     if len(MCSParamsWords) % 2:
 1134         MiscUtil.PrintError(
 1135             'The number of comma delimited paramater names and values, %d, specified using "-m, --mcsParams" option must be an even number.'
 1136             % (len(MCSParamsWords))
 1137         )
 1138 
 1139     # Setup  canonical parameter names...
 1140     ValidParamNames = []
 1141     CanonicalParamNamesMap = {}
 1142     for ParamName in sorted(OptionsInfo["MCSParams"]):
 1143         ValidParamNames.append(ParamName)
 1144         CanonicalParamNamesMap[ParamName.lower()] = ParamName
 1145 
 1146     # Validate and set paramater names and value...
 1147     for Index in range(0, len(MCSParamsWords), 2):
 1148         Name = MCSParamsWords[Index]
 1149         Value = MCSParamsWords[Index + 1]
 1150 
 1151         CanonicalName = Name.lower()
 1152         if CanonicalName not in CanonicalParamNamesMap:
 1153             MiscUtil.PrintError(
 1154                 'The parameter name, %s, specified using "-m, --mcsParams" option is not a valid name. Supported parameter names: %s'
 1155                 % (Name, " ".join(ValidParamNames))
 1156             )
 1157 
 1158         ParamName = CanonicalParamNamesMap[CanonicalName]
 1159         if re.match("^Threshold$", ParamName, re.I):
 1160             Value = float(Value)
 1161             if Value <= 0.0 or Value > 1.0:
 1162                 MiscUtil.PrintError(
 1163                     'The parameter value, %s, specified using "-m, --mcsParams" option  for parameter, %s, is not a valid value. Supported values: > 0 and <= 1.0'
 1164                     % (Value, Name)
 1165                 )
 1166             ParamValue = Value
 1167         elif re.match("^Timeout$", ParamName, re.I):
 1168             Value = int(Value)
 1169             if Value <= 0:
 1170                 MiscUtil.PrintError(
 1171                     'The parameter value, %s, specified using "-m, --mcsParams" option  for parameter, %s, is not a valid value. Supported values: > 0'
 1172                     % (Value, Name)
 1173                 )
 1174             ParamValue = Value
 1175         elif re.match("^MinNumAtoms$", ParamName, re.I):
 1176             Value = int(Value)
 1177             if Value < 1:
 1178                 MiscUtil.PrintError(
 1179                     'The parameter value, %s, specified using "-m, --mcsParams" option  for parameter, %s, is not a valid value. Supported values: >= 1'
 1180                     % (Value, Name)
 1181                 )
 1182             ParamValue = Value
 1183         elif re.match("^MinNumBonds$", ParamName, re.I):
 1184             Value = int(Value)
 1185             if Value < 0:
 1186                 MiscUtil.PrintError(
 1187                     'The parameter value, %s, specified using "-m, --mcsParams" option  for parameter, %s, is not a valid value. Supported values: >=0 '
 1188                     % (Value, Name)
 1189                 )
 1190             ParamValue = Value
 1191         elif re.match("^AtomCompare$", ParamName, re.I):
 1192             if re.match("^CompareAny$", Value, re.I):
 1193                 ParamValue = rdFMCS.AtomCompare.CompareAny
 1194             elif re.match("^CompareElements$", Value, re.I):
 1195                 ParamValue = Chem.rdFMCS.AtomCompare.CompareElements
 1196             elif re.match("^CompareIsotopes$", Value, re.I):
 1197                 ParamValue = Chem.rdFMCS.AtomCompare.CompareIsotopes
 1198             else:
 1199                 MiscUtil.PrintError(
 1200                     'The parameter value, %s, specified using "-m, --mcsParams" option  for parameter, %s, is not a valid value. Supported values: CompareAny CompareElements CompareIsotopes'
 1201                     % (Value, Name)
 1202                 )
 1203         elif re.match("^BondCompare$", ParamName, re.I):
 1204             if re.match("^CompareAny$", Value, re.I):
 1205                 ParamValue = Chem.rdFMCS.BondCompare.CompareAny
 1206             elif re.match("^CompareOrder$", Value, re.I):
 1207                 ParamValue = rdFMCS.BondCompare.CompareOrder
 1208             elif re.match("^CompareOrderExact$", Value, re.I):
 1209                 ParamValue = rdFMCS.BondCompare.CompareOrderExact
 1210             else:
 1211                 MiscUtil.PrintError(
 1212                     'The parameter value, %s, specified using "-m, --mcsParams" option  for parameter, %s, is not a valid value. Supported values: CompareAny CompareOrder CompareOrderExact'
 1213                     % (Value, Name)
 1214                 )
 1215         elif re.match("^SeedSMARTS$", ParamName, re.I):
 1216             if not len(Value):
 1217                 MiscUtil.PrintError(
 1218                     'The parameter value specified using "-m, --mcsParams" option  for parameter, %s, is empty. '
 1219                     % (Name)
 1220                 )
 1221             ParamValue = Value
 1222         else:
 1223             if not re.match("^(Yes|No|True|False)$", Value, re.I):
 1224                 MiscUtil.PrintError(
 1225                     'The parameter value, %s, specified using "-m, --mcsParams" option  for parameter, %s, is not a valid value. Supported values: Yes No True False'
 1226                     % (Value, Name)
 1227                 )
 1228             ParamValue = False
 1229             if re.match("^(Yes|True)$", Value, re.I):
 1230                 ParamValue = True
 1231 
 1232         # Set value...
 1233         OptionsInfo["MCSParams"][ParamName] = ParamValue
 1234 
 1235 
 1236 def ProcessOptions():
 1237     """Process and validate command line arguments and options."""
 1238 
 1239     MiscUtil.PrintInfo("Processing options...")
 1240 
 1241     # Validate options...
 1242     ValidateOptions()
 1243 
 1244     OptionsInfo["Infile"] = Options["--infile"]
 1245     OptionsInfo["SMILESInfileStatus"] = True if MiscUtil.CheckFileExt(Options["--infile"], "smi csv tsv txt") else False
 1246     ParamsDefaultInfoOverride = {"RemoveHydrogens": False}
 1247     OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters(
 1248         "--infileParams",
 1249         Options["--infileParams"],
 1250         InfileName=Options["--infile"],
 1251         ParamsDefaultInfo=ParamsDefaultInfoOverride,
 1252     )
 1253 
 1254     OptionsInfo["RefFile"] = Options["--reffile"]
 1255 
 1256     OptionsInfo["Outfile"] = Options["--outfile"]
 1257     OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters(
 1258         "--outfileParams", Options["--outfileParams"]
 1259     )
 1260 
 1261     OptionsInfo["Overwrite"] = Options["--overwrite"]
 1262 
 1263     OptionsInfo["Scaffold"] = Options["--scaffold"]
 1264     if re.match("^auto$", Options["--scaffold"], re.I):
 1265         UseScaffoldMCS = True
 1266         UseScaffoldSMARTS = False
 1267         ScaffoldSMARTS = None
 1268     else:
 1269         UseScaffoldMCS = False
 1270         UseScaffoldSMARTS = True
 1271         ScaffoldSMARTS = OptionsInfo["Scaffold"]
 1272 
 1273     OptionsInfo["UseScaffoldMCS"] = UseScaffoldMCS
 1274     OptionsInfo["UseScaffoldSMARTS"] = UseScaffoldSMARTS
 1275     OptionsInfo["ScaffoldSMARTS"] = ScaffoldSMARTS
 1276     OptionsInfo["ScaffoldPatternMol"] = None
 1277 
 1278     OptionsInfo["SpecifiedMCSParams"] = Options["--mcsParams"]
 1279     ProcessMCSParameters()
 1280 
 1281     OptionsInfo["ScaffoldRMSDOut"] = True if re.match("^yes$", Options["--scaffoldRMSDOut"], re.I) else False
 1282 
 1283     # Method, basis set, and reference wavefunction...
 1284     OptionsInfo["BasisSet"] = Options["--basisSet"]
 1285     OptionsInfo["BasisSetAuto"] = True if re.match("^auto$", Options["--basisSet"], re.I) else False
 1286 
 1287     OptionsInfo["MethodName"] = Options["--methodName"]
 1288     OptionsInfo["MethodNameAuto"] = True if re.match("^auto$", Options["--methodName"], re.I) else False
 1289 
 1290     OptionsInfo["Reference"] = Options["--reference"]
 1291     OptionsInfo["ReferenceAuto"] = True if re.match("^auto$", Options["--reference"], re.I) else False
 1292 
 1293     # Run and options parameters...
 1294     OptionsInfo["Psi4OptionsParams"] = Psi4Util.ProcessPsi4OptionsParameters(
 1295         "--psi4OptionsParams", Options["--psi4OptionsParams"]
 1296     )
 1297     OptionsInfo["Psi4RunParams"] = Psi4Util.ProcessPsi4RunParameters(
 1298         "--psi4RunParams", Options["--psi4RunParams"], InfileName=OptionsInfo["Infile"]
 1299     )
 1300 
 1301     # Conformer generation paramaters...
 1302     ParamsDefaultInfoOverride = {"MaxConfs": 50}
 1303     OptionsInfo["ConfGenerationParams"] = MiscUtil.ProcessOptionConformerParameters(
 1304         "--confParams", Options["--confParams"], ParamsDefaultInfoOverride
 1305     )
 1306 
 1307     # Write energy parameters...
 1308     OptionsInfo["EnergyOut"] = True if re.match("^yes$", Options["--energyOut"], re.I) else False
 1309     OptionsInfo["EnergyUnits"] = Options["--energyUnits"]
 1310 
 1311     EnergyDataFieldLabel = Options["--energyDataFieldLabel"]
 1312     if re.match("^auto$", EnergyDataFieldLabel, re.I):
 1313         EnergyDataFieldLabel = "Psi4_Energy (%s)" % Options["--energyUnits"]
 1314     OptionsInfo["EnergyDataFieldLabel"] = EnergyDataFieldLabel
 1315 
 1316     OptionsInfo["MaxIters"] = int(Options["--maxIters"])
 1317 
 1318     OptionsInfo["MPMode"] = True if re.match("^yes$", Options["--mp"], re.I) else False
 1319     OptionsInfo["MPParams"] = MiscUtil.ProcessOptionMultiprocessingParameters("--mpParams", Options["--mpParams"])
 1320 
 1321     # Multiprocessing level...
 1322     MPLevelMoleculesMode = False
 1323     MPLevelConformersMode = False
 1324     MPLevel = Options["--mpLevel"]
 1325     if re.match("^Molecules$", MPLevel, re.I):
 1326         MPLevelMoleculesMode = True
 1327     elif re.match("^Conformers$", MPLevel, re.I):
 1328         MPLevelConformersMode = True
 1329     else:
 1330         MiscUtil.PrintError('The value, %s, specified for option "--mpLevel" is not valid. ' % MPLevel)
 1331     OptionsInfo["MPLevel"] = MPLevel
 1332     OptionsInfo["MPLevelMoleculesMode"] = MPLevelMoleculesMode
 1333     OptionsInfo["MPLevelConformersMode"] = MPLevelConformersMode
 1334 
 1335     OptionsInfo["Precision"] = int(Options["--precision"])
 1336     OptionsInfo["QuietMode"] = True if re.match("^yes$", Options["--quiet"], re.I) else False
 1337 
 1338 
 1339 def RetrieveOptions():
 1340     """Retrieve command line arguments and options."""
 1341 
 1342     # Get options...
 1343     global Options
 1344     Options = docopt(_docoptUsage_)
 1345 
 1346     # Set current working directory to the specified directory...
 1347     WorkingDir = Options["--workingdir"]
 1348     if WorkingDir:
 1349         os.chdir(WorkingDir)
 1350 
 1351     # Handle examples option...
 1352     if "--examples" in Options and Options["--examples"]:
 1353         MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_))
 1354         sys.exit(0)
 1355 
 1356 
 1357 def ValidateOptions():
 1358     """Validate option values."""
 1359 
 1360     MiscUtil.ValidateOptionTextValue("--energyOut", Options["--energyOut"], "yes no")
 1361     MiscUtil.ValidateOptionTextValue("--energyUnits", Options["--energyUnits"], "Hartrees kcal/mol kJ/mol eV")
 1362 
 1363     MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"])
 1364     MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "sdf sd mol smi txt csv tsv")
 1365 
 1366     MiscUtil.ValidateOptionFilePath("-r, --reffile", Options["--reffile"])
 1367     MiscUtil.ValidateOptionFileExt("-r, --reffile", Options["--reffile"], "sdf sd mol")
 1368 
 1369     MiscUtil.ValidateOptionIntegerValue("--maxIters", Options["--maxIters"], {">": 0})
 1370 
 1371     MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "sdf sd")
 1372     MiscUtil.ValidateOptionsOutputFileOverwrite(
 1373         "-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"]
 1374     )
 1375     MiscUtil.ValidateOptionsDistinctFileNames(
 1376         "-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"]
 1377     )
 1378 
 1379     MiscUtil.ValidateOptionTextValue("--mp", Options["--mp"], "yes no")
 1380     MiscUtil.ValidateOptionTextValue("--mpLevel", Options["--mpLevel"], "Molecules Conformers")
 1381 
 1382     MiscUtil.ValidateOptionIntegerValue("-p, --precision", Options["--precision"], {">": 0})
 1383     MiscUtil.ValidateOptionTextValue("-q, --quiet", Options["--quiet"], "yes no")
 1384 
 1385     MiscUtil.ValidateOptionTextValue("--scaffoldRMSDOut", Options["--scaffoldRMSDOut"], "yes no")
 1386 
 1387 
 1388 # Setup a usage string for docopt...
 1389 _docoptUsage_ = """
 1390 Psi4PerformConstrainedMinimization.py - Perform constrained minimization
 1391 
 1392 Usage:
 1393     Psi4PerformConstrainedMinimization.py [--basisSet <text>] [--confParams <Name,Value,...>] [--energyOut <yes or no>]
 1394                                           [--energyDataFieldLabel <text>] [--energyUnits <text>] [--infileParams <Name,Value,...>]
 1395                                           [--maxIters <number>] [--methodName <text>] [--mcsParams <Name,Value,...>]
 1396                                           [--mp <yes or no>] [--mpLevel <Molecules or Conformers>] [--mpParams <Name, Value,...>]
 1397                                           [ --outfileParams <Name,Value,...> ] [--overwrite] [--precision <number> ]
 1398                                           [--psi4OptionsParams <Name,Value,...>] [--psi4RunParams <Name,Value,...>]
 1399                                           [--quiet <yes or no>]  [--reference <text>] [--scaffold <auto or SMARTS>]
 1400                                           [--scaffoldRMSDOut <yes or no>] [-w <dir>] -i <infile> -r <reffile> -o <outfile> 
 1401     Psi4PerformConstrainedMinimization.py -h | --help | -e | --examples
 1402 
 1403 Description:
 1404     Generate 3D structures for molecules by performing a constrained energy
 1405     minimization against a reference molecule. The 3D structures for molecules
 1406     are generated using a combination of distance geometry and forcefield
 1407     minimization followed by geometry optimization using a quantum chemistry
 1408     method.
 1409 
 1410     An initial set of 3D conformers are generated for input molecules using
 1411     distance geometry. A common core scaffold, corresponding to a Maximum
 1412     Common Substructure (MCS) or an explicit SMARTS pattern,  is identified
 1413     between a pair of input and reference molecules. The core scaffold atoms in
 1414     input molecules are aligned against the same atoms in the reference molecule.
 1415     The energy of aligned structures are sequentially minimized using the forcefield
 1416     and a quantum chemistry method. The conformation with the lowest energy is
 1417     selected to represent the final structure.
 1418 
 1419     A Psi4 XYZ format geometry string is automatically generated for each molecule
 1420     in input file. It contains atom symbols and 3D coordinates for each atom in a
 1421     molecule. In addition, the formal charge and spin multiplicity are present in the
 1422     the geometry string. These values are either retrieved from molecule properties
 1423     named 'FormalCharge' and 'SpinMultiplicty' or dynamically calculated for a
 1424     molecule.
 1425 
 1426     The supported input file formats are: Mol (.mol), SD (.sdf, .sd), SMILES (.smi,
 1427     .csv, .tsv, .txt)
 1428 
 1429     The supported output file formats are: SD (.sdf, .sd)
 1430 
 1431 Options:
 1432     -b, --basisSet <text>  [default: auto]
 1433         Basis set to use for constrained energy minimization. Default: 6-31+G**
 1434         for sulfur containing molecules; Otherwise, 6-31G** [ Ref 150 ]. The specified 
 1435         value must be a valid Psi4 basis set. No validation is performed.
 1436         
 1437         The following list shows a representative sample of basis sets available
 1438         in Psi4:
 1439             
 1440             STO-3G, 6-31G, 6-31+G, 6-31++G, 6-31G*, 6-31+G*,  6-31++G*, 
 1441             6-31G**, 6-31+G**, 6-31++G**, 6-311G, 6-311+G, 6-311++G,
 1442             6-311G*, 6-311+G*, 6-311++G*, 6-311G**, 6-311+G**, 6-311++G**,
 1443             cc-pVDZ, cc-pCVDZ, aug-cc-pVDZ, cc-pVDZ-DK, cc-pCVDZ-DK, def2-SVP,
 1444             def2-SVPD, def2-TZVP, def2-TZVPD, def2-TZVPP, def2-TZVPPD
 1445             
 1446     --confParams <Name,Value,...>  [default: auto]
 1447         A comma delimited list of parameter name and value pairs for generating
 1448         initial 3D coordinates for molecules in input file. A common core scaffold is
 1449         identified between a pair of input and reference molecules. The atoms in
 1450         common core scaffold of input molecules are aligned against the reference
 1451         molecule followed by constrained energy minimization using forcefield
 1452         available in RDKit. The 3D structures are subsequently constrained and 
 1453         minimized by a quantum chemistry method available in Psi4.
 1454         
 1455         The supported parameter names along with their default values are shown
 1456         below:
 1457             
 1458             confMethod,ETKDGv2,
 1459             forceField,MMFF, forceFieldMMFFVariant,MMFF94,
 1460             enforceChirality,yes,embedRMSDCutoff,0.5,maxConfs,50,
 1461             useTethers,yes
 1462             
 1463             confMethod,ETKDGv2   [ Possible values: SDG, KDG, ETDG,
 1464                 ETKDG , or ETKDGv2]
 1465             forceField, MMFF   [ Possible values: UFF or MMFF ]
 1466             forceFieldMMFFVariant,MMFF94   [ Possible values: MMFF94 or MMFF94s ]
 1467             enforceChirality,yes   [ Possible values: yes or no ]
 1468             useTethers,yes   [ Possible values: yes or no ]
 1469             
 1470         confMethod: Conformation generation methodology for generating initial 3D
 1471         coordinates. Possible values: Standard Distance Geometry (SDG), Experimental
 1472         Torsion-angle preference with Distance Geometry (ETDG), basic Knowledge-terms
 1473         with Distance Geometry (KDG) and Experimental Torsion-angle preference
 1474         along with basic Knowledge-terms with Distance Geometry (ETKDG or
 1475         ETKDGv2) [Ref 129, 167] .
 1476         
 1477         forceField: Forcefield method to use for constrained energy minimization.
 1478         Possible values: Universal Force Field (UFF) [ Ref 81 ] or Merck Molecular
 1479         Mechanics Force Field [ Ref 83-87 ] .
 1480         
 1481         enforceChirality: Enforce chirality for defined chiral centers during
 1482         forcefield minimization.
 1483         
 1484         maxConfs: Maximum number of conformations to generate for each molecule
 1485         during the generation of an initial 3D conformation ensemble using conformation
 1486         generation methodology. The conformations are constrained and minimized using
 1487         the specified forcefield and a quantum chemistry method. The lowest energy
 1488         conformation is written to the output file.
 1489         
 1490         embedRMSDCutoff: RMSD cutoff for retaining initial set of conformers embedded
 1491         using distance geometry and forcefield minimization. All embedded conformers
 1492         are kept for 'None' value. Otherwise, only those conformers which are different
 1493         from each other by the specified RMSD cutoff, 0.5 by default, are kept. The first
 1494         embedded conformer is always retained.
 1495         
 1496         useTethers: Use tethers to optimize the final embedded conformation by
 1497         applying a series of extra forces to align matching atoms to the positions of
 1498         the core atoms. Otherwise, use simple distance constraints during the
 1499         optimization.
 1500     --energyOut <yes or no>  [default: yes]
 1501         Write out energy values.
 1502     --energyDataFieldLabel <text>  [default: auto]
 1503         Energy data field label for writing energy values. Default: Psi4_Energy (<Units>). 
 1504     --energyUnits <text>  [default: kcal/mol]
 1505         Energy units. Possible values: Hartrees, kcal/mol, kJ/mol, or eV.
 1506     -e, --examples
 1507         Print examples.
 1508     -h, --help
 1509         Print this help message.
 1510     -i, --infile <infile>
 1511         Input file name.
 1512     --infileParams <Name,Value,...>  [default: auto]
 1513         A comma delimited list of parameter name and value pairs for reading
 1514         molecules from files. The supported parameter names for different file
 1515         formats, along with their default values, are shown below:
 1516             
 1517             SD, MOL: removeHydrogens,no,sanitize,yes,strictParsing,yes
 1518             
 1519             SMILES: smilesColumn,1,smilesNameColumn,2,smilesDelimiter,space,
 1520                 smilesTitleLine,auto,sanitize,yes
 1521             
 1522         Possible values for smilesDelimiter: space, comma or tab.
 1523     --maxIters <number>  [default: 50]
 1524         Maximum number of iterations to perform for each molecule or conformer
 1525         during energy minimization by a quantum chemistry method.
 1526     -m, --methodName <text>  [default: auto]
 1527         Method to use for constrained energy minimization. Default: B3LYP [ Ref 150 ].
 1528         The specified value must be a valid Psi4 method name. No validation is
 1529         performed.
 1530         
 1531         The following list shows a representative sample of methods available
 1532         in Psi4:
 1533             
 1534             B1LYP, B2PLYP, B2PLYP-D3BJ, B2PLYP-D3MBJ, B3LYP, B3LYP-D3BJ,
 1535             B3LYP-D3MBJ, CAM-B3LYP, CAM-B3LYP-D3BJ, HF, HF-D3BJ,  HF3c, M05,
 1536             M06, M06-2x, M06-HF, M06-L, MN12-L, MN15, MN15-D3BJ,PBE, PBE0,
 1537             PBEH3c, PW6B95, PW6B95-D3BJ, WB97, WB97X, WB97X-D, WB97X-D3BJ
 1538             
 1539     --mcsParams <Name,Value,...>  [default: auto]
 1540         Parameter values to use for identifying a maximum common substructure
 1541         (MCS) in between a pair of reference and input molecules.In general, it is a
 1542         comma delimited list of parameter name and value pairs. The supported
 1543         parameter names along with their default values are shown below:
 1544             
 1545             atomCompare,CompareElements,bondCompare,CompareOrder,
 1546             maximizeBonds,yes,matchValences,yes,matchChiralTag,no,
 1547             minNumAtoms,1,minNumBonds,0,ringMatchesRingOnly,yes,
 1548             completeRingsOnly,yes,threshold,1.0,timeOut,3600,seedSMARTS,none
 1549             
 1550         Possible values for atomCompare: CompareAny, CompareElements,
 1551         CompareIsotopes. Possible values for bondCompare: CompareAny,
 1552         CompareOrder, CompareOrderExact.
 1553         
 1554         A brief description of MCS parameters taken from RDKit documentation is
 1555         as follows:
 1556             
 1557             atomCompare - Controls match between two atoms
 1558             bondCompare - Controls match between two bonds
 1559             maximizeBonds - Maximize number of bonds instead of atoms
 1560             matchValences - Include atom valences in the MCS match
 1561             matchChiralTag - Include atom chirality in the MCS match
 1562             minNumAtoms - Minimum number of atoms in the MCS match
 1563             minNumBonds - Minimum number of bonds in the MCS match
 1564             ringMatchesRingOnly - Ring bonds only match other ring bonds
 1565             completeRingsOnly - Partial rings not allowed during the match
 1566             threshold - Fraction of the dataset that must contain the MCS
 1567             seedSMARTS - SMARTS string as the seed of the MCS
 1568             timeout - Timeout for the MCS calculation in seconds
 1569             
 1570     --mp <yes or no>  [default: no]
 1571         Use multiprocessing.
 1572          
 1573         By default, input data is retrieved in a lazy manner via mp.Pool.imap()
 1574         function employing lazy RDKit data iterable. This allows processing of
 1575         arbitrary large data sets without any additional requirements memory.
 1576         
 1577         All input data may be optionally loaded into memory by mp.Pool.map()
 1578         before starting worker processes in a process pool by setting the value
 1579         of 'inputDataMode' to 'InMemory' in '--mpParams' option.
 1580         
 1581         A word to the wise: The default 'chunkSize' value of 1 during 'Lazy' input
 1582         data mode may adversely impact the performance. The '--mpParams' section
 1583         provides additional information to tune the value of 'chunkSize'.
 1584     --mpLevel <Molecules or Conformers>  [default: Molecules]
 1585         Perform multiprocessing at molecules or conformers level. Possible values:
 1586         Molecules or Conformers. The 'Molecules' value starts a process pool at the
 1587         molecules level. All conformers of a molecule are processed in a single
 1588         process. The 'Conformers' value, however, starts a process pool at the 
 1589         conformers level. Each conformer of a molecule is processed in an individual
 1590         process in the process pool. The default Psi4 'OutputFile' is set to 'quiet'
 1591         using '--psi4RunParams' for 'Conformers' level. Otherwise, it may generate
 1592         a large number of Psi4 output files.
 1593     --mpParams <Name,Value,...>  [default: auto]
 1594         A comma delimited list of parameter name and value pairs to configure
 1595         multiprocessing.
 1596         
 1597         The supported parameter names along with their default and possible
 1598         values are shown below:
 1599         
 1600             chunkSize, auto
 1601             inputDataMode, Lazy   [ Possible values: InMemory or Lazy ]
 1602             numProcesses, auto   [ Default: mp.cpu_count() ]
 1603         
 1604         These parameters are used by the following functions to configure and
 1605         control the behavior of multiprocessing: mp.Pool(), mp.Pool.map(), and
 1606         mp.Pool.imap().
 1607         
 1608         The chunkSize determines chunks of input data passed to each worker
 1609         process in a process pool by mp.Pool.map() and mp.Pool.imap() functions.
 1610         The default value of chunkSize is dependent on the value of 'inputDataMode'.
 1611         
 1612         The mp.Pool.map() function, invoked during 'InMemory' input data mode,
 1613         automatically converts RDKit data iterable into a list, loads all data into
 1614         memory, and calculates the default chunkSize using the following method
 1615         as shown in its code:
 1616         
 1617             chunkSize, extra = divmod(len(dataIterable), len(numProcesses) * 4)
 1618             if extra: chunkSize += 1
 1619         
 1620         For example, the default chunkSize will be 7 for a pool of 4 worker processes
 1621         and 100 data items.
 1622         
 1623         The mp.Pool.imap() function, invoked during 'Lazy' input data mode, employs
 1624         'lazy' RDKit data iterable to retrieve data as needed, without loading all the
 1625         data into memory. Consequently, the size of input data is not known a priori.
 1626         It's not possible to estimate an optimal value for the chunkSize. The default 
 1627         chunkSize is set to 1.
 1628         
 1629         The default value for the chunkSize during 'Lazy' data mode may adversely
 1630         impact the performance due to the overhead associated with exchanging
 1631         small chunks of data. It is generally a good idea to explicitly set chunkSize to
 1632         a larger value during 'Lazy' input data mode, based on the size of your input
 1633         data and number of processes in the process pool.
 1634         
 1635         The mp.Pool.map() function waits for all worker processes to process all
 1636         the data and return the results. The mp.Pool.imap() function, however,
 1637         returns the the results obtained from worker processes as soon as the
 1638         results become available for specified chunks of data.
 1639         
 1640         The order of data in the results returned by both mp.Pool.map() and 
 1641         mp.Pool.imap() functions always corresponds to the input data.
 1642     -o, --outfile <outfile>
 1643         Output file name.
 1644     --outfileParams <Name,Value,...>  [default: auto]
 1645         A comma delimited list of parameter name and value pairs for writing
 1646         molecules to files. The supported parameter names for different file
 1647         formats, along with their default values, are shown below:
 1648             
 1649             SD: kekulize,yes,forceV3000,no
 1650             
 1651     --overwrite
 1652         Overwrite existing files.
 1653     --precision <number>  [default: 6]
 1654         Floating point precision for writing energy values.
 1655     --psi4OptionsParams <Name,Value,...>  [default: none]
 1656         A comma delimited list of Psi4 option name and value pairs for setting
 1657         global and module options. The names are 'option_name' for global options
 1658         and 'module_name__option_name' for options local to a module. The
 1659         specified option names must be valid Psi4 names. No validation is
 1660         performed.
 1661         
 1662         The specified option name and  value pairs are processed and passed to
 1663         psi4.set_options() as a dictionary. The supported value types are float,
 1664         integer, boolean, or string. The float value string is converted into a float.
 1665         The valid values for a boolean string are yes, no, true, false, on, or off. 
 1666     --psi4RunParams <Name,Value,...>  [default: auto]
 1667         A comma delimited list of parameter name and value pairs for configuring
 1668         Psi4 jobs.
 1669         
 1670         The supported parameter names along with their default and possible
 1671         values are shown below:
 1672              
 1673             MemoryInGB, 1
 1674             NumThreads, 1
 1675             OutputFile, auto   [ Possible  values: stdout, quiet, or FileName ]
 1676             ScratchDir, auto   [ Possivle values: DirName]
 1677             RemoveOutputFile, yes   [ Possible values: yes, no, true, or false]
 1678             
 1679         These parameters control the runtime behavior of Psi4.
 1680         
 1681         The default file name for 'OutputFile' is <InFileRoot>_Psi4.out. The PID
 1682         is appended to output file name during multiprocessing as shown:
 1683         <InFileRoot>_Psi4_<PIDNum>.out. The 'stdout' value for 'OutputType'
 1684         sends Psi4 output to stdout. The 'quiet' or 'devnull' value suppresses
 1685         all Psi4 output. The 'OutputFile' is set to 'quiet' for 'auto' value during 
 1686         'Conformers' of '--mpLevel' option.
 1687         
 1688         The default 'Yes' value of 'RemoveOutputFile' option forces the removal
 1689         of any existing Psi4 before creating new files to append output from
 1690         multiple Psi4 runs.
 1691         
 1692         The option 'ScratchDir' is a directory path to the location of scratch
 1693         files. The default value corresponds to Psi4 default. It may be used to
 1694         override the deafult path.
 1695     -q, --quiet <yes or no>  [default: no]
 1696         Use quiet mode. The warning and information messages will not be printed.
 1697     -r, --reffile <reffile>
 1698         Reference input file name containing a 3D reference molecule. A common
 1699         core scaffold must be present in a pair of an input and reference molecules.
 1700         Otherwise, no constrained minimization is performed on the input molecule.
 1701     --reference <text>  [default: auto]
 1702         Reference wave function to use for energy calculation. Default: RHF or UHF.
 1703         The default values are Restricted Hartree-Fock (RHF) for closed-shell molecules
 1704         with all electrons paired and Unrestricted Hartree-Fock (UHF) for open-shell
 1705         molecules with unpaired electrons.
 1706         
 1707         The specified value must be a valid Psi4 reference wave function. No validation
 1708         is performed. For example: ROHF, CUHF, RKS, etc.
 1709         
 1710         The spin multiplicity determines the default value of reference wave function
 1711         for input molecules. It is calculated from number of free radical electrons using
 1712         Hund's rule of maximum multiplicity defined as 2S + 1 where S is the total
 1713         electron spin. The total spin is 1/2 the number of free radical electrons in a 
 1714         molecule. The value of 'SpinMultiplicity' molecule property takes precedence
 1715         over the calculated value of spin multiplicity.
 1716     -s, --scaffold <auto or SMARTS>  [default: auto]
 1717         Common core scaffold between a pair of input and reference molecules used for
 1718         constrained minimization of molecules in input file. Possible values: Auto or a
 1719         valid SMARTS pattern. The common core scaffold is automatically detected
 1720         corresponding to the Maximum Common Substructure (MCS) between a pair of
 1721         reference and input molecules. A valid SMARTS pattern may be optionally specified
 1722         for the common core scaffold.
 1723     --scaffoldRMSDOut <yes or no>  [default: No]
 1724         Write out RMSD value for common core alignment between a pair of input and
 1725         reference molecules.
 1726     -w, --workingdir <dir>
 1727         Location of working directory which defaults to the current directory.
 1728 
 1729 Examples:
 1730     To perform constrained energy minimization for molecules in a SMILES file against
 1731     a reference 3D molecule in a SD file using a common core scaffold between pairs of
 1732     input and reference molecules identified using MCS, generating up to 50 conformations
 1733     using ETKDGv2 methodology followed by initial MMFF forcefield minimization and final
 1734     energy minimization using B3LYP/6-31G** and B3LYP/6-31+G** for non-sulfur and
 1735     sulfur containing molecules, and write out a SD file containing minimum energy
 1736     structure corresponding to each constrained molecule, type:
 1737 
 1738        %  Psi4PerformConstrainedMinimization.py  -i Psi4SampleAlkanes.smi
 1739           -r Psi4SampleEthane3D.sdf  -o Psi4SampleAlkanesOut.sdf
 1740 
 1741     To run the first example in a quiet mode and write out a SD file, type:
 1742 
 1743        %  Psi4PerformConstrainedMinimization.py  -q yes
 1744           -i Psi4SampleAlkanes.smi -r Psi4SampleEthane3D.sdf
 1745           -o Psi4SampleAlkanesOut.sdf
 1746 
 1747     To run the first example in multiprocessing mode at molecules level on all
 1748     available CPUs without loading all data into memory and write out a SD file,
 1749     type:
 1750 
 1751        %  Psi4PerformConstrainedMinimization.py  --mp yes
 1752           -i Psi4SampleAlkanes.smi -r Psi4SampleEthane3D.sdf
 1753           -o Psi4SampleAlkanesOut.sdf
 1754 
 1755     To run the first example in multiprocessing mode at conformers level on all
 1756     available CPUs without loading all data into memory and write out a SD file,
 1757     type:
 1758 
 1759        %  Psi4PerformConstrainedMinimization.py  --mp yes --mpLevel Conformers
 1760           -i Psi4SampleAlkanes.smi -r Psi4SampleEthane3D.sdf
 1761           -o Psi4SampleAlkanesOut.sdf
 1762 
 1763     To run the first example in multiprocessing mode at molecules level on all
 1764     available CPUs by loading all data into memory and write out a SD file, type:
 1765 
 1766        %  Psi4PerformConstrainedMinimization.py  --mp yes --mpParams
 1767           "inputDataMode,Lazy,numProcesses,4,chunkSize,8"
 1768           -i Psi4SampleAlkanes.smi -r Psi4SampleEthane3D.sdf
 1769           -o Psi4SampleAlkanesOut.sdf
 1770 
 1771     To rerun the first example using an explicit SMARTS string for a common core
 1772     scaffold and write out a SD file, type:
 1773 
 1774        %  Psi4PerformConstrainedMinimization.py  --scaffold "CC"
 1775           -i Psi4SampleAlkanes.smi -r Psi4SampleEthane3D.sdf
 1776           -o Psi4SampleAlkanesOut.sdf
 1777 
 1778     To run the first example using a specific set of parameters for generating
 1779     an initial set of conformers followed by energy minimization using forcefield
 1780     and a quantum chemistry method and write out a SD file type:
 1781 
 1782        %  Psi4PerformConstrainedMinimization.py  --confParams "
 1783           confMethod,ETKDGv2,forceField,MMFF, forceFieldMMFFVariant,MMFF94s,
 1784           maxConfs,20,embedRMSDCutoff,0.5" --energyUnits "kJ/mol" -m B3LYP
 1785           -b "6-31+G**" --maxIters 20 -i Psi4SampleAlkanes.smi -r Psi4SampleEthane3D.sdf
 1786           -o Psi4SampleAlkanesOut.sdf
 1787 
 1788     To run the first example for molecules in a CSV SMILES file, SMILES strings
 1789     in column 1, name column 2, and write out a SD file, type:
 1790 
 1791        %  Psi4PerformConstrainedMinimization.py  --infileParams
 1792           "smilesDelimiter,comma,smilesTitleLine,yes,smilesColumn,1,
 1793            smilesNameColumn,2" -i Psi4SampleAlkanes.csv
 1794            -r Psi4SampleEthane3D.sdf -o Psi4SampleAlkanesOut.sdf
 1795 
 1796 Author:
 1797 
 1798     Manish Sud(msud@san.rr.com)
 1799 
 1800 See also:
 1801     Psi4CalculateEnergy.py, Psi4CalculatePartialCharges.py, Psi4GenerateConformers.py,
 1802     Psi4GenerateConstrainedConformers.py, Psi4PerformMinimization.py
 1803 
 1804 Copyright:
 1805     Copyright (C) 2026 Manish Sud. All rights reserved.
 1806 
 1807     The functionality available in this script is implemented using Psi4, an
 1808     open source quantum chemistry software package, and RDKit, an open
 1809     source toolkit for cheminformatics developed by Greg Landrum.
 1810 
 1811     This file is part of MayaChemTools.
 1812 
 1813     MayaChemTools is free software; you can redistribute it and/or modify it under
 1814     the terms of the GNU Lesser General Public License as published by the Free
 1815     Software Foundation; either version 3 of the License, or (at your option) any
 1816     later version.
 1817 
 1818 """
 1819 
 1820 if __name__ == "__main__":
 1821     main()