MayaChemTools

    1 #!/bin/env python
    2 #
    3 # File: Psi4GenerateConstrainedConformers.py
    4 # Author: Manish Sud <msud@san.rr.com>
    5 #
    6 # Copyright (C) 2026 Manish Sud. All rights reserved.
    7 #
    8 # The functionality available in this script is implemented using Psi4, an
    9 # open source quantum chemistry software package, and RDKit, an open
   10 # source toolkit for cheminformatics developed by Greg Landrum.
   11 #
   12 # This file is part of MayaChemTools.
   13 #
   14 # MayaChemTools is free software; you can redistribute it and/or modify it under
   15 # the terms of the GNU Lesser General Public License as published by the Free
   16 # Software Foundation; either version 3 of the License, or (at your option) any
   17 # later version.
   18 #
   19 # MayaChemTools is distributed in the hope that it will be useful, but without
   20 # any warranty; without even the implied warranty of merchantability of fitness
   21 # for a particular purpose.  See the GNU Lesser General Public License for more
   22 # details.
   23 #
   24 # You should have received a copy of the GNU Lesser General Public License
   25 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
   26 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
   27 # Boston, MA, 02111-1307, USA.
   28 #
   29 
   30 from __future__ import print_function
   31 
   32 import os
   33 import sys
   34 import time
   35 import re
   36 import shutil
   37 import multiprocessing as mp
   38 
   39 # Psi4 imports...
   40 if hasattr(shutil, "which") and shutil.which("psi4") is None:
   41     sys.stderr.write("\nWarning: Failed to find 'psi4' in your PATH indicating potential issues with your\n")
   42     sys.stderr.write("Psi4 environment. The 'import psi4' directive in the global scope of the script\n")
   43     sys.stderr.write("interferes with the multiprocessing functionality. It is imported later in the\n")
   44     sys.stderr.write("local scope during the execution of the script and may fail. Check/update your\n")
   45     sys.stderr.write("Psi4 environment and try again.\n\n")
   46 
   47 # RDKit imports...
   48 try:
   49     from rdkit import rdBase
   50     from rdkit import Chem
   51     from rdkit.Chem import AllChem, rdMolAlign
   52     from rdkit.Chem import rdFMCS
   53 except ImportError as ErrMsg:
   54     sys.stderr.write("\nFailed to import RDKit module/package: %s\n" % ErrMsg)
   55     sys.stderr.write("Check/update your RDKit environment and try again.\n\n")
   56     sys.exit(1)
   57 
   58 # MayaChemTools imports...
   59 sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python"))
   60 try:
   61     from docopt import docopt
   62     import MiscUtil
   63     import Psi4Util
   64     import RDKitUtil
   65 except ImportError as ErrMsg:
   66     sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg)
   67     sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n")
   68     sys.exit(1)
   69 
   70 ScriptName = os.path.basename(sys.argv[0])
   71 Options = {}
   72 OptionsInfo = {}
   73 
   74 
   75 def main():
   76     """Start execution of the script."""
   77 
   78     MiscUtil.PrintInfo(
   79         "\n%s (Psi4: Imported later; RDKit v%s; MayaChemTools v%s; %s): Starting...\n"
   80         % (ScriptName, rdBase.rdkitVersion, MiscUtil.GetMayaChemToolsVersion(), time.asctime())
   81     )
   82 
   83     (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime()
   84 
   85     # Retrieve command line arguments and options...
   86     RetrieveOptions()
   87 
   88     # Process and validate command line arguments and options...
   89     ProcessOptions()
   90 
   91     # Perform actions required by the script...
   92     GenerateConstrainedConformers()
   93 
   94     MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName)
   95     MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime))
   96 
   97 
   98 def GenerateConstrainedConformers():
   99     """Generate constrained conformers."""
  100 
  101     # Read and validate reference molecule...
  102     RefMol = RetrieveReferenceMolecule()
  103 
  104     # Setup a molecule reader for input file...
  105     MiscUtil.PrintInfo("\nProcessing file %s..." % OptionsInfo["Infile"])
  106     Mols = RDKitUtil.ReadMolecules(OptionsInfo["Infile"], **OptionsInfo["InfileParams"])
  107 
  108     # Set up a molecule writer...
  109     Writer = RDKitUtil.MoleculesWriter(OptionsInfo["Outfile"], **OptionsInfo["OutfileParams"])
  110     if Writer is None:
  111         MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OptionsInfo["Outfile"])
  112     MiscUtil.PrintInfo("Generating file %s..." % OptionsInfo["Outfile"])
  113 
  114     MolCount, ValidMolCount, CoreScaffoldMissingCount, ConfGenFailedCount = ProcessMolecules(RefMol, Mols, Writer)
  115 
  116     if Writer is not None:
  117         Writer.close()
  118 
  119     MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount)
  120     MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount)
  121     MiscUtil.PrintInfo("Number of molecules with missing core scaffold: %d" % CoreScaffoldMissingCount)
  122     MiscUtil.PrintInfo(
  123         "Number of molecules failed during conformation generation or minimization: %d" % ConfGenFailedCount
  124     )
  125     MiscUtil.PrintInfo(
  126         "Number of ignored molecules: %d" % (MolCount - ValidMolCount + CoreScaffoldMissingCount + ConfGenFailedCount)
  127     )
  128 
  129 
  130 def ProcessMolecules(RefMol, Mols, Writer):
  131     """Process molecules to generate constrained conformers."""
  132 
  133     if OptionsInfo["MPMode"]:
  134         return ProcessMoleculesUsingMultipleProcesses(RefMol, Mols, Writer)
  135     else:
  136         return ProcessMoleculesUsingSingleProcess(RefMol, Mols, Writer)
  137 
  138 
  139 def ProcessMoleculesUsingSingleProcess(RefMol, Mols, Writer):
  140     """Process molecules to generate constrained conformers using a single process."""
  141 
  142     # Intialize Psi4...
  143     MiscUtil.PrintInfo("\nInitializing Psi4...")
  144     Psi4Handle = Psi4Util.InitializePsi4(
  145         Psi4RunParams=OptionsInfo["Psi4RunParams"],
  146         Psi4OptionsParams=OptionsInfo["Psi4OptionsParams"],
  147         PrintVersion=True,
  148         PrintHeader=True,
  149     )
  150     OptionsInfo["psi4"] = Psi4Handle
  151 
  152     # Setup max iterations global variable...
  153     Psi4Util.UpdatePsi4OptionsParameters(Psi4Handle, {"GEOM_MAXITER": OptionsInfo["MaxIters"]})
  154 
  155     # Setup conversion factor for energy units...
  156     SetupEnergyConversionFactor(Psi4Handle)
  157 
  158     (MolCount, ValidMolCount, CoreScaffoldMissingCount, ConfGenFailedCount) = [0] * 4
  159 
  160     MiscUtil.PrintInfo("\nGenerating constrained conformers and performing energy minimization...")
  161 
  162     for Mol in Mols:
  163         MolCount += 1
  164 
  165         if not CheckAndValidateMolecule(Mol, MolCount):
  166             continue
  167 
  168         # Setup 2D coordinates for SMILES input file...
  169         if OptionsInfo["SMILESInfileStatus"]:
  170             AllChem.Compute2DCoords(Mol)
  171 
  172         ValidMolCount += 1
  173 
  174         # Setup a reference molecule core containing common scaffold atoms...
  175         RefMolCore = SetupCoreScaffold(RefMol, Mol, MolCount)
  176         if RefMolCore is None:
  177             CoreScaffoldMissingCount += 1
  178             continue
  179 
  180         ConfMols, CalcStatus, ConfIDs, ConfEnergyValues, ConfScaffoldEmbedRMSDValues = GenerateMolConformers(
  181             Psi4Handle, Mol, RefMolCore, MolCount
  182         )
  183 
  184         if not CalcStatus:
  185             ConfGenFailedCount += 1
  186             continue
  187 
  188         WriteMolConformers(Writer, Mol, MolCount, ConfMols, ConfIDs, ConfEnergyValues, ConfScaffoldEmbedRMSDValues)
  189 
  190     return (MolCount, ValidMolCount, CoreScaffoldMissingCount, ConfGenFailedCount)
  191 
  192 
  193 def ProcessMoleculesUsingMultipleProcesses(RefMol, Mols, Writer):
  194     """Process molecules to generate constrained conformers using multiprocessing."""
  195 
  196     if OptionsInfo["MPLevelConformersMode"]:
  197         return ProcessMoleculesUsingMultipleProcessesAtConformersLevel(RefMol, Mols, Writer)
  198     elif OptionsInfo["MPLevelMoleculesMode"]:
  199         return ProcessMoleculesUsingMultipleProcessesAtMoleculesLevel(RefMol, Mols, Writer)
  200     else:
  201         MiscUtil.PrintError('The value, %s,  option "--mpLevel" is not supported.' % (OptionsInfo["MPLevel"]))
  202 
  203 
  204 def ProcessMoleculesUsingMultipleProcessesAtMoleculesLevel(RefMol, Mols, Writer):
  205     """Process molecules to generate constrained conformers using multiprocessing at molecules level."""
  206 
  207     MiscUtil.PrintInfo(
  208         "\nGenerating constrained conformers and performing energy minimization using multiprocessing at molecules level..."
  209     )
  210 
  211     MPParams = OptionsInfo["MPParams"]
  212 
  213     # Setup data for initializing a worker process...
  214     OptionsInfo["EncodedRefMol"] = RDKitUtil.MolToBase64EncodedMolString(RefMol)
  215     InitializeWorkerProcessArgs = (
  216         MiscUtil.ObjectToBase64EncodedString(Options),
  217         MiscUtil.ObjectToBase64EncodedString(OptionsInfo),
  218     )
  219 
  220     # Setup a encoded mols data iterable for a worker process...
  221     WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(Mols)
  222 
  223     # Setup process pool along with data initialization for each process...
  224     if not OptionsInfo["QuietMode"]:
  225         MiscUtil.PrintInfo(
  226             "\nConfiguring multiprocessing using %s method..."
  227             % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()")
  228         )
  229         MiscUtil.PrintInfo(
  230             "NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n"
  231             % (
  232                 MPParams["NumProcesses"],
  233                 MPParams["InputDataMode"],
  234                 ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"]),
  235             )
  236         )
  237 
  238     ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeWorkerProcess, InitializeWorkerProcessArgs)
  239 
  240     # Start processing...
  241     if re.match("^Lazy$", MPParams["InputDataMode"], re.I):
  242         Results = ProcessPool.imap(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
  243     elif re.match("^InMemory$", MPParams["InputDataMode"], re.I):
  244         Results = ProcessPool.map(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
  245     else:
  246         MiscUtil.PrintError(
  247             'The value, %s, specified for "--inputDataMode" is not supported.' % (MPParams["InputDataMode"])
  248         )
  249 
  250     # Print out Psi4 version in the main process...
  251     MiscUtil.PrintInfo("\nInitializing Psi4...\n")
  252     Psi4Handle = Psi4Util.InitializePsi4(PrintVersion=True, PrintHeader=False)
  253     OptionsInfo["psi4"] = Psi4Handle
  254 
  255     (MolCount, ValidMolCount, CoreScaffoldMissingCount, ConfGenFailedCount) = [0] * 4
  256 
  257     for Result in Results:
  258         MolCount += 1
  259         (
  260             MolIndex,
  261             EncodedMol,
  262             EncodedConfMols,
  263             CoreScaffoldMissingStatus,
  264             CalcStatus,
  265             ConfIDs,
  266             ConfEnergyValues,
  267             ConfScaffoldEmbedRMSDValues,
  268         ) = Result
  269 
  270         if EncodedMol is None:
  271             continue
  272         ValidMolCount += 1
  273 
  274         if CoreScaffoldMissingStatus:
  275             CoreScaffoldMissingCount += 1
  276             continue
  277 
  278         if not CalcStatus:
  279             ConfGenFailedCount += 1
  280             continue
  281 
  282         Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
  283         ConfMols = [RDKitUtil.MolFromBase64EncodedMolString(EncodedConfMol) for EncodedConfMol in EncodedConfMols]
  284 
  285         WriteMolConformers(Writer, Mol, MolCount, ConfMols, ConfIDs, ConfEnergyValues, ConfScaffoldEmbedRMSDValues)
  286 
  287     return (MolCount, ValidMolCount, CoreScaffoldMissingCount, ConfGenFailedCount)
  288 
  289 
  290 def InitializeWorkerProcess(*EncodedArgs):
  291     """Initialize data for a worker process."""
  292 
  293     global Options, OptionsInfo
  294 
  295     if not OptionsInfo["QuietMode"]:
  296         MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid())
  297 
  298     # Decode Options and OptionInfo...
  299     Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0])
  300     OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1])
  301 
  302     # Decode RefMol...
  303     OptionsInfo["RefMol"] = RDKitUtil.MolFromBase64EncodedMolString(OptionsInfo["EncodedRefMol"])
  304 
  305     # Psi4 is initialized in the worker process to avoid creation of redundant Psi4
  306     # output files for each process...
  307     OptionsInfo["Psi4Initialized"] = False
  308 
  309 
  310 def WorkerProcess(EncodedMolInfo):
  311     """Process data for a worker process."""
  312 
  313     if not OptionsInfo["Psi4Initialized"]:
  314         InitializePsi4ForWorkerProcess()
  315 
  316     MolIndex, EncodedMol = EncodedMolInfo
  317 
  318     MolNum = MolIndex + 1
  319     ConfMols = None
  320     CoreScaffoldMissingStatus = False
  321     CalcStatus = False
  322     ConfIDs = None
  323     ConfEnergyValues = None
  324     ConfScaffoldEmbedRMSDValues = None
  325 
  326     if EncodedMol is None:
  327         return [
  328             MolIndex,
  329             None,
  330             ConfMols,
  331             CoreScaffoldMissingStatus,
  332             CalcStatus,
  333             ConfIDs,
  334             ConfEnergyValues,
  335             ConfScaffoldEmbedRMSDValues,
  336         ]
  337 
  338     Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
  339     if not CheckAndValidateMolecule(Mol, MolNum):
  340         return [
  341             MolIndex,
  342             None,
  343             ConfMols,
  344             CoreScaffoldMissingStatus,
  345             CalcStatus,
  346             ConfIDs,
  347             ConfEnergyValues,
  348             ConfScaffoldEmbedRMSDValues,
  349         ]
  350 
  351     # Setup 2D coordinates for SMILES input file...
  352     if OptionsInfo["SMILESInfileStatus"]:
  353         AllChem.Compute2DCoords(Mol)
  354 
  355     # Setup a reference molecule core containing common scaffold atoms...
  356     RefMolCore = SetupCoreScaffold(OptionsInfo["RefMol"], Mol, MolNum)
  357     if RefMolCore is None:
  358         CoreScaffoldMissingStatus = True
  359         return [
  360             MolIndex,
  361             EncodedMol,
  362             ConfMols,
  363             CoreScaffoldMissingStatus,
  364             CalcStatus,
  365             ConfIDs,
  366             ConfEnergyValues,
  367             ConfScaffoldEmbedRMSDValues,
  368         ]
  369 
  370     # Generate conformers...
  371     ConfMols, CalcStatus, ConfIDs, ConfEnergyValues, ConfScaffoldEmbedRMSDValues = GenerateMolConformers(
  372         OptionsInfo["psi4"], Mol, RefMolCore, MolNum
  373     )
  374 
  375     EncodedConfMols = None
  376     if ConfMols is not None:
  377         EncodedConfMols = [
  378             RDKitUtil.MolToBase64EncodedMolString(
  379                 ConfMol,
  380                 PropertyPickleFlags=Chem.PropertyPickleOptions.MolProps | Chem.PropertyPickleOptions.PrivateProps,
  381             )
  382             for ConfMol in ConfMols
  383         ]
  384 
  385     return [
  386         MolIndex,
  387         EncodedMol,
  388         EncodedConfMols,
  389         CoreScaffoldMissingStatus,
  390         CalcStatus,
  391         ConfIDs,
  392         ConfEnergyValues,
  393         ConfScaffoldEmbedRMSDValues,
  394     ]
  395 
  396 
  397 def ProcessMoleculesUsingMultipleProcessesAtConformersLevel(RefMol, Mols, Writer):
  398     """Process and minimize molecules using multiprocessing at conformers level."""
  399 
  400     MiscUtil.PrintInfo(
  401         "\nPerforming constrained minimization with generation of conformers using multiprocessing at conformers level..."
  402     )
  403 
  404     (MolCount, ValidMolCount, CoreScaffoldMissingCount, ConfGenFailedCount) = [0] * 4
  405 
  406     for Mol in Mols:
  407         MolCount += 1
  408 
  409         if not CheckAndValidateMolecule(Mol, MolCount):
  410             continue
  411 
  412         # Setup 2D coordinates for SMILES input file...
  413         if OptionsInfo["SMILESInfileStatus"]:
  414             AllChem.Compute2DCoords(Mol)
  415 
  416         ValidMolCount += 1
  417 
  418         # Setup a reference molecule core containing common scaffold atoms...
  419         RefMolCore = SetupCoreScaffold(RefMol, Mol, MolCount)
  420         if RefMolCore is None:
  421             CoreScaffoldMissingCount += 1
  422             continue
  423 
  424         ConfMols, CalcStatus, ConfIDs, ConfEnergyValues, ConfScaffoldEmbedRMSDValues = (
  425             ProcessConformersUsingMultipleProcesses(Mol, RefMolCore, MolCount)
  426         )
  427 
  428         if not CalcStatus:
  429             ConfGenFailedCount += 1
  430             continue
  431 
  432         WriteMolConformers(Writer, Mol, MolCount, ConfMols, ConfIDs, ConfEnergyValues, ConfScaffoldEmbedRMSDValues)
  433 
  434     return (MolCount, ValidMolCount, CoreScaffoldMissingCount, ConfGenFailedCount)
  435 
  436 
  437 def ProcessConformersUsingMultipleProcesses(Mol, RefMolCore, MolNum):
  438     """Generate constrained conformers and minimize them using multiple processes."""
  439 
  440     # Add hydrogens...
  441     Mol = Chem.AddHs(Mol, addCoords=True)
  442 
  443     MolName = RDKitUtil.GetMolName(Mol, MolNum)
  444 
  445     # Setup constrained conformers...
  446     MolConfs, MolConfsStatus, MolConfIDs = ConstrainEmbedAndMinimizeMoleculeUsingRDKit(Mol, RefMolCore, MolNum)
  447     if not MolConfsStatus:
  448         if not OptionsInfo["QuietMode"]:
  449             MiscUtil.PrintWarning(
  450                 "Conformation generation couldn't be performed for molecule %s: Constrained embedding failed...\n"
  451                 % MolName
  452             )
  453         return (None, False, None, None, None)
  454 
  455     MPParams = OptionsInfo["MPParams"]
  456 
  457     # Setup data for initializing a worker process...
  458     OptionsInfo["EncodedRefMolCore"] = RDKitUtil.MolToBase64EncodedMolString(RefMolCore)
  459     InitializeWorkerProcessArgs = (
  460         MiscUtil.ObjectToBase64EncodedString(Options),
  461         MiscUtil.ObjectToBase64EncodedString(OptionsInfo),
  462     )
  463 
  464     # Setup a encoded mols data iterable for a worker process...
  465     WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStringsWithIDs(MolConfs, MolConfIDs)
  466 
  467     # Setup process pool along with data initialization for each process...
  468     if not OptionsInfo["QuietMode"]:
  469         MiscUtil.PrintInfo(
  470             "\nConfiguring multiprocessing using %s method..."
  471             % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()")
  472         )
  473         MiscUtil.PrintInfo(
  474             "NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n"
  475             % (
  476                 MPParams["NumProcesses"],
  477                 MPParams["InputDataMode"],
  478                 ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"]),
  479             )
  480         )
  481 
  482     ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeConformerWorkerProcess, InitializeWorkerProcessArgs)
  483 
  484     # Start processing...
  485     if re.match("^Lazy$", MPParams["InputDataMode"], re.I):
  486         Results = ProcessPool.imap(ConformerWorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
  487     elif re.match("^InMemory$", MPParams["InputDataMode"], re.I):
  488         Results = ProcessPool.map(ConformerWorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
  489     else:
  490         MiscUtil.PrintError(
  491             'The value, %s, specified for "--inputDataMode" is not supported.' % (MPParams["InputDataMode"])
  492         )
  493 
  494     MolConfIDs = []
  495     MolConfs = []
  496     CalcEnergyMap = {}
  497     ScaffoldEmbedRMSDMap = {}
  498     CalcFailedCount = 0
  499 
  500     for Result in Results:
  501         ConfID, EncodedMolConf, CalcStatus, Energy, ScaffoldEmbedRMSD = Result
  502 
  503         if EncodedMolConf is None:
  504             CalcFailedCount += 1
  505             continue
  506 
  507         if not CalcStatus:
  508             CalcFailedCount += 1
  509             continue
  510 
  511         MolConf = RDKitUtil.MolFromBase64EncodedMolString(EncodedMolConf)
  512 
  513         MolConfIDs.append(ConfID)
  514         MolConfs.append(MolConf)
  515         CalcEnergyMap[ConfID] = Energy
  516         ScaffoldEmbedRMSDMap[ConfID] = ScaffoldEmbedRMSD
  517 
  518     if CalcFailedCount:
  519         return (None, False, None, None, None)
  520 
  521     # Filter conformers...
  522     SelectedMolConfs, SelectedMolConfIDs, SelectedConfEnergies, SelectedConfScaffoldEmbedRMSDValues = (
  523         FilterMolConformers(Mol, MolNum, MolConfs, MolConfIDs, CalcEnergyMap, ScaffoldEmbedRMSDMap)
  524     )
  525 
  526     return (SelectedMolConfs, True, SelectedMolConfIDs, SelectedConfEnergies, SelectedConfScaffoldEmbedRMSDValues)
  527 
  528 
  529 def InitializeConformerWorkerProcess(*EncodedArgs):
  530     """Initialize data for a conformer worker process."""
  531 
  532     global Options, OptionsInfo
  533 
  534     if not OptionsInfo["QuietMode"]:
  535         MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid())
  536 
  537     # Decode Options and OptionInfo...
  538     Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0])
  539     OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1])
  540 
  541     # Decode RefMol...
  542     OptionsInfo["RefMolCore"] = RDKitUtil.MolFromBase64EncodedMolString(OptionsInfo["EncodedRefMolCore"])
  543 
  544     # Psi4 is initialized in the worker process to avoid creation of redundant Psi4
  545     # output files for each process...
  546     OptionsInfo["Psi4Initialized"] = False
  547 
  548 
  549 def ConformerWorkerProcess(EncodedMolInfo):
  550     """Process data for a conformer worker process."""
  551 
  552     if not OptionsInfo["Psi4Initialized"]:
  553         InitializePsi4ForWorkerProcess()
  554 
  555     MolConfID, EncodedMolConf = EncodedMolInfo
  556 
  557     MolConfNum = MolConfID
  558     CalcStatus = False
  559     Energy = None
  560     ScaffoldEmbedRMSD = None
  561 
  562     if EncodedMolConf is None:
  563         return [MolConfNum, None, CalcStatus, Energy, ScaffoldEmbedRMSD]
  564 
  565     MolConf = RDKitUtil.MolFromBase64EncodedMolString(EncodedMolConf)
  566     MolConfName = RDKitUtil.GetMolName(MolConf, MolConfNum)
  567 
  568     if not OptionsInfo["QuietMode"]:
  569         MiscUtil.PrintInfo("Processing molecule %s conformer number %s..." % (MolConfName, MolConfNum))
  570 
  571     # Perform Psi4 constrained minimization...
  572     CalcStatus, Energy = ConstrainAndMinimizeMoleculeUsingPsi4(
  573         OptionsInfo["psi4"], MolConf, OptionsInfo["RefMolCore"], MolConfNum
  574     )
  575     if not CalcStatus:
  576         if not OptionsInfo["QuietMode"]:
  577             MiscUtil.PrintWarning("Minimization couldn't be performed for molecule %s\n" % (MolConfName))
  578         return [MolConfNum, None, CalcStatus, Energy, ScaffoldEmbedRMSD]
  579 
  580     if OptionsInfo["ScaffoldRMSDOut"]:
  581         ScaffoldEmbedRMSD = "%.4f" % float(MolConf.GetProp("EmbedRMS"))
  582     MolConf.ClearProp("EmbedRMS")
  583 
  584     return [
  585         MolConfNum,
  586         RDKitUtil.MolToBase64EncodedMolString(
  587             MolConf, PropertyPickleFlags=Chem.PropertyPickleOptions.MolProps | Chem.PropertyPickleOptions.PrivateProps
  588         ),
  589         CalcStatus,
  590         Energy,
  591         ScaffoldEmbedRMSD,
  592     ]
  593 
  594 
  595 def InitializePsi4ForWorkerProcess():
  596     """Initialize Psi4 for a worker process."""
  597 
  598     if OptionsInfo["Psi4Initialized"]:
  599         return
  600 
  601     OptionsInfo["Psi4Initialized"] = True
  602 
  603     if OptionsInfo["MPLevelConformersMode"] and re.match(
  604         "auto", OptionsInfo["Psi4RunParams"]["OutputFileSpecified"], re.I
  605     ):
  606         # Run Psi4 in quiet mode during multiprocessing at Conformers level for 'auto' OutputFile...
  607         OptionsInfo["Psi4RunParams"]["OutputFile"] = "quiet"
  608     else:
  609         # Update output file...
  610         OptionsInfo["Psi4RunParams"]["OutputFile"] = Psi4Util.UpdatePsi4OutputFileUsingPID(
  611             OptionsInfo["Psi4RunParams"]["OutputFile"], os.getpid()
  612         )
  613 
  614     # Intialize Psi4...
  615     OptionsInfo["psi4"] = Psi4Util.InitializePsi4(
  616         Psi4RunParams=OptionsInfo["Psi4RunParams"],
  617         Psi4OptionsParams=OptionsInfo["Psi4OptionsParams"],
  618         PrintVersion=False,
  619         PrintHeader=True,
  620     )
  621 
  622     # Setup max iterations global variable...
  623     Psi4Util.UpdatePsi4OptionsParameters(OptionsInfo["psi4"], {"GEOM_MAXITER": OptionsInfo["MaxIters"]})
  624 
  625     # Setup conversion factor for energy units...
  626     SetupEnergyConversionFactor(OptionsInfo["psi4"])
  627 
  628 
  629 def RetrieveReferenceMolecule():
  630     """Retrieve and validate reference molecule."""
  631 
  632     RefFile = OptionsInfo["RefFile"]
  633 
  634     MiscUtil.PrintInfo("\nProcessing file %s..." % (RefFile))
  635     OptionsInfo["InfileParams"]["AllowEmptyMols"] = False
  636     ValidRefMols, RefMolCount, ValidRefMolCount = RDKitUtil.ReadAndValidateMolecules(
  637         RefFile, **OptionsInfo["InfileParams"]
  638     )
  639 
  640     if ValidRefMolCount == 0:
  641         MiscUtil.PrintError("The reference file, %s, contains no valid molecules." % RefFile)
  642     elif ValidRefMolCount > 1:
  643         MiscUtil.PrintWarning(
  644             "The reference file, %s, contains, %d, valid molecules. Using first molecule as the reference molecule..."
  645             % (RefFile, ValidRefMolCount)
  646         )
  647 
  648     RefMol = ValidRefMols[0]
  649 
  650     if OptionsInfo["UseScaffoldSMARTS"]:
  651         ScaffoldPatternMol = Chem.MolFromSmarts(OptionsInfo["ScaffoldSMARTS"])
  652         if ScaffoldPatternMol is None:
  653             MiscUtil.PrintError(
  654                 'Failed to create scaffold pattern molecule. The scaffold SMARTS pattern, %s, specified using "-s, --scaffold" option is not valid.'
  655                 % (OptionsInfo["ScaffoldSMARTS"])
  656             )
  657 
  658         if not RefMol.HasSubstructMatch(ScaffoldPatternMol):
  659             MiscUtil.PrintError(
  660                 'The scaffold SMARTS pattern, %s, specified using "-s, --scaffold" option, is missing in the first valid reference molecule.'
  661                 % (OptionsInfo["ScaffoldSMARTS"])
  662             )
  663 
  664     return RefMol
  665 
  666 
  667 def SetupCoreScaffold(RefMol, Mol, MolCount):
  668     """Setup a reference molecule core containing common scaffold atoms between
  669     a pair of molecules."""
  670 
  671     if OptionsInfo["UseScaffoldMCS"]:
  672         return SetupCoreScaffoldByMCS(RefMol, Mol, MolCount)
  673     elif OptionsInfo["UseScaffoldSMARTS"]:
  674         return SetupCoreScaffoldBySMARTS(RefMol, Mol, MolCount)
  675     else:
  676         MiscUtil.PrintError(
  677             'The  value, %s, specified for  "-s, --scaffold" option is not supported.' % (OptionsInfo["Scaffold"])
  678         )
  679 
  680 
  681 def SetupCoreScaffoldByMCS(RefMol, Mol, MolCount):
  682     """Setup a reference molecule core containing common scaffold atoms between
  683     a pair of molecules using MCS."""
  684 
  685     MCSParams = OptionsInfo["MCSParams"]
  686     Mols = [RefMol, Mol]
  687 
  688     MCSResultObject = rdFMCS.FindMCS(
  689         Mols,
  690         maximizeBonds=MCSParams["MaximizeBonds"],
  691         threshold=MCSParams["Threshold"],
  692         timeout=MCSParams["TimeOut"],
  693         verbose=MCSParams["Verbose"],
  694         matchValences=MCSParams["MatchValences"],
  695         ringMatchesRingOnly=MCSParams["RingMatchesRingOnly"],
  696         completeRingsOnly=MCSParams["CompleteRingsOnly"],
  697         matchChiralTag=MCSParams["MatchChiralTag"],
  698         atomCompare=MCSParams["AtomCompare"],
  699         bondCompare=MCSParams["BondCompare"],
  700         seedSmarts=MCSParams["SeedSMARTS"],
  701     )
  702 
  703     if MCSResultObject.canceled:
  704         if not OptionsInfo["QuietMode"]:
  705             MiscUtil.PrintWarning(
  706                 'MCS failed to identify a common core scaffold between reference moecule and input molecule %s. Specify a different set of parameters using "-m, --mcsParams" option and try again.'
  707                 % (RDKitUtil.GetMolName(Mol, MolCount))
  708             )
  709         return None
  710 
  711     CoreNumAtoms = MCSResultObject.numAtoms
  712     CoreNumBonds = MCSResultObject.numBonds
  713 
  714     SMARTSCore = MCSResultObject.smartsString
  715 
  716     if not len(SMARTSCore):
  717         if not OptionsInfo["QuietMode"]:
  718             MiscUtil.PrintWarning(
  719                 'MCS failed to identify a common core scaffold between reference moecule and input molecule %s. Specify a different set of parameters using "-m, --mcsParams" option and try again.'
  720                 % (RDKitUtil.GetMolName(Mol, MolCount))
  721             )
  722         return None
  723 
  724     if CoreNumAtoms < MCSParams["MinNumAtoms"]:
  725         if not OptionsInfo["QuietMode"]:
  726             MiscUtil.PrintWarning(
  727                 'Number of atoms, %d, in core scaffold identified by MCS is less than, %d, as specified by "minNumAtoms" parameter in  "-m, --mcsParams" option.'
  728                 % (CoreNumAtoms, MCSParams["MinNumAtoms"])
  729             )
  730         return None
  731 
  732     if CoreNumBonds < MCSParams["MinNumBonds"]:
  733         if not OptionsInfo["QuietMode"]:
  734             MiscUtil.PrintWarning(
  735                 'Number of bonds, %d, in core scaffold identified by MCS is less than, %d, as specified by "minNumBonds" parameter in  "-m, --mcsParams" option.'
  736                 % (CoreNumBonds, MCSParams["MinNumBonds"])
  737             )
  738         return None
  739 
  740     return GenerateCoreMol(RefMol, SMARTSCore)
  741 
  742 
  743 def SetupCoreScaffoldBySMARTS(RefMol, Mol, MolCount):
  744     """Setup a reference molecule core containing common scaffold atoms between
  745     a pair of molecules using specified SMARTS."""
  746 
  747     if OptionsInfo["ScaffoldPatternMol"] is None:
  748         OptionsInfo["ScaffoldPatternMol"] = Chem.MolFromSmarts(OptionsInfo["ScaffoldSMARTS"])
  749 
  750     if not Mol.HasSubstructMatch(OptionsInfo["ScaffoldPatternMol"]):
  751         if not OptionsInfo["QuietMode"]:
  752             MiscUtil.PrintWarning(
  753                 'The scaffold SMARTS pattern, %s, specified using "-s, --scaffold" option is missing in input molecule,  %s.'
  754                 % (OptionsInfo["ScaffoldSMARTS"], RDKitUtil.GetMolName(Mol, MolCount))
  755             )
  756         return None
  757 
  758     return GenerateCoreMol(RefMol, OptionsInfo["ScaffoldSMARTS"])
  759 
  760 
  761 def GenerateCoreMol(RefMol, SMARTSCore):
  762     """Generate core molecule for embedding."""
  763 
  764     # Create a molecule corresponding to core atoms...
  765     SMARTSCoreMol = Chem.MolFromSmarts(SMARTSCore)
  766 
  767     # Setup a ref molecule containing core atoms with dummy atoms as
  768     # attachment points for atoms around the core atoms...
  769     Core = AllChem.ReplaceSidechains(Chem.RemoveHs(RefMol), SMARTSCoreMol)
  770 
  771     # Delete any substructures containing dummy atoms..
  772     RefMolCore = AllChem.DeleteSubstructs(Core, Chem.MolFromSmiles("*"))
  773     RefMolCore.UpdatePropertyCache()
  774 
  775     return RefMolCore
  776 
  777 
  778 def GenerateMolConformers(Psi4Handle, Mol, RefMolCore, MolNum=None):
  779     """Generate constrained conformers."""
  780 
  781     # Add hydrogens...
  782     Mol = Chem.AddHs(Mol, addCoords=True)
  783 
  784     MolName = RDKitUtil.GetMolName(Mol, MolNum)
  785 
  786     # Setup constrained conformers...
  787     MolConfs, MolConfsStatus, MolConfIDs = ConstrainEmbedAndMinimizeMoleculeUsingRDKit(Mol, RefMolCore, MolNum)
  788     if not MolConfsStatus:
  789         if not OptionsInfo["QuietMode"]:
  790             MiscUtil.PrintWarning(
  791                 "Conformation generation couldn't be performed for molecule %s: Constrained embedding failed...\n"
  792                 % MolName
  793             )
  794         return (None, False, None, None, None)
  795 
  796     CalcEnergyMap = {}
  797     ScaffoldEmbedRMSDMap = {}
  798 
  799     for MolConfIndex, MolConf in enumerate(MolConfs):
  800         MolConfID = MolConfIDs[MolConfIndex]
  801 
  802         if not OptionsInfo["QuietMode"]:
  803             MiscUtil.PrintInfo("\nProcessing molecule %s conformer number %s..." % (MolName, MolConfID))
  804 
  805         # Perform Psi4 minimization...
  806         CalcStatus, Energy = ConstrainAndMinimizeMoleculeUsingPsi4(Psi4Handle, MolConf, RefMolCore, MolNum)
  807         if not CalcStatus:
  808             if not OptionsInfo["QuietMode"]:
  809                 MiscUtil.PrintWarning("Minimization couldn't be performed for molecule %s\n" % (MolName))
  810             return (None, False, None, None, None)
  811 
  812         CalcEnergyMap[MolConfID] = Energy
  813 
  814         if OptionsInfo["ScaffoldRMSDOut"]:
  815             ScaffoldEmbedRMSDMap[MolConfID] = "%.4f" % float(MolConf.GetProp("EmbedRMS"))
  816         MolConf.ClearProp("EmbedRMS")
  817 
  818     # Filter conformers...
  819     SelectedMolConfs, SelectedMolConfIDs, SelectedConfEnergies, SelectedConfScaffoldEmbedRMSDValues = (
  820         FilterMolConformers(Mol, MolNum, MolConfs, MolConfIDs, CalcEnergyMap, ScaffoldEmbedRMSDMap)
  821     )
  822 
  823     return (SelectedMolConfs, True, SelectedMolConfIDs, SelectedConfEnergies, SelectedConfScaffoldEmbedRMSDValues)
  824 
  825 
  826 def FilterMolConformers(Mol, MolNum, MolConfs, ConfIDs, CalcEnergyMap, ScaffoldEmbedRMSDMap):
  827     """Filter conformers for a molecule."""
  828 
  829     # Sort conformers by energy...
  830     SortedConfIDs = sorted(ConfIDs, key=lambda ConfID: CalcEnergyMap[ConfID])
  831 
  832     # Setup a map from ConfID to MolConf...
  833     MolConfsMap = {}
  834     for MolConfIndex, ConfID in enumerate(ConfIDs):
  835         MolConfsMap[ConfID] = MolConfs[MolConfIndex]
  836 
  837     MinEnergyConfID = SortedConfIDs[0]
  838     MinConfEnergy = CalcEnergyMap[MinEnergyConfID]
  839     MinEnergyMolConf = MolConfsMap[MinEnergyConfID]
  840 
  841     EnergyWindow = OptionsInfo["EnergyWindow"]
  842 
  843     EnergyRMSDCutoff = OptionsInfo["EnergyRMSDCutoff"]
  844     ApplyEnergyRMSDCutoff = True if EnergyRMSDCutoff > 0 else False
  845 
  846     EnergyRMSDCutoffLowest = OptionsInfo["EnergyRMSDCutoffModeLowest"]
  847     EnergyRMSDCalcModeBest = OptionsInfo["EnergyRMSDCalcModeBest"]
  848 
  849     SelectedConfIDs = []
  850 
  851     ConfCount = 0
  852     IgnoredByEnergyConfCount = 0
  853     IgnoredByRMSDConfCount = 0
  854 
  855     FirstConf = True
  856 
  857     for ConfID in SortedConfIDs:
  858         if FirstConf:
  859             FirstConf = False
  860             ConfCount += 1
  861             SelectedConfIDs.append(ConfID)
  862             continue
  863 
  864         ConfEnergyDiff = abs(CalcEnergyMap[ConfID] - MinConfEnergy)
  865         if ConfEnergyDiff > EnergyWindow:
  866             IgnoredByEnergyConfCount += 1
  867             continue
  868 
  869         if ApplyEnergyRMSDCutoff:
  870             IgnoreConf = False
  871             ProbeMolConf = Chem.Mol(MolConfsMap[ConfID])
  872 
  873             if EnergyRMSDCutoffLowest:
  874                 # Compare RMSD with the lowest energy conformation...
  875                 RefMolConf = Chem.Mol(MinEnergyMolConf)
  876                 if EnergyRMSDCalcModeBest:
  877                     CalcRMSD = AllChem.GetBestRMS(ProbeMolConf, RefMolConf)
  878                 else:
  879                     CalcRMSD = rdMolAlign.AlignMol(ProbeMolConf, RefMolConf)
  880                 if CalcRMSD < EnergyRMSDCutoff:
  881                     IgnoreConf = True
  882             else:
  883                 for SelectedConfID in SelectedConfIDs:
  884                     RefMolConf = Chem.Mol(MolConfsMap[SelectedConfID])
  885                     if EnergyRMSDCalcModeBest:
  886                         CalcRMSD = AllChem.GetBestRMS(ProbeMolConf, RefMolConf)
  887                     else:
  888                         CalcRMSD = rdMolAlign.AlignMol(ProbeMolConf, RefMolConf)
  889                     if CalcRMSD < EnergyRMSDCutoff:
  890                         IgnoreConf = True
  891                         break
  892             if IgnoreConf:
  893                 IgnoredByRMSDConfCount += 1
  894                 continue
  895 
  896         ConfCount += 1
  897         SelectedConfIDs.append(ConfID)
  898 
  899     if not OptionsInfo["QuietMode"]:
  900         MiscUtil.PrintInfo(
  901             "\nTotal Number of conformations generated for %s: %d" % (RDKitUtil.GetMolName(Mol, MolNum), ConfCount)
  902         )
  903         MiscUtil.PrintInfo(
  904             "Number of conformations ignored due to energy window cutoff: %d" % (IgnoredByEnergyConfCount)
  905         )
  906         if ApplyEnergyRMSDCutoff:
  907             MiscUtil.PrintInfo(
  908                 "Number of conformations ignored due to energy RMSD cutoff:  %d" % (IgnoredByRMSDConfCount)
  909             )
  910 
  911     # Setup selected conformer molecules...
  912     SelectedConfMols = [MolConfsMap[ConfID] for ConfID in SelectedConfIDs]
  913 
  914     #  Setup energies...
  915     SelectedConfEnergies = None
  916     if OptionsInfo["EnergyOut"]:
  917         SelectedConfEnergies = []
  918         for ConfID in SelectedConfIDs:
  919             Energy = "%.*f" % (OptionsInfo["Precision"], CalcEnergyMap[ConfID])
  920             SelectedConfEnergies.append(Energy)
  921 
  922     # Setup scaffold RMSD values...
  923     SelectedConfScaffoldEmbedRMSDValues = None
  924     if OptionsInfo["ScaffoldRMSDOut"]:
  925         SelectedConfScaffoldEmbedRMSDValues = []
  926         for ConfID in SelectedConfIDs:
  927             # MolConf = MolConfsMap[ConfID]
  928             SelectedConfScaffoldEmbedRMSDValues.append(ScaffoldEmbedRMSDMap[ConfID])
  929 
  930     return (SelectedConfMols, SelectedConfIDs, SelectedConfEnergies, SelectedConfScaffoldEmbedRMSDValues)
  931 
  932 
  933 def ConstrainAndMinimizeMoleculeUsingPsi4(Psi4Handle, Mol, RefMolCore, MolNum, ConfID=-1):
  934     """Constrain and Minimize molecule using Psi4."""
  935 
  936     # Setup a list for constrained atoms...
  937     ConstrainedAtomIndices = SetupConstrainedAtomIndicesForPsi4(Mol, RefMolCore)
  938     if len(ConstrainedAtomIndices) == 0:
  939         return (False, None)
  940 
  941     # Setup a Psi4Mol...
  942     Psi4Mol = SetupPsi4Mol(Psi4Handle, Mol, MolNum, ConfID)
  943     if Psi4Mol is None:
  944         return (False, None)
  945 
  946     #  Setup reference wave function...
  947     Reference = SetupReferenceWavefunction(Mol)
  948     Psi4Handle.set_options({"Reference": Reference})
  949 
  950     # Setup method name and basis set...
  951     MethodName, BasisSet = SetupMethodNameAndBasisSet(Mol)
  952 
  953     # Setup freeze list for constrained atoms...
  954     FreezeXYZList = [("%s xyz" % AtomIdex) for AtomIdex in ConstrainedAtomIndices]
  955     FreezeXYZString = " %s " % " ".join(FreezeXYZList)
  956     Psi4Handle.set_options({"OPTKING__frozen_cartesian": FreezeXYZString})
  957 
  958     # Optimize geometry...
  959     Status, Energy, WaveFunction = Psi4Util.PerformGeometryOptimization(
  960         Psi4Handle, Psi4Mol, MethodName, BasisSet, ReturnWaveFunction=True, Quiet=OptionsInfo["QuietMode"]
  961     )
  962 
  963     if not Status:
  964         PerformPsi4Cleanup(Psi4Handle)
  965         return (False, None)
  966 
  967     # Update atom positions...
  968     AtomPositions = Psi4Util.GetAtomPositions(Psi4Handle, WaveFunction, InAngstroms=True)
  969     RDKitUtil.SetAtomPositions(Mol, AtomPositions, ConfID=ConfID)
  970 
  971     # Convert energy units...
  972     if OptionsInfo["ApplyEnergyConversionFactor"]:
  973         Energy = Energy * OptionsInfo["EnergyConversionFactor"]
  974 
  975     # Clean up
  976     PerformPsi4Cleanup(Psi4Handle)
  977 
  978     return (True, Energy)
  979 
  980 
  981 def ConstrainEmbedAndMinimizeMoleculeUsingRDKit(Mol, RefMolCore, MolNum=None):
  982     """Constrain, embed, and minimize molecule."""
  983 
  984     # Setup forcefield function to use for constrained minimization...
  985     ForceFieldFunction = None
  986     ForceFieldName = None
  987     if OptionsInfo["ConfGenerationParams"]["UseUFF"]:
  988         ForceFieldFunction = lambda mol, confId=-1: AllChem.UFFGetMoleculeForceField(mol, confId=confId)
  989         ForceFieldName = "UFF"
  990     else:
  991         ForceFieldFunction = lambda mol, confId=-1: AllChem.MMFFGetMoleculeForceField(
  992             mol,
  993             AllChem.MMFFGetMoleculeProperties(mol, mmffVariant=OptionsInfo["ConfGenerationParams"]["MMFFVariant"]),
  994             confId=confId,
  995         )
  996         ForceFieldName = "MMFF"
  997 
  998     if ForceFieldFunction is None:
  999         if not OptionsInfo["QuietMode"]:
 1000             MiscUtil.PrintWarning(
 1001                 "Failed to setup forcefield %s for molecule: %s\n" % (ForceFieldName, RDKitUtil.GetMolName(Mol, MolNum))
 1002             )
 1003         return (None, False, None)
 1004 
 1005     MaxConfs = OptionsInfo["ConfGenerationParams"]["MaxConfs"]
 1006     EnforceChirality = OptionsInfo["ConfGenerationParams"]["EnforceChirality"]
 1007     UseExpTorsionAnglePrefs = OptionsInfo["ConfGenerationParams"]["UseExpTorsionAnglePrefs"]
 1008     ETVersion = OptionsInfo["ConfGenerationParams"]["ETVersion"]
 1009     UseBasicKnowledge = OptionsInfo["ConfGenerationParams"]["UseBasicKnowledge"]
 1010     UseTethers = OptionsInfo["ConfGenerationParams"]["UseTethers"]
 1011 
 1012     MolConfs = []
 1013     ConfIDs = [ConfID for ConfID in range(0, MaxConfs)]
 1014 
 1015     for ConfID in ConfIDs:
 1016         try:
 1017             MolConf = Chem.Mol(Mol)
 1018             AllChem.ConstrainedEmbed(
 1019                 MolConf,
 1020                 RefMolCore,
 1021                 useTethers=UseTethers,
 1022                 coreConfId=-1,
 1023                 randomseed=ConfID,
 1024                 getForceField=ForceFieldFunction,
 1025                 enforceChirality=EnforceChirality,
 1026                 useExpTorsionAnglePrefs=UseExpTorsionAnglePrefs,
 1027                 useBasicKnowledge=UseBasicKnowledge,
 1028                 ETversion=ETVersion,
 1029             )
 1030         except (ValueError, RuntimeError, Chem.rdchem.KekulizeException) as ErrMsg:
 1031             if not OptionsInfo["QuietMode"]:
 1032                 MiscUtil.PrintWarning(
 1033                     "Constrained embedding couldn't  be performed for molecule %s:\n%s\n"
 1034                     % (RDKitUtil.GetMolName(Mol, MolNum), ErrMsg)
 1035                 )
 1036             return (None, False, None)
 1037 
 1038         MolConfs.append(MolConf)
 1039 
 1040     return FilterConstrainedConformationsByRMSD(Mol, MolConfs, ConfIDs, MolNum)
 1041 
 1042 
 1043 def FilterConstrainedConformationsByRMSD(Mol, MolConfs, MolConfIDs, MolNum=None):
 1044     """Filter contarained conformations by RMSD."""
 1045 
 1046     EmbedRMSDCutoff = OptionsInfo["ConfGenerationParams"]["EmbedRMSDCutoff"]
 1047     if EmbedRMSDCutoff is None or EmbedRMSDCutoff <= 0:
 1048         if not OptionsInfo["QuietMode"]:
 1049             MiscUtil.PrintInfo(
 1050                 "\nGenerating initial ensemble of  constrained conformations by distance geometry  and forcefield for %s - EmbedRMSDCutoff: None; Size: %s"
 1051                 % (RDKitUtil.GetMolName(Mol, MolNum), len(MolConfs))
 1052             )
 1053         return (MolConfs, True, MolConfIDs)
 1054 
 1055     FirstMolConf = True
 1056     SelectedMolConfs = []
 1057     SelectedMolConfIDs = []
 1058     for MolConfIndex, MolConf in enumerate(MolConfs):
 1059         if FirstMolConf:
 1060             FirstMolConf = False
 1061             SelectedMolConfs.append(MolConf)
 1062             SelectedMolConfIDs.append(MolConfIDs[MolConfIndex])
 1063             continue
 1064 
 1065         # Compare RMSD against all selected conformers...
 1066         IgnoreConf = False
 1067         ProbeMolConf = Chem.RemoveHs(Chem.Mol(MolConf))
 1068         for SelectedMolConfIndex, SelectedMolConf in enumerate(SelectedMolConfs):
 1069             RefMolConf = Chem.RemoveHs(Chem.Mol(SelectedMolConf))
 1070             CalcRMSD = rdMolAlign.AlignMol(ProbeMolConf, RefMolConf)
 1071 
 1072             if CalcRMSD < EmbedRMSDCutoff:
 1073                 IgnoreConf = True
 1074                 break
 1075 
 1076         if IgnoreConf:
 1077             continue
 1078 
 1079         SelectedMolConfs.append(MolConf)
 1080         SelectedMolConfIDs.append(MolConfIDs[MolConfIndex])
 1081 
 1082     if not OptionsInfo["QuietMode"]:
 1083         MiscUtil.PrintInfo(
 1084             "\nGenerating initial ensemble of constrained conformations by distance geometry and forcefield for %s - EmbedRMSDCutoff: %s; Size: %s; Size after RMSD filtering: %s"
 1085             % (RDKitUtil.GetMolName(Mol, MolNum), EmbedRMSDCutoff, len(MolConfs), len(SelectedMolConfs))
 1086         )
 1087 
 1088     return (SelectedMolConfs, True, SelectedMolConfIDs)
 1089 
 1090 
 1091 def SetupConstrainedAtomIndicesForPsi4(Mol, RefMolCore, ConstrainHydrogens=False):
 1092     """Setup a list of atom indices to be constrained during Psi4 minimizaiton."""
 1093 
 1094     AtomIndices = []
 1095 
 1096     # Collect matched heavy atoms along with attached hydrogens...
 1097     for AtomIndex in Mol.GetSubstructMatch(RefMolCore):
 1098         Atom = Mol.GetAtomWithIdx(AtomIndex)
 1099         if Atom.GetAtomicNum() == 1:
 1100             continue
 1101 
 1102         AtomIndices.append(AtomIndex)
 1103         for AtomNbr in Atom.GetNeighbors():
 1104             if AtomNbr.GetAtomicNum() == 1:
 1105                 if ConstrainHydrogens:
 1106                     AtomNbrIndex = AtomNbr.GetIdx()
 1107                     AtomIndices.append(AtomNbrIndex)
 1108 
 1109     AtomIndices = sorted(AtomIndices)
 1110 
 1111     # Atom indices start from 1 for Psi4 instead 0 for RDKit...
 1112     AtomIndices = [AtomIndex + 1 for AtomIndex in AtomIndices]
 1113 
 1114     return AtomIndices
 1115 
 1116 
 1117 def SetupPsi4Mol(Psi4Handle, Mol, MolNum, ConfID=-1):
 1118     """Setup a Psi4 molecule object."""
 1119 
 1120     # Turn off recentering and reorientation to perform optimization in the
 1121     # constrained coordinate space...
 1122     MolGeometry = RDKitUtil.GetPsi4XYZFormatString(Mol, ConfID=ConfID, NoCom=True, NoReorient=True)
 1123 
 1124     try:
 1125         Psi4Mol = Psi4Handle.geometry(MolGeometry)
 1126     except Exception as ErrMsg:
 1127         Psi4Mol = None
 1128         if not OptionsInfo["QuietMode"]:
 1129             MiscUtil.PrintWarning("Failed to create Psi4 molecule from geometry string: %s\n" % ErrMsg)
 1130             MolName = RDKitUtil.GetMolName(Mol, MolNum)
 1131             MiscUtil.PrintWarning("Ignoring molecule: %s" % MolName)
 1132 
 1133     return Psi4Mol
 1134 
 1135 
 1136 def PerformPsi4Cleanup(Psi4Handle):
 1137     """Perform clean up."""
 1138 
 1139     # Clean up after Psi4 run...
 1140     Psi4Handle.core.clean()
 1141 
 1142     # Clean up any leftover scratch files...
 1143     if OptionsInfo["MPMode"]:
 1144         Psi4Util.RemoveScratchFiles(Psi4Handle, OptionsInfo["Psi4RunParams"]["OutputFile"])
 1145 
 1146 
 1147 def CheckAndValidateMolecule(Mol, MolCount=None):
 1148     """Validate molecule for Psi4 calculations."""
 1149 
 1150     if Mol is None:
 1151         if not OptionsInfo["QuietMode"]:
 1152             MiscUtil.PrintInfo("\nProcessing molecule number %s..." % MolCount)
 1153         return False
 1154 
 1155     MolName = RDKitUtil.GetMolName(Mol, MolCount)
 1156     if not OptionsInfo["QuietMode"]:
 1157         MiscUtil.PrintInfo("\nProcessing molecule %s..." % MolName)
 1158 
 1159     if RDKitUtil.IsMolEmpty(Mol):
 1160         if not OptionsInfo["QuietMode"]:
 1161             MiscUtil.PrintWarning("Ignoring empty molecule: %s\n" % MolName)
 1162         return False
 1163 
 1164     if not RDKitUtil.ValidateElementSymbols(RDKitUtil.GetAtomSymbols(Mol)):
 1165         if not OptionsInfo["QuietMode"]:
 1166             MiscUtil.PrintWarning("Ignoring molecule containing invalid element symbols: %s\n" % MolName)
 1167         return False
 1168 
 1169     return True
 1170 
 1171 
 1172 def SetupMethodNameAndBasisSet(Mol):
 1173     """Setup method name and basis set."""
 1174 
 1175     MethodName = OptionsInfo["MethodName"]
 1176     if OptionsInfo["MethodNameAuto"]:
 1177         MethodName = "B3LYP"
 1178 
 1179     BasisSet = OptionsInfo["BasisSet"]
 1180     if OptionsInfo["BasisSetAuto"]:
 1181         BasisSet = "6-31+G**" if RDKitUtil.IsAtomSymbolPresentInMol(Mol, "S") else "6-31G**"
 1182 
 1183     return (MethodName, BasisSet)
 1184 
 1185 
 1186 def SetupReferenceWavefunction(Mol):
 1187     """Setup reference wavefunction."""
 1188 
 1189     Reference = OptionsInfo["Reference"]
 1190     if OptionsInfo["ReferenceAuto"]:
 1191         Reference = "UHF" if (RDKitUtil.GetSpinMultiplicity(Mol) > 1) else "RHF"
 1192 
 1193     return Reference
 1194 
 1195 
 1196 def SetupEnergyConversionFactor(Psi4Handle):
 1197     """Setup converstion factor for energt units. The Psi4 energy units are Hartrees."""
 1198 
 1199     EnergyUnits = OptionsInfo["EnergyUnits"]
 1200 
 1201     ApplyConversionFactor = True
 1202     if re.match(r"^kcal\/mol$", EnergyUnits, re.I):
 1203         ConversionFactor = Psi4Handle.constants.hartree2kcalmol
 1204     elif re.match(r"^kJ\/mol$", EnergyUnits, re.I):
 1205         ConversionFactor = Psi4Handle.constants.hartree2kJmol
 1206     elif re.match("^eV$", EnergyUnits, re.I):
 1207         ConversionFactor = Psi4Handle.constants.hartree2ev
 1208     else:
 1209         ApplyConversionFactor = False
 1210         ConversionFactor = 1.0
 1211 
 1212     OptionsInfo["ApplyEnergyConversionFactor"] = ApplyConversionFactor
 1213     OptionsInfo["EnergyConversionFactor"] = ConversionFactor
 1214 
 1215 
 1216 def WriteMolConformers(Writer, Mol, MolNum, ConfMols, ConfIDs, ConfEnergyValues=None, ConfScaffoldEmbedRMSDValues=None):
 1217     """Write molecule coformers."""
 1218 
 1219     if ConfMols is None:
 1220         return
 1221 
 1222     for Index, ConfMol in enumerate(ConfMols):
 1223         ConfMolName = RDKitUtil.GetMolName(Mol, MolNum)
 1224         SetConfMolName(ConfMol, ConfMolName, ConfIDs[Index])
 1225 
 1226         if ConfScaffoldEmbedRMSDValues is not None:
 1227             ConfMol.SetProp("CoreScaffoldEmbedRMSD", ConfScaffoldEmbedRMSDValues[Index])
 1228 
 1229         if ConfEnergyValues is not None:
 1230             ConfMol.SetProp(OptionsInfo["EnergyDataFieldLabel"], ConfEnergyValues[Index])
 1231 
 1232         Writer.write(ConfMol)
 1233 
 1234 
 1235 def SetConfMolName(Mol, MolName, ConfCount):
 1236     """Set conf mol name."""
 1237 
 1238     ConfName = "%s_Conf%d" % (MolName, ConfCount)
 1239     Mol.SetProp("_Name", ConfName)
 1240 
 1241 
 1242 def ProcessMCSParameters():
 1243     """Set up and process MCS parameters."""
 1244 
 1245     SetupMCSParameters()
 1246     ProcessSpecifiedMCSParameters()
 1247 
 1248 
 1249 def SetupMCSParameters():
 1250     """Set up default MCS parameters."""
 1251 
 1252     OptionsInfo["MCSParams"] = {
 1253         "MaximizeBonds": True,
 1254         "Threshold": 0.9,
 1255         "TimeOut": 3600,
 1256         "Verbose": False,
 1257         "MatchValences": True,
 1258         "MatchChiralTag": False,
 1259         "RingMatchesRingOnly": True,
 1260         "CompleteRingsOnly": True,
 1261         "AtomCompare": rdFMCS.AtomCompare.CompareElements,
 1262         "BondCompare": rdFMCS.BondCompare.CompareOrder,
 1263         "SeedSMARTS": "",
 1264         "MinNumAtoms": 1,
 1265         "MinNumBonds": 0,
 1266     }
 1267 
 1268 
 1269 def ProcessSpecifiedMCSParameters():
 1270     """Process specified MCS parameters."""
 1271 
 1272     if re.match("^auto$", OptionsInfo["SpecifiedMCSParams"], re.I):
 1273         # Nothing to process...
 1274         return
 1275 
 1276     # Parse specified parameters...
 1277     MCSParams = re.sub(" ", "", OptionsInfo["SpecifiedMCSParams"])
 1278     if not MCSParams:
 1279         MiscUtil.PrintError('No valid parameter name and value pairs specified using "-m, --mcsParams" option.')
 1280 
 1281     MCSParamsWords = MCSParams.split(",")
 1282     if len(MCSParamsWords) % 2:
 1283         MiscUtil.PrintError(
 1284             'The number of comma delimited paramater names and values, %d, specified using "-m, --mcsParams" option must be an even number.'
 1285             % (len(MCSParamsWords))
 1286         )
 1287 
 1288     # Setup  canonical parameter names...
 1289     ValidParamNames = []
 1290     CanonicalParamNamesMap = {}
 1291     for ParamName in sorted(OptionsInfo["MCSParams"]):
 1292         ValidParamNames.append(ParamName)
 1293         CanonicalParamNamesMap[ParamName.lower()] = ParamName
 1294 
 1295     # Validate and set paramater names and value...
 1296     for Index in range(0, len(MCSParamsWords), 2):
 1297         Name = MCSParamsWords[Index]
 1298         Value = MCSParamsWords[Index + 1]
 1299 
 1300         CanonicalName = Name.lower()
 1301         if CanonicalName not in CanonicalParamNamesMap:
 1302             MiscUtil.PrintError(
 1303                 'The parameter name, %s, specified using "-m, --mcsParams" option is not a valid name. Supported parameter names: %s'
 1304                 % (Name, " ".join(ValidParamNames))
 1305             )
 1306 
 1307         ParamName = CanonicalParamNamesMap[CanonicalName]
 1308         if re.match("^Threshold$", ParamName, re.I):
 1309             Value = float(Value)
 1310             if Value <= 0.0 or Value > 1.0:
 1311                 MiscUtil.PrintError(
 1312                     'The parameter value, %s, specified using "-m, --mcsParams" option  for parameter, %s, is not a valid value. Supported values: > 0 and <= 1.0'
 1313                     % (Value, Name)
 1314                 )
 1315             ParamValue = Value
 1316         elif re.match("^Timeout$", ParamName, re.I):
 1317             Value = int(Value)
 1318             if Value <= 0:
 1319                 MiscUtil.PrintError(
 1320                     'The parameter value, %s, specified using "-m, --mcsParams" option  for parameter, %s, is not a valid value. Supported values: > 0'
 1321                     % (Value, Name)
 1322                 )
 1323             ParamValue = Value
 1324         elif re.match("^MinNumAtoms$", ParamName, re.I):
 1325             Value = int(Value)
 1326             if Value < 1:
 1327                 MiscUtil.PrintError(
 1328                     'The parameter value, %s, specified using "-m, --mcsParams" option  for parameter, %s, is not a valid value. Supported values: >= 1'
 1329                     % (Value, Name)
 1330                 )
 1331             ParamValue = Value
 1332         elif re.match("^MinNumBonds$", ParamName, re.I):
 1333             Value = int(Value)
 1334             if Value < 0:
 1335                 MiscUtil.PrintError(
 1336                     'The parameter value, %s, specified using "-m, --mcsParams" option  for parameter, %s, is not a valid value. Supported values: >=0 '
 1337                     % (Value, Name)
 1338                 )
 1339             ParamValue = Value
 1340         elif re.match("^AtomCompare$", ParamName, re.I):
 1341             if re.match("^CompareAny$", Value, re.I):
 1342                 ParamValue = rdFMCS.AtomCompare.CompareAny
 1343             elif re.match("^CompareElements$", Value, re.I):
 1344                 ParamValue = Chem.rdFMCS.AtomCompare.CompareElements
 1345             elif re.match("^CompareIsotopes$", Value, re.I):
 1346                 ParamValue = Chem.rdFMCS.AtomCompare.CompareIsotopes
 1347             else:
 1348                 MiscUtil.PrintError(
 1349                     'The parameter value, %s, specified using "-m, --mcsParams" option  for parameter, %s, is not a valid value. Supported values: CompareAny CompareElements CompareIsotopes'
 1350                     % (Value, Name)
 1351                 )
 1352         elif re.match("^BondCompare$", ParamName, re.I):
 1353             if re.match("^CompareAny$", Value, re.I):
 1354                 ParamValue = Chem.rdFMCS.BondCompare.CompareAny
 1355             elif re.match("^CompareOrder$", Value, re.I):
 1356                 ParamValue = rdFMCS.BondCompare.CompareOrder
 1357             elif re.match("^CompareOrderExact$", Value, re.I):
 1358                 ParamValue = rdFMCS.BondCompare.CompareOrderExact
 1359             else:
 1360                 MiscUtil.PrintError(
 1361                     'The parameter value, %s, specified using "-m, --mcsParams" option  for parameter, %s, is not a valid value. Supported values: CompareAny CompareOrder CompareOrderExact'
 1362                     % (Value, Name)
 1363                 )
 1364         elif re.match("^SeedSMARTS$", ParamName, re.I):
 1365             if not len(Value):
 1366                 MiscUtil.PrintError(
 1367                     'The parameter value specified using "-m, --mcsParams" option  for parameter, %s, is empty. '
 1368                     % (Name)
 1369                 )
 1370             ParamValue = Value
 1371         else:
 1372             if not re.match("^(Yes|No|True|False)$", Value, re.I):
 1373                 MiscUtil.PrintError(
 1374                     'The parameter value, %s, specified using "-m, --mcsParams" option  for parameter, %s, is not a valid value. Supported values: Yes No True False'
 1375                     % (Value, Name)
 1376                 )
 1377             ParamValue = False
 1378             if re.match("^(Yes|True)$", Value, re.I):
 1379                 ParamValue = True
 1380 
 1381         # Set value...
 1382         OptionsInfo["MCSParams"][ParamName] = ParamValue
 1383 
 1384 
 1385 def ProcessOptions():
 1386     """Process and validate command line arguments and options."""
 1387 
 1388     MiscUtil.PrintInfo("Processing options...")
 1389 
 1390     # Validate options...
 1391     ValidateOptions()
 1392 
 1393     OptionsInfo["Infile"] = Options["--infile"]
 1394     OptionsInfo["SMILESInfileStatus"] = True if MiscUtil.CheckFileExt(Options["--infile"], "smi csv tsv txt") else False
 1395     ParamsDefaultInfoOverride = {"RemoveHydrogens": False}
 1396     OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters(
 1397         "--infileParams",
 1398         Options["--infileParams"],
 1399         InfileName=Options["--infile"],
 1400         ParamsDefaultInfo=ParamsDefaultInfoOverride,
 1401     )
 1402 
 1403     OptionsInfo["RefFile"] = Options["--reffile"]
 1404 
 1405     OptionsInfo["Outfile"] = Options["--outfile"]
 1406     OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters(
 1407         "--outfileParams", Options["--outfileParams"]
 1408     )
 1409 
 1410     OptionsInfo["Overwrite"] = Options["--overwrite"]
 1411 
 1412     OptionsInfo["Scaffold"] = Options["--scaffold"]
 1413     if re.match("^auto$", Options["--scaffold"], re.I):
 1414         UseScaffoldMCS = True
 1415         UseScaffoldSMARTS = False
 1416         ScaffoldSMARTS = None
 1417     else:
 1418         UseScaffoldMCS = False
 1419         UseScaffoldSMARTS = True
 1420         ScaffoldSMARTS = OptionsInfo["Scaffold"]
 1421 
 1422     OptionsInfo["UseScaffoldMCS"] = UseScaffoldMCS
 1423     OptionsInfo["UseScaffoldSMARTS"] = UseScaffoldSMARTS
 1424     OptionsInfo["ScaffoldSMARTS"] = ScaffoldSMARTS
 1425     OptionsInfo["ScaffoldPatternMol"] = None
 1426 
 1427     OptionsInfo["SpecifiedMCSParams"] = Options["--mcsParams"]
 1428     ProcessMCSParameters()
 1429 
 1430     OptionsInfo["ScaffoldRMSDOut"] = True if re.match("^yes$", Options["--scaffoldRMSDOut"], re.I) else False
 1431 
 1432     # Method, basis set, and reference wavefunction...
 1433     OptionsInfo["BasisSet"] = Options["--basisSet"]
 1434     OptionsInfo["BasisSetAuto"] = True if re.match("^auto$", Options["--basisSet"], re.I) else False
 1435 
 1436     OptionsInfo["MethodName"] = Options["--methodName"]
 1437     OptionsInfo["MethodNameAuto"] = True if re.match("^auto$", Options["--methodName"], re.I) else False
 1438 
 1439     OptionsInfo["Reference"] = Options["--reference"]
 1440     OptionsInfo["ReferenceAuto"] = True if re.match("^auto$", Options["--reference"], re.I) else False
 1441 
 1442     # Run and options parameters...
 1443     OptionsInfo["Psi4OptionsParams"] = Psi4Util.ProcessPsi4OptionsParameters(
 1444         "--psi4OptionsParams", Options["--psi4OptionsParams"]
 1445     )
 1446     OptionsInfo["Psi4RunParams"] = Psi4Util.ProcessPsi4RunParameters(
 1447         "--psi4RunParams", Options["--psi4RunParams"], InfileName=OptionsInfo["Infile"]
 1448     )
 1449 
 1450     # Conformer generation paramaters...
 1451     ParamsDefaultInfoOverride = {"MaxConfs": 50}
 1452     OptionsInfo["ConfGenerationParams"] = MiscUtil.ProcessOptionConformerParameters(
 1453         "--confParams", Options["--confParams"], ParamsDefaultInfoOverride
 1454     )
 1455 
 1456     # Write energy parameters...
 1457     OptionsInfo["EnergyOut"] = True if re.match("^yes$", Options["--energyOut"], re.I) else False
 1458     OptionsInfo["EnergyUnits"] = Options["--energyUnits"]
 1459 
 1460     EnergyDataFieldLabel = Options["--energyDataFieldLabel"]
 1461     if re.match("^auto$", EnergyDataFieldLabel, re.I):
 1462         EnergyDataFieldLabel = "Psi4_Energy (%s)" % Options["--energyUnits"]
 1463     OptionsInfo["EnergyDataFieldLabel"] = EnergyDataFieldLabel
 1464 
 1465     OptionsInfo["EnergyRMSDCalcMode"] = Options["--energyRMSDCalcMode"]
 1466     OptionsInfo["EnergyRMSDCalcModeBest"] = (
 1467         True if re.match("^BestRMSD$", Options["--energyRMSDCalcMode"], re.I) else False
 1468     )
 1469 
 1470     OptionsInfo["EnergyRMSDCutoff"] = float(Options["--energyRMSDCutoff"])
 1471 
 1472     OptionsInfo["EnergyRMSDCutoffMode"] = Options["--energyRMSDCutoffMode"]
 1473     OptionsInfo["EnergyRMSDCutoffModeLowest"] = (
 1474         True if re.match("^Lowest$", Options["--energyRMSDCutoffMode"], re.I) else False
 1475     )
 1476 
 1477     # Process energy window...
 1478     EnergyWindow = Options["--energyWindow"]
 1479     if re.match("^auto$", EnergyWindow, re.I):
 1480         # Set default energy window based on units...
 1481         EnergyUnits = Options["--energyUnits"]
 1482         if re.match(r"^kcal\/mol$", EnergyUnits, re.I):
 1483             EnergyWindow = 20
 1484         elif re.match(r"^kJ\/mol$", EnergyUnits, re.I):
 1485             EnergyWindow = 83.68
 1486         elif re.match("^eV$", EnergyUnits, re.I):
 1487             EnergyWindow = 0.8673
 1488         elif re.match("^Hartrees$", EnergyUnits, re.I):
 1489             EnergyWindow = 0.03188
 1490         else:
 1491             MiscUtil.PrintError(
 1492                 'Failed to set default value for "--energyWindow". The value, %s, specified for option "--energyUnits" is not valid. '
 1493                 % EnergyUnits
 1494             )
 1495     else:
 1496         EnergyWindow = float(EnergyWindow)
 1497     OptionsInfo["EnergyWindow"] = EnergyWindow
 1498 
 1499     OptionsInfo["MaxIters"] = int(Options["--maxIters"])
 1500 
 1501     OptionsInfo["MPMode"] = True if re.match("^yes$", Options["--mp"], re.I) else False
 1502     OptionsInfo["MPParams"] = MiscUtil.ProcessOptionMultiprocessingParameters("--mpParams", Options["--mpParams"])
 1503 
 1504     # Multiprocessing level...
 1505     MPLevelMoleculesMode = False
 1506     MPLevelConformersMode = False
 1507     MPLevel = Options["--mpLevel"]
 1508     if re.match("^Molecules$", MPLevel, re.I):
 1509         MPLevelMoleculesMode = True
 1510     elif re.match("^Conformers$", MPLevel, re.I):
 1511         MPLevelConformersMode = True
 1512     else:
 1513         MiscUtil.PrintError('The value, %s, specified for option "--mpLevel" is not valid. ' % MPLevel)
 1514     OptionsInfo["MPLevel"] = MPLevel
 1515     OptionsInfo["MPLevelMoleculesMode"] = MPLevelMoleculesMode
 1516     OptionsInfo["MPLevelConformersMode"] = MPLevelConformersMode
 1517 
 1518     OptionsInfo["Precision"] = int(Options["--precision"])
 1519     OptionsInfo["QuietMode"] = True if re.match("^yes$", Options["--quiet"], re.I) else False
 1520 
 1521 
 1522 def RetrieveOptions():
 1523     """Retrieve command line arguments and options."""
 1524 
 1525     # Get options...
 1526     global Options
 1527     Options = docopt(_docoptUsage_)
 1528 
 1529     # Set current working directory to the specified directory...
 1530     WorkingDir = Options["--workingdir"]
 1531     if WorkingDir:
 1532         os.chdir(WorkingDir)
 1533 
 1534     # Handle examples option...
 1535     if "--examples" in Options and Options["--examples"]:
 1536         MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_))
 1537         sys.exit(0)
 1538 
 1539 
 1540 def ValidateOptions():
 1541     """Validate option values."""
 1542 
 1543     MiscUtil.ValidateOptionTextValue("--energyOut", Options["--energyOut"], "yes no")
 1544     MiscUtil.ValidateOptionTextValue("--energyUnits", Options["--energyUnits"], "Hartrees kcal/mol kJ/mol eV")
 1545 
 1546     MiscUtil.ValidateOptionTextValue(" --energyRMSDCalcMode", Options["--energyRMSDCalcMode"], "RMSD BestRMSD")
 1547 
 1548     MiscUtil.ValidateOptionFloatValue("--energyRMSDCutoff", Options["--energyRMSDCutoff"], {">=": 0})
 1549     MiscUtil.ValidateOptionTextValue(" --energyRMSDCutoffMode", Options["--energyRMSDCutoffMode"], "All Lowest")
 1550 
 1551     if not re.match("^auto$", Options["--energyWindow"], re.I):
 1552         MiscUtil.ValidateOptionFloatValue("--energyWindow", Options["--energyWindow"], {">": 0})
 1553 
 1554     MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"])
 1555     MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "sdf sd mol smi txt csv tsv")
 1556 
 1557     MiscUtil.ValidateOptionFilePath("-r, --reffile", Options["--reffile"])
 1558     MiscUtil.ValidateOptionFileExt("-r, --reffile", Options["--reffile"], "sdf sd mol")
 1559 
 1560     MiscUtil.ValidateOptionIntegerValue("--maxIters", Options["--maxIters"], {">": 0})
 1561 
 1562     MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "sdf sd")
 1563     MiscUtil.ValidateOptionsOutputFileOverwrite(
 1564         "-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"]
 1565     )
 1566     MiscUtil.ValidateOptionsDistinctFileNames(
 1567         "-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"]
 1568     )
 1569 
 1570     MiscUtil.ValidateOptionTextValue("--mp", Options["--mp"], "yes no")
 1571     MiscUtil.ValidateOptionTextValue("--mpLevel", Options["--mpLevel"], "Molecules Conformers")
 1572 
 1573     MiscUtil.ValidateOptionIntegerValue("-p, --precision", Options["--precision"], {">": 0})
 1574     MiscUtil.ValidateOptionTextValue("-q, --quiet", Options["--quiet"], "yes no")
 1575 
 1576     MiscUtil.ValidateOptionTextValue("--scaffoldRMSDOut", Options["--scaffoldRMSDOut"], "yes no")
 1577 
 1578 
 1579 # Setup a usage string for docopt...
 1580 _docoptUsage_ = """
 1581 Psi4GenerateConstrainedConformers.py - Generate constrained molecular conformations
 1582 
 1583 Usage:
 1584     Psi4GenerateConstrainedConformers.py [--basisSet <text>] [--confParams <Name,Value,...>] [--energyOut <yes or no>]
 1585                                          [--energyDataFieldLabel <text>] [--energyUnits <text>] [--energyRMSDCalcMode <RMSD or BestRMSD>]
 1586                                          [--energyRMSDCutoff <number>] [--energyRMSDCutoffMode <All or Lowest>] [--energyWindow <number>]
 1587                                          [--infileParams <Name,Value,...>] [--maxIters <number>] [--methodName <text>] [--mcsParams <Name,Value,...>]
 1588                                          [--mp <yes or no>] [--mpLevel <Molecules or Conformers>] [--mpParams <Name, Value,...>]
 1589                                          [ --outfileParams <Name,Value,...> ] [--overwrite] [--precision <number> ]
 1590                                          [--psi4OptionsParams <Name,Value,...>] [--psi4RunParams <Name,Value,...>]
 1591                                          [--quiet <yes or no>]  [--reference <text>] [--scaffold <auto or SMARTS>]
 1592                                          [--scaffoldRMSDOut <yes or no>] [-w <dir>] -i <infile> -r <reffile> -o <outfile> 
 1593     Psi4GenerateConstrainedConformers.py -h | --help | -e | --examples
 1594 
 1595 Description:
 1596     Generate molecular conformations  by performing a constrained energy
 1597     minimization against a reference molecule. The molecular conformations
 1598     are generated using a combination of distance geometry and forcefield
 1599     minimization followed by geometry optimization using a quantum chemistry
 1600     method.
 1601 
 1602     An initial set of 3D conformers are generated for input molecules using
 1603     distance geometry. A common core scaffold, corresponding to a Maximum
 1604     Common Substructure (MCS) or an explicit SMARTS pattern,  is identified
 1605     between a pair of input and reference molecules. The core scaffold atoms in
 1606     input molecules are aligned against the same atoms in the reference molecule.
 1607     The energy of aligned structures are sequentially minimized using the forcefield
 1608     and a quantum chemistry method to generate the final 3D structures.
 1609 
 1610     A Psi4 XYZ format geometry string is automatically generated for each molecule
 1611     in input file. It contains atom symbols and 3D coordinates for each atom in a
 1612     molecule. In addition, the formal charge and spin multiplicity are present in the
 1613     the geometry string. These values are either retrieved from molecule properties
 1614     named 'FormalCharge' and 'SpinMultiplicty' or dynamically calculated for a
 1615     molecule.
 1616 
 1617     The supported input file formats are: Mol (.mol), SD (.sdf, .sd), SMILES (.smi,
 1618     .csv, .tsv, .txt)
 1619 
 1620     The supported output file formats are: SD (.sdf, .sd)
 1621 
 1622 Options:
 1623     -b, --basisSet <text>  [default: auto]
 1624         Basis set to use for constrained energy minimization. Default: 6-31+G**
 1625         for sulfur containing molecules; Otherwise, 6-31G** [ Ref 150 ]. The specified 
 1626         value must be a valid Psi4 basis set. No validation is performed.
 1627         
 1628         The following list shows a representative sample of basis sets available
 1629         in Psi4:
 1630             
 1631             STO-3G, 6-31G, 6-31+G, 6-31++G, 6-31G*, 6-31+G*,  6-31++G*, 
 1632             6-31G**, 6-31+G**, 6-31++G**, 6-311G, 6-311+G, 6-311++G,
 1633             6-311G*, 6-311+G*, 6-311++G*, 6-311G**, 6-311+G**, 6-311++G**,
 1634             cc-pVDZ, cc-pCVDZ, aug-cc-pVDZ, cc-pVDZ-DK, cc-pCVDZ-DK, def2-SVP,
 1635             def2-SVPD, def2-TZVP, def2-TZVPD, def2-TZVPP, def2-TZVPPD
 1636             
 1637     --confParams <Name,Value,...>  [default: auto]
 1638         A comma delimited list of parameter name and value pairs for generating
 1639         initial 3D coordinates for molecules in input file. A common core scaffold is
 1640         identified between a pair of input and reference molecules. The atoms in
 1641         common core scaffold of input molecules are aligned against the reference
 1642         molecule followed by constrained energy minimization using forcefield
 1643         available in RDKit. The 3D structures are subsequently constrained and 
 1644         minimized by a quantum chemistry method available in Psi4.
 1645         
 1646         The supported parameter names along with their default values are shown
 1647         below:
 1648             
 1649             confMethod,ETKDGv2,
 1650             forceField,MMFF, forceFieldMMFFVariant,MMFF94,
 1651             enforceChirality,yes,embedRMSDCutoff,0.5,maxConfs,50,
 1652             useTethers,yes
 1653             
 1654             confMethod,ETKDGv2   [ Possible values: SDG, KDG, ETDG,
 1655                 ETKDG , or ETKDGv2]
 1656             forceField, MMFF   [ Possible values: UFF or MMFF ]
 1657             forceFieldMMFFVariant,MMFF94   [ Possible values: MMFF94 or MMFF94s ]
 1658             enforceChirality,yes   [ Possible values: yes or no ]
 1659             useTethers,yes   [ Possible values: yes or no ]
 1660             
 1661         confMethod: Conformation generation methodology for generating initial 3D
 1662         coordinates. Possible values: Standard Distance Geometry (SDG), Experimental
 1663         Torsion-angle preference with Distance Geometry (ETDG), basic Knowledge-terms
 1664         with Distance Geometry (KDG) and Experimental Torsion-angle preference
 1665         along with basic Knowledge-terms and Distance Geometry (ETKDG or
 1666         ETKDGv2) [Ref 129, 167] .
 1667         
 1668         forceField: Forcefield method to use for constrained energy minimization.
 1669         Possible values: Universal Force Field (UFF) [ Ref 81 ] or Merck Molecular
 1670         Mechanics Force Field [ Ref 83-87 ] .
 1671         
 1672         enforceChirality: Enforce chirality for defined chiral centers during
 1673         forcefield minimization.
 1674         
 1675         maxConfs: Maximum number of conformations to generate for each molecule
 1676         during the generation of an initial 3D conformation ensemble using conformation
 1677         generation methodology. The conformations are constrained and minimized using
 1678         the specified forcefield and a quantum chemistry method. The lowest energy
 1679         conformation is written to the output file.
 1680         
 1681         embedRMSDCutoff: RMSD cutoff for retaining initial set of conformers embedded
 1682         using distance geometry and forcefield minimization. All embedded conformers
 1683         are kept for 'None' value. Otherwise, only those conformers which are different
 1684         from each other by the specified RMSD cutoff, 0.5 by default, are kept. The first
 1685         embedded conformer is always retained.
 1686         
 1687         useTethers: Use tethers to optimize the final embedded conformation by
 1688         applying a series of extra forces to align matching atoms to the positions of
 1689         the core atoms. Otherwise, use simple distance constraints during the
 1690         optimization.
 1691     --energyOut <yes or no>  [default: yes]
 1692         Write out energy values.
 1693     --energyDataFieldLabel <text>  [default: auto]
 1694         Energy data field label for writing energy values. Default: Psi4_Energy (<Units>). 
 1695     --energyUnits <text>  [default: kcal/mol]
 1696         Energy units. Possible values: Hartrees, kcal/mol, kJ/mol, or eV.
 1697     --energyRMSDCalcMode <RMSD or BestRMSD>  [default: RMSD]
 1698         Methodology for calculating RMSD values during the application of RMSD
 1699         cutoff for retaining conformations after the final energy minimization. Possible
 1700         values: RMSD or BestRMSD. This option is ignore during 'None' value of
 1701         '--energyRMSDCutoff' option.
 1702         
 1703         During BestRMSMode mode, the RDKit 'function AllChem.GetBestRMS' is used to
 1704         align and calculate RMSD. This function calculates optimal RMSD for aligning two
 1705         molecules, taking symmetry into account. Otherwise, the RMSD value is calculated
 1706         using 'AllChem.GetConformerRMS' without changing the atom order. A word to the
 1707         wise from RDKit documentation: The AllChem.GetBestRMS function will attempt to
 1708         align all permutations of matching atom orders in both molecules, for some molecules
 1709         it will lead to 'combinatorial explosion'.
 1710     --energyRMSDCutoff <number>  [default: 0.5]
 1711         RMSD cutoff for retaining conformations after the final energy minimization.
 1712         By default, only those conformations which are different from other low
 1713         energy conformation by the specified RMSD cutoff and are with in the 
 1714         specified energy window are kept. The lowest energy conformation is always
 1715         retained. A value of zero keeps all minimized conformations with in the
 1716         specified energy window from the lowest energy.
 1717     --energyRMSDCutoffMode <All or Lowest>  [default: All]
 1718         RMSD cutoff mode for  retaining conformations after the final energy
 1719         minimization. Possible values: All or Lowest. The RMSD values are compared
 1720         against all the selected conformations or the lowest energy conformation during
 1721         'All' and 'Lowest' value of '--energyRMSDCutoffMode'. This option is ignored
 1722         during zero value of '--energyRMSDCutoff'.
 1723         
 1724         By default, only those conformations which all different from all selected
 1725         low energy conformations by the specified RMSD cutoff and are with in the
 1726         specified energy window are kept.
 1727     --energyWindow <number>  [default: auto]
 1728         Psi4 Energy window  for selecting conformers after the final energy minimization.
 1729         The default value is dependent on '--energyUnits': 20 kcal/mol, 83.68 kJ/mol,
 1730         0.8673 ev, or 0.03188 Hartrees. The specified value must be in '--energyUnits'.
 1731     -e, --examples
 1732         Print examples.
 1733     -h, --help
 1734         Print this help message.
 1735     -i, --infile <infile>
 1736         Input file name.
 1737     --infileParams <Name,Value,...>  [default: auto]
 1738         A comma delimited list of parameter name and value pairs for reading
 1739         molecules from files. The supported parameter names for different file
 1740         formats, along with their default values, are shown below:
 1741             
 1742             SD, MOL: removeHydrogens,no,sanitize,yes,strictParsing,yes
 1743             
 1744             SMILES: smilesColumn,1,smilesNameColumn,2,smilesDelimiter,space,
 1745                 smilesTitleLine,auto,sanitize,yes
 1746             
 1747         Possible values for smilesDelimiter: space, comma or tab.
 1748     --maxIters <number>  [default: 50]
 1749         Maximum number of iterations to perform for each molecule or conformer
 1750         during energy minimization by a quantum chemistry method.
 1751     -m, --methodName <text>  [default: auto]
 1752         Method to use for constrained energy minimization. Default: B3LYP [ Ref 150 ].
 1753         The specified value must be a valid Psi4 method name. No validation is
 1754         performed.
 1755         
 1756         The following list shows a representative sample of methods available
 1757         in Psi4:
 1758             
 1759             B1LYP, B2PLYP, B2PLYP-D3BJ, B2PLYP-D3MBJ, B3LYP, B3LYP-D3BJ,
 1760             B3LYP-D3MBJ, CAM-B3LYP, CAM-B3LYP-D3BJ, HF, HF-D3BJ,  HF3c, M05,
 1761             M06, M06-2x, M06-HF, M06-L, MN12-L, MN15, MN15-D3BJ,PBE, PBE0,
 1762             PBEH3c, PW6B95, PW6B95-D3BJ, WB97, WB97X, WB97X-D, WB97X-D3BJ
 1763             
 1764     --mcsParams <Name,Value,...>  [default: auto]
 1765         Parameter values to use for identifying a maximum common substructure
 1766         (MCS) in between a pair of reference and input molecules.In general, it is a
 1767         comma delimited list of parameter name and value pairs. The supported
 1768         parameter names along with their default values are shown below:
 1769             
 1770             atomCompare,CompareElements,bondCompare,CompareOrder,
 1771             maximizeBonds,yes,matchValences,yes,matchChiralTag,no,
 1772             minNumAtoms,1,minNumBonds,0,ringMatchesRingOnly,yes,
 1773             completeRingsOnly,yes,threshold,1.0,timeOut,3600,seedSMARTS,none
 1774             
 1775         Possible values for atomCompare: CompareAny, CompareElements,
 1776         CompareIsotopes. Possible values for bondCompare: CompareAny,
 1777         CompareOrder, CompareOrderExact.
 1778         
 1779         A brief description of MCS parameters taken from RDKit documentation is
 1780         as follows:
 1781             
 1782             atomCompare - Controls match between two atoms
 1783             bondCompare - Controls match between two bonds
 1784             maximizeBonds - Maximize number of bonds instead of atoms
 1785             matchValences - Include atom valences in the MCS match
 1786             matchChiralTag - Include atom chirality in the MCS match
 1787             minNumAtoms - Minimum number of atoms in the MCS match
 1788             minNumBonds - Minimum number of bonds in the MCS match
 1789             ringMatchesRingOnly - Ring bonds only match other ring bonds
 1790             completeRingsOnly - Partial rings not allowed during the match
 1791             threshold - Fraction of the dataset that must contain the MCS
 1792             seedSMARTS - SMARTS string as the seed of the MCS
 1793             timeout - Timeout for the MCS calculation in seconds
 1794             
 1795     --mp <yes or no>  [default: no]
 1796         Use multiprocessing.
 1797          
 1798         By default, input data is retrieved in a lazy manner via mp.Pool.imap()
 1799         function employing lazy RDKit data iterable. This allows processing of
 1800         arbitrary large data sets without any additional requirements memory.
 1801         
 1802         All input data may be optionally loaded into memory by mp.Pool.map()
 1803         before starting worker processes in a process pool by setting the value
 1804         of 'inputDataMode' to 'InMemory' in '--mpParams' option.
 1805         
 1806         A word to the wise: The default 'chunkSize' value of 1 during 'Lazy' input
 1807         data mode may adversely impact the performance. The '--mpParams' section
 1808         provides additional information to tune the value of 'chunkSize'.
 1809     --mpLevel <Molecules or Conformers>  [default: Molecules]
 1810         Perform multiprocessing at molecules or conformers level. Possible values:
 1811         Molecules or Conformers. The 'Molecules' value starts a process pool at the
 1812         molecules level. All conformers of a molecule are processed in a single
 1813         process. The 'Conformers' value, however, starts a process pool at the 
 1814         conformers level. Each conformer of a molecule is processed in an individual
 1815         process in the process pool. The default Psi4 'OutputFile' is set to 'quiet'
 1816         using '--psi4RunParams' for 'Conformers' level. Otherwise, it may generate
 1817         a large number of Psi4 output files.
 1818     --mpParams <Name,Value,...>  [default: auto]
 1819         A comma delimited list of parameter name and value pairs to configure
 1820         multiprocessing.
 1821         
 1822         The supported parameter names along with their default and possible
 1823         values are shown below:
 1824         
 1825             chunkSize, auto
 1826             inputDataMode, Lazy   [ Possible values: InMemory or Lazy ]
 1827             numProcesses, auto   [ Default: mp.cpu_count() ]
 1828         
 1829         These parameters are used by the following functions to configure and
 1830         control the behavior of multiprocessing: mp.Pool(), mp.Pool.map(), and
 1831         mp.Pool.imap().
 1832         
 1833         The chunkSize determines chunks of input data passed to each worker
 1834         process in a process pool by mp.Pool.map() and mp.Pool.imap() functions.
 1835         The default value of chunkSize is dependent on the value of 'inputDataMode'.
 1836         
 1837         The mp.Pool.map() function, invoked during 'InMemory' input data mode,
 1838         automatically converts RDKit data iterable into a list, loads all data into
 1839         memory, and calculates the default chunkSize using the following method
 1840         as shown in its code:
 1841         
 1842             chunkSize, extra = divmod(len(dataIterable), len(numProcesses) * 4)
 1843             if extra: chunkSize += 1
 1844         
 1845         For example, the default chunkSize will be 7 for a pool of 4 worker processes
 1846         and 100 data items.
 1847         
 1848         The mp.Pool.imap() function, invoked during 'Lazy' input data mode, employs
 1849         'lazy' RDKit data iterable to retrieve data as needed, without loading all the
 1850         data into memory. Consequently, the size of input data is not known a priori.
 1851         It's not possible to estimate an optimal value for the chunkSize. The default 
 1852         chunkSize is set to 1.
 1853         
 1854         The default value for the chunkSize during 'Lazy' data mode may adversely
 1855         impact the performance due to the overhead associated with exchanging
 1856         small chunks of data. It is generally a good idea to explicitly set chunkSize to
 1857         a larger value during 'Lazy' input data mode, based on the size of your input
 1858         data and number of processes in the process pool.
 1859         
 1860         The mp.Pool.map() function waits for all worker processes to process all
 1861         the data and return the results. The mp.Pool.imap() function, however,
 1862         returns the the results obtained from worker processes as soon as the
 1863         results become available for specified chunks of data.
 1864         
 1865         The order of data in the results returned by both mp.Pool.map() and 
 1866         mp.Pool.imap() functions always corresponds to the input data.
 1867     -o, --outfile <outfile>
 1868         Output file name.
 1869     --outfileParams <Name,Value,...>  [default: auto]
 1870         A comma delimited list of parameter name and value pairs for writing
 1871         molecules to files. The supported parameter names for different file
 1872         formats, along with their default values, are shown below:
 1873             
 1874             SD: kekulize,yes,forceV3000,no
 1875             
 1876     --overwrite
 1877         Overwrite existing files.
 1878     --precision <number>  [default: 6]
 1879         Floating point precision for writing energy values.
 1880     --psi4OptionsParams <Name,Value,...>  [default: none]
 1881         A comma delimited list of Psi4 option name and value pairs for setting
 1882         global and module options. The names are 'option_name' for global options
 1883         and 'module_name__option_name' for options local to a module. The
 1884         specified option names must be valid Psi4 names. No validation is
 1885         performed.
 1886         
 1887         The specified option name and  value pairs are processed and passed to
 1888         psi4.set_options() as a dictionary. The supported value types are float,
 1889         integer, boolean, or string. The float value string is converted into a float.
 1890         The valid values for a boolean string are yes, no, true, false, on, or off. 
 1891     --psi4RunParams <Name,Value,...>  [default: auto]
 1892         A comma delimited list of parameter name and value pairs for configuring
 1893         Psi4 jobs.
 1894         
 1895         The supported parameter names along with their default and possible
 1896         values are shown below:
 1897              
 1898             MemoryInGB, 1
 1899             NumThreads, 1
 1900             OutputFile, auto   [ Possible  values: stdout, quiet, or FileName ]
 1901             ScratchDir, auto   [ Possivle values: DirName]
 1902             RemoveOutputFile, yes   [ Possible values: yes, no, true, or false]
 1903             
 1904         These parameters control the runtime behavior of Psi4.
 1905         
 1906         The default file name for 'OutputFile' is <InFileRoot>_Psi4.out. The PID
 1907         is appended to output file name during multiprocessing as shown:
 1908         <InFileRoot>_Psi4_<PIDNum>.out. The 'stdout' value for 'OutputType'
 1909         sends Psi4 output to stdout. The 'quiet' or 'devnull' value suppresses
 1910         all Psi4 output. The 'OutputFile' is set to 'quiet' for 'auto' value during 
 1911         'Conformers' of '--mpLevel' option.
 1912         
 1913         The default 'Yes' value of 'RemoveOutputFile' option forces the removal
 1914         of any existing Psi4 before creating new files to append output from
 1915         multiple Psi4 runs.
 1916         
 1917         The option 'ScratchDir' is a directory path to the location of scratch
 1918         files. The default value corresponds to Psi4 default. It may be used to
 1919         override the deafult path.
 1920     -q, --quiet <yes or no>  [default: no]
 1921         Use quiet mode. The warning and information messages will not be printed.
 1922     -r, --reffile <reffile>
 1923         Reference input file name containing a 3D reference molecule. A common
 1924         core scaffold must be present in a pair of an input and reference molecules.
 1925         Otherwise, no constrained minimization is performed on the input molecule.
 1926     --reference <text>  [default: auto]
 1927         Reference wave function to use for energy calculation. Default: RHF or UHF.
 1928         The default values are Restricted Hartree-Fock (RHF) for closed-shell molecules
 1929         with all electrons paired and Unrestricted Hartree-Fock (UHF) for open-shell
 1930         molecules with unpaired electrons.
 1931         
 1932         The specified value must be a valid Psi4 reference wave function. No validation
 1933         is performed. For example: ROHF, CUHF, RKS, etc.
 1934         
 1935         The spin multiplicity determines the default value of reference wave function
 1936         for input molecules. It is calculated from number of free radical electrons using
 1937         Hund's rule of maximum multiplicity defined as 2S + 1 where S is the total
 1938         electron spin. The total spin is 1/2 the number of free radical electrons in a 
 1939         molecule. The value of 'SpinMultiplicity' molecule property takes precedence
 1940         over the calculated value of spin multiplicity.
 1941     -s, --scaffold <auto or SMARTS>  [default: auto]
 1942         Common core scaffold between a pair of input and reference molecules used for
 1943         constrained minimization of molecules in input file. Possible values: Auto or a
 1944         valid SMARTS pattern. The common core scaffold is automatically detected
 1945         corresponding to the Maximum Common Substructure (MCS) between a pair of
 1946         reference and input molecules. A valid SMARTS pattern may be optionally specified
 1947         for the common core scaffold.
 1948     --scaffoldRMSDOut <yes or no>  [default: No]
 1949         Write out RMSD value for common core alignment between a pair of input and
 1950         reference molecules.
 1951     -w, --workingdir <dir>
 1952         Location of working directory which defaults to the current directory.
 1953 
 1954 Examples:
 1955     To generate conformers by performing constrained energy minimization for molecules
 1956     in a SMILES file against a reference 3D molecule in a SD file using a common core
 1957     scaffold between pairs of input and reference molecules identified using MCS,
 1958     generating up to 50 conformations using ETKDGv2 methodology followed by initial
 1959     MMFF forcefield minimization and  final energy minimization using B3LYP/6-31G**
 1960     and B3LYP/6-31+G** for non-sulfur and sulfur containing molecules, applying
 1961     energy RMSD cutoff of 0.5 and energy window value value of 20 kcal/mol, and
 1962     write out a SD file:
 1963 
 1964        %  Psi4GenerateConstrainedConformers.py  -i Psi4SampleAlkanes.smi
 1965           -r Psi4SampleEthane3D.sdf  -o Psi4SampleAlkanesOut.sdf
 1966 
 1967     To run the first example in a quiet mode and write out a SD file, type:
 1968 
 1969        %  Psi4GenerateConstrainedConformers.py  -q yes -i Psi4SampleAlkanes.smi
 1970           -r Psi4SampleEthane3D.sdf  -o Psi4SampleAlkanesOut.sdf
 1971 
 1972     To rerun the first example in multiprocessing mode on all available CPUs
 1973     without loading all data into memory and write out a SD file, type:
 1974 
 1975        %  Psi4GenerateConstrainedConformers.py  --mp yes
 1976           -i Psi4SampleAlkanes.smi -r Psi4SampleEthane3D.sdf
 1977           -o Psi4SampleAlkanesOut.sdf
 1978 
 1979     To run the first example in multiprocessing mode on all available CPUs
 1980     by loading all data into memory and write out a SD file, type:
 1981 
 1982        %  Psi4GenerateConstrainedConformers.py  --mp yes --mpParams
 1983           "inputDataMode,InMemory"-i Psi4SampleAlkanes.smi
 1984           -r Psi4SampleEthane3D.sdf -o Psi4SampleAlkanesOut.sdf
 1985 
 1986     To rerun the first example in multiprocessing mode on specific number of
 1987     CPUs and chunk size without loading all data into memory and write out a SD file,
 1988     type:
 1989 
 1990        %  Psi4GenerateConstrainedConformers.py  --mp yes --mpParams
 1991           "inputDataMode,Lazy,numProcesses,4,chunkSize,8"
 1992           -i Psi4SampleAlkanes.smi -r Psi4SampleEthane3D.sdf
 1993           -o Psi4SampleAlkanesOut.sdf
 1994 
 1995     To rerun the first example using an explicit SMARTS string for a common core
 1996     scaffold and write out a SD file, type:
 1997 
 1998        %  Psi4GenerateConstrainedConformers.py  --scaffold "CC"
 1999           -i Psi4SampleAlkanes.smi -r Psi4SampleEthane3D.sdf
 2000           -o Psi4SampleAlkanesOut.sdf
 2001 
 2002     To run the first example using a specific set of parameters for generating
 2003     an initial set of conformers followed by energy minimization using forcefield
 2004     and a quantum chemistry method and write out a SD file type:
 2005 
 2006        %  Psi4GenerateConstrainedConformers.py  --confParams "confMethod,
 2007           ETKDGv2, forceField,MMFF, forceFieldMMFFVariant,MMFF94s, maxConfs,20,
 2008           embedRMSDCutoff,0.5" --energyUnits "kJ/mol" -m B3LYP
 2009           -b "6-31+G**" --maxIters 20 --energyRMSDCutoff 0.5
 2010           --energyRMSDCutoffMode All -i Psi4SampleAlkanes.sdf
 2011           -r Psi4SampleEthane3D.sdf -o Psi4SampleAlkanesOut.sdf
 2012 
 2013     To rerun the first example using molecules in a CSV SMILES file, SMILES
 2014     strings in column 1, name in column2, and write out a SD file, type:
 2015 
 2016        %  Psi4GenerateConstrainedConformers.py  --infileParams
 2017           "smilesDelimiter,comma,smilesTitleLine,yes,smilesColumn,1,
 2018           smilesNameColumn,2" -i Psi4SampleAlkanes.csv
 2019           -r Psi4SampleEthane3D.sdf -o Psi4SampleAlkanesOut.sdf
 2020 
 2021 Author:
 2022 
 2023     Manish Sud(msud@san.rr.com)
 2024 
 2025 See also:
 2026     Psi4CalculateEnergy.py, Psi4CalculatePartialCharges.py, Psi4GenerateConformers.py,
 2027     Psi4PerformConstrainedMinimization.py, Psi4PerformMinimization.py
 2028 
 2029 Copyright:
 2030     Copyright (C) 2026 Manish Sud. All rights reserved.
 2031 
 2032     The functionality available in this script is implemented using Psi4, an
 2033     open source quantum chemistry software package, and RDKit, an open
 2034     source toolkit for cheminformatics developed by Greg Landrum.
 2035 
 2036     This file is part of MayaChemTools.
 2037 
 2038     MayaChemTools is free software; you can redistribute it and/or modify it under
 2039     the terms of the GNU Lesser General Public License as published by the Free
 2040     Software Foundation; either version 3 of the License, or (at your option) any
 2041     later version.
 2042 
 2043 """
 2044 
 2045 if __name__ == "__main__":
 2046     main()