MayaChemTools

    1 #!/bin/env python
    2 #
    3 # File: RDKitPerformConstrainedMinimization.py
    4 # Author: Manish Sud <msud@san.rr.com>
    5 #
    6 # Copyright (C) 2026 Manish Sud. All rights reserved.
    7 #
    8 # The functionality available in this script is implemented using RDKit, an
    9 # open source toolkit for cheminformatics developed by Greg Landrum.
   10 #
   11 # This file is part of MayaChemTools.
   12 #
   13 # MayaChemTools is free software; you can redistribute it and/or modify it under
   14 # the terms of the GNU Lesser General Public License as published by the Free
   15 # Software Foundation; either version 3 of the License, or (at your option) any
   16 # later version.
   17 #
   18 # MayaChemTools is distributed in the hope that it will be useful, but without
   19 # any warranty; without even the implied warranty of merchantability of fitness
   20 # for a particular purpose.  See the GNU Lesser General Public License for more
   21 # details.
   22 #
   23 # You should have received a copy of the GNU Lesser General Public License
   24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
   25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
   26 # Boston, MA, 02111-1307, USA.
   27 #
   28 
   29 from __future__ import print_function
   30 
   31 import os
   32 import sys
   33 import time
   34 import re
   35 import multiprocessing as mp
   36 
   37 # RDKit imports...
   38 try:
   39     from rdkit import rdBase
   40     from rdkit import Chem
   41     from rdkit.Chem import AllChem
   42     from rdkit.Chem import rdFMCS
   43 except ImportError as ErrMsg:
   44     sys.stderr.write("\nFailed to import RDKit module/package: %s\n" % ErrMsg)
   45     sys.stderr.write("Check/update your RDKit environment and try again.\n\n")
   46     sys.exit(1)
   47 
   48 # MayaChemTools imports...
   49 sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python"))
   50 try:
   51     from docopt import docopt
   52     import MiscUtil
   53     import RDKitUtil
   54 except ImportError as ErrMsg:
   55     sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg)
   56     sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n")
   57     sys.exit(1)
   58 
   59 ScriptName = os.path.basename(sys.argv[0])
   60 Options = {}
   61 OptionsInfo = {}
   62 
   63 
   64 def main():
   65     """Start execution of the script."""
   66 
   67     MiscUtil.PrintInfo(
   68         "\n%s (RDKit v%s; MayaChemTools v%s; %s): Starting...\n"
   69         % (ScriptName, rdBase.rdkitVersion, MiscUtil.GetMayaChemToolsVersion(), time.asctime())
   70     )
   71 
   72     (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime()
   73 
   74     # Retrieve command line arguments and options...
   75     RetrieveOptions()
   76 
   77     # Process and validate command line arguments and options...
   78     ProcessOptions()
   79 
   80     # Perform actions required by the script...
   81     PerformConstrainedMinimization()
   82 
   83     MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName)
   84     MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime))
   85 
   86 
   87 def PerformConstrainedMinimization():
   88     """Perform constrained minimization."""
   89 
   90     # Read and validate reference molecule...
   91     RefMol = RetrieveReferenceMolecule()
   92 
   93     # Setup a molecule reader for input file...
   94     MiscUtil.PrintInfo("\nProcessing file %s..." % OptionsInfo["Infile"])
   95     OptionsInfo["InfileParams"]["AllowEmptyMols"] = True
   96     Mols = RDKitUtil.ReadMolecules(OptionsInfo["Infile"], **OptionsInfo["InfileParams"])
   97 
   98     # Set up a molecule writer...
   99     Writer = RDKitUtil.MoleculesWriter(OptionsInfo["Outfile"], **OptionsInfo["OutfileParams"])
  100     if Writer is None:
  101         MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OptionsInfo["Outfile"])
  102     MiscUtil.PrintInfo("Generating file %s..." % OptionsInfo["Outfile"])
  103 
  104     MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount = ProcessMolecules(RefMol, Mols, Writer)
  105 
  106     if Writer is not None:
  107         Writer.close()
  108 
  109     MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount)
  110     MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount)
  111     MiscUtil.PrintInfo("Number of molecules with missing core scaffold: %d" % CoreScaffoldMissingCount)
  112     MiscUtil.PrintInfo(
  113         "Number of molecules failed during conformation generation or minimization: %d" % MinimizationFailedCount
  114     )
  115     MiscUtil.PrintInfo(
  116         "Number of ignored molecules: %d"
  117         % (MolCount - ValidMolCount + CoreScaffoldMissingCount + MinimizationFailedCount)
  118     )
  119 
  120 
  121 def ProcessMolecules(RefMol, Mols, Writer):
  122     """Process and minimize molecules."""
  123 
  124     if OptionsInfo["MPMode"]:
  125         return ProcessMoleculesUsingMultipleProcesses(RefMol, Mols, Writer)
  126     else:
  127         return ProcessMoleculesUsingSingleProcess(RefMol, Mols, Writer)
  128 
  129 
  130 def ProcessMoleculesUsingSingleProcess(RefMol, Mols, Writer):
  131     """Process and minimize molecules using a single process."""
  132 
  133     (MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount) = [0] * 4
  134 
  135     for Mol in Mols:
  136         MolCount += 1
  137 
  138         if Mol is None:
  139             continue
  140 
  141         if RDKitUtil.IsMolEmpty(Mol):
  142             if not OptionsInfo["QuietMode"]:
  143                 MolName = RDKitUtil.GetMolName(Mol, MolCount)
  144                 MiscUtil.PrintWarning("Ignoring empty molecule: %s" % MolName)
  145             continue
  146         ValidMolCount += 1
  147 
  148         # Setup a reference molecule core containing common scaffold atoms...
  149         RefMolCore = SetupCoreScaffold(RefMol, Mol, MolCount)
  150         if RefMolCore is None:
  151             CoreScaffoldMissingCount += 1
  152             continue
  153 
  154         Mol, CalcStatus, Energy, ScaffoldEmbedRMSD = ConstrainAndMinimizeMolecule(Mol, RefMolCore, MolCount)
  155 
  156         if not CalcStatus:
  157             MinimizationFailedCount += 1
  158             continue
  159 
  160         WriteMolecule(Writer, Mol, Energy, ScaffoldEmbedRMSD)
  161 
  162     return (MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount)
  163 
  164 
  165 def ProcessMoleculesUsingMultipleProcesses(RefMol, Mols, Writer):
  166     """Process and minimize molecules using multiprocessing."""
  167 
  168     MPParams = OptionsInfo["MPParams"]
  169 
  170     # Setup data for initializing a worker process...
  171     MiscUtil.PrintInfo("Encoding options info and reference molecule...")
  172 
  173     OptionsInfo["EncodedRefMol"] = RDKitUtil.MolToBase64EncodedMolString(RefMol)
  174     InitializeWorkerProcessArgs = (
  175         MiscUtil.ObjectToBase64EncodedString(Options),
  176         MiscUtil.ObjectToBase64EncodedString(OptionsInfo),
  177     )
  178 
  179     # Setup a encoded mols data iterable for a worker process...
  180     WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(Mols)
  181 
  182     # Setup process pool along with data initialization for each process...
  183     MiscUtil.PrintInfo(
  184         "\nConfiguring multiprocessing using %s method..."
  185         % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()")
  186     )
  187     MiscUtil.PrintInfo(
  188         "NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n"
  189         % (
  190             MPParams["NumProcesses"],
  191             MPParams["InputDataMode"],
  192             ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"]),
  193         )
  194     )
  195 
  196     ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeWorkerProcess, InitializeWorkerProcessArgs)
  197 
  198     # Start processing...
  199     if re.match("^Lazy$", MPParams["InputDataMode"], re.I):
  200         Results = ProcessPool.imap(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
  201     elif re.match("^InMemory$", MPParams["InputDataMode"], re.I):
  202         Results = ProcessPool.map(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
  203     else:
  204         MiscUtil.PrintError(
  205             'The value, %s, specified for "--inputDataMode" is not supported.' % (MPParams["InputDataMode"])
  206         )
  207 
  208     (MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount) = [0] * 4
  209     for Result in Results:
  210         MolCount += 1
  211         MolIndex, EncodedMol, CoreScaffoldMissingStatus, CalcStatus, Energy, ScaffoldEmbedRMSD = Result
  212 
  213         if EncodedMol is None:
  214             continue
  215         ValidMolCount += 1
  216 
  217         if CoreScaffoldMissingStatus:
  218             CoreScaffoldMissingStatus += 1
  219             continue
  220 
  221         if not CalcStatus:
  222             MinimizationFailedCount += 1
  223             continue
  224 
  225         Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
  226         WriteMolecule(Writer, Mol, Energy, ScaffoldEmbedRMSD)
  227 
  228     return (MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount)
  229 
  230 
  231 def InitializeWorkerProcess(*EncodedArgs):
  232     """Initialize data for a worker process."""
  233 
  234     global Options, OptionsInfo
  235 
  236     MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid())
  237 
  238     # Decode Options and OptionInfo...
  239     Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0])
  240     OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1])
  241 
  242     # Decode RefMol...
  243     OptionsInfo["RefMol"] = RDKitUtil.MolFromBase64EncodedMolString(OptionsInfo["EncodedRefMol"])
  244 
  245 
  246 def WorkerProcess(EncodedMolInfo):
  247     """Process data for a worker process."""
  248 
  249     MolIndex, EncodedMol = EncodedMolInfo
  250 
  251     CoreScaffoldMissingStatus = False
  252     CalcStatus = False
  253     Energy = None
  254     ScaffoldEmbedRMSD = None
  255 
  256     if EncodedMol is None:
  257         return [MolIndex, None, CoreScaffoldMissingStatus, CalcStatus, Energy, ScaffoldEmbedRMSD]
  258 
  259     RefMol = OptionsInfo["RefMol"]
  260 
  261     Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
  262     if RDKitUtil.IsMolEmpty(Mol):
  263         if not OptionsInfo["QuietMode"]:
  264             MolName = RDKitUtil.GetMolName(Mol, (MolIndex + 1))
  265             MiscUtil.PrintWarning("Ignoring empty molecule: %s" % MolName)
  266         return [MolIndex, None, CoreScaffoldMissingStatus, CalcStatus, Energy, ScaffoldEmbedRMSD]
  267 
  268     # Setup a reference molecule core containing common scaffold atoms...
  269     RefMolCore = SetupCoreScaffold(RefMol, Mol, (MolIndex + 1))
  270     if RefMolCore is None:
  271         CoreScaffoldMissingStatus = True
  272         return [MolIndex, None, CalcStatus, CoreScaffoldMissingStatus, Energy, ScaffoldEmbedRMSD]
  273 
  274     Mol, CalcStatus, Energy, ScaffoldEmbedRMSD = ConstrainAndMinimizeMolecule(Mol, RefMolCore, (MolIndex + 1))
  275 
  276     return [
  277         MolIndex,
  278         RDKitUtil.MolToBase64EncodedMolString(
  279             Mol, PropertyPickleFlags=Chem.PropertyPickleOptions.MolProps | Chem.PropertyPickleOptions.PrivateProps
  280         ),
  281         CoreScaffoldMissingStatus,
  282         CalcStatus,
  283         Energy,
  284         ScaffoldEmbedRMSD,
  285     ]
  286 
  287 
  288 def RetrieveReferenceMolecule():
  289     """Retrieve and validate reference molecule."""
  290 
  291     RefFile = OptionsInfo["RefFile"]
  292 
  293     MiscUtil.PrintInfo("\nProcessing file %s..." % (RefFile))
  294     OptionsInfo["InfileParams"]["AllowEmptyMols"] = False
  295     ValidRefMols, RefMolCount, ValidRefMolCount = RDKitUtil.ReadAndValidateMolecules(
  296         RefFile, **OptionsInfo["InfileParams"]
  297     )
  298 
  299     if ValidRefMolCount == 0:
  300         MiscUtil.PrintError("The reference file, %s, contains no valid molecules." % RefFile)
  301     elif ValidRefMolCount > 1:
  302         MiscUtil.PrintWarning(
  303             "The reference file, %s, contains, %d, valid molecules. Using first molecule as the reference molecule..."
  304             % (RefFile, ValidRefMolCount)
  305         )
  306 
  307     RefMol = ValidRefMols[0]
  308 
  309     if OptionsInfo["UseScaffoldSMARTS"]:
  310         ScaffoldPatternMol = Chem.MolFromSmarts(OptionsInfo["ScaffoldSMARTS"])
  311         if ScaffoldPatternMol is None:
  312             MiscUtil.PrintError(
  313                 'Failed to create scaffold pattern molecule. The scaffold SMARTS pattern, %s, specified using "-s, --scaffold" option is not valid.'
  314                 % (OptionsInfo["ScaffoldSMARTS"])
  315             )
  316 
  317         if not RefMol.HasSubstructMatch(ScaffoldPatternMol):
  318             MiscUtil.PrintError(
  319                 'The scaffold SMARTS pattern, %s, specified using "-s, --scaffold" option, is missing in the first valid reference molecule.'
  320                 % (OptionsInfo["ScaffoldSMARTS"])
  321             )
  322 
  323     return RefMol
  324 
  325 
  326 def SetupCoreScaffold(RefMol, Mol, MolCount):
  327     """Setup a reference molecule core containing common scaffold atoms between
  328     a pair of molecules."""
  329 
  330     if OptionsInfo["UseScaffoldMCS"]:
  331         return SetupCoreScaffoldByMCS(RefMol, Mol, MolCount)
  332     elif OptionsInfo["UseScaffoldSMARTS"]:
  333         return SetupCoreScaffoldBySMARTS(RefMol, Mol, MolCount)
  334     else:
  335         MiscUtil.PrintError(
  336             'The  value, %s, specified for  "-s, --scaffold" option is not supported.' % (OptionsInfo["Scaffold"])
  337         )
  338 
  339 
  340 def SetupCoreScaffoldByMCS(RefMol, Mol, MolCount):
  341     """Setup a reference molecule core containing common scaffold atoms between
  342     a pair of molecules using MCS."""
  343 
  344     MCSParams = OptionsInfo["MCSParams"]
  345     Mols = [RefMol, Mol]
  346 
  347     MCSResultObject = rdFMCS.FindMCS(
  348         Mols,
  349         maximizeBonds=MCSParams["MaximizeBonds"],
  350         threshold=MCSParams["Threshold"],
  351         timeout=MCSParams["TimeOut"],
  352         verbose=MCSParams["Verbose"],
  353         matchValences=MCSParams["MatchValences"],
  354         ringMatchesRingOnly=MCSParams["RingMatchesRingOnly"],
  355         completeRingsOnly=MCSParams["CompleteRingsOnly"],
  356         matchChiralTag=MCSParams["MatchChiralTag"],
  357         atomCompare=MCSParams["AtomCompare"],
  358         bondCompare=MCSParams["BondCompare"],
  359         seedSmarts=MCSParams["SeedSMARTS"],
  360     )
  361 
  362     if MCSResultObject.canceled:
  363         if not OptionsInfo["QuietMode"]:
  364             MiscUtil.PrintWarning(
  365                 'MCS failed to identify a common core scaffold between reference moecule and input molecule %s. Specify a different set of parameters using "-m, --mcsParams" option and try again.'
  366                 % (RDKitUtil.GetMolName(Mol, MolCount))
  367             )
  368         return None
  369 
  370     CoreNumAtoms = MCSResultObject.numAtoms
  371     CoreNumBonds = MCSResultObject.numBonds
  372 
  373     SMARTSCore = MCSResultObject.smartsString
  374 
  375     if not len(SMARTSCore):
  376         if not OptionsInfo["QuietMode"]:
  377             MiscUtil.PrintWarning(
  378                 'MCS failed to identify a common core scaffold between reference moecule and input molecule %s. Specify a different set of parameters using "-m, --mcsParams" option and try again.'
  379                 % (RDKitUtil.GetMolName(Mol, MolCount))
  380             )
  381         return None
  382 
  383     if CoreNumAtoms < MCSParams["MinNumAtoms"]:
  384         if not OptionsInfo["QuietMode"]:
  385             MiscUtil.PrintWarning(
  386                 'Number of atoms, %d, in core scaffold identified by MCS is less than, %d, as specified by "minNumAtoms" parameter in  "-m, --mcsParams" option.'
  387                 % (CoreNumAtoms, MCSParams["MinNumAtoms"])
  388             )
  389         return None
  390 
  391     if CoreNumBonds < MCSParams["MinNumBonds"]:
  392         if not OptionsInfo["QuietMode"]:
  393             MiscUtil.PrintWarning(
  394                 'Number of bonds, %d, in core scaffold identified by MCS is less than, %d, as specified by "minNumBonds" parameter in  "-m, --mcsParams" option.'
  395                 % (CoreNumBonds, MCSParams["MinNumBonds"])
  396             )
  397         return None
  398 
  399     return GenerateCoreMol(RefMol, SMARTSCore)
  400 
  401 
  402 def SetupCoreScaffoldBySMARTS(RefMol, Mol, MolCount):
  403     """Setup a reference molecule core containing common scaffold atoms between
  404     a pair of molecules using specified SMARTS."""
  405 
  406     if OptionsInfo["ScaffoldPatternMol"] is None:
  407         OptionsInfo["ScaffoldPatternMol"] = Chem.MolFromSmarts(OptionsInfo["ScaffoldSMARTS"])
  408 
  409     if not Mol.HasSubstructMatch(OptionsInfo["ScaffoldPatternMol"]):
  410         if not OptionsInfo["QuietMode"]:
  411             MiscUtil.PrintWarning(
  412                 'The scaffold SMARTS pattern, %s, specified using "-s, --scaffold" option is missing in input molecule,  %s.'
  413                 % (OptionsInfo["ScaffoldSMARTS"], RDKitUtil.GetMolName(Mol, MolCount))
  414             )
  415         return None
  416 
  417     return GenerateCoreMol(RefMol, OptionsInfo["ScaffoldSMARTS"])
  418 
  419 
  420 def GenerateCoreMol(RefMol, SMARTSCore):
  421     """Generate core molecule for embedding."""
  422 
  423     # Create a molecule corresponding to core atoms...
  424     SMARTSCoreMol = Chem.MolFromSmarts(SMARTSCore)
  425 
  426     # Setup a ref molecule containing core atoms with dummy atoms as
  427     # attachment points for atoms around the core atoms...
  428     Core = AllChem.ReplaceSidechains(Chem.RemoveHs(RefMol), SMARTSCoreMol)
  429 
  430     # Delete any substructures containing dummy atoms..
  431     RefMolCore = AllChem.DeleteSubstructs(Core, Chem.MolFromSmiles("*"))
  432     RefMolCore.UpdatePropertyCache()
  433 
  434     return RefMolCore
  435 
  436 
  437 def ConstrainAndMinimizeMolecule(Mol, RefMolCore, MolNum=None):
  438     "Constrain and minimize molecule."
  439 
  440     if OptionsInfo["AddHydrogens"]:
  441         Mol = Chem.AddHs(Mol, addCoords=True)
  442 
  443     # Setup forcefield function to use for constrained minimization...
  444     ForceFieldFunction = None
  445     ForceFieldName = None
  446     if OptionsInfo["UseUFF"]:
  447         ForceFieldFunction = lambda mol, confId=-1: AllChem.UFFGetMoleculeForceField(mol, confId=confId)
  448         ForceFieldName = "UFF"
  449     else:
  450         ForceFieldFunction = lambda mol, confId=-1: AllChem.MMFFGetMoleculeForceField(
  451             mol, AllChem.MMFFGetMoleculeProperties(mol, mmffVariant=OptionsInfo["MMFFVariant"]), confId=confId
  452         )
  453         ForceFieldName = "MMFF"
  454 
  455     if ForceFieldFunction is None:
  456         if not OptionsInfo["QuietMode"]:
  457             MiscUtil.PrintWarning(
  458                 "Failed to setup forcefield %s for molecule: %s\n" % (ForceFieldName, RDKitUtil.GetMolName(Mol, MolNum))
  459             )
  460         return (None, False, None, None)
  461 
  462     MaxConfs = OptionsInfo["MaxConfs"]
  463     EnforceChirality = OptionsInfo["EnforceChirality"]
  464     UseExpTorsionAnglePrefs = OptionsInfo["UseExpTorsionAnglePrefs"]
  465     ETVersion = OptionsInfo["ETVersion"]
  466     UseBasicKnowledge = OptionsInfo["UseBasicKnowledge"]
  467     UseTethers = OptionsInfo["UseTethers"]
  468 
  469     CalcEnergyMap = {}
  470     MolConfsMap = {}
  471     ConfIDs = [ConfID for ConfID in range(0, MaxConfs)]
  472 
  473     for ConfID in ConfIDs:
  474         try:
  475             MolConf = Chem.Mol(Mol)
  476             AllChem.ConstrainedEmbed(
  477                 MolConf,
  478                 RefMolCore,
  479                 useTethers=UseTethers,
  480                 coreConfId=-1,
  481                 randomseed=ConfID,
  482                 getForceField=ForceFieldFunction,
  483                 enforceChirality=EnforceChirality,
  484                 useExpTorsionAnglePrefs=UseExpTorsionAnglePrefs,
  485                 useBasicKnowledge=UseBasicKnowledge,
  486                 ETversion=ETVersion,
  487             )
  488         except (ValueError, RuntimeError, Chem.rdchem.KekulizeException) as ErrMsg:
  489             if not OptionsInfo["QuietMode"]:
  490                 MolName = RDKitUtil.GetMolName(Mol, MolNum)
  491                 MiscUtil.PrintWarning(
  492                     "Constrained embedding couldn't  be performed for molecule %s:\n%s\n"
  493                     % (RDKitUtil.GetMolName(Mol, MolNum), ErrMsg)
  494                 )
  495             return (None, False, None, None)
  496 
  497         EnergyStatus, Energy = GetEnergy(MolConf)
  498 
  499         if not EnergyStatus:
  500             if not OptionsInfo["QuietMode"]:
  501                 MolName = RDKitUtil.GetMolName(Mol, MolNum)
  502                 MiscUtil.PrintWarning(
  503                     "Failed to retrieve calculated energy for conformation number %d of molecule %s. Try again after removing any salts or cleaing up the molecule...\n"
  504                     % (ConfID, MolName)
  505                 )
  506             return (None, False, None, None)
  507 
  508         CalcEnergyMap[ConfID] = Energy
  509         MolConfsMap[ConfID] = MolConf
  510 
  511     SortedConfIDs = sorted(ConfIDs, key=lambda ConfID: CalcEnergyMap[ConfID])
  512     MinEnergyConfID = SortedConfIDs[0]
  513 
  514     MinEnergy = "%.2f" % CalcEnergyMap[MinEnergyConfID] if OptionsInfo["EnergyOut"] else None
  515     MinEnergyMolConf = MolConfsMap[MinEnergyConfID]
  516 
  517     ScaffoldEmbedRMSD = "%.4f" % float(MinEnergyMolConf.GetProp("EmbedRMS")) if OptionsInfo["ScaffoldRMSDOut"] else None
  518     MinEnergyMolConf.ClearProp("EmbedRMS")
  519 
  520     if OptionsInfo["RemoveHydrogens"]:
  521         MinEnergyMolConf = Chem.RemoveHs(MinEnergyMolConf)
  522 
  523     return (MinEnergyMolConf, True, MinEnergy, ScaffoldEmbedRMSD)
  524 
  525 
  526 def GetEnergy(Mol, ConfID=None):
  527     """Calculate energy."""
  528 
  529     Status = True
  530     Energy = None
  531 
  532     if ConfID is None:
  533         ConfID = -1
  534 
  535     if OptionsInfo["UseUFF"]:
  536         UFFMoleculeForcefield = AllChem.UFFGetMoleculeForceField(Mol, confId=ConfID)
  537         if UFFMoleculeForcefield is None:
  538             Status = False
  539         else:
  540             Energy = UFFMoleculeForcefield.CalcEnergy()
  541     elif OptionsInfo["UseMMFF"]:
  542         MMFFMoleculeProperties = AllChem.MMFFGetMoleculeProperties(Mol, mmffVariant=OptionsInfo["MMFFVariant"])
  543         MMFFMoleculeForcefield = AllChem.MMFFGetMoleculeForceField(Mol, MMFFMoleculeProperties, confId=ConfID)
  544         if MMFFMoleculeForcefield is None:
  545             Status = False
  546         else:
  547             Energy = MMFFMoleculeForcefield.CalcEnergy()
  548     else:
  549         MiscUtil.PrintError(
  550             "Couldn't retrieve conformer energy: Specified forcefield, %s, is not supported" % OptionsInfo["ForceField"]
  551         )
  552 
  553     return (Status, Energy)
  554 
  555 
  556 def WriteMolecule(
  557     Writer,
  558     Mol,
  559     Energy=None,
  560     ScaffoldEmbedRMSD=None,
  561     ConfID=None,
  562 ):
  563     """Write molecule."""
  564 
  565     if ScaffoldEmbedRMSD is not None:
  566         Mol.SetProp("CoreScaffoldEmbedRMSD", ScaffoldEmbedRMSD)
  567 
  568     if Energy is not None:
  569         Mol.SetProp(OptionsInfo["EnergyLabel"], Energy)
  570 
  571     if ConfID is None:
  572         Writer.write(Mol)
  573     else:
  574         Writer.write(Mol, confId=ConfID)
  575 
  576 
  577 def ProcessMCSParameters():
  578     """Set up and process MCS parameters."""
  579 
  580     SetupMCSParameters()
  581     ProcessSpecifiedMCSParameters()
  582 
  583 
  584 def SetupMCSParameters():
  585     """Set up default MCS parameters."""
  586 
  587     OptionsInfo["MCSParams"] = {
  588         "MaximizeBonds": True,
  589         "Threshold": 0.9,
  590         "TimeOut": 3600,
  591         "Verbose": False,
  592         "MatchValences": True,
  593         "MatchChiralTag": False,
  594         "RingMatchesRingOnly": True,
  595         "CompleteRingsOnly": True,
  596         "AtomCompare": rdFMCS.AtomCompare.CompareElements,
  597         "BondCompare": rdFMCS.BondCompare.CompareOrder,
  598         "SeedSMARTS": "",
  599         "MinNumAtoms": 1,
  600         "MinNumBonds": 0,
  601     }
  602 
  603 
  604 def ProcessSpecifiedMCSParameters():
  605     """Process specified MCS parameters."""
  606 
  607     if re.match("^auto$", OptionsInfo["SpecifiedMCSParams"], re.I):
  608         # Nothing to process...
  609         return
  610 
  611     # Parse specified parameters...
  612     MCSParams = re.sub(" ", "", OptionsInfo["SpecifiedMCSParams"])
  613     if not MCSParams:
  614         MiscUtil.PrintError('No valid parameter name and value pairs specified using "-m, --mcsParams" option.')
  615 
  616     MCSParamsWords = MCSParams.split(",")
  617     if len(MCSParamsWords) % 2:
  618         MiscUtil.PrintError(
  619             'The number of comma delimited paramater names and values, %d, specified using "-m, --mcsParams" option must be an even number.'
  620             % (len(MCSParamsWords))
  621         )
  622 
  623     # Setup  canonical parameter names...
  624     ValidParamNames = []
  625     CanonicalParamNamesMap = {}
  626     for ParamName in sorted(OptionsInfo["MCSParams"]):
  627         ValidParamNames.append(ParamName)
  628         CanonicalParamNamesMap[ParamName.lower()] = ParamName
  629 
  630     # Validate and set paramater names and value...
  631     for Index in range(0, len(MCSParamsWords), 2):
  632         Name = MCSParamsWords[Index]
  633         Value = MCSParamsWords[Index + 1]
  634 
  635         CanonicalName = Name.lower()
  636         if CanonicalName not in CanonicalParamNamesMap:
  637             MiscUtil.PrintError(
  638                 'The parameter name, %s, specified using "-m, --mcsParams" option is not a valid name. Supported parameter names: %s'
  639                 % (Name, " ".join(ValidParamNames))
  640             )
  641 
  642         ParamName = CanonicalParamNamesMap[CanonicalName]
  643         if re.match("^Threshold$", ParamName, re.I):
  644             Value = float(Value)
  645             if Value <= 0.0 or Value > 1.0:
  646                 MiscUtil.PrintError(
  647                     'The parameter value, %s, specified using "-m, --mcsParams" option  for parameter, %s, is not a valid value. Supported values: > 0 and <= 1.0'
  648                     % (Value, Name)
  649                 )
  650             ParamValue = Value
  651         elif re.match("^Timeout$", ParamName, re.I):
  652             Value = int(Value)
  653             if Value <= 0:
  654                 MiscUtil.PrintError(
  655                     'The parameter value, %s, specified using "-m, --mcsParams" option  for parameter, %s, is not a valid value. Supported values: > 0'
  656                     % (Value, Name)
  657                 )
  658             ParamValue = Value
  659         elif re.match("^MinNumAtoms$", ParamName, re.I):
  660             Value = int(Value)
  661             if Value < 1:
  662                 MiscUtil.PrintError(
  663                     'The parameter value, %s, specified using "-m, --mcsParams" option  for parameter, %s, is not a valid value. Supported values: >= 1'
  664                     % (Value, Name)
  665                 )
  666             ParamValue = Value
  667         elif re.match("^MinNumBonds$", ParamName, re.I):
  668             Value = int(Value)
  669             if Value < 0:
  670                 MiscUtil.PrintError(
  671                     'The parameter value, %s, specified using "-m, --mcsParams" option  for parameter, %s, is not a valid value. Supported values: >=0 '
  672                     % (Value, Name)
  673                 )
  674             ParamValue = Value
  675         elif re.match("^AtomCompare$", ParamName, re.I):
  676             if re.match("^CompareAny$", Value, re.I):
  677                 ParamValue = rdFMCS.AtomCompare.CompareAny
  678             elif re.match("^CompareElements$", Value, re.I):
  679                 ParamValue = Chem.rdFMCS.AtomCompare.CompareElements
  680             elif re.match("^CompareIsotopes$", Value, re.I):
  681                 ParamValue = Chem.rdFMCS.AtomCompare.CompareIsotopes
  682             else:
  683                 MiscUtil.PrintError(
  684                     'The parameter value, %s, specified using "-m, --mcsParams" option  for parameter, %s, is not a valid value. Supported values: CompareAny CompareElements CompareIsotopes'
  685                     % (Value, Name)
  686                 )
  687         elif re.match("^BondCompare$", ParamName, re.I):
  688             if re.match("^CompareAny$", Value, re.I):
  689                 ParamValue = Chem.rdFMCS.BondCompare.CompareAny
  690             elif re.match("^CompareOrder$", Value, re.I):
  691                 ParamValue = rdFMCS.BondCompare.CompareOrder
  692             elif re.match("^CompareOrderExact$", Value, re.I):
  693                 ParamValue = rdFMCS.BondCompare.CompareOrderExact
  694             else:
  695                 MiscUtil.PrintError(
  696                     'The parameter value, %s, specified using "-m, --mcsParams" option  for parameter, %s, is not a valid value. Supported values: CompareAny CompareOrder CompareOrderExact'
  697                     % (Value, Name)
  698                 )
  699         elif re.match("^SeedSMARTS$", ParamName, re.I):
  700             if not len(Value):
  701                 MiscUtil.PrintError(
  702                     'The parameter value specified using "-m, --mcsParams" option  for parameter, %s, is empty. '
  703                     % (Name)
  704                 )
  705             ParamValue = Value
  706         else:
  707             if not re.match("^(Yes|No|True|False)$", Value, re.I):
  708                 MiscUtil.PrintError(
  709                     'The parameter value, %s, specified using "-m, --mcsParams" option  for parameter, %s, is not a valid value. Supported values: Yes No True False'
  710                     % (Value, Name)
  711                 )
  712             ParamValue = False
  713             if re.match("^(Yes|True)$", Value, re.I):
  714                 ParamValue = True
  715 
  716         # Set value...
  717         OptionsInfo["MCSParams"][ParamName] = ParamValue
  718 
  719 
  720 def ProcesssConformerGeneratorOption():
  721     """Process comformer generator option."""
  722 
  723     ConfGenParams = MiscUtil.ProcessOptionConformerGenerator("--conformerGenerator", Options["--conformerGenerator"])
  724 
  725     OptionsInfo["ConformerGenerator"] = ConfGenParams["ConformerGenerator"]
  726     OptionsInfo["UseBasicKnowledge"] = ConfGenParams["UseBasicKnowledge"]
  727     OptionsInfo["UseExpTorsionAnglePrefs"] = ConfGenParams["UseExpTorsionAnglePrefs"]
  728     OptionsInfo["ETVersion"] = ConfGenParams["ETVersion"]
  729 
  730 
  731 def ProcessOptions():
  732     """Process and validate command line arguments and options."""
  733 
  734     MiscUtil.PrintInfo("Processing options...")
  735 
  736     # Validate options...
  737     ValidateOptions()
  738 
  739     OptionsInfo["Infile"] = Options["--infile"]
  740     OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters(
  741         "--infileParams", Options["--infileParams"], Options["--infile"]
  742     )
  743 
  744     OptionsInfo["RefFile"] = Options["--reffile"]
  745 
  746     OptionsInfo["Scaffold"] = Options["--scaffold"]
  747     if re.match("^auto$", Options["--scaffold"], re.I):
  748         UseScaffoldMCS = True
  749         UseScaffoldSMARTS = False
  750         ScaffoldSMARTS = None
  751     else:
  752         UseScaffoldMCS = False
  753         UseScaffoldSMARTS = True
  754         ScaffoldSMARTS = OptionsInfo["Scaffold"]
  755 
  756     OptionsInfo["UseScaffoldMCS"] = UseScaffoldMCS
  757     OptionsInfo["UseScaffoldSMARTS"] = UseScaffoldSMARTS
  758     OptionsInfo["ScaffoldSMARTS"] = ScaffoldSMARTS
  759     OptionsInfo["ScaffoldPatternMol"] = None
  760 
  761     OptionsInfo["SpecifiedMCSParams"] = Options["--mcsParams"]
  762     ProcessMCSParameters()
  763 
  764     OptionsInfo["Outfile"] = Options["--outfile"]
  765     OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters(
  766         "--outfileParams", Options["--outfileParams"]
  767     )
  768 
  769     OptionsInfo["Overwrite"] = Options["--overwrite"]
  770 
  771     OptionsInfo["AddHydrogens"] = True if re.match("^yes$", Options["--addHydrogens"], re.I) else False
  772 
  773     ProcesssConformerGeneratorOption()
  774 
  775     if re.match("^UFF$", Options["--forceField"], re.I):
  776         ForceField = "UFF"
  777         UseUFF = True
  778         UseMMFF = False
  779     elif re.match("^MMFF$", Options["--forceField"], re.I):
  780         ForceField = "MMFF"
  781         UseUFF = False
  782         UseMMFF = True
  783     else:
  784         MiscUtil.PrintError(
  785             'The value, %s, specified for "--forceField" is not supported.' % (Options["--forceField"],)
  786         )
  787 
  788     MMFFVariant = "MMFF94" if re.match("^MMFF94$", Options["--forceFieldMMFFVariant"], re.I) else "MMFF94s"
  789 
  790     OptionsInfo["ForceField"] = ForceField
  791     OptionsInfo["MMFFVariant"] = MMFFVariant
  792     OptionsInfo["UseMMFF"] = UseMMFF
  793     OptionsInfo["UseUFF"] = UseUFF
  794 
  795     OptionsInfo["ScaffoldRMSDOut"] = True if re.match("^yes$", Options["--scaffoldRMSDOut"], re.I) else False
  796 
  797     OptionsInfo["EnergyOut"] = True if re.match("^yes$", Options["--energyOut"], re.I) else False
  798     if UseMMFF:
  799         OptionsInfo["EnergyLabel"] = "%s_Energy" % MMFFVariant
  800     else:
  801         OptionsInfo["EnergyLabel"] = "%s_Energy" % ForceField
  802 
  803     OptionsInfo["EnforceChirality"] = True if re.match("^yes$", Options["--enforceChirality"], re.I) else False
  804 
  805     OptionsInfo["MaxConfs"] = int(Options["--maxConfs"])
  806 
  807     OptionsInfo["MPMode"] = True if re.match("^yes$", Options["--mp"], re.I) else False
  808     OptionsInfo["MPParams"] = MiscUtil.ProcessOptionMultiprocessingParameters("--mpParams", Options["--mpParams"])
  809 
  810     OptionsInfo["QuietMode"] = True if re.match("^yes$", Options["--quiet"], re.I) else False
  811 
  812     OptionsInfo["RemoveHydrogens"] = True if re.match("^yes$", Options["--removeHydrogens"], re.I) else False
  813     OptionsInfo["UseTethers"] = True if re.match("^yes$", Options["--useTethers"], re.I) else False
  814 
  815 
  816 def RetrieveOptions():
  817     """Retrieve command line arguments and options."""
  818 
  819     # Get options...
  820     global Options
  821     Options = docopt(_docoptUsage_)
  822 
  823     # Set current working directory to the specified directory...
  824     WorkingDir = Options["--workingdir"]
  825     if WorkingDir:
  826         os.chdir(WorkingDir)
  827 
  828     # Handle examples option...
  829     if "--examples" in Options and Options["--examples"]:
  830         MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_))
  831         sys.exit(0)
  832 
  833 
  834 def ValidateOptions():
  835     """Validate option values."""
  836 
  837     MiscUtil.ValidateOptionTextValue("-a, --addHydrogens", Options["--addHydrogens"], "yes no")
  838     MiscUtil.ValidateOptionTextValue(
  839         "-c, --conformerGenerator", Options["--conformerGenerator"], "SDG KDG ETDG ETKDG ETKDGv2"
  840     )
  841 
  842     MiscUtil.ValidateOptionTextValue("-f, --forceField", Options["--forceField"], "UFF MMFF")
  843     MiscUtil.ValidateOptionTextValue(" --forceFieldMMFFVariant", Options["--forceFieldMMFFVariant"], "MMFF94 MMFF94s")
  844 
  845     MiscUtil.ValidateOptionTextValue("--scaffoldRMSDOut", Options["--scaffoldRMSDOut"], "yes no")
  846 
  847     MiscUtil.ValidateOptionTextValue("--energyOut", Options["--energyOut"], "yes no")
  848     MiscUtil.ValidateOptionTextValue("--enforceChirality ", Options["--enforceChirality"], "yes no")
  849 
  850     MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"])
  851     MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "sdf sd mol smi txt csv tsv")
  852 
  853     MiscUtil.ValidateOptionFilePath("-r, --reffile", Options["--reffile"])
  854     MiscUtil.ValidateOptionFileExt("-r, --reffile", Options["--reffile"], "sdf sd mol")
  855 
  856     MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "sdf sd")
  857     MiscUtil.ValidateOptionsOutputFileOverwrite(
  858         "-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"]
  859     )
  860     MiscUtil.ValidateOptionsDistinctFileNames(
  861         "-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"]
  862     )
  863 
  864     MiscUtil.ValidateOptionIntegerValue("--maxConfs", Options["--maxConfs"], {">": 0})
  865 
  866     MiscUtil.ValidateOptionTextValue("--mp", Options["--mp"], "yes no")
  867     MiscUtil.ValidateOptionTextValue("-q, --quiet", Options["--quiet"], "yes no")
  868 
  869     MiscUtil.ValidateOptionTextValue("-r, --removeHydrogens", Options["--removeHydrogens"], "yes no")
  870 
  871     MiscUtil.ValidateOptionTextValue("-u, --useTethers", Options["--useTethers"], "yes no")
  872 
  873 
  874 # Setup a usage string for docopt...
  875 _docoptUsage_ = """
  876 RDKitPerformConstrainedMinimization.py - Perform constrained minimization
  877 
  878 Usage:
  879     RDKitPerformConstrainedMinimization.py [--addHydrogens <yes or no>] [--conformerGenerator <SDG, KDG, ETDG, ETKDG, ETKDGv2>]
  880                                            [--forceField <UFF, or MMFF>] [--forceFieldMMFFVariant <MMFF94 or MMFF94s>]
  881                                            [--energyOut  <yes or no>] [--enforceChirality <yes or no>] [--infileParams <Name,Value,...>]
  882                                            [--maxConfs <number>]  [--mcsParams <Name,Value,...>] [--mp <yes or no>] [--mpParams <Name,Value,...>]
  883                                            [ --outfileParams <Name,Value,...> ] [--overwrite] [--quiet <yes or no>] [ --removeHydrogens <yes or no>]
  884                                            [--scaffold <auto or SMARTS>]  [--scaffoldRMSDOut  <yes or no>] [--useTethers  <yes or no>] 
  885                                            [-w <dir>] -i <infile> -r <reffile> -o <outfile> 
  886     RDKitPerformConstrainedMinimization.py -h | --help | -e | --examples
  887 
  888 Description:
  889     Generate 3D structures for molecules by performing a constrained energy minimization
  890     against a reference molecule. An initial set of 3D conformers are generated for the
  891     input molecules using distance geometry. A common core scaffold, corresponding to
  892     a Maximum Common Substructure (MCS) or an explicit SMARTS pattern,  is identified
  893     between a pair of input and reference molecules. The core scaffold atoms in input
  894     molecules are aligned against the same atoms in the reference molecule. The energy
  895     of aligned structures are minimized using the forcefield to generate the final 3D structures.
  896 
  897     The supported input file formats are: Mol (.mol), SD (.sdf, .sd), SMILES (.smi
  898     .csv, .tsv, .txt)
  899 
  900     The supported output file formats are: SD (.sdf, .sd)
  901 
  902 Options:
  903     -a, --addHydrogens <yes or no>  [default: yes]
  904         Add hydrogens before minimization.
  905     -c, --conformerGenerator <text>  [default: ETKDGv2]
  906         Conformation generation methodology for generating initial 3D coordinates
  907         for molecules in input file. A common core scaffold is identified between a
  908         pair of input and reference molecules. The atoms in common core scaffold 
  909         of input molecules are aligned against the reference molecule followed by
  910         energy minimization to generate final 3D structure.
  911         
  912         The possible values along with a brief description are shown below:
  913             
  914             SDG: Standard Distance Geometry
  915             KDG: basic Knowledge-terms with Distance Geometry
  916             ETDG: Experimental Torsion-angle preference with Distance Geometry
  917             ETKDG: Experimental Torsion-angle preference along with basic
  918                 Knowledge-terms and Distance Geometry [Ref 129]
  919             ETKDGv2: Experimental Torsion-angle preference along with basic
  920                 Knowledge-terms and Distance Geometry [Ref 167]
  921     -f, --forceField <UFF, MMFF>  [default: MMFF]
  922         Forcefield method to use for  constrained energy minimization. Possible values:
  923         Universal Force Field (UFF) [ Ref 81 ] or Merck Molecular Mechanics Force
  924         Field [ Ref 83-87 ] .
  925     --forceFieldMMFFVariant <MMFF94 or MMFF94s>  [default: MMFF94]
  926         Variant of MMFF forcefield to use for energy minimization.
  927     --energyOut <yes or no>  [default: No]
  928         Write out energy values.
  929     --enforceChirality <yes or no>  [default: Yes]
  930         Enforce chirality for defined chiral centers.
  931     -e, --examples
  932         Print examples.
  933     -h, --help
  934         Print this help message.
  935     -i, --infile <infile>
  936         Input file name.
  937     --infileParams <Name,Value,...>  [default: auto]
  938         A comma delimited list of parameter name and value pairs for reading
  939         molecules from files. The supported parameter names for different file
  940         formats, along with their default values, are shown below:
  941             
  942             SD, MOL: removeHydrogens,yes,sanitize,yes,strictParsing,yes
  943             
  944             SMILES: smilesColumn,1,smilesNameColumn,2,smilesDelimiter,space,
  945                 smilesTitleLine,auto,sanitize,yes
  946             
  947         Possible values for smilesDelimiter: space, comma or tab.
  948     --maxConfs <number>  [default: 250]
  949         Maximum number of conformations to generate for each molecule by conformation
  950         generation methodology for initial 3D coordinates. A constrained minimization is
  951         performed using the specified forcefield and the lowest energy conformation is written
  952         to the output file.
  953     --mcsParams <Name,Value,...>  [default: auto]
  954         Parameter values to use for identifying a maximum common substructure
  955         (MCS) in between a pair of reference and input molecules.In general, it is a
  956         comma delimited list of parameter name and value pairs. The supported
  957         parameter names along with their default values are shown below:
  958             
  959             atomCompare,CompareElements,bondCompare,CompareOrder,
  960             maximizeBonds,yes,matchValences,yes,matchChiralTag,no,
  961             minNumAtoms,1,minNumBonds,0,ringMatchesRingOnly,yes,
  962             completeRingsOnly,yes,threshold,1.0,timeOut,3600,seedSMARTS,none
  963             
  964         Possible values for atomCompare: CompareAny, CompareElements,
  965         CompareIsotopes. Possible values for bondCompare: CompareAny,
  966         CompareOrder, CompareOrderExact.
  967         
  968         A brief description of MCS parameters taken from RDKit documentation is
  969         as follows:
  970             
  971             atomCompare - Controls match between two atoms
  972             bondCompare - Controls match between two bonds
  973             maximizeBonds - Maximize number of bonds instead of atoms
  974             matchValences - Include atom valences in the MCS match
  975             matchChiralTag - Include atom chirality in the MCS match
  976             minNumAtoms - Minimum number of atoms in the MCS match
  977             minNumBonds - Minimum number of bonds in the MCS match
  978             ringMatchesRingOnly - Ring bonds only match other ring bonds
  979             completeRingsOnly - Partial rings not allowed during the match
  980             threshold - Fraction of the dataset that must contain the MCS
  981             seedSMARTS - SMARTS string as the seed of the MCS
  982             timeout - Timeout for the MCS calculation in seconds
  983             
  984     --mp <yes or no>  [default: no]
  985         Use multiprocessing.
  986          
  987         By default, input data is retrieved in a lazy manner via mp.Pool.imap()
  988         function employing lazy RDKit data iterable. This allows processing of
  989         arbitrary large data sets without any additional requirements memory.
  990         
  991         All input data may be optionally loaded into memory by mp.Pool.map()
  992         before starting worker processes in a process pool by setting the value
  993         of 'inputDataMode' to 'InMemory' in '--mpParams' option.
  994         
  995         A word to the wise: The default 'chunkSize' value of 1 during 'Lazy' input
  996         data mode may adversely impact the performance. The '--mpParams' section
  997         provides additional information to tune the value of 'chunkSize'.
  998     --mpParams <Name,Value,...>  [default: auto]
  999         A comma delimited list of parameter name and value pairs to configure
 1000         multiprocessing.
 1001         
 1002         The supported parameter names along with their default and possible
 1003         values are shown below:
 1004         
 1005             chunkSize, auto
 1006             inputDataMode, Lazy   [ Possible values: InMemory or Lazy ]
 1007             numProcesses, auto   [ Default: mp.cpu_count() ]
 1008         
 1009         These parameters are used by the following functions to configure and
 1010         control the behavior of multiprocessing: mp.Pool(), mp.Pool.map(), and
 1011         mp.Pool.imap().
 1012         
 1013         The chunkSize determines chunks of input data passed to each worker
 1014         process in a process pool by mp.Pool.map() and mp.Pool.imap() functions.
 1015         The default value of chunkSize is dependent on the value of 'inputDataMode'.
 1016         
 1017         The mp.Pool.map() function, invoked during 'InMemory' input data mode,
 1018         automatically converts RDKit data iterable into a list, loads all data into
 1019         memory, and calculates the default chunkSize using the following method
 1020         as shown in its code:
 1021         
 1022             chunkSize, extra = divmod(len(dataIterable), len(numProcesses) * 4)
 1023             if extra: chunkSize += 1
 1024         
 1025         For example, the default chunkSize will be 7 for a pool of 4 worker processes
 1026         and 100 data items.
 1027         
 1028         The mp.Pool.imap() function, invoked during 'Lazy' input data mode, employs
 1029         'lazy' RDKit data iterable to retrieve data as needed, without loading all the
 1030         data into memory. Consequently, the size of input data is not known a priori.
 1031         It's not possible to estimate an optimal value for the chunkSize. The default 
 1032         chunkSize is set to 1.
 1033         
 1034         The default value for the chunkSize during 'Lazy' data mode may adversely
 1035         impact the performance due to the overhead associated with exchanging
 1036         small chunks of data. It is generally a good idea to explicitly set chunkSize to
 1037         a larger value during 'Lazy' input data mode, based on the size of your input
 1038         data and number of processes in the process pool.
 1039         
 1040         The mp.Pool.map() function waits for all worker processes to process all
 1041         the data and return the results. The mp.Pool.imap() function, however,
 1042         returns the the results obtained from worker processes as soon as the
 1043         results become available for specified chunks of data.
 1044         
 1045         The order of data in the results returned by both mp.Pool.map() and 
 1046         mp.Pool.imap() functions always corresponds to the input data.
 1047     -o, --outfile <outfile>
 1048         Output file name.
 1049     --outfileParams <Name,Value,...>  [default: auto]
 1050         A comma delimited list of parameter name and value pairs for writing
 1051         molecules to files. The supported parameter names for different file
 1052         formats, along with their default values, are shown below:
 1053             
 1054             SD: kekulize,yes,forceV3000,no
 1055             
 1056     --overwrite
 1057         Overwrite existing files.
 1058     -q, --quiet <yes or no>  [default: no]
 1059         Use quiet mode. The warning and information messages will not be printed.
 1060     -r, --reffile <reffile>
 1061         Reference input file name containing a 3D reference molecule. A common
 1062         core scaffold must be present in a pair of an input and reference molecules.
 1063         Otherwise, no constrained minimization is performed on the input molecule.
 1064     --removeHydrogens <yes or no>  [default: Yes]
 1065         Remove hydrogens after minimization.
 1066     -s, --scaffold <auto or SMARTS>  [default: auto]
 1067         Common core scaffold between a pair of input and reference molecules used for
 1068         constrained minimization of molecules in input file. Possible values: Auto or a
 1069         valid SMARTS pattern. The common core scaffold is automatically detected
 1070         corresponding to the Maximum Common Substructure (MCS) between a pair of
 1071         reference and input molecules. A valid SMARTS pattern may be optionally specified
 1072         for the common core scaffold.
 1073     --scaffoldRMSDOut <yes or no>  [default: No]
 1074         Write out RMSD value for common core alignment between a pair of input and
 1075         reference molecules.
 1076     -u, --useTethers <yes or no>  [default: yes]
 1077         Use tethers to optimize the final conformation by applying a series of extra forces
 1078         to align matching atoms to the positions of the core atoms. Otherwise, use simple
 1079         distance constraints during the optimization.
 1080     -w, --workingdir <dir>
 1081         Location of working directory which defaults to the current directory.
 1082 
 1083 Examples:
 1084     To perform constrained energy minimization for molecules in a SMILES file against
 1085     a reference 3D molecule in a SD file using a common core scaffold between pairs of
 1086     input and reference molecules identified using MCS, generating up to 250 conformations
 1087     using ETKDG methodology followed by MMFF forcefield minimization, and write out
 1088     a SD file containing minimum energy structure corresponding to each constrained
 1089     molecule, type:
 1090 
 1091         % RDKitPerformConstrainedMinimization.py  -i SampleSeriesD3R.smi
 1092           -r SampleSeriesRef3D.sdf  -o SampleOut.sdf
 1093 
 1094     To rerun the first example in a quiet mode and write out a SD file, type:
 1095 
 1096         % RDKitPerformConstrainedMinimization.py  -q yes -i SampleSeriesD3R.smi
 1097           -r SampleSeriesRef3D.sdf  -o SampleOut.sdf
 1098 
 1099     To run the first example in multiprocessing mode on all available CPUs
 1100     without loading all data into memory and write out a SD file, type:
 1101 
 1102         % RDKitPerformConstrainedMinimization.py  --mp yes
 1103           -i SampleSeriesD3R.smi -r SampleSeriesRef3D.sdf  -o SampleOut.sdf
 1104 
 1105     To rerun the first example in multiprocessing mode on all available CPUs
 1106     by loading all data into memory and write out a SD file, type:
 1107 
 1108         % RDKitPerformConstrainedMinimization.py  --mp yes --mpParams
 1109           "inputDataMode,InMemory" -i SampleSeriesD3R.smi
 1110           -r SampleSeriesRef3D.sdf  -o SampleOut.sdf
 1111 
 1112     To rerun the first example using an explicit SMARTS string for a common core
 1113     scaffold and write out a SD file, type:
 1114 
 1115         % RDKitPerformConstrainedMinimization.py  --scaffold
 1116           "c1c(C(N(C(c2cc(-c3nc(N)ncc3)cn2))))cccc1" -i SampleSeriesD3R.smi -r
 1117           SampleSeriesRef3D.sdf -o SampleOut.sdf 
 1118 
 1119     To rerun the first example using molecules in a CSV SMILES file, SMILES
 1120     strings in column 1, name in column2, and write out a SD file, type:
 1121 
 1122         % RDKitPerformConstrainedMinimization.py  --infileParams "smilesDelimiter,
 1123           comma,smilesTitleLine,yes,smilesColumn,1,smilesNameColumn,2"
 1124           -i SampleSeriesD3R.csv -r SampleSeriesRef3D.sdf  -o SampleOut.sdf
 1125 
 1126     To perform constrained energy minimization for molecules in a SD file against
 1127     a reference 3D molecule in a SD file using a common core scaffold between pairs of
 1128     input and reference molecules identified using MCS, generating up to 50 conformations
 1129     using SDG methodology followed by UFF forcefield minimization, and write out
 1130     a SD file containing minimum energy structure along with energy and embed RMS values
 1131     corresponding to each constrained molecule, type:
 1132 
 1133         % RDKitPerformConstrainedMinimization.py  --maxConfs 50  -c SDG -f UFF
 1134           --scaffoldRMSDOut yes --energyOut yes -i SampleSeriesD3R.sdf
 1135           -r SampleSeriesRef3D.sdf  -o SampleOut.sdf
 1136 
 1137 Author:
 1138     Manish Sud(msud@san.rr.com)
 1139 
 1140 See also:
 1141     RDKitCalculateRMSD.py, RDKitCalculateMolecularDescriptors.py, RDKitCompareMoleculeShapes.py,
 1142     RDKitConvertFileFormat.py, RDKitGenerateConstrainedConformers.py, RDKitPerformMinimization.py
 1143 
 1144 Copyright:
 1145     Copyright (C) 2026 Manish Sud. All rights reserved.
 1146 
 1147     The functionality available in this script is implemented using RDKit, an
 1148     open source toolkit for cheminformatics developed by Greg Landrum.
 1149 
 1150     This file is part of MayaChemTools.
 1151 
 1152     MayaChemTools is free software; you can redistribute it and/or modify it under
 1153     the terms of the GNU Lesser General Public License as published by the Free
 1154     Software Foundation; either version 3 of the License, or (at your option) any
 1155     later version.
 1156 
 1157 """
 1158 
 1159 if __name__ == "__main__":
 1160     main()