MayaChemTools

    1 #!/bin/env python
    2 #
    3 # File: RDKitGenerateConstrainedConformers.py
    4 # Author: Manish Sud <msud@san.rr.com>
    5 #
    6 # Copyright (C) 2024 Manish Sud. All rights reserved.
    7 #
    8 # The functionality available in this script is implemented using RDKit, an
    9 # open source toolkit for cheminformatics developed by Greg Landrum.
   10 #
   11 # This file is part of MayaChemTools.
   12 #
   13 # MayaChemTools is free software; you can redistribute it and/or modify it under
   14 # the terms of the GNU Lesser General Public License as published by the Free
   15 # Software Foundation; either version 3 of the License, or (at your option) any
   16 # later version.
   17 #
   18 # MayaChemTools is distributed in the hope that it will be useful, but without
   19 # any warranty; without even the implied warranty of merchantability of fitness
   20 # for a particular purpose.  See the GNU Lesser General Public License for more
   21 # details.
   22 #
   23 # You should have received a copy of the GNU Lesser General Public License
   24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
   25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
   26 # Boston, MA, 02111-1307, USA.
   27 #
   28 
   29 from __future__ import print_function
   30 
   31 # Add local python path to the global path and import standard library modules...
   32 import os
   33 import sys;  sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python"))
   34 import time
   35 import re
   36 import multiprocessing as mp
   37 
   38 # RDKit imports...
   39 try:
   40     from rdkit import rdBase
   41     from rdkit import Chem
   42     from rdkit.Chem import AllChem
   43     from rdkit.Chem import rdFMCS
   44     from rdkit.Chem import rdMolAlign
   45 except ImportError as ErrMsg:
   46     sys.stderr.write("\nFailed to import RDKit module/package: %s\n" % ErrMsg)
   47     sys.stderr.write("Check/update your RDKit environment and try again.\n\n")
   48     sys.exit(1)
   49 
   50 # MayaChemTools imports...
   51 try:
   52     from docopt import docopt
   53     import MiscUtil
   54     import RDKitUtil
   55 except ImportError as ErrMsg:
   56     sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg)
   57     sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n")
   58     sys.exit(1)
   59 
   60 ScriptName = os.path.basename(sys.argv[0])
   61 Options = {}
   62 OptionsInfo = {}
   63 
   64 def main():
   65     """Start execution of the script."""
   66     
   67     MiscUtil.PrintInfo("\n%s (RDKit v%s; MayaChemTools v%s; %s): Starting...\n" % (ScriptName, rdBase.rdkitVersion, MiscUtil.GetMayaChemToolsVersion(), time.asctime()))
   68     
   69     (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime()
   70     
   71     # Retrieve command line arguments and options...
   72     RetrieveOptions()
   73     
   74     # Process and validate command line arguments and options...
   75     ProcessOptions()
   76     
   77     # Perform actions required by the script...
   78     GenerateConstrainedConformers()
   79     
   80     MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName)
   81     MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime))
   82 
   83 def GenerateConstrainedConformers():
   84     """Generate constrained conformers."""
   85     
   86     # Read and validate reference molecule...
   87     RefMol = RetrieveReferenceMolecule()
   88     
   89     # Setup a molecule reader for input file...
   90     MiscUtil.PrintInfo("\nProcessing file %s..." % OptionsInfo["Infile"])
   91     OptionsInfo["InfileParams"]["AllowEmptyMols"] = True
   92     Mols  = RDKitUtil.ReadMolecules(OptionsInfo["Infile"], **OptionsInfo["InfileParams"])
   93 
   94     # Set up a molecule writer...
   95     Writer = RDKitUtil.MoleculesWriter(OptionsInfo["Outfile"], **OptionsInfo["OutfileParams"])
   96     if Writer is None:
   97         MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OptionsInfo["Outfile"])
   98     MiscUtil.PrintInfo("Generating file %s..." % OptionsInfo["Outfile"])
   99 
  100     MolCount, ValidMolCount, CoreScaffoldMissingCount, ConfGenFailedCount = ProcessMolecules(RefMol, Mols, Writer)
  101 
  102     if Writer is not None:
  103         Writer.close()
  104     
  105     MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount)
  106     MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount)
  107     MiscUtil.PrintInfo("Number of molecules with missing core scaffold: %d" % CoreScaffoldMissingCount)
  108     MiscUtil.PrintInfo("Number of molecules failed during conformation generation or minimization: %d" % ConfGenFailedCount)
  109     MiscUtil.PrintInfo("Number of ignored molecules: %d" % (MolCount - ValidMolCount + CoreScaffoldMissingCount + ConfGenFailedCount))
  110 
  111 def ProcessMolecules(RefMol, Mols, Writer):
  112     """Process molecules to generate constrained conformers."""
  113     
  114     if OptionsInfo["MPMode"]:
  115         return ProcessMoleculesUsingMultipleProcesses(RefMol, Mols, Writer)
  116     else:
  117         return ProcessMoleculesUsingSingleProcess(RefMol, Mols, Writer)
  118 
  119 def ProcessMoleculesUsingSingleProcess(RefMol, Mols, Writer):
  120     """Process molecules to generate constrained conformers using a single process."""
  121 
  122     (MolCount, ValidMolCount, CoreScaffoldMissingCount, ConfGenFailedCount) = [0] * 4
  123     
  124     for Mol in Mols:
  125         MolCount += 1
  126         
  127         if Mol is None:
  128             continue
  129         
  130         if RDKitUtil.IsMolEmpty(Mol):
  131             if not OptionsInfo["QuietMode"]:
  132                 MolName = RDKitUtil.GetMolName(Mol, MolCount)
  133                 MiscUtil.PrintWarning("Ignoring empty molecule: %s" % MolName)
  134             continue
  135         ValidMolCount += 1
  136 
  137         # Setup a reference molecule core containing common scaffold atoms...
  138         RefMolCore = SetupCoreScaffold(RefMol, Mol, MolCount)
  139         if RefMolCore is None:
  140             CoreScaffoldMissingCount += 1
  141             continue
  142             
  143         ConfMols, CalcStatus, ConfIDs, ConfEnergyValues, ConfScaffoldEmbedRMSDValues = GenerateMolConformers(Mol, RefMolCore, MolCount)
  144         
  145         if not CalcStatus:
  146             ConfGenFailedCount += 1
  147             continue
  148 
  149         WriteMolConformers(Writer, Mol, MolCount, ConfMols, ConfIDs, ConfEnergyValues, ConfScaffoldEmbedRMSDValues)
  150 
  151     return (MolCount, ValidMolCount, CoreScaffoldMissingCount, ConfGenFailedCount)
  152 
  153 def ProcessMoleculesUsingMultipleProcesses(RefMol, Mols, Writer):
  154     """Process molecules to generate constrained conformers using multiprocessing."""
  155 
  156     MPParams = OptionsInfo["MPParams"]
  157     
  158     # Setup data for initializing a worker process...
  159     MiscUtil.PrintInfo("Encoding options info and reference molecule...")
  160     
  161     OptionsInfo["EncodedRefMol"] = RDKitUtil.MolToBase64EncodedMolString(RefMol)
  162     InitializeWorkerProcessArgs = (MiscUtil.ObjectToBase64EncodedString(Options), MiscUtil.ObjectToBase64EncodedString(OptionsInfo))
  163     
  164     # Setup a encoded mols data iterable for a worker process...
  165     WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(Mols)
  166 
  167     # Setup process pool along with data initialization for each process...
  168     MiscUtil.PrintInfo("\nConfiguring multiprocessing using %s method..." % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()"))
  169     MiscUtil.PrintInfo("NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n" % (MPParams["NumProcesses"], MPParams["InputDataMode"], ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"])))
  170     
  171     ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeWorkerProcess, InitializeWorkerProcessArgs)
  172     
  173     # Start processing...
  174     if re.match("^Lazy$", MPParams["InputDataMode"], re.I):
  175         Results = ProcessPool.imap(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
  176     elif re.match("^InMemory$", MPParams["InputDataMode"], re.I):
  177         Results = ProcessPool.map(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
  178     else:
  179         MiscUtil.PrintError("The value, %s, specified for \"--inputDataMode\" is not supported." % (MPParams["InputDataMode"]))
  180     
  181     (MolCount, ValidMolCount, CoreScaffoldMissingCount, ConfGenFailedCount) = [0] * 4
  182     for Result in Results:
  183         MolCount += 1
  184         MolIndex, EncodedMol, EncodedConfMols, CoreScaffoldMissingStatus, CalcStatus, ConfIDs, ConfEnergyValues, ConfScaffoldEmbedRMSDValues  = Result
  185         
  186         if EncodedMol is None:
  187             continue
  188         ValidMolCount += 1
  189 
  190         if CoreScaffoldMissingStatus:
  191             CoreScaffoldMissingCount += 1
  192             continue
  193         
  194         if not CalcStatus:
  195             ConfGenFailedCount += 1
  196             continue
  197 
  198         Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
  199         ConfMols = [RDKitUtil.MolFromBase64EncodedMolString(EncodedConfMol) for EncodedConfMol in EncodedConfMols]
  200         
  201         WriteMolConformers(Writer, Mol, MolCount, ConfMols, ConfIDs, ConfEnergyValues, ConfScaffoldEmbedRMSDValues)
  202     
  203     return (MolCount, ValidMolCount, CoreScaffoldMissingCount, ConfGenFailedCount)
  204     
  205 def InitializeWorkerProcess(*EncodedArgs):
  206     """Initialize data for a worker process."""
  207     
  208     global Options, OptionsInfo
  209 
  210     MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid())
  211 
  212     # Decode Options and OptionInfo...
  213     Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0])
  214     OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1])
  215 
  216     # Decode RefMol...
  217     OptionsInfo["RefMol"] = RDKitUtil.MolFromBase64EncodedMolString(OptionsInfo["EncodedRefMol"])
  218     
  219 def WorkerProcess(EncodedMolInfo):
  220     """Process data for a worker process."""
  221 
  222     MolIndex, EncodedMol = EncodedMolInfo
  223 
  224     ConfMols = None
  225     CoreScaffoldMissingStatus = False
  226     CalcStatus = False
  227     ConfIDs = None
  228     ConfEnergyValues = None
  229     ConfScaffoldEmbedRMSDValues = None
  230     
  231     if EncodedMol is None:
  232         return [MolIndex, None, ConfMols, CoreScaffoldMissingStatus, CalcStatus, ConfIDs, ConfEnergyValues, ConfScaffoldEmbedRMSDValues]
  233 
  234     RefMol = OptionsInfo["RefMol"]
  235     
  236     Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
  237     if RDKitUtil.IsMolEmpty(Mol):
  238         if not OptionsInfo["QuietMode"]:
  239             MolName = RDKitUtil.GetMolName(Mol, (MolIndex + 1))
  240             MiscUtil.PrintWarning("Ignoring empty molecule: %s" % MolName)
  241         return [MolIndex, None, ConfMols, CoreScaffoldMissingStatus, CalcStatus, ConfIDs, ConfEnergyValues, ConfScaffoldEmbedRMSDValues]
  242     
  243     # Setup a reference molecule core containing common scaffold atoms...
  244     RefMolCore = SetupCoreScaffold(RefMol, Mol, (MolIndex + 1))
  245     if RefMolCore is None:
  246         CoreScaffoldMissingStatus = True
  247         return [MolIndex, EncodedMol, ConfMols, CoreScaffoldMissingStatus, CalcStatus, ConfIDs, ConfEnergyValues, ConfScaffoldEmbedRMSDValues]
  248     
  249     ConfMols, CalcStatus, ConfIDs, ConfEnergyValues, ConfScaffoldEmbedRMSDValues = GenerateMolConformers(Mol, RefMolCore, (MolIndex + 1))
  250 
  251     EncodedConfMols = None
  252     if ConfMols is not None:
  253         EncodedConfMols = [RDKitUtil.MolToBase64EncodedMolString(ConfMol, PropertyPickleFlags = Chem.PropertyPickleOptions.MolProps | Chem.PropertyPickleOptions.PrivateProps) for ConfMol in ConfMols]
  254     
  255     return [MolIndex, EncodedMol, EncodedConfMols, CoreScaffoldMissingStatus, CalcStatus, ConfIDs, ConfEnergyValues, ConfScaffoldEmbedRMSDValues]
  256 
  257 def RetrieveReferenceMolecule():
  258     """Retrieve and validate reference molecule."""
  259     
  260     RefFile = OptionsInfo["RefFile"]
  261     
  262     MiscUtil.PrintInfo("\nProcessing file %s..." % (RefFile))
  263     OptionsInfo["InfileParams"]["AllowEmptyMols"] = False
  264     ValidRefMols, RefMolCount, ValidRefMolCount  = RDKitUtil.ReadAndValidateMolecules(RefFile, **OptionsInfo["InfileParams"])
  265     
  266     if ValidRefMolCount == 0:
  267         MiscUtil.PrintError("The reference file, %s, contains no valid molecules." % RefFile)
  268     elif ValidRefMolCount > 1:
  269         MiscUtil.PrintWarning("The reference file, %s, contains, %d, valid molecules. Using first molecule as the reference molecule..." % (RefFile, ValidRefMolCount))
  270     
  271     RefMol = ValidRefMols[0]
  272 
  273     if OptionsInfo["UseScaffoldSMARTS"]:
  274         ScaffoldPatternMol = Chem.MolFromSmarts(OptionsInfo["ScaffoldSMARTS"])
  275         if ScaffoldPatternMol is None:
  276             MiscUtil.PrintError("Failed to create scaffold pattern molecule. The scaffold SMARTS pattern, %s, specified using \"-s, --scaffold\" option is not valid." % (OptionsInfo["ScaffoldSMARTS"]))
  277         
  278         if not RefMol.HasSubstructMatch(ScaffoldPatternMol):
  279             MiscUtil.PrintError("The scaffold SMARTS pattern, %s, specified using \"-s, --scaffold\" option, is missing in the first valid reference molecule." % (OptionsInfo["ScaffoldSMARTS"]))
  280             
  281     return RefMol
  282 
  283 def SetupCoreScaffold(RefMol, Mol, MolCount):
  284     """Setup a reference molecule core containing common scaffold atoms between
  285     a pair of molecules."""
  286 
  287     if OptionsInfo["UseScaffoldMCS"]:
  288         return SetupCoreScaffoldByMCS(RefMol, Mol, MolCount)
  289     elif OptionsInfo["UseScaffoldSMARTS"]:
  290         return SetupCoreScaffoldBySMARTS(RefMol, Mol, MolCount)
  291     else:
  292         MiscUtil.PrintError("The  value, %s, specified for  \"-s, --scaffold\" option is not supported." % (OptionsInfo["Scaffold"]))
  293         
  294 def SetupCoreScaffoldByMCS(RefMol, Mol, MolCount):
  295     """Setup a reference molecule core containing common scaffold atoms between
  296     a pair of molecules using MCS."""
  297     
  298     MCSParams = OptionsInfo["MCSParams"]
  299     Mols = [RefMol, Mol]
  300 
  301     MCSResultObject = rdFMCS.FindMCS(Mols, maximizeBonds = MCSParams["MaximizeBonds"], threshold = MCSParams["Threshold"], timeout = MCSParams["TimeOut"], verbose = MCSParams["Verbose"], matchValences = MCSParams["MatchValences"], ringMatchesRingOnly = MCSParams["RingMatchesRingOnly"], completeRingsOnly = MCSParams["CompleteRingsOnly"], matchChiralTag = MCSParams["MatchChiralTag"], atomCompare = MCSParams["AtomCompare"], bondCompare = MCSParams["BondCompare"], seedSmarts = MCSParams["SeedSMARTS"]) 
  302     
  303     if MCSResultObject.canceled:
  304         if not OptionsInfo["QuietMode"]:
  305             MiscUtil.PrintWarning("MCS failed to identify a common core scaffold between reference moecule and input molecule %s. Specify a different set of parameters using \"-m, --mcsParams\" option and try again." % (RDKitUtil.GetMolName(Mol, MolCount)))
  306         return None
  307     
  308     CoreNumAtoms = MCSResultObject.numAtoms
  309     CoreNumBonds = MCSResultObject.numBonds
  310     
  311     SMARTSCore = MCSResultObject.smartsString
  312     
  313     if not len(SMARTSCore):
  314         if not OptionsInfo["QuietMode"]:
  315             MiscUtil.PrintWarning("MCS failed to identify a common core scaffold between reference moecule and input molecule %s. Specify a different set of parameters using \"-m, --mcsParams\" option and try again." % (RDKitUtil.GetMolName(Mol, MolCount)))
  316         return None
  317         
  318     if CoreNumAtoms < MCSParams["MinNumAtoms"]:
  319         if not OptionsInfo["QuietMode"]:
  320             MiscUtil.PrintWarning("Number of atoms, %d, in core scaffold identified by MCS is less than, %d, as specified by \"minNumAtoms\" parameter in  \"-m, --mcsParams\" option." % (CoreNumAtoms, MCSParams["MinNumAtoms"]))
  321         return None
  322     
  323     if CoreNumBonds < MCSParams["MinNumBonds"]:
  324         if not OptionsInfo["QuietMode"]:
  325             MiscUtil.PrintWarning("Number of bonds, %d, in core scaffold identified by MCS is less than, %d, as specified by \"minNumBonds\" parameter in  \"-m, --mcsParams\" option." % (CoreNumBonds, MCSParams["MinNumBonds"]))
  326         return None
  327 
  328     return GenerateCoreMol(RefMol, SMARTSCore)
  329     
  330 def SetupCoreScaffoldBySMARTS(RefMol, Mol, MolCount):
  331     """Setup a reference molecule core containing common scaffold atoms between
  332     a pair of molecules using specified SMARTS."""
  333     
  334     if OptionsInfo["ScaffoldPatternMol"] is None:
  335         OptionsInfo["ScaffoldPatternMol"] = Chem.MolFromSmarts(OptionsInfo["ScaffoldSMARTS"])
  336         
  337     if not Mol.HasSubstructMatch(OptionsInfo["ScaffoldPatternMol"]):
  338         if not OptionsInfo["QuietMode"]:
  339             MiscUtil.PrintWarning("The scaffold SMARTS pattern, %s, specified using \"-s, --scaffold\" option is missing in input molecule,  %s." % (OptionsInfo["ScaffoldSMARTS"], RDKitUtil.GetMolName(Mol, MolCount)))
  340         return None
  341 
  342     return GenerateCoreMol(RefMol, OptionsInfo["ScaffoldSMARTS"])
  343 
  344 def GenerateCoreMol(RefMol, SMARTSCore):
  345     """Generate core molecule for embedding."""
  346 
  347     # Create a molecule corresponding to core atoms...
  348     SMARTSCoreMol = Chem.MolFromSmarts(SMARTSCore)
  349 
  350     # Setup a ref molecule containing core atoms with dummy atoms as
  351     # attachment points for atoms around the core atoms...
  352     Core = AllChem.ReplaceSidechains(Chem.RemoveHs(RefMol), SMARTSCoreMol)
  353 
  354     # Delete any substructures containing dummy atoms..
  355     RefMolCore = AllChem.DeleteSubstructs(Core, Chem.MolFromSmiles('*'))
  356     RefMolCore.UpdatePropertyCache()
  357     
  358     return RefMolCore
  359 
  360 def GenerateMolConformers(Mol, RefMolCore, MolNum = None):
  361     """Generate constrained conformers."""
  362 
  363     if  OptionsInfo["AddHydrogens"]:
  364         Mol = Chem.AddHs(Mol, addCoords = True)
  365 
  366     # Setup forcefield function to use for constrained minimization...
  367     ForceFieldFunction = None
  368     ForceFieldName = None
  369     if OptionsInfo["UseUFF"]:
  370         ForceFieldFunction = lambda mol, confId = -1 : AllChem.UFFGetMoleculeForceField(mol, confId = confId)
  371         ForeceFieldName = "UFF"
  372     else:
  373         ForceFieldFunction = lambda mol, confId = -1 : AllChem.MMFFGetMoleculeForceField(mol, AllChem.MMFFGetMoleculeProperties(mol, mmffVariant = OptionsInfo["MMFFVariant"]) , confId = confId)
  374         ForeceFieldName = "MMFF"
  375 
  376     if ForceFieldFunction is None:
  377         if not OptionsInfo["QuietMode"]:
  378             MiscUtil.PrintWarning("Failed to setup forcefield %s for molecule: %s\n" % (ForceFieldName, RDKitUtil.GetMolName(Mol, MolNum)))
  379         return (None, False, None, None, None)
  380         
  381     MaxConfs = OptionsInfo["MaxConfs"]
  382     EnforceChirality = OptionsInfo["EnforceChirality"]
  383     UseExpTorsionAnglePrefs = OptionsInfo["UseExpTorsionAnglePrefs"]
  384     ETVersion = OptionsInfo["ETVersion"]
  385     UseBasicKnowledge = OptionsInfo["UseBasicKnowledge"]
  386     UseTethers = OptionsInfo["UseTethers"]
  387 
  388     CalcEnergyMap = {}
  389     MolConfsMap = {}
  390     CalcRMSDMap = {}
  391     
  392     ScaffoldEmbedRMSDMap = {}
  393     
  394     ConfIDs = [ConfID for ConfID in range(0, MaxConfs)]
  395     for ConfID in ConfIDs:
  396         try:
  397             MolConf = Chem.Mol(Mol)
  398             AllChem.ConstrainedEmbed(MolConf, RefMolCore, useTethers = UseTethers, coreConfId = -1, randomseed = ConfID, getForceField = ForceFieldFunction, enforceChirality = EnforceChirality, useExpTorsionAnglePrefs = UseExpTorsionAnglePrefs, useBasicKnowledge = UseBasicKnowledge, ETversion = ETVersion)
  399         except (ValueError, RuntimeError, Chem.rdchem.KekulizeException)  as ErrMsg:
  400             if not OptionsInfo["QuietMode"]:
  401                 MolName = RDKitUtil.GetMolName(Mol, MolNum)
  402                 MiscUtil.PrintWarning("Constrained embedding coupldn't  be performed for molecule %s:\n%s\n" % (RDKitUtil.GetMolName(Mol, MolNum), ErrMsg))
  403             return (None, False, None, None, None)
  404         
  405         EnergyStatus, Energy = GetEnergy(MolConf)
  406         
  407         if not EnergyStatus:
  408             if not OptionsInfo["QuietMode"]:
  409                 MolName = RDKitUtil.GetMolName(Mol, MolNum)
  410                 MiscUtil.PrintWarning("Failed to retrieve calculated energy for conformation number %d of molecule %s. Try again after removing any salts or cleaing up the molecule...\n" % (ConfID, MolName))
  411             return (None, False, None, None, None)
  412         
  413         CalcEnergyMap[ConfID] = Energy
  414         
  415         if OptionsInfo["ScaffoldRMSDOut"]:
  416             ScaffoldEmbedRMSDMap[ConfID] = "%.4f" % float(MolConf.GetProp('EmbedRMS'))
  417         MolConf.ClearProp('EmbedRMS')
  418         
  419         if  OptionsInfo["RemoveHydrogens"]:
  420             MolConf = Chem.RemoveHs(MolConf)
  421         MolConfsMap[ConfID] = MolConf
  422 
  423     # Sort conformers by energy...
  424     SortedConfIDs = sorted(ConfIDs, key = lambda ConfID: CalcEnergyMap[ConfID])
  425     
  426     MinEnergyConfID = SortedConfIDs[0]
  427     EnergyWindow = OptionsInfo["EnergyWindow"]
  428     
  429     MinConfEnergy = CalcEnergyMap[MinEnergyConfID]
  430     MinEnergyMolConf = MolConfsMap[MinEnergyConfID]
  431     
  432     # Calculate RMSD values for conformers...
  433     CalcRMSDMap = {}
  434     
  435     EnergyRMSDCutoff = OptionsInfo["EnergyRMSDCutoff"]
  436     ApplyEnergyRMSDCutoff = True  if EnergyRMSDCutoff > 0 else False
  437 
  438     if ApplyEnergyRMSDCutoff:
  439         FirstConf = True
  440         RefMolConf = None
  441         for ConfID in SortedConfIDs:
  442             if FirstConf:
  443                 FirstConf = False
  444                 RefMolConf = MolConfsMap[ConfID]
  445                 CalcRMSDMap[ConfID] = 0.0
  446                 continue
  447                 
  448             # Make a copy for probe molecule. It gets updated during the calculation...
  449             ProbeMolConf = Chem.Mol(MolConfsMap[ConfID])
  450             RMSD = rdMolAlign.AlignMol(ProbeMolConf, MinEnergyMolConf)
  451             CalcRMSDMap[ConfID] = RMSD
  452 
  453     # Track conformers with in the specified energy window  from the lowest
  454     # energy conformation along with applying RMSD cutoff as needed...
  455     #
  456     SelectedConfIDs = []
  457     
  458     ConfCount = 0
  459     IgnoredByEnergyConfCount = 0
  460     IgnoredByRMSDConfCount = 0
  461     
  462     FirstConf = True
  463     for ConfID in SortedConfIDs:
  464         if FirstConf:
  465             ConfCount += 1
  466             FirstConf = False
  467             SelectedConfIDs.append(ConfID)
  468             continue
  469             
  470         ConfEnergyDiff = abs(CalcEnergyMap[ConfID] - MinConfEnergy )
  471         if  ConfEnergyDiff > EnergyWindow:
  472             IgnoredByEnergyConfCount += 1
  473             continue
  474         
  475         if ApplyEnergyRMSDCutoff:
  476             if CalcRMSDMap[ConfID] < EnergyRMSDCutoff:
  477                 IgnoredByRMSDConfCount += 1
  478                 continue
  479 
  480         ConfCount += 1
  481         SelectedConfIDs.append(ConfID)
  482     
  483     if not OptionsInfo["QuietMode"]:
  484         MolName = RDKitUtil.GetMolName(Mol, MolNum)
  485         MiscUtil.PrintInfo("\nTotal Number of conformations generated for %s: %d" % (MolName, ConfCount))
  486         MiscUtil.PrintInfo("Number of conformations ignored due to energy window cutoff: %d" % (IgnoredByEnergyConfCount))
  487         if ApplyEnergyRMSDCutoff:
  488             MiscUtil.PrintInfo("Number of conformations ignored due to energy RMSD cutoff:  %d" % (IgnoredByRMSDConfCount))
  489 
  490     # Setup selected conformer molecules...
  491     SelectedConfMols = [MolConfsMap[ConfID] for ConfID in SelectedConfIDs]
  492     
  493     # Setup selected conformer energy values...
  494     SelectedConfEnergyValues = None
  495     if OptionsInfo["EnergyOut"]:
  496         SelectedConfEnergyValues = ["%.2f" % CalcEnergyMap[ConfID] for ConfID in SelectedConfIDs]
  497         
  498     # Setup selected conformer scaffold RMSD values...
  499     SelectedConfScaffoldEmbedRMSDValues = None
  500     if OptionsInfo["ScaffoldRMSDOut"]:
  501         SelectedConfScaffoldEmbedRMSDValues = []
  502         for ConfID in SelectedConfIDs:
  503             SelectedConfScaffoldEmbedRMSDValues.append(ScaffoldEmbedRMSDMap[ConfID])
  504     
  505     return (SelectedConfMols, True, SelectedConfIDs, SelectedConfEnergyValues, SelectedConfScaffoldEmbedRMSDValues)
  506 
  507 def GetEnergy(Mol, ConfID = None):
  508     """Calculate energy."""
  509 
  510     Status = True
  511     Energy = None
  512 
  513     if ConfID is None:
  514         ConfID = -1
  515     
  516     if OptionsInfo["UseUFF"]:
  517         UFFMoleculeForcefield = AllChem.UFFGetMoleculeForceField(Mol, confId = ConfID)
  518         if UFFMoleculeForcefield is None:
  519             Status = False
  520         else:
  521             Energy = UFFMoleculeForcefield.CalcEnergy()
  522     elif OptionsInfo["UseMMFF"]:
  523         MMFFMoleculeProperties = AllChem.MMFFGetMoleculeProperties(Mol, mmffVariant = OptionsInfo["MMFFVariant"])
  524         MMFFMoleculeForcefield = AllChem.MMFFGetMoleculeForceField(Mol, MMFFMoleculeProperties, confId = ConfID)
  525         if MMFFMoleculeForcefield is None:
  526             Status = False
  527         else:
  528             Energy = MMFFMoleculeForcefield.CalcEnergy()
  529     else:
  530         MiscUtil.PrintError("Couldn't retrieve conformer energy: Specified forcefield, %s, is not supported" % OptionsInfo["ForceField"])
  531     
  532     return (Status, Energy)
  533     
  534 def WriteMolConformers(Writer, Mol, MolNum, ConfMols, ConfIDs, ConfEnergyValues = None, ConfScaffoldEmbedRMSDValues = None):
  535     """Write molecule coformers."""
  536 
  537     if ConfMols is None:
  538         return
  539 
  540     for Index, ConfMol in enumerate(ConfMols):
  541         ConfMolName = RDKitUtil.GetMolName(Mol, MolNum)
  542         SetConfMolName(ConfMol, ConfMolName, ConfIDs[Index])
  543         
  544         if ConfScaffoldEmbedRMSDValues is not None:
  545             ConfMol.SetProp("CoreScaffoldEmbedRMSD", ConfScaffoldEmbedRMSDValues[Index])
  546             
  547         if ConfEnergyValues is not None:
  548             ConfMol.SetProp(OptionsInfo["EnergyLabel"], ConfEnergyValues[Index])
  549     
  550         Writer.write(ConfMol)
  551     
  552 def SetConfMolName(Mol, MolName, ConfCount):
  553     """Set conf mol name."""
  554 
  555     ConfName = "%s_Conf%d" % (MolName, ConfCount)
  556     Mol.SetProp("_Name", ConfName)
  557 
  558 def ProcessMCSParameters():
  559     """Set up and process MCS parameters."""
  560 
  561     SetupMCSParameters()
  562     ProcessSpecifiedMCSParameters()
  563 
  564 def SetupMCSParameters():
  565     """Set up default MCS parameters."""
  566     
  567     OptionsInfo["MCSParams"] = {"MaximizeBonds": True, "Threshold": 0.9, "TimeOut": 3600, "Verbose": False, "MatchValences": True, "MatchChiralTag": False, "RingMatchesRingOnly": True, "CompleteRingsOnly": True, "AtomCompare": rdFMCS.AtomCompare.CompareElements, "BondCompare": rdFMCS.BondCompare.CompareOrder, "SeedSMARTS": "", "MinNumAtoms": 1, "MinNumBonds": 0}
  568     
  569 def ProcessSpecifiedMCSParameters():
  570     """Process specified MCS parameters."""
  571 
  572     if re.match("^auto$", OptionsInfo["SpecifiedMCSParams"], re.I):
  573         # Nothing to process...
  574         return
  575     
  576     # Parse specified parameters...
  577     MCSParams = re.sub(" ", "", OptionsInfo["SpecifiedMCSParams"])
  578     if not MCSParams:
  579         MiscUtil.PrintError("No valid parameter name and value pairs specified using \"-m, --mcsParams\" option.")
  580 
  581     MCSParamsWords = MCSParams.split(",")
  582     if len(MCSParamsWords) % 2:
  583         MiscUtil.PrintError("The number of comma delimited paramater names and values, %d, specified using \"-m, --mcsParams\" option must be an even number." % (len(MCSParamsWords)))
  584     
  585     # Setup  canonical parameter names...
  586     ValidParamNames = []
  587     CanonicalParamNamesMap = {}
  588     for ParamName in sorted(OptionsInfo["MCSParams"]):
  589         ValidParamNames.append(ParamName)
  590         CanonicalParamNamesMap[ParamName.lower()] = ParamName
  591 
  592     # Validate and set paramater names and value...
  593     for Index in range(0, len(MCSParamsWords), 2):
  594         Name = MCSParamsWords[Index]
  595         Value = MCSParamsWords[Index + 1]
  596 
  597         CanonicalName = Name.lower()
  598         if  not CanonicalName in CanonicalParamNamesMap:
  599             MiscUtil.PrintError("The parameter name, %s, specified using \"-m, --mcsParams\" option is not a valid name. Supported parameter names: %s" % (Name,  " ".join(ValidParamNames)))
  600 
  601         ParamName = CanonicalParamNamesMap[CanonicalName]
  602         if re.match("^Threshold$", ParamName, re.I):
  603             Value = float(Value)
  604             if Value <= 0.0 or Value > 1.0 :
  605                 MiscUtil.PrintError("The parameter value, %s, specified using \"-m, --mcsParams\" option  for parameter, %s, is not a valid value. Supported values: > 0 and <= 1.0" % (Value, Name))
  606             ParamValue = Value
  607         elif re.match("^Timeout$", ParamName, re.I):
  608             Value = int(Value)
  609             if Value <= 0:
  610                 MiscUtil.PrintError("The parameter value, %s, specified using \"-m, --mcsParams\" option  for parameter, %s, is not a valid value. Supported values: > 0" % (Value, Name))
  611             ParamValue = Value
  612         elif re.match("^MinNumAtoms$", ParamName, re.I):
  613             Value = int(Value)
  614             if Value < 1:
  615                 MiscUtil.PrintError("The parameter value, %s, specified using \"-m, --mcsParams\" option  for parameter, %s, is not a valid value. Supported values: >= 1" % (Value, Name))
  616             ParamValue = Value
  617         elif re.match("^MinNumBonds$", ParamName, re.I):
  618             Value = int(Value)
  619             if Value < 0:
  620                 MiscUtil.PrintError("The parameter value, %s, specified using \"-m, --mcsParams\" option  for parameter, %s, is not a valid value. Supported values: >=0 " % (Value, Name))
  621             ParamValue = Value
  622         elif re.match("^AtomCompare$", ParamName, re.I):
  623             if re.match("^CompareAny$", Value, re.I):
  624                 ParamValue = rdFMCS.AtomCompare.CompareAny
  625             elif re.match("^CompareElements$", Value, re.I):
  626                 ParamValue = Chem.rdFMCS.AtomCompare.CompareElements
  627             elif re.match("^CompareIsotopes$", Value, re.I):
  628                 ParamValue = Chem.rdFMCS.AtomCompare.CompareIsotopes
  629             else:
  630                 MiscUtil.PrintError("The parameter value, %s, specified using \"-m, --mcsParams\" option  for parameter, %s, is not a valid value. Supported values: CompareAny CompareElements CompareIsotopes" % (Value, Name))
  631         elif re.match("^BondCompare$", ParamName, re.I):
  632             if re.match("^CompareAny$", Value, re.I):
  633                 ParamValue = Chem.rdFMCS.BondCompare.CompareAny
  634             elif re.match("^CompareOrder$", Value, re.I):
  635                 ParamValue = rdFMCS.BondCompare.CompareOrder
  636             elif re.match("^CompareOrderExact$", Value, re.I):
  637                 ParamValue = rdFMCS.BondCompare.CompareOrderExact
  638             else:
  639                 MiscUtil.PrintError("The parameter value, %s, specified using \"-m, --mcsParams\" option  for parameter, %s, is not a valid value. Supported values: CompareAny CompareOrder CompareOrderExact" % (Value, Name))
  640         elif re.match("^SeedSMARTS$", ParamName, re.I):
  641             if not len(Value):
  642                 MiscUtil.PrintError("The parameter value specified using \"-m, --mcsParams\" option  for parameter, %s, is empty. " % (Name))
  643             ParamValue = Value
  644         else:
  645             if not re.match("^(Yes|No|True|False)$", Value, re.I):
  646                 MiscUtil.PrintError("The parameter value, %s, specified using \"-m, --mcsParams\" option  for parameter, %s, is not a valid value. Supported values: Yes No True False" % (Value, Name))
  647             ParamValue = False
  648             if re.match("^(Yes|True)$", Value, re.I):
  649                 ParamValue = True
  650         
  651         # Set value...
  652         OptionsInfo["MCSParams"][ParamName] = ParamValue
  653 
  654 def ProcesssConformerGeneratorOption():
  655     """Process comformer generator option. """
  656     
  657     ConfGenParams = MiscUtil.ProcessOptionConformerGenerator("--conformerGenerator", Options["--conformerGenerator"])
  658     
  659     OptionsInfo["ConformerGenerator"] = ConfGenParams["ConformerGenerator"]
  660     OptionsInfo["UseBasicKnowledge"] = ConfGenParams["UseBasicKnowledge"]
  661     OptionsInfo["UseExpTorsionAnglePrefs"] = ConfGenParams["UseExpTorsionAnglePrefs"]
  662     OptionsInfo["ETVersion"] = ConfGenParams["ETVersion"]
  663 
  664 def ProcessOptions():
  665     """Process and validate command line arguments and options."""
  666     
  667     MiscUtil.PrintInfo("Processing options...")
  668     
  669     # Validate options...
  670     ValidateOptions()
  671 
  672     OptionsInfo["Infile"] = Options["--infile"]
  673     OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters("--infileParams", Options["--infileParams"], Options["--infile"])
  674     
  675     OptionsInfo["RefFile"] = Options["--reffile"]
  676 
  677     OptionsInfo["Scaffold"] = Options["--scaffold"]
  678     if re.match("^auto$", Options["--scaffold"], re.I):
  679         UseScaffoldMCS = True
  680         UseScaffoldSMARTS = False
  681         ScaffoldSMARTS = None
  682     else:
  683         UseScaffoldMCS = False
  684         UseScaffoldSMARTS = True
  685         ScaffoldSMARTS = OptionsInfo["Scaffold"]
  686     
  687     OptionsInfo["UseScaffoldMCS"] = UseScaffoldMCS
  688     OptionsInfo["UseScaffoldSMARTS"] = UseScaffoldSMARTS
  689     OptionsInfo["ScaffoldSMARTS"] = ScaffoldSMARTS
  690     OptionsInfo["ScaffoldPatternMol"] = None
  691 
  692     OptionsInfo["SpecifiedMCSParams"] = Options["--mcsParams"]
  693     ProcessMCSParameters()
  694     
  695     OptionsInfo["Outfile"] = Options["--outfile"]
  696     OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters("--outfileParams", Options["--outfileParams"])
  697     
  698     OptionsInfo["Overwrite"] = Options["--overwrite"]
  699 
  700     OptionsInfo["AddHydrogens"] = True if re.match("^yes$", Options["--addHydrogens"], re.I) else False
  701     
  702     ProcesssConformerGeneratorOption()
  703     
  704     if re.match("^UFF$", Options["--forceField"], re.I):
  705         ForceField = "UFF"
  706         UseUFF = True
  707         UseMMFF = False
  708     elif re.match("^MMFF$", Options["--forceField"], re.I):
  709         ForceField = "MMFF"
  710         UseUFF = False
  711         UseMMFF = True
  712     else:
  713         MiscUtil.PrintError("The value, %s, specified for \"--forceField\" is not supported." % (Options["--forceField"],))
  714     
  715     MMFFVariant = "MMFF94" if re.match("^MMFF94$", Options["--forceFieldMMFFVariant"], re.I) else "MMFF94s"
  716     
  717     OptionsInfo["ForceField"] = ForceField
  718     OptionsInfo["MMFFVariant"] = MMFFVariant
  719     OptionsInfo["UseMMFF"] = UseMMFF
  720     OptionsInfo["UseUFF"] = UseUFF
  721     
  722     OptionsInfo["ScaffoldRMSDOut"] = True if re.match("^yes$", Options["--scaffoldRMSDOut"], re.I) else False
  723     
  724     OptionsInfo["EnergyOut"] = True if re.match("^yes$", Options["--energyOut"], re.I) else False
  725     if UseMMFF:
  726         OptionsInfo["EnergyLabel"] = "%s_Energy" % MMFFVariant
  727     else:
  728         OptionsInfo["EnergyLabel"] = "%s_Energy" % ForceField
  729     
  730     OptionsInfo["EnforceChirality"] = True if re.match("^yes$", Options["--enforceChirality"], re.I) else False
  731     EnergyRMSDCutoff = -1.0
  732     if not re.match("^none$", Options["--energyRMSDCutoff"], re.I):
  733         EnergyRMSDCutoff = float(Options["--energyRMSDCutoff"])
  734     OptionsInfo["EnergyRMSDCutoff"] = EnergyRMSDCutoff
  735     
  736     OptionsInfo["EnergyWindow"] = float(Options["--energyWindow"])
  737     
  738     OptionsInfo["MaxConfs"] = int(Options["--maxConfs"])
  739     
  740     OptionsInfo["MPMode"] = True if re.match("^yes$", Options["--mp"], re.I) else False
  741     OptionsInfo["MPParams"] = MiscUtil.ProcessOptionMultiprocessingParameters("--mpParams", Options["--mpParams"])
  742     
  743     OptionsInfo["QuietMode"] = True if re.match("^yes$", Options["--quiet"], re.I) else False
  744     
  745     OptionsInfo["RemoveHydrogens"] = True if re.match("^yes$", Options["--removeHydrogens"], re.I) else False
  746     OptionsInfo["UseTethers"] = True if re.match("^yes$", Options["--useTethers"], re.I) else False
  747 
  748 def RetrieveOptions():
  749     """Retrieve command line arguments and options."""
  750     
  751     # Get options...
  752     global Options
  753     Options = docopt(_docoptUsage_)
  754     
  755     # Set current working directory to the specified directory...
  756     WorkingDir = Options["--workingdir"]
  757     if WorkingDir:
  758         os.chdir(WorkingDir)
  759     
  760     # Handle examples option...
  761     if "--examples" in Options and Options["--examples"]:
  762         MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_))
  763         sys.exit(0)
  764 
  765 def ValidateOptions():
  766     """Validate option values."""
  767     
  768     MiscUtil.ValidateOptionTextValue("-a, --addHydrogens", Options["--addHydrogens"], "yes no")
  769     MiscUtil.ValidateOptionTextValue("-c, --conformerGenerator", Options["--conformerGenerator"], "SDG KDG ETDG ETKDG ETKDGv2")
  770     
  771     MiscUtil.ValidateOptionTextValue("-f, --forceField", Options["--forceField"], "UFF MMFF")
  772     MiscUtil.ValidateOptionTextValue(" --forceFieldMMFFVariant", Options["--forceFieldMMFFVariant"], "MMFF94 MMFF94s")
  773     
  774     MiscUtil.ValidateOptionFloatValue("--energyWindow", Options["--energyWindow"], {">": 0.0})
  775     if not re.match("^none$", Options["--energyRMSDCutoff"], re.I):
  776         MiscUtil.ValidateOptionFloatValue("--energyRMSDCutoff", Options["--energyRMSDCutoff"], {">": 0.0})
  777     
  778     MiscUtil.ValidateOptionTextValue("--scaffoldRMSDOut", Options["--scaffoldRMSDOut"], "yes no")
  779     
  780     MiscUtil.ValidateOptionTextValue("--energyOut", Options["--energyOut"], "yes no")
  781     MiscUtil.ValidateOptionTextValue("--enforceChirality ", Options["--enforceChirality"], "yes no")
  782     
  783     MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"])
  784     MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "sdf sd mol smi txt csv tsv")
  785 
  786     MiscUtil.ValidateOptionFilePath("-r, --reffile", Options["--reffile"])
  787     MiscUtil.ValidateOptionFileExt("-r, --reffile", Options["--reffile"], "sdf sd mol")
  788     
  789     MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "sdf sd")
  790     MiscUtil.ValidateOptionsOutputFileOverwrite("-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"])
  791     MiscUtil.ValidateOptionsDistinctFileNames("-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"])
  792         
  793     MiscUtil.ValidateOptionIntegerValue("--maxConfs", Options["--maxConfs"], {">": 0})
  794     
  795     MiscUtil.ValidateOptionTextValue("--mp", Options["--mp"], "yes no")
  796     MiscUtil.ValidateOptionTextValue("-q, --quiet", Options["--quiet"], "yes no")
  797     
  798     MiscUtil.ValidateOptionTextValue("-r, --removeHydrogens", Options["--removeHydrogens"], "yes no")
  799     
  800     MiscUtil.ValidateOptionTextValue("-u, --useTethers", Options["--useTethers"], "yes no")
  801 
  802 # Setup a usage string for docopt...
  803 _docoptUsage_ = """
  804 RDKitGenerateConstrainedConformers.py - Generate constrained molecular conformations
  805 
  806 Usage:
  807     RDKitGenerateConstrainedConformers.py [--addHydrogens <yes or no>] [--conformerGenerator <SDG, KDG, ETDG, ETKDG, ETKDGv2>]
  808                                           [--forceField <UFF, or MMFF>] [--forceFieldMMFFVariant <MMFF94 or MMFF94s>]
  809                                           [--energyOut  <yes or no>] [--enforceChirality <yes or no>]  [--energyRMSDCutoff <number>]
  810                                           [--energyWindow <number> ] [--infileParams <Name,Value,...>] [--maxConfs <number>]
  811                                           [--mcsParams <Name,Value,...>] [--mp <yes or no>] [--mpParams <Name,Value,...>]
  812                                           [ --outfileParams <Name,Value,...> ] [--overwrite] [--quiet <yes or no>] [ --removeHydrogens <yes or no>]
  813                                           [--scaffold <auto or SMARTS>]  [--scaffoldRMSDOut  <yes or no>] [--useTethers  <yes or no>] 
  814                                           [-w <dir>] -i <infile> -r <reffile> -o <outfile> 
  815     RDKitGenerateConstrainedConformers.py -h | --help | -e | --examples
  816 
  817 Description:
  818     Generate molecular conformations  by performing a constrained energy minimization
  819     against a reference molecule. An initial set of 3D conformers are generated for the input
  820     molecules using distance geometry. A common core scaffold, corresponding to
  821     a Maximum Common Substructure (MCS) or an explicit SMARTS pattern,  is identified
  822     between a pair of input and reference molecules. The core scaffold atoms in input
  823     molecules are aligned against the same atoms in the reference molecule. The energy
  824     of aligned structures are minimized using the forcefield to generate the final 3D structures.
  825     
  826     The supported input file formats are: Mol (.mol), SD (.sdf, .sd), SMILES (.smi,
  827     .csv, .tsv, .txt)
  828 
  829     The supported output file formats are: SD (.sdf, .sd)
  830 
  831 Options:
  832     -a, --addHydrogens <yes or no>  [default: yes]
  833         Add hydrogens before minimization.
  834     -c, --conformerGenerator <text>  [default: ETKDGv2]
  835         Conformation generation methodology for generating initial 3D coordinates
  836         for molecules in input file. A common core scaffold is identified between a
  837         a pair of input and reference molecules. The atoms in common core scaffold 
  838         of input molecules are aligned against the reference molecule followed by
  839         energy minimization to generate final 3D structure.
  840         
  841         The possible values along with a brief description are shown below:
  842             
  843             SDG: Standard Distance Geometry
  844             KDG: basic Knowledge-terms with Distance Geometry
  845             ETDG: Experimental Torsion-angle preference with Distance Geometry
  846             ETKDG: Experimental Torsion-angle preference along with basic
  847                 Knowledge-terms and Distance Geometry [Ref 129]
  848             ETKDGv2: Experimental Torsion-angle preference along with basic
  849                 Knowledge-terms and Distance Geometry [Ref 167]
  850     -f, --forceField <UFF, MMFF>  [default: MMFF]
  851         Forcefield method to use for  constrained energy minimization. Possible values:
  852         Universal Force Field (UFF) [ Ref 81 ] or Merck Molecular Mechanics Force
  853         Field [ Ref 83-87 ] .
  854     --forceFieldMMFFVariant <MMFF94 or MMFF94s>  [default: MMFF94]
  855         Variant of MMFF forcefield to use for energy minimization.
  856     --energyOut <yes or no>  [default: No]
  857         Write out energy values.
  858     --enforceChirality <yes or no>  [default: Yes]
  859         Enforce chirality for defined chiral centers.
  860     --energyRMSDCutoff <number>  [default: 0.5]
  861         RMSD cutoff for retaining conformations after embedding and energy minimization.
  862         Possible values: A number or None
  863         
  864         The default is to keep only those conformations which are different from the
  865         lowest energy conformation by the specified RMSD cutoff. The None value may
  866         be used to keep all minimized conformations with in the specified energy window
  867         from the lowest energy conformation. The lowest energy conformation is always
  868         retained.
  869     --energyWindow <number>  [default: 20]
  870         Energy window in kcal/mol for selecting conformers.
  871     -e, --examples
  872         Print examples.
  873     -h, --help
  874         Print this help message.
  875     -i, --infile <infile>
  876         Input file name.
  877     --infileParams <Name,Value,...>  [default: auto]
  878         A comma delimited list of parameter name and value pairs for reading
  879         molecules from files. The supported parameter names for different file
  880         formats, along with their default values, are shown below:
  881             
  882             SD, MOL: removeHydrogens,yes,sanitize,yes,strictParsing,yes
  883             
  884             SMILES: smilesColumn,1,smilesNameColumn,2,smilesDelimiter,space,
  885                 smilesTitleLine,auto,sanitize,yes
  886             
  887         Possible values for smilesDelimiter: space, comma or tab.
  888     --maxConfs <number>  [default: 50]
  889         Maximum number of conformations to generate for each molecule by conformation
  890         generation methodology for initial 3D coordinates. A constrained minimization is
  891         performed using the specified forcefield and the lowest energy conformation is written
  892         to the output file.
  893     --mcsParams <Name,Value,...>  [default: auto]
  894         Parameter values to use for identifying a maximum common substructure
  895         (MCS) in between a pair of reference and input molecules.In general, it is a
  896         comma delimited list of parameter name and value pairs. The supported
  897         parameter names along with their default values are shown below:
  898             
  899             atomCompare,CompareElements,bondCompare,CompareOrder,
  900             maximizeBonds,yes,matchValences,yes,matchChiralTag,no,
  901             minNumAtoms,1,minNumBonds,0,ringMatchesRingOnly,yes,
  902             completeRingsOnly,yes,threshold,1.0,timeOut,3600,seedSMARTS,none
  903             
  904         Possible values for atomCompare: CompareAny, CompareElements,
  905         CompareIsotopes. Possible values for bondCompare: CompareAny,
  906         CompareOrder, CompareOrderExact.
  907         
  908         A brief description of MCS parameters taken from RDKit documentation is
  909         as follows:
  910             
  911             atomCompare - Controls match between two atoms
  912             bondCompare - Controls match between two bonds
  913             maximizeBonds - Maximize number of bonds instead of atoms
  914             matchValences - Include atom valences in the MCS match
  915             matchChiralTag - Include atom chirality in the MCS match
  916             minNumAtoms - Minimum number of atoms in the MCS match
  917             minNumBonds - Minimum number of bonds in the MCS match
  918             ringMatchesRingOnly - Ring bonds only match other ring bonds
  919             completeRingsOnly - Partial rings not allowed during the match
  920             threshold - Fraction of the dataset that must contain the MCS
  921             seedSMARTS - SMARTS string as the seed of the MCS
  922             timeout - Timeout for the MCS calculation in seconds
  923             
  924     --mp <yes or no>  [default: no]
  925         Use multiprocessing.
  926          
  927         By default, input data is retrieved in a lazy manner via mp.Pool.imap()
  928         function employing lazy RDKit data iterable. This allows processing of
  929         arbitrary large data sets without any additional requirements memory.
  930         
  931         All input data may be optionally loaded into memory by mp.Pool.map()
  932         before starting worker processes in a process pool by setting the value
  933         of 'inputDataMode' to 'InMemory' in '--mpParams' option.
  934         
  935         A word to the wise: The default 'chunkSize' value of 1 during 'Lazy' input
  936         data mode may adversely impact the performance. The '--mpParams' section
  937         provides additional information to tune the value of 'chunkSize'.
  938     --mpParams <Name,Value,...>  [default: auto]
  939         A comma delimited list of parameter name and value pairs to configure
  940         multiprocessing.
  941         
  942         The supported parameter names along with their default and possible
  943         values are shown below:
  944         
  945             chunkSize, auto
  946             inputDataMode, Lazy   [ Possible values: InMemory or Lazy ]
  947             numProcesses, auto   [ Default: mp.cpu_count() ]
  948         
  949         These parameters are used by the following functions to configure and
  950         control the behavior of multiprocessing: mp.Pool(), mp.Pool.map(), and
  951         mp.Pool.imap().
  952         
  953         The chunkSize determines chunks of input data passed to each worker
  954         process in a process pool by mp.Pool.map() and mp.Pool.imap() functions.
  955         The default value of chunkSize is dependent on the value of 'inputDataMode'.
  956         
  957         The mp.Pool.map() function, invoked during 'InMemory' input data mode,
  958         automatically converts RDKit data iterable into a list, loads all data into
  959         memory, and calculates the default chunkSize using the following method
  960         as shown in its code:
  961         
  962             chunkSize, extra = divmod(len(dataIterable), len(numProcesses) * 4)
  963             if extra: chunkSize += 1
  964         
  965         For example, the default chunkSize will be 7 for a pool of 4 worker processes
  966         and 100 data items.
  967         
  968         The mp.Pool.imap() function, invoked during 'Lazy' input data mode, employs
  969         'lazy' RDKit data iterable to retrieve data as needed, without loading all the
  970         data into memory. Consequently, the size of input data is not known a priori.
  971         It's not possible to estimate an optimal value for the chunkSize. The default 
  972         chunkSize is set to 1.
  973         
  974         The default value for the chunkSize during 'Lazy' data mode may adversely
  975         impact the performance due to the overhead associated with exchanging
  976         small chunks of data. It is generally a good idea to explicitly set chunkSize to
  977         a larger value during 'Lazy' input data mode, based on the size of your input
  978         data and number of processes in the process pool.
  979         
  980         The mp.Pool.map() function waits for all worker processes to process all
  981         the data and return the results. The mp.Pool.imap() function, however,
  982         returns the the results obtained from worker processes as soon as the
  983         results become available for specified chunks of data.
  984         
  985         The order of data in the results returned by both mp.Pool.map() and 
  986         mp.Pool.imap() functions always corresponds to the input data.
  987     -o, --outfile <outfile>
  988         Output file name.
  989     --outfileParams <Name,Value,...>  [default: auto]
  990         A comma delimited list of parameter name and value pairs for writing
  991         molecules to files. The supported parameter names for different file
  992         formats, along with their default values, are shown below:
  993             
  994             SD: kekulize,yes,forceV3000,no
  995             
  996     --overwrite
  997         Overwrite existing files.
  998     -q, --quiet <yes or no>  [default: no]
  999         Use quiet mode. The warning and information messages will not be printed.
 1000     -r, --reffile <reffile>
 1001         Reference input file name containing a 3D reference molecule. A common
 1002         core scaffold must be present in a pair of an input and reference molecules.
 1003         Otherwise, no constrained minimization is performed on the input molecule.
 1004     --removeHydrogens <yes or no>  [default: Yes]
 1005         Remove hydrogens after minimization.
 1006     -s, --scaffold <auto or SMARTS>  [default: auto]
 1007         Common core scaffold between a pair of input and reference molecules used for
 1008         constrained minimization of molecules in input file. Possible values: Auto or a
 1009         valid SMARTS pattern. The common core scaffold is automatically detected
 1010         corresponding to the Maximum Common Substructure (MCS) between a pair of
 1011         reference and input molecules. A valid SMARTS pattern may be optionally specified
 1012         for the common core scaffold.
 1013     --scaffoldRMSDOut <yes or no>  [default: No]
 1014         Write out RMSD value for common core alignment between a pair of input and
 1015         reference molecules.
 1016     -u, --useTethers <yes or no>  [default: yes]
 1017         Use tethers to optimize the final conformation by applying a series of extra forces
 1018         to align matching atoms to the positions of the core atoms. Otherwise, use simple
 1019         distance constraints during the optimization.
 1020     -w, --workingdir <dir>
 1021         Location of working directory which defaults to the current directory.
 1022 
 1023 Examples:
 1024     To generate conformers by performing constrained energy minimization for molecules
 1025     in a SMILES file against a reference 3D molecule in a SD file using a common core
 1026     scaffold between pairs of input and reference molecules identified using MCS,
 1027     generating up to 50 conformations using ETKDG methodology followed by MMFF
 1028     forcefield minimization within energy window of 20 kcal/mol and RMSD of greater
 1029     than 0.5 from the lowest energy conformation, and write out a SD file:
 1030 
 1031         % RDKitGenerateConstrainedConformers.py  -i SampleSeriesD3R.smi
 1032           -r SampleSeriesRef3D.sdf  -o SampleOut.sdf
 1033 
 1034     To rerun the first example in a quiet mode and write out a SD file, type:
 1035 
 1036         % RDKitGenerateConstrainedConformers.py  -q yes -i SampleSeriesD3R.smi
 1037           -r SampleSeriesRef3D.sdf  -o SampleOut.sdf
 1038 
 1039     To rerun the first example in multiprocessing mode on all available CPUs
 1040     without loading all data into memory and write out a SD file, type:
 1041 
 1042         % RDKitGenerateConstrainedConformers.py  --mp yes -i SampleSeriesD3R.smi
 1043           -r SampleSeriesRef3D.sdf  -o SampleOut.sdf
 1044 
 1045     To run the first example in multiprocessing mode on all available CPUs
 1046     by loading all data into memory and write out a SD file, type:
 1047 
 1048         % RDKitGenerateConstrainedConformers.py  --mp yes --mpParams
 1049           "inputDataMode,InMemory" -i SampleSeriesD3R.smi
 1050           -r SampleSeriesRef3D.sdf  -o SampleOut.sdf
 1051 
 1052     To rerun the first example in multiprocessing mode on specific number of
 1053     CPUs and chunk size without loading all data into memory and write out a SD file,
 1054     type:
 1055 
 1056         % RDKitGenerateConstrainedConformers.py  --mp yes --mpParams
 1057           "inputDataMode,Lazy,numProcesses,4,chunkSize,8"
 1058           -i SampleSeriesD3R.smi -r SampleSeriesRef3D.sdf  -o SampleOut.sdf
 1059 
 1060     To rerun the first example using an explicit SMARTS string for a common core
 1061     scaffold and write out a SD file, type:
 1062 
 1063         % RDKitGenerateConstrainedConformers.py  --scaffold
 1064           "c2cc(-c3nc(N)ncc3)cn2" -i SampleSeriesD3R.smi
 1065           -r SampleSeriesRef3D.sdf  -o SampleOut.sdf
 1066 
 1067     To rerun the first example using molecules in a CSV SMILES file, SMILES
 1068     strings in column 1, name in column2, and write out a SD file, type:
 1069 
 1070         % RDKitGenerateConstrainedConformers.py  --infileParams
 1071           "smilesDelimiter,comma,smilesTitleLine,yes,smilesColumn,1,
 1072           smilesNameColumn,2" -i SampleSeriesD3R.csv -r SampleSeriesRef3D.sdf 
 1073           -o SampleOut.sdf
 1074 
 1075     To generate constrained conformers for molecules in a SD file against a reference
 1076     3D molecule in a SD file using a common core scaffold between pairs of input and
 1077     reference molecules identified using MCS, generating up to 10 conformations
 1078     using SDG methodology followed by UFF forcefield minimization, conformations 
 1079     with in an energy window of 10 kcal/mol and RMSD of greater that 1, and write out
 1080     a SD file containing minimum energy structure along with energy and embed RMS
 1081     values corresponding to each constrained molecule, type:
 1082 
 1083         % RDKitGenerateConstrainedConformers.py  --maxConfs 10  -c SDG -f UFF
 1084           --scaffoldRMSDOut yes --energyOut yes --energyRMSDCutoff 1.0
 1085           --energyWindow 10 -i SampleSeriesD3R.sdf -r SampleSeriesRef3D.sdf
 1086          -o SampleOut.sdf
 1087 
 1088 Author:
 1089     Manish Sud(msud@san.rr.com)
 1090 
 1091 See also:
 1092     RDKitCalculateRMSD.py, RDKitCalculateMolecularDescriptors.py, RDKitCompareMoleculeShapes.py,
 1093     RDKitConvertFileFormat.py, RDKitGenerateConformers.py, RDKitPerformConstrainedMinimization.py
 1094 
 1095 Copyright:
 1096     Copyright (C) 2024 Manish Sud. All rights reserved.
 1097 
 1098     The functionality available in this script is implemented using RDKit, an
 1099     open source toolkit for cheminformatics developed by Greg Landrum.
 1100 
 1101     This file is part of MayaChemTools.
 1102 
 1103     MayaChemTools is free software; you can redistribute it and/or modify it under
 1104     the terms of the GNU Lesser General Public License as published by the Free
 1105     Software Foundation; either version 3 of the License, or (at your option) any
 1106     later version.
 1107 
 1108 """
 1109 
 1110 if __name__ == "__main__":
 1111     main()