MayaChemTools

   1 #!/bin/env python
   2 #
   3 # File: RDKitPerformConstrainedMinimization.py
   4 # Author: Manish Sud <msud@san.rr.com>
   5 #
   6 # Copyright (C) 2024 Manish Sud. All rights reserved.
   7 #
   8 # The functionality available in this script is implemented using RDKit, an
   9 # open source toolkit for cheminformatics developed by Greg Landrum.
  10 #
  11 # This file is part of MayaChemTools.
  12 #
  13 # MayaChemTools is free software; you can redistribute it and/or modify it under
  14 # the terms of the GNU Lesser General Public License as published by the Free
  15 # Software Foundation; either version 3 of the License, or (at your option) any
  16 # later version.
  17 #
  18 # MayaChemTools is distributed in the hope that it will be useful, but without
  19 # any warranty; without even the implied warranty of merchantability of fitness
  20 # for a particular purpose.  See the GNU Lesser General Public License for more
  21 # details.
  22 #
  23 # You should have received a copy of the GNU Lesser General Public License
  24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  26 # Boston, MA, 02111-1307, USA.
  27 #
  28 
  29 from __future__ import print_function
  30 
  31 # Add local python path to the global path and import standard library modules...
  32 import os
  33 import sys;  sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python"))
  34 import time
  35 import re
  36 import multiprocessing as mp
  37 
  38 # RDKit imports...
  39 try:
  40     from rdkit import rdBase
  41     from rdkit import Chem
  42     from rdkit.Chem import AllChem
  43     from rdkit.Chem import rdFMCS
  44 except ImportError as ErrMsg:
  45     sys.stderr.write("\nFailed to import RDKit module/package: %s\n" % ErrMsg)
  46     sys.stderr.write("Check/update your RDKit environment and try again.\n\n")
  47     sys.exit(1)
  48 
  49 # MayaChemTools imports...
  50 try:
  51     from docopt import docopt
  52     import MiscUtil
  53     import RDKitUtil
  54 except ImportError as ErrMsg:
  55     sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg)
  56     sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n")
  57     sys.exit(1)
  58 
  59 ScriptName = os.path.basename(sys.argv[0])
  60 Options = {}
  61 OptionsInfo = {}
  62 
  63 def main():
  64     """Start execution of the script."""
  65     
  66     MiscUtil.PrintInfo("\n%s (RDKit v%s; MayaChemTools v%s; %s): Starting...\n" % (ScriptName, rdBase.rdkitVersion, MiscUtil.GetMayaChemToolsVersion(), time.asctime()))
  67     
  68     (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime()
  69     
  70     # Retrieve command line arguments and options...
  71     RetrieveOptions()
  72     
  73     # Process and validate command line arguments and options...
  74     ProcessOptions()
  75     
  76     # Perform actions required by the script...
  77     PerformConstrainedMinimization()
  78     
  79     MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName)
  80     MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime))
  81 
  82 def PerformConstrainedMinimization():
  83     """Perform constrained minimization."""
  84     
  85     # Read and validate reference molecule...
  86     RefMol = RetrieveReferenceMolecule()
  87     
  88     # Setup a molecule reader for input file...
  89     MiscUtil.PrintInfo("\nProcessing file %s..." % OptionsInfo["Infile"])
  90     OptionsInfo["InfileParams"]["AllowEmptyMols"] = True
  91     Mols  = RDKitUtil.ReadMolecules(OptionsInfo["Infile"], **OptionsInfo["InfileParams"])
  92 
  93     # Set up a molecule writer...
  94     Writer = RDKitUtil.MoleculesWriter(OptionsInfo["Outfile"], **OptionsInfo["OutfileParams"])
  95     if Writer is None:
  96         MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OptionsInfo["Outfile"])
  97     MiscUtil.PrintInfo("Generating file %s..." % OptionsInfo["Outfile"])
  98 
  99     MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount = ProcessMolecules(RefMol, Mols, Writer)
 100 
 101     if Writer is not None:
 102         Writer.close()
 103     
 104     MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount)
 105     MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount)
 106     MiscUtil.PrintInfo("Number of molecules with missing core scaffold: %d" % CoreScaffoldMissingCount)
 107     MiscUtil.PrintInfo("Number of molecules failed during conformation generation or minimization: %d" % MinimizationFailedCount)
 108     MiscUtil.PrintInfo("Number of ignored molecules: %d" % (MolCount - ValidMolCount + CoreScaffoldMissingCount + MinimizationFailedCount))
 109 
 110 def ProcessMolecules(RefMol, Mols, Writer):
 111     """Process and minimize molecules."""
 112     
 113     if OptionsInfo["MPMode"]:
 114         return ProcessMoleculesUsingMultipleProcesses(RefMol, Mols, Writer)
 115     else:
 116         return ProcessMoleculesUsingSingleProcess(RefMol, Mols, Writer)
 117 
 118 def ProcessMoleculesUsingSingleProcess(RefMol, Mols, Writer):
 119     """Process and minimize molecules using a single process."""
 120 
 121     (MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount) = [0] * 4
 122     
 123     for Mol in Mols:
 124         MolCount += 1
 125         
 126         if Mol is None:
 127             continue
 128         
 129         if RDKitUtil.IsMolEmpty(Mol):
 130             if not OptionsInfo["QuietMode"]:
 131                 MolName = RDKitUtil.GetMolName(Mol, MolCount)
 132                 MiscUtil.PrintWarning("Ignoring empty molecule: %s" % MolName)
 133             continue
 134         ValidMolCount += 1
 135 
 136         # Setup a reference molecule core containing common scaffold atoms...
 137         RefMolCore = SetupCoreScaffold(RefMol, Mol, MolCount)
 138         if RefMolCore is None:
 139             CoreScaffoldMissingCount += 1
 140             continue
 141             
 142         Mol, CalcStatus, Energy, ScaffoldEmbedRMSD = ConstrainAndMinimizeMolecule(Mol, RefMolCore, MolCount)
 143         
 144         if not CalcStatus:
 145             MinimizationFailedCount += 1
 146             continue
 147         
 148         WriteMolecule(Writer, Mol, Energy, ScaffoldEmbedRMSD)
 149 
 150     return (MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount)
 151 
 152 def ProcessMoleculesUsingMultipleProcesses(RefMol, Mols, Writer):
 153     """Process and minimize molecules using multiprocessing."""
 154 
 155     MPParams = OptionsInfo["MPParams"]
 156     
 157     # Setup data for initializing a worker process...
 158     MiscUtil.PrintInfo("Encoding options info and reference molecule...")
 159     
 160     OptionsInfo["EncodedRefMol"] = RDKitUtil.MolToBase64EncodedMolString(RefMol)
 161     InitializeWorkerProcessArgs = (MiscUtil.ObjectToBase64EncodedString(Options), MiscUtil.ObjectToBase64EncodedString(OptionsInfo))
 162     
 163     # Setup a encoded mols data iterable for a worker process...
 164     WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(Mols)
 165 
 166     # Setup process pool along with data initialization for each process...
 167     MiscUtil.PrintInfo("\nConfiguring multiprocessing using %s method..." % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()"))
 168     MiscUtil.PrintInfo("NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n" % (MPParams["NumProcesses"], MPParams["InputDataMode"], ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"])))
 169     
 170     ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeWorkerProcess, InitializeWorkerProcessArgs)
 171     
 172     # Start processing...
 173     if re.match("^Lazy$", MPParams["InputDataMode"], re.I):
 174         Results = ProcessPool.imap(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
 175     elif re.match("^InMemory$", MPParams["InputDataMode"], re.I):
 176         Results = ProcessPool.map(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
 177     else:
 178         MiscUtil.PrintError("The value, %s, specified for \"--inputDataMode\" is not supported." % (MPParams["InputDataMode"]))
 179     
 180     (MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount) = [0] * 4
 181     for Result in Results:
 182         MolCount += 1
 183         MolIndex, EncodedMol, CoreScaffoldMissingStatus, CalcStatus, Energy, ScaffoldEmbedRMSD  = Result
 184         
 185         if EncodedMol is None:
 186             continue
 187         ValidMolCount += 1
 188 
 189         if CoreScaffoldMissingStatus:
 190             CoreScaffoldMissingStatus += 1
 191             continue
 192         
 193         if not CalcStatus:
 194             MinimizationFailedCount += 1
 195             continue
 196             
 197         Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
 198         WriteMolecule(Writer, Mol, Energy, ScaffoldEmbedRMSD)
 199     
 200     return (MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount)
 201     
 202 def InitializeWorkerProcess(*EncodedArgs):
 203     """Initialize data for a worker process."""
 204     
 205     global Options, OptionsInfo
 206 
 207     MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid())
 208 
 209     # Decode Options and OptionInfo...
 210     Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0])
 211     OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1])
 212 
 213     # Decode RefMol...
 214     OptionsInfo["RefMol"] = RDKitUtil.MolFromBase64EncodedMolString(OptionsInfo["EncodedRefMol"])
 215     
 216 def WorkerProcess(EncodedMolInfo):
 217     """Process data for a worker process."""
 218 
 219     MolIndex, EncodedMol = EncodedMolInfo
 220 
 221     CoreScaffoldMissingStatus = False
 222     CalcStatus = False
 223     Energy = None
 224     ScaffoldEmbedRMSD = None
 225     
 226     if EncodedMol is None:
 227         return [MolIndex, None, CoreScaffoldMissingStatus, CalcStatus, Energy, ScaffoldEmbedRMSD]
 228 
 229     RefMol = OptionsInfo["RefMol"]
 230     
 231     Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
 232     if RDKitUtil.IsMolEmpty(Mol):
 233         if not OptionsInfo["QuietMode"]:
 234             MolName = RDKitUtil.GetMolName(Mol, (MolIndex + 1))
 235             MiscUtil.PrintWarning("Ignoring empty molecule: %s" % MolName)
 236         return [MolIndex, None, CoreScaffoldMissingStatus, CalcStatus, Energy, ScaffoldEmbedRMSD]
 237     
 238     # Setup a reference molecule core containing common scaffold atoms...
 239     RefMolCore = SetupCoreScaffold(RefMol, Mol, (MolIndex + 1))
 240     if RefMolCore is None:
 241         CoreScaffoldMissingStatus = True
 242         return [MolIndex, None, CalcStatus, CoreScaffoldMissingStatus, Energy, ScaffoldEmbedRMSD]
 243     
 244     Mol, CalcStatus, Energy, ScaffoldEmbedRMSD = ConstrainAndMinimizeMolecule(Mol, RefMolCore, (MolIndex + 1))
 245 
 246     return [MolIndex, RDKitUtil.MolToBase64EncodedMolString(Mol, PropertyPickleFlags = Chem.PropertyPickleOptions.MolProps | Chem.PropertyPickleOptions.PrivateProps), CoreScaffoldMissingStatus, CalcStatus, Energy, ScaffoldEmbedRMSD]
 247 
 248 def RetrieveReferenceMolecule():
 249     """Retrieve and validate reference molecule."""
 250     
 251     RefFile = OptionsInfo["RefFile"]
 252     
 253     MiscUtil.PrintInfo("\nProcessing file %s..." % (RefFile))
 254     OptionsInfo["InfileParams"]["AllowEmptyMols"] = False
 255     ValidRefMols, RefMolCount, ValidRefMolCount  = RDKitUtil.ReadAndValidateMolecules(RefFile, **OptionsInfo["InfileParams"])
 256     
 257     if ValidRefMolCount == 0:
 258         MiscUtil.PrintError("The reference file, %s, contains no valid molecules." % RefFile)
 259     elif ValidRefMolCount > 1:
 260         MiscUtil.PrintWarning("The reference file, %s, contains, %d, valid molecules. Using first molecule as the reference molecule..." % (RefFile, ValidRefMolCount))
 261     
 262     RefMol = ValidRefMols[0]
 263 
 264     if OptionsInfo["UseScaffoldSMARTS"]:
 265         ScaffoldPatternMol = Chem.MolFromSmarts(OptionsInfo["ScaffoldSMARTS"])
 266         if ScaffoldPatternMol is None:
 267             MiscUtil.PrintError("Failed to create scaffold pattern molecule. The scaffold SMARTS pattern, %s, specified using \"-s, --scaffold\" option is not valid." % (OptionsInfo["ScaffoldSMARTS"]))
 268         
 269         if not RefMol.HasSubstructMatch(ScaffoldPatternMol):
 270             MiscUtil.PrintError("The scaffold SMARTS pattern, %s, specified using \"-s, --scaffold\" option, is missing in the first valid reference molecule." % (OptionsInfo["ScaffoldSMARTS"]))
 271             
 272     return RefMol
 273 
 274 def SetupCoreScaffold(RefMol, Mol, MolCount):
 275     """Setup a reference molecule core containing common scaffold atoms between
 276     a pair of molecules."""
 277 
 278     if OptionsInfo["UseScaffoldMCS"]:
 279         return SetupCoreScaffoldByMCS(RefMol, Mol, MolCount)
 280     elif OptionsInfo["UseScaffoldSMARTS"]:
 281         return SetupCoreScaffoldBySMARTS(RefMol, Mol, MolCount)
 282     else:
 283         MiscUtil.PrintError("The  value, %s, specified for  \"-s, --scaffold\" option is not supported." % (OptionsInfo["Scaffold"]))
 284         
 285 def SetupCoreScaffoldByMCS(RefMol, Mol, MolCount):
 286     """Setup a reference molecule core containing common scaffold atoms between
 287     a pair of molecules using MCS."""
 288     
 289     MCSParams = OptionsInfo["MCSParams"]
 290     Mols = [RefMol, Mol]
 291 
 292     MCSResultObject = rdFMCS.FindMCS(Mols, maximizeBonds = MCSParams["MaximizeBonds"], threshold = MCSParams["Threshold"], timeout = MCSParams["TimeOut"], verbose = MCSParams["Verbose"], matchValences = MCSParams["MatchValences"], ringMatchesRingOnly = MCSParams["RingMatchesRingOnly"], completeRingsOnly = MCSParams["CompleteRingsOnly"], matchChiralTag = MCSParams["MatchChiralTag"], atomCompare = MCSParams["AtomCompare"], bondCompare = MCSParams["BondCompare"], seedSmarts = MCSParams["SeedSMARTS"]) 
 293     
 294     if MCSResultObject.canceled:
 295         if not OptionsInfo["QuietMode"]:
 296             MiscUtil.PrintWarning("MCS failed to identify a common core scaffold between reference moecule and input molecule %s. Specify a different set of parameters using \"-m, --mcsParams\" option and try again." % (RDKitUtil.GetMolName(Mol, MolCount)))
 297         return None
 298     
 299     CoreNumAtoms = MCSResultObject.numAtoms
 300     CoreNumBonds = MCSResultObject.numBonds
 301     
 302     SMARTSCore = MCSResultObject.smartsString
 303     
 304     if not len(SMARTSCore):
 305         if not OptionsInfo["QuietMode"]:
 306             MiscUtil.PrintWarning("MCS failed to identify a common core scaffold between reference moecule and input molecule %s. Specify a different set of parameters using \"-m, --mcsParams\" option and try again." % (RDKitUtil.GetMolName(Mol, MolCount)))
 307         return None
 308         
 309     if CoreNumAtoms < MCSParams["MinNumAtoms"]:
 310         if not OptionsInfo["QuietMode"]:
 311             MiscUtil.PrintWarning("Number of atoms, %d, in core scaffold identified by MCS is less than, %d, as specified by \"minNumAtoms\" parameter in  \"-m, --mcsParams\" option." % (CoreNumAtoms, MCSParams["MinNumAtoms"]))
 312         return None
 313     
 314     if CoreNumBonds < MCSParams["MinNumBonds"]:
 315         if not OptionsInfo["QuietMode"]:
 316             MiscUtil.PrintWarning("Number of bonds, %d, in core scaffold identified by MCS is less than, %d, as specified by \"minNumBonds\" parameter in  \"-m, --mcsParams\" option." % (CoreNumBonds, MCSParams["MinNumBonds"]))
 317         return None
 318 
 319     return GenerateCoreMol(RefMol, SMARTSCore)
 320     
 321 def SetupCoreScaffoldBySMARTS(RefMol, Mol, MolCount):
 322     """Setup a reference molecule core containing common scaffold atoms between
 323     a pair of molecules using specified SMARTS."""
 324     
 325     if OptionsInfo["ScaffoldPatternMol"] is None:
 326         OptionsInfo["ScaffoldPatternMol"] = Chem.MolFromSmarts(OptionsInfo["ScaffoldSMARTS"])
 327         
 328     if not Mol.HasSubstructMatch(OptionsInfo["ScaffoldPatternMol"]):
 329         if not OptionsInfo["QuietMode"]:
 330             MiscUtil.PrintWarning("The scaffold SMARTS pattern, %s, specified using \"-s, --scaffold\" option is missing in input molecule,  %s." % (OptionsInfo["ScaffoldSMARTS"], RDKitUtil.GetMolName(Mol, MolCount)))
 331         return None
 332 
 333     return GenerateCoreMol(RefMol, OptionsInfo["ScaffoldSMARTS"])
 334 
 335 def GenerateCoreMol(RefMol, SMARTSCore):
 336     """Generate core molecule for embedding."""
 337 
 338     # Create a molecule corresponding to core atoms...
 339     SMARTSCoreMol = Chem.MolFromSmarts(SMARTSCore)
 340 
 341     # Setup a ref molecule containing core atoms with dummy atoms as
 342     # attachment points for atoms around the core atoms...
 343     Core = AllChem.ReplaceSidechains(Chem.RemoveHs(RefMol), SMARTSCoreMol)
 344 
 345     # Delete any substructures containing dummy atoms..
 346     RefMolCore = AllChem.DeleteSubstructs(Core, Chem.MolFromSmiles('*'))
 347     RefMolCore.UpdatePropertyCache()
 348     
 349     return RefMolCore
 350 
 351 def ConstrainAndMinimizeMolecule(Mol, RefMolCore, MolNum = None):
 352     "Constrain and minimize molecule."
 353 
 354     if  OptionsInfo["AddHydrogens"]:
 355         Mol = Chem.AddHs(Mol, addCoords = True)
 356 
 357     # Setup forcefield function to use for constrained minimization...
 358     ForceFieldFunction = None
 359     ForceFieldName = None
 360     if OptionsInfo["UseUFF"]:
 361         ForceFieldFunction = lambda mol, confId = -1 : AllChem.UFFGetMoleculeForceField(mol, confId = confId)
 362         ForeceFieldName = "UFF"
 363     else:
 364         ForceFieldFunction = lambda mol, confId = -1 : AllChem.MMFFGetMoleculeForceField(mol, AllChem.MMFFGetMoleculeProperties(mol, mmffVariant = OptionsInfo["MMFFVariant"]) , confId = confId)
 365         ForeceFieldName = "MMFF"
 366 
 367     if ForceFieldFunction is None:
 368         if not OptionsInfo["QuietMode"]:
 369             MiscUtil.PrintWarning("Failed to setup forcefield %s for molecule: %s\n" % (ForceFieldName, RDKitUtil.GetMolName(Mol, MolNum)))
 370         return (None, False, None, None)
 371     
 372     MaxConfs = OptionsInfo["MaxConfs"]
 373     EnforceChirality = OptionsInfo["EnforceChirality"]
 374     UseExpTorsionAnglePrefs = OptionsInfo["UseExpTorsionAnglePrefs"]
 375     ETVersion = OptionsInfo["ETVersion"]
 376     UseBasicKnowledge = OptionsInfo["UseBasicKnowledge"]
 377     UseTethers = OptionsInfo["UseTethers"]
 378 
 379     CalcEnergyMap = {}
 380     MolConfsMap = {}
 381     ConfIDs = [ConfID for ConfID in range(0, MaxConfs)]
 382 
 383     for ConfID in ConfIDs:
 384         try:
 385             MolConf = Chem.Mol(Mol)
 386             AllChem.ConstrainedEmbed(MolConf, RefMolCore, useTethers = UseTethers, coreConfId = -1, randomseed = ConfID, getForceField = ForceFieldFunction, enforceChirality = EnforceChirality, useExpTorsionAnglePrefs = UseExpTorsionAnglePrefs, useBasicKnowledge = UseBasicKnowledge, ETversion = ETVersion)
 387         except (ValueError, RuntimeError, Chem.rdchem.KekulizeException)  as ErrMsg:
 388             if not OptionsInfo["QuietMode"]:
 389                 MolName = RDKitUtil.GetMolName(Mol, MolNum)
 390                 MiscUtil.PrintWarning("Constrained embedding couldn't  be performed for molecule %s:\n%s\n" % (RDKitUtil.GetMolName(Mol, MolNum), ErrMsg))
 391             return (None, False, None, None)
 392         
 393         EnergyStatus, Energy = GetEnergy(MolConf)
 394         
 395         if not EnergyStatus:
 396             if not OptionsInfo["QuietMode"]:
 397                 MolName = RDKitUtil.GetMolName(Mol, MolNum)
 398                 MiscUtil.PrintWarning("Failed to retrieve calculated energy for conformation number %d of molecule %s. Try again after removing any salts or cleaing up the molecule...\n" % (ConfID, MolName))
 399             return (None, False, None, None)
 400         
 401         CalcEnergyMap[ConfID] = Energy
 402         MolConfsMap[ConfID] = MolConf
 403 
 404     SortedConfIDs = sorted(ConfIDs, key = lambda ConfID: CalcEnergyMap[ConfID])
 405     MinEnergyConfID = SortedConfIDs[0]
 406     
 407     MinEnergy = "%.2f" % CalcEnergyMap[MinEnergyConfID]  if OptionsInfo["EnergyOut"] else None
 408     MinEnergyMolConf = MolConfsMap[MinEnergyConfID]
 409     
 410     ScaffoldEmbedRMSD = "%.4f" % float(MinEnergyMolConf.GetProp('EmbedRMS')) if OptionsInfo["ScaffoldRMSDOut"] else None
 411     MinEnergyMolConf.ClearProp('EmbedRMS')
 412     
 413     if  OptionsInfo["RemoveHydrogens"]:
 414         MinEnergyMolConf = Chem.RemoveHs(MinEnergyMolConf)
 415         
 416     return (MinEnergyMolConf, True, MinEnergy, ScaffoldEmbedRMSD)
 417 
 418 def GetEnergy(Mol, ConfID = None):
 419     """Calculate energy."""
 420 
 421     Status = True
 422     Energy = None
 423 
 424     if ConfID is None:
 425         ConfID = -1
 426     
 427     if OptionsInfo["UseUFF"]:
 428         UFFMoleculeForcefield = AllChem.UFFGetMoleculeForceField(Mol, confId = ConfID)
 429         if UFFMoleculeForcefield is None:
 430             Status = False
 431         else:
 432             Energy = UFFMoleculeForcefield.CalcEnergy()
 433     elif OptionsInfo["UseMMFF"]:
 434         MMFFMoleculeProperties = AllChem.MMFFGetMoleculeProperties(Mol, mmffVariant = OptionsInfo["MMFFVariant"])
 435         MMFFMoleculeForcefield = AllChem.MMFFGetMoleculeForceField(Mol, MMFFMoleculeProperties, confId = ConfID)
 436         if MMFFMoleculeForcefield is None:
 437             Status = False
 438         else:
 439             Energy = MMFFMoleculeForcefield.CalcEnergy()
 440     else:
 441         MiscUtil.PrintError("Couldn't retrieve conformer energy: Specified forcefield, %s, is not supported" % OptionsInfo["ForceField"])
 442     
 443     return (Status, Energy)
 444     
 445 def WriteMolecule(Writer, Mol, Energy = None, ScaffoldEmbedRMSD = None, ConfID = None,):
 446     """Write molecule."""
 447 
 448     if ScaffoldEmbedRMSD is not None:
 449         Mol.SetProp("CoreScaffoldEmbedRMSD", ScaffoldEmbedRMSD)
 450             
 451     if Energy is not None:
 452         Mol.SetProp(OptionsInfo["EnergyLabel"], Energy)
 453             
 454     if ConfID is None:
 455         Writer.write(Mol)
 456     else:
 457         Writer.write(Mol, confId = ConfID)
 458     
 459 def ProcessMCSParameters():
 460     """Set up and process MCS parameters."""
 461 
 462     SetupMCSParameters()
 463     ProcessSpecifiedMCSParameters()
 464 
 465 def SetupMCSParameters():
 466     """Set up default MCS parameters."""
 467     
 468     OptionsInfo["MCSParams"] = {"MaximizeBonds": True, "Threshold": 0.9, "TimeOut": 3600, "Verbose": False, "MatchValences": True, "MatchChiralTag": False, "RingMatchesRingOnly": True, "CompleteRingsOnly": True, "AtomCompare": rdFMCS.AtomCompare.CompareElements, "BondCompare": rdFMCS.BondCompare.CompareOrder, "SeedSMARTS": "", "MinNumAtoms": 1, "MinNumBonds": 0}
 469     
 470 def ProcessSpecifiedMCSParameters():
 471     """Process specified MCS parameters."""
 472 
 473     if re.match("^auto$", OptionsInfo["SpecifiedMCSParams"], re.I):
 474         # Nothing to process...
 475         return
 476     
 477     # Parse specified parameters...
 478     MCSParams = re.sub(" ", "", OptionsInfo["SpecifiedMCSParams"])
 479     if not MCSParams:
 480         MiscUtil.PrintError("No valid parameter name and value pairs specified using \"-m, --mcsParams\" option.")
 481 
 482     MCSParamsWords = MCSParams.split(",")
 483     if len(MCSParamsWords) % 2:
 484         MiscUtil.PrintError("The number of comma delimited paramater names and values, %d, specified using \"-m, --mcsParams\" option must be an even number." % (len(MCSParamsWords)))
 485     
 486     # Setup  canonical parameter names...
 487     ValidParamNames = []
 488     CanonicalParamNamesMap = {}
 489     for ParamName in sorted(OptionsInfo["MCSParams"]):
 490         ValidParamNames.append(ParamName)
 491         CanonicalParamNamesMap[ParamName.lower()] = ParamName
 492 
 493     # Validate and set paramater names and value...
 494     for Index in range(0, len(MCSParamsWords), 2):
 495         Name = MCSParamsWords[Index]
 496         Value = MCSParamsWords[Index + 1]
 497 
 498         CanonicalName = Name.lower()
 499         if  not CanonicalName in CanonicalParamNamesMap:
 500             MiscUtil.PrintError("The parameter name, %s, specified using \"-m, --mcsParams\" option is not a valid name. Supported parameter names: %s" % (Name,  " ".join(ValidParamNames)))
 501 
 502         ParamName = CanonicalParamNamesMap[CanonicalName]
 503         if re.match("^Threshold$", ParamName, re.I):
 504             Value = float(Value)
 505             if Value <= 0.0 or Value > 1.0 :
 506                 MiscUtil.PrintError("The parameter value, %s, specified using \"-m, --mcsParams\" option  for parameter, %s, is not a valid value. Supported values: > 0 and <= 1.0" % (Value, Name))
 507             ParamValue = Value
 508         elif re.match("^Timeout$", ParamName, re.I):
 509             Value = int(Value)
 510             if Value <= 0:
 511                 MiscUtil.PrintError("The parameter value, %s, specified using \"-m, --mcsParams\" option  for parameter, %s, is not a valid value. Supported values: > 0" % (Value, Name))
 512             ParamValue = Value
 513         elif re.match("^MinNumAtoms$", ParamName, re.I):
 514             Value = int(Value)
 515             if Value < 1:
 516                 MiscUtil.PrintError("The parameter value, %s, specified using \"-m, --mcsParams\" option  for parameter, %s, is not a valid value. Supported values: >= 1" % (Value, Name))
 517             ParamValue = Value
 518         elif re.match("^MinNumBonds$", ParamName, re.I):
 519             Value = int(Value)
 520             if Value < 0:
 521                 MiscUtil.PrintError("The parameter value, %s, specified using \"-m, --mcsParams\" option  for parameter, %s, is not a valid value. Supported values: >=0 " % (Value, Name))
 522             ParamValue = Value
 523         elif re.match("^AtomCompare$", ParamName, re.I):
 524             if re.match("^CompareAny$", Value, re.I):
 525                 ParamValue = rdFMCS.AtomCompare.CompareAny
 526             elif re.match("^CompareElements$", Value, re.I):
 527                 ParamValue = Chem.rdFMCS.AtomCompare.CompareElements
 528             elif re.match("^CompareIsotopes$", Value, re.I):
 529                 ParamValue = Chem.rdFMCS.AtomCompare.CompareIsotopes
 530             else:
 531                 MiscUtil.PrintError("The parameter value, %s, specified using \"-m, --mcsParams\" option  for parameter, %s, is not a valid value. Supported values: CompareAny CompareElements CompareIsotopes" % (Value, Name))
 532         elif re.match("^BondCompare$", ParamName, re.I):
 533             if re.match("^CompareAny$", Value, re.I):
 534                 ParamValue = Chem.rdFMCS.BondCompare.CompareAny
 535             elif re.match("^CompareOrder$", Value, re.I):
 536                 ParamValue = rdFMCS.BondCompare.CompareOrder
 537             elif re.match("^CompareOrderExact$", Value, re.I):
 538                 ParamValue = rdFMCS.BondCompare.CompareOrderExact
 539             else:
 540                 MiscUtil.PrintError("The parameter value, %s, specified using \"-m, --mcsParams\" option  for parameter, %s, is not a valid value. Supported values: CompareAny CompareOrder CompareOrderExact" % (Value, Name))
 541         elif re.match("^SeedSMARTS$", ParamName, re.I):
 542             if not len(Value):
 543                 MiscUtil.PrintError("The parameter value specified using \"-m, --mcsParams\" option  for parameter, %s, is empty. " % (Name))
 544             ParamValue = Value
 545         else:
 546             if not re.match("^(Yes|No|True|False)$", Value, re.I):
 547                 MiscUtil.PrintError("The parameter value, %s, specified using \"-m, --mcsParams\" option  for parameter, %s, is not a valid value. Supported values: Yes No True False" % (Value, Name))
 548             ParamValue = False
 549             if re.match("^(Yes|True)$", Value, re.I):
 550                 ParamValue = True
 551         
 552         # Set value...
 553         OptionsInfo["MCSParams"][ParamName] = ParamValue
 554 
 555 def ProcesssConformerGeneratorOption():
 556     """Process comformer generator option. """
 557     
 558     ConfGenParams = MiscUtil.ProcessOptionConformerGenerator("--conformerGenerator", Options["--conformerGenerator"])
 559     
 560     OptionsInfo["ConformerGenerator"] = ConfGenParams["ConformerGenerator"]
 561     OptionsInfo["UseBasicKnowledge"] = ConfGenParams["UseBasicKnowledge"]
 562     OptionsInfo["UseExpTorsionAnglePrefs"] = ConfGenParams["UseExpTorsionAnglePrefs"]
 563     OptionsInfo["ETVersion"] = ConfGenParams["ETVersion"]
 564 
 565 def ProcessOptions():
 566     """Process and validate command line arguments and options."""
 567     
 568     MiscUtil.PrintInfo("Processing options...")
 569     
 570     # Validate options...
 571     ValidateOptions()
 572     
 573     OptionsInfo["Infile"] = Options["--infile"]
 574     OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters("--infileParams", Options["--infileParams"], Options["--infile"])
 575     
 576     OptionsInfo["RefFile"] = Options["--reffile"]
 577 
 578     OptionsInfo["Scaffold"] = Options["--scaffold"]
 579     if re.match("^auto$", Options["--scaffold"], re.I):
 580         UseScaffoldMCS = True
 581         UseScaffoldSMARTS = False
 582         ScaffoldSMARTS = None
 583     else:
 584         UseScaffoldMCS = False
 585         UseScaffoldSMARTS = True
 586         ScaffoldSMARTS = OptionsInfo["Scaffold"]
 587     
 588     OptionsInfo["UseScaffoldMCS"] = UseScaffoldMCS
 589     OptionsInfo["UseScaffoldSMARTS"] = UseScaffoldSMARTS
 590     OptionsInfo["ScaffoldSMARTS"] = ScaffoldSMARTS
 591     OptionsInfo["ScaffoldPatternMol"] = None
 592 
 593     OptionsInfo["SpecifiedMCSParams"] = Options["--mcsParams"]
 594     ProcessMCSParameters()
 595     
 596     OptionsInfo["Outfile"] = Options["--outfile"]
 597     OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters("--outfileParams", Options["--outfileParams"])
 598     
 599     OptionsInfo["Overwrite"] = Options["--overwrite"]
 600 
 601     OptionsInfo["AddHydrogens"] = True if re.match("^yes$", Options["--addHydrogens"], re.I) else False
 602     
 603     ProcesssConformerGeneratorOption()
 604 
 605     if re.match("^UFF$", Options["--forceField"], re.I):
 606         ForceField = "UFF"
 607         UseUFF = True
 608         UseMMFF = False
 609     elif re.match("^MMFF$", Options["--forceField"], re.I):
 610         ForceField = "MMFF"
 611         UseUFF = False
 612         UseMMFF = True
 613     else:
 614         MiscUtil.PrintError("The value, %s, specified for \"--forceField\" is not supported." % (Options["--forceField"],))
 615     
 616     MMFFVariant = "MMFF94" if re.match("^MMFF94$", Options["--forceFieldMMFFVariant"], re.I) else "MMFF94s"
 617     
 618     OptionsInfo["ForceField"] = ForceField
 619     OptionsInfo["MMFFVariant"] = MMFFVariant
 620     OptionsInfo["UseMMFF"] = UseMMFF
 621     OptionsInfo["UseUFF"] = UseUFF
 622     
 623     OptionsInfo["ScaffoldRMSDOut"] = True if re.match("^yes$", Options["--scaffoldRMSDOut"], re.I) else False
 624     
 625     OptionsInfo["EnergyOut"] = True if re.match("^yes$", Options["--energyOut"], re.I) else False
 626     if UseMMFF:
 627         OptionsInfo["EnergyLabel"] = "%s_Energy" % MMFFVariant
 628     else:
 629         OptionsInfo["EnergyLabel"] = "%s_Energy" % ForceField
 630     
 631     OptionsInfo["EnforceChirality"] = True if re.match("^yes$", Options["--enforceChirality"], re.I) else False
 632     
 633     OptionsInfo["MaxConfs"] = int(Options["--maxConfs"])
 634     
 635     OptionsInfo["MPMode"] = True if re.match("^yes$", Options["--mp"], re.I) else False
 636     OptionsInfo["MPParams"] = MiscUtil.ProcessOptionMultiprocessingParameters("--mpParams", Options["--mpParams"])
 637     
 638     OptionsInfo["QuietMode"] = True if re.match("^yes$", Options["--quiet"], re.I) else False
 639     
 640     OptionsInfo["RemoveHydrogens"] = True if re.match("^yes$", Options["--removeHydrogens"], re.I) else False
 641     OptionsInfo["UseTethers"] = True if re.match("^yes$", Options["--useTethers"], re.I) else False
 642 
 643 def RetrieveOptions():
 644     """Retrieve command line arguments and options."""
 645     
 646     # Get options...
 647     global Options
 648     Options = docopt(_docoptUsage_)
 649     
 650     # Set current working directory to the specified directory...
 651     WorkingDir = Options["--workingdir"]
 652     if WorkingDir:
 653         os.chdir(WorkingDir)
 654     
 655     # Handle examples option...
 656     if "--examples" in Options and Options["--examples"]:
 657         MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_))
 658         sys.exit(0)
 659 
 660 def ValidateOptions():
 661     """Validate option values."""
 662     
 663     MiscUtil.ValidateOptionTextValue("-a, --addHydrogens", Options["--addHydrogens"], "yes no")
 664     MiscUtil.ValidateOptionTextValue("-c, --conformerGenerator", Options["--conformerGenerator"], "SDG KDG ETDG ETKDG ETKDGv2")
 665     
 666     MiscUtil.ValidateOptionTextValue("-f, --forceField", Options["--forceField"], "UFF MMFF")
 667     MiscUtil.ValidateOptionTextValue(" --forceFieldMMFFVariant", Options["--forceFieldMMFFVariant"], "MMFF94 MMFF94s")
 668     
 669     MiscUtil.ValidateOptionTextValue("--scaffoldRMSDOut", Options["--scaffoldRMSDOut"], "yes no")
 670     
 671     MiscUtil.ValidateOptionTextValue("--energyOut", Options["--energyOut"], "yes no")
 672     MiscUtil.ValidateOptionTextValue("--enforceChirality ", Options["--enforceChirality"], "yes no")
 673     
 674     MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"])
 675     MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "sdf sd mol smi txt csv tsv")
 676 
 677     MiscUtil.ValidateOptionFilePath("-r, --reffile", Options["--reffile"])
 678     MiscUtil.ValidateOptionFileExt("-r, --reffile", Options["--reffile"], "sdf sd mol")
 679     
 680     MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "sdf sd")
 681     MiscUtil.ValidateOptionsOutputFileOverwrite("-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"])
 682     MiscUtil.ValidateOptionsDistinctFileNames("-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"])
 683         
 684     MiscUtil.ValidateOptionIntegerValue("--maxConfs", Options["--maxConfs"], {">": 0})
 685     
 686     MiscUtil.ValidateOptionTextValue("--mp", Options["--mp"], "yes no")
 687     MiscUtil.ValidateOptionTextValue("-q, --quiet", Options["--quiet"], "yes no")
 688     
 689     MiscUtil.ValidateOptionTextValue("-r, --removeHydrogens", Options["--removeHydrogens"], "yes no")
 690     
 691     MiscUtil.ValidateOptionTextValue("-u, --useTethers", Options["--useTethers"], "yes no")
 692 
 693 # Setup a usage string for docopt...
 694 _docoptUsage_ = """
 695 RDKitPerformConstrainedMinimization.py - Perform constrained minimization
 696 
 697 Usage:
 698     RDKitPerformConstrainedMinimization.py [--addHydrogens <yes or no>] [--conformerGenerator <SDG, KDG, ETDG, ETKDG, ETKDGv2>]
 699                                            [--forceField <UFF, or MMFF>] [--forceFieldMMFFVariant <MMFF94 or MMFF94s>]
 700                                            [--energyOut  <yes or no>] [--enforceChirality <yes or no>] [--infileParams <Name,Value,...>]
 701                                            [--maxConfs <number>]  [--mcsParams <Name,Value,...>] [--mp <yes or no>] [--mpParams <Name,Value,...>]
 702                                            [ --outfileParams <Name,Value,...> ] [--overwrite] [--quiet <yes or no>] [ --removeHydrogens <yes or no>]
 703                                            [--scaffold <auto or SMARTS>]  [--scaffoldRMSDOut  <yes or no>] [--useTethers  <yes or no>] 
 704                                            [-w <dir>] -i <infile> -r <reffile> -o <outfile> 
 705     RDKitPerformConstrainedMinimization.py -h | --help | -e | --examples
 706 
 707 Description:
 708     Generate 3D structures for molecules by performing a constrained energy minimization
 709     against a reference molecule. An initial set of 3D conformers are generated for the
 710     input molecules using distance geometry. A common core scaffold, corresponding to
 711     a Maximum Common Substructure (MCS) or an explicit SMARTS pattern,  is identified
 712     between a pair of input and reference molecules. The core scaffold atoms in input
 713     molecules are aligned against the same atoms in the reference molecule. The energy
 714     of aligned structures are minimized using the forcefield to generate the final 3D structures.
 715 
 716     The supported input file formats are: Mol (.mol), SD (.sdf, .sd), SMILES (.smi
 717     .csv, .tsv, .txt)
 718 
 719     The supported output file formats are: SD (.sdf, .sd)
 720 
 721 Options:
 722     -a, --addHydrogens <yes or no>  [default: yes]
 723         Add hydrogens before minimization.
 724     -c, --conformerGenerator <text>  [default: ETKDGv2]
 725         Conformation generation methodology for generating initial 3D coordinates
 726         for molecules in input file. A common core scaffold is identified between a
 727         pair of input and reference molecules. The atoms in common core scaffold 
 728         of input molecules are aligned against the reference molecule followed by
 729         energy minimization to generate final 3D structure.
 730         
 731         The possible values along with a brief description are shown below:
 732             
 733             SDG: Standard Distance Geometry
 734             KDG: basic Knowledge-terms with Distance Geometry
 735             ETDG: Experimental Torsion-angle preference with Distance Geometry
 736             ETKDG: Experimental Torsion-angle preference along with basic
 737                 Knowledge-terms and Distance Geometry [Ref 129]
 738             ETKDGv2: Experimental Torsion-angle preference along with basic
 739                 Knowledge-terms and Distance Geometry [Ref 167]
 740     -f, --forceField <UFF, MMFF>  [default: MMFF]
 741         Forcefield method to use for  constrained energy minimization. Possible values:
 742         Universal Force Field (UFF) [ Ref 81 ] or Merck Molecular Mechanics Force
 743         Field [ Ref 83-87 ] .
 744     --forceFieldMMFFVariant <MMFF94 or MMFF94s>  [default: MMFF94]
 745         Variant of MMFF forcefield to use for energy minimization.
 746     --energyOut <yes or no>  [default: No]
 747         Write out energy values.
 748     --enforceChirality <yes or no>  [default: Yes]
 749         Enforce chirality for defined chiral centers.
 750     -e, --examples
 751         Print examples.
 752     -h, --help
 753         Print this help message.
 754     -i, --infile <infile>
 755         Input file name.
 756     --infileParams <Name,Value,...>  [default: auto]
 757         A comma delimited list of parameter name and value pairs for reading
 758         molecules from files. The supported parameter names for different file
 759         formats, along with their default values, are shown below:
 760             
 761             SD, MOL: removeHydrogens,yes,sanitize,yes,strictParsing,yes
 762             
 763             SMILES: smilesColumn,1,smilesNameColumn,2,smilesDelimiter,space,
 764                 smilesTitleLine,auto,sanitize,yes
 765             
 766         Possible values for smilesDelimiter: space, comma or tab.
 767     --maxConfs <number>  [default: 250]
 768         Maximum number of conformations to generate for each molecule by conformation
 769         generation methodology for initial 3D coordinates. A constrained minimization is
 770         performed using the specified forcefield and the lowest energy conformation is written
 771         to the output file.
 772     --mcsParams <Name,Value,...>  [default: auto]
 773         Parameter values to use for identifying a maximum common substructure
 774         (MCS) in between a pair of reference and input molecules.In general, it is a
 775         comma delimited list of parameter name and value pairs. The supported
 776         parameter names along with their default values are shown below:
 777             
 778             atomCompare,CompareElements,bondCompare,CompareOrder,
 779             maximizeBonds,yes,matchValences,yes,matchChiralTag,no,
 780             minNumAtoms,1,minNumBonds,0,ringMatchesRingOnly,yes,
 781             completeRingsOnly,yes,threshold,1.0,timeOut,3600,seedSMARTS,none
 782             
 783         Possible values for atomCompare: CompareAny, CompareElements,
 784         CompareIsotopes. Possible values for bondCompare: CompareAny,
 785         CompareOrder, CompareOrderExact.
 786         
 787         A brief description of MCS parameters taken from RDKit documentation is
 788         as follows:
 789             
 790             atomCompare - Controls match between two atoms
 791             bondCompare - Controls match between two bonds
 792             maximizeBonds - Maximize number of bonds instead of atoms
 793             matchValences - Include atom valences in the MCS match
 794             matchChiralTag - Include atom chirality in the MCS match
 795             minNumAtoms - Minimum number of atoms in the MCS match
 796             minNumBonds - Minimum number of bonds in the MCS match
 797             ringMatchesRingOnly - Ring bonds only match other ring bonds
 798             completeRingsOnly - Partial rings not allowed during the match
 799             threshold - Fraction of the dataset that must contain the MCS
 800             seedSMARTS - SMARTS string as the seed of the MCS
 801             timeout - Timeout for the MCS calculation in seconds
 802             
 803     --mp <yes or no>  [default: no]
 804         Use multiprocessing.
 805          
 806         By default, input data is retrieved in a lazy manner via mp.Pool.imap()
 807         function employing lazy RDKit data iterable. This allows processing of
 808         arbitrary large data sets without any additional requirements memory.
 809         
 810         All input data may be optionally loaded into memory by mp.Pool.map()
 811         before starting worker processes in a process pool by setting the value
 812         of 'inputDataMode' to 'InMemory' in '--mpParams' option.
 813         
 814         A word to the wise: The default 'chunkSize' value of 1 during 'Lazy' input
 815         data mode may adversely impact the performance. The '--mpParams' section
 816         provides additional information to tune the value of 'chunkSize'.
 817     --mpParams <Name,Value,...>  [default: auto]
 818         A comma delimited list of parameter name and value pairs to configure
 819         multiprocessing.
 820         
 821         The supported parameter names along with their default and possible
 822         values are shown below:
 823         
 824             chunkSize, auto
 825             inputDataMode, Lazy   [ Possible values: InMemory or Lazy ]
 826             numProcesses, auto   [ Default: mp.cpu_count() ]
 827         
 828         These parameters are used by the following functions to configure and
 829         control the behavior of multiprocessing: mp.Pool(), mp.Pool.map(), and
 830         mp.Pool.imap().
 831         
 832         The chunkSize determines chunks of input data passed to each worker
 833         process in a process pool by mp.Pool.map() and mp.Pool.imap() functions.
 834         The default value of chunkSize is dependent on the value of 'inputDataMode'.
 835         
 836         The mp.Pool.map() function, invoked during 'InMemory' input data mode,
 837         automatically converts RDKit data iterable into a list, loads all data into
 838         memory, and calculates the default chunkSize using the following method
 839         as shown in its code:
 840         
 841             chunkSize, extra = divmod(len(dataIterable), len(numProcesses) * 4)
 842             if extra: chunkSize += 1
 843         
 844         For example, the default chunkSize will be 7 for a pool of 4 worker processes
 845         and 100 data items.
 846         
 847         The mp.Pool.imap() function, invoked during 'Lazy' input data mode, employs
 848         'lazy' RDKit data iterable to retrieve data as needed, without loading all the
 849         data into memory. Consequently, the size of input data is not known a priori.
 850         It's not possible to estimate an optimal value for the chunkSize. The default 
 851         chunkSize is set to 1.
 852         
 853         The default value for the chunkSize during 'Lazy' data mode may adversely
 854         impact the performance due to the overhead associated with exchanging
 855         small chunks of data. It is generally a good idea to explicitly set chunkSize to
 856         a larger value during 'Lazy' input data mode, based on the size of your input
 857         data and number of processes in the process pool.
 858         
 859         The mp.Pool.map() function waits for all worker processes to process all
 860         the data and return the results. The mp.Pool.imap() function, however,
 861         returns the the results obtained from worker processes as soon as the
 862         results become available for specified chunks of data.
 863         
 864         The order of data in the results returned by both mp.Pool.map() and 
 865         mp.Pool.imap() functions always corresponds to the input data.
 866     -o, --outfile <outfile>
 867         Output file name.
 868     --outfileParams <Name,Value,...>  [default: auto]
 869         A comma delimited list of parameter name and value pairs for writing
 870         molecules to files. The supported parameter names for different file
 871         formats, along with their default values, are shown below:
 872             
 873             SD: kekulize,yes,forceV3000,no
 874             
 875     --overwrite
 876         Overwrite existing files.
 877     -q, --quiet <yes or no>  [default: no]
 878         Use quiet mode. The warning and information messages will not be printed.
 879     -r, --reffile <reffile>
 880         Reference input file name containing a 3D reference molecule. A common
 881         core scaffold must be present in a pair of an input and reference molecules.
 882         Otherwise, no constrained minimization is performed on the input molecule.
 883     --removeHydrogens <yes or no>  [default: Yes]
 884         Remove hydrogens after minimization.
 885     -s, --scaffold <auto or SMARTS>  [default: auto]
 886         Common core scaffold between a pair of input and reference molecules used for
 887         constrained minimization of molecules in input file. Possible values: Auto or a
 888         valid SMARTS pattern. The common core scaffold is automatically detected
 889         corresponding to the Maximum Common Substructure (MCS) between a pair of
 890         reference and input molecules. A valid SMARTS pattern may be optionally specified
 891         for the common core scaffold.
 892     --scaffoldRMSDOut <yes or no>  [default: No]
 893         Write out RMSD value for common core alignment between a pair of input and
 894         reference molecules.
 895     -u, --useTethers <yes or no>  [default: yes]
 896         Use tethers to optimize the final conformation by applying a series of extra forces
 897         to align matching atoms to the positions of the core atoms. Otherwise, use simple
 898         distance constraints during the optimization.
 899     -w, --workingdir <dir>
 900         Location of working directory which defaults to the current directory.
 901 
 902 Examples:
 903     To perform constrained energy minimization for molecules in a SMILES file against
 904     a reference 3D molecule in a SD file using a common core scaffold between pairs of
 905     input and reference molecules identified using MCS, generating up to 250 conformations
 906     using ETKDG methodology followed by MMFF forcefield minimization, and write out
 907     a SD file containing minimum energy structure corresponding to each constrained
 908     molecule, type:
 909 
 910         % RDKitPerformConstrainedMinimization.py  -i SampleSeriesD3R.smi
 911           -r SampleSeriesRef3D.sdf  -o SampleOut.sdf
 912 
 913     To rerun the first example in a quiet mode and write out a SD file, type:
 914 
 915         % RDKitPerformConstrainedMinimization.py  -q yes -i SampleSeriesD3R.smi
 916           -r SampleSeriesRef3D.sdf  -o SampleOut.sdf
 917 
 918     To run the first example in multiprocessing mode on all available CPUs
 919     without loading all data into memory and write out a SD file, type:
 920 
 921         % RDKitPerformConstrainedMinimization.py  --mp yes
 922           -i SampleSeriesD3R.smi -r SampleSeriesRef3D.sdf  -o SampleOut.sdf
 923 
 924     To rerun the first example in multiprocessing mode on all available CPUs
 925     by loading all data into memory and write out a SD file, type:
 926 
 927         % RDKitPerformConstrainedMinimization.py  --mp yes --mpParams
 928           "inputDataMode,InMemory" -i SampleSeriesD3R.smi
 929           -r SampleSeriesRef3D.sdf  -o SampleOut.sdf
 930 
 931     To rerun the first example using an explicit SMARTS string for a common core
 932     scaffold and write out a SD file, type:
 933 
 934         % RDKitPerformConstrainedMinimization.py  --scaffold
 935           "c1c(C(N(C(c2cc(-c3nc(N)ncc3)cn2))))cccc1" -i SampleSeriesD3R.smi -r
 936           SampleSeriesRef3D.sdf -o SampleOut.sdf 
 937 
 938     To rerun the first example using molecules in a CSV SMILES file, SMILES
 939     strings in column 1, name in column2, and write out a SD file, type:
 940 
 941         % RDKitPerformConstrainedMinimization.py  --infileParams "smilesDelimiter,
 942           comma,smilesTitleLine,yes,smilesColumn,1,smilesNameColumn,2"
 943           -i SampleSeriesD3R.csv -r SampleSeriesRef3D.sdf  -o SampleOut.sdf
 944 
 945     To perform constrained energy minimization for molecules in a SD file against
 946     a reference 3D molecule in a SD file using a common core scaffold between pairs of
 947     input and reference molecules identified using MCS, generating up to 50 conformations
 948     using SDG methodology followed by UFF forcefield minimization, and write out
 949     a SD file containing minimum energy structure along with energy and embed RMS values
 950     corresponding to each constrained molecule, type:
 951 
 952         % RDKitPerformConstrainedMinimization.py  --maxConfs 50  -c SDG -f UFF
 953           --scaffoldRMSDOut yes --energyOut yes -i SampleSeriesD3R.sdf
 954           -r SampleSeriesRef3D.sdf  -o SampleOut.sdf
 955 
 956 Author:
 957     Manish Sud(msud@san.rr.com)
 958 
 959 See also:
 960     RDKitCalculateRMSD.py, RDKitCalculateMolecularDescriptors.py, RDKitCompareMoleculeShapes.py,
 961     RDKitConvertFileFormat.py, RDKitGenerateConstrainedConformers.py, RDKitPerformMinimization.py
 962 
 963 Copyright:
 964     Copyright (C) 2024 Manish Sud. All rights reserved.
 965 
 966     The functionality available in this script is implemented using RDKit, an
 967     open source toolkit for cheminformatics developed by Greg Landrum.
 968 
 969     This file is part of MayaChemTools.
 970 
 971     MayaChemTools is free software; you can redistribute it and/or modify it under
 972     the terms of the GNU Lesser General Public License as published by the Free
 973     Software Foundation; either version 3 of the License, or (at your option) any
 974     later version.
 975 
 976 """
 977 
 978 if __name__ == "__main__":
 979     main()