MayaChemTools

   1 #!/bin/env python
   2 #
   3 # File: RDKitPerformConstrainedMinimization.py
   4 # Author: Manish Sud <msud@san.rr.com>
   5 #
   6 # Copyright (C) 2020 Manish Sud. All rights reserved.
   7 #
   8 # The functionality available in this script is implemented using RDKit, an
   9 # open source toolkit for cheminformatics developed by Greg Landrum.
  10 #
  11 # This file is part of MayaChemTools.
  12 #
  13 # MayaChemTools is free software; you can redistribute it and/or modify it under
  14 # the terms of the GNU Lesser General Public License as published by the Free
  15 # Software Foundation; either version 3 of the License, or (at your option) any
  16 # later version.
  17 #
  18 # MayaChemTools is distributed in the hope that it will be useful, but without
  19 # any warranty; without even the implied warranty of merchantability or fitness
  20 # for a particular purpose.  See the GNU Lesser General Public License for more
  21 # details.
  22 #
  23 # You should have received a copy of the GNU Lesser General Public License
  24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  26 # Boston, MA, 02111-1307, USA.
  27 #
  28 
  29 from __future__ import print_function
  30 
  31 # Add local python path to the global path and import standard library modules...
  32 import os
  33 import sys;  sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python"))
  34 import time
  35 import re
  36 import multiprocessing as mp
  37 
  38 # RDKit imports...
  39 try:
  40     from rdkit import rdBase
  41     from rdkit import Chem
  42     from rdkit.Chem import AllChem
  43     from rdkit.Chem import rdFMCS
  44 except ImportError as ErrMsg:
  45     sys.stderr.write("\nFailed to import RDKit module/package: %s\n" % ErrMsg)
  46     sys.stderr.write("Check/update your RDKit environment and try again.\n\n")
  47     sys.exit(1)
  48 
  49 # MayaChemTools imports...
  50 try:
  51     from docopt import docopt
  52     import MiscUtil
  53     import RDKitUtil
  54 except ImportError as ErrMsg:
  55     sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg)
  56     sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n")
  57     sys.exit(1)
  58 
  59 ScriptName = os.path.basename(sys.argv[0])
  60 Options = {}
  61 OptionsInfo = {}
  62 
  63 def main():
  64     """Start execution of the script"""
  65     
  66     MiscUtil.PrintInfo("\n%s (RDK v%s; %s): Starting...\n" % (ScriptName, rdBase.rdkitVersion, time.asctime()))
  67     
  68     (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime()
  69     
  70     # Retrieve command line arguments and options...
  71     RetrieveOptions()
  72     
  73     # Process and validate command line arguments and options...
  74     ProcessOptions()
  75     
  76     # Perform actions required by the script...
  77     PerformConstrainedMinimization()
  78     
  79     MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName)
  80     MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime))
  81 
  82 def PerformConstrainedMinimization():
  83     """Perform constrained minimization."""
  84     
  85     # Read and validate reference molecule...
  86     RefMol = RetrieveReferenceMolecule()
  87     
  88     # Setup a molecule reader for input file...
  89     MiscUtil.PrintInfo("\nProcessing file %s..." % OptionsInfo["Infile"])
  90     OptionsInfo["InfileParams"]["AllowEmptyMols"] = True
  91     Mols  = RDKitUtil.ReadMolecules(OptionsInfo["Infile"], **OptionsInfo["InfileParams"])
  92 
  93     # Set up a molecule writer...
  94     Writer = RDKitUtil.MoleculesWriter(OptionsInfo["Outfile"], **OptionsInfo["OutfileParams"])
  95     if Writer is None:
  96         MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OptionsInfo["Outfile"])
  97     MiscUtil.PrintInfo("Generating file %s..." % OptionsInfo["Outfile"])
  98 
  99     MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount = ProcessMolecules(RefMol, Mols, Writer)
 100 
 101     if Writer is not None:
 102         Writer.close()
 103     
 104     MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount)
 105     MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount)
 106     MiscUtil.PrintInfo("Number of molecules with missing core scaffold: %d" % CoreScaffoldMissingCount)
 107     MiscUtil.PrintInfo("Number of molecules failed during conformation generation or minimization: %d" % MinimizationFailedCount)
 108     MiscUtil.PrintInfo("Number of ignored molecules: %d" % (MolCount - ValidMolCount + CoreScaffoldMissingCount + MinimizationFailedCount))
 109 
 110 def ProcessMolecules(RefMol, Mols, Writer):
 111     """Process and minimize molecules. """
 112     
 113     if OptionsInfo["MPMode"]:
 114         return ProcessMoleculesUsingMultipleProcesses(RefMol, Mols, Writer)
 115     else:
 116         return ProcessMoleculesUsingSingleProcess(RefMol, Mols, Writer)
 117 
 118 def ProcessMoleculesUsingSingleProcess(RefMol, Mols, Writer):
 119     """Process and minimize molecules using a single process."""
 120 
 121     (MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount) = [0] * 4
 122     
 123     for Mol in Mols:
 124         MolCount += 1
 125         
 126         if Mol is None:
 127             continue
 128         
 129         if RDKitUtil.IsMolEmpty(Mol):
 130             if not OptionsInfo["QuietMode"]:
 131                 MolName = RDKitUtil.GetMolName(Mol, MolCount)
 132                 MiscUtil.PrintWarning("Ignoring empty molecule: %s" % MolName)
 133             continue
 134         ValidMolCount += 1
 135 
 136         # Setup a reference molecule core containing common scaffold atoms...
 137         RefMolCore = SetupCoreScaffold(RefMol, Mol, MolCount)
 138         if RefMolCore is None:
 139             CoreScaffoldMissingCount += 1
 140             continue
 141             
 142         Mol, CalcStatus, Energy, ScaffoldEmbedRMSD = ConstrainAndMinimizeMolecule(Mol, RefMolCore, MolCount)
 143         
 144         if not CalcStatus:
 145             MinimizationFailedCount += 1
 146             continue
 147         
 148         WriteMolecule(Writer, Mol, Energy, ScaffoldEmbedRMSD)
 149 
 150     return (MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount)
 151 
 152 def ProcessMoleculesUsingMultipleProcesses(RefMol, Mols, Writer):
 153     """Process and minimize molecules using multiprocessing."""
 154 
 155     MPParams = OptionsInfo["MPParams"]
 156     
 157     # Setup data for initializing a worker process...
 158     MiscUtil.PrintInfo("Encoding options info and reference molecule...")
 159     
 160     OptionsInfo["EncodedRefMol"] = RDKitUtil.MolToBase64EncodedMolString(RefMol)
 161     InitializeWorkerProcessArgs = (MiscUtil.ObjectToBase64EncodedString(Options), MiscUtil.ObjectToBase64EncodedString(OptionsInfo))
 162     
 163     # Setup a encoded mols data iterable for a worker process...
 164     WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(Mols)
 165 
 166     # Setup process pool along with data initialization for each process...
 167     MiscUtil.PrintInfo("\nConfiguring multiprocessing using %s method..." % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()"))
 168     MiscUtil.PrintInfo("NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n" % (MPParams["NumProcesses"], MPParams["InputDataMode"], ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"])))
 169     
 170     ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeWorkerProcess, InitializeWorkerProcessArgs)
 171     
 172     # Start processing...
 173     if re.match("^Lazy$", MPParams["InputDataMode"], re.I):
 174         Results = ProcessPool.imap(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
 175     elif re.match("^InMemory$", MPParams["InputDataMode"], re.I):
 176         Results = ProcessPool.map(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
 177     else:
 178         MiscUtil.PrintError("The value, %s, specified for \"--inputDataMode\" is not supported." % (MPParams["InputDataMode"]))
 179     
 180     (MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount) = [0] * 4
 181     for Result in Results:
 182         MolCount += 1
 183         MolIndex, EncodedMol, CoreScaffoldMissingStatus, CalcStatus, Energy, ScaffoldEmbedRMSD  = Result
 184         
 185         if EncodedMol is None:
 186             continue
 187         ValidMolCount += 1
 188 
 189         if CoreScaffoldMissingStatus:
 190             CoreScaffoldMissingStatus += 1
 191             continue
 192         
 193         if not CalcStatus:
 194             MinimizationFailedCount += 1
 195             continue
 196             
 197         Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
 198         WriteMolecule(Writer, Mol, Energy, ScaffoldEmbedRMSD)
 199     
 200     return (MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount)
 201     
 202 def InitializeWorkerProcess(*EncodedArgs):
 203     """Initialize data for a worker process."""
 204     
 205     global Options, OptionsInfo
 206 
 207     MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid())
 208 
 209     # Decode Options and OptionInfo...
 210     Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0])
 211     OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1])
 212 
 213     # Decode RefMol...
 214     OptionsInfo["RefMol"] = RDKitUtil.MolFromBase64EncodedMolString(OptionsInfo["EncodedRefMol"])
 215     
 216 def WorkerProcess(EncodedMolInfo):
 217     """Process data for a worker process."""
 218 
 219     MolIndex, EncodedMol = EncodedMolInfo
 220 
 221     CoreScaffoldMissingStatus = False
 222     CalcStatus = False
 223     Energy = None
 224     ScaffoldEmbedRMSD = None
 225     
 226     if EncodedMol is None:
 227         return [MolIndex, None, CoreScaffoldMissingStatus, CalcStatus, Energy, ScaffoldEmbedRMSD]
 228 
 229     RefMol = OptionsInfo["RefMol"]
 230     
 231     Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
 232     if RDKitUtil.IsMolEmpty(Mol):
 233         if not OptionsInfo["QuietMode"]:
 234             MolName = RDKitUtil.GetMolName(Mol, (MolIndex + 1))
 235             MiscUtil.PrintWarning("Ignoring empty molecule: %s" % MolName)
 236         return [MolIndex, None, CoreScaffoldMissingStatus, CalcStatus, Energy, ScaffoldEmbedRMSD]
 237     
 238     # Setup a reference molecule core containing common scaffold atoms...
 239     RefMolCore = SetupCoreScaffold(RefMol, Mol, (MolIndex + 1))
 240     if RefMolCore is None:
 241         CoreScaffoldMissingStatus = True
 242         return [MolIndex, None, CalcStatus, CoreScaffoldMissingStatus, Energy, ScaffoldEmbedRMSD]
 243     
 244     Mol, CalcStatus, Energy, ScaffoldEmbedRMSD = ConstrainAndMinimizeMolecule(Mol, RefMolCore, (MolIndex + 1))
 245 
 246     return [MolIndex, RDKitUtil.MolToBase64EncodedMolString(Mol, PropertyPickleFlags = Chem.PropertyPickleOptions.MolProps | Chem.PropertyPickleOptions.PrivateProps), CoreScaffoldMissingStatus, CalcStatus, Energy, ScaffoldEmbedRMSD]
 247 
 248 def RetrieveReferenceMolecule():
 249     """Retrieve and validate reference molecule """
 250     
 251     RefFile = OptionsInfo["RefFile"]
 252     
 253     MiscUtil.PrintInfo("\nProcessing file %s..." % (RefFile))
 254     OptionsInfo["InfileParams"]["AllowEmptyMols"] = False
 255     ValidRefMols, RefMolCount, ValidRefMolCount  = RDKitUtil.ReadAndValidateMolecules(RefFile, **OptionsInfo["InfileParams"])
 256     
 257     if ValidRefMolCount == 0:
 258         MiscUtil.PrintError("The reference file, %s, contains no valid molecules." % RefFile)
 259     elif ValidRefMolCount > 1:
 260         MiscUtil.PrintWarning("The reference file, %s, contains, %d, valid molecules. Using first molecule as the reference molecule..." % (RefFile, ValidRefMolCount))
 261     
 262     RefMol = ValidRefMols[0]
 263 
 264     if OptionsInfo["UseScaffoldSMARTS"]:
 265         ScaffoldPatternMol = Chem.MolFromSmarts(OptionsInfo["ScaffoldSMARTS"])
 266         if ScaffoldPatternMol is None:
 267             MiscUtil.PrintError("Failed to create scaffold pattern molecule. The scaffold SMARTS pattern, %s, specified using \"-s, --scaffold\" option is not valid." % (OptionsInfo["ScaffoldSMARTS"]))
 268         
 269         if not RefMol.HasSubstructMatch(ScaffoldPatternMol):
 270             MiscUtil.PrintError("The scaffold SMARTS pattern, %s, specified using \"-s, --scaffold\" option, is missing in the first valid reference molecule." % (OptionsInfo["ScaffoldSMARTS"]))
 271             
 272     return RefMol
 273 
 274 def SetupCoreScaffold(RefMol, Mol, MolCount):
 275     """Setup a reference molecule core containing common scaffold atoms between
 276     a pair of molecules."""
 277 
 278     if OptionsInfo["UseScaffoldMCS"]:
 279         return SetupCoreScaffoldByMCS(RefMol, Mol, MolCount)
 280     elif OptionsInfo["UseScaffoldSMARTS"]:
 281         return SetupCoreScaffoldBySMARTS(RefMol, Mol, MolCount)
 282     else:
 283         MiscUtil.PrintError("The  value, %s, specified for  \"-s, --scaffold\" option is not supported." % (OptionsInfo["Scaffold"]))
 284         
 285 def SetupCoreScaffoldByMCS(RefMol, Mol, MolCount):
 286     """Setup a reference molecule core containing common scaffold atoms between
 287     a pair of molecules using MCS."""
 288     
 289     MCSParams = OptionsInfo["MCSParams"]
 290     Mols = [RefMol, Mol]
 291 
 292     MCSResultObject = rdFMCS.FindMCS(Mols, maximizeBonds = MCSParams["MaximizeBonds"], threshold = MCSParams["Threshold"], timeout = MCSParams["TimeOut"], verbose = MCSParams["Verbose"], matchValences = MCSParams["MatchValences"], ringMatchesRingOnly = MCSParams["RingMatchesRingOnly"], completeRingsOnly = MCSParams["CompleteRingsOnly"], matchChiralTag = MCSParams["MatchChiralTag"], atomCompare = MCSParams["AtomCompare"], bondCompare = MCSParams["BondCompare"], seedSmarts = MCSParams["SeedSMARTS"]) 
 293     
 294     if MCSResultObject.canceled:
 295         if not OptionsInfo["QuietMode"]:
 296             MiscUtil.PrintWarning("MCS failed to identify a common core scaffold between reference moecule and input molecule %s. Specify a different set of parameters using \"-m, --mcsParams\" option and try again." % (RDKitUtil.GetMolName(Mol, MolCount)))
 297         return None
 298     
 299     CoreNumAtoms = MCSResultObject.numAtoms
 300     CoreNumBonds = MCSResultObject.numBonds
 301     
 302     SMARTSCore = MCSResultObject.smartsString
 303     
 304     if not len(SMARTSCore):
 305         if not OptionsInfo["QuietMode"]:
 306             MiscUtil.PrintWarning("MCS failed to identify a common core scaffold between reference moecule and input molecule %s. Specify a different set of parameters using \"-m, --mcsParams\" option and try again." % (RDKitUtil.GetMolName(Mol, MolCount)))
 307         return None
 308         
 309     if CoreNumAtoms < MCSParams["MinNumAtoms"]:
 310         if not OptionsInfo["QuietMode"]:
 311             MiscUtil.PrintWarning("Number of atoms, %d, in core scaffold identified by MCS is less than, %d, as specified by \"minNumAtoms\" parameter in  \"-m, --mcsParams\" option." % (CoreNumAtoms, MCSParams["MinNumAtoms"]))
 312         return None
 313     
 314     if CoreNumBonds < MCSParams["MinNumBonds"]:
 315         if not OptionsInfo["QuietMode"]:
 316             MiscUtil.PrintWarning("Number of bonds, %d, in core scaffold identified by MCS is less than, %d, as specified by \"minNumBonds\" parameter in  \"-m, --mcsParams\" option." % (CoreNumBonds, MCSParams["MinNumBonds"]))
 317         return None
 318 
 319     return GenerateCoreMol(RefMol, SMARTSCore)
 320     
 321 def SetupCoreScaffoldBySMARTS(RefMol, Mol, MolCount):
 322     """Setup a reference molecule core containing common scaffold atoms between
 323     a pair of molecules using specified SMARTS."""
 324     
 325     if OptionsInfo["ScaffoldPatternMol"] is None:
 326         OptionsInfo["ScaffoldPatternMol"] = Chem.MolFromSmarts(OptionsInfo["ScaffoldSMARTS"])
 327         
 328     if not Mol.HasSubstructMatch(OptionsInfo["ScaffoldPatternMol"]):
 329         if not OptionsInfo["QuietMode"]:
 330             MiscUtil.PrintWarning("The scaffold SMARTS pattern, %s, specified using \"-s, --scaffold\" option is missing in input molecule,  %s." % (OptionsInfo["ScaffoldSMARTS"], RDKitUtil.GetMolName(Mol, MolCount)))
 331         return None
 332 
 333     return GenerateCoreMol(RefMol, OptionsInfo["ScaffoldSMARTS"])
 334 
 335 def GenerateCoreMol(RefMol, SMARTSCore):
 336     """Generate core molecule for embedding. """
 337 
 338     # Create a molecule corresponding to core atoms...
 339     SMARTSCoreMol = Chem.MolFromSmarts(SMARTSCore)
 340 
 341     # Setup a ref molecule containing core atoms with dummy atoms as
 342     # attachment points for atoms around the core atoms...
 343     Core = AllChem.ReplaceSidechains(Chem.RemoveHs(RefMol), SMARTSCoreMol)
 344 
 345     # Delete any substructures containing dummy atoms..
 346     RefMolCore = AllChem.DeleteSubstructs(Core, Chem.MolFromSmiles('*'))
 347     RefMolCore.UpdatePropertyCache()
 348     
 349     return RefMolCore
 350 
 351 def ConstrainAndMinimizeMolecule(Mol, RefMolCore, MolNum = None):
 352     "Constrain and Minimize molecule."
 353 
 354     if  OptionsInfo["AddHydrogens"]:
 355         Mol = Chem.AddHs(Mol, addCoords = True)
 356 
 357     # Setup forcefield function to use for constrained minimization...
 358     ForceFieldFunction = None
 359     ForceFieldName = None
 360     if OptionsInfo["UseUFF"]:
 361         ForceFieldFunction = lambda mol, confId = -1 : AllChem.UFFGetMoleculeForceField(mol, confId = confId)
 362         ForeceFieldName = "UFF"
 363     else:
 364         ForceFieldFunction = lambda mol, confId = -1 : AllChem.MMFFGetMoleculeForceField(mol, AllChem.MMFFGetMoleculeProperties(mol, mmffVariant = OptionsInfo["MMFFVariant"]) , confId = confId)
 365         ForeceFieldName = "MMFF"
 366 
 367     if ForceFieldFunction is None:
 368         if not OptionsInfo["QuietMode"]:
 369             MiscUtil.PrintWarning("Failed to setup forcefield %s for molecule: %s\n" % (ForceFieldName, RDKitUtil.GetMolName(Mol, MolNum)))
 370         return (None, False, None, None)
 371         
 372     MaxConfs = OptionsInfo["MaxConfs"]
 373     EnforceChirality = OptionsInfo["EnforceChirality"]
 374     UseExpTorsionAnglePrefs = OptionsInfo["UseExpTorsionAnglePrefs"]
 375     UseBasicKnowledge = OptionsInfo["UseBasicKnowledge"]
 376     UseTethers = OptionsInfo["UseTethers"]
 377 
 378     CalcEnergyMap = {}
 379     MolConfsMap = {}
 380     ConfIDs = [ConfID for ConfID in range(0, MaxConfs)]
 381 
 382     for ConfID in ConfIDs:
 383         try:
 384             MolConf = Chem.Mol(Mol)
 385             AllChem.ConstrainedEmbed(MolConf, RefMolCore, useTethers = UseTethers, coreConfId = -1, randomseed = ConfID, getForceField = ForceFieldFunction, enforceChirality = EnforceChirality, useExpTorsionAnglePrefs = UseExpTorsionAnglePrefs, useBasicKnowledge = UseBasicKnowledge)
 386         except (ValueError, RuntimeError, Chem.rdchem.KekulizeException)  as ErrMsg:
 387             if not OptionsInfo["QuietMode"]:
 388                 MolName = RDKitUtil.GetMolName(Mol, MolNum)
 389                 MiscUtil.PrintWarning("Constrained embedding coupldn't  be performed for molecule %s:\n%s\n" % (RDKitUtil.GetMolName(Mol, MolNum), ErrMsg))
 390             return (None, False, None, None)
 391         
 392         EnergyStatus, Energy = GetEnergy(MolConf)
 393         
 394         if not EnergyStatus:
 395             if not OptionsInfo["QuietMode"]:
 396                 MolName = RDKitUtil.GetMolName(Mol, MolNum)
 397                 MiscUtil.PrintWarning("Failed to retrieve calculated energy for conformation number %d of molecule %s. Try again after removing any salts or cleaing up the molecule...\n" % (ConfID, MolName))
 398             return (None, False, None, None)
 399         
 400         CalcEnergyMap[ConfID] = Energy
 401         MolConfsMap[ConfID] = MolConf
 402 
 403     SortedConfIDs = sorted(ConfIDs, key = lambda ConfID: CalcEnergyMap[ConfID])
 404     MinEnergyConfID = SortedConfIDs[0]
 405     
 406     MinEnergy = "%.2f" % CalcEnergyMap[MinEnergyConfID]  if OptionsInfo["EnergyOut"] else None
 407     MinEnergyMolConf = MolConfsMap[MinEnergyConfID]
 408     
 409     ScaffoldEmbedRMSD = "%.4f" % float(MinEnergyMolConf.GetProp('EmbedRMS')) if OptionsInfo["ScaffoldRMSDOut"] else None
 410     MinEnergyMolConf.ClearProp('EmbedRMS')
 411     
 412     if  OptionsInfo["RemoveHydrogens"]:
 413         MinEnergyMolConf = Chem.RemoveHs(MinEnergyMolConf)
 414         
 415     return (MinEnergyMolConf, True, MinEnergy, ScaffoldEmbedRMSD)
 416 
 417 def GetEnergy(Mol, ConfID = None):
 418     "Calculate energy."
 419 
 420     Status = True
 421     Energy = None
 422 
 423     if ConfID is None:
 424         ConfID = -1
 425     
 426     if OptionsInfo["UseUFF"]:
 427         UFFMoleculeForcefield = AllChem.UFFGetMoleculeForceField(Mol, confId = ConfID)
 428         if UFFMoleculeForcefield is None:
 429             Status = False
 430         else:
 431             Energy = UFFMoleculeForcefield.CalcEnergy()
 432     elif OptionsInfo["UseMMFF"]:
 433         MMFFMoleculeProperties = AllChem.MMFFGetMoleculeProperties(Mol, mmffVariant = OptionsInfo["MMFFVariant"])
 434         MMFFMoleculeForcefield = AllChem.MMFFGetMoleculeForceField(Mol, MMFFMoleculeProperties, confId = ConfID)
 435         if MMFFMoleculeForcefield is None:
 436             Status = False
 437         else:
 438             Energy = MMFFMoleculeForcefield.CalcEnergy()
 439     else:
 440         MiscUtil.PrintError("Couldn't retrieve conformer energy: Specified forcefield, %s, is not supported" % OptionsInfo["ForceField"])
 441     
 442     return (Status, Energy)
 443     
 444 def WriteMolecule(Writer, Mol, Energy = None, ScaffoldEmbedRMSD = None, ConfID = None,):
 445     """Write molecule. """
 446 
 447     if ScaffoldEmbedRMSD is not None:
 448         Mol.SetProp("CoreScaffoldEmbedRMSD", ScaffoldEmbedRMSD)
 449             
 450     if Energy is not None:
 451         Mol.SetProp(OptionsInfo["EnergyLabel"], Energy)
 452             
 453     if ConfID is None:
 454         Writer.write(Mol)
 455     else:
 456         Writer.write(Mol, confId = ConfID)
 457     
 458 def ProcessMCSParameters():
 459     """Set up and process MCS parameters."""
 460 
 461     SetupMCSParameters()
 462     ProcessSpecifiedMCSParameters()
 463 
 464 def SetupMCSParameters():
 465     """Set up default MCS parameters."""
 466     
 467     OptionsInfo["MCSParams"] = {"MaximizeBonds": True, "Threshold": 0.9, "TimeOut": 3600, "Verbose": False, "MatchValences": True, "MatchChiralTag": False, "RingMatchesRingOnly": True, "CompleteRingsOnly": True, "AtomCompare": rdFMCS.AtomCompare.CompareElements, "BondCompare": rdFMCS.BondCompare.CompareOrder, "SeedSMARTS": "", "MinNumAtoms": 1, "MinNumBonds": 0}
 468     
 469 def ProcessSpecifiedMCSParameters():
 470     """Process specified MCS parameters."""
 471 
 472     if re.match("^auto$", OptionsInfo["SpecifiedMCSParams"], re.I):
 473         # Nothing to process...
 474         return
 475     
 476     # Parse specified parameters...
 477     MCSParams = re.sub(" ", "", OptionsInfo["SpecifiedMCSParams"])
 478     if not MCSParams:
 479         MiscUtil.PrintError("No valid parameter name and value pairs specified using \"-m, --mcsParams\" option.")
 480 
 481     MCSParamsWords = MCSParams.split(",")
 482     if len(MCSParamsWords) % 2:
 483         MiscUtil.PrintError("The number of comma delimited paramater names and values, %d, specified using \"-m, --mcsParams\" option must be an even number." % (len(MCSParamsWords)))
 484     
 485     # Setup  canonical parameter names...
 486     ValidParamNames = []
 487     CanonicalParamNamesMap = {}
 488     for ParamName in sorted(OptionsInfo["MCSParams"]):
 489         ValidParamNames.append(ParamName)
 490         CanonicalParamNamesMap[ParamName.lower()] = ParamName
 491 
 492     # Validate and set paramater names and value...
 493     for Index in range(0, len(MCSParamsWords), 2):
 494         Name = MCSParamsWords[Index]
 495         Value = MCSParamsWords[Index + 1]
 496 
 497         CanonicalName = Name.lower()
 498         if  not CanonicalName in CanonicalParamNamesMap:
 499             MiscUtil.PrintError("The parameter name, %s, specified using \"-m, --mcsParams\" option is not a valid name. Supported parameter names: %s" % (Name,  " ".join(ValidParamNames)))
 500 
 501         ParamName = CanonicalParamNamesMap[CanonicalName]
 502         if re.match("^Threshold$", ParamName, re.I):
 503             Value = float(Value)
 504             if Value <= 0.0 or Value > 1.0 :
 505                 MiscUtil.PrintError("The parameter value, %s, specified using \"-m, --mcsParams\" option  for parameter, %s, is not a valid value. Supported values: > 0 and <= 1.0" % (Value, Name))
 506             ParamValue = Value
 507         elif re.match("^Timeout$", ParamName, re.I):
 508             Value = int(Value)
 509             if Value <= 0:
 510                 MiscUtil.PrintError("The parameter value, %s, specified using \"-m, --mcsParams\" option  for parameter, %s, is not a valid value. Supported values: > 0" % (Value, Name))
 511             ParamValue = Value
 512         elif re.match("^MinNumAtoms$", ParamName, re.I):
 513             Value = int(Value)
 514             if Value < 1:
 515                 MiscUtil.PrintError("The parameter value, %s, specified using \"-m, --mcsParams\" option  for parameter, %s, is not a valid value. Supported values: >= 1" % (Value, Name))
 516             ParamValue = Value
 517         elif re.match("^MinNumBonds$", ParamName, re.I):
 518             Value = int(Value)
 519             if Value < 0:
 520                 MiscUtil.PrintError("The parameter value, %s, specified using \"-m, --mcsParams\" option  for parameter, %s, is not a valid value. Supported values: >=0 " % (Value, Name))
 521             ParamValue = Value
 522         elif re.match("^AtomCompare$", ParamName, re.I):
 523             if re.match("^CompareAny$", Value, re.I):
 524                 ParamValue = rdFMCS.AtomCompare.CompareAny
 525             elif re.match("^CompareElements$", Value, re.I):
 526                 ParamValue = Chem.rdFMCS.AtomCompare.CompareElements
 527             elif re.match("^CompareIsotopes$", Value, re.I):
 528                 ParamValue = Chem.rdFMCS.AtomCompare.CompareIsotopes
 529             else:
 530                 MiscUtil.PrintError("The parameter value, %s, specified using \"-m, --mcsParams\" option  for parameter, %s, is not a valid value. Supported values: CompareAny CompareElements CompareIsotopes" % (Value, Name))
 531         elif re.match("^BondCompare$", ParamName, re.I):
 532             if re.match("^CompareAny$", Value, re.I):
 533                 ParamValue = Chem.rdFMCS.BondCompare.CompareAny
 534             elif re.match("^CompareOrder$", Value, re.I):
 535                 ParamValue = rdFMCS.BondCompare.CompareOrder
 536             elif re.match("^CompareOrderExact$", Value, re.I):
 537                 ParamValue = rdFMCS.BondCompare.CompareOrderExact
 538             else:
 539                 MiscUtil.PrintError("The parameter value, %s, specified using \"-m, --mcsParams\" option  for parameter, %s, is not a valid value. Supported values: CompareAny CompareOrder CompareOrderExact" % (Value, Name))
 540         elif re.match("^SeedSMARTS$", ParamName, re.I):
 541             if not len(Value):
 542                 MiscUtil.PrintError("The parameter value specified using \"-m, --mcsParams\" option  for parameter, %s, is empty. " % (Name))
 543             ParamValue = Value
 544         else:
 545             if not re.match("^(Yes|No|True|False)$", Value, re.I):
 546                 MiscUtil.PrintError("The parameter value, %s, specified using \"-m, --mcsParams\" option  for parameter, %s, is not a valid value. Supported values: Yes No True False" % (Value, Name))
 547             ParamValue = False
 548             if re.match("^(Yes|True)$", Value, re.I):
 549                 ParamValue = True
 550         
 551         # Set value...
 552         OptionsInfo["MCSParams"][ParamName] = ParamValue
 553 
 554 def ProcessOptions():
 555     """Process and validate command line arguments and options"""
 556     
 557     MiscUtil.PrintInfo("Processing options...")
 558     
 559     # Validate options...
 560     ValidateOptions()
 561     
 562     OptionsInfo["Infile"] = Options["--infile"]
 563     OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters("--infileParams", Options["--infileParams"], Options["--infile"])
 564     
 565     OptionsInfo["RefFile"] = Options["--reffile"]
 566 
 567     OptionsInfo["Scaffold"] = Options["--scaffold"]
 568     if re.match("^auto$", Options["--scaffold"], re.I):
 569         UseScaffoldMCS = True
 570         UseScaffoldSMARTS = False
 571         ScaffoldSMARTS = None
 572     else:
 573         UseScaffoldMCS = False
 574         UseScaffoldSMARTS = True
 575         ScaffoldSMARTS = OptionsInfo["Scaffold"]
 576     
 577     OptionsInfo["UseScaffoldMCS"] = UseScaffoldMCS
 578     OptionsInfo["UseScaffoldSMARTS"] = UseScaffoldSMARTS
 579     OptionsInfo["ScaffoldSMARTS"] = ScaffoldSMARTS
 580     OptionsInfo["ScaffoldPatternMol"] = None
 581 
 582     OptionsInfo["SpecifiedMCSParams"] = Options["--mcsParams"]
 583     ProcessMCSParameters()
 584     
 585     OptionsInfo["Outfile"] = Options["--outfile"]
 586     OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters("--outfileParams", Options["--outfileParams"])
 587     
 588     OptionsInfo["Overwrite"] = Options["--overwrite"]
 589 
 590     OptionsInfo["AddHydrogens"] = True if re.match("^yes$", Options["--addHydrogens"], re.I) else False
 591     
 592     if re.match("^ETDG$", Options["--conformerGenerator"], re.I):
 593         ConformerGenerator = "ETDG"
 594         UseExpTorsionAnglePrefs = True
 595         UseBasicKnowledge = False
 596     elif re.match("^KDG$", Options["--conformerGenerator"], re.I):
 597         ConformerGenerator = "KDG"
 598         UseExpTorsionAnglePrefs = False
 599         UseBasicKnowledge = True
 600     elif re.match("^ETKDG$", Options["--conformerGenerator"], re.I):
 601         ConformerGenerator = "ETKDG"
 602         UseExpTorsionAnglePrefs = True
 603         UseBasicKnowledge = True
 604     elif re.match("^SDG$", Options["--conformerGenerator"], re.I):
 605         ConformerGenerator = "SDG"
 606         UseExpTorsionAnglePrefs = False
 607         UseBasicKnowledge = False
 608     else:
 609         MiscUtil.PrintError("The value, %s, specified for option \"-c, --conformerGenerator\" is not supported." % (Options["--conformerGenerator"]))
 610     
 611     OptionsInfo["ConformerGenerator"] = ConformerGenerator
 612     OptionsInfo["UseExpTorsionAnglePrefs"] = UseExpTorsionAnglePrefs
 613     OptionsInfo["UseBasicKnowledge"] = UseBasicKnowledge
 614 
 615     if re.match("^UFF$", Options["--forceField"], re.I):
 616         ForceField = "UFF"
 617         UseUFF = True
 618         UseMMFF = False
 619     elif re.match("^MMFF$", Options["--forceField"], re.I):
 620         ForceField = "MMFF"
 621         UseUFF = False
 622         UseMMFF = True
 623     else:
 624         MiscUtil.PrintError("The value, %s, specified for \"--forceField\" is not supported." % (Options["--forceField"],))
 625     
 626     MMFFVariant = "MMFF94" if re.match("^MMFF94$", Options["--forceFieldMMFFVariant"], re.I) else "MMFF94s"
 627     
 628     OptionsInfo["ForceField"] = ForceField
 629     OptionsInfo["MMFFVariant"] = MMFFVariant
 630     OptionsInfo["UseMMFF"] = UseMMFF
 631     OptionsInfo["UseUFF"] = UseUFF
 632     
 633     OptionsInfo["ScaffoldRMSDOut"] = True if re.match("^yes$", Options["--scaffoldRMSDOut"], re.I) else False
 634     
 635     OptionsInfo["EnergyOut"] = True if re.match("^yes$", Options["--energyOut"], re.I) else False
 636     if UseMMFF:
 637         OptionsInfo["EnergyLabel"] = "%s_Energy" % MMFFVariant
 638     else:
 639         OptionsInfo["EnergyLabel"] = "%s_Energy" % ForceField
 640     
 641     OptionsInfo["EnforceChirality"] = True if re.match("^yes$", Options["--enforceChirality"], re.I) else False
 642     
 643     OptionsInfo["MaxConfs"] = int(Options["--maxConfs"])
 644     
 645     OptionsInfo["MPMode"] = True if re.match("^yes$", Options["--mp"], re.I) else False
 646     OptionsInfo["MPParams"] = MiscUtil.ProcessOptionMultiprocessingParameters("--mpParams", Options["--mpParams"])
 647     
 648     OptionsInfo["QuietMode"] = True if re.match("^yes$", Options["--quiet"], re.I) else False
 649     
 650     OptionsInfo["RemoveHydrogens"] = True if re.match("^yes$", Options["--removeHydrogens"], re.I) else False
 651     OptionsInfo["UseTethers"] = True if re.match("^yes$", Options["--useTethers"], re.I) else False
 652 
 653 def RetrieveOptions():
 654     """Retrieve command line arguments and options"""
 655     
 656     # Get options...
 657     global Options
 658     Options = docopt(_docoptUsage_)
 659     
 660     # Set current working directory to the specified directory...
 661     WorkingDir = Options["--workingdir"]
 662     if WorkingDir:
 663         os.chdir(WorkingDir)
 664     
 665     # Handle examples option...
 666     if "--examples" in Options and Options["--examples"]:
 667         MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_))
 668         sys.exit(0)
 669 
 670 def ValidateOptions():
 671     """Validate option values"""
 672     
 673     MiscUtil.ValidateOptionTextValue("-a, --addHydrogens", Options["--addHydrogens"], "yes no")
 674     MiscUtil.ValidateOptionTextValue("-c, --conformerGenerator", Options["--conformerGenerator"], "SDG ETDG KDG ETKDG")
 675     
 676     MiscUtil.ValidateOptionTextValue("-f, --forceField", Options["--forceField"], "UFF MMFF")
 677     MiscUtil.ValidateOptionTextValue(" --forceFieldMMFFVariant", Options["--forceFieldMMFFVariant"], "MMFF94 MMFF94s")
 678     
 679     MiscUtil.ValidateOptionTextValue("--scaffoldRMSDOut", Options["--scaffoldRMSDOut"], "yes no")
 680     
 681     MiscUtil.ValidateOptionTextValue("--energyOut", Options["--energyOut"], "yes no")
 682     MiscUtil.ValidateOptionTextValue("--enforceChirality ", Options["--enforceChirality"], "yes no")
 683     
 684     MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"])
 685     MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "sdf sd mol smi txt csv tsv")
 686 
 687     MiscUtil.ValidateOptionFilePath("-r, --reffile", Options["--reffile"])
 688     MiscUtil.ValidateOptionFileExt("-r, --reffile", Options["--reffile"], "sdf sd mol")
 689     
 690     MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "sdf sd")
 691     MiscUtil.ValidateOptionsOutputFileOverwrite("-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"])
 692     MiscUtil.ValidateOptionsDistinctFileNames("-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"])
 693         
 694     MiscUtil.ValidateOptionIntegerValue("--maxConfs", Options["--maxConfs"], {">": 0})
 695     
 696     MiscUtil.ValidateOptionTextValue("--mp", Options["--mp"], "yes no")
 697     MiscUtil.ValidateOptionTextValue("-q, --quiet", Options["--quiet"], "yes no")
 698     
 699     MiscUtil.ValidateOptionTextValue("-r, --removeHydrogens", Options["--removeHydrogens"], "yes no")
 700     
 701     MiscUtil.ValidateOptionTextValue("-u, --useTethers", Options["--useTethers"], "yes no")
 702 
 703 # Setup a usage string for docopt...
 704 _docoptUsage_ = """
 705 RDKitPerformConstrainedMinimization.py - Perform constrained minimization
 706 
 707 Usage:
 708     RDKitPerformConstrainedMinimization.py [--addHydrogens <yes or no>] [--conformerGenerator <SDG, ETDG, KDG, ETKDG>]
 709                                            [--forceField <UFF, or MMFF>] [--forceFieldMMFFVariant <MMFF94 or MMFF94s>]
 710                                            [--energyOut  <yes or no>] [--enforceChirality <yes or no>] [--infileParams <Name,Value,...>]
                                           [--maxConfs <number>]  [--mcsParams <Name,Value,...>] [--mp <yes or no>] [--mpParams <Name,Value,...>]
 712                                            [ --outfileParams <Name,Value,...> ] [--overwrite] [--quiet <yes or no>] [ --removeHydrogens <yes or no>]
 713                                            [--scaffold <auto or SMARTS>]  [--scaffoldRMSDOut  <yes or no>] [--useTethers  <yes or no>] 
 714                                            [-w <dir>] -i <infile> -r <reffile> -o <outfile> 
 715     RDKitPerformConstrainedMinimization.py -h | --help | -e | --examples
 716 
 717 Description:
 718     Generate 3D structures for molecules by performing a constrained energy minimization
 719     against a reference molecule. An initial set of 3D conformers are generated for the
 720     input molecules using distance geometry. A common core scaffold, corresponding to
 721     a Maximum Common Substructure (MCS) or an explicit SMARTS pattern,  is identified
 722     between a pair of input and reference molecules. The core scaffold atoms in input
 723     molecules are aligned against the same atoms in the reference molecule. The energy
    of aligned structures is minimized using the forcefield to generate the final 3D structures.
 725 
    The supported input file formats are: Mol (.mol), SD (.sdf, .sd), SMILES (.smi,
    .csv, .tsv, .txt)
 728 
 729     The supported output file formats are: SD (.sdf, .sd)
 730 
 731 Options:
 732     -a, --addHydrogens <yes or no>  [default: yes]
 733         Add hydrogens before minimization.
 734     -c, --conformerGenerator <SDG, ETDG, KDG, ETKDG>  [default: ETKDG]
 735         Conformation generation methodology for generating initial 3D coordinates
        for molecules in input file. A common core scaffold is identified between
 737         a pair of input and reference molecules. The atoms in common core scaffold 
 738         of input molecules are aligned against the reference molecule followed by
 739         energy minimization to generate final 3D structure.
 740         
 741         Possible values: Standard Distance Geometry, (SDG), Experimental Torsion-angle
 742         preference with Distance Geometry (ETDG), basic Knowledge-terms with Distance
 743         Geometry (KDG),  and Experimental Torsion-angle preference along with basic
 744         Knowledge-terms with Distance Geometry (ETKDG) [Ref 129] .
 745     -f, --forceField <UFF, MMFF>  [default: MMFF]
 746         Forcefield method to use for  constrained energy minimization. Possible values:
 747         Universal Force Field (UFF) [ Ref 81 ] or Merck Molecular Mechanics Force
 748         Field [ Ref 83-87 ] .
 749     --forceFieldMMFFVariant <MMFF94 or MMFF94s>  [default: MMFF94]
 750         Variant of MMFF forcefield to use for energy minimization.
 751     --energyOut <yes or no>  [default: No]
 752         Write out energy values.
 753     --enforceChirality <yes or no>  [default: Yes]
 754         Enforce chirality for defined chiral centers.
 755     -e, --examples
 756         Print examples.
 757     -h, --help
 758         Print this help message.
 759     -i, --infile <infile>
 760         Input file name.
 761     --infileParams <Name,Value,...>  [default: auto]
 762         A comma delimited list of parameter name and value pairs for reading
 763         molecules from files. The supported parameter names for different file
 764         formats, along with their default values, are shown below:
 765             
 766             SD, MOL: removeHydrogens,yes,sanitize,yes,strictParsing,yes
 767             
 768             SMILES: smilesColumn,1,smilesNameColumn,2,smilesDelimiter,space,
 769                 smilesTitleLine,auto,sanitize,yes
 770             
 771         Possible values for smilesDelimiter: space, comma or tab.
 772     --maxConfs <number>  [default: 250]
 773         Maximum number of conformations to generate for each molecule by conformation
 774         generation methodology for initial 3D coordinates. A constrained minimization is
 775         performed using the specified forcefield and the lowest energy conformation is written
 776         to the output file.
 777     --mcsParams <Name,Value,...>  [default: auto]
 778         Parameter values to use for identifying a maximum common substructure
        (MCS) in between a pair of reference and input molecules. In general, it is a
 780         comma delimited list of parameter name and value pairs. The supported
 781         parameter names along with their default values are shown below:
 782             
 783             atomCompare,CompareElements,bondCompare,CompareOrder,
 784             maximizeBonds,yes,matchValences,yes,matchChiralTag,no,
 785             minNumAtoms,1,minNumBonds,0,ringMatchesRingOnly,yes,
 786             completeRingsOnly,yes,threshold,1.0,timeOut,3600,seedSMARTS,none
 787             
 788         Possible values for atomCompare: CompareAny, CompareElements,
 789         CompareIsotopes. Possible values for bondCompare: CompareAny,
 790         CompareOrder, CompareOrderExact.
 791         
 792         A brief description of MCS parameters taken from RDKit documentation is
 793         as follows:
 794             
 795             atomCompare - Controls match between two atoms
 796             bondCompare - Controls match between two bonds
 797             maximizeBonds - Maximize number of bonds instead of atoms
 798             matchValences - Include atom valences in the MCS match
 799             matchChiralTag - Include atom chirality in the MCS match
 800             minNumAtoms - Minimum number of atoms in the MCS match
 801             minNumBonds - Minimum number of bonds in the MCS match
 802             ringMatchesRingOnly - Ring bonds only match other ring bonds
 803             completeRingsOnly - Partial rings not allowed during the match
 804             threshold - Fraction of the dataset that must contain the MCS
 805             seedSMARTS - SMARTS string as the seed of the MCS
 806             timeout - Timeout for the MCS calculation in seconds
 807             
 808     --mp <yes or no>  [default: no]
 809         Use multiprocessing.
 810          
 811         By default, input data is retrieved in a lazy manner via mp.Pool.imap()
 812         function employing lazy RDKit data iterable. This allows processing of
        arbitrarily large data sets without any additional memory requirements.
 814         
 815         All input data may be optionally loaded into memory by mp.Pool.map()
 816         before starting worker processes in a process pool by setting the value
 817         of 'inputDataMode' to 'InMemory' in '--mpParams' option.
 818         
 819         A word to the wise: The default 'chunkSize' value of 1 during 'Lazy' input
 820         data mode may adversely impact the performance. The '--mpParams' section
 821         provides additional information to tune the value of 'chunkSize'.
 822     --mpParams <Name,Value,...>  [default: auto]
        A comma delimited list of parameter name and value pairs to
 824         configure multiprocessing.
 825         
 826         The supported parameter names along with their default and possible
 827         values are shown below:
 828         
 829             chunkSize, auto
 830             inputDataMode, Lazy   [ Possible values: InMemory or Lazy ]
 831             numProcesses, auto   [ Default: mp.cpu_count() ]
 832         
 833         These parameters are used by the following functions to configure and
 834         control the behavior of multiprocessing: mp.Pool(), mp.Pool.map(), and
 835         mp.Pool.imap().
 836         
 837         The chunkSize determines chunks of input data passed to each worker
 838         process in a process pool by mp.Pool.map() and mp.Pool.imap() functions.
 839         The default value of chunkSize is dependent on the value of 'inputDataMode'.
 840         
 841         The mp.Pool.map() function, invoked during 'InMemory' input data mode,
 842         automatically converts RDKit data iterable into a list, loads all data into
 843         memory, and calculates the default chunkSize using the following method
 844         as shown in its code:
 845         
 846             chunkSize, extra = divmod(len(dataIterable), len(numProcesses) * 4)
 847             if extra: chunkSize += 1
 848         
 849         For example, the default chunkSize will be 7 for a pool of 4 worker processes
 850         and 100 data items.
 851         
 852         The mp.Pool.imap() function, invoked during 'Lazy' input data mode, employs
 853         'lazy' RDKit data iterable to retrieve data as needed, without loading all the
 854         data into memory. Consequently, the size of input data is not known a priori.
 855         It's not possible to estimate an optimal value for the chunkSize. The default 
 856         chunkSize is set to 1.
 857         
 858         The default value for the chunkSize during 'Lazy' data mode may adversely
 859         impact the performance due to the overhead associated with exchanging
 860         small chunks of data. It is generally a good idea to explicitly set chunkSize to
 861         a larger value during 'Lazy' input data mode, based on the size of your input
 862         data and number of processes in the process pool.
 863         
 864         The mp.Pool.map() function waits for all worker processes to process all
 865         the data and return the results. The mp.Pool.imap() function, however,
        returns the results obtained from worker processes as soon as the
 867         results become available for specified chunks of data.
 868         
 869         The order of data in the results returned by both mp.Pool.map() and 
 870         mp.Pool.imap() functions always corresponds to the input data.
 871     -o, --outfile <outfile>
 872         Output file name.
 873     --outfileParams <Name,Value,...>  [default: auto]
 874         A comma delimited list of parameter name and value pairs for writing
 875         molecules to files. The supported parameter names for different file
 876         formats, along with their default values, are shown below:
 877             
 878             SD: kekulize,no
 879             
 880     --overwrite
 881         Overwrite existing files.
 882     -q, --quiet <yes or no>  [default: no]
 883         Use quiet mode. The warning and information messages will not be printed.
 884     -r, --reffile <reffile>
 885         Reference input file name containing a 3D reference molecule. A common
 886         core scaffold must be present in a pair of an input and reference molecules.
 887         Otherwise, no constrained minimization is performed on the input molecule.
 888     --removeHydrogens <yes or no>  [default: Yes]
 889         Remove hydrogens after minimization.
 890     -s, --scaffold <auto or SMARTS>  [default: auto]
 891         Common core scaffold between a pair of input and reference molecules used for
 892         constrained minimization of molecules in input file. Possible values: Auto or a
 893         valid SMARTS pattern. The common core scaffold is automatically detected
 894         corresponding to the Maximum Common Substructure (MCS) between a pair of
 895         reference and input molecules. A valid SMARTS pattern may be optionally specified
 896         for the common core scaffold.
 897     --scaffoldRMSDOut <yes or no>  [default: No]
 898         Write out RMSD value for common core alignment between a pair of input and
 899         reference molecules.
 900     -u, --useTethers <yes or no>  [default: yes]
 901         Use tethers to optimize the final conformation by applying a series of extra forces
 902         to align matching atoms to the positions of the core atoms. Otherwise, use simple
 903         distance constraints during the optimization.
 904     -w, --workingdir <dir>
 905         Location of working directory which defaults to the current directory.
 906 
 907 Examples:
 908     To perform constrained energy minimization for molecules in a SMILES file against
 909     a reference 3D molecule in a SD file using a common core scaffold between pairs of
 910     input and reference molecules identified using MCS, generating up to 250 conformations
 911     using ETKDG methodology followed by MMFF forcefield minimization, and write out
 912     a SD file containing minimum energy structure corresponding to each constrained
 913     molecule, type:
 914 
 915         % RDKitPerformConstrainedMinimization.py  -i SampleSeriesD3R.smi
 916           -r SampleSeriesRef3D.sdf  -o SampleOut.sdf
 917 
 918     To rerun the first example in a quiet mode and write out a SD file, type:
 919 
 920         % RDKitPerformConstrainedMinimization.py  -q yes -i SampleSeriesD3R.smi
 921           -r SampleSeriesRef3D.sdf  -o SampleOut.sdf
 922 
 923     To run the first example in multiprocessing mode on all available CPUs
 924     without loading all data into memory and write out a SD file, type:
 925 
 926         % RDKitPerformConstrainedMinimization.py  --mp yes
 927           -i SampleSeriesD3R.smi -r SampleSeriesRef3D.sdf  -o SampleOut.sdf
 928 
 929     To rerun the first example in multiprocessing mode on all available CPUs
 930     by loading all data into memory and write out a SD file, type:
 931 
 932         % RDKitPerformConstrainedMinimization.py  --mp yes --mpParams
 933           "inputDataMode,InMemory" -i SampleSeriesD3R.smi
 934           -r SampleSeriesRef3D.sdf  -o SampleOut.sdf
 935 
 936     To rerun the first example using an explicit SMARTS string for a common core
 937     scaffold and write out a SD file, type:
 938 
 939         % RDKitPerformConstrainedMinimization.py  --scaffold
 940           "c1c(C(N(C(c2cc(-c3nc(N)ncc3)cn2))))cccc1" -i SampleSeriesD3R.smi -r
 941           SampleSeriesRef3D.sdf -o SampleOut.sdf 
 942 
 943     To rerun the first example using molecules in a CSV SMILES file, SMILES
 944     strings in column 1, name in column2, and write out a SD file, type:
 945 
 946         % RDKitPerformConstrainedMinimization.py  --infileParams "smilesDelimiter,
 947           comma,smilesTitleLine,yes,smilesColumn,1,smilesNameColumn,2"
 948           -i SampleSeriesD3R.csv -r SampleSeriesRef3D.sdf  -o SampleOut.sdf
 949 
 950     To perform constrained energy minimization for molecules in a SD file against
 951     a reference 3D molecule in a SD file using a common core scaffold between pairs of
 952     input and reference molecules identified using MCS, generating up to 50 conformations
 953     using SDG methodology followed by UFF forcefield minimization, and write out
 954     a SD file containing minimum energy structure along with energy and embed RMS values
 955     corresponding to each constrained molecule, type:
 956 
 957         % RDKitPerformConstrainedMinimization.py  --maxConfs 50  -c SDG -f UFF
 958           --scaffoldRMSDOut yes --energyOut yes -i SampleSeriesD3R.sdf
 959           -r SampleSeriesRef3D.sdf  -o SampleOut.sdf
 960 
 961 Author:
 962     Manish Sud(msud@san.rr.com)
 963 
 964 See also:
 965     RDKitCalculateRMSD.py, RDKitCalculateMolecularDescriptors.py, RDKitCompareMoleculeShapes.py,
 966     RDKitConvertFileFormat.py, RDKitGenerateConstrainedConformers.py, RDKitPerformMinimization.py
 967 
 968 Copyright:
 969     Copyright (C) 2020 Manish Sud. All rights reserved.
 970 
 971     The functionality available in this script is implemented using RDKit, an
 972     open source toolkit for cheminformatics developed by Greg Landrum.
 973 
 974     This file is part of MayaChemTools.
 975 
 976     MayaChemTools is free software; you can redistribute it and/or modify it under
 977     the terms of the GNU Lesser General Public License as published by the Free
 978     Software Foundation; either version 3 of the License, or (at your option) any
 979     later version.
 980 
 981 """
 982 
# Invoke main() only when this file is executed as a script, not when imported...
if __name__ == "__main__":
    main()