MayaChemTools

   1 #!/bin/env python
   2 #
   3 # File: RDKitPerformMinimization.py
   4 # Author: Manish Sud <msud@san.rr.com>
   5 #
   6 # Copyright (C) 2019 Manish Sud. All rights reserved.
   7 #
   8 # The functionality available in this script is implemented using RDKit, an
   9 # open source toolkit for cheminformatics developed by Greg Landrum.
  10 #
  11 # This file is part of MayaChemTools.
  12 #
  13 # MayaChemTools is free software; you can redistribute it and/or modify it under
  14 # the terms of the GNU Lesser General Public License as published by the Free
  15 # Software Foundation; either version 3 of the License, or (at your option) any
  16 # later version.
  17 #
  18 # MayaChemTools is distributed in the hope that it will be useful, but without
  19 # any warranty; without even the implied warranty of merchantability of fitness
  20 # for a particular purpose.  See the GNU Lesser General Public License for more
  21 # details.
  22 #
  23 # You should have received a copy of the GNU Lesser General Public License
  24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  26 # Boston, MA, 02111-1307, USA.
  27 #
  28 
  29 from __future__ import print_function
  30 
  31 # Add local python path to the global path and import standard library modules...
  32 import os
  33 import sys;  sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python"))
  34 import time
  35 import re
  36 import multiprocessing as mp
  37 
  38 # RDKit imports...
  39 try:
  40     from rdkit import rdBase
  41     from rdkit import Chem
  42     from rdkit.Chem import AllChem
  43     from rdkit.Chem import Descriptors
  44 except ImportError as ErrMsg:
  45     sys.stderr.write("\nFailed to import RDKit module/package: %s\n" % ErrMsg)
  46     sys.stderr.write("Check/update your RDKit environment and try again.\n\n")
  47     sys.exit(1)
  48 
  49 # MayaChemTools imports...
  50 try:
  51     from docopt import docopt
  52     import MiscUtil
  53     import RDKitUtil
  54 except ImportError as ErrMsg:
  55     sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg)
  56     sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n")
  57     sys.exit(1)
  58 
  59 ScriptName = os.path.basename(sys.argv[0])
  60 Options = {}
  61 OptionsInfo = {}
  62 
  63 def main():
  64     """Start execution of the script"""
  65     
  66     MiscUtil.PrintInfo("\n%s (RDK v%s; %s): Starting...\n" % (ScriptName, rdBase.rdkitVersion, time.asctime()))
  67     
  68     (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime()
  69     
  70     # Retrieve command line arguments and options...
  71     RetrieveOptions()
  72     
  73     # Process and validate command line arguments and options...
  74     ProcessOptions()
  75     
  76     # Perform actions required by the script...
  77     PerformMinimization()
  78     
  79     MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName)
  80     MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime))
  81 
  82 def PerformMinimization():
  83     """Perform minimization."""
  84     
  85     # Setup a molecule reader...
  86     MiscUtil.PrintInfo("\nProcessing file %s..." % OptionsInfo["Infile"])
  87     Mols  = RDKitUtil.ReadMolecules(OptionsInfo["Infile"], **OptionsInfo["InfileParams"])
  88     
  89     # Set up a molecule writer...
  90     Writer = RDKitUtil.MoleculesWriter(OptionsInfo["Outfile"], **OptionsInfo["OutfileParams"])
  91     if Writer is None:
  92         MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OptionsInfo["Outfile"])
  93     MiscUtil.PrintInfo("Generating file %s..." % OptionsInfo["Outfile"])
  94 
  95     MolCount, ValidMolCount, MinimizationFailedCount = ProcessMolecules(Mols, Writer)
  96 
  97     if Writer is not None:
  98         Writer.close()
  99     
 100     MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount)
 101     MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount)
 102     MiscUtil.PrintInfo("Number of molecules failed during conformation generation or minimization: %d" % MinimizationFailedCount)
 103     MiscUtil.PrintInfo("Number of ignored molecules: %d" % (MolCount - ValidMolCount + MinimizationFailedCount))
 104 
 105 def ProcessMolecules(Mols, Writer):
 106     """Process and minimize molecules. """
 107     
 108     if OptionsInfo["MPMode"]:
 109         return ProcessMoleculesUsingMultipleProcesses(Mols, Writer)
 110     else:
 111         return ProcessMoleculesUsingSingleProcess(Mols, Writer)
 112 
 113 def ProcessMoleculesUsingSingleProcess(Mols, Writer):
 114     """Process and minimize molecules using a single process."""
 115     
 116     if OptionsInfo["SkipConformerGeneration"]:
 117         MiscUtil.PrintInfo("\nPerforming minimization without generation of conformers...")
 118     else:
 119         MiscUtil.PrintInfo("\nPerforming minimization with generation of conformers...")
 120     
 121     (MolCount, ValidMolCount, MinimizationFailedCount) = [0] * 3
 122     for Mol in Mols:
 123         MolCount += 1
 124         
 125         if Mol is None:
 126             continue
 127         
 128         if RDKitUtil.IsMolEmpty(Mol):
 129             if not OptionsInfo["QuietMode"]:
 130                 MolName = RDKitUtil.GetMolName(Mol, MolCount)
 131                 MiscUtil.PrintWarning("Ignoring empty molecule: %s" % MolName)
 132             continue
 133         ValidMolCount += 1
 134 
 135         Mol, CalcStatus, ConfID = MinimizeMoleculeOrConformers(Mol, MolCount)
 136         
 137         if not CalcStatus:
 138             MinimizationFailedCount += 1
 139             continue
 140         
 141         WriteMolecule(Writer, Mol, ConfID)
 142     
 143     return (MolCount, ValidMolCount, MinimizationFailedCount)
 144 
 145 def ProcessMoleculesUsingMultipleProcesses(Mols, Writer):
 146     """Process and minimize molecules using multiprocessing."""
 147     
 148     if OptionsInfo["SkipConformerGeneration"]:
 149         MiscUtil.PrintInfo("\nPerforming minimization without generation of conformers using multiprocessing...")
 150     else:
 151         MiscUtil.PrintInfo("\nPerforming minimization with generation of conformers using multiprocessing...")
 152         
 153     MPParams = OptionsInfo["MPParams"]
 154     
 155     # Setup data for initializing a worker process...
 156     InitializeWorkerProcessArgs = (MiscUtil.ObjectToBase64EncodedString(Options), MiscUtil.ObjectToBase64EncodedString(OptionsInfo))
 157     
 158     # Setup a encoded mols data iterable for a worker process...
 159     WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(Mols)
 160 
 161     # Setup process pool along with data initialization for each process...
 162     MiscUtil.PrintInfo("\nConfiguring multiprocessing using %s method..." % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()"))
 163     MiscUtil.PrintInfo("NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n" % (MPParams["NumProcesses"], MPParams["InputDataMode"], ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"])))
 164     
 165     ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeWorkerProcess, InitializeWorkerProcessArgs)
 166     
 167     # Start processing...
 168     if re.match("^Lazy$", MPParams["InputDataMode"], re.I):
 169         Results = ProcessPool.imap(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
 170     elif re.match("^InMemory$", MPParams["InputDataMode"], re.I):
 171         Results = ProcessPool.map(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
 172     else:
 173         MiscUtil.PrintError("The value, %s, specified for \"--inputDataMode\" is not supported." % (MPParams["InputDataMode"]))
 174     
 175     (MolCount, ValidMolCount, MinimizationFailedCount) = [0] * 3
 176     for Result in Results:
 177         MolCount += 1
 178         MolIndex, EncodedMol, CalcStatus, ConfID = Result
 179         
 180         if EncodedMol is None:
 181             continue
 182         ValidMolCount += 1
 183 
 184         if not CalcStatus:
 185             MinimizationFailedCount += 1
 186             continue
 187             
 188         Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
 189         WriteMolecule(Writer, Mol, ConfID)
 190     
 191     return (MolCount, ValidMolCount, MinimizationFailedCount)
 192     
 193 def InitializeWorkerProcess(*EncodedArgs):
 194     """Initialize data for a worker process."""
 195     
 196     global Options, OptionsInfo
 197 
 198     MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid())
 199     
 200     # Decode Options and OptionInfo...
 201     Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0])
 202     OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1])
 203     
 204 def WorkerProcess(EncodedMolInfo):
 205     """Process data for a worker process."""
 206 
 207     MolIndex, EncodedMol = EncodedMolInfo
 208     
 209     CalcStatus = False
 210     ConfID = None
 211     
 212     if EncodedMol is None:
 213         return [MolIndex, None, CalcStatus, ConfID]
 214 
 215     Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
 216     if RDKitUtil.IsMolEmpty(Mol):
 217         if not OptionsInfo["QuietMode"]:
 218             MolName = RDKitUtil.GetMolName(Mol, (MolIndex + 1))
 219             MiscUtil.PrintWarning("Ignoring empty molecule: %s" % MolName)
 220         return [MolIndex, None, CalcStatus, ConfID]
 221     
 222     Mol, CalcStatus, ConfID = MinimizeMoleculeOrConformers(Mol, (MolIndex + 1))
 223 
 224     return [MolIndex, RDKitUtil.MolToBase64EncodedMolString(Mol, PropertyPickleFlags = Chem.PropertyPickleOptions.MolProps | Chem.PropertyPickleOptions.PrivateProps), CalcStatus, ConfID]
 225 
 226 def MinimizeMoleculeOrConformers(Mol, MolNum = None):
 227     """Minimize molecule or conformers."""
 228 
 229     ConfID = None
 230     if OptionsInfo["SkipConformerGeneration"]:
 231         Mol, CalcStatus = MinimizeMolecule(Mol, MolNum)
 232     else:
 233         Mol, CalcStatus, ConfID = MinimizeConformers(Mol, MolNum)
 234 
 235     return (Mol, CalcStatus, ConfID)
 236     
 237 def MinimizeMolecule(Mol, MolNum = None):
 238     "Minimize molecule."
 239     
 240     if  OptionsInfo["AddHydrogens"]:
 241         Mol = Chem.AddHs(Mol)
 242 
 243     Status = 0
 244     try:
 245         if OptionsInfo["UseUFF"]:
 246             Status = AllChem.UFFOptimizeMolecule(Mol, maxIters = OptionsInfo["MaxIters"])
 247         elif OptionsInfo["UseMMFF"]:
 248             Status = AllChem.MMFFOptimizeMolecule(Mol,  maxIters = OptionsInfo["MaxIters"])
 249         else:
 250             MiscUtil.PrintError("Minimization couldn't be performed: Specified forcefield, %s, is not supported" % OptionsInfo["ForceField"])
 251     except RuntimeError as ErrMsg:
 252         if not OptionsInfo["QuietMode"]:
 253             MolName = RDKitUtil.GetMolName(Mol, MolNum)
 254             MiscUtil.PrintWarning("Minimization couldn't be performed for molecule %s:\n%s\n" % (MolName, ErrMsg))
 255         return (Mol, False)
 256     
 257     if Status != 0:
 258         if not OptionsInfo["QuietMode"]:
 259             MolName = RDKitUtil.GetMolName(Mol, MolNum)
 260             MiscUtil.PrintWarning("Minimization failed to converge for molecule %s in %d steps. Try using higher value for \"--maxIters\" option...\n" % (MolName, OptionsInfo["MaxIters"]))
 261         
 262     if  OptionsInfo["RemoveHydrogens"]:
 263         Mol = Chem.RemoveHs(Mol)
 264     
 265     return (Mol, True)
 266 
 267 def MinimizeConformers(Mol, MolNum = None):
 268     "Generate and mininize conformers for a molecule to get the lowest energy conformer."
 269     
 270     if  OptionsInfo["AddHydrogens"]:
 271         Mol = Chem.AddHs(Mol)
 272 
 273     ConfIDs = EmbedMolecule(Mol, MolNum)
 274     if not len(ConfIDs):
 275         if not OptionsInfo["QuietMode"]:
 276             MolName = RDKitUtil.GetMolName(Mol, MolNum)
 277             MiscUtil.PrintWarning("Minimization couldn't be performed for molecule %s: Embedding failed...\n" % MolName)
 278         return (Mol, False, None)
 279 
 280     CalcEnergyMap = {}
 281     for ConfID in ConfIDs:
 282         try:
 283             if OptionsInfo["UseUFF"]:
 284                 Status = AllChem.UFFOptimizeMolecule(Mol, confId = ConfID, maxIters = OptionsInfo["MaxIters"])
 285             elif OptionsInfo["UseMMFF"]:
 286                 Status = AllChem.MMFFOptimizeMolecule(Mol, confId = ConfID, maxIters = OptionsInfo["MaxIters"])
 287             else:
 288                 MiscUtil.PrintError("Minimization couldn't be performed: Specified forcefield, %s, is not supported" % OptionsInfo["ForceField"])
 289         except RuntimeError as ErrMsg:
 290             if not OptionsInfo["QuietMode"]:
 291                 MolName = RDKitUtil.GetMolName(Mol, MolNum)
 292                 MiscUtil.PrintWarning("Minimization couldn't be performed for molecule %s:\n%s\n" % (MolName, ErrMsg))
 293             return (Mol, False, None)
 294         
 295         EnergyStatus, Energy = GetConformerEnergy(Mol, ConfID)
 296         if not EnergyStatus:
 297             if not OptionsInfo["QuietMode"]:
 298                 MolName = RDKitUtil.GetMolName(Mol, MolNum)
 299                 MiscUtil.PrintWarning("Failed to retrieve calculated energy for conformation number %d of molecule %s. Try again after removing any salts or cleaing up the molecule...\n" % (ConfID, MolName))
 300             return (Mol, False, None)
 301         
 302         if Status != 0:
 303             if not OptionsInfo["QuietMode"]:
 304                 MolName = RDKitUtil.GetMolName(Mol, MolNum)
 305                 MiscUtil.PrintWarning("Minimization failed to converge for conformation number %d of molecule %s in %d steps. Try using higher value for \"--maxIters\" option...\n" % (ConfID, MolName, OptionsInfo["MaxIters"]))
 306             
 307         CalcEnergyMap[ConfID] = Energy
 308 
 309     SortedConfIDs = sorted(ConfIDs, key = lambda ConfID: CalcEnergyMap[ConfID])
 310     MinEnergyConfID = SortedConfIDs[0]
 311         
 312     if  OptionsInfo["RemoveHydrogens"]:
 313         Mol = Chem.RemoveHs(Mol)
 314 
 315     return (Mol, True, MinEnergyConfID)
 316     
 317 def GetConformerEnergy(Mol, ConfID):
 318     "Calculate conformer energy"
 319 
 320     Status = True
 321     Energy = 0.0
 322     
 323     if OptionsInfo["UseUFF"]:
 324         UFFMoleculeForcefield = AllChem.UFFGetMoleculeForceField(Mol, confId = ConfID)
 325         if UFFMoleculeForcefield is None:
 326             Status = False
 327         else:
 328             Energy = UFFMoleculeForcefield.CalcEnergy()
 329     elif OptionsInfo["UseMMFF"]:
 330         MMFFMoleculeProperties = AllChem.MMFFGetMoleculeProperties(Mol)
 331         MMFFMoleculeForcefield = AllChem.MMFFGetMoleculeForceField(Mol, MMFFMoleculeProperties, confId = ConfID)
 332         if MMFFMoleculeForcefield is None:
 333             Status = False
 334         else:
 335             Energy = MMFFMoleculeForcefield.CalcEnergy()
 336     else:
 337         MiscUtil.PrintError("Couldn't retrieve conformer energy: Specified forcefield, %s, is not supported" % OptionsInfo["ForceField"])
 338     
 339     return (Status, Energy)
 340     
 341 def EmbedMolecule(Mol, MolNum = None):
 342     "Embed conformations"
 343 
 344     ConfIDs = []
 345     
 346     MaxConfs = OptionsInfo["MaxConfs"]
 347     RandomSeed = OptionsInfo["RandomSeed"]
 348     EnforceChirality = OptionsInfo["EnforceChirality"]
 349     UseExpTorsionAnglePrefs = OptionsInfo["UseExpTorsionAnglePrefs"]
 350     UseBasicKnowledge = OptionsInfo["UseBasicKnowledge"]
 351 
 352     try:
 353         ConfIDs = AllChem.EmbedMultipleConfs(Mol, numConfs = MaxConfs, randomSeed = RandomSeed, enforceChirality = EnforceChirality, useExpTorsionAnglePrefs = UseExpTorsionAnglePrefs, useBasicKnowledge = UseBasicKnowledge)
 354     except ValueError as ErrMsg:
 355         if not OptionsInfo["QuietMode"]:
 356             MolName = RDKitUtil.GetMolName(Mol, MolNum)
 357             MiscUtil.PrintWarning("Embedding failed  for molecule %s:\n%s\n" % (MolName, ErrMsg))
 358         ConfIDs = []
 359     
 360     return ConfIDs
 361 
 362 def WriteMolecule(Writer, Mol, ConfID):
 363     """Write molecule. """
 364     
 365     if ConfID is None:
 366         Writer.write(Mol)
 367     else:
 368         Writer.write(Mol, confId = ConfID)
 369     
 370 def ProcessOptions():
 371     """Process and validate command line arguments and options"""
 372     
 373     MiscUtil.PrintInfo("Processing options...")
 374     
 375     # Validate options...
 376     ValidateOptions()
 377     
 378     OptionsInfo["Infile"] = Options["--infile"]
 379     OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters("--infileParams", Options["--infileParams"], Options["--infile"])
 380     
 381     OptionsInfo["Outfile"] = Options["--outfile"]
 382     OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters("--outfileParams", Options["--outfileParams"])
 383     
 384     OptionsInfo["Overwrite"] = Options["--overwrite"]
 385 
 386     OptionsInfo["AddHydrogens"] = True if re.match("^yes$", Options["--addHydrogens"], re.I) else False
 387     
 388     if re.match("^ETDG$", Options["--conformerGenerator"], re.I):
 389         ConformerGenerator = "ETDG"
 390         UseExpTorsionAnglePrefs = True
 391         UseBasicKnowledge = False
 392         SkipConformerGeneration = False
 393     elif re.match("^KDG$", Options["--conformerGenerator"], re.I):
 394         ConformerGenerator = "KDG"
 395         UseExpTorsionAnglePrefs = False
 396         UseBasicKnowledge = True
 397         SkipConformerGeneration = False
 398     elif re.match("^ETKDG$", Options["--conformerGenerator"], re.I):
 399         ConformerGenerator = "ETKDG"
 400         UseExpTorsionAnglePrefs = True
 401         UseBasicKnowledge = True
 402         SkipConformerGeneration = False
 403     elif re.match("^SDG$", Options["--conformerGenerator"], re.I):
 404         ConformerGenerator = "SDG"
 405         UseExpTorsionAnglePrefs = False
 406         UseBasicKnowledge = False
 407         SkipConformerGeneration = False
 408     else:
 409         ConformerGenerator = "None"
 410         UseExpTorsionAnglePrefs = False
 411         UseBasicKnowledge = False
 412         SkipConformerGeneration = True
 413     
 414     OptionsInfo["SkipConformerGeneration"] = SkipConformerGeneration
 415     OptionsInfo["ConformerGenerator"] = ConformerGenerator
 416     OptionsInfo["UseExpTorsionAnglePrefs"] = UseExpTorsionAnglePrefs
 417     OptionsInfo["UseBasicKnowledge"] = UseBasicKnowledge
 418 
 419     if re.match("^UFF$", Options["--forceField"], re.I):
 420         ForceField = "UFF"
 421         UseUFF = True
 422         UseMMFF = False
 423     elif re.match("^MMFF$", Options["--forceField"], re.I):
 424         ForceField = "MMFF"
 425         UseUFF = False
 426         UseMMFF = True
 427     
 428     OptionsInfo["ForceField"] = ForceField
 429     OptionsInfo["UseMMFF"] = UseMMFF
 430     OptionsInfo["UseUFF"] = UseUFF
 431         
 432     OptionsInfo["EnforceChirality"] = True if re.match("^yes$", Options["--enforceChirality"], re.I) else False
 433     
 434     OptionsInfo["MaxIters"] = int(Options["--maxIters"])
 435     OptionsInfo["MaxConfs"] = int(Options["--maxConfs"])
 436     
 437     OptionsInfo["MPMode"] = True if re.match("^yes$", Options["--mp"], re.I) else False
 438     OptionsInfo["MPParams"] = MiscUtil.ProcessOptionMultiprocessingParameters("--mpParams", Options["--mpParams"])
 439     
 440     OptionsInfo["QuietMode"] = True if re.match("^yes$", Options["--quiet"], re.I) else False
 441     
 442     RandomSeed = -1
 443     if not re.match("^auto$", Options["--randomSeed"], re.I):
 444         RandomSeed = int(Options["--randomSeed"])
 445     OptionsInfo["RandomSeed"] = RandomSeed
 446     
 447     OptionsInfo["RemoveHydrogens"] = True if re.match("^yes$", Options["--removeHydrogens"], re.I) else False
 448 
 449 def RetrieveOptions():
 450     """Retrieve command line arguments and options"""
 451     
 452     # Get options...
 453     global Options
 454     Options = docopt(_docoptUsage_)
 455     
 456     # Set current working directory to the specified directory...
 457     WorkingDir = Options["--workingdir"]
 458     if WorkingDir:
 459         os.chdir(WorkingDir)
 460     
 461     # Handle examples option...
 462     if "--examples" in Options and Options["--examples"]:
 463         MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_))
 464         sys.exit(0)
 465 
 466 def ValidateOptions():
 467     """Validate option values"""
 468     
 469     MiscUtil.ValidateOptionTextValue("-a, --addHydrogens", Options["--addHydrogens"], "yes no")
 470     MiscUtil.ValidateOptionTextValue("-c, --conformerGenerator", Options["--conformerGenerator"], "SDG ETDG KDG ETKDG None")
 471     MiscUtil.ValidateOptionTextValue("-f, --forceField", Options["--forceField"], "UFF MMFF")
 472     
 473     MiscUtil.ValidateOptionTextValue("--enforceChirality ", Options["--enforceChirality"], "yes no")
 474     
 475     MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"])
 476     MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "sdf sd mol smi txt csv tsv")
 477     
 478     MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "sdf sd")
 479     MiscUtil.ValidateOptionsOutputFileOverwrite("-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"])
 480     MiscUtil.ValidateOptionsDistinctFileNames("-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"])
 481         
 482     MiscUtil.ValidateOptionIntegerValue("--maxConfs", Options["--maxConfs"], {">": 0})
 483     MiscUtil.ValidateOptionIntegerValue("--maxIters", Options["--maxIters"], {">": 0})
 484     
 485     MiscUtil.ValidateOptionTextValue("--mp", Options["--mp"], "yes no")
 486     MiscUtil.ValidateOptionTextValue("-q, --quiet", Options["--quiet"], "yes no")
 487     
 488     if not re.match("^auto$", Options["--randomSeed"], re.I):
 489         MiscUtil.ValidateOptionIntegerValue("--randomSeed", Options["--randomSeed"], {})
 490         RandomSeed = int(Options["--randomSeed"])
 491     
 492     MiscUtil.ValidateOptionTextValue("-r, --removeHydrogens", Options["--removeHydrogens"], "yes no")
 493 
 494 # Setup a usage string for docopt...
 495 _docoptUsage_ = """
 496 RDKitPerformMinimization.py - Perform structure minimization
 497 
 498 Usage:
 499     RDKitPerformMinimization.py [--addHydrogens <yes or no>] [--conformerGenerator <SDG, ETDG, KDG, ETKDG, None> ]
 500                                 [--forceField <UFF or MMFF>] [--enforceChirality <yes or no>] [--infileParams <Name,Value,...>]
 501                                 [--maxConfs <number>] [--maxIters <number>] [--mp <yes or no>] [--mpParams <Name.Value,...>]
 502                                 [ --outfileParams <Name,Value,...> ] [--overwrite] [--quiet <yes or no>] [ --removeHydrogens <yes or no>]
 503                                 [--randomSeed <number>] [-w <dir>] -i <infile> -o <outfile> 
 504     RDKitPerformMinimization.py -h | --help | -e | --examples
 505 
 506 Description:
 507     Generate 3D structures for molecules using combination of distance geometry
 508     and forcefield minimization or minimize existing 3D structures using a specified
 509     forcefield.
 510 
 511     The supported input file formats are: Mol (.mol), SD (.sdf, .sd), SMILES (.smi)
 512     .csv, .tcsv .txt)
 513 
 514     The supported output file formats are: SD (.sdf, .sd)
 515 
 516 Options:
 517     -a, --addHydrogens <yes or no>  [default: yes]
 518         Add hydrogens before minimization.
 519     -c, --conformerGenerator <SDG, ETDG, KDG, ETKDG, None>  [default: ETKDG]
 520         Conformation generation methodology for generating initial 3D coordinates.
 521         Possible values: Standard Distance Geometry, (SDG), Experimental Torsion-angle
 522         preference with Distance Geometry (ETDG), basic Knowledge-terms with Distance
 523         Geometry (KDG),  and Experimental Torsion-angle preference along with basic
 524         Knowledge-terms with Distance Geometry (ETKDG) [Ref 129] .
 525         
 526         The conformation generation step may be skipped by specifying 'None' value to
 527         perform only forcefield minimization of molecules with 3D structures in input
 528         file.  This doesn't work for molecules in SMILES file or molecules in SD/MOL files
 529         containing 2D structures.
 530     -f, --forceField <UFF or MMFF>  [default: MMFF]
 531         Forcefield method to use for energy minimization. Possible values: Universal Force
 532         Field (UFF) [ Ref 81 ] or Merck Molecular Mechanics Force Field (MMFF) [ Ref 83-87 ] .
 533     --enforceChirality <yes or no>  [default: Yes]
 534         Enforce chirality for defined chiral centers.
 535     -e, --examples
 536         Print examples.
 537     -h, --help
 538         Print this help message.
 539     -i, --infile <infile>
 540         Input file name.
 541     --infileParams <Name,Value,...>  [default: auto]
 542         A comma delimited list of parameter name and value pairs for reading
 543         molecules from files. The supported parameter names for different file
 544         formats, along with their default values, are shown below:
 545             
 546             SD, MOL: removeHydrogens,yes,sanitize,yes,strictParsing,yes
 547             SMILES: smilesColumn,1,smilesNameColumn,2,smilesDelimiter,space,
 548                 smilesTitleLine,auto,sanitize,yes
 549             
 550         Possible values for smilesDelimiter: space, comma or tab.
 551     --maxConfs <number>  [default: 250]
 552         Maximum number of conformations to generate for each molecule by conformation
 553         generation methodology for initial 3D coordinates. The conformations are minimized
 554         using the specified forcefield and the lowest energy conformation is written to the
 555         output file. This option is ignored during 'None' value of '-c --conformerGenerator'
 556         option.
 557     --maxIters <number>  [default: 500]
 558         Maximum number of iterations to perform for each molecule during forcefield
 559         minimization.
 560     --mp <yes or no>  [default: no]
 561         Use multiprocessing.
 562          
 563         By default, input data is retrieved in a lazy manner via mp.Pool.imap()
 564         function employing lazy RDKit data iterable. This allows processing of
 565         arbitrary large data sets without any additional requirements memory.
 566         
 567         All input data may be optionally loaded into memory by mp.Pool.map()
 568         before starting worker processes in a process pool by setting the value
 569         of 'inputDataMode' to 'InMemory' in '--mpParams' option.
 570         
 571         A word to the wise: The default 'chunkSize' value of 1 during 'Lazy' input
 572         data mode may adversely impact the performance. The '--mpParams' section
 573         provides additional information to tune the value of 'chunkSize'.
 574     --mpParams <Name,Value,...>  [default: auto]
 575         A comma delimited list of parameter name and value pairs for to
 576         configure multiprocessing.
 577         
 578         The supported parameter names along with their default and possible
 579         values are shown below:
 580         
 581             chunkSize, auto
 582             inputDataMode, Lazy   [ Possible values: InMemory or Lazy ]
 583             numProcesses, auto   [ Default: mp.cpu_count() ]
 584         
 585         These parameters are used by the following functions to configure and
 586         control the behavior of multiprocessing: mp.Pool(), mp.Pool.map(), and
 587         mp.Pool.imap().
 588         
 589         The chunkSize determines chunks of input data passed to each worker
 590         process in a process pool by mp.Pool.map() and mp.Pool.imap() functions.
 591         The default value of chunkSize is dependent on the value of 'inputDataMode'.
 592         
 593         The mp.Pool.map() function, invoked during 'InMemory' input data mode,
 594         automatically converts RDKit data iterable into a list, loads all data into
 595         memory, and calculates the default chunkSize using the following method
 596         as shown in its code:
 597         
 598             chunkSize, extra = divmod(len(dataIterable), len(numProcesses) * 4)
 599             if extra: chunkSize += 1
 600         
 601         For example, the default chunkSize will be 7 for a pool of 4 worker processes
 602         and 100 data items.
 603         
 604         The mp.Pool.imap() function, invoked during 'Lazy' input data mode, employs
 605         'lazy' RDKit data iterable to retrieve data as needed, without loading all the
 606         data into memory. Consequently, the size of input data is not known a priori.
 607         It's not possible to estimate an optimal value for the chunkSize. The default 
 608         chunkSize is set to 1.
 609         
 610         The default value for the chunkSize during 'Lazy' data mode may adversely
 611         impact the performance due to the overhead associated with exchanging
 612         small chunks of data. It is generally a good idea to explicitly set chunkSize to
 613         a larger value during 'Lazy' input data mode, based on the size of your input
 614         data and number of processes in the process pool.
 615         
 616         The mp.Pool.map() function waits for all worker processes to process all
 617         the data and return the results. The mp.Pool.imap() function, however,
 618         returns the the results obtained from worker processes as soon as the
 619         results become available for specified chunks of data.
 620         
 621         The order of data in the results returned by both mp.Pool.map() and 
 622         mp.Pool.imap() functions always corresponds to the input data.
 623     -o, --outfile <outfile>
 624         Output file name.
 625     --outfileParams <Name,Value,...>  [default: auto]
 626         A comma delimited list of parameter name and value pairs for writing
 627         molecules to files. The supported parameter names for different file
 628         formats, along with their default values, are shown below:
 629             
 630             SD: kekulize,no
 631             
 632     --overwrite
 633         Overwrite existing files.
 634     -q, --quiet <yes or no>  [default: no]
 635         Use quiet mode. The warning and information messages will not be printed.
 636     -r, --removeHydrogens <yes or no>  [default: Yes]
 637         Remove hydrogens after minimization.
 638     --randomSeed <number>  [default: auto]
 639         Seed for the random number generator for reproducing 3D coordinates.
 640         Default is to use a random seed.
 641     -w, --workingdir <dir>
 642         Location of working directory which defaults to the current directory.
 643 
 644 Examples:
 645     To generate up to 250 conformations using ETKDG methodology followed by MMFF
 646     forcefield minimization for a maximum of 500 iterations for molecules in a SMILES file
 647     and write out a SD file containing minimum energy structure corresponding to each
 648     molecule, type:
 649 
 650         % RDKitPerformMinimization.py  -i Sample.smi -o SampleOut.sdf
 651 
 652     To rerun the first example in a quiet mode and write out a SD file, type:
 653 
 654         % RDKitPerformMinimization.py  -q yes -i Sample.smi -o SampleOut.sdf
 655 
 656     To run the first example in multiprocessing mode on all available CPUs
 657     without loading all data into memory and write out a SD file, type:
 658 
 659         % RDKitPerformMinimization.py --mp yes -i Sample.smi -o SampleOut.sdf
 660 
 661     To rerun the first example in multiprocessing mode on all available CPUs
 662     by loading all data into memory and write out a SD file, type:
 663 
 664         % RDKitPerformMinimization.py --mp yes --mpParams "inputDataMode,
 665           InMemory" -i Sample.smi -o SampleOut.sdf
 666 
 667     To run the first example in multiprocessing mode on specific number of
 668     CPUs and chunk size without loading all data into memory and write out a SD file,
 669     type:
 670 
 671         % RDKitPerformMinimization.py --mp yes --mpParams "inputDataMode,Lazy,
 672           numProcesses,4,chunkSize,8" -i Sample.smi -o SampleOut.sdf
 673 
 674     To generate up to 150 conformations using ETKDG methodology followed by MMFF
 675     forcefield minimization for a maximum of 250 iterations along with a specified random
 676     seed  for molecules in a SMILES file and write out a SD file containing minimum energy
 677     structures corresponding to each molecule, type
 678 
 679         % RDKitPerformMinimization.py  --maxConfs 150  --randomSeed 201780117 
 680           --maxIters 250  -i Sample.smi -o SampleOut.sdf
 681 
 682     To minimize structures in a 3D SD file using UFF forcefield for a maximum of 150
 683     iterations without generating any conformations and write out a SD file containing
 684     minimum energy structures corresponding to each molecule, type
 685 
 686         % RDKitPerformMinimization.py  -c None -f UFF --maxIters 150
 687           -i Sample3D.sdf -o SampleOut.sdf
 688 
 689     To generate up to 50 conformations using SDG methodology followed
 690     by UFF forcefield minimization for a maximum of 50 iterations for 
 691     molecules in a CSV SMILES file, SMILES strings in column 1, name in
 692     column 2, and write out a SD file, type:
 693 
 694         % RDKitPerformMinimization.py  --maxConfs 50  --maxIters 50 -c SDG
 695           -f UFF --infileParams "smilesDelimiter,comma,smilesTitleLine,yes,
 696           smilesColumn,1,smilesNameColumn,2"  -i SampleSMILES.csv
 697           -o SampleOut.sdf
 698 
 699 Author:
 700     Manish Sud(msud@san.rr.com)
 701 
 702 See also:
 703     RDKitCalculateRMSD.py, RDKitCalculateMolecularDescriptors.py, RDKitCompareMoleculeShapes.py,
 704     RDKitConvertFileFormat.py, RDKitGenerateConformers.py
 705 
 706 Copyright:
 707     Copyright (C) 2019 Manish Sud. All rights reserved.
 708 
 709     The functionality available in this script is implemented using RDKit, an
 710     open source toolkit for cheminformatics developed by Greg Landrum.
 711 
 712     This file is part of MayaChemTools.
 713 
 714     MayaChemTools is free software; you can redistribute it and/or modify it under
 715     the terms of the GNU Lesser General Public License as published by the Free
 716     Software Foundation; either version 3 of the License, or (at your option) any
 717     later version.
 718 
 719 """
 720 
 721 if __name__ == "__main__":
 722     main()