MayaChemTools

   1 #!/bin/env python
   2 #
   3 # File: RDKitPerformMinimization.py
   4 # Author: Manish Sud <msud@san.rr.com>
   5 #
   6 # Copyright (C) 2024 Manish Sud. All rights reserved.
   7 #
   8 # The functionality available in this script is implemented using RDKit, an
   9 # open source toolkit for cheminformatics developed by Greg Landrum.
  10 #
  11 # This file is part of MayaChemTools.
  12 #
  13 # MayaChemTools is free software; you can redistribute it and/or modify it under
  14 # the terms of the GNU Lesser General Public License as published by the Free
  15 # Software Foundation; either version 3 of the License, or (at your option) any
  16 # later version.
  17 #
  18 # MayaChemTools is distributed in the hope that it will be useful, but without
  19 # any warranty; without even the implied warranty of merchantability of fitness
  20 # for a particular purpose.  See the GNU Lesser General Public License for more
  21 # details.
  22 #
  23 # You should have received a copy of the GNU Lesser General Public License
  24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  26 # Boston, MA, 02111-1307, USA.
  27 #
  28 
  29 from __future__ import print_function
  30 
  31 # Add local python path to the global path and import standard library modules...
  32 import os
  33 import sys;  sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python"))
  34 import time
  35 import re
  36 import multiprocessing as mp
  37 
  38 # RDKit imports...
  39 try:
  40     from rdkit import rdBase
  41     from rdkit import Chem
  42     from rdkit.Chem import AllChem
  43 except ImportError as ErrMsg:
  44     sys.stderr.write("\nFailed to import RDKit module/package: %s\n" % ErrMsg)
  45     sys.stderr.write("Check/update your RDKit environment and try again.\n\n")
  46     sys.exit(1)
  47 
  48 # MayaChemTools imports...
  49 try:
  50     from docopt import docopt
  51     import MiscUtil
  52     import RDKitUtil
  53 except ImportError as ErrMsg:
  54     sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg)
  55     sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n")
  56     sys.exit(1)
  57 
  58 ScriptName = os.path.basename(sys.argv[0])
  59 Options = {}
  60 OptionsInfo = {}
  61 
  62 def main():
  63     """Start execution of the script."""
  64     
  65     MiscUtil.PrintInfo("\n%s (RDKit v%s; MayaChemTools v%s; %s): Starting...\n" % (ScriptName, rdBase.rdkitVersion, MiscUtil.GetMayaChemToolsVersion(), time.asctime()))
  66     
  67     (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime()
  68     
  69     # Retrieve command line arguments and options...
  70     RetrieveOptions()
  71     
  72     # Process and validate command line arguments and options...
  73     ProcessOptions()
  74     
  75     # Perform actions required by the script...
  76     PerformMinimization()
  77     
  78     MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName)
  79     MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime))
  80 
  81 def PerformMinimization():
  82     """Perform minimization."""
  83     
  84     # Setup a molecule reader...
  85     MiscUtil.PrintInfo("\nProcessing file %s..." % OptionsInfo["Infile"])
  86     Mols  = RDKitUtil.ReadMolecules(OptionsInfo["Infile"], **OptionsInfo["InfileParams"])
  87     
  88     # Set up a molecule writer...
  89     Writer = RDKitUtil.MoleculesWriter(OptionsInfo["Outfile"], **OptionsInfo["OutfileParams"])
  90     if Writer is None:
  91         MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OptionsInfo["Outfile"])
  92     MiscUtil.PrintInfo("Generating file %s..." % OptionsInfo["Outfile"])
  93 
  94     MolCount, ValidMolCount, MinimizationFailedCount, WriteFailedCount = ProcessMolecules(Mols, Writer)
  95 
  96     if Writer is not None:
  97         Writer.close()
  98     
  99     MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount)
 100     MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount)
 101     MiscUtil.PrintInfo("Number of molecules failed during conformation generation or minimization: %d" % MinimizationFailedCount)
 102     MiscUtil.PrintInfo("Number of molecules failed during writing: %d" % WriteFailedCount)
 103     MiscUtil.PrintInfo("Number of ignored molecules: %d" % (MolCount - ValidMolCount + MinimizationFailedCount + WriteFailedCount))
 104 
 105 def ProcessMolecules(Mols, Writer):
 106     """Process and minimize molecules."""
 107     
 108     if OptionsInfo["MPMode"]:
 109         return ProcessMoleculesUsingMultipleProcesses(Mols, Writer)
 110     else:
 111         return ProcessMoleculesUsingSingleProcess(Mols, Writer)
 112 
 113 def ProcessMoleculesUsingSingleProcess(Mols, Writer):
 114     """Process and minimize molecules using a single process."""
 115     
 116     if OptionsInfo["SkipConformerGeneration"]:
 117         MiscUtil.PrintInfo("\nPerforming minimization without generation of conformers...")
 118     else:
 119         MiscUtil.PrintInfo("\nPerforming minimization with generation of conformers...")
 120     
 121     (MolCount, ValidMolCount, MinimizationFailedCount, WriteFailedCount) = [0] * 4
 122     for Mol in Mols:
 123         MolCount += 1
 124         
 125         if Mol is None:
 126             continue
 127         
 128         if RDKitUtil.IsMolEmpty(Mol):
 129             if not OptionsInfo["QuietMode"]:
 130                 MolName = RDKitUtil.GetMolName(Mol, MolCount)
 131                 MiscUtil.PrintWarning("Ignoring empty molecule: %s" % MolName)
 132             continue
 133         ValidMolCount += 1
 134 
 135         Mol, CalcStatus, ConfID, Energy = MinimizeMoleculeOrConformers(Mol, MolCount)
 136         
 137         if not CalcStatus:
 138             MinimizationFailedCount += 1
 139             continue
 140         
 141         WriteStatus = WriteMolecule(Writer, Mol, MolCount, ConfID, Energy)
 142         if not WriteStatus:
 143             WriteFailedCount += 1
 144     
 145     return (MolCount, ValidMolCount, MinimizationFailedCount, WriteFailedCount)
 146 
 147 def ProcessMoleculesUsingMultipleProcesses(Mols, Writer):
 148     """Process and minimize molecules using multiprocessing."""
 149     
 150     if OptionsInfo["SkipConformerGeneration"]:
 151         MiscUtil.PrintInfo("\nPerforming minimization without generation of conformers using multiprocessing...")
 152     else:
 153         MiscUtil.PrintInfo("\nPerforming minimization with generation of conformers using multiprocessing...")
 154         
 155     MPParams = OptionsInfo["MPParams"]
 156     
 157     # Setup data for initializing a worker process...
 158     InitializeWorkerProcessArgs = (MiscUtil.ObjectToBase64EncodedString(Options), MiscUtil.ObjectToBase64EncodedString(OptionsInfo))
 159     
 160     # Setup a encoded mols data iterable for a worker process...
 161     WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(Mols)
 162 
 163     # Setup process pool along with data initialization for each process...
 164     MiscUtil.PrintInfo("\nConfiguring multiprocessing using %s method..." % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()"))
 165     MiscUtil.PrintInfo("NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n" % (MPParams["NumProcesses"], MPParams["InputDataMode"], ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"])))
 166     
 167     ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeWorkerProcess, InitializeWorkerProcessArgs)
 168     
 169     # Start processing...
 170     if re.match("^Lazy$", MPParams["InputDataMode"], re.I):
 171         Results = ProcessPool.imap(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
 172     elif re.match("^InMemory$", MPParams["InputDataMode"], re.I):
 173         Results = ProcessPool.map(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
 174     else:
 175         MiscUtil.PrintError("The value, %s, specified for \"--inputDataMode\" is not supported." % (MPParams["InputDataMode"]))
 176     
 177     (MolCount, ValidMolCount, MinimizationFailedCount, WriteFailedCount) = [0] * 4
 178     for Result in Results:
 179         MolCount += 1
 180         MolIndex, EncodedMol, CalcStatus, ConfID, Energy = Result
 181         
 182         if EncodedMol is None:
 183             continue
 184         ValidMolCount += 1
 185 
 186         if not CalcStatus:
 187             MinimizationFailedCount += 1
 188             continue
 189             
 190         Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
 191         WriteStatus = WriteMolecule(Writer, Mol, MolCount, ConfID, Energy)
 192         if not WriteStatus:
 193             WriteFailedCount += 1
 194     
 195     return (MolCount, ValidMolCount, MinimizationFailedCount, WriteFailedCount)
 196     
 197 def InitializeWorkerProcess(*EncodedArgs):
 198     """Initialize data for a worker process."""
 199     
 200     global Options, OptionsInfo
 201 
 202     MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid())
 203     
 204     # Decode Options and OptionInfo...
 205     Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0])
 206     OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1])
 207     
 208 def WorkerProcess(EncodedMolInfo):
 209     """Process data for a worker process."""
 210 
 211     MolIndex, EncodedMol = EncodedMolInfo
 212     
 213     CalcStatus = False
 214     ConfID = None
 215     Energy = None
 216     
 217     if EncodedMol is None:
 218         return [MolIndex, None, CalcStatus, ConfID, Energy]
 219 
 220     Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
 221     if RDKitUtil.IsMolEmpty(Mol):
 222         if not OptionsInfo["QuietMode"]:
 223             MolName = RDKitUtil.GetMolName(Mol, (MolIndex + 1))
 224             MiscUtil.PrintWarning("Ignoring empty molecule: %s" % MolName)
 225         return [MolIndex, None, CalcStatus, ConfID, Energy]
 226     
 227     Mol, CalcStatus, ConfID, Energy = MinimizeMoleculeOrConformers(Mol, (MolIndex + 1))
 228 
 229     return [MolIndex, RDKitUtil.MolToBase64EncodedMolString(Mol, PropertyPickleFlags = Chem.PropertyPickleOptions.MolProps | Chem.PropertyPickleOptions.PrivateProps), CalcStatus, ConfID, Energy]
 230 
 231 def MinimizeMoleculeOrConformers(Mol, MolNum = None):
 232     """Minimize molecule or conformers."""
 233 
 234     ConfID = None
 235     if OptionsInfo["SkipConformerGeneration"]:
 236         Mol, CalcStatus, Energy = MinimizeMolecule(Mol, MolNum)
 237     else:
 238         Mol, CalcStatus, ConfID, Energy = MinimizeConformers(Mol, MolNum)
 239 
 240     return (Mol, CalcStatus, ConfID, Energy)
 241     
 242 def MinimizeMolecule(Mol, MolNum = None):
 243     """Minimize molecule."""
 244     
 245     if  OptionsInfo["AddHydrogens"]:
 246         Mol = Chem.AddHs(Mol, addCoords = True)
 247 
 248     Status = 0
 249     try:
 250         if OptionsInfo["UseUFF"]:
 251             Status = AllChem.UFFOptimizeMolecule(Mol, maxIters = OptionsInfo["MaxIters"])
 252         elif OptionsInfo["UseMMFF"]:
 253             Status = AllChem.MMFFOptimizeMolecule(Mol,  maxIters = OptionsInfo["MaxIters"], mmffVariant = OptionsInfo["MMFFVariant"])
 254         else:
 255             MiscUtil.PrintError("Minimization couldn't be performed: Specified forcefield, %s, is not supported" % OptionsInfo["ForceField"])
 256     except (ValueError, RuntimeError, Chem.rdchem.KekulizeException) as ErrMsg:
 257         if not OptionsInfo["QuietMode"]:
 258             MolName = RDKitUtil.GetMolName(Mol, MolNum)
 259             MiscUtil.PrintWarning("Minimization couldn't be performed for molecule %s:\n%s\n" % (MolName, ErrMsg))
 260         return (Mol, False, None)
 261     
 262     if Status != 0:
 263         if not OptionsInfo["QuietMode"]:
 264             MolName = RDKitUtil.GetMolName(Mol, MolNum)
 265             MiscUtil.PrintWarning("Minimization failed to converge for molecule %s in %d steps. Try using higher value for \"--maxIters\" option...\n" % (MolName, OptionsInfo["MaxIters"]))
 266 
 267     Energy = None
 268     if OptionsInfo["EnergyOut"]:
 269         EnergyStatus, Energy = GetEnergy(Mol)
 270         if EnergyStatus:
 271             Energy = "%.2f" % Energy
 272         else:
 273             if not OptionsInfo["QuietMode"]:
 274                 MolName = RDKitUtil.GetMolName(Mol, MolNum)
 275                 MiscUtil.PrintWarning("Failed to retrieve calculated energy for molecule %s. Try again after removing any salts or cleaing up the molecule...\n" % (MolName))
 276     
 277     if  OptionsInfo["RemoveHydrogens"]:
 278         Mol = Chem.RemoveHs(Mol)
 279     
 280     return (Mol, True, Energy)
 281 
 282 def MinimizeConformers(Mol, MolNum = None):
 283     """Generate and minimize conformers for a molecule to get the lowest energy conformer."""
 284     
 285     if  OptionsInfo["AddHydrogens"]:
 286         Mol = Chem.AddHs(Mol)
 287 
 288     ConfIDs = EmbedMolecule(Mol, MolNum)
 289     if not len(ConfIDs):
 290         if not OptionsInfo["QuietMode"]:
 291             MolName = RDKitUtil.GetMolName(Mol, MolNum)
 292             MiscUtil.PrintWarning("Minimization couldn't be performed for molecule %s: Embedding failed...\n" % MolName)
 293         return (Mol, False, None, None)
 294 
 295     CalcEnergyMap = {}
 296     for ConfID in ConfIDs:
 297         try:
 298             if OptionsInfo["UseUFF"]:
 299                 Status = AllChem.UFFOptimizeMolecule(Mol, confId = ConfID, maxIters = OptionsInfo["MaxIters"])
 300             elif OptionsInfo["UseMMFF"]:
 301                 Status = AllChem.MMFFOptimizeMolecule(Mol, confId = ConfID, maxIters = OptionsInfo["MaxIters"], mmffVariant = OptionsInfo["MMFFVariant"])
 302             else:
 303                 MiscUtil.PrintError("Minimization couldn't be performed: Specified forcefield, %s, is not supported" % OptionsInfo["ForceField"])
 304         except (ValueError, RuntimeError, Chem.rdchem.KekulizeException) as ErrMsg:
 305             if not OptionsInfo["QuietMode"]:
 306                 MolName = RDKitUtil.GetMolName(Mol, MolNum)
 307                 MiscUtil.PrintWarning("Minimization couldn't be performed for molecule %s:\n%s\n" % (MolName, ErrMsg))
 308             return (Mol, False, None, None)
 309         
 310         EnergyStatus, Energy = GetEnergy(Mol, ConfID)
 311         if not EnergyStatus:
 312             if not OptionsInfo["QuietMode"]:
 313                 MolName = RDKitUtil.GetMolName(Mol, MolNum)
 314                 MiscUtil.PrintWarning("Failed to retrieve calculated energy for conformation number %d of molecule %s. Try again after removing any salts or cleaing up the molecule...\n" % (ConfID, MolName))
 315             return (Mol, False, None, None)
 316         
 317         if Status != 0:
 318             if not OptionsInfo["QuietMode"]:
 319                 MolName = RDKitUtil.GetMolName(Mol, MolNum)
 320                 MiscUtil.PrintWarning("Minimization failed to converge for conformation number %d of molecule %s in %d steps. Try using higher value for \"--maxIters\" option...\n" % (ConfID, MolName, OptionsInfo["MaxIters"]))
 321             
 322         CalcEnergyMap[ConfID] = Energy
 323 
 324     SortedConfIDs = sorted(ConfIDs, key = lambda ConfID: CalcEnergyMap[ConfID])
 325     MinEnergyConfID = SortedConfIDs[0]
 326         
 327     if  OptionsInfo["RemoveHydrogens"]:
 328         Mol = Chem.RemoveHs(Mol)
 329 
 330     Energy = "%.2f" % CalcEnergyMap[MinEnergyConfID]  if OptionsInfo["EnergyOut"] else None
 331     
 332     return (Mol, True, MinEnergyConfID, Energy)
 333     
 334 def GetEnergy(Mol, ConfID = None):
 335     """Calculate energy."""
 336 
 337     Status = True
 338     Energy = None
 339 
 340     if ConfID is None:
 341         ConfID = -1
 342     
 343     if OptionsInfo["UseUFF"]:
 344         UFFMoleculeForcefield = AllChem.UFFGetMoleculeForceField(Mol, confId = ConfID)
 345         if UFFMoleculeForcefield is None:
 346             Status = False
 347         else:
 348             Energy = UFFMoleculeForcefield.CalcEnergy()
 349     elif OptionsInfo["UseMMFF"]:
 350         MMFFMoleculeProperties = AllChem.MMFFGetMoleculeProperties(Mol, mmffVariant = OptionsInfo["MMFFVariant"])
 351         MMFFMoleculeForcefield = AllChem.MMFFGetMoleculeForceField(Mol, MMFFMoleculeProperties, confId = ConfID)
 352         if MMFFMoleculeForcefield is None:
 353             Status = False
 354         else:
 355             Energy = MMFFMoleculeForcefield.CalcEnergy()
 356     else:
 357         MiscUtil.PrintError("Couldn't retrieve conformer energy: Specified forcefield, %s, is not supported" % OptionsInfo["ForceField"])
 358     
 359     return (Status, Energy)
 360     
 361 def EmbedMolecule(Mol, MolNum = None):
 362     """Embed conformations."""
 363 
 364     ConfIDs = []
 365     
 366     MaxConfs = OptionsInfo["MaxConfs"]
 367     RandomSeed = OptionsInfo["RandomSeed"]
 368     EnforceChirality = OptionsInfo["EnforceChirality"]
 369     UseExpTorsionAnglePrefs = OptionsInfo["UseExpTorsionAnglePrefs"]
 370     ETVersion = OptionsInfo["ETVersion"]
 371     UseBasicKnowledge = OptionsInfo["UseBasicKnowledge"]
 372 
 373     try:
 374         ConfIDs = AllChem.EmbedMultipleConfs(Mol, numConfs = MaxConfs, randomSeed = RandomSeed, enforceChirality = EnforceChirality, useExpTorsionAnglePrefs = UseExpTorsionAnglePrefs, useBasicKnowledge = UseBasicKnowledge, ETversion = ETVersion)
 375     except ValueError as ErrMsg:
 376         if not OptionsInfo["QuietMode"]:
 377             MolName = RDKitUtil.GetMolName(Mol, MolNum)
 378             MiscUtil.PrintWarning("Embedding failed  for molecule %s:\n%s\n" % (MolName, ErrMsg))
 379         ConfIDs = []
 380     
 381     return ConfIDs
 382 
 383 def WriteMolecule(Writer, Mol, MolNum = None, ConfID = None, Energy = None):
 384     """Write molecule. """
 385 
 386     if Energy is not None:
 387         Mol.SetProp(OptionsInfo["EnergyLabel"], Energy)
 388 
 389     try:
 390         if ConfID is None:
 391             Writer.write(Mol)
 392         else:
 393             Writer.write(Mol, confId = ConfID)
 394     except (ValueError, RuntimeError) as ErrMsg:
 395         if not OptionsInfo["QuietMode"]:
 396             MolName = RDKitUtil.GetMolName(Mol, MolNum)
 397             MiscUtil.PrintWarning("Failed to write molecule %s:\n%s\n" % (MolName, ErrMsg))
 398         return False
 399 
 400     return True
 401 
 402 def ProcesssConformerGeneratorOption():
 403     """Process comformer generator option. """
 404     
 405     ConfGenParams = MiscUtil.ProcessOptionConformerGenerator("--conformerGenerator", Options["--conformerGenerator"])
 406     
 407     OptionsInfo["ConformerGenerator"] = ConfGenParams["ConformerGenerator"]
 408     OptionsInfo["SkipConformerGeneration"] = ConfGenParams["SkipConformerGeneration"]
 409     OptionsInfo["UseBasicKnowledge"] = ConfGenParams["UseBasicKnowledge"]
 410     OptionsInfo["UseExpTorsionAnglePrefs"] = ConfGenParams["UseExpTorsionAnglePrefs"]
 411     OptionsInfo["ETVersion"] = ConfGenParams["ETVersion"]
 412 
 413 def ProcessOptions():
 414     """Process and validate command line arguments and options."""
 415     
 416     MiscUtil.PrintInfo("Processing options...")
 417     
 418     # Validate options...
 419     ValidateOptions()
 420     
 421     OptionsInfo["Infile"] = Options["--infile"]
 422     OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters("--infileParams", Options["--infileParams"], Options["--infile"])
 423     
 424     OptionsInfo["Outfile"] = Options["--outfile"]
 425     OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters("--outfileParams", Options["--outfileParams"])
 426     
 427     OptionsInfo["Overwrite"] = Options["--overwrite"]
 428 
 429     OptionsInfo["AddHydrogens"] = True if re.match("^yes$", Options["--addHydrogens"], re.I) else False
 430 
 431     ProcesssConformerGeneratorOption()
 432 
 433     if re.match("^UFF$", Options["--forceField"], re.I):
 434         ForceField = "UFF"
 435         UseUFF = True
 436         UseMMFF = False
 437     elif re.match("^MMFF$", Options["--forceField"], re.I):
 438         ForceField = "MMFF"
 439         UseUFF = False
 440         UseMMFF = True
 441     else:
 442         MiscUtil.PrintError("The value, %s, specified for \"--forceField\" is not supported." % (Options["--forceField"],))
 443     
 444     MMFFVariant = "MMFF94" if re.match("^MMFF94$", Options["--forceFieldMMFFVariant"], re.I) else "MMFF94s"
 445     
 446     OptionsInfo["ForceField"] = ForceField
 447     OptionsInfo["MMFFVariant"] = MMFFVariant
 448     OptionsInfo["UseMMFF"] = UseMMFF
 449     OptionsInfo["UseUFF"] = UseUFF
 450         
 451     OptionsInfo["EnergyOut"] = True if re.match("^yes$", Options["--energyOut"], re.I) else False
 452     if UseMMFF:
 453         OptionsInfo["EnergyLabel"] = "%s_Energy" % MMFFVariant
 454     else:
 455         OptionsInfo["EnergyLabel"] = "%s_Energy" % ForceField
 456     
 457     OptionsInfo["EnforceChirality"] = True if re.match("^yes$", Options["--enforceChirality"], re.I) else False
 458     
 459     OptionsInfo["MaxIters"] = int(Options["--maxIters"])
 460     OptionsInfo["MaxConfs"] = int(Options["--maxConfs"])
 461     
 462     OptionsInfo["MPMode"] = True if re.match("^yes$", Options["--mp"], re.I) else False
 463     OptionsInfo["MPParams"] = MiscUtil.ProcessOptionMultiprocessingParameters("--mpParams", Options["--mpParams"])
 464     
 465     OptionsInfo["QuietMode"] = True if re.match("^yes$", Options["--quiet"], re.I) else False
 466     
 467     RandomSeed = -1
 468     if not re.match("^auto$", Options["--randomSeed"], re.I):
 469         RandomSeed = int(Options["--randomSeed"])
 470     OptionsInfo["RandomSeed"] = RandomSeed
 471     
 472     OptionsInfo["RemoveHydrogens"] = True if re.match("^yes$", Options["--removeHydrogens"], re.I) else False
 473 
 474 def RetrieveOptions():
 475     """Retrieve command line arguments and options."""
 476     
 477     # Get options...
 478     global Options
 479     Options = docopt(_docoptUsage_)
 480     
 481     # Set current working directory to the specified directory...
 482     WorkingDir = Options["--workingdir"]
 483     if WorkingDir:
 484         os.chdir(WorkingDir)
 485     
 486     # Handle examples option...
 487     if "--examples" in Options and Options["--examples"]:
 488         MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_))
 489         sys.exit(0)
 490 
 491 def ValidateOptions():
 492     """Validate option values."""
 493     
 494     MiscUtil.ValidateOptionTextValue("-a, --addHydrogens", Options["--addHydrogens"], "yes no")
 495     MiscUtil.ValidateOptionTextValue("-c, --conformerGenerator", Options["--conformerGenerator"], "SDG ETDG KDG ETKDG ETKDGv2 None")
 496     
 497     MiscUtil.ValidateOptionTextValue("-f, --forceField", Options["--forceField"], "UFF MMFF")
 498     MiscUtil.ValidateOptionTextValue(" --forceFieldMMFFVariant", Options["--forceFieldMMFFVariant"], "MMFF94 MMFF94s")
 499     
 500     MiscUtil.ValidateOptionTextValue("--energyOut", Options["--energyOut"], "yes no")
 501     MiscUtil.ValidateOptionTextValue("--enforceChirality ", Options["--enforceChirality"], "yes no")
 502     
 503     MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"])
 504     MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "sdf sd mol smi txt csv tsv")
 505     
 506     MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "sdf sd")
 507     MiscUtil.ValidateOptionsOutputFileOverwrite("-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"])
 508     MiscUtil.ValidateOptionsDistinctFileNames("-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"])
 509         
 510     MiscUtil.ValidateOptionIntegerValue("--maxConfs", Options["--maxConfs"], {">": 0})
 511     MiscUtil.ValidateOptionIntegerValue("--maxIters", Options["--maxIters"], {">": 0})
 512     
 513     MiscUtil.ValidateOptionTextValue("--mp", Options["--mp"], "yes no")
 514     MiscUtil.ValidateOptionTextValue("-q, --quiet", Options["--quiet"], "yes no")
 515     
 516     if not re.match("^auto$", Options["--randomSeed"], re.I):
 517         MiscUtil.ValidateOptionIntegerValue("--randomSeed", Options["--randomSeed"], {})
 518     
 519     MiscUtil.ValidateOptionTextValue("-r, --removeHydrogens", Options["--removeHydrogens"], "yes no")
 520 
 521 # Setup a usage string for docopt...
 522 _docoptUsage_ = """
 523 RDKitPerformMinimization.py - Perform structure minimization
 524 
 525 Usage:
 526     RDKitPerformMinimization.py [--addHydrogens <yes or no>] [--conformerGenerator <SDG, KDG, ETDG,  ETKDG, ETKDGv2, None>]
 527                                 [--forceField <UFF, or MMFF>] [--forceFieldMMFFVariant <MMFF94 or MMFF94s>]
 528                                 [--energyOut  <yes or no>] [--enforceChirality <yes or no>] [--infileParams <Name,Value,...>]
 529                                 [--maxConfs <number>] [--maxIters <number>] [--mp <yes or no>] [--mpParams <Name,Value,...>]
 530                                 [ --outfileParams <Name,Value,...> ] [--overwrite] [--quiet <yes or no>] [ --removeHydrogens <yes or no>]
 531                                 [--randomSeed <number>] [-w <dir>] -i <infile> -o <outfile> 
 532     RDKitPerformMinimization.py -h | --help | -e | --examples
 533 
 534 Description:
 535     Generate 3D structures for molecules using a combination of distance geometry
 536     and forcefield minimization or minimize existing 3D structures using a specified
 537     forcefield.
 538 
 539     The supported input file formats are: Mol (.mol), SD (.sdf, .sd), SMILES (.smi
 540     .csv, .tsv, .txt)
 541 
 542     The supported output file formats are: SD (.sdf, .sd)
 543 
 544 Options:
 545     -a, --addHydrogens <yes or no>  [default: yes]
 546         Add hydrogens before minimization.
 547     -c, --conformerGenerator <text or None>  [default: ETKDGv2]
 548         Conformation generation methodology for generating initial 3D coordinates. The
 549         possible values along with a brief description are shown below:
 550             
 551             SDG: Standard Distance Geometry
 552             KDG: basic Knowledge-terms with Distance Geometry
 553             ETDG: Experimental Torsion-angle preference with Distance Geometry
 554             ETKDG: Experimental Torsion-angle preference along with basic
 555                 Knowledge-terms and Distance Geometry [Ref 129]
 556             ETKDGv2: Experimental Torsion-angle preference along with basic
 557                 Knowledge-terms and Distance Geometry [Ref 167]
 558             None: No conformation generation
 559             
 560         The conformation generation step may be skipped by specifying 'None' value to
 561         perform only forcefield minimization of molecules with 3D structures in input
 562         file.  This doesn't work for molecules in SMILES file or molecules in SD/MOL files
 563         containing 2D structures.
 564     -f, --forceField <UFF, MMFF>  [default: MMFF]
 565         Forcefield method to use for energy minimization. Possible values: Universal Force
 566         Field (UFF) [ Ref 81 ] or Merck Molecular Mechanics Force Field [ Ref 83-87 ] .
 567     --forceFieldMMFFVariant <MMFF94 or MMFF94s>  [default: MMFF94]
 568         Variant of MMFF forcefield to use for energy minimization.
 569     --energyOut <yes or no>  [default: No]
 570         Write out energy values.
 571     --enforceChirality <yes or no>  [default: Yes]
 572         Enforce chirality for defined chiral centers.
 573     -e, --examples
 574         Print examples.
 575     -h, --help
 576         Print this help message.
 577     -i, --infile <infile>
 578         Input file name.
 579     --infileParams <Name,Value,...>  [default: auto]
 580         A comma delimited list of parameter name and value pairs for reading
 581         molecules from files. The supported parameter names for different file
 582         formats, along with their default values, are shown below:
 583             
 584             SD, MOL: removeHydrogens,yes,sanitize,yes,strictParsing,yes
 585             SMILES: smilesColumn,1,smilesNameColumn,2,smilesDelimiter,space,
 586                 smilesTitleLine,auto,sanitize,yes
 587             
 588         Possible values for smilesDelimiter: space, comma or tab.
 589     --maxConfs <number>  [default: 250]
 590         Maximum number of conformations to generate for each molecule by conformation
 591         generation methodology for initial 3D coordinates. The conformations are minimized
 592         using the specified forcefield and the lowest energy conformation is written to the
 593         output file. This option is ignored during 'None' value of '-c --conformerGenerator'
 594         option.
 595     --maxIters <number>  [default: 500]
 596         Maximum number of iterations to perform for each molecule during forcefield
 597         minimization.
 598     --mp <yes or no>  [default: no]
 599         Use multiprocessing.
 600          
 601         By default, input data is retrieved in a lazy manner via mp.Pool.imap()
 602         function employing lazy RDKit data iterable. This allows processing of
 603         arbitrary large data sets without any additional requirements memory.
 604         
 605         All input data may be optionally loaded into memory by mp.Pool.map()
 606         before starting worker processes in a process pool by setting the value
 607         of 'inputDataMode' to 'InMemory' in '--mpParams' option.
 608         
 609         A word to the wise: The default 'chunkSize' value of 1 during 'Lazy' input
 610         data mode may adversely impact the performance. The '--mpParams' section
 611         provides additional information to tune the value of 'chunkSize'.
 612     --mpParams <Name,Value,...>  [default: auto]
 613         A comma delimited list of parameter name and value pairs to configure
 614         multiprocessing.
 615         
 616         The supported parameter names along with their default and possible
 617         values are shown below:
 618         
 619             chunkSize, auto
 620             inputDataMode, Lazy   [ Possible values: InMemory or Lazy ]
 621             numProcesses, auto   [ Default: mp.cpu_count() ]
 622         
 623         These parameters are used by the following functions to configure and
 624         control the behavior of multiprocessing: mp.Pool(), mp.Pool.map(), and
 625         mp.Pool.imap().
 626         
 627         The chunkSize determines chunks of input data passed to each worker
 628         process in a process pool by mp.Pool.map() and mp.Pool.imap() functions.
 629         The default value of chunkSize is dependent on the value of 'inputDataMode'.
 630         
 631         The mp.Pool.map() function, invoked during 'InMemory' input data mode,
 632         automatically converts RDKit data iterable into a list, loads all data into
 633         memory, and calculates the default chunkSize using the following method
 634         as shown in its code:
 635         
 636             chunkSize, extra = divmod(len(dataIterable), len(numProcesses) * 4)
 637             if extra: chunkSize += 1
 638         
 639         For example, the default chunkSize will be 7 for a pool of 4 worker processes
 640         and 100 data items.
 641         
 642         The mp.Pool.imap() function, invoked during 'Lazy' input data mode, employs
 643         'lazy' RDKit data iterable to retrieve data as needed, without loading all the
 644         data into memory. Consequently, the size of input data is not known a priori.
 645         It's not possible to estimate an optimal value for the chunkSize. The default 
 646         chunkSize is set to 1.
 647         
 648         The default value for the chunkSize during 'Lazy' data mode may adversely
 649         impact the performance due to the overhead associated with exchanging
 650         small chunks of data. It is generally a good idea to explicitly set chunkSize to
 651         a larger value during 'Lazy' input data mode, based on the size of your input
 652         data and number of processes in the process pool.
 653         
 654         The mp.Pool.map() function waits for all worker processes to process all
 655         the data and return the results. The mp.Pool.imap() function, however,
 656         returns the the results obtained from worker processes as soon as the
 657         results become available for specified chunks of data.
 658         
 659         The order of data in the results returned by both mp.Pool.map() and 
 660         mp.Pool.imap() functions always corresponds to the input data.
 661     -o, --outfile <outfile>
 662         Output file name.
 663     --outfileParams <Name,Value,...>  [default: auto]
 664         A comma delimited list of parameter name and value pairs for writing
 665         molecules to files. The supported parameter names for different file
 666         formats, along with their default values, are shown below:
 667             
 668             SD: kekulize,yes,forceV3000,no
 669             
 670     --overwrite
 671         Overwrite existing files.
 672     -q, --quiet <yes or no>  [default: no]
 673         Use quiet mode. The warning and information messages will not be printed.
 674     -r, --removeHydrogens <yes or no>  [default: Yes]
 675         Remove hydrogens after minimization.
 676     --randomSeed <number>  [default: auto]
 677         Seed for the random number generator for reproducing 3D coordinates.
 678         Default is to use a random seed.
 679     -w, --workingdir <dir>
 680         Location of working directory which defaults to the current directory.
 681 
 682 Examples:
 683     To generate up to 250 conformations using ETKDG methodology followed by MMFF
 684     forcefield minimization for a maximum of 500 iterations for molecules in a SMILES file
 685     and write out a SD file containing minimum energy structure corresponding to each
 686     molecule, type:
 687 
 688         % RDKitPerformMinimization.py  -i Sample.smi -o SampleOut.sdf
 689 
 690     To rerun the first example in a quiet mode and write out a SD file, type:
 691 
 692         % RDKitPerformMinimization.py  -q yes -i Sample.smi -o SampleOut.sdf
 693 
 694     To run the first example in multiprocessing mode on all available CPUs
 695     without loading all data into memory and write out a SD file, type:
 696 
 697         % RDKitPerformMinimization.py --mp yes -i Sample.smi -o SampleOut.sdf
 698 
 699     To run the first example in multiprocessing mode on all available CPUs
 700     by loading all data into memory and write out a SD file, type:
 701 
 702         % RDKitPerformMinimization.py --mp yes --mpParams "inputDataMode,
 703           InMemory" -i Sample.smi -o SampleOut.sdf
 704 
 705     To run the first example in multiprocessing mode on specific number of
 706     CPUs and chunk size without loading all data into memory and write out a SD file,
 707     type:
 708 
 709         % RDKitPerformMinimization.py --mp yes --mpParams "inputDataMode,Lazy,
 710           numProcesses,4,chunkSize,8" -i Sample.smi -o SampleOut.sdf
 711 
 712     To generate up to 150 conformations using ETKDG methodology followed by MMFF
 713     forcefield minimization for a maximum of 250 iterations along with a specified random
 714     seed  for molecules in a SMILES file and write out a SD file containing minimum energy
 715     structures corresponding to each molecule, type
 716 
 717         % RDKitPerformMinimization.py  --maxConfs 150  --randomSeed 201780117 
 718           --maxIters 250  -i Sample.smi -o SampleOut.sdf
 719 
 720     To minimize structures in a 3D SD file using UFF forcefield for a maximum of 150
 721     iterations without generating any conformations and write out a SD file containing
 722     minimum energy structures corresponding to each molecule, type
 723 
 724         % RDKitPerformMinimization.py  -c None -f UFF --maxIters 150
 725           -i Sample3D.sdf -o SampleOut.sdf
 726 
 727     To generate up to 50 conformations using SDG methodology followed
 728     by UFF forcefield minimization for a maximum of 50 iterations for 
 729     molecules in a CSV SMILES file, SMILES strings in column 1, name in
 730     column 2, and write out a SD file, type:
 731 
 732         % RDKitPerformMinimization.py  --maxConfs 50  --maxIters 50 -c SDG
 733           -f UFF --infileParams "smilesDelimiter,comma,smilesTitleLine,yes,
 734           smilesColumn,1,smilesNameColumn,2"  -i SampleSMILES.csv
 735           -o SampleOut.sdf
 736 
 737 Author:
 738     Manish Sud(msud@san.rr.com)
 739 
 740 See also:
 741     RDKitCalculateRMSD.py, RDKitCalculateMolecularDescriptors.py, RDKitCompareMoleculeShapes.py,
 742     RDKitConvertFileFormat.py, RDKitGenerateConformers.py, RDKitPerformConstrainedMinimization.py
 743 
 744 Copyright:
 745     Copyright (C) 2024 Manish Sud. All rights reserved.
 746 
 747     The functionality available in this script is implemented using RDKit, an
 748     open source toolkit for cheminformatics developed by Greg Landrum.
 749 
 750     This file is part of MayaChemTools.
 751 
 752     MayaChemTools is free software; you can redistribute it and/or modify it under
 753     the terms of the GNU Lesser General Public License as published by the Free
 754     Software Foundation; either version 3 of the License, or (at your option) any
 755     later version.
 756 
 757 """
 758 
 759 if __name__ == "__main__":
 760     main()