1 #!/bin/env python 2 # 3 # File: RDKitPerformMinimization.py 4 # Author: Manish Sud <msud@san.rr.com> 5 # 6 # Copyright (C) 2025 Manish Sud. All rights reserved. 7 # 8 # The functionality available in this script is implemented using RDKit, an 9 # open source toolkit for cheminformatics developed by Greg Landrum. 10 # 11 # This file is part of MayaChemTools. 12 # 13 # MayaChemTools is free software; you can redistribute it and/or modify it under 14 # the terms of the GNU Lesser General Public License as published by the Free 15 # Software Foundation; either version 3 of the License, or (at your option) any 16 # later version. 17 # 18 # MayaChemTools is distributed in the hope that it will be useful, but without 19 # any warranty; without even the implied warranty of merchantability of fitness 20 # for a particular purpose. See the GNU Lesser General Public License for more 21 # details. 22 # 23 # You should have received a copy of the GNU Lesser General Public License 24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or 25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, 26 # Boston, MA, 02111-1307, USA. 27 # 28 29 from __future__ import print_function 30 31 # Add local python path to the global path and import standard library modules... 32 import os 33 import sys; sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python")) 34 import time 35 import re 36 import multiprocessing as mp 37 38 # RDKit imports... 39 try: 40 from rdkit import rdBase 41 from rdkit import Chem 42 from rdkit.Chem import AllChem 43 except ImportError as ErrMsg: 44 sys.stderr.write("\nFailed to import RDKit module/package: %s\n" % ErrMsg) 45 sys.stderr.write("Check/update your RDKit environment and try again.\n\n") 46 sys.exit(1) 47 48 # MayaChemTools imports... 49 try: 50 from docopt import docopt 51 import MiscUtil 52 import RDKitUtil 53 except ImportError as ErrMsg: 54 sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg) 55 sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n") 56 sys.exit(1) 57 58 ScriptName = os.path.basename(sys.argv[0]) 59 Options = {} 60 OptionsInfo = {} 61 62 def main(): 63 """Start execution of the script.""" 64 65 MiscUtil.PrintInfo("\n%s (RDKit v%s; MayaChemTools v%s; %s): Starting...\n" % (ScriptName, rdBase.rdkitVersion, MiscUtil.GetMayaChemToolsVersion(), time.asctime())) 66 67 (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime() 68 69 # Retrieve command line arguments and options... 70 RetrieveOptions() 71 72 # Process and validate command line arguments and options... 73 ProcessOptions() 74 75 # Perform actions required by the script... 76 PerformMinimization() 77 78 MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName) 79 MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime)) 80 81 def PerformMinimization(): 82 """Perform minimization.""" 83 84 # Setup a molecule reader... 85 MiscUtil.PrintInfo("\nProcessing file %s..." % OptionsInfo["Infile"]) 86 Mols = RDKitUtil.ReadMolecules(OptionsInfo["Infile"], **OptionsInfo["InfileParams"]) 87 88 # Set up a molecule writer... 89 Writer = RDKitUtil.MoleculesWriter(OptionsInfo["Outfile"], **OptionsInfo["OutfileParams"]) 90 if Writer is None: 91 MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OptionsInfo["Outfile"]) 92 MiscUtil.PrintInfo("Generating file %s..." % OptionsInfo["Outfile"]) 93 94 MolCount, ValidMolCount, MinimizationFailedCount, WriteFailedCount = ProcessMolecules(Mols, Writer) 95 96 if Writer is not None: 97 Writer.close() 98 99 MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount) 100 MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount) 101 MiscUtil.PrintInfo("Number of molecules failed during conformation generation or minimization: %d" % MinimizationFailedCount) 102 MiscUtil.PrintInfo("Number of molecules failed during writing: %d" % WriteFailedCount) 103 MiscUtil.PrintInfo("Number of ignored molecules: %d" % (MolCount - ValidMolCount + MinimizationFailedCount + WriteFailedCount)) 104 105 def ProcessMolecules(Mols, Writer): 106 """Process and minimize molecules.""" 107 108 if OptionsInfo["MPMode"]: 109 return ProcessMoleculesUsingMultipleProcesses(Mols, Writer) 110 else: 111 return ProcessMoleculesUsingSingleProcess(Mols, Writer) 112 113 def ProcessMoleculesUsingSingleProcess(Mols, Writer): 114 """Process and minimize molecules using a single process.""" 115 116 if OptionsInfo["SkipConformerGeneration"]: 117 MiscUtil.PrintInfo("\nPerforming minimization without generation of conformers...") 118 else: 119 MiscUtil.PrintInfo("\nPerforming minimization with generation of conformers...") 120 121 (MolCount, ValidMolCount, MinimizationFailedCount, WriteFailedCount) = [0] * 4 122 for Mol in Mols: 123 MolCount += 1 124 125 if Mol is None: 126 continue 127 128 if RDKitUtil.IsMolEmpty(Mol): 129 if not OptionsInfo["QuietMode"]: 130 MolName = RDKitUtil.GetMolName(Mol, MolCount) 131 MiscUtil.PrintWarning("Ignoring empty molecule: %s" % MolName) 132 continue 133 ValidMolCount += 1 134 135 Mol, CalcStatus, ConfID, Energy = MinimizeMoleculeOrConformers(Mol, MolCount) 136 137 if not CalcStatus: 138 MinimizationFailedCount += 1 139 continue 140 141 WriteStatus = WriteMolecule(Writer, Mol, MolCount, ConfID, Energy) 142 if not WriteStatus: 143 WriteFailedCount += 1 144 145 return (MolCount, ValidMolCount, MinimizationFailedCount, WriteFailedCount) 146 147 def ProcessMoleculesUsingMultipleProcesses(Mols, Writer): 148 """Process and minimize molecules using multiprocessing.""" 149 150 if OptionsInfo["SkipConformerGeneration"]: 151 MiscUtil.PrintInfo("\nPerforming minimization without generation of conformers using multiprocessing...") 152 else: 153 MiscUtil.PrintInfo("\nPerforming minimization with generation of conformers using multiprocessing...") 154 155 MPParams = OptionsInfo["MPParams"] 156 157 # Setup data for initializing a worker process... 158 InitializeWorkerProcessArgs = (MiscUtil.ObjectToBase64EncodedString(Options), MiscUtil.ObjectToBase64EncodedString(OptionsInfo)) 159 160 # Setup a encoded mols data iterable for a worker process... 161 WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(Mols) 162 163 # Setup process pool along with data initialization for each process... 164 MiscUtil.PrintInfo("\nConfiguring multiprocessing using %s method..." % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()")) 165 MiscUtil.PrintInfo("NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n" % (MPParams["NumProcesses"], MPParams["InputDataMode"], ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"]))) 166 167 ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeWorkerProcess, InitializeWorkerProcessArgs) 168 169 # Start processing... 170 if re.match("^Lazy$", MPParams["InputDataMode"], re.I): 171 Results = ProcessPool.imap(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"]) 172 elif re.match("^InMemory$", MPParams["InputDataMode"], re.I): 173 Results = ProcessPool.map(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"]) 174 else: 175 MiscUtil.PrintError("The value, %s, specified for \"--inputDataMode\" is not supported." % (MPParams["InputDataMode"])) 176 177 (MolCount, ValidMolCount, MinimizationFailedCount, WriteFailedCount) = [0] * 4 178 for Result in Results: 179 MolCount += 1 180 MolIndex, EncodedMol, CalcStatus, ConfID, Energy = Result 181 182 if EncodedMol is None: 183 continue 184 ValidMolCount += 1 185 186 if not CalcStatus: 187 MinimizationFailedCount += 1 188 continue 189 190 Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol) 191 WriteStatus = WriteMolecule(Writer, Mol, MolCount, ConfID, Energy) 192 if not WriteStatus: 193 WriteFailedCount += 1 194 195 return (MolCount, ValidMolCount, MinimizationFailedCount, WriteFailedCount) 196 197 def InitializeWorkerProcess(*EncodedArgs): 198 """Initialize data for a worker process.""" 199 200 global Options, OptionsInfo 201 202 MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid()) 203 204 # Decode Options and OptionInfo... 205 Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0]) 206 OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1]) 207 208 def WorkerProcess(EncodedMolInfo): 209 """Process data for a worker process.""" 210 211 MolIndex, EncodedMol = EncodedMolInfo 212 213 CalcStatus = False 214 ConfID = None 215 Energy = None 216 217 if EncodedMol is None: 218 return [MolIndex, None, CalcStatus, ConfID, Energy] 219 220 Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol) 221 if RDKitUtil.IsMolEmpty(Mol): 222 if not OptionsInfo["QuietMode"]: 223 MolName = RDKitUtil.GetMolName(Mol, (MolIndex + 1)) 224 MiscUtil.PrintWarning("Ignoring empty molecule: %s" % MolName) 225 return [MolIndex, None, CalcStatus, ConfID, Energy] 226 227 Mol, CalcStatus, ConfID, Energy = MinimizeMoleculeOrConformers(Mol, (MolIndex + 1)) 228 229 return [MolIndex, RDKitUtil.MolToBase64EncodedMolString(Mol, PropertyPickleFlags = Chem.PropertyPickleOptions.MolProps | Chem.PropertyPickleOptions.PrivateProps), CalcStatus, ConfID, Energy] 230 231 def MinimizeMoleculeOrConformers(Mol, MolNum = None): 232 """Minimize molecule or conformers.""" 233 234 ConfID = None 235 if OptionsInfo["SkipConformerGeneration"]: 236 Mol, CalcStatus, Energy = MinimizeMolecule(Mol, MolNum) 237 else: 238 Mol, CalcStatus, ConfID, Energy = MinimizeConformers(Mol, MolNum) 239 240 return (Mol, CalcStatus, ConfID, Energy) 241 242 def MinimizeMolecule(Mol, MolNum = None): 243 """Minimize molecule.""" 244 245 if OptionsInfo["AddHydrogens"]: 246 Mol = Chem.AddHs(Mol, addCoords = True) 247 248 Status = 0 249 try: 250 if OptionsInfo["UseUFF"]: 251 Status = AllChem.UFFOptimizeMolecule(Mol, maxIters = OptionsInfo["MaxIters"]) 252 elif OptionsInfo["UseMMFF"]: 253 Status = AllChem.MMFFOptimizeMolecule(Mol, maxIters = OptionsInfo["MaxIters"], mmffVariant = OptionsInfo["MMFFVariant"]) 254 else: 255 MiscUtil.PrintError("Minimization couldn't be performed: Specified forcefield, %s, is not supported" % OptionsInfo["ForceField"]) 256 except (ValueError, RuntimeError, Chem.rdchem.KekulizeException) as ErrMsg: 257 if not OptionsInfo["QuietMode"]: 258 MolName = RDKitUtil.GetMolName(Mol, MolNum) 259 MiscUtil.PrintWarning("Minimization couldn't be performed for molecule %s:\n%s\n" % (MolName, ErrMsg)) 260 return (Mol, False, None) 261 262 if Status != 0: 263 if not OptionsInfo["QuietMode"]: 264 MolName = RDKitUtil.GetMolName(Mol, MolNum) 265 MiscUtil.PrintWarning("Minimization failed to converge for molecule %s in %d steps. Try using higher value for \"--maxIters\" option...\n" % (MolName, OptionsInfo["MaxIters"])) 266 267 Energy = None 268 if OptionsInfo["EnergyOut"]: 269 EnergyStatus, Energy = GetEnergy(Mol) 270 if EnergyStatus: 271 Energy = "%.2f" % Energy 272 else: 273 if not OptionsInfo["QuietMode"]: 274 MolName = RDKitUtil.GetMolName(Mol, MolNum) 275 MiscUtil.PrintWarning("Failed to retrieve calculated energy for molecule %s. Try again after removing any salts or cleaing up the molecule...\n" % (MolName)) 276 277 if OptionsInfo["RemoveHydrogens"]: 278 Mol = Chem.RemoveHs(Mol) 279 280 return (Mol, True, Energy) 281 282 def MinimizeConformers(Mol, MolNum = None): 283 """Generate and minimize conformers for a molecule to get the lowest energy conformer.""" 284 285 if OptionsInfo["AddHydrogens"]: 286 Mol = Chem.AddHs(Mol) 287 288 ConfIDs = EmbedMolecule(Mol, MolNum) 289 if not len(ConfIDs): 290 if not OptionsInfo["QuietMode"]: 291 MolName = RDKitUtil.GetMolName(Mol, MolNum) 292 MiscUtil.PrintWarning("Minimization couldn't be performed for molecule %s: Embedding failed...\n" % MolName) 293 return (Mol, False, None, None) 294 295 CalcEnergyMap = {} 296 for ConfID in ConfIDs: 297 try: 298 if OptionsInfo["UseUFF"]: 299 Status = AllChem.UFFOptimizeMolecule(Mol, confId = ConfID, maxIters = OptionsInfo["MaxIters"]) 300 elif OptionsInfo["UseMMFF"]: 301 Status = AllChem.MMFFOptimizeMolecule(Mol, confId = ConfID, maxIters = OptionsInfo["MaxIters"], mmffVariant = OptionsInfo["MMFFVariant"]) 302 else: 303 MiscUtil.PrintError("Minimization couldn't be performed: Specified forcefield, %s, is not supported" % OptionsInfo["ForceField"]) 304 except (ValueError, RuntimeError, Chem.rdchem.KekulizeException) as ErrMsg: 305 if not OptionsInfo["QuietMode"]: 306 MolName = RDKitUtil.GetMolName(Mol, MolNum) 307 MiscUtil.PrintWarning("Minimization couldn't be performed for molecule %s:\n%s\n" % (MolName, ErrMsg)) 308 return (Mol, False, None, None) 309 310 EnergyStatus, Energy = GetEnergy(Mol, ConfID) 311 if not EnergyStatus: 312 if not OptionsInfo["QuietMode"]: 313 MolName = RDKitUtil.GetMolName(Mol, MolNum) 314 MiscUtil.PrintWarning("Failed to retrieve calculated energy for conformation number %d of molecule %s. Try again after removing any salts or cleaing up the molecule...\n" % (ConfID, MolName)) 315 return (Mol, False, None, None) 316 317 if Status != 0: 318 if not OptionsInfo["QuietMode"]: 319 MolName = RDKitUtil.GetMolName(Mol, MolNum) 320 MiscUtil.PrintWarning("Minimization failed to converge for conformation number %d of molecule %s in %d steps. Try using higher value for \"--maxIters\" option...\n" % (ConfID, MolName, OptionsInfo["MaxIters"])) 321 322 CalcEnergyMap[ConfID] = Energy 323 324 SortedConfIDs = sorted(ConfIDs, key = lambda ConfID: CalcEnergyMap[ConfID]) 325 MinEnergyConfID = SortedConfIDs[0] 326 327 if OptionsInfo["RemoveHydrogens"]: 328 Mol = Chem.RemoveHs(Mol) 329 330 Energy = "%.2f" % CalcEnergyMap[MinEnergyConfID] if OptionsInfo["EnergyOut"] else None 331 332 return (Mol, True, MinEnergyConfID, Energy) 333 334 def GetEnergy(Mol, ConfID = None): 335 """Calculate energy.""" 336 337 Status = True 338 Energy = None 339 340 if ConfID is None: 341 ConfID = -1 342 343 if OptionsInfo["UseUFF"]: 344 UFFMoleculeForcefield = AllChem.UFFGetMoleculeForceField(Mol, confId = ConfID) 345 if UFFMoleculeForcefield is None: 346 Status = False 347 else: 348 Energy = UFFMoleculeForcefield.CalcEnergy() 349 elif OptionsInfo["UseMMFF"]: 350 MMFFMoleculeProperties = AllChem.MMFFGetMoleculeProperties(Mol, mmffVariant = OptionsInfo["MMFFVariant"]) 351 MMFFMoleculeForcefield = AllChem.MMFFGetMoleculeForceField(Mol, MMFFMoleculeProperties, confId = ConfID) 352 if MMFFMoleculeForcefield is None: 353 Status = False 354 else: 355 Energy = MMFFMoleculeForcefield.CalcEnergy() 356 else: 357 MiscUtil.PrintError("Couldn't retrieve conformer energy: Specified forcefield, %s, is not supported" % OptionsInfo["ForceField"]) 358 359 return (Status, Energy) 360 361 def EmbedMolecule(Mol, MolNum = None): 362 """Embed conformations.""" 363 364 ConfIDs = [] 365 366 MaxConfs = OptionsInfo["MaxConfs"] 367 RandomSeed = OptionsInfo["RandomSeed"] 368 EnforceChirality = OptionsInfo["EnforceChirality"] 369 UseExpTorsionAnglePrefs = OptionsInfo["UseExpTorsionAnglePrefs"] 370 ETVersion = OptionsInfo["ETVersion"] 371 UseBasicKnowledge = OptionsInfo["UseBasicKnowledge"] 372 373 try: 374 ConfIDs = AllChem.EmbedMultipleConfs(Mol, numConfs = MaxConfs, randomSeed = RandomSeed, enforceChirality = EnforceChirality, useExpTorsionAnglePrefs = UseExpTorsionAnglePrefs, useBasicKnowledge = UseBasicKnowledge, ETversion = ETVersion) 375 except ValueError as ErrMsg: 376 if not OptionsInfo["QuietMode"]: 377 MolName = RDKitUtil.GetMolName(Mol, MolNum) 378 MiscUtil.PrintWarning("Embedding failed for molecule %s:\n%s\n" % (MolName, ErrMsg)) 379 ConfIDs = [] 380 381 return ConfIDs 382 383 def WriteMolecule(Writer, Mol, MolNum = None, ConfID = None, Energy = None): 384 """Write molecule. """ 385 386 if Energy is not None: 387 Mol.SetProp(OptionsInfo["EnergyLabel"], Energy) 388 389 try: 390 if ConfID is None: 391 Writer.write(Mol) 392 else: 393 Writer.write(Mol, confId = ConfID) 394 except (ValueError, RuntimeError) as ErrMsg: 395 if not OptionsInfo["QuietMode"]: 396 MolName = RDKitUtil.GetMolName(Mol, MolNum) 397 MiscUtil.PrintWarning("Failed to write molecule %s:\n%s\n" % (MolName, ErrMsg)) 398 return False 399 400 return True 401 402 def ProcesssConformerGeneratorOption(): 403 """Process comformer generator option. """ 404 405 ConfGenParams = MiscUtil.ProcessOptionConformerGenerator("--conformerGenerator", Options["--conformerGenerator"]) 406 407 OptionsInfo["ConformerGenerator"] = ConfGenParams["ConformerGenerator"] 408 OptionsInfo["SkipConformerGeneration"] = ConfGenParams["SkipConformerGeneration"] 409 OptionsInfo["UseBasicKnowledge"] = ConfGenParams["UseBasicKnowledge"] 410 OptionsInfo["UseExpTorsionAnglePrefs"] = ConfGenParams["UseExpTorsionAnglePrefs"] 411 OptionsInfo["ETVersion"] = ConfGenParams["ETVersion"] 412 413 def ProcessOptions(): 414 """Process and validate command line arguments and options.""" 415 416 MiscUtil.PrintInfo("Processing options...") 417 418 # Validate options... 419 ValidateOptions() 420 421 OptionsInfo["Infile"] = Options["--infile"] 422 OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters("--infileParams", Options["--infileParams"], Options["--infile"]) 423 424 OptionsInfo["Outfile"] = Options["--outfile"] 425 OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters("--outfileParams", Options["--outfileParams"]) 426 427 OptionsInfo["Overwrite"] = Options["--overwrite"] 428 429 OptionsInfo["AddHydrogens"] = True if re.match("^yes$", Options["--addHydrogens"], re.I) else False 430 431 ProcesssConformerGeneratorOption() 432 433 if re.match("^UFF$", Options["--forceField"], re.I): 434 ForceField = "UFF" 435 UseUFF = True 436 UseMMFF = False 437 elif re.match("^MMFF$", Options["--forceField"], re.I): 438 ForceField = "MMFF" 439 UseUFF = False 440 UseMMFF = True 441 else: 442 MiscUtil.PrintError("The value, %s, specified for \"--forceField\" is not supported." % (Options["--forceField"],)) 443 444 MMFFVariant = "MMFF94" if re.match("^MMFF94$", Options["--forceFieldMMFFVariant"], re.I) else "MMFF94s" 445 446 OptionsInfo["ForceField"] = ForceField 447 OptionsInfo["MMFFVariant"] = MMFFVariant 448 OptionsInfo["UseMMFF"] = UseMMFF 449 OptionsInfo["UseUFF"] = UseUFF 450 451 OptionsInfo["EnergyOut"] = True if re.match("^yes$", Options["--energyOut"], re.I) else False 452 if UseMMFF: 453 OptionsInfo["EnergyLabel"] = "%s_Energy" % MMFFVariant 454 else: 455 OptionsInfo["EnergyLabel"] = "%s_Energy" % ForceField 456 457 OptionsInfo["EnforceChirality"] = True if re.match("^yes$", Options["--enforceChirality"], re.I) else False 458 459 OptionsInfo["MaxIters"] = int(Options["--maxIters"]) 460 OptionsInfo["MaxConfs"] = int(Options["--maxConfs"]) 461 462 OptionsInfo["MPMode"] = True if re.match("^yes$", Options["--mp"], re.I) else False 463 OptionsInfo["MPParams"] = MiscUtil.ProcessOptionMultiprocessingParameters("--mpParams", Options["--mpParams"]) 464 465 OptionsInfo["QuietMode"] = True if re.match("^yes$", Options["--quiet"], re.I) else False 466 467 RandomSeed = -1 468 if not re.match("^auto$", Options["--randomSeed"], re.I): 469 RandomSeed = int(Options["--randomSeed"]) 470 OptionsInfo["RandomSeed"] = RandomSeed 471 472 OptionsInfo["RemoveHydrogens"] = True if re.match("^yes$", Options["--removeHydrogens"], re.I) else False 473 474 def RetrieveOptions(): 475 """Retrieve command line arguments and options.""" 476 477 # Get options... 478 global Options 479 Options = docopt(_docoptUsage_) 480 481 # Set current working directory to the specified directory... 482 WorkingDir = Options["--workingdir"] 483 if WorkingDir: 484 os.chdir(WorkingDir) 485 486 # Handle examples option... 487 if "--examples" in Options and Options["--examples"]: 488 MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_)) 489 sys.exit(0) 490 491 def ValidateOptions(): 492 """Validate option values.""" 493 494 MiscUtil.ValidateOptionTextValue("-a, --addHydrogens", Options["--addHydrogens"], "yes no") 495 MiscUtil.ValidateOptionTextValue("-c, --conformerGenerator", Options["--conformerGenerator"], "SDG ETDG KDG ETKDG ETKDGv2 None") 496 497 MiscUtil.ValidateOptionTextValue("-f, --forceField", Options["--forceField"], "UFF MMFF") 498 MiscUtil.ValidateOptionTextValue(" --forceFieldMMFFVariant", Options["--forceFieldMMFFVariant"], "MMFF94 MMFF94s") 499 500 MiscUtil.ValidateOptionTextValue("--energyOut", Options["--energyOut"], "yes no") 501 MiscUtil.ValidateOptionTextValue("--enforceChirality ", Options["--enforceChirality"], "yes no") 502 503 MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"]) 504 MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "sdf sd mol smi txt csv tsv") 505 506 MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "sdf sd") 507 MiscUtil.ValidateOptionsOutputFileOverwrite("-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"]) 508 MiscUtil.ValidateOptionsDistinctFileNames("-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"]) 509 510 MiscUtil.ValidateOptionIntegerValue("--maxConfs", Options["--maxConfs"], {">": 0}) 511 MiscUtil.ValidateOptionIntegerValue("--maxIters", Options["--maxIters"], {">": 0}) 512 513 MiscUtil.ValidateOptionTextValue("--mp", Options["--mp"], "yes no") 514 MiscUtil.ValidateOptionTextValue("-q, --quiet", Options["--quiet"], "yes no") 515 516 if not re.match("^auto$", Options["--randomSeed"], re.I): 517 MiscUtil.ValidateOptionIntegerValue("--randomSeed", Options["--randomSeed"], {}) 518 519 MiscUtil.ValidateOptionTextValue("-r, --removeHydrogens", Options["--removeHydrogens"], "yes no") 520 521 # Setup a usage string for docopt... 522 _docoptUsage_ = """ 523 RDKitPerformMinimization.py - Perform structure minimization 524 525 Usage: 526 RDKitPerformMinimization.py [--addHydrogens <yes or no>] [--conformerGenerator <SDG, KDG, ETDG, ETKDG, ETKDGv2, None>] 527 [--forceField <UFF, or MMFF>] [--forceFieldMMFFVariant <MMFF94 or MMFF94s>] 528 [--energyOut <yes or no>] [--enforceChirality <yes or no>] [--infileParams <Name,Value,...>] 529 [--maxConfs <number>] [--maxIters <number>] [--mp <yes or no>] [--mpParams <Name,Value,...>] 530 [ --outfileParams <Name,Value,...> ] [--overwrite] [--quiet <yes or no>] [ --removeHydrogens <yes or no>] 531 [--randomSeed <number>] [-w <dir>] -i <infile> -o <outfile> 532 RDKitPerformMinimization.py -h | --help | -e | --examples 533 534 Description: 535 Generate 3D structures for molecules using a combination of distance geometry 536 and forcefield minimization or minimize existing 3D structures using a specified 537 forcefield. 538 539 The supported input file formats are: Mol (.mol), SD (.sdf, .sd), SMILES (.smi 540 .csv, .tsv, .txt) 541 542 The supported output file formats are: SD (.sdf, .sd) 543 544 Options: 545 -a, --addHydrogens <yes or no> [default: yes] 546 Add hydrogens before minimization. 547 -c, --conformerGenerator <text or None> [default: ETKDGv2] 548 Conformation generation methodology for generating initial 3D coordinates. The 549 possible values along with a brief description are shown below: 550 551 SDG: Standard Distance Geometry 552 KDG: basic Knowledge-terms with Distance Geometry 553 ETDG: Experimental Torsion-angle preference with Distance Geometry 554 ETKDG: Experimental Torsion-angle preference along with basic 555 Knowledge-terms and Distance Geometry [Ref 129] 556 ETKDGv2: Experimental Torsion-angle preference along with basic 557 Knowledge-terms and Distance Geometry [Ref 167] 558 None: No conformation generation 559 560 The conformation generation step may be skipped by specifying 'None' value to 561 perform only forcefield minimization of molecules with 3D structures in input 562 file. This doesn't work for molecules in SMILES file or molecules in SD/MOL files 563 containing 2D structures. 564 -f, --forceField <UFF, MMFF> [default: MMFF] 565 Forcefield method to use for energy minimization. Possible values: Universal Force 566 Field (UFF) [ Ref 81 ] or Merck Molecular Mechanics Force Field [ Ref 83-87 ] . 567 --forceFieldMMFFVariant <MMFF94 or MMFF94s> [default: MMFF94] 568 Variant of MMFF forcefield to use for energy minimization. 569 --energyOut <yes or no> [default: No] 570 Write out energy values. 571 --enforceChirality <yes or no> [default: Yes] 572 Enforce chirality for defined chiral centers. 573 -e, --examples 574 Print examples. 575 -h, --help 576 Print this help message. 577 -i, --infile <infile> 578 Input file name. 579 --infileParams <Name,Value,...> [default: auto] 580 A comma delimited list of parameter name and value pairs for reading 581 molecules from files. The supported parameter names for different file 582 formats, along with their default values, are shown below: 583 584 SD, MOL: removeHydrogens,yes,sanitize,yes,strictParsing,yes 585 SMILES: smilesColumn,1,smilesNameColumn,2,smilesDelimiter,space, 586 smilesTitleLine,auto,sanitize,yes 587 588 Possible values for smilesDelimiter: space, comma or tab. 589 --maxConfs <number> [default: 250] 590 Maximum number of conformations to generate for each molecule by conformation 591 generation methodology for initial 3D coordinates. The conformations are minimized 592 using the specified forcefield and the lowest energy conformation is written to the 593 output file. This option is ignored during 'None' value of '-c --conformerGenerator' 594 option. 595 --maxIters <number> [default: 500] 596 Maximum number of iterations to perform for each molecule during forcefield 597 minimization. 598 --mp <yes or no> [default: no] 599 Use multiprocessing. 600 601 By default, input data is retrieved in a lazy manner via mp.Pool.imap() 602 function employing lazy RDKit data iterable. This allows processing of 603 arbitrary large data sets without any additional requirements memory. 604 605 All input data may be optionally loaded into memory by mp.Pool.map() 606 before starting worker processes in a process pool by setting the value 607 of 'inputDataMode' to 'InMemory' in '--mpParams' option. 608 609 A word to the wise: The default 'chunkSize' value of 1 during 'Lazy' input 610 data mode may adversely impact the performance. The '--mpParams' section 611 provides additional information to tune the value of 'chunkSize'. 612 --mpParams <Name,Value,...> [default: auto] 613 A comma delimited list of parameter name and value pairs to configure 614 multiprocessing. 615 616 The supported parameter names along with their default and possible 617 values are shown below: 618 619 chunkSize, auto 620 inputDataMode, Lazy [ Possible values: InMemory or Lazy ] 621 numProcesses, auto [ Default: mp.cpu_count() ] 622 623 These parameters are used by the following functions to configure and 624 control the behavior of multiprocessing: mp.Pool(), mp.Pool.map(), and 625 mp.Pool.imap(). 626 627 The chunkSize determines chunks of input data passed to each worker 628 process in a process pool by mp.Pool.map() and mp.Pool.imap() functions. 629 The default value of chunkSize is dependent on the value of 'inputDataMode'. 630 631 The mp.Pool.map() function, invoked during 'InMemory' input data mode, 632 automatically converts RDKit data iterable into a list, loads all data into 633 memory, and calculates the default chunkSize using the following method 634 as shown in its code: 635 636 chunkSize, extra = divmod(len(dataIterable), len(numProcesses) * 4) 637 if extra: chunkSize += 1 638 639 For example, the default chunkSize will be 7 for a pool of 4 worker processes 640 and 100 data items. 641 642 The mp.Pool.imap() function, invoked during 'Lazy' input data mode, employs 643 'lazy' RDKit data iterable to retrieve data as needed, without loading all the 644 data into memory. Consequently, the size of input data is not known a priori. 645 It's not possible to estimate an optimal value for the chunkSize. The default 646 chunkSize is set to 1. 647 648 The default value for the chunkSize during 'Lazy' data mode may adversely 649 impact the performance due to the overhead associated with exchanging 650 small chunks of data. It is generally a good idea to explicitly set chunkSize to 651 a larger value during 'Lazy' input data mode, based on the size of your input 652 data and number of processes in the process pool. 653 654 The mp.Pool.map() function waits for all worker processes to process all 655 the data and return the results. The mp.Pool.imap() function, however, 656 returns the the results obtained from worker processes as soon as the 657 results become available for specified chunks of data. 658 659 The order of data in the results returned by both mp.Pool.map() and 660 mp.Pool.imap() functions always corresponds to the input data. 661 -o, --outfile <outfile> 662 Output file name. 663 --outfileParams <Name,Value,...> [default: auto] 664 A comma delimited list of parameter name and value pairs for writing 665 molecules to files. The supported parameter names for different file 666 formats, along with their default values, are shown below: 667 668 SD: kekulize,yes,forceV3000,no 669 670 --overwrite 671 Overwrite existing files. 672 -q, --quiet <yes or no> [default: no] 673 Use quiet mode. The warning and information messages will not be printed. 674 -r, --removeHydrogens <yes or no> [default: Yes] 675 Remove hydrogens after minimization. 676 --randomSeed <number> [default: auto] 677 Seed for the random number generator for reproducing 3D coordinates. 678 Default is to use a random seed. 679 -w, --workingdir <dir> 680 Location of working directory which defaults to the current directory. 681 682 Examples: 683 To generate up to 250 conformations using ETKDG methodology followed by MMFF 684 forcefield minimization for a maximum of 500 iterations for molecules in a SMILES file 685 and write out a SD file containing minimum energy structure corresponding to each 686 molecule, type: 687 688 % RDKitPerformMinimization.py -i Sample.smi -o SampleOut.sdf 689 690 To rerun the first example in a quiet mode and write out a SD file, type: 691 692 % RDKitPerformMinimization.py -q yes -i Sample.smi -o SampleOut.sdf 693 694 To run the first example in multiprocessing mode on all available CPUs 695 without loading all data into memory and write out a SD file, type: 696 697 % RDKitPerformMinimization.py --mp yes -i Sample.smi -o SampleOut.sdf 698 699 To run the first example in multiprocessing mode on all available CPUs 700 by loading all data into memory and write out a SD file, type: 701 702 % RDKitPerformMinimization.py --mp yes --mpParams "inputDataMode, 703 InMemory" -i Sample.smi -o SampleOut.sdf 704 705 To run the first example in multiprocessing mode on specific number of 706 CPUs and chunk size without loading all data into memory and write out a SD file, 707 type: 708 709 % RDKitPerformMinimization.py --mp yes --mpParams "inputDataMode,Lazy, 710 numProcesses,4,chunkSize,8" -i Sample.smi -o SampleOut.sdf 711 712 To generate up to 150 conformations using ETKDG methodology followed by MMFF 713 forcefield minimization for a maximum of 250 iterations along with a specified random 714 seed for molecules in a SMILES file and write out a SD file containing minimum energy 715 structures corresponding to each molecule, type 716 717 % RDKitPerformMinimization.py --maxConfs 150 --randomSeed 201780117 718 --maxIters 250 -i Sample.smi -o SampleOut.sdf 719 720 To minimize structures in a 3D SD file using UFF forcefield for a maximum of 150 721 iterations without generating any conformations and write out a SD file containing 722 minimum energy structures corresponding to each molecule, type 723 724 % RDKitPerformMinimization.py -c None -f UFF --maxIters 150 725 -i Sample3D.sdf -o SampleOut.sdf 726 727 To generate up to 50 conformations using SDG methodology followed 728 by UFF forcefield minimization for a maximum of 50 iterations for 729 molecules in a CSV SMILES file, SMILES strings in column 1, name in 730 column 2, and write out a SD file, type: 731 732 % RDKitPerformMinimization.py --maxConfs 50 --maxIters 50 -c SDG 733 -f UFF --infileParams "smilesDelimiter,comma,smilesTitleLine,yes, 734 smilesColumn,1,smilesNameColumn,2" -i SampleSMILES.csv 735 -o SampleOut.sdf 736 737 Author: 738 Manish Sud(msud@san.rr.com) 739 740 See also: 741 RDKitCalculateRMSD.py, RDKitCalculateMolecularDescriptors.py, RDKitCompareMoleculeShapes.py, 742 RDKitConvertFileFormat.py, RDKitGenerateConformers.py, RDKitPerformConstrainedMinimization.py 743 744 Copyright: 745 Copyright (C) 2025 Manish Sud. All rights reserved. 746 747 The functionality available in this script is implemented using RDKit, an 748 open source toolkit for cheminformatics developed by Greg Landrum. 749 750 This file is part of MayaChemTools. 751 752 MayaChemTools is free software; you can redistribute it and/or modify it under 753 the terms of the GNU Lesser General Public License as published by the Free 754 Software Foundation; either version 3 of the License, or (at your option) any 755 later version. 756 757 """ 758 759 if __name__ == "__main__": 760 main()