1 #!/bin/env python 2 # 3 # File: RDKitPerformConstrainedMinimization.py 4 # Author: Manish Sud <msud@san.rr.com> 5 # 6 # Copyright (C) 2024 Manish Sud. All rights reserved. 7 # 8 # The functionality available in this script is implemented using RDKit, an 9 # open source toolkit for cheminformatics developed by Greg Landrum. 10 # 11 # This file is part of MayaChemTools. 12 # 13 # MayaChemTools is free software; you can redistribute it and/or modify it under 14 # the terms of the GNU Lesser General Public License as published by the Free 15 # Software Foundation; either version 3 of the License, or (at your option) any 16 # later version. 17 # 18 # MayaChemTools is distributed in the hope that it will be useful, but without 19 # any warranty; without even the implied warranty of merchantability of fitness 20 # for a particular purpose. See the GNU Lesser General Public License for more 21 # details. 22 # 23 # You should have received a copy of the GNU Lesser General Public License 24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or 25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, 26 # Boston, MA, 02111-1307, USA. 27 # 28 29 from __future__ import print_function 30 31 # Add local python path to the global path and import standard library modules... 32 import os 33 import sys; sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python")) 34 import time 35 import re 36 import multiprocessing as mp 37 38 # RDKit imports... 39 try: 40 from rdkit import rdBase 41 from rdkit import Chem 42 from rdkit.Chem import AllChem 43 from rdkit.Chem import rdFMCS 44 except ImportError as ErrMsg: 45 sys.stderr.write("\nFailed to import RDKit module/package: %s\n" % ErrMsg) 46 sys.stderr.write("Check/update your RDKit environment and try again.\n\n") 47 sys.exit(1) 48 49 # MayaChemTools imports... 50 try: 51 from docopt import docopt 52 import MiscUtil 53 import RDKitUtil 54 except ImportError as ErrMsg: 55 sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg) 56 sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n") 57 sys.exit(1) 58 59 ScriptName = os.path.basename(sys.argv[0]) 60 Options = {} 61 OptionsInfo = {} 62 63 def main(): 64 """Start execution of the script.""" 65 66 MiscUtil.PrintInfo("\n%s (RDKit v%s; MayaChemTools v%s; %s): Starting...\n" % (ScriptName, rdBase.rdkitVersion, MiscUtil.GetMayaChemToolsVersion(), time.asctime())) 67 68 (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime() 69 70 # Retrieve command line arguments and options... 71 RetrieveOptions() 72 73 # Process and validate command line arguments and options... 74 ProcessOptions() 75 76 # Perform actions required by the script... 77 PerformConstrainedMinimization() 78 79 MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName) 80 MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime)) 81 82 def PerformConstrainedMinimization(): 83 """Perform constrained minimization.""" 84 85 # Read and validate reference molecule... 86 RefMol = RetrieveReferenceMolecule() 87 88 # Setup a molecule reader for input file... 89 MiscUtil.PrintInfo("\nProcessing file %s..." % OptionsInfo["Infile"]) 90 OptionsInfo["InfileParams"]["AllowEmptyMols"] = True 91 Mols = RDKitUtil.ReadMolecules(OptionsInfo["Infile"], **OptionsInfo["InfileParams"]) 92 93 # Set up a molecule writer... 94 Writer = RDKitUtil.MoleculesWriter(OptionsInfo["Outfile"], **OptionsInfo["OutfileParams"]) 95 if Writer is None: 96 MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OptionsInfo["Outfile"]) 97 MiscUtil.PrintInfo("Generating file %s..." % OptionsInfo["Outfile"]) 98 99 MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount = ProcessMolecules(RefMol, Mols, Writer) 100 101 if Writer is not None: 102 Writer.close() 103 104 MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount) 105 MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount) 106 MiscUtil.PrintInfo("Number of molecules with missing core scaffold: %d" % CoreScaffoldMissingCount) 107 MiscUtil.PrintInfo("Number of molecules failed during conformation generation or minimization: %d" % MinimizationFailedCount) 108 MiscUtil.PrintInfo("Number of ignored molecules: %d" % (MolCount - ValidMolCount + CoreScaffoldMissingCount + MinimizationFailedCount)) 109 110 def ProcessMolecules(RefMol, Mols, Writer): 111 """Process and minimize molecules.""" 112 113 if OptionsInfo["MPMode"]: 114 return ProcessMoleculesUsingMultipleProcesses(RefMol, Mols, Writer) 115 else: 116 return ProcessMoleculesUsingSingleProcess(RefMol, Mols, Writer) 117 118 def ProcessMoleculesUsingSingleProcess(RefMol, Mols, Writer): 119 """Process and minimize molecules using a single process.""" 120 121 (MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount) = [0] * 4 122 123 for Mol in Mols: 124 MolCount += 1 125 126 if Mol is None: 127 continue 128 129 if RDKitUtil.IsMolEmpty(Mol): 130 if not OptionsInfo["QuietMode"]: 131 MolName = RDKitUtil.GetMolName(Mol, MolCount) 132 MiscUtil.PrintWarning("Ignoring empty molecule: %s" % MolName) 133 continue 134 ValidMolCount += 1 135 136 # Setup a reference molecule core containing common scaffold atoms... 137 RefMolCore = SetupCoreScaffold(RefMol, Mol, MolCount) 138 if RefMolCore is None: 139 CoreScaffoldMissingCount += 1 140 continue 141 142 Mol, CalcStatus, Energy, ScaffoldEmbedRMSD = ConstrainAndMinimizeMolecule(Mol, RefMolCore, MolCount) 143 144 if not CalcStatus: 145 MinimizationFailedCount += 1 146 continue 147 148 WriteMolecule(Writer, Mol, Energy, ScaffoldEmbedRMSD) 149 150 return (MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount) 151 152 def ProcessMoleculesUsingMultipleProcesses(RefMol, Mols, Writer): 153 """Process and minimize molecules using multiprocessing.""" 154 155 MPParams = OptionsInfo["MPParams"] 156 157 # Setup data for initializing a worker process... 158 MiscUtil.PrintInfo("Encoding options info and reference molecule...") 159 160 OptionsInfo["EncodedRefMol"] = RDKitUtil.MolToBase64EncodedMolString(RefMol) 161 InitializeWorkerProcessArgs = (MiscUtil.ObjectToBase64EncodedString(Options), MiscUtil.ObjectToBase64EncodedString(OptionsInfo)) 162 163 # Setup a encoded mols data iterable for a worker process... 164 WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(Mols) 165 166 # Setup process pool along with data initialization for each process... 167 MiscUtil.PrintInfo("\nConfiguring multiprocessing using %s method..." % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()")) 168 MiscUtil.PrintInfo("NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n" % (MPParams["NumProcesses"], MPParams["InputDataMode"], ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"]))) 169 170 ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeWorkerProcess, InitializeWorkerProcessArgs) 171 172 # Start processing... 173 if re.match("^Lazy$", MPParams["InputDataMode"], re.I): 174 Results = ProcessPool.imap(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"]) 175 elif re.match("^InMemory$", MPParams["InputDataMode"], re.I): 176 Results = ProcessPool.map(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"]) 177 else: 178 MiscUtil.PrintError("The value, %s, specified for \"--inputDataMode\" is not supported." % (MPParams["InputDataMode"])) 179 180 (MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount) = [0] * 4 181 for Result in Results: 182 MolCount += 1 183 MolIndex, EncodedMol, CoreScaffoldMissingStatus, CalcStatus, Energy, ScaffoldEmbedRMSD = Result 184 185 if EncodedMol is None: 186 continue 187 ValidMolCount += 1 188 189 if CoreScaffoldMissingStatus: 190 CoreScaffoldMissingStatus += 1 191 continue 192 193 if not CalcStatus: 194 MinimizationFailedCount += 1 195 continue 196 197 Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol) 198 WriteMolecule(Writer, Mol, Energy, ScaffoldEmbedRMSD) 199 200 return (MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount) 201 202 def InitializeWorkerProcess(*EncodedArgs): 203 """Initialize data for a worker process.""" 204 205 global Options, OptionsInfo 206 207 MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid()) 208 209 # Decode Options and OptionInfo... 210 Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0]) 211 OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1]) 212 213 # Decode RefMol... 214 OptionsInfo["RefMol"] = RDKitUtil.MolFromBase64EncodedMolString(OptionsInfo["EncodedRefMol"]) 215 216 def WorkerProcess(EncodedMolInfo): 217 """Process data for a worker process.""" 218 219 MolIndex, EncodedMol = EncodedMolInfo 220 221 CoreScaffoldMissingStatus = False 222 CalcStatus = False 223 Energy = None 224 ScaffoldEmbedRMSD = None 225 226 if EncodedMol is None: 227 return [MolIndex, None, CoreScaffoldMissingStatus, CalcStatus, Energy, ScaffoldEmbedRMSD] 228 229 RefMol = OptionsInfo["RefMol"] 230 231 Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol) 232 if RDKitUtil.IsMolEmpty(Mol): 233 if not OptionsInfo["QuietMode"]: 234 MolName = RDKitUtil.GetMolName(Mol, (MolIndex + 1)) 235 MiscUtil.PrintWarning("Ignoring empty molecule: %s" % MolName) 236 return [MolIndex, None, CoreScaffoldMissingStatus, CalcStatus, Energy, ScaffoldEmbedRMSD] 237 238 # Setup a reference molecule core containing common scaffold atoms... 239 RefMolCore = SetupCoreScaffold(RefMol, Mol, (MolIndex + 1)) 240 if RefMolCore is None: 241 CoreScaffoldMissingStatus = True 242 return [MolIndex, None, CalcStatus, CoreScaffoldMissingStatus, Energy, ScaffoldEmbedRMSD] 243 244 Mol, CalcStatus, Energy, ScaffoldEmbedRMSD = ConstrainAndMinimizeMolecule(Mol, RefMolCore, (MolIndex + 1)) 245 246 return [MolIndex, RDKitUtil.MolToBase64EncodedMolString(Mol, PropertyPickleFlags = Chem.PropertyPickleOptions.MolProps | Chem.PropertyPickleOptions.PrivateProps), CoreScaffoldMissingStatus, CalcStatus, Energy, ScaffoldEmbedRMSD] 247 248 def RetrieveReferenceMolecule(): 249 """Retrieve and validate reference molecule.""" 250 251 RefFile = OptionsInfo["RefFile"] 252 253 MiscUtil.PrintInfo("\nProcessing file %s..." % (RefFile)) 254 OptionsInfo["InfileParams"]["AllowEmptyMols"] = False 255 ValidRefMols, RefMolCount, ValidRefMolCount = RDKitUtil.ReadAndValidateMolecules(RefFile, **OptionsInfo["InfileParams"]) 256 257 if ValidRefMolCount == 0: 258 MiscUtil.PrintError("The reference file, %s, contains no valid molecules." % RefFile) 259 elif ValidRefMolCount > 1: 260 MiscUtil.PrintWarning("The reference file, %s, contains, %d, valid molecules. Using first molecule as the reference molecule..." % (RefFile, ValidRefMolCount)) 261 262 RefMol = ValidRefMols[0] 263 264 if OptionsInfo["UseScaffoldSMARTS"]: 265 ScaffoldPatternMol = Chem.MolFromSmarts(OptionsInfo["ScaffoldSMARTS"]) 266 if ScaffoldPatternMol is None: 267 MiscUtil.PrintError("Failed to create scaffold pattern molecule. The scaffold SMARTS pattern, %s, specified using \"-s, --scaffold\" option is not valid." % (OptionsInfo["ScaffoldSMARTS"])) 268 269 if not RefMol.HasSubstructMatch(ScaffoldPatternMol): 270 MiscUtil.PrintError("The scaffold SMARTS pattern, %s, specified using \"-s, --scaffold\" option, is missing in the first valid reference molecule." % (OptionsInfo["ScaffoldSMARTS"])) 271 272 return RefMol 273 274 def SetupCoreScaffold(RefMol, Mol, MolCount): 275 """Setup a reference molecule core containing common scaffold atoms between 276 a pair of molecules.""" 277 278 if OptionsInfo["UseScaffoldMCS"]: 279 return SetupCoreScaffoldByMCS(RefMol, Mol, MolCount) 280 elif OptionsInfo["UseScaffoldSMARTS"]: 281 return SetupCoreScaffoldBySMARTS(RefMol, Mol, MolCount) 282 else: 283 MiscUtil.PrintError("The value, %s, specified for \"-s, --scaffold\" option is not supported." % (OptionsInfo["Scaffold"])) 284 285 def SetupCoreScaffoldByMCS(RefMol, Mol, MolCount): 286 """Setup a reference molecule core containing common scaffold atoms between 287 a pair of molecules using MCS.""" 288 289 MCSParams = OptionsInfo["MCSParams"] 290 Mols = [RefMol, Mol] 291 292 MCSResultObject = rdFMCS.FindMCS(Mols, maximizeBonds = MCSParams["MaximizeBonds"], threshold = MCSParams["Threshold"], timeout = MCSParams["TimeOut"], verbose = MCSParams["Verbose"], matchValences = MCSParams["MatchValences"], ringMatchesRingOnly = MCSParams["RingMatchesRingOnly"], completeRingsOnly = MCSParams["CompleteRingsOnly"], matchChiralTag = MCSParams["MatchChiralTag"], atomCompare = MCSParams["AtomCompare"], bondCompare = MCSParams["BondCompare"], seedSmarts = MCSParams["SeedSMARTS"]) 293 294 if MCSResultObject.canceled: 295 if not OptionsInfo["QuietMode"]: 296 MiscUtil.PrintWarning("MCS failed to identify a common core scaffold between reference moecule and input molecule %s. Specify a different set of parameters using \"-m, --mcsParams\" option and try again." % (RDKitUtil.GetMolName(Mol, MolCount))) 297 return None 298 299 CoreNumAtoms = MCSResultObject.numAtoms 300 CoreNumBonds = MCSResultObject.numBonds 301 302 SMARTSCore = MCSResultObject.smartsString 303 304 if not len(SMARTSCore): 305 if not OptionsInfo["QuietMode"]: 306 MiscUtil.PrintWarning("MCS failed to identify a common core scaffold between reference moecule and input molecule %s. Specify a different set of parameters using \"-m, --mcsParams\" option and try again." % (RDKitUtil.GetMolName(Mol, MolCount))) 307 return None 308 309 if CoreNumAtoms < MCSParams["MinNumAtoms"]: 310 if not OptionsInfo["QuietMode"]: 311 MiscUtil.PrintWarning("Number of atoms, %d, in core scaffold identified by MCS is less than, %d, as specified by \"minNumAtoms\" parameter in \"-m, --mcsParams\" option." % (CoreNumAtoms, MCSParams["MinNumAtoms"])) 312 return None 313 314 if CoreNumBonds < MCSParams["MinNumBonds"]: 315 if not OptionsInfo["QuietMode"]: 316 MiscUtil.PrintWarning("Number of bonds, %d, in core scaffold identified by MCS is less than, %d, as specified by \"minNumBonds\" parameter in \"-m, --mcsParams\" option." % (CoreNumBonds, MCSParams["MinNumBonds"])) 317 return None 318 319 return GenerateCoreMol(RefMol, SMARTSCore) 320 321 def SetupCoreScaffoldBySMARTS(RefMol, Mol, MolCount): 322 """Setup a reference molecule core containing common scaffold atoms between 323 a pair of molecules using specified SMARTS.""" 324 325 if OptionsInfo["ScaffoldPatternMol"] is None: 326 OptionsInfo["ScaffoldPatternMol"] = Chem.MolFromSmarts(OptionsInfo["ScaffoldSMARTS"]) 327 328 if not Mol.HasSubstructMatch(OptionsInfo["ScaffoldPatternMol"]): 329 if not OptionsInfo["QuietMode"]: 330 MiscUtil.PrintWarning("The scaffold SMARTS pattern, %s, specified using \"-s, --scaffold\" option is missing in input molecule, %s." % (OptionsInfo["ScaffoldSMARTS"], RDKitUtil.GetMolName(Mol, MolCount))) 331 return None 332 333 return GenerateCoreMol(RefMol, OptionsInfo["ScaffoldSMARTS"]) 334 335 def GenerateCoreMol(RefMol, SMARTSCore): 336 """Generate core molecule for embedding.""" 337 338 # Create a molecule corresponding to core atoms... 339 SMARTSCoreMol = Chem.MolFromSmarts(SMARTSCore) 340 341 # Setup a ref molecule containing core atoms with dummy atoms as 342 # attachment points for atoms around the core atoms... 343 Core = AllChem.ReplaceSidechains(Chem.RemoveHs(RefMol), SMARTSCoreMol) 344 345 # Delete any substructures containing dummy atoms.. 346 RefMolCore = AllChem.DeleteSubstructs(Core, Chem.MolFromSmiles('*')) 347 RefMolCore.UpdatePropertyCache() 348 349 return RefMolCore 350 351 def ConstrainAndMinimizeMolecule(Mol, RefMolCore, MolNum = None): 352 "Constrain and minimize molecule." 353 354 if OptionsInfo["AddHydrogens"]: 355 Mol = Chem.AddHs(Mol, addCoords = True) 356 357 # Setup forcefield function to use for constrained minimization... 358 ForceFieldFunction = None 359 ForceFieldName = None 360 if OptionsInfo["UseUFF"]: 361 ForceFieldFunction = lambda mol, confId = -1 : AllChem.UFFGetMoleculeForceField(mol, confId = confId) 362 ForeceFieldName = "UFF" 363 else: 364 ForceFieldFunction = lambda mol, confId = -1 : AllChem.MMFFGetMoleculeForceField(mol, AllChem.MMFFGetMoleculeProperties(mol, mmffVariant = OptionsInfo["MMFFVariant"]) , confId = confId) 365 ForeceFieldName = "MMFF" 366 367 if ForceFieldFunction is None: 368 if not OptionsInfo["QuietMode"]: 369 MiscUtil.PrintWarning("Failed to setup forcefield %s for molecule: %s\n" % (ForceFieldName, RDKitUtil.GetMolName(Mol, MolNum))) 370 return (None, False, None, None) 371 372 MaxConfs = OptionsInfo["MaxConfs"] 373 EnforceChirality = OptionsInfo["EnforceChirality"] 374 UseExpTorsionAnglePrefs = OptionsInfo["UseExpTorsionAnglePrefs"] 375 ETVersion = OptionsInfo["ETVersion"] 376 UseBasicKnowledge = OptionsInfo["UseBasicKnowledge"] 377 UseTethers = OptionsInfo["UseTethers"] 378 379 CalcEnergyMap = {} 380 MolConfsMap = {} 381 ConfIDs = [ConfID for ConfID in range(0, MaxConfs)] 382 383 for ConfID in ConfIDs: 384 try: 385 MolConf = Chem.Mol(Mol) 386 AllChem.ConstrainedEmbed(MolConf, RefMolCore, useTethers = UseTethers, coreConfId = -1, randomseed = ConfID, getForceField = ForceFieldFunction, enforceChirality = EnforceChirality, useExpTorsionAnglePrefs = UseExpTorsionAnglePrefs, useBasicKnowledge = UseBasicKnowledge, ETversion = ETVersion) 387 except (ValueError, RuntimeError, Chem.rdchem.KekulizeException) as ErrMsg: 388 if not OptionsInfo["QuietMode"]: 389 MolName = RDKitUtil.GetMolName(Mol, MolNum) 390 MiscUtil.PrintWarning("Constrained embedding couldn't be performed for molecule %s:\n%s\n" % (RDKitUtil.GetMolName(Mol, MolNum), ErrMsg)) 391 return (None, False, None, None) 392 393 EnergyStatus, Energy = GetEnergy(MolConf) 394 395 if not EnergyStatus: 396 if not OptionsInfo["QuietMode"]: 397 MolName = RDKitUtil.GetMolName(Mol, MolNum) 398 MiscUtil.PrintWarning("Failed to retrieve calculated energy for conformation number %d of molecule %s. Try again after removing any salts or cleaing up the molecule...\n" % (ConfID, MolName)) 399 return (None, False, None, None) 400 401 CalcEnergyMap[ConfID] = Energy 402 MolConfsMap[ConfID] = MolConf 403 404 SortedConfIDs = sorted(ConfIDs, key = lambda ConfID: CalcEnergyMap[ConfID]) 405 MinEnergyConfID = SortedConfIDs[0] 406 407 MinEnergy = "%.2f" % CalcEnergyMap[MinEnergyConfID] if OptionsInfo["EnergyOut"] else None 408 MinEnergyMolConf = MolConfsMap[MinEnergyConfID] 409 410 ScaffoldEmbedRMSD = "%.4f" % float(MinEnergyMolConf.GetProp('EmbedRMS')) if OptionsInfo["ScaffoldRMSDOut"] else None 411 MinEnergyMolConf.ClearProp('EmbedRMS') 412 413 if OptionsInfo["RemoveHydrogens"]: 414 MinEnergyMolConf = Chem.RemoveHs(MinEnergyMolConf) 415 416 return (MinEnergyMolConf, True, MinEnergy, ScaffoldEmbedRMSD) 417 418 def GetEnergy(Mol, ConfID = None): 419 """Calculate energy.""" 420 421 Status = True 422 Energy = None 423 424 if ConfID is None: 425 ConfID = -1 426 427 if OptionsInfo["UseUFF"]: 428 UFFMoleculeForcefield = AllChem.UFFGetMoleculeForceField(Mol, confId = ConfID) 429 if UFFMoleculeForcefield is None: 430 Status = False 431 else: 432 Energy = UFFMoleculeForcefield.CalcEnergy() 433 elif OptionsInfo["UseMMFF"]: 434 MMFFMoleculeProperties = AllChem.MMFFGetMoleculeProperties(Mol, mmffVariant = OptionsInfo["MMFFVariant"]) 435 MMFFMoleculeForcefield = AllChem.MMFFGetMoleculeForceField(Mol, MMFFMoleculeProperties, confId = ConfID) 436 if MMFFMoleculeForcefield is None: 437 Status = False 438 else: 439 Energy = MMFFMoleculeForcefield.CalcEnergy() 440 else: 441 MiscUtil.PrintError("Couldn't retrieve conformer energy: Specified forcefield, %s, is not supported" % OptionsInfo["ForceField"]) 442 443 return (Status, Energy) 444 445 def WriteMolecule(Writer, Mol, Energy = None, ScaffoldEmbedRMSD = None, ConfID = None,): 446 """Write molecule.""" 447 448 if ScaffoldEmbedRMSD is not None: 449 Mol.SetProp("CoreScaffoldEmbedRMSD", ScaffoldEmbedRMSD) 450 451 if Energy is not None: 452 Mol.SetProp(OptionsInfo["EnergyLabel"], Energy) 453 454 if ConfID is None: 455 Writer.write(Mol) 456 else: 457 Writer.write(Mol, confId = ConfID) 458 459 def ProcessMCSParameters(): 460 """Set up and process MCS parameters.""" 461 462 SetupMCSParameters() 463 ProcessSpecifiedMCSParameters() 464 465 def SetupMCSParameters(): 466 """Set up default MCS parameters.""" 467 468 OptionsInfo["MCSParams"] = {"MaximizeBonds": True, "Threshold": 0.9, "TimeOut": 3600, "Verbose": False, "MatchValences": True, "MatchChiralTag": False, "RingMatchesRingOnly": True, "CompleteRingsOnly": True, "AtomCompare": rdFMCS.AtomCompare.CompareElements, "BondCompare": rdFMCS.BondCompare.CompareOrder, "SeedSMARTS": "", "MinNumAtoms": 1, "MinNumBonds": 0} 469 470 def ProcessSpecifiedMCSParameters(): 471 """Process specified MCS parameters.""" 472 473 if re.match("^auto$", OptionsInfo["SpecifiedMCSParams"], re.I): 474 # Nothing to process... 475 return 476 477 # Parse specified parameters... 478 MCSParams = re.sub(" ", "", OptionsInfo["SpecifiedMCSParams"]) 479 if not MCSParams: 480 MiscUtil.PrintError("No valid parameter name and value pairs specified using \"-m, --mcsParams\" option.") 481 482 MCSParamsWords = MCSParams.split(",") 483 if len(MCSParamsWords) % 2: 484 MiscUtil.PrintError("The number of comma delimited paramater names and values, %d, specified using \"-m, --mcsParams\" option must be an even number." % (len(MCSParamsWords))) 485 486 # Setup canonical parameter names... 487 ValidParamNames = [] 488 CanonicalParamNamesMap = {} 489 for ParamName in sorted(OptionsInfo["MCSParams"]): 490 ValidParamNames.append(ParamName) 491 CanonicalParamNamesMap[ParamName.lower()] = ParamName 492 493 # Validate and set paramater names and value... 494 for Index in range(0, len(MCSParamsWords), 2): 495 Name = MCSParamsWords[Index] 496 Value = MCSParamsWords[Index + 1] 497 498 CanonicalName = Name.lower() 499 if not CanonicalName in CanonicalParamNamesMap: 500 MiscUtil.PrintError("The parameter name, %s, specified using \"-m, --mcsParams\" option is not a valid name. Supported parameter names: %s" % (Name, " ".join(ValidParamNames))) 501 502 ParamName = CanonicalParamNamesMap[CanonicalName] 503 if re.match("^Threshold$", ParamName, re.I): 504 Value = float(Value) 505 if Value <= 0.0 or Value > 1.0 : 506 MiscUtil.PrintError("The parameter value, %s, specified using \"-m, --mcsParams\" option for parameter, %s, is not a valid value. Supported values: > 0 and <= 1.0" % (Value, Name)) 507 ParamValue = Value 508 elif re.match("^Timeout$", ParamName, re.I): 509 Value = int(Value) 510 if Value <= 0: 511 MiscUtil.PrintError("The parameter value, %s, specified using \"-m, --mcsParams\" option for parameter, %s, is not a valid value. Supported values: > 0" % (Value, Name)) 512 ParamValue = Value 513 elif re.match("^MinNumAtoms$", ParamName, re.I): 514 Value = int(Value) 515 if Value < 1: 516 MiscUtil.PrintError("The parameter value, %s, specified using \"-m, --mcsParams\" option for parameter, %s, is not a valid value. Supported values: >= 1" % (Value, Name)) 517 ParamValue = Value 518 elif re.match("^MinNumBonds$", ParamName, re.I): 519 Value = int(Value) 520 if Value < 0: 521 MiscUtil.PrintError("The parameter value, %s, specified using \"-m, --mcsParams\" option for parameter, %s, is not a valid value. Supported values: >=0 " % (Value, Name)) 522 ParamValue = Value 523 elif re.match("^AtomCompare$", ParamName, re.I): 524 if re.match("^CompareAny$", Value, re.I): 525 ParamValue = rdFMCS.AtomCompare.CompareAny 526 elif re.match("^CompareElements$", Value, re.I): 527 ParamValue = Chem.rdFMCS.AtomCompare.CompareElements 528 elif re.match("^CompareIsotopes$", Value, re.I): 529 ParamValue = Chem.rdFMCS.AtomCompare.CompareIsotopes 530 else: 531 MiscUtil.PrintError("The parameter value, %s, specified using \"-m, --mcsParams\" option for parameter, %s, is not a valid value. Supported values: CompareAny CompareElements CompareIsotopes" % (Value, Name)) 532 elif re.match("^BondCompare$", ParamName, re.I): 533 if re.match("^CompareAny$", Value, re.I): 534 ParamValue = Chem.rdFMCS.BondCompare.CompareAny 535 elif re.match("^CompareOrder$", Value, re.I): 536 ParamValue = rdFMCS.BondCompare.CompareOrder 537 elif re.match("^CompareOrderExact$", Value, re.I): 538 ParamValue = rdFMCS.BondCompare.CompareOrderExact 539 else: 540 MiscUtil.PrintError("The parameter value, %s, specified using \"-m, --mcsParams\" option for parameter, %s, is not a valid value. Supported values: CompareAny CompareOrder CompareOrderExact" % (Value, Name)) 541 elif re.match("^SeedSMARTS$", ParamName, re.I): 542 if not len(Value): 543 MiscUtil.PrintError("The parameter value specified using \"-m, --mcsParams\" option for parameter, %s, is empty. " % (Name)) 544 ParamValue = Value 545 else: 546 if not re.match("^(Yes|No|True|False)$", Value, re.I): 547 MiscUtil.PrintError("The parameter value, %s, specified using \"-m, --mcsParams\" option for parameter, %s, is not a valid value. Supported values: Yes No True False" % (Value, Name)) 548 ParamValue = False 549 if re.match("^(Yes|True)$", Value, re.I): 550 ParamValue = True 551 552 # Set value... 553 OptionsInfo["MCSParams"][ParamName] = ParamValue 554 555 def ProcesssConformerGeneratorOption(): 556 """Process comformer generator option. """ 557 558 ConfGenParams = MiscUtil.ProcessOptionConformerGenerator("--conformerGenerator", Options["--conformerGenerator"]) 559 560 OptionsInfo["ConformerGenerator"] = ConfGenParams["ConformerGenerator"] 561 OptionsInfo["UseBasicKnowledge"] = ConfGenParams["UseBasicKnowledge"] 562 OptionsInfo["UseExpTorsionAnglePrefs"] = ConfGenParams["UseExpTorsionAnglePrefs"] 563 OptionsInfo["ETVersion"] = ConfGenParams["ETVersion"] 564 565 def ProcessOptions(): 566 """Process and validate command line arguments and options.""" 567 568 MiscUtil.PrintInfo("Processing options...") 569 570 # Validate options... 571 ValidateOptions() 572 573 OptionsInfo["Infile"] = Options["--infile"] 574 OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters("--infileParams", Options["--infileParams"], Options["--infile"]) 575 576 OptionsInfo["RefFile"] = Options["--reffile"] 577 578 OptionsInfo["Scaffold"] = Options["--scaffold"] 579 if re.match("^auto$", Options["--scaffold"], re.I): 580 UseScaffoldMCS = True 581 UseScaffoldSMARTS = False 582 ScaffoldSMARTS = None 583 else: 584 UseScaffoldMCS = False 585 UseScaffoldSMARTS = True 586 ScaffoldSMARTS = OptionsInfo["Scaffold"] 587 588 OptionsInfo["UseScaffoldMCS"] = UseScaffoldMCS 589 OptionsInfo["UseScaffoldSMARTS"] = UseScaffoldSMARTS 590 OptionsInfo["ScaffoldSMARTS"] = ScaffoldSMARTS 591 OptionsInfo["ScaffoldPatternMol"] = None 592 593 OptionsInfo["SpecifiedMCSParams"] = Options["--mcsParams"] 594 ProcessMCSParameters() 595 596 OptionsInfo["Outfile"] = Options["--outfile"] 597 OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters("--outfileParams", Options["--outfileParams"]) 598 599 OptionsInfo["Overwrite"] = Options["--overwrite"] 600 601 OptionsInfo["AddHydrogens"] = True if re.match("^yes$", Options["--addHydrogens"], re.I) else False 602 603 ProcesssConformerGeneratorOption() 604 605 if re.match("^UFF$", Options["--forceField"], re.I): 606 ForceField = "UFF" 607 UseUFF = True 608 UseMMFF = False 609 elif re.match("^MMFF$", Options["--forceField"], re.I): 610 ForceField = "MMFF" 611 UseUFF = False 612 UseMMFF = True 613 else: 614 MiscUtil.PrintError("The value, %s, specified for \"--forceField\" is not supported." % (Options["--forceField"],)) 615 616 MMFFVariant = "MMFF94" if re.match("^MMFF94$", Options["--forceFieldMMFFVariant"], re.I) else "MMFF94s" 617 618 OptionsInfo["ForceField"] = ForceField 619 OptionsInfo["MMFFVariant"] = MMFFVariant 620 OptionsInfo["UseMMFF"] = UseMMFF 621 OptionsInfo["UseUFF"] = UseUFF 622 623 OptionsInfo["ScaffoldRMSDOut"] = True if re.match("^yes$", Options["--scaffoldRMSDOut"], re.I) else False 624 625 OptionsInfo["EnergyOut"] = True if re.match("^yes$", Options["--energyOut"], re.I) else False 626 if UseMMFF: 627 OptionsInfo["EnergyLabel"] = "%s_Energy" % MMFFVariant 628 else: 629 OptionsInfo["EnergyLabel"] = "%s_Energy" % ForceField 630 631 OptionsInfo["EnforceChirality"] = True if re.match("^yes$", Options["--enforceChirality"], re.I) else False 632 633 OptionsInfo["MaxConfs"] = int(Options["--maxConfs"]) 634 635 OptionsInfo["MPMode"] = True if re.match("^yes$", Options["--mp"], re.I) else False 636 OptionsInfo["MPParams"] = MiscUtil.ProcessOptionMultiprocessingParameters("--mpParams", Options["--mpParams"]) 637 638 OptionsInfo["QuietMode"] = True if re.match("^yes$", Options["--quiet"], re.I) else False 639 640 OptionsInfo["RemoveHydrogens"] = True if re.match("^yes$", Options["--removeHydrogens"], re.I) else False 641 OptionsInfo["UseTethers"] = True if re.match("^yes$", Options["--useTethers"], re.I) else False 642 643 def RetrieveOptions(): 644 """Retrieve command line arguments and options.""" 645 646 # Get options... 647 global Options 648 Options = docopt(_docoptUsage_) 649 650 # Set current working directory to the specified directory... 651 WorkingDir = Options["--workingdir"] 652 if WorkingDir: 653 os.chdir(WorkingDir) 654 655 # Handle examples option... 656 if "--examples" in Options and Options["--examples"]: 657 MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_)) 658 sys.exit(0) 659 660 def ValidateOptions(): 661 """Validate option values.""" 662 663 MiscUtil.ValidateOptionTextValue("-a, --addHydrogens", Options["--addHydrogens"], "yes no") 664 MiscUtil.ValidateOptionTextValue("-c, --conformerGenerator", Options["--conformerGenerator"], "SDG KDG ETDG ETKDG ETKDGv2") 665 666 MiscUtil.ValidateOptionTextValue("-f, --forceField", Options["--forceField"], "UFF MMFF") 667 MiscUtil.ValidateOptionTextValue(" --forceFieldMMFFVariant", Options["--forceFieldMMFFVariant"], "MMFF94 MMFF94s") 668 669 MiscUtil.ValidateOptionTextValue("--scaffoldRMSDOut", Options["--scaffoldRMSDOut"], "yes no") 670 671 MiscUtil.ValidateOptionTextValue("--energyOut", Options["--energyOut"], "yes no") 672 MiscUtil.ValidateOptionTextValue("--enforceChirality ", Options["--enforceChirality"], "yes no") 673 674 MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"]) 675 MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "sdf sd mol smi txt csv tsv") 676 677 MiscUtil.ValidateOptionFilePath("-r, --reffile", Options["--reffile"]) 678 MiscUtil.ValidateOptionFileExt("-r, --reffile", Options["--reffile"], "sdf sd mol") 679 680 MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "sdf sd") 681 MiscUtil.ValidateOptionsOutputFileOverwrite("-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"]) 682 MiscUtil.ValidateOptionsDistinctFileNames("-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"]) 683 684 MiscUtil.ValidateOptionIntegerValue("--maxConfs", Options["--maxConfs"], {">": 0}) 685 686 MiscUtil.ValidateOptionTextValue("--mp", Options["--mp"], "yes no") 687 MiscUtil.ValidateOptionTextValue("-q, --quiet", Options["--quiet"], "yes no") 688 689 MiscUtil.ValidateOptionTextValue("-r, --removeHydrogens", Options["--removeHydrogens"], "yes no") 690 691 MiscUtil.ValidateOptionTextValue("-u, --useTethers", Options["--useTethers"], "yes no") 692 693 # Setup a usage string for docopt... 694 _docoptUsage_ = """ 695 RDKitPerformConstrainedMinimization.py - Perform constrained minimization 696 697 Usage: 698 RDKitPerformConstrainedMinimization.py [--addHydrogens <yes or no>] [--conformerGenerator <SDG, KDG, ETDG, ETKDG, ETKDGv2>] 699 [--forceField <UFF, or MMFF>] [--forceFieldMMFFVariant <MMFF94 or MMFF94s>] 700 [--energyOut <yes or no>] [--enforceChirality <yes or no>] [--infileParams <Name,Value,...>] 701 [--maxConfs <number>] [--mcsParams <Name,Value,...>] [--mp <yes or no>] [--mpParams <Name,Value,...>] 702 [ --outfileParams <Name,Value,...> ] [--overwrite] [--quiet <yes or no>] [ --removeHydrogens <yes or no>] 703 [--scaffold <auto or SMARTS>] [--scaffoldRMSDOut <yes or no>] [--useTethers <yes or no>] 704 [-w <dir>] -i <infile> -r <reffile> -o <outfile> 705 RDKitPerformConstrainedMinimization.py -h | --help | -e | --examples 706 707 Description: 708 Generate 3D structures for molecules by performing a constrained energy minimization 709 against a reference molecule. An initial set of 3D conformers are generated for the 710 input molecules using distance geometry. A common core scaffold, corresponding to 711 a Maximum Common Substructure (MCS) or an explicit SMARTS pattern, is identified 712 between a pair of input and reference molecules. The core scaffold atoms in input 713 molecules are aligned against the same atoms in the reference molecule. The energy 714 of aligned structures are minimized using the forcefield to generate the final 3D structures. 715 716 The supported input file formats are: Mol (.mol), SD (.sdf, .sd), SMILES (.smi 717 .csv, .tsv, .txt) 718 719 The supported output file formats are: SD (.sdf, .sd) 720 721 Options: 722 -a, --addHydrogens <yes or no> [default: yes] 723 Add hydrogens before minimization. 724 -c, --conformerGenerator <text> [default: ETKDGv2] 725 Conformation generation methodology for generating initial 3D coordinates 726 for molecules in input file. A common core scaffold is identified between a 727 pair of input and reference molecules. The atoms in common core scaffold 728 of input molecules are aligned against the reference molecule followed by 729 energy minimization to generate final 3D structure. 730 731 The possible values along with a brief description are shown below: 732 733 SDG: Standard Distance Geometry 734 KDG: basic Knowledge-terms with Distance Geometry 735 ETDG: Experimental Torsion-angle preference with Distance Geometry 736 ETKDG: Experimental Torsion-angle preference along with basic 737 Knowledge-terms and Distance Geometry [Ref 129] 738 ETKDGv2: Experimental Torsion-angle preference along with basic 739 Knowledge-terms and Distance Geometry [Ref 167] 740 -f, --forceField <UFF, MMFF> [default: MMFF] 741 Forcefield method to use for constrained energy minimization. Possible values: 742 Universal Force Field (UFF) [ Ref 81 ] or Merck Molecular Mechanics Force 743 Field [ Ref 83-87 ] . 744 --forceFieldMMFFVariant <MMFF94 or MMFF94s> [default: MMFF94] 745 Variant of MMFF forcefield to use for energy minimization. 746 --energyOut <yes or no> [default: No] 747 Write out energy values. 748 --enforceChirality <yes or no> [default: Yes] 749 Enforce chirality for defined chiral centers. 750 -e, --examples 751 Print examples. 752 -h, --help 753 Print this help message. 754 -i, --infile <infile> 755 Input file name. 756 --infileParams <Name,Value,...> [default: auto] 757 A comma delimited list of parameter name and value pairs for reading 758 molecules from files. The supported parameter names for different file 759 formats, along with their default values, are shown below: 760 761 SD, MOL: removeHydrogens,yes,sanitize,yes,strictParsing,yes 762 763 SMILES: smilesColumn,1,smilesNameColumn,2,smilesDelimiter,space, 764 smilesTitleLine,auto,sanitize,yes 765 766 Possible values for smilesDelimiter: space, comma or tab. 767 --maxConfs <number> [default: 250] 768 Maximum number of conformations to generate for each molecule by conformation 769 generation methodology for initial 3D coordinates. A constrained minimization is 770 performed using the specified forcefield and the lowest energy conformation is written 771 to the output file. 772 --mcsParams <Name,Value,...> [default: auto] 773 Parameter values to use for identifying a maximum common substructure 774 (MCS) in between a pair of reference and input molecules.In general, it is a 775 comma delimited list of parameter name and value pairs. The supported 776 parameter names along with their default values are shown below: 777 778 atomCompare,CompareElements,bondCompare,CompareOrder, 779 maximizeBonds,yes,matchValences,yes,matchChiralTag,no, 780 minNumAtoms,1,minNumBonds,0,ringMatchesRingOnly,yes, 781 completeRingsOnly,yes,threshold,1.0,timeOut,3600,seedSMARTS,none 782 783 Possible values for atomCompare: CompareAny, CompareElements, 784 CompareIsotopes. Possible values for bondCompare: CompareAny, 785 CompareOrder, CompareOrderExact. 786 787 A brief description of MCS parameters taken from RDKit documentation is 788 as follows: 789 790 atomCompare - Controls match between two atoms 791 bondCompare - Controls match between two bonds 792 maximizeBonds - Maximize number of bonds instead of atoms 793 matchValences - Include atom valences in the MCS match 794 matchChiralTag - Include atom chirality in the MCS match 795 minNumAtoms - Minimum number of atoms in the MCS match 796 minNumBonds - Minimum number of bonds in the MCS match 797 ringMatchesRingOnly - Ring bonds only match other ring bonds 798 completeRingsOnly - Partial rings not allowed during the match 799 threshold - Fraction of the dataset that must contain the MCS 800 seedSMARTS - SMARTS string as the seed of the MCS 801 timeout - Timeout for the MCS calculation in seconds 802 803 --mp <yes or no> [default: no] 804 Use multiprocessing. 805 806 By default, input data is retrieved in a lazy manner via mp.Pool.imap() 807 function employing lazy RDKit data iterable. This allows processing of 808 arbitrary large data sets without any additional requirements memory. 809 810 All input data may be optionally loaded into memory by mp.Pool.map() 811 before starting worker processes in a process pool by setting the value 812 of 'inputDataMode' to 'InMemory' in '--mpParams' option. 813 814 A word to the wise: The default 'chunkSize' value of 1 during 'Lazy' input 815 data mode may adversely impact the performance. The '--mpParams' section 816 provides additional information to tune the value of 'chunkSize'. 817 --mpParams <Name,Value,...> [default: auto] 818 A comma delimited list of parameter name and value pairs to configure 819 multiprocessing. 820 821 The supported parameter names along with their default and possible 822 values are shown below: 823 824 chunkSize, auto 825 inputDataMode, Lazy [ Possible values: InMemory or Lazy ] 826 numProcesses, auto [ Default: mp.cpu_count() ] 827 828 These parameters are used by the following functions to configure and 829 control the behavior of multiprocessing: mp.Pool(), mp.Pool.map(), and 830 mp.Pool.imap(). 831 832 The chunkSize determines chunks of input data passed to each worker 833 process in a process pool by mp.Pool.map() and mp.Pool.imap() functions. 834 The default value of chunkSize is dependent on the value of 'inputDataMode'. 835 836 The mp.Pool.map() function, invoked during 'InMemory' input data mode, 837 automatically converts RDKit data iterable into a list, loads all data into 838 memory, and calculates the default chunkSize using the following method 839 as shown in its code: 840 841 chunkSize, extra = divmod(len(dataIterable), len(numProcesses) * 4) 842 if extra: chunkSize += 1 843 844 For example, the default chunkSize will be 7 for a pool of 4 worker processes 845 and 100 data items. 846 847 The mp.Pool.imap() function, invoked during 'Lazy' input data mode, employs 848 'lazy' RDKit data iterable to retrieve data as needed, without loading all the 849 data into memory. Consequently, the size of input data is not known a priori. 850 It's not possible to estimate an optimal value for the chunkSize. The default 851 chunkSize is set to 1. 852 853 The default value for the chunkSize during 'Lazy' data mode may adversely 854 impact the performance due to the overhead associated with exchanging 855 small chunks of data. It is generally a good idea to explicitly set chunkSize to 856 a larger value during 'Lazy' input data mode, based on the size of your input 857 data and number of processes in the process pool. 858 859 The mp.Pool.map() function waits for all worker processes to process all 860 the data and return the results. The mp.Pool.imap() function, however, 861 returns the the results obtained from worker processes as soon as the 862 results become available for specified chunks of data. 863 864 The order of data in the results returned by both mp.Pool.map() and 865 mp.Pool.imap() functions always corresponds to the input data. 866 -o, --outfile <outfile> 867 Output file name. 868 --outfileParams <Name,Value,...> [default: auto] 869 A comma delimited list of parameter name and value pairs for writing 870 molecules to files. The supported parameter names for different file 871 formats, along with their default values, are shown below: 872 873 SD: kekulize,yes,forceV3000,no 874 875 --overwrite 876 Overwrite existing files. 877 -q, --quiet <yes or no> [default: no] 878 Use quiet mode. The warning and information messages will not be printed. 879 -r, --reffile <reffile> 880 Reference input file name containing a 3D reference molecule. A common 881 core scaffold must be present in a pair of an input and reference molecules. 882 Otherwise, no constrained minimization is performed on the input molecule. 883 --removeHydrogens <yes or no> [default: Yes] 884 Remove hydrogens after minimization. 885 -s, --scaffold <auto or SMARTS> [default: auto] 886 Common core scaffold between a pair of input and reference molecules used for 887 constrained minimization of molecules in input file. Possible values: Auto or a 888 valid SMARTS pattern. The common core scaffold is automatically detected 889 corresponding to the Maximum Common Substructure (MCS) between a pair of 890 reference and input molecules. A valid SMARTS pattern may be optionally specified 891 for the common core scaffold. 892 --scaffoldRMSDOut <yes or no> [default: No] 893 Write out RMSD value for common core alignment between a pair of input and 894 reference molecules. 895 -u, --useTethers <yes or no> [default: yes] 896 Use tethers to optimize the final conformation by applying a series of extra forces 897 to align matching atoms to the positions of the core atoms. Otherwise, use simple 898 distance constraints during the optimization. 899 -w, --workingdir <dir> 900 Location of working directory which defaults to the current directory. 901 902 Examples: 903 To perform constrained energy minimization for molecules in a SMILES file against 904 a reference 3D molecule in a SD file using a common core scaffold between pairs of 905 input and reference molecules identified using MCS, generating up to 250 conformations 906 using ETKDG methodology followed by MMFF forcefield minimization, and write out 907 a SD file containing minimum energy structure corresponding to each constrained 908 molecule, type: 909 910 % RDKitPerformConstrainedMinimization.py -i SampleSeriesD3R.smi 911 -r SampleSeriesRef3D.sdf -o SampleOut.sdf 912 913 To rerun the first example in a quiet mode and write out a SD file, type: 914 915 % RDKitPerformConstrainedMinimization.py -q yes -i SampleSeriesD3R.smi 916 -r SampleSeriesRef3D.sdf -o SampleOut.sdf 917 918 To run the first example in multiprocessing mode on all available CPUs 919 without loading all data into memory and write out a SD file, type: 920 921 % RDKitPerformConstrainedMinimization.py --mp yes 922 -i SampleSeriesD3R.smi -r SampleSeriesRef3D.sdf -o SampleOut.sdf 923 924 To rerun the first example in multiprocessing mode on all available CPUs 925 by loading all data into memory and write out a SD file, type: 926 927 % RDKitPerformConstrainedMinimization.py --mp yes --mpParams 928 "inputDataMode,InMemory" -i SampleSeriesD3R.smi 929 -r SampleSeriesRef3D.sdf -o SampleOut.sdf 930 931 To rerun the first example using an explicit SMARTS string for a common core 932 scaffold and write out a SD file, type: 933 934 % RDKitPerformConstrainedMinimization.py --scaffold 935 "c1c(C(N(C(c2cc(-c3nc(N)ncc3)cn2))))cccc1" -i SampleSeriesD3R.smi -r 936 SampleSeriesRef3D.sdf -o SampleOut.sdf 937 938 To rerun the first example using molecules in a CSV SMILES file, SMILES 939 strings in column 1, name in column2, and write out a SD file, type: 940 941 % RDKitPerformConstrainedMinimization.py --infileParams "smilesDelimiter, 942 comma,smilesTitleLine,yes,smilesColumn,1,smilesNameColumn,2" 943 -i SampleSeriesD3R.csv -r SampleSeriesRef3D.sdf -o SampleOut.sdf 944 945 To perform constrained energy minimization for molecules in a SD file against 946 a reference 3D molecule in a SD file using a common core scaffold between pairs of 947 input and reference molecules identified using MCS, generating up to 50 conformations 948 using SDG methodology followed by UFF forcefield minimization, and write out 949 a SD file containing minimum energy structure along with energy and embed RMS values 950 corresponding to each constrained molecule, type: 951 952 % RDKitPerformConstrainedMinimization.py --maxConfs 50 -c SDG -f UFF 953 --scaffoldRMSDOut yes --energyOut yes -i SampleSeriesD3R.sdf 954 -r SampleSeriesRef3D.sdf -o SampleOut.sdf 955 956 Author: 957 Manish Sud(msud@san.rr.com) 958 959 See also: 960 RDKitCalculateRMSD.py, RDKitCalculateMolecularDescriptors.py, RDKitCompareMoleculeShapes.py, 961 RDKitConvertFileFormat.py, RDKitGenerateConstrainedConformers.py, RDKitPerformMinimization.py 962 963 Copyright: 964 Copyright (C) 2024 Manish Sud. All rights reserved. 965 966 The functionality available in this script is implemented using RDKit, an 967 open source toolkit for cheminformatics developed by Greg Landrum. 968 969 This file is part of MayaChemTools. 970 971 MayaChemTools is free software; you can redistribute it and/or modify it under 972 the terms of the GNU Lesser General Public License as published by the Free 973 Software Foundation; either version 3 of the License, or (at your option) any 974 later version. 975 976 """ 977 978 if __name__ == "__main__": 979 main()