1 #!/bin/env python 2 # 3 # File: RDKitGenerateConstrainedConformers.py 4 # Author: Manish Sud <msud@san.rr.com> 5 # 6 # Copyright (C) 2024 Manish Sud. All rights reserved. 7 # 8 # The functionality available in this script is implemented using RDKit, an 9 # open source toolkit for cheminformatics developed by Greg Landrum. 10 # 11 # This file is part of MayaChemTools. 12 # 13 # MayaChemTools is free software; you can redistribute it and/or modify it under 14 # the terms of the GNU Lesser General Public License as published by the Free 15 # Software Foundation; either version 3 of the License, or (at your option) any 16 # later version. 17 # 18 # MayaChemTools is distributed in the hope that it will be useful, but without 19 # any warranty; without even the implied warranty of merchantability of fitness 20 # for a particular purpose. See the GNU Lesser General Public License for more 21 # details. 22 # 23 # You should have received a copy of the GNU Lesser General Public License 24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or 25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, 26 # Boston, MA, 02111-1307, USA. 27 # 28 29 from __future__ import print_function 30 31 # Add local python path to the global path and import standard library modules... 32 import os 33 import sys; sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python")) 34 import time 35 import re 36 import multiprocessing as mp 37 38 # RDKit imports... 39 try: 40 from rdkit import rdBase 41 from rdkit import Chem 42 from rdkit.Chem import AllChem 43 from rdkit.Chem import rdFMCS 44 from rdkit.Chem import rdMolAlign 45 except ImportError as ErrMsg: 46 sys.stderr.write("\nFailed to import RDKit module/package: %s\n" % ErrMsg) 47 sys.stderr.write("Check/update your RDKit environment and try again.\n\n") 48 sys.exit(1) 49 50 # MayaChemTools imports... 51 try: 52 from docopt import docopt 53 import MiscUtil 54 import RDKitUtil 55 except ImportError as ErrMsg: 56 sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg) 57 sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n") 58 sys.exit(1) 59 60 ScriptName = os.path.basename(sys.argv[0]) 61 Options = {} 62 OptionsInfo = {} 63 64 def main(): 65 """Start execution of the script.""" 66 67 MiscUtil.PrintInfo("\n%s (RDKit v%s; MayaChemTools v%s; %s): Starting...\n" % (ScriptName, rdBase.rdkitVersion, MiscUtil.GetMayaChemToolsVersion(), time.asctime())) 68 69 (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime() 70 71 # Retrieve command line arguments and options... 72 RetrieveOptions() 73 74 # Process and validate command line arguments and options... 75 ProcessOptions() 76 77 # Perform actions required by the script... 78 GenerateConstrainedConformers() 79 80 MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName) 81 MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime)) 82 83 def GenerateConstrainedConformers(): 84 """Generate constrained conformers.""" 85 86 # Read and validate reference molecule... 87 RefMol = RetrieveReferenceMolecule() 88 89 # Setup a molecule reader for input file... 90 MiscUtil.PrintInfo("\nProcessing file %s..." % OptionsInfo["Infile"]) 91 OptionsInfo["InfileParams"]["AllowEmptyMols"] = True 92 Mols = RDKitUtil.ReadMolecules(OptionsInfo["Infile"], **OptionsInfo["InfileParams"]) 93 94 # Set up a molecule writer... 95 Writer = RDKitUtil.MoleculesWriter(OptionsInfo["Outfile"], **OptionsInfo["OutfileParams"]) 96 if Writer is None: 97 MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OptionsInfo["Outfile"]) 98 MiscUtil.PrintInfo("Generating file %s..." % OptionsInfo["Outfile"]) 99 100 MolCount, ValidMolCount, CoreScaffoldMissingCount, ConfGenFailedCount = ProcessMolecules(RefMol, Mols, Writer) 101 102 if Writer is not None: 103 Writer.close() 104 105 MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount) 106 MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount) 107 MiscUtil.PrintInfo("Number of molecules with missing core scaffold: %d" % CoreScaffoldMissingCount) 108 MiscUtil.PrintInfo("Number of molecules failed during conformation generation or minimization: %d" % ConfGenFailedCount) 109 MiscUtil.PrintInfo("Number of ignored molecules: %d" % (MolCount - ValidMolCount + CoreScaffoldMissingCount + ConfGenFailedCount)) 110 111 def ProcessMolecules(RefMol, Mols, Writer): 112 """Process molecules to generate constrained conformers.""" 113 114 if OptionsInfo["MPMode"]: 115 return ProcessMoleculesUsingMultipleProcesses(RefMol, Mols, Writer) 116 else: 117 return ProcessMoleculesUsingSingleProcess(RefMol, Mols, Writer) 118 119 def ProcessMoleculesUsingSingleProcess(RefMol, Mols, Writer): 120 """Process molecules to generate constrained conformers using a single process.""" 121 122 (MolCount, ValidMolCount, CoreScaffoldMissingCount, ConfGenFailedCount) = [0] * 4 123 124 for Mol in Mols: 125 MolCount += 1 126 127 if Mol is None: 128 continue 129 130 if RDKitUtil.IsMolEmpty(Mol): 131 if not OptionsInfo["QuietMode"]: 132 MolName = RDKitUtil.GetMolName(Mol, MolCount) 133 MiscUtil.PrintWarning("Ignoring empty molecule: %s" % MolName) 134 continue 135 ValidMolCount += 1 136 137 # Setup a reference molecule core containing common scaffold atoms... 138 RefMolCore = SetupCoreScaffold(RefMol, Mol, MolCount) 139 if RefMolCore is None: 140 CoreScaffoldMissingCount += 1 141 continue 142 143 ConfMols, CalcStatus, ConfIDs, ConfEnergyValues, ConfScaffoldEmbedRMSDValues = GenerateMolConformers(Mol, RefMolCore, MolCount) 144 145 if not CalcStatus: 146 ConfGenFailedCount += 1 147 continue 148 149 WriteMolConformers(Writer, Mol, MolCount, ConfMols, ConfIDs, ConfEnergyValues, ConfScaffoldEmbedRMSDValues) 150 151 return (MolCount, ValidMolCount, CoreScaffoldMissingCount, ConfGenFailedCount) 152 153 def ProcessMoleculesUsingMultipleProcesses(RefMol, Mols, Writer): 154 """Process molecules to generate constrained conformers using multiprocessing.""" 155 156 MPParams = OptionsInfo["MPParams"] 157 158 # Setup data for initializing a worker process... 159 MiscUtil.PrintInfo("Encoding options info and reference molecule...") 160 161 OptionsInfo["EncodedRefMol"] = RDKitUtil.MolToBase64EncodedMolString(RefMol) 162 InitializeWorkerProcessArgs = (MiscUtil.ObjectToBase64EncodedString(Options), MiscUtil.ObjectToBase64EncodedString(OptionsInfo)) 163 164 # Setup a encoded mols data iterable for a worker process... 165 WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(Mols) 166 167 # Setup process pool along with data initialization for each process... 168 MiscUtil.PrintInfo("\nConfiguring multiprocessing using %s method..." % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()")) 169 MiscUtil.PrintInfo("NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n" % (MPParams["NumProcesses"], MPParams["InputDataMode"], ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"]))) 170 171 ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeWorkerProcess, InitializeWorkerProcessArgs) 172 173 # Start processing... 174 if re.match("^Lazy$", MPParams["InputDataMode"], re.I): 175 Results = ProcessPool.imap(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"]) 176 elif re.match("^InMemory$", MPParams["InputDataMode"], re.I): 177 Results = ProcessPool.map(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"]) 178 else: 179 MiscUtil.PrintError("The value, %s, specified for \"--inputDataMode\" is not supported." % (MPParams["InputDataMode"])) 180 181 (MolCount, ValidMolCount, CoreScaffoldMissingCount, ConfGenFailedCount) = [0] * 4 182 for Result in Results: 183 MolCount += 1 184 MolIndex, EncodedMol, EncodedConfMols, CoreScaffoldMissingStatus, CalcStatus, ConfIDs, ConfEnergyValues, ConfScaffoldEmbedRMSDValues = Result 185 186 if EncodedMol is None: 187 continue 188 ValidMolCount += 1 189 190 if CoreScaffoldMissingStatus: 191 CoreScaffoldMissingCount += 1 192 continue 193 194 if not CalcStatus: 195 ConfGenFailedCount += 1 196 continue 197 198 Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol) 199 ConfMols = [RDKitUtil.MolFromBase64EncodedMolString(EncodedConfMol) for EncodedConfMol in EncodedConfMols] 200 201 WriteMolConformers(Writer, Mol, MolCount, ConfMols, ConfIDs, ConfEnergyValues, ConfScaffoldEmbedRMSDValues) 202 203 return (MolCount, ValidMolCount, CoreScaffoldMissingCount, ConfGenFailedCount) 204 205 def InitializeWorkerProcess(*EncodedArgs): 206 """Initialize data for a worker process.""" 207 208 global Options, OptionsInfo 209 210 MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid()) 211 212 # Decode Options and OptionInfo... 213 Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0]) 214 OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1]) 215 216 # Decode RefMol... 217 OptionsInfo["RefMol"] = RDKitUtil.MolFromBase64EncodedMolString(OptionsInfo["EncodedRefMol"]) 218 219 def WorkerProcess(EncodedMolInfo): 220 """Process data for a worker process.""" 221 222 MolIndex, EncodedMol = EncodedMolInfo 223 224 ConfMols = None 225 CoreScaffoldMissingStatus = False 226 CalcStatus = False 227 ConfIDs = None 228 ConfEnergyValues = None 229 ConfScaffoldEmbedRMSDValues = None 230 231 if EncodedMol is None: 232 return [MolIndex, None, ConfMols, CoreScaffoldMissingStatus, CalcStatus, ConfIDs, ConfEnergyValues, ConfScaffoldEmbedRMSDValues] 233 234 RefMol = OptionsInfo["RefMol"] 235 236 Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol) 237 if RDKitUtil.IsMolEmpty(Mol): 238 if not OptionsInfo["QuietMode"]: 239 MolName = RDKitUtil.GetMolName(Mol, (MolIndex + 1)) 240 MiscUtil.PrintWarning("Ignoring empty molecule: %s" % MolName) 241 return [MolIndex, None, ConfMols, CoreScaffoldMissingStatus, CalcStatus, ConfIDs, ConfEnergyValues, ConfScaffoldEmbedRMSDValues] 242 243 # Setup a reference molecule core containing common scaffold atoms... 244 RefMolCore = SetupCoreScaffold(RefMol, Mol, (MolIndex + 1)) 245 if RefMolCore is None: 246 CoreScaffoldMissingStatus = True 247 return [MolIndex, EncodedMol, ConfMols, CoreScaffoldMissingStatus, CalcStatus, ConfIDs, ConfEnergyValues, ConfScaffoldEmbedRMSDValues] 248 249 ConfMols, CalcStatus, ConfIDs, ConfEnergyValues, ConfScaffoldEmbedRMSDValues = GenerateMolConformers(Mol, RefMolCore, (MolIndex + 1)) 250 251 EncodedConfMols = None 252 if ConfMols is not None: 253 EncodedConfMols = [RDKitUtil.MolToBase64EncodedMolString(ConfMol, PropertyPickleFlags = Chem.PropertyPickleOptions.MolProps | Chem.PropertyPickleOptions.PrivateProps) for ConfMol in ConfMols] 254 255 return [MolIndex, EncodedMol, EncodedConfMols, CoreScaffoldMissingStatus, CalcStatus, ConfIDs, ConfEnergyValues, ConfScaffoldEmbedRMSDValues] 256 257 def RetrieveReferenceMolecule(): 258 """Retrieve and validate reference molecule.""" 259 260 RefFile = OptionsInfo["RefFile"] 261 262 MiscUtil.PrintInfo("\nProcessing file %s..." % (RefFile)) 263 OptionsInfo["InfileParams"]["AllowEmptyMols"] = False 264 ValidRefMols, RefMolCount, ValidRefMolCount = RDKitUtil.ReadAndValidateMolecules(RefFile, **OptionsInfo["InfileParams"]) 265 266 if ValidRefMolCount == 0: 267 MiscUtil.PrintError("The reference file, %s, contains no valid molecules." % RefFile) 268 elif ValidRefMolCount > 1: 269 MiscUtil.PrintWarning("The reference file, %s, contains, %d, valid molecules. Using first molecule as the reference molecule..." % (RefFile, ValidRefMolCount)) 270 271 RefMol = ValidRefMols[0] 272 273 if OptionsInfo["UseScaffoldSMARTS"]: 274 ScaffoldPatternMol = Chem.MolFromSmarts(OptionsInfo["ScaffoldSMARTS"]) 275 if ScaffoldPatternMol is None: 276 MiscUtil.PrintError("Failed to create scaffold pattern molecule. The scaffold SMARTS pattern, %s, specified using \"-s, --scaffold\" option is not valid." % (OptionsInfo["ScaffoldSMARTS"])) 277 278 if not RefMol.HasSubstructMatch(ScaffoldPatternMol): 279 MiscUtil.PrintError("The scaffold SMARTS pattern, %s, specified using \"-s, --scaffold\" option, is missing in the first valid reference molecule." % (OptionsInfo["ScaffoldSMARTS"])) 280 281 return RefMol 282 283 def SetupCoreScaffold(RefMol, Mol, MolCount): 284 """Setup a reference molecule core containing common scaffold atoms between 285 a pair of molecules.""" 286 287 if OptionsInfo["UseScaffoldMCS"]: 288 return SetupCoreScaffoldByMCS(RefMol, Mol, MolCount) 289 elif OptionsInfo["UseScaffoldSMARTS"]: 290 return SetupCoreScaffoldBySMARTS(RefMol, Mol, MolCount) 291 else: 292 MiscUtil.PrintError("The value, %s, specified for \"-s, --scaffold\" option is not supported." % (OptionsInfo["Scaffold"])) 293 294 def SetupCoreScaffoldByMCS(RefMol, Mol, MolCount): 295 """Setup a reference molecule core containing common scaffold atoms between 296 a pair of molecules using MCS.""" 297 298 MCSParams = OptionsInfo["MCSParams"] 299 Mols = [RefMol, Mol] 300 301 MCSResultObject = rdFMCS.FindMCS(Mols, maximizeBonds = MCSParams["MaximizeBonds"], threshold = MCSParams["Threshold"], timeout = MCSParams["TimeOut"], verbose = MCSParams["Verbose"], matchValences = MCSParams["MatchValences"], ringMatchesRingOnly = MCSParams["RingMatchesRingOnly"], completeRingsOnly = MCSParams["CompleteRingsOnly"], matchChiralTag = MCSParams["MatchChiralTag"], atomCompare = MCSParams["AtomCompare"], bondCompare = MCSParams["BondCompare"], seedSmarts = MCSParams["SeedSMARTS"]) 302 303 if MCSResultObject.canceled: 304 if not OptionsInfo["QuietMode"]: 305 MiscUtil.PrintWarning("MCS failed to identify a common core scaffold between reference moecule and input molecule %s. Specify a different set of parameters using \"-m, --mcsParams\" option and try again." % (RDKitUtil.GetMolName(Mol, MolCount))) 306 return None 307 308 CoreNumAtoms = MCSResultObject.numAtoms 309 CoreNumBonds = MCSResultObject.numBonds 310 311 SMARTSCore = MCSResultObject.smartsString 312 313 if not len(SMARTSCore): 314 if not OptionsInfo["QuietMode"]: 315 MiscUtil.PrintWarning("MCS failed to identify a common core scaffold between reference moecule and input molecule %s. Specify a different set of parameters using \"-m, --mcsParams\" option and try again." % (RDKitUtil.GetMolName(Mol, MolCount))) 316 return None 317 318 if CoreNumAtoms < MCSParams["MinNumAtoms"]: 319 if not OptionsInfo["QuietMode"]: 320 MiscUtil.PrintWarning("Number of atoms, %d, in core scaffold identified by MCS is less than, %d, as specified by \"minNumAtoms\" parameter in \"-m, --mcsParams\" option." % (CoreNumAtoms, MCSParams["MinNumAtoms"])) 321 return None 322 323 if CoreNumBonds < MCSParams["MinNumBonds"]: 324 if not OptionsInfo["QuietMode"]: 325 MiscUtil.PrintWarning("Number of bonds, %d, in core scaffold identified by MCS is less than, %d, as specified by \"minNumBonds\" parameter in \"-m, --mcsParams\" option." % (CoreNumBonds, MCSParams["MinNumBonds"])) 326 return None 327 328 return GenerateCoreMol(RefMol, SMARTSCore) 329 330 def SetupCoreScaffoldBySMARTS(RefMol, Mol, MolCount): 331 """Setup a reference molecule core containing common scaffold atoms between 332 a pair of molecules using specified SMARTS.""" 333 334 if OptionsInfo["ScaffoldPatternMol"] is None: 335 OptionsInfo["ScaffoldPatternMol"] = Chem.MolFromSmarts(OptionsInfo["ScaffoldSMARTS"]) 336 337 if not Mol.HasSubstructMatch(OptionsInfo["ScaffoldPatternMol"]): 338 if not OptionsInfo["QuietMode"]: 339 MiscUtil.PrintWarning("The scaffold SMARTS pattern, %s, specified using \"-s, --scaffold\" option is missing in input molecule, %s." % (OptionsInfo["ScaffoldSMARTS"], RDKitUtil.GetMolName(Mol, MolCount))) 340 return None 341 342 return GenerateCoreMol(RefMol, OptionsInfo["ScaffoldSMARTS"]) 343 344 def GenerateCoreMol(RefMol, SMARTSCore): 345 """Generate core molecule for embedding.""" 346 347 # Create a molecule corresponding to core atoms... 348 SMARTSCoreMol = Chem.MolFromSmarts(SMARTSCore) 349 350 # Setup a ref molecule containing core atoms with dummy atoms as 351 # attachment points for atoms around the core atoms... 352 Core = AllChem.ReplaceSidechains(Chem.RemoveHs(RefMol), SMARTSCoreMol) 353 354 # Delete any substructures containing dummy atoms.. 355 RefMolCore = AllChem.DeleteSubstructs(Core, Chem.MolFromSmiles('*')) 356 RefMolCore.UpdatePropertyCache() 357 358 return RefMolCore 359 360 def GenerateMolConformers(Mol, RefMolCore, MolNum = None): 361 """Generate constrained conformers.""" 362 363 if OptionsInfo["AddHydrogens"]: 364 Mol = Chem.AddHs(Mol, addCoords = True) 365 366 # Setup forcefield function to use for constrained minimization... 367 ForceFieldFunction = None 368 ForceFieldName = None 369 if OptionsInfo["UseUFF"]: 370 ForceFieldFunction = lambda mol, confId = -1 : AllChem.UFFGetMoleculeForceField(mol, confId = confId) 371 ForeceFieldName = "UFF" 372 else: 373 ForceFieldFunction = lambda mol, confId = -1 : AllChem.MMFFGetMoleculeForceField(mol, AllChem.MMFFGetMoleculeProperties(mol, mmffVariant = OptionsInfo["MMFFVariant"]) , confId = confId) 374 ForeceFieldName = "MMFF" 375 376 if ForceFieldFunction is None: 377 if not OptionsInfo["QuietMode"]: 378 MiscUtil.PrintWarning("Failed to setup forcefield %s for molecule: %s\n" % (ForceFieldName, RDKitUtil.GetMolName(Mol, MolNum))) 379 return (None, False, None, None, None) 380 381 MaxConfs = OptionsInfo["MaxConfs"] 382 EnforceChirality = OptionsInfo["EnforceChirality"] 383 UseExpTorsionAnglePrefs = OptionsInfo["UseExpTorsionAnglePrefs"] 384 ETVersion = OptionsInfo["ETVersion"] 385 UseBasicKnowledge = OptionsInfo["UseBasicKnowledge"] 386 UseTethers = OptionsInfo["UseTethers"] 387 388 CalcEnergyMap = {} 389 MolConfsMap = {} 390 CalcRMSDMap = {} 391 392 ScaffoldEmbedRMSDMap = {} 393 394 ConfIDs = [ConfID for ConfID in range(0, MaxConfs)] 395 for ConfID in ConfIDs: 396 try: 397 MolConf = Chem.Mol(Mol) 398 AllChem.ConstrainedEmbed(MolConf, RefMolCore, useTethers = UseTethers, coreConfId = -1, randomseed = ConfID, getForceField = ForceFieldFunction, enforceChirality = EnforceChirality, useExpTorsionAnglePrefs = UseExpTorsionAnglePrefs, useBasicKnowledge = UseBasicKnowledge, ETversion = ETVersion) 399 except (ValueError, RuntimeError, Chem.rdchem.KekulizeException) as ErrMsg: 400 if not OptionsInfo["QuietMode"]: 401 MolName = RDKitUtil.GetMolName(Mol, MolNum) 402 MiscUtil.PrintWarning("Constrained embedding coupldn't be performed for molecule %s:\n%s\n" % (RDKitUtil.GetMolName(Mol, MolNum), ErrMsg)) 403 return (None, False, None, None, None) 404 405 EnergyStatus, Energy = GetEnergy(MolConf) 406 407 if not EnergyStatus: 408 if not OptionsInfo["QuietMode"]: 409 MolName = RDKitUtil.GetMolName(Mol, MolNum) 410 MiscUtil.PrintWarning("Failed to retrieve calculated energy for conformation number %d of molecule %s. Try again after removing any salts or cleaing up the molecule...\n" % (ConfID, MolName)) 411 return (None, False, None, None, None) 412 413 CalcEnergyMap[ConfID] = Energy 414 415 if OptionsInfo["ScaffoldRMSDOut"]: 416 ScaffoldEmbedRMSDMap[ConfID] = "%.4f" % float(MolConf.GetProp('EmbedRMS')) 417 MolConf.ClearProp('EmbedRMS') 418 419 if OptionsInfo["RemoveHydrogens"]: 420 MolConf = Chem.RemoveHs(MolConf) 421 MolConfsMap[ConfID] = MolConf 422 423 # Sort conformers by energy... 424 SortedConfIDs = sorted(ConfIDs, key = lambda ConfID: CalcEnergyMap[ConfID]) 425 426 MinEnergyConfID = SortedConfIDs[0] 427 EnergyWindow = OptionsInfo["EnergyWindow"] 428 429 MinConfEnergy = CalcEnergyMap[MinEnergyConfID] 430 MinEnergyMolConf = MolConfsMap[MinEnergyConfID] 431 432 # Calculate RMSD values for conformers... 433 CalcRMSDMap = {} 434 435 EnergyRMSDCutoff = OptionsInfo["EnergyRMSDCutoff"] 436 ApplyEnergyRMSDCutoff = True if EnergyRMSDCutoff > 0 else False 437 438 if ApplyEnergyRMSDCutoff: 439 FirstConf = True 440 RefMolConf = None 441 for ConfID in SortedConfIDs: 442 if FirstConf: 443 FirstConf = False 444 RefMolConf = MolConfsMap[ConfID] 445 CalcRMSDMap[ConfID] = 0.0 446 continue 447 448 # Make a copy for probe molecule. It gets updated during the calculation... 449 ProbeMolConf = Chem.Mol(MolConfsMap[ConfID]) 450 RMSD = rdMolAlign.AlignMol(ProbeMolConf, MinEnergyMolConf) 451 CalcRMSDMap[ConfID] = RMSD 452 453 # Track conformers with in the specified energy window from the lowest 454 # energy conformation along with applying RMSD cutoff as needed... 455 # 456 SelectedConfIDs = [] 457 458 ConfCount = 0 459 IgnoredByEnergyConfCount = 0 460 IgnoredByRMSDConfCount = 0 461 462 FirstConf = True 463 for ConfID in SortedConfIDs: 464 if FirstConf: 465 ConfCount += 1 466 FirstConf = False 467 SelectedConfIDs.append(ConfID) 468 continue 469 470 ConfEnergyDiff = abs(CalcEnergyMap[ConfID] - MinConfEnergy ) 471 if ConfEnergyDiff > EnergyWindow: 472 IgnoredByEnergyConfCount += 1 473 continue 474 475 if ApplyEnergyRMSDCutoff: 476 if CalcRMSDMap[ConfID] < EnergyRMSDCutoff: 477 IgnoredByRMSDConfCount += 1 478 continue 479 480 ConfCount += 1 481 SelectedConfIDs.append(ConfID) 482 483 if not OptionsInfo["QuietMode"]: 484 MolName = RDKitUtil.GetMolName(Mol, MolNum) 485 MiscUtil.PrintInfo("\nTotal Number of conformations generated for %s: %d" % (MolName, ConfCount)) 486 MiscUtil.PrintInfo("Number of conformations ignored due to energy window cutoff: %d" % (IgnoredByEnergyConfCount)) 487 if ApplyEnergyRMSDCutoff: 488 MiscUtil.PrintInfo("Number of conformations ignored due to energy RMSD cutoff: %d" % (IgnoredByRMSDConfCount)) 489 490 # Setup selected conformer molecules... 491 SelectedConfMols = [MolConfsMap[ConfID] for ConfID in SelectedConfIDs] 492 493 # Setup selected conformer energy values... 494 SelectedConfEnergyValues = None 495 if OptionsInfo["EnergyOut"]: 496 SelectedConfEnergyValues = ["%.2f" % CalcEnergyMap[ConfID] for ConfID in SelectedConfIDs] 497 498 # Setup selected conformer scaffold RMSD values... 499 SelectedConfScaffoldEmbedRMSDValues = None 500 if OptionsInfo["ScaffoldRMSDOut"]: 501 SelectedConfScaffoldEmbedRMSDValues = [] 502 for ConfID in SelectedConfIDs: 503 SelectedConfScaffoldEmbedRMSDValues.append(ScaffoldEmbedRMSDMap[ConfID]) 504 505 return (SelectedConfMols, True, SelectedConfIDs, SelectedConfEnergyValues, SelectedConfScaffoldEmbedRMSDValues) 506 507 def GetEnergy(Mol, ConfID = None): 508 """Calculate energy.""" 509 510 Status = True 511 Energy = None 512 513 if ConfID is None: 514 ConfID = -1 515 516 if OptionsInfo["UseUFF"]: 517 UFFMoleculeForcefield = AllChem.UFFGetMoleculeForceField(Mol, confId = ConfID) 518 if UFFMoleculeForcefield is None: 519 Status = False 520 else: 521 Energy = UFFMoleculeForcefield.CalcEnergy() 522 elif OptionsInfo["UseMMFF"]: 523 MMFFMoleculeProperties = AllChem.MMFFGetMoleculeProperties(Mol, mmffVariant = OptionsInfo["MMFFVariant"]) 524 MMFFMoleculeForcefield = AllChem.MMFFGetMoleculeForceField(Mol, MMFFMoleculeProperties, confId = ConfID) 525 if MMFFMoleculeForcefield is None: 526 Status = False 527 else: 528 Energy = MMFFMoleculeForcefield.CalcEnergy() 529 else: 530 MiscUtil.PrintError("Couldn't retrieve conformer energy: Specified forcefield, %s, is not supported" % OptionsInfo["ForceField"]) 531 532 return (Status, Energy) 533 534 def WriteMolConformers(Writer, Mol, MolNum, ConfMols, ConfIDs, ConfEnergyValues = None, ConfScaffoldEmbedRMSDValues = None): 535 """Write molecule coformers.""" 536 537 if ConfMols is None: 538 return 539 540 for Index, ConfMol in enumerate(ConfMols): 541 ConfMolName = RDKitUtil.GetMolName(Mol, MolNum) 542 SetConfMolName(ConfMol, ConfMolName, ConfIDs[Index]) 543 544 if ConfScaffoldEmbedRMSDValues is not None: 545 ConfMol.SetProp("CoreScaffoldEmbedRMSD", ConfScaffoldEmbedRMSDValues[Index]) 546 547 if ConfEnergyValues is not None: 548 ConfMol.SetProp(OptionsInfo["EnergyLabel"], ConfEnergyValues[Index]) 549 550 Writer.write(ConfMol) 551 552 def SetConfMolName(Mol, MolName, ConfCount): 553 """Set conf mol name.""" 554 555 ConfName = "%s_Conf%d" % (MolName, ConfCount) 556 Mol.SetProp("_Name", ConfName) 557 558 def ProcessMCSParameters(): 559 """Set up and process MCS parameters.""" 560 561 SetupMCSParameters() 562 ProcessSpecifiedMCSParameters() 563 564 def SetupMCSParameters(): 565 """Set up default MCS parameters.""" 566 567 OptionsInfo["MCSParams"] = {"MaximizeBonds": True, "Threshold": 0.9, "TimeOut": 3600, "Verbose": False, "MatchValences": True, "MatchChiralTag": False, "RingMatchesRingOnly": True, "CompleteRingsOnly": True, "AtomCompare": rdFMCS.AtomCompare.CompareElements, "BondCompare": rdFMCS.BondCompare.CompareOrder, "SeedSMARTS": "", "MinNumAtoms": 1, "MinNumBonds": 0} 568 569 def ProcessSpecifiedMCSParameters(): 570 """Process specified MCS parameters.""" 571 572 if re.match("^auto$", OptionsInfo["SpecifiedMCSParams"], re.I): 573 # Nothing to process... 574 return 575 576 # Parse specified parameters... 577 MCSParams = re.sub(" ", "", OptionsInfo["SpecifiedMCSParams"]) 578 if not MCSParams: 579 MiscUtil.PrintError("No valid parameter name and value pairs specified using \"-m, --mcsParams\" option.") 580 581 MCSParamsWords = MCSParams.split(",") 582 if len(MCSParamsWords) % 2: 583 MiscUtil.PrintError("The number of comma delimited paramater names and values, %d, specified using \"-m, --mcsParams\" option must be an even number." % (len(MCSParamsWords))) 584 585 # Setup canonical parameter names... 586 ValidParamNames = [] 587 CanonicalParamNamesMap = {} 588 for ParamName in sorted(OptionsInfo["MCSParams"]): 589 ValidParamNames.append(ParamName) 590 CanonicalParamNamesMap[ParamName.lower()] = ParamName 591 592 # Validate and set paramater names and value... 593 for Index in range(0, len(MCSParamsWords), 2): 594 Name = MCSParamsWords[Index] 595 Value = MCSParamsWords[Index + 1] 596 597 CanonicalName = Name.lower() 598 if not CanonicalName in CanonicalParamNamesMap: 599 MiscUtil.PrintError("The parameter name, %s, specified using \"-m, --mcsParams\" option is not a valid name. Supported parameter names: %s" % (Name, " ".join(ValidParamNames))) 600 601 ParamName = CanonicalParamNamesMap[CanonicalName] 602 if re.match("^Threshold$", ParamName, re.I): 603 Value = float(Value) 604 if Value <= 0.0 or Value > 1.0 : 605 MiscUtil.PrintError("The parameter value, %s, specified using \"-m, --mcsParams\" option for parameter, %s, is not a valid value. Supported values: > 0 and <= 1.0" % (Value, Name)) 606 ParamValue = Value 607 elif re.match("^Timeout$", ParamName, re.I): 608 Value = int(Value) 609 if Value <= 0: 610 MiscUtil.PrintError("The parameter value, %s, specified using \"-m, --mcsParams\" option for parameter, %s, is not a valid value. Supported values: > 0" % (Value, Name)) 611 ParamValue = Value 612 elif re.match("^MinNumAtoms$", ParamName, re.I): 613 Value = int(Value) 614 if Value < 1: 615 MiscUtil.PrintError("The parameter value, %s, specified using \"-m, --mcsParams\" option for parameter, %s, is not a valid value. Supported values: >= 1" % (Value, Name)) 616 ParamValue = Value 617 elif re.match("^MinNumBonds$", ParamName, re.I): 618 Value = int(Value) 619 if Value < 0: 620 MiscUtil.PrintError("The parameter value, %s, specified using \"-m, --mcsParams\" option for parameter, %s, is not a valid value. Supported values: >=0 " % (Value, Name)) 621 ParamValue = Value 622 elif re.match("^AtomCompare$", ParamName, re.I): 623 if re.match("^CompareAny$", Value, re.I): 624 ParamValue = rdFMCS.AtomCompare.CompareAny 625 elif re.match("^CompareElements$", Value, re.I): 626 ParamValue = Chem.rdFMCS.AtomCompare.CompareElements 627 elif re.match("^CompareIsotopes$", Value, re.I): 628 ParamValue = Chem.rdFMCS.AtomCompare.CompareIsotopes 629 else: 630 MiscUtil.PrintError("The parameter value, %s, specified using \"-m, --mcsParams\" option for parameter, %s, is not a valid value. Supported values: CompareAny CompareElements CompareIsotopes" % (Value, Name)) 631 elif re.match("^BondCompare$", ParamName, re.I): 632 if re.match("^CompareAny$", Value, re.I): 633 ParamValue = Chem.rdFMCS.BondCompare.CompareAny 634 elif re.match("^CompareOrder$", Value, re.I): 635 ParamValue = rdFMCS.BondCompare.CompareOrder 636 elif re.match("^CompareOrderExact$", Value, re.I): 637 ParamValue = rdFMCS.BondCompare.CompareOrderExact 638 else: 639 MiscUtil.PrintError("The parameter value, %s, specified using \"-m, --mcsParams\" option for parameter, %s, is not a valid value. Supported values: CompareAny CompareOrder CompareOrderExact" % (Value, Name)) 640 elif re.match("^SeedSMARTS$", ParamName, re.I): 641 if not len(Value): 642 MiscUtil.PrintError("The parameter value specified using \"-m, --mcsParams\" option for parameter, %s, is empty. " % (Name)) 643 ParamValue = Value 644 else: 645 if not re.match("^(Yes|No|True|False)$", Value, re.I): 646 MiscUtil.PrintError("The parameter value, %s, specified using \"-m, --mcsParams\" option for parameter, %s, is not a valid value. Supported values: Yes No True False" % (Value, Name)) 647 ParamValue = False 648 if re.match("^(Yes|True)$", Value, re.I): 649 ParamValue = True 650 651 # Set value... 652 OptionsInfo["MCSParams"][ParamName] = ParamValue 653 654 def ProcesssConformerGeneratorOption(): 655 """Process comformer generator option. """ 656 657 ConfGenParams = MiscUtil.ProcessOptionConformerGenerator("--conformerGenerator", Options["--conformerGenerator"]) 658 659 OptionsInfo["ConformerGenerator"] = ConfGenParams["ConformerGenerator"] 660 OptionsInfo["UseBasicKnowledge"] = ConfGenParams["UseBasicKnowledge"] 661 OptionsInfo["UseExpTorsionAnglePrefs"] = ConfGenParams["UseExpTorsionAnglePrefs"] 662 OptionsInfo["ETVersion"] = ConfGenParams["ETVersion"] 663 664 def ProcessOptions(): 665 """Process and validate command line arguments and options.""" 666 667 MiscUtil.PrintInfo("Processing options...") 668 669 # Validate options... 670 ValidateOptions() 671 672 OptionsInfo["Infile"] = Options["--infile"] 673 OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters("--infileParams", Options["--infileParams"], Options["--infile"]) 674 675 OptionsInfo["RefFile"] = Options["--reffile"] 676 677 OptionsInfo["Scaffold"] = Options["--scaffold"] 678 if re.match("^auto$", Options["--scaffold"], re.I): 679 UseScaffoldMCS = True 680 UseScaffoldSMARTS = False 681 ScaffoldSMARTS = None 682 else: 683 UseScaffoldMCS = False 684 UseScaffoldSMARTS = True 685 ScaffoldSMARTS = OptionsInfo["Scaffold"] 686 687 OptionsInfo["UseScaffoldMCS"] = UseScaffoldMCS 688 OptionsInfo["UseScaffoldSMARTS"] = UseScaffoldSMARTS 689 OptionsInfo["ScaffoldSMARTS"] = ScaffoldSMARTS 690 OptionsInfo["ScaffoldPatternMol"] = None 691 692 OptionsInfo["SpecifiedMCSParams"] = Options["--mcsParams"] 693 ProcessMCSParameters() 694 695 OptionsInfo["Outfile"] = Options["--outfile"] 696 OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters("--outfileParams", Options["--outfileParams"]) 697 698 OptionsInfo["Overwrite"] = Options["--overwrite"] 699 700 OptionsInfo["AddHydrogens"] = True if re.match("^yes$", Options["--addHydrogens"], re.I) else False 701 702 ProcesssConformerGeneratorOption() 703 704 if re.match("^UFF$", Options["--forceField"], re.I): 705 ForceField = "UFF" 706 UseUFF = True 707 UseMMFF = False 708 elif re.match("^MMFF$", Options["--forceField"], re.I): 709 ForceField = "MMFF" 710 UseUFF = False 711 UseMMFF = True 712 else: 713 MiscUtil.PrintError("The value, %s, specified for \"--forceField\" is not supported." % (Options["--forceField"],)) 714 715 MMFFVariant = "MMFF94" if re.match("^MMFF94$", Options["--forceFieldMMFFVariant"], re.I) else "MMFF94s" 716 717 OptionsInfo["ForceField"] = ForceField 718 OptionsInfo["MMFFVariant"] = MMFFVariant 719 OptionsInfo["UseMMFF"] = UseMMFF 720 OptionsInfo["UseUFF"] = UseUFF 721 722 OptionsInfo["ScaffoldRMSDOut"] = True if re.match("^yes$", Options["--scaffoldRMSDOut"], re.I) else False 723 724 OptionsInfo["EnergyOut"] = True if re.match("^yes$", Options["--energyOut"], re.I) else False 725 if UseMMFF: 726 OptionsInfo["EnergyLabel"] = "%s_Energy" % MMFFVariant 727 else: 728 OptionsInfo["EnergyLabel"] = "%s_Energy" % ForceField 729 730 OptionsInfo["EnforceChirality"] = True if re.match("^yes$", Options["--enforceChirality"], re.I) else False 731 EnergyRMSDCutoff = -1.0 732 if not re.match("^none$", Options["--energyRMSDCutoff"], re.I): 733 EnergyRMSDCutoff = float(Options["--energyRMSDCutoff"]) 734 OptionsInfo["EnergyRMSDCutoff"] = EnergyRMSDCutoff 735 736 OptionsInfo["EnergyWindow"] = float(Options["--energyWindow"]) 737 738 OptionsInfo["MaxConfs"] = int(Options["--maxConfs"]) 739 740 OptionsInfo["MPMode"] = True if re.match("^yes$", Options["--mp"], re.I) else False 741 OptionsInfo["MPParams"] = MiscUtil.ProcessOptionMultiprocessingParameters("--mpParams", Options["--mpParams"]) 742 743 OptionsInfo["QuietMode"] = True if re.match("^yes$", Options["--quiet"], re.I) else False 744 745 OptionsInfo["RemoveHydrogens"] = True if re.match("^yes$", Options["--removeHydrogens"], re.I) else False 746 OptionsInfo["UseTethers"] = True if re.match("^yes$", Options["--useTethers"], re.I) else False 747 748 def RetrieveOptions(): 749 """Retrieve command line arguments and options.""" 750 751 # Get options... 752 global Options 753 Options = docopt(_docoptUsage_) 754 755 # Set current working directory to the specified directory... 756 WorkingDir = Options["--workingdir"] 757 if WorkingDir: 758 os.chdir(WorkingDir) 759 760 # Handle examples option... 761 if "--examples" in Options and Options["--examples"]: 762 MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_)) 763 sys.exit(0) 764 765 def ValidateOptions(): 766 """Validate option values.""" 767 768 MiscUtil.ValidateOptionTextValue("-a, --addHydrogens", Options["--addHydrogens"], "yes no") 769 MiscUtil.ValidateOptionTextValue("-c, --conformerGenerator", Options["--conformerGenerator"], "SDG KDG ETDG ETKDG ETKDGv2") 770 771 MiscUtil.ValidateOptionTextValue("-f, --forceField", Options["--forceField"], "UFF MMFF") 772 MiscUtil.ValidateOptionTextValue(" --forceFieldMMFFVariant", Options["--forceFieldMMFFVariant"], "MMFF94 MMFF94s") 773 774 MiscUtil.ValidateOptionFloatValue("--energyWindow", Options["--energyWindow"], {">": 0.0}) 775 if not re.match("^none$", Options["--energyRMSDCutoff"], re.I): 776 MiscUtil.ValidateOptionFloatValue("--energyRMSDCutoff", Options["--energyRMSDCutoff"], {">": 0.0}) 777 778 MiscUtil.ValidateOptionTextValue("--scaffoldRMSDOut", Options["--scaffoldRMSDOut"], "yes no") 779 780 MiscUtil.ValidateOptionTextValue("--energyOut", Options["--energyOut"], "yes no") 781 MiscUtil.ValidateOptionTextValue("--enforceChirality ", Options["--enforceChirality"], "yes no") 782 783 MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"]) 784 MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "sdf sd mol smi txt csv tsv") 785 786 MiscUtil.ValidateOptionFilePath("-r, --reffile", Options["--reffile"]) 787 MiscUtil.ValidateOptionFileExt("-r, --reffile", Options["--reffile"], "sdf sd mol") 788 789 MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "sdf sd") 790 MiscUtil.ValidateOptionsOutputFileOverwrite("-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"]) 791 MiscUtil.ValidateOptionsDistinctFileNames("-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"]) 792 793 MiscUtil.ValidateOptionIntegerValue("--maxConfs", Options["--maxConfs"], {">": 0}) 794 795 MiscUtil.ValidateOptionTextValue("--mp", Options["--mp"], "yes no") 796 MiscUtil.ValidateOptionTextValue("-q, --quiet", Options["--quiet"], "yes no") 797 798 MiscUtil.ValidateOptionTextValue("-r, --removeHydrogens", Options["--removeHydrogens"], "yes no") 799 800 MiscUtil.ValidateOptionTextValue("-u, --useTethers", Options["--useTethers"], "yes no") 801 802 # Setup a usage string for docopt... 803 _docoptUsage_ = """ 804 RDKitGenerateConstrainedConformers.py - Generate constrained molecular conformations 805 806 Usage: 807 RDKitGenerateConstrainedConformers.py [--addHydrogens <yes or no>] [--conformerGenerator <SDG, KDG, ETDG, ETKDG, ETKDGv2>] 808 [--forceField <UFF, or MMFF>] [--forceFieldMMFFVariant <MMFF94 or MMFF94s>] 809 [--energyOut <yes or no>] [--enforceChirality <yes or no>] [--energyRMSDCutoff <number>] 810 [--energyWindow <number> ] [--infileParams <Name,Value,...>] [--maxConfs <number>] 811 [--mcsParams <Name,Value,...>] [--mp <yes or no>] [--mpParams <Name,Value,...>] 812 [ --outfileParams <Name,Value,...> ] [--overwrite] [--quiet <yes or no>] [ --removeHydrogens <yes or no>] 813 [--scaffold <auto or SMARTS>] [--scaffoldRMSDOut <yes or no>] [--useTethers <yes or no>] 814 [-w <dir>] -i <infile> -r <reffile> -o <outfile> 815 RDKitGenerateConstrainedConformers.py -h | --help | -e | --examples 816 817 Description: 818 Generate molecular conformations by performing a constrained energy minimization 819 against a reference molecule. An initial set of 3D conformers are generated for the input 820 molecules using distance geometry. A common core scaffold, corresponding to 821 a Maximum Common Substructure (MCS) or an explicit SMARTS pattern, is identified 822 between a pair of input and reference molecules. The core scaffold atoms in input 823 molecules are aligned against the same atoms in the reference molecule. The energy 824 of aligned structures are minimized using the forcefield to generate the final 3D structures. 825 826 The supported input file formats are: Mol (.mol), SD (.sdf, .sd), SMILES (.smi, 827 .csv, .tsv, .txt) 828 829 The supported output file formats are: SD (.sdf, .sd) 830 831 Options: 832 -a, --addHydrogens <yes or no> [default: yes] 833 Add hydrogens before minimization. 834 -c, --conformerGenerator <text> [default: ETKDGv2] 835 Conformation generation methodology for generating initial 3D coordinates 836 for molecules in input file. A common core scaffold is identified between a 837 a pair of input and reference molecules. The atoms in common core scaffold 838 of input molecules are aligned against the reference molecule followed by 839 energy minimization to generate final 3D structure. 840 841 The possible values along with a brief description are shown below: 842 843 SDG: Standard Distance Geometry 844 KDG: basic Knowledge-terms with Distance Geometry 845 ETDG: Experimental Torsion-angle preference with Distance Geometry 846 ETKDG: Experimental Torsion-angle preference along with basic 847 Knowledge-terms and Distance Geometry [Ref 129] 848 ETKDGv2: Experimental Torsion-angle preference along with basic 849 Knowledge-terms and Distance Geometry [Ref 167] 850 -f, --forceField <UFF, MMFF> [default: MMFF] 851 Forcefield method to use for constrained energy minimization. Possible values: 852 Universal Force Field (UFF) [ Ref 81 ] or Merck Molecular Mechanics Force 853 Field [ Ref 83-87 ] . 854 --forceFieldMMFFVariant <MMFF94 or MMFF94s> [default: MMFF94] 855 Variant of MMFF forcefield to use for energy minimization. 856 --energyOut <yes or no> [default: No] 857 Write out energy values. 858 --enforceChirality <yes or no> [default: Yes] 859 Enforce chirality for defined chiral centers. 860 --energyRMSDCutoff <number> [default: 0.5] 861 RMSD cutoff for retaining conformations after embedding and energy minimization. 862 Possible values: A number or None 863 864 The default is to keep only those conformations which are different from the 865 lowest energy conformation by the specified RMSD cutoff. The None value may 866 be used to keep all minimized conformations with in the specified energy window 867 from the lowest energy conformation. The lowest energy conformation is always 868 retained. 869 --energyWindow <number> [default: 20] 870 Energy window in kcal/mol for selecting conformers. 871 -e, --examples 872 Print examples. 873 -h, --help 874 Print this help message. 875 -i, --infile <infile> 876 Input file name. 877 --infileParams <Name,Value,...> [default: auto] 878 A comma delimited list of parameter name and value pairs for reading 879 molecules from files. The supported parameter names for different file 880 formats, along with their default values, are shown below: 881 882 SD, MOL: removeHydrogens,yes,sanitize,yes,strictParsing,yes 883 884 SMILES: smilesColumn,1,smilesNameColumn,2,smilesDelimiter,space, 885 smilesTitleLine,auto,sanitize,yes 886 887 Possible values for smilesDelimiter: space, comma or tab. 888 --maxConfs <number> [default: 50] 889 Maximum number of conformations to generate for each molecule by conformation 890 generation methodology for initial 3D coordinates. A constrained minimization is 891 performed using the specified forcefield and the lowest energy conformation is written 892 to the output file. 893 --mcsParams <Name,Value,...> [default: auto] 894 Parameter values to use for identifying a maximum common substructure 895 (MCS) in between a pair of reference and input molecules.In general, it is a 896 comma delimited list of parameter name and value pairs. The supported 897 parameter names along with their default values are shown below: 898 899 atomCompare,CompareElements,bondCompare,CompareOrder, 900 maximizeBonds,yes,matchValences,yes,matchChiralTag,no, 901 minNumAtoms,1,minNumBonds,0,ringMatchesRingOnly,yes, 902 completeRingsOnly,yes,threshold,1.0,timeOut,3600,seedSMARTS,none 903 904 Possible values for atomCompare: CompareAny, CompareElements, 905 CompareIsotopes. Possible values for bondCompare: CompareAny, 906 CompareOrder, CompareOrderExact. 907 908 A brief description of MCS parameters taken from RDKit documentation is 909 as follows: 910 911 atomCompare - Controls match between two atoms 912 bondCompare - Controls match between two bonds 913 maximizeBonds - Maximize number of bonds instead of atoms 914 matchValences - Include atom valences in the MCS match 915 matchChiralTag - Include atom chirality in the MCS match 916 minNumAtoms - Minimum number of atoms in the MCS match 917 minNumBonds - Minimum number of bonds in the MCS match 918 ringMatchesRingOnly - Ring bonds only match other ring bonds 919 completeRingsOnly - Partial rings not allowed during the match 920 threshold - Fraction of the dataset that must contain the MCS 921 seedSMARTS - SMARTS string as the seed of the MCS 922 timeout - Timeout for the MCS calculation in seconds 923 924 --mp <yes or no> [default: no] 925 Use multiprocessing. 926 927 By default, input data is retrieved in a lazy manner via mp.Pool.imap() 928 function employing lazy RDKit data iterable. This allows processing of 929 arbitrary large data sets without any additional requirements memory. 930 931 All input data may be optionally loaded into memory by mp.Pool.map() 932 before starting worker processes in a process pool by setting the value 933 of 'inputDataMode' to 'InMemory' in '--mpParams' option. 934 935 A word to the wise: The default 'chunkSize' value of 1 during 'Lazy' input 936 data mode may adversely impact the performance. The '--mpParams' section 937 provides additional information to tune the value of 'chunkSize'. 938 --mpParams <Name,Value,...> [default: auto] 939 A comma delimited list of parameter name and value pairs to configure 940 multiprocessing. 941 942 The supported parameter names along with their default and possible 943 values are shown below: 944 945 chunkSize, auto 946 inputDataMode, Lazy [ Possible values: InMemory or Lazy ] 947 numProcesses, auto [ Default: mp.cpu_count() ] 948 949 These parameters are used by the following functions to configure and 950 control the behavior of multiprocessing: mp.Pool(), mp.Pool.map(), and 951 mp.Pool.imap(). 952 953 The chunkSize determines chunks of input data passed to each worker 954 process in a process pool by mp.Pool.map() and mp.Pool.imap() functions. 955 The default value of chunkSize is dependent on the value of 'inputDataMode'. 956 957 The mp.Pool.map() function, invoked during 'InMemory' input data mode, 958 automatically converts RDKit data iterable into a list, loads all data into 959 memory, and calculates the default chunkSize using the following method 960 as shown in its code: 961 962 chunkSize, extra = divmod(len(dataIterable), len(numProcesses) * 4) 963 if extra: chunkSize += 1 964 965 For example, the default chunkSize will be 7 for a pool of 4 worker processes 966 and 100 data items. 967 968 The mp.Pool.imap() function, invoked during 'Lazy' input data mode, employs 969 'lazy' RDKit data iterable to retrieve data as needed, without loading all the 970 data into memory. Consequently, the size of input data is not known a priori. 971 It's not possible to estimate an optimal value for the chunkSize. The default 972 chunkSize is set to 1. 973 974 The default value for the chunkSize during 'Lazy' data mode may adversely 975 impact the performance due to the overhead associated with exchanging 976 small chunks of data. It is generally a good idea to explicitly set chunkSize to 977 a larger value during 'Lazy' input data mode, based on the size of your input 978 data and number of processes in the process pool. 979 980 The mp.Pool.map() function waits for all worker processes to process all 981 the data and return the results. The mp.Pool.imap() function, however, 982 returns the the results obtained from worker processes as soon as the 983 results become available for specified chunks of data. 984 985 The order of data in the results returned by both mp.Pool.map() and 986 mp.Pool.imap() functions always corresponds to the input data. 987 -o, --outfile <outfile> 988 Output file name. 989 --outfileParams <Name,Value,...> [default: auto] 990 A comma delimited list of parameter name and value pairs for writing 991 molecules to files. The supported parameter names for different file 992 formats, along with their default values, are shown below: 993 994 SD: kekulize,yes,forceV3000,no 995 996 --overwrite 997 Overwrite existing files. 998 -q, --quiet <yes or no> [default: no] 999 Use quiet mode. The warning and information messages will not be printed. 1000 -r, --reffile <reffile> 1001 Reference input file name containing a 3D reference molecule. A common 1002 core scaffold must be present in a pair of an input and reference molecules. 1003 Otherwise, no constrained minimization is performed on the input molecule. 1004 --removeHydrogens <yes or no> [default: Yes] 1005 Remove hydrogens after minimization. 1006 -s, --scaffold <auto or SMARTS> [default: auto] 1007 Common core scaffold between a pair of input and reference molecules used for 1008 constrained minimization of molecules in input file. Possible values: Auto or a 1009 valid SMARTS pattern. The common core scaffold is automatically detected 1010 corresponding to the Maximum Common Substructure (MCS) between a pair of 1011 reference and input molecules. A valid SMARTS pattern may be optionally specified 1012 for the common core scaffold. 1013 --scaffoldRMSDOut <yes or no> [default: No] 1014 Write out RMSD value for common core alignment between a pair of input and 1015 reference molecules. 1016 -u, --useTethers <yes or no> [default: yes] 1017 Use tethers to optimize the final conformation by applying a series of extra forces 1018 to align matching atoms to the positions of the core atoms. Otherwise, use simple 1019 distance constraints during the optimization. 1020 -w, --workingdir <dir> 1021 Location of working directory which defaults to the current directory. 1022 1023 Examples: 1024 To generate conformers by performing constrained energy minimization for molecules 1025 in a SMILES file against a reference 3D molecule in a SD file using a common core 1026 scaffold between pairs of input and reference molecules identified using MCS, 1027 generating up to 50 conformations using ETKDG methodology followed by MMFF 1028 forcefield minimization within energy window of 20 kcal/mol and RMSD of greater 1029 than 0.5 from the lowest energy conformation, and write out a SD file: 1030 1031 % RDKitGenerateConstrainedConformers.py -i SampleSeriesD3R.smi 1032 -r SampleSeriesRef3D.sdf -o SampleOut.sdf 1033 1034 To rerun the first example in a quiet mode and write out a SD file, type: 1035 1036 % RDKitGenerateConstrainedConformers.py -q yes -i SampleSeriesD3R.smi 1037 -r SampleSeriesRef3D.sdf -o SampleOut.sdf 1038 1039 To rerun the first example in multiprocessing mode on all available CPUs 1040 without loading all data into memory and write out a SD file, type: 1041 1042 % RDKitGenerateConstrainedConformers.py --mp yes -i SampleSeriesD3R.smi 1043 -r SampleSeriesRef3D.sdf -o SampleOut.sdf 1044 1045 To run the first example in multiprocessing mode on all available CPUs 1046 by loading all data into memory and write out a SD file, type: 1047 1048 % RDKitGenerateConstrainedConformers.py --mp yes --mpParams 1049 "inputDataMode,InMemory" -i SampleSeriesD3R.smi 1050 -r SampleSeriesRef3D.sdf -o SampleOut.sdf 1051 1052 To rerun the first example in multiprocessing mode on specific number of 1053 CPUs and chunk size without loading all data into memory and write out a SD file, 1054 type: 1055 1056 % RDKitGenerateConstrainedConformers.py --mp yes --mpParams 1057 "inputDataMode,Lazy,numProcesses,4,chunkSize,8" 1058 -i SampleSeriesD3R.smi -r SampleSeriesRef3D.sdf -o SampleOut.sdf 1059 1060 To rerun the first example using an explicit SMARTS string for a common core 1061 scaffold and write out a SD file, type: 1062 1063 % RDKitGenerateConstrainedConformers.py --scaffold 1064 "c2cc(-c3nc(N)ncc3)cn2" -i SampleSeriesD3R.smi 1065 -r SampleSeriesRef3D.sdf -o SampleOut.sdf 1066 1067 To rerun the first example using molecules in a CSV SMILES file, SMILES 1068 strings in column 1, name in column2, and write out a SD file, type: 1069 1070 % RDKitGenerateConstrainedConformers.py --infileParams 1071 "smilesDelimiter,comma,smilesTitleLine,yes,smilesColumn,1, 1072 smilesNameColumn,2" -i SampleSeriesD3R.csv -r SampleSeriesRef3D.sdf 1073 -o SampleOut.sdf 1074 1075 To generate constrained conformers for molecules in a SD file against a reference 1076 3D molecule in a SD file using a common core scaffold between pairs of input and 1077 reference molecules identified using MCS, generating up to 10 conformations 1078 using SDG methodology followed by UFF forcefield minimization, conformations 1079 with in an energy window of 10 kcal/mol and RMSD of greater that 1, and write out 1080 a SD file containing minimum energy structure along with energy and embed RMS 1081 values corresponding to each constrained molecule, type: 1082 1083 % RDKitGenerateConstrainedConformers.py --maxConfs 10 -c SDG -f UFF 1084 --scaffoldRMSDOut yes --energyOut yes --energyRMSDCutoff 1.0 1085 --energyWindow 10 -i SampleSeriesD3R.sdf -r SampleSeriesRef3D.sdf 1086 -o SampleOut.sdf 1087 1088 Author: 1089 Manish Sud(msud@san.rr.com) 1090 1091 See also: 1092 RDKitCalculateRMSD.py, RDKitCalculateMolecularDescriptors.py, RDKitCompareMoleculeShapes.py, 1093 RDKitConvertFileFormat.py, RDKitGenerateConformers.py, RDKitPerformConstrainedMinimization.py 1094 1095 Copyright: 1096 Copyright (C) 2024 Manish Sud. All rights reserved. 1097 1098 The functionality available in this script is implemented using RDKit, an 1099 open source toolkit for cheminformatics developed by Greg Landrum. 1100 1101 This file is part of MayaChemTools. 1102 1103 MayaChemTools is free software; you can redistribute it and/or modify it under 1104 the terms of the GNU Lesser General Public License as published by the Free 1105 Software Foundation; either version 3 of the License, or (at your option) any 1106 later version. 1107 1108 """ 1109 1110 if __name__ == "__main__": 1111 main()