1 #!/bin/env python
2 #
3 # File: RDKitPerformConstrainedMinimization.py
4 # Author: Manish Sud <msud@san.rr.com>
5 #
6 # Copyright (C) 2026 Manish Sud. All rights reserved.
7 #
8 # The functionality available in this script is implemented using RDKit, an
9 # open source toolkit for cheminformatics developed by Greg Landrum.
10 #
11 # This file is part of MayaChemTools.
12 #
13 # MayaChemTools is free software; you can redistribute it and/or modify it under
14 # the terms of the GNU Lesser General Public License as published by the Free
15 # Software Foundation; either version 3 of the License, or (at your option) any
16 # later version.
17 #
18 # MayaChemTools is distributed in the hope that it will be useful, but without
19 # any warranty; without even the implied warranty of merchantability of fitness
20 # for a particular purpose. See the GNU Lesser General Public License for more
21 # details.
22 #
23 # You should have received a copy of the GNU Lesser General Public License
24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
26 # Boston, MA, 02111-1307, USA.
27 #
28
29 from __future__ import print_function
30
31 import os
32 import sys
33 import time
34 import re
35 import multiprocessing as mp
36
37 # RDKit imports...
38 try:
39 from rdkit import rdBase
40 from rdkit import Chem
41 from rdkit.Chem import AllChem
42 from rdkit.Chem import rdFMCS
43 except ImportError as ErrMsg:
44 sys.stderr.write("\nFailed to import RDKit module/package: %s\n" % ErrMsg)
45 sys.stderr.write("Check/update your RDKit environment and try again.\n\n")
46 sys.exit(1)
47
48 # MayaChemTools imports...
49 sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python"))
50 try:
51 from docopt import docopt
52 import MiscUtil
53 import RDKitUtil
54 except ImportError as ErrMsg:
55 sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg)
56 sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n")
57 sys.exit(1)
58
59 ScriptName = os.path.basename(sys.argv[0])
60 Options = {}
61 OptionsInfo = {}
62
63
64 def main():
65 """Start execution of the script."""
66
67 MiscUtil.PrintInfo(
68 "\n%s (RDKit v%s; MayaChemTools v%s; %s): Starting...\n"
69 % (ScriptName, rdBase.rdkitVersion, MiscUtil.GetMayaChemToolsVersion(), time.asctime())
70 )
71
72 (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime()
73
74 # Retrieve command line arguments and options...
75 RetrieveOptions()
76
77 # Process and validate command line arguments and options...
78 ProcessOptions()
79
80 # Perform actions required by the script...
81 PerformConstrainedMinimization()
82
83 MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName)
84 MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime))
85
86
87 def PerformConstrainedMinimization():
88 """Perform constrained minimization."""
89
90 # Read and validate reference molecule...
91 RefMol = RetrieveReferenceMolecule()
92
93 # Setup a molecule reader for input file...
94 MiscUtil.PrintInfo("\nProcessing file %s..." % OptionsInfo["Infile"])
95 OptionsInfo["InfileParams"]["AllowEmptyMols"] = True
96 Mols = RDKitUtil.ReadMolecules(OptionsInfo["Infile"], **OptionsInfo["InfileParams"])
97
98 # Set up a molecule writer...
99 Writer = RDKitUtil.MoleculesWriter(OptionsInfo["Outfile"], **OptionsInfo["OutfileParams"])
100 if Writer is None:
101 MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OptionsInfo["Outfile"])
102 MiscUtil.PrintInfo("Generating file %s..." % OptionsInfo["Outfile"])
103
104 MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount = ProcessMolecules(RefMol, Mols, Writer)
105
106 if Writer is not None:
107 Writer.close()
108
109 MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount)
110 MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount)
111 MiscUtil.PrintInfo("Number of molecules with missing core scaffold: %d" % CoreScaffoldMissingCount)
112 MiscUtil.PrintInfo(
113 "Number of molecules failed during conformation generation or minimization: %d" % MinimizationFailedCount
114 )
115 MiscUtil.PrintInfo(
116 "Number of ignored molecules: %d"
117 % (MolCount - ValidMolCount + CoreScaffoldMissingCount + MinimizationFailedCount)
118 )
119
120
121 def ProcessMolecules(RefMol, Mols, Writer):
122 """Process and minimize molecules."""
123
124 if OptionsInfo["MPMode"]:
125 return ProcessMoleculesUsingMultipleProcesses(RefMol, Mols, Writer)
126 else:
127 return ProcessMoleculesUsingSingleProcess(RefMol, Mols, Writer)
128
129
130 def ProcessMoleculesUsingSingleProcess(RefMol, Mols, Writer):
131 """Process and minimize molecules using a single process."""
132
133 (MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount) = [0] * 4
134
135 for Mol in Mols:
136 MolCount += 1
137
138 if Mol is None:
139 continue
140
141 if RDKitUtil.IsMolEmpty(Mol):
142 if not OptionsInfo["QuietMode"]:
143 MolName = RDKitUtil.GetMolName(Mol, MolCount)
144 MiscUtil.PrintWarning("Ignoring empty molecule: %s" % MolName)
145 continue
146 ValidMolCount += 1
147
148 # Setup a reference molecule core containing common scaffold atoms...
149 RefMolCore = SetupCoreScaffold(RefMol, Mol, MolCount)
150 if RefMolCore is None:
151 CoreScaffoldMissingCount += 1
152 continue
153
154 Mol, CalcStatus, Energy, ScaffoldEmbedRMSD = ConstrainAndMinimizeMolecule(Mol, RefMolCore, MolCount)
155
156 if not CalcStatus:
157 MinimizationFailedCount += 1
158 continue
159
160 WriteMolecule(Writer, Mol, Energy, ScaffoldEmbedRMSD)
161
162 return (MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount)
163
164
165 def ProcessMoleculesUsingMultipleProcesses(RefMol, Mols, Writer):
166 """Process and minimize molecules using multiprocessing."""
167
168 MPParams = OptionsInfo["MPParams"]
169
170 # Setup data for initializing a worker process...
171 MiscUtil.PrintInfo("Encoding options info and reference molecule...")
172
173 OptionsInfo["EncodedRefMol"] = RDKitUtil.MolToBase64EncodedMolString(RefMol)
174 InitializeWorkerProcessArgs = (
175 MiscUtil.ObjectToBase64EncodedString(Options),
176 MiscUtil.ObjectToBase64EncodedString(OptionsInfo),
177 )
178
179 # Setup a encoded mols data iterable for a worker process...
180 WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(Mols)
181
182 # Setup process pool along with data initialization for each process...
183 MiscUtil.PrintInfo(
184 "\nConfiguring multiprocessing using %s method..."
185 % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()")
186 )
187 MiscUtil.PrintInfo(
188 "NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n"
189 % (
190 MPParams["NumProcesses"],
191 MPParams["InputDataMode"],
192 ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"]),
193 )
194 )
195
196 ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeWorkerProcess, InitializeWorkerProcessArgs)
197
198 # Start processing...
199 if re.match("^Lazy$", MPParams["InputDataMode"], re.I):
200 Results = ProcessPool.imap(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
201 elif re.match("^InMemory$", MPParams["InputDataMode"], re.I):
202 Results = ProcessPool.map(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
203 else:
204 MiscUtil.PrintError(
205 'The value, %s, specified for "--inputDataMode" is not supported.' % (MPParams["InputDataMode"])
206 )
207
208 (MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount) = [0] * 4
209 for Result in Results:
210 MolCount += 1
211 MolIndex, EncodedMol, CoreScaffoldMissingStatus, CalcStatus, Energy, ScaffoldEmbedRMSD = Result
212
213 if EncodedMol is None:
214 continue
215 ValidMolCount += 1
216
217 if CoreScaffoldMissingStatus:
218 CoreScaffoldMissingStatus += 1
219 continue
220
221 if not CalcStatus:
222 MinimizationFailedCount += 1
223 continue
224
225 Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
226 WriteMolecule(Writer, Mol, Energy, ScaffoldEmbedRMSD)
227
228 return (MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount)
229
230
231 def InitializeWorkerProcess(*EncodedArgs):
232 """Initialize data for a worker process."""
233
234 global Options, OptionsInfo
235
236 MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid())
237
238 # Decode Options and OptionInfo...
239 Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0])
240 OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1])
241
242 # Decode RefMol...
243 OptionsInfo["RefMol"] = RDKitUtil.MolFromBase64EncodedMolString(OptionsInfo["EncodedRefMol"])
244
245
246 def WorkerProcess(EncodedMolInfo):
247 """Process data for a worker process."""
248
249 MolIndex, EncodedMol = EncodedMolInfo
250
251 CoreScaffoldMissingStatus = False
252 CalcStatus = False
253 Energy = None
254 ScaffoldEmbedRMSD = None
255
256 if EncodedMol is None:
257 return [MolIndex, None, CoreScaffoldMissingStatus, CalcStatus, Energy, ScaffoldEmbedRMSD]
258
259 RefMol = OptionsInfo["RefMol"]
260
261 Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
262 if RDKitUtil.IsMolEmpty(Mol):
263 if not OptionsInfo["QuietMode"]:
264 MolName = RDKitUtil.GetMolName(Mol, (MolIndex + 1))
265 MiscUtil.PrintWarning("Ignoring empty molecule: %s" % MolName)
266 return [MolIndex, None, CoreScaffoldMissingStatus, CalcStatus, Energy, ScaffoldEmbedRMSD]
267
268 # Setup a reference molecule core containing common scaffold atoms...
269 RefMolCore = SetupCoreScaffold(RefMol, Mol, (MolIndex + 1))
270 if RefMolCore is None:
271 CoreScaffoldMissingStatus = True
272 return [MolIndex, None, CalcStatus, CoreScaffoldMissingStatus, Energy, ScaffoldEmbedRMSD]
273
274 Mol, CalcStatus, Energy, ScaffoldEmbedRMSD = ConstrainAndMinimizeMolecule(Mol, RefMolCore, (MolIndex + 1))
275
276 return [
277 MolIndex,
278 RDKitUtil.MolToBase64EncodedMolString(
279 Mol, PropertyPickleFlags=Chem.PropertyPickleOptions.MolProps | Chem.PropertyPickleOptions.PrivateProps
280 ),
281 CoreScaffoldMissingStatus,
282 CalcStatus,
283 Energy,
284 ScaffoldEmbedRMSD,
285 ]
286
287
288 def RetrieveReferenceMolecule():
289 """Retrieve and validate reference molecule."""
290
291 RefFile = OptionsInfo["RefFile"]
292
293 MiscUtil.PrintInfo("\nProcessing file %s..." % (RefFile))
294 OptionsInfo["InfileParams"]["AllowEmptyMols"] = False
295 ValidRefMols, RefMolCount, ValidRefMolCount = RDKitUtil.ReadAndValidateMolecules(
296 RefFile, **OptionsInfo["InfileParams"]
297 )
298
299 if ValidRefMolCount == 0:
300 MiscUtil.PrintError("The reference file, %s, contains no valid molecules." % RefFile)
301 elif ValidRefMolCount > 1:
302 MiscUtil.PrintWarning(
303 "The reference file, %s, contains, %d, valid molecules. Using first molecule as the reference molecule..."
304 % (RefFile, ValidRefMolCount)
305 )
306
307 RefMol = ValidRefMols[0]
308
309 if OptionsInfo["UseScaffoldSMARTS"]:
310 ScaffoldPatternMol = Chem.MolFromSmarts(OptionsInfo["ScaffoldSMARTS"])
311 if ScaffoldPatternMol is None:
312 MiscUtil.PrintError(
313 'Failed to create scaffold pattern molecule. The scaffold SMARTS pattern, %s, specified using "-s, --scaffold" option is not valid.'
314 % (OptionsInfo["ScaffoldSMARTS"])
315 )
316
317 if not RefMol.HasSubstructMatch(ScaffoldPatternMol):
318 MiscUtil.PrintError(
319 'The scaffold SMARTS pattern, %s, specified using "-s, --scaffold" option, is missing in the first valid reference molecule.'
320 % (OptionsInfo["ScaffoldSMARTS"])
321 )
322
323 return RefMol
324
325
326 def SetupCoreScaffold(RefMol, Mol, MolCount):
327 """Setup a reference molecule core containing common scaffold atoms between
328 a pair of molecules."""
329
330 if OptionsInfo["UseScaffoldMCS"]:
331 return SetupCoreScaffoldByMCS(RefMol, Mol, MolCount)
332 elif OptionsInfo["UseScaffoldSMARTS"]:
333 return SetupCoreScaffoldBySMARTS(RefMol, Mol, MolCount)
334 else:
335 MiscUtil.PrintError(
336 'The value, %s, specified for "-s, --scaffold" option is not supported.' % (OptionsInfo["Scaffold"])
337 )
338
339
340 def SetupCoreScaffoldByMCS(RefMol, Mol, MolCount):
341 """Setup a reference molecule core containing common scaffold atoms between
342 a pair of molecules using MCS."""
343
344 MCSParams = OptionsInfo["MCSParams"]
345 Mols = [RefMol, Mol]
346
347 MCSResultObject = rdFMCS.FindMCS(
348 Mols,
349 maximizeBonds=MCSParams["MaximizeBonds"],
350 threshold=MCSParams["Threshold"],
351 timeout=MCSParams["TimeOut"],
352 verbose=MCSParams["Verbose"],
353 matchValences=MCSParams["MatchValences"],
354 ringMatchesRingOnly=MCSParams["RingMatchesRingOnly"],
355 completeRingsOnly=MCSParams["CompleteRingsOnly"],
356 matchChiralTag=MCSParams["MatchChiralTag"],
357 atomCompare=MCSParams["AtomCompare"],
358 bondCompare=MCSParams["BondCompare"],
359 seedSmarts=MCSParams["SeedSMARTS"],
360 )
361
362 if MCSResultObject.canceled:
363 if not OptionsInfo["QuietMode"]:
364 MiscUtil.PrintWarning(
365 'MCS failed to identify a common core scaffold between reference moecule and input molecule %s. Specify a different set of parameters using "-m, --mcsParams" option and try again.'
366 % (RDKitUtil.GetMolName(Mol, MolCount))
367 )
368 return None
369
370 CoreNumAtoms = MCSResultObject.numAtoms
371 CoreNumBonds = MCSResultObject.numBonds
372
373 SMARTSCore = MCSResultObject.smartsString
374
375 if not len(SMARTSCore):
376 if not OptionsInfo["QuietMode"]:
377 MiscUtil.PrintWarning(
378 'MCS failed to identify a common core scaffold between reference moecule and input molecule %s. Specify a different set of parameters using "-m, --mcsParams" option and try again.'
379 % (RDKitUtil.GetMolName(Mol, MolCount))
380 )
381 return None
382
383 if CoreNumAtoms < MCSParams["MinNumAtoms"]:
384 if not OptionsInfo["QuietMode"]:
385 MiscUtil.PrintWarning(
386 'Number of atoms, %d, in core scaffold identified by MCS is less than, %d, as specified by "minNumAtoms" parameter in "-m, --mcsParams" option.'
387 % (CoreNumAtoms, MCSParams["MinNumAtoms"])
388 )
389 return None
390
391 if CoreNumBonds < MCSParams["MinNumBonds"]:
392 if not OptionsInfo["QuietMode"]:
393 MiscUtil.PrintWarning(
394 'Number of bonds, %d, in core scaffold identified by MCS is less than, %d, as specified by "minNumBonds" parameter in "-m, --mcsParams" option.'
395 % (CoreNumBonds, MCSParams["MinNumBonds"])
396 )
397 return None
398
399 return GenerateCoreMol(RefMol, SMARTSCore)
400
401
402 def SetupCoreScaffoldBySMARTS(RefMol, Mol, MolCount):
403 """Setup a reference molecule core containing common scaffold atoms between
404 a pair of molecules using specified SMARTS."""
405
406 if OptionsInfo["ScaffoldPatternMol"] is None:
407 OptionsInfo["ScaffoldPatternMol"] = Chem.MolFromSmarts(OptionsInfo["ScaffoldSMARTS"])
408
409 if not Mol.HasSubstructMatch(OptionsInfo["ScaffoldPatternMol"]):
410 if not OptionsInfo["QuietMode"]:
411 MiscUtil.PrintWarning(
412 'The scaffold SMARTS pattern, %s, specified using "-s, --scaffold" option is missing in input molecule, %s.'
413 % (OptionsInfo["ScaffoldSMARTS"], RDKitUtil.GetMolName(Mol, MolCount))
414 )
415 return None
416
417 return GenerateCoreMol(RefMol, OptionsInfo["ScaffoldSMARTS"])
418
419
420 def GenerateCoreMol(RefMol, SMARTSCore):
421 """Generate core molecule for embedding."""
422
423 # Create a molecule corresponding to core atoms...
424 SMARTSCoreMol = Chem.MolFromSmarts(SMARTSCore)
425
426 # Setup a ref molecule containing core atoms with dummy atoms as
427 # attachment points for atoms around the core atoms...
428 Core = AllChem.ReplaceSidechains(Chem.RemoveHs(RefMol), SMARTSCoreMol)
429
430 # Delete any substructures containing dummy atoms..
431 RefMolCore = AllChem.DeleteSubstructs(Core, Chem.MolFromSmiles("*"))
432 RefMolCore.UpdatePropertyCache()
433
434 return RefMolCore
435
436
437 def ConstrainAndMinimizeMolecule(Mol, RefMolCore, MolNum=None):
438 "Constrain and minimize molecule."
439
440 if OptionsInfo["AddHydrogens"]:
441 Mol = Chem.AddHs(Mol, addCoords=True)
442
443 # Setup forcefield function to use for constrained minimization...
444 ForceFieldFunction = None
445 ForceFieldName = None
446 if OptionsInfo["UseUFF"]:
447 ForceFieldFunction = lambda mol, confId=-1: AllChem.UFFGetMoleculeForceField(mol, confId=confId)
448 ForceFieldName = "UFF"
449 else:
450 ForceFieldFunction = lambda mol, confId=-1: AllChem.MMFFGetMoleculeForceField(
451 mol, AllChem.MMFFGetMoleculeProperties(mol, mmffVariant=OptionsInfo["MMFFVariant"]), confId=confId
452 )
453 ForceFieldName = "MMFF"
454
455 if ForceFieldFunction is None:
456 if not OptionsInfo["QuietMode"]:
457 MiscUtil.PrintWarning(
458 "Failed to setup forcefield %s for molecule: %s\n" % (ForceFieldName, RDKitUtil.GetMolName(Mol, MolNum))
459 )
460 return (None, False, None, None)
461
462 MaxConfs = OptionsInfo["MaxConfs"]
463 EnforceChirality = OptionsInfo["EnforceChirality"]
464 UseExpTorsionAnglePrefs = OptionsInfo["UseExpTorsionAnglePrefs"]
465 ETVersion = OptionsInfo["ETVersion"]
466 UseBasicKnowledge = OptionsInfo["UseBasicKnowledge"]
467 UseTethers = OptionsInfo["UseTethers"]
468
469 CalcEnergyMap = {}
470 MolConfsMap = {}
471 ConfIDs = [ConfID for ConfID in range(0, MaxConfs)]
472
473 for ConfID in ConfIDs:
474 try:
475 MolConf = Chem.Mol(Mol)
476 AllChem.ConstrainedEmbed(
477 MolConf,
478 RefMolCore,
479 useTethers=UseTethers,
480 coreConfId=-1,
481 randomseed=ConfID,
482 getForceField=ForceFieldFunction,
483 enforceChirality=EnforceChirality,
484 useExpTorsionAnglePrefs=UseExpTorsionAnglePrefs,
485 useBasicKnowledge=UseBasicKnowledge,
486 ETversion=ETVersion,
487 )
488 except (ValueError, RuntimeError, Chem.rdchem.KekulizeException) as ErrMsg:
489 if not OptionsInfo["QuietMode"]:
490 MolName = RDKitUtil.GetMolName(Mol, MolNum)
491 MiscUtil.PrintWarning(
492 "Constrained embedding couldn't be performed for molecule %s:\n%s\n"
493 % (RDKitUtil.GetMolName(Mol, MolNum), ErrMsg)
494 )
495 return (None, False, None, None)
496
497 EnergyStatus, Energy = GetEnergy(MolConf)
498
499 if not EnergyStatus:
500 if not OptionsInfo["QuietMode"]:
501 MolName = RDKitUtil.GetMolName(Mol, MolNum)
502 MiscUtil.PrintWarning(
503 "Failed to retrieve calculated energy for conformation number %d of molecule %s. Try again after removing any salts or cleaing up the molecule...\n"
504 % (ConfID, MolName)
505 )
506 return (None, False, None, None)
507
508 CalcEnergyMap[ConfID] = Energy
509 MolConfsMap[ConfID] = MolConf
510
511 SortedConfIDs = sorted(ConfIDs, key=lambda ConfID: CalcEnergyMap[ConfID])
512 MinEnergyConfID = SortedConfIDs[0]
513
514 MinEnergy = "%.2f" % CalcEnergyMap[MinEnergyConfID] if OptionsInfo["EnergyOut"] else None
515 MinEnergyMolConf = MolConfsMap[MinEnergyConfID]
516
517 ScaffoldEmbedRMSD = "%.4f" % float(MinEnergyMolConf.GetProp("EmbedRMS")) if OptionsInfo["ScaffoldRMSDOut"] else None
518 MinEnergyMolConf.ClearProp("EmbedRMS")
519
520 if OptionsInfo["RemoveHydrogens"]:
521 MinEnergyMolConf = Chem.RemoveHs(MinEnergyMolConf)
522
523 return (MinEnergyMolConf, True, MinEnergy, ScaffoldEmbedRMSD)
524
525
526 def GetEnergy(Mol, ConfID=None):
527 """Calculate energy."""
528
529 Status = True
530 Energy = None
531
532 if ConfID is None:
533 ConfID = -1
534
535 if OptionsInfo["UseUFF"]:
536 UFFMoleculeForcefield = AllChem.UFFGetMoleculeForceField(Mol, confId=ConfID)
537 if UFFMoleculeForcefield is None:
538 Status = False
539 else:
540 Energy = UFFMoleculeForcefield.CalcEnergy()
541 elif OptionsInfo["UseMMFF"]:
542 MMFFMoleculeProperties = AllChem.MMFFGetMoleculeProperties(Mol, mmffVariant=OptionsInfo["MMFFVariant"])
543 MMFFMoleculeForcefield = AllChem.MMFFGetMoleculeForceField(Mol, MMFFMoleculeProperties, confId=ConfID)
544 if MMFFMoleculeForcefield is None:
545 Status = False
546 else:
547 Energy = MMFFMoleculeForcefield.CalcEnergy()
548 else:
549 MiscUtil.PrintError(
550 "Couldn't retrieve conformer energy: Specified forcefield, %s, is not supported" % OptionsInfo["ForceField"]
551 )
552
553 return (Status, Energy)
554
555
556 def WriteMolecule(
557 Writer,
558 Mol,
559 Energy=None,
560 ScaffoldEmbedRMSD=None,
561 ConfID=None,
562 ):
563 """Write molecule."""
564
565 if ScaffoldEmbedRMSD is not None:
566 Mol.SetProp("CoreScaffoldEmbedRMSD", ScaffoldEmbedRMSD)
567
568 if Energy is not None:
569 Mol.SetProp(OptionsInfo["EnergyLabel"], Energy)
570
571 if ConfID is None:
572 Writer.write(Mol)
573 else:
574 Writer.write(Mol, confId=ConfID)
575
576
577 def ProcessMCSParameters():
578 """Set up and process MCS parameters."""
579
580 SetupMCSParameters()
581 ProcessSpecifiedMCSParameters()
582
583
584 def SetupMCSParameters():
585 """Set up default MCS parameters."""
586
587 OptionsInfo["MCSParams"] = {
588 "MaximizeBonds": True,
589 "Threshold": 0.9,
590 "TimeOut": 3600,
591 "Verbose": False,
592 "MatchValences": True,
593 "MatchChiralTag": False,
594 "RingMatchesRingOnly": True,
595 "CompleteRingsOnly": True,
596 "AtomCompare": rdFMCS.AtomCompare.CompareElements,
597 "BondCompare": rdFMCS.BondCompare.CompareOrder,
598 "SeedSMARTS": "",
599 "MinNumAtoms": 1,
600 "MinNumBonds": 0,
601 }
602
603
604 def ProcessSpecifiedMCSParameters():
605 """Process specified MCS parameters."""
606
607 if re.match("^auto$", OptionsInfo["SpecifiedMCSParams"], re.I):
608 # Nothing to process...
609 return
610
611 # Parse specified parameters...
612 MCSParams = re.sub(" ", "", OptionsInfo["SpecifiedMCSParams"])
613 if not MCSParams:
614 MiscUtil.PrintError('No valid parameter name and value pairs specified using "-m, --mcsParams" option.')
615
616 MCSParamsWords = MCSParams.split(",")
617 if len(MCSParamsWords) % 2:
618 MiscUtil.PrintError(
619 'The number of comma delimited paramater names and values, %d, specified using "-m, --mcsParams" option must be an even number.'
620 % (len(MCSParamsWords))
621 )
622
623 # Setup canonical parameter names...
624 ValidParamNames = []
625 CanonicalParamNamesMap = {}
626 for ParamName in sorted(OptionsInfo["MCSParams"]):
627 ValidParamNames.append(ParamName)
628 CanonicalParamNamesMap[ParamName.lower()] = ParamName
629
630 # Validate and set paramater names and value...
631 for Index in range(0, len(MCSParamsWords), 2):
632 Name = MCSParamsWords[Index]
633 Value = MCSParamsWords[Index + 1]
634
635 CanonicalName = Name.lower()
636 if CanonicalName not in CanonicalParamNamesMap:
637 MiscUtil.PrintError(
638 'The parameter name, %s, specified using "-m, --mcsParams" option is not a valid name. Supported parameter names: %s'
639 % (Name, " ".join(ValidParamNames))
640 )
641
642 ParamName = CanonicalParamNamesMap[CanonicalName]
643 if re.match("^Threshold$", ParamName, re.I):
644 Value = float(Value)
645 if Value <= 0.0 or Value > 1.0:
646 MiscUtil.PrintError(
647 'The parameter value, %s, specified using "-m, --mcsParams" option for parameter, %s, is not a valid value. Supported values: > 0 and <= 1.0'
648 % (Value, Name)
649 )
650 ParamValue = Value
651 elif re.match("^Timeout$", ParamName, re.I):
652 Value = int(Value)
653 if Value <= 0:
654 MiscUtil.PrintError(
655 'The parameter value, %s, specified using "-m, --mcsParams" option for parameter, %s, is not a valid value. Supported values: > 0'
656 % (Value, Name)
657 )
658 ParamValue = Value
659 elif re.match("^MinNumAtoms$", ParamName, re.I):
660 Value = int(Value)
661 if Value < 1:
662 MiscUtil.PrintError(
663 'The parameter value, %s, specified using "-m, --mcsParams" option for parameter, %s, is not a valid value. Supported values: >= 1'
664 % (Value, Name)
665 )
666 ParamValue = Value
667 elif re.match("^MinNumBonds$", ParamName, re.I):
668 Value = int(Value)
669 if Value < 0:
670 MiscUtil.PrintError(
671 'The parameter value, %s, specified using "-m, --mcsParams" option for parameter, %s, is not a valid value. Supported values: >=0 '
672 % (Value, Name)
673 )
674 ParamValue = Value
675 elif re.match("^AtomCompare$", ParamName, re.I):
676 if re.match("^CompareAny$", Value, re.I):
677 ParamValue = rdFMCS.AtomCompare.CompareAny
678 elif re.match("^CompareElements$", Value, re.I):
679 ParamValue = Chem.rdFMCS.AtomCompare.CompareElements
680 elif re.match("^CompareIsotopes$", Value, re.I):
681 ParamValue = Chem.rdFMCS.AtomCompare.CompareIsotopes
682 else:
683 MiscUtil.PrintError(
684 'The parameter value, %s, specified using "-m, --mcsParams" option for parameter, %s, is not a valid value. Supported values: CompareAny CompareElements CompareIsotopes'
685 % (Value, Name)
686 )
687 elif re.match("^BondCompare$", ParamName, re.I):
688 if re.match("^CompareAny$", Value, re.I):
689 ParamValue = Chem.rdFMCS.BondCompare.CompareAny
690 elif re.match("^CompareOrder$", Value, re.I):
691 ParamValue = rdFMCS.BondCompare.CompareOrder
692 elif re.match("^CompareOrderExact$", Value, re.I):
693 ParamValue = rdFMCS.BondCompare.CompareOrderExact
694 else:
695 MiscUtil.PrintError(
696 'The parameter value, %s, specified using "-m, --mcsParams" option for parameter, %s, is not a valid value. Supported values: CompareAny CompareOrder CompareOrderExact'
697 % (Value, Name)
698 )
699 elif re.match("^SeedSMARTS$", ParamName, re.I):
700 if not len(Value):
701 MiscUtil.PrintError(
702 'The parameter value specified using "-m, --mcsParams" option for parameter, %s, is empty. '
703 % (Name)
704 )
705 ParamValue = Value
706 else:
707 if not re.match("^(Yes|No|True|False)$", Value, re.I):
708 MiscUtil.PrintError(
709 'The parameter value, %s, specified using "-m, --mcsParams" option for parameter, %s, is not a valid value. Supported values: Yes No True False'
710 % (Value, Name)
711 )
712 ParamValue = False
713 if re.match("^(Yes|True)$", Value, re.I):
714 ParamValue = True
715
716 # Set value...
717 OptionsInfo["MCSParams"][ParamName] = ParamValue
718
719
720 def ProcesssConformerGeneratorOption():
721 """Process comformer generator option."""
722
723 ConfGenParams = MiscUtil.ProcessOptionConformerGenerator("--conformerGenerator", Options["--conformerGenerator"])
724
725 OptionsInfo["ConformerGenerator"] = ConfGenParams["ConformerGenerator"]
726 OptionsInfo["UseBasicKnowledge"] = ConfGenParams["UseBasicKnowledge"]
727 OptionsInfo["UseExpTorsionAnglePrefs"] = ConfGenParams["UseExpTorsionAnglePrefs"]
728 OptionsInfo["ETVersion"] = ConfGenParams["ETVersion"]
729
730
731 def ProcessOptions():
732 """Process and validate command line arguments and options."""
733
734 MiscUtil.PrintInfo("Processing options...")
735
736 # Validate options...
737 ValidateOptions()
738
739 OptionsInfo["Infile"] = Options["--infile"]
740 OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters(
741 "--infileParams", Options["--infileParams"], Options["--infile"]
742 )
743
744 OptionsInfo["RefFile"] = Options["--reffile"]
745
746 OptionsInfo["Scaffold"] = Options["--scaffold"]
747 if re.match("^auto$", Options["--scaffold"], re.I):
748 UseScaffoldMCS = True
749 UseScaffoldSMARTS = False
750 ScaffoldSMARTS = None
751 else:
752 UseScaffoldMCS = False
753 UseScaffoldSMARTS = True
754 ScaffoldSMARTS = OptionsInfo["Scaffold"]
755
756 OptionsInfo["UseScaffoldMCS"] = UseScaffoldMCS
757 OptionsInfo["UseScaffoldSMARTS"] = UseScaffoldSMARTS
758 OptionsInfo["ScaffoldSMARTS"] = ScaffoldSMARTS
759 OptionsInfo["ScaffoldPatternMol"] = None
760
761 OptionsInfo["SpecifiedMCSParams"] = Options["--mcsParams"]
762 ProcessMCSParameters()
763
764 OptionsInfo["Outfile"] = Options["--outfile"]
765 OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters(
766 "--outfileParams", Options["--outfileParams"]
767 )
768
769 OptionsInfo["Overwrite"] = Options["--overwrite"]
770
771 OptionsInfo["AddHydrogens"] = True if re.match("^yes$", Options["--addHydrogens"], re.I) else False
772
773 ProcesssConformerGeneratorOption()
774
775 if re.match("^UFF$", Options["--forceField"], re.I):
776 ForceField = "UFF"
777 UseUFF = True
778 UseMMFF = False
779 elif re.match("^MMFF$", Options["--forceField"], re.I):
780 ForceField = "MMFF"
781 UseUFF = False
782 UseMMFF = True
783 else:
784 MiscUtil.PrintError(
785 'The value, %s, specified for "--forceField" is not supported.' % (Options["--forceField"],)
786 )
787
788 MMFFVariant = "MMFF94" if re.match("^MMFF94$", Options["--forceFieldMMFFVariant"], re.I) else "MMFF94s"
789
790 OptionsInfo["ForceField"] = ForceField
791 OptionsInfo["MMFFVariant"] = MMFFVariant
792 OptionsInfo["UseMMFF"] = UseMMFF
793 OptionsInfo["UseUFF"] = UseUFF
794
795 OptionsInfo["ScaffoldRMSDOut"] = True if re.match("^yes$", Options["--scaffoldRMSDOut"], re.I) else False
796
797 OptionsInfo["EnergyOut"] = True if re.match("^yes$", Options["--energyOut"], re.I) else False
798 if UseMMFF:
799 OptionsInfo["EnergyLabel"] = "%s_Energy" % MMFFVariant
800 else:
801 OptionsInfo["EnergyLabel"] = "%s_Energy" % ForceField
802
803 OptionsInfo["EnforceChirality"] = True if re.match("^yes$", Options["--enforceChirality"], re.I) else False
804
805 OptionsInfo["MaxConfs"] = int(Options["--maxConfs"])
806
807 OptionsInfo["MPMode"] = True if re.match("^yes$", Options["--mp"], re.I) else False
808 OptionsInfo["MPParams"] = MiscUtil.ProcessOptionMultiprocessingParameters("--mpParams", Options["--mpParams"])
809
810 OptionsInfo["QuietMode"] = True if re.match("^yes$", Options["--quiet"], re.I) else False
811
812 OptionsInfo["RemoveHydrogens"] = True if re.match("^yes$", Options["--removeHydrogens"], re.I) else False
813 OptionsInfo["UseTethers"] = True if re.match("^yes$", Options["--useTethers"], re.I) else False
814
815
816 def RetrieveOptions():
817 """Retrieve command line arguments and options."""
818
819 # Get options...
820 global Options
821 Options = docopt(_docoptUsage_)
822
823 # Set current working directory to the specified directory...
824 WorkingDir = Options["--workingdir"]
825 if WorkingDir:
826 os.chdir(WorkingDir)
827
828 # Handle examples option...
829 if "--examples" in Options and Options["--examples"]:
830 MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_))
831 sys.exit(0)
832
833
834 def ValidateOptions():
835 """Validate option values."""
836
837 MiscUtil.ValidateOptionTextValue("-a, --addHydrogens", Options["--addHydrogens"], "yes no")
838 MiscUtil.ValidateOptionTextValue(
839 "-c, --conformerGenerator", Options["--conformerGenerator"], "SDG KDG ETDG ETKDG ETKDGv2"
840 )
841
842 MiscUtil.ValidateOptionTextValue("-f, --forceField", Options["--forceField"], "UFF MMFF")
843 MiscUtil.ValidateOptionTextValue(" --forceFieldMMFFVariant", Options["--forceFieldMMFFVariant"], "MMFF94 MMFF94s")
844
845 MiscUtil.ValidateOptionTextValue("--scaffoldRMSDOut", Options["--scaffoldRMSDOut"], "yes no")
846
847 MiscUtil.ValidateOptionTextValue("--energyOut", Options["--energyOut"], "yes no")
848 MiscUtil.ValidateOptionTextValue("--enforceChirality ", Options["--enforceChirality"], "yes no")
849
850 MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"])
851 MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "sdf sd mol smi txt csv tsv")
852
853 MiscUtil.ValidateOptionFilePath("-r, --reffile", Options["--reffile"])
854 MiscUtil.ValidateOptionFileExt("-r, --reffile", Options["--reffile"], "sdf sd mol")
855
856 MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "sdf sd")
857 MiscUtil.ValidateOptionsOutputFileOverwrite(
858 "-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"]
859 )
860 MiscUtil.ValidateOptionsDistinctFileNames(
861 "-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"]
862 )
863
864 MiscUtil.ValidateOptionIntegerValue("--maxConfs", Options["--maxConfs"], {">": 0})
865
866 MiscUtil.ValidateOptionTextValue("--mp", Options["--mp"], "yes no")
867 MiscUtil.ValidateOptionTextValue("-q, --quiet", Options["--quiet"], "yes no")
868
869 MiscUtil.ValidateOptionTextValue("-r, --removeHydrogens", Options["--removeHydrogens"], "yes no")
870
871 MiscUtil.ValidateOptionTextValue("-u, --useTethers", Options["--useTethers"], "yes no")
872
873
874 # Setup a usage string for docopt...
875 _docoptUsage_ = """
876 RDKitPerformConstrainedMinimization.py - Perform constrained minimization
877
878 Usage:
879 RDKitPerformConstrainedMinimization.py [--addHydrogens <yes or no>] [--conformerGenerator <SDG, KDG, ETDG, ETKDG, ETKDGv2>]
880 [--forceField <UFF, or MMFF>] [--forceFieldMMFFVariant <MMFF94 or MMFF94s>]
881 [--energyOut <yes or no>] [--enforceChirality <yes or no>] [--infileParams <Name,Value,...>]
882 [--maxConfs <number>] [--mcsParams <Name,Value,...>] [--mp <yes or no>] [--mpParams <Name,Value,...>]
883 [ --outfileParams <Name,Value,...> ] [--overwrite] [--quiet <yes or no>] [ --removeHydrogens <yes or no>]
884 [--scaffold <auto or SMARTS>] [--scaffoldRMSDOut <yes or no>] [--useTethers <yes or no>]
885 [-w <dir>] -i <infile> -r <reffile> -o <outfile>
886 RDKitPerformConstrainedMinimization.py -h | --help | -e | --examples
887
888 Description:
889 Generate 3D structures for molecules by performing a constrained energy minimization
890 against a reference molecule. An initial set of 3D conformers are generated for the
891 input molecules using distance geometry. A common core scaffold, corresponding to
892 a Maximum Common Substructure (MCS) or an explicit SMARTS pattern, is identified
893 between a pair of input and reference molecules. The core scaffold atoms in input
894 molecules are aligned against the same atoms in the reference molecule. The energy
895 of aligned structures are minimized using the forcefield to generate the final 3D structures.
896
897 The supported input file formats are: Mol (.mol), SD (.sdf, .sd), SMILES (.smi
898 .csv, .tsv, .txt)
899
900 The supported output file formats are: SD (.sdf, .sd)
901
902 Options:
903 -a, --addHydrogens <yes or no> [default: yes]
904 Add hydrogens before minimization.
905 -c, --conformerGenerator <text> [default: ETKDGv2]
906 Conformation generation methodology for generating initial 3D coordinates
907 for molecules in input file. A common core scaffold is identified between a
908 pair of input and reference molecules. The atoms in common core scaffold
909 of input molecules are aligned against the reference molecule followed by
910 energy minimization to generate final 3D structure.
911
912 The possible values along with a brief description are shown below:
913
914 SDG: Standard Distance Geometry
915 KDG: basic Knowledge-terms with Distance Geometry
916 ETDG: Experimental Torsion-angle preference with Distance Geometry
917 ETKDG: Experimental Torsion-angle preference along with basic
918 Knowledge-terms and Distance Geometry [Ref 129]
919 ETKDGv2: Experimental Torsion-angle preference along with basic
920 Knowledge-terms and Distance Geometry [Ref 167]
921 -f, --forceField <UFF, MMFF> [default: MMFF]
922 Forcefield method to use for constrained energy minimization. Possible values:
923 Universal Force Field (UFF) [ Ref 81 ] or Merck Molecular Mechanics Force
924 Field [ Ref 83-87 ] .
925 --forceFieldMMFFVariant <MMFF94 or MMFF94s> [default: MMFF94]
926 Variant of MMFF forcefield to use for energy minimization.
927 --energyOut <yes or no> [default: No]
928 Write out energy values.
929 --enforceChirality <yes or no> [default: Yes]
930 Enforce chirality for defined chiral centers.
931 -e, --examples
932 Print examples.
933 -h, --help
934 Print this help message.
935 -i, --infile <infile>
936 Input file name.
937 --infileParams <Name,Value,...> [default: auto]
938 A comma delimited list of parameter name and value pairs for reading
939 molecules from files. The supported parameter names for different file
940 formats, along with their default values, are shown below:
941
942 SD, MOL: removeHydrogens,yes,sanitize,yes,strictParsing,yes
943
944 SMILES: smilesColumn,1,smilesNameColumn,2,smilesDelimiter,space,
945 smilesTitleLine,auto,sanitize,yes
946
947 Possible values for smilesDelimiter: space, comma or tab.
948 --maxConfs <number> [default: 250]
949 Maximum number of conformations to generate for each molecule by conformation
950 generation methodology for initial 3D coordinates. A constrained minimization is
951 performed using the specified forcefield and the lowest energy conformation is written
952 to the output file.
953 --mcsParams <Name,Value,...> [default: auto]
954 Parameter values to use for identifying a maximum common substructure
955 (MCS) in between a pair of reference and input molecules.In general, it is a
956 comma delimited list of parameter name and value pairs. The supported
957 parameter names along with their default values are shown below:
958
959 atomCompare,CompareElements,bondCompare,CompareOrder,
960 maximizeBonds,yes,matchValences,yes,matchChiralTag,no,
961 minNumAtoms,1,minNumBonds,0,ringMatchesRingOnly,yes,
962 completeRingsOnly,yes,threshold,1.0,timeOut,3600,seedSMARTS,none
963
964 Possible values for atomCompare: CompareAny, CompareElements,
965 CompareIsotopes. Possible values for bondCompare: CompareAny,
966 CompareOrder, CompareOrderExact.
967
968 A brief description of MCS parameters taken from RDKit documentation is
969 as follows:
970
971 atomCompare - Controls match between two atoms
972 bondCompare - Controls match between two bonds
973 maximizeBonds - Maximize number of bonds instead of atoms
974 matchValences - Include atom valences in the MCS match
975 matchChiralTag - Include atom chirality in the MCS match
976 minNumAtoms - Minimum number of atoms in the MCS match
977 minNumBonds - Minimum number of bonds in the MCS match
978 ringMatchesRingOnly - Ring bonds only match other ring bonds
979 completeRingsOnly - Partial rings not allowed during the match
980 threshold - Fraction of the dataset that must contain the MCS
981 seedSMARTS - SMARTS string as the seed of the MCS
982 timeout - Timeout for the MCS calculation in seconds
983
984 --mp <yes or no> [default: no]
985 Use multiprocessing.
986
987 By default, input data is retrieved in a lazy manner via mp.Pool.imap()
988 function employing lazy RDKit data iterable. This allows processing of
989 arbitrary large data sets without any additional requirements memory.
990
991 All input data may be optionally loaded into memory by mp.Pool.map()
992 before starting worker processes in a process pool by setting the value
993 of 'inputDataMode' to 'InMemory' in '--mpParams' option.
994
995 A word to the wise: The default 'chunkSize' value of 1 during 'Lazy' input
996 data mode may adversely impact the performance. The '--mpParams' section
997 provides additional information to tune the value of 'chunkSize'.
998 --mpParams <Name,Value,...> [default: auto]
999 A comma delimited list of parameter name and value pairs to configure
1000 multiprocessing.
1001
1002 The supported parameter names along with their default and possible
1003 values are shown below:
1004
1005 chunkSize, auto
1006 inputDataMode, Lazy [ Possible values: InMemory or Lazy ]
1007 numProcesses, auto [ Default: mp.cpu_count() ]
1008
1009 These parameters are used by the following functions to configure and
1010 control the behavior of multiprocessing: mp.Pool(), mp.Pool.map(), and
1011 mp.Pool.imap().
1012
1013 The chunkSize determines chunks of input data passed to each worker
1014 process in a process pool by mp.Pool.map() and mp.Pool.imap() functions.
1015 The default value of chunkSize is dependent on the value of 'inputDataMode'.
1016
1017 The mp.Pool.map() function, invoked during 'InMemory' input data mode,
1018 automatically converts RDKit data iterable into a list, loads all data into
1019 memory, and calculates the default chunkSize using the following method
1020 as shown in its code:
1021
1022 chunkSize, extra = divmod(len(dataIterable), len(numProcesses) * 4)
1023 if extra: chunkSize += 1
1024
1025 For example, the default chunkSize will be 7 for a pool of 4 worker processes
1026 and 100 data items.
1027
1028 The mp.Pool.imap() function, invoked during 'Lazy' input data mode, employs
1029 'lazy' RDKit data iterable to retrieve data as needed, without loading all the
1030 data into memory. Consequently, the size of input data is not known a priori.
1031 It's not possible to estimate an optimal value for the chunkSize. The default
1032 chunkSize is set to 1.
1033
1034 The default value for the chunkSize during 'Lazy' data mode may adversely
1035 impact the performance due to the overhead associated with exchanging
1036 small chunks of data. It is generally a good idea to explicitly set chunkSize to
1037 a larger value during 'Lazy' input data mode, based on the size of your input
1038 data and number of processes in the process pool.
1039
1040 The mp.Pool.map() function waits for all worker processes to process all
1041 the data and return the results. The mp.Pool.imap() function, however,
1042 returns the the results obtained from worker processes as soon as the
1043 results become available for specified chunks of data.
1044
1045 The order of data in the results returned by both mp.Pool.map() and
1046 mp.Pool.imap() functions always corresponds to the input data.
1047 -o, --outfile <outfile>
1048 Output file name.
1049 --outfileParams <Name,Value,...> [default: auto]
1050 A comma delimited list of parameter name and value pairs for writing
1051 molecules to files. The supported parameter names for different file
1052 formats, along with their default values, are shown below:
1053
1054 SD: kekulize,yes,forceV3000,no
1055
1056 --overwrite
1057 Overwrite existing files.
1058 -q, --quiet <yes or no> [default: no]
1059 Use quiet mode. The warning and information messages will not be printed.
1060 -r, --reffile <reffile>
1061 Reference input file name containing a 3D reference molecule. A common
1062 core scaffold must be present in a pair of an input and reference molecules.
1063 Otherwise, no constrained minimization is performed on the input molecule.
1064 --removeHydrogens <yes or no> [default: Yes]
1065 Remove hydrogens after minimization.
1066 -s, --scaffold <auto or SMARTS> [default: auto]
1067 Common core scaffold between a pair of input and reference molecules used for
1068 constrained minimization of molecules in input file. Possible values: Auto or a
1069 valid SMARTS pattern. The common core scaffold is automatically detected
1070 corresponding to the Maximum Common Substructure (MCS) between a pair of
1071 reference and input molecules. A valid SMARTS pattern may be optionally specified
1072 for the common core scaffold.
1073 --scaffoldRMSDOut <yes or no> [default: No]
1074 Write out RMSD value for common core alignment between a pair of input and
1075 reference molecules.
1076 -u, --useTethers <yes or no> [default: yes]
1077 Use tethers to optimize the final conformation by applying a series of extra forces
1078 to align matching atoms to the positions of the core atoms. Otherwise, use simple
1079 distance constraints during the optimization.
1080 -w, --workingdir <dir>
1081 Location of working directory which defaults to the current directory.
1082
1083 Examples:
1084 To perform constrained energy minimization for molecules in a SMILES file against
1085 a reference 3D molecule in a SD file using a common core scaffold between pairs of
1086 input and reference molecules identified using MCS, generating up to 250 conformations
1087 using ETKDG methodology followed by MMFF forcefield minimization, and write out
1088 a SD file containing minimum energy structure corresponding to each constrained
1089 molecule, type:
1090
1091 % RDKitPerformConstrainedMinimization.py -i SampleSeriesD3R.smi
1092 -r SampleSeriesRef3D.sdf -o SampleOut.sdf
1093
1094 To rerun the first example in a quiet mode and write out a SD file, type:
1095
1096 % RDKitPerformConstrainedMinimization.py -q yes -i SampleSeriesD3R.smi
1097 -r SampleSeriesRef3D.sdf -o SampleOut.sdf
1098
1099 To run the first example in multiprocessing mode on all available CPUs
1100 without loading all data into memory and write out a SD file, type:
1101
1102 % RDKitPerformConstrainedMinimization.py --mp yes
1103 -i SampleSeriesD3R.smi -r SampleSeriesRef3D.sdf -o SampleOut.sdf
1104
1105 To rerun the first example in multiprocessing mode on all available CPUs
1106 by loading all data into memory and write out a SD file, type:
1107
1108 % RDKitPerformConstrainedMinimization.py --mp yes --mpParams
1109 "inputDataMode,InMemory" -i SampleSeriesD3R.smi
1110 -r SampleSeriesRef3D.sdf -o SampleOut.sdf
1111
1112 To rerun the first example using an explicit SMARTS string for a common core
1113 scaffold and write out a SD file, type:
1114
1115 % RDKitPerformConstrainedMinimization.py --scaffold
1116 "c1c(C(N(C(c2cc(-c3nc(N)ncc3)cn2))))cccc1" -i SampleSeriesD3R.smi -r
1117 SampleSeriesRef3D.sdf -o SampleOut.sdf
1118
1119 To rerun the first example using molecules in a CSV SMILES file, SMILES
1120 strings in column 1, name in column2, and write out a SD file, type:
1121
1122 % RDKitPerformConstrainedMinimization.py --infileParams "smilesDelimiter,
1123 comma,smilesTitleLine,yes,smilesColumn,1,smilesNameColumn,2"
1124 -i SampleSeriesD3R.csv -r SampleSeriesRef3D.sdf -o SampleOut.sdf
1125
1126 To perform constrained energy minimization for molecules in a SD file against
1127 a reference 3D molecule in a SD file using a common core scaffold between pairs of
1128 input and reference molecules identified using MCS, generating up to 50 conformations
1129 using SDG methodology followed by UFF forcefield minimization, and write out
1130 a SD file containing minimum energy structure along with energy and embed RMS values
1131 corresponding to each constrained molecule, type:
1132
1133 % RDKitPerformConstrainedMinimization.py --maxConfs 50 -c SDG -f UFF
1134 --scaffoldRMSDOut yes --energyOut yes -i SampleSeriesD3R.sdf
1135 -r SampleSeriesRef3D.sdf -o SampleOut.sdf
1136
1137 Author:
1138 Manish Sud(msud@san.rr.com)
1139
1140 See also:
1141 RDKitCalculateRMSD.py, RDKitCalculateMolecularDescriptors.py, RDKitCompareMoleculeShapes.py,
1142 RDKitConvertFileFormat.py, RDKitGenerateConstrainedConformers.py, RDKitPerformMinimization.py
1143
1144 Copyright:
1145 Copyright (C) 2026 Manish Sud. All rights reserved.
1146
1147 The functionality available in this script is implemented using RDKit, an
1148 open source toolkit for cheminformatics developed by Greg Landrum.
1149
1150 This file is part of MayaChemTools.
1151
1152 MayaChemTools is free software; you can redistribute it and/or modify it under
1153 the terms of the GNU Lesser General Public License as published by the Free
1154 Software Foundation; either version 3 of the License, or (at your option) any
1155 later version.
1156
1157 """
1158
1159 if __name__ == "__main__":
1160 main()