1 #!/bin/env python
2 #
3 # File: Psi4GenerateConstrainedConformers.py
4 # Author: Manish Sud <msud@san.rr.com>
5 #
6 # Copyright (C) 2026 Manish Sud. All rights reserved.
7 #
8 # The functionality available in this script is implemented using Psi4, an
9 # open source quantum chemistry software package, and RDKit, an open
10 # source toolkit for cheminformatics developed by Greg Landrum.
11 #
12 # This file is part of MayaChemTools.
13 #
14 # MayaChemTools is free software; you can redistribute it and/or modify it under
15 # the terms of the GNU Lesser General Public License as published by the Free
16 # Software Foundation; either version 3 of the License, or (at your option) any
17 # later version.
18 #
19 # MayaChemTools is distributed in the hope that it will be useful, but without
20 # any warranty; without even the implied warranty of merchantability of fitness
21 # for a particular purpose. See the GNU Lesser General Public License for more
22 # details.
23 #
24 # You should have received a copy of the GNU Lesser General Public License
25 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
26 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
27 # Boston, MA, 02111-1307, USA.
28 #
29
30 from __future__ import print_function
31
32 import os
33 import sys
34 import time
35 import re
36 import shutil
37 import multiprocessing as mp
38
39 # Psi4 imports...
40 if hasattr(shutil, "which") and shutil.which("psi4") is None:
41 sys.stderr.write("\nWarning: Failed to find 'psi4' in your PATH indicating potential issues with your\n")
42 sys.stderr.write("Psi4 environment. The 'import psi4' directive in the global scope of the script\n")
43 sys.stderr.write("interferes with the multiprocessing functionality. It is imported later in the\n")
44 sys.stderr.write("local scope during the execution of the script and may fail. Check/update your\n")
45 sys.stderr.write("Psi4 environment and try again.\n\n")
46
47 # RDKit imports...
48 try:
49 from rdkit import rdBase
50 from rdkit import Chem
51 from rdkit.Chem import AllChem, rdMolAlign
52 from rdkit.Chem import rdFMCS
53 except ImportError as ErrMsg:
54 sys.stderr.write("\nFailed to import RDKit module/package: %s\n" % ErrMsg)
55 sys.stderr.write("Check/update your RDKit environment and try again.\n\n")
56 sys.exit(1)
57
58 # MayaChemTools imports...
59 sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python"))
60 try:
61 from docopt import docopt
62 import MiscUtil
63 import Psi4Util
64 import RDKitUtil
65 except ImportError as ErrMsg:
66 sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg)
67 sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n")
68 sys.exit(1)
69
70 ScriptName = os.path.basename(sys.argv[0])
71 Options = {}
72 OptionsInfo = {}
73
74
75 def main():
76 """Start execution of the script."""
77
78 MiscUtil.PrintInfo(
79 "\n%s (Psi4: Imported later; RDKit v%s; MayaChemTools v%s; %s): Starting...\n"
80 % (ScriptName, rdBase.rdkitVersion, MiscUtil.GetMayaChemToolsVersion(), time.asctime())
81 )
82
83 (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime()
84
85 # Retrieve command line arguments and options...
86 RetrieveOptions()
87
88 # Process and validate command line arguments and options...
89 ProcessOptions()
90
91 # Perform actions required by the script...
92 GenerateConstrainedConformers()
93
94 MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName)
95 MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime))
96
97
98 def GenerateConstrainedConformers():
99 """Generate constrained conformers."""
100
101 # Read and validate reference molecule...
102 RefMol = RetrieveReferenceMolecule()
103
104 # Setup a molecule reader for input file...
105 MiscUtil.PrintInfo("\nProcessing file %s..." % OptionsInfo["Infile"])
106 Mols = RDKitUtil.ReadMolecules(OptionsInfo["Infile"], **OptionsInfo["InfileParams"])
107
108 # Set up a molecule writer...
109 Writer = RDKitUtil.MoleculesWriter(OptionsInfo["Outfile"], **OptionsInfo["OutfileParams"])
110 if Writer is None:
111 MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OptionsInfo["Outfile"])
112 MiscUtil.PrintInfo("Generating file %s..." % OptionsInfo["Outfile"])
113
114 MolCount, ValidMolCount, CoreScaffoldMissingCount, ConfGenFailedCount = ProcessMolecules(RefMol, Mols, Writer)
115
116 if Writer is not None:
117 Writer.close()
118
119 MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount)
120 MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount)
121 MiscUtil.PrintInfo("Number of molecules with missing core scaffold: %d" % CoreScaffoldMissingCount)
122 MiscUtil.PrintInfo(
123 "Number of molecules failed during conformation generation or minimization: %d" % ConfGenFailedCount
124 )
125 MiscUtil.PrintInfo(
126 "Number of ignored molecules: %d" % (MolCount - ValidMolCount + CoreScaffoldMissingCount + ConfGenFailedCount)
127 )
128
129
130 def ProcessMolecules(RefMol, Mols, Writer):
131 """Process molecules to generate constrained conformers."""
132
133 if OptionsInfo["MPMode"]:
134 return ProcessMoleculesUsingMultipleProcesses(RefMol, Mols, Writer)
135 else:
136 return ProcessMoleculesUsingSingleProcess(RefMol, Mols, Writer)
137
138
139 def ProcessMoleculesUsingSingleProcess(RefMol, Mols, Writer):
140 """Process molecules to generate constrained conformers using a single process."""
141
142 # Intialize Psi4...
143 MiscUtil.PrintInfo("\nInitializing Psi4...")
144 Psi4Handle = Psi4Util.InitializePsi4(
145 Psi4RunParams=OptionsInfo["Psi4RunParams"],
146 Psi4OptionsParams=OptionsInfo["Psi4OptionsParams"],
147 PrintVersion=True,
148 PrintHeader=True,
149 )
150 OptionsInfo["psi4"] = Psi4Handle
151
152 # Setup max iterations global variable...
153 Psi4Util.UpdatePsi4OptionsParameters(Psi4Handle, {"GEOM_MAXITER": OptionsInfo["MaxIters"]})
154
155 # Setup conversion factor for energy units...
156 SetupEnergyConversionFactor(Psi4Handle)
157
158 (MolCount, ValidMolCount, CoreScaffoldMissingCount, ConfGenFailedCount) = [0] * 4
159
160 MiscUtil.PrintInfo("\nGenerating constrained conformers and performing energy minimization...")
161
162 for Mol in Mols:
163 MolCount += 1
164
165 if not CheckAndValidateMolecule(Mol, MolCount):
166 continue
167
168 # Setup 2D coordinates for SMILES input file...
169 if OptionsInfo["SMILESInfileStatus"]:
170 AllChem.Compute2DCoords(Mol)
171
172 ValidMolCount += 1
173
174 # Setup a reference molecule core containing common scaffold atoms...
175 RefMolCore = SetupCoreScaffold(RefMol, Mol, MolCount)
176 if RefMolCore is None:
177 CoreScaffoldMissingCount += 1
178 continue
179
180 ConfMols, CalcStatus, ConfIDs, ConfEnergyValues, ConfScaffoldEmbedRMSDValues = GenerateMolConformers(
181 Psi4Handle, Mol, RefMolCore, MolCount
182 )
183
184 if not CalcStatus:
185 ConfGenFailedCount += 1
186 continue
187
188 WriteMolConformers(Writer, Mol, MolCount, ConfMols, ConfIDs, ConfEnergyValues, ConfScaffoldEmbedRMSDValues)
189
190 return (MolCount, ValidMolCount, CoreScaffoldMissingCount, ConfGenFailedCount)
191
192
193 def ProcessMoleculesUsingMultipleProcesses(RefMol, Mols, Writer):
194 """Process molecules to generate constrained conformers using multiprocessing."""
195
196 if OptionsInfo["MPLevelConformersMode"]:
197 return ProcessMoleculesUsingMultipleProcessesAtConformersLevel(RefMol, Mols, Writer)
198 elif OptionsInfo["MPLevelMoleculesMode"]:
199 return ProcessMoleculesUsingMultipleProcessesAtMoleculesLevel(RefMol, Mols, Writer)
200 else:
201 MiscUtil.PrintError('The value, %s, option "--mpLevel" is not supported.' % (OptionsInfo["MPLevel"]))
202
203
204 def ProcessMoleculesUsingMultipleProcessesAtMoleculesLevel(RefMol, Mols, Writer):
205 """Process molecules to generate constrained conformers using multiprocessing at molecules level."""
206
207 MiscUtil.PrintInfo(
208 "\nGenerating constrained conformers and performing energy minimization using multiprocessing at molecules level..."
209 )
210
211 MPParams = OptionsInfo["MPParams"]
212
213 # Setup data for initializing a worker process...
214 OptionsInfo["EncodedRefMol"] = RDKitUtil.MolToBase64EncodedMolString(RefMol)
215 InitializeWorkerProcessArgs = (
216 MiscUtil.ObjectToBase64EncodedString(Options),
217 MiscUtil.ObjectToBase64EncodedString(OptionsInfo),
218 )
219
220 # Setup a encoded mols data iterable for a worker process...
221 WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(Mols)
222
223 # Setup process pool along with data initialization for each process...
224 if not OptionsInfo["QuietMode"]:
225 MiscUtil.PrintInfo(
226 "\nConfiguring multiprocessing using %s method..."
227 % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()")
228 )
229 MiscUtil.PrintInfo(
230 "NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n"
231 % (
232 MPParams["NumProcesses"],
233 MPParams["InputDataMode"],
234 ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"]),
235 )
236 )
237
238 ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeWorkerProcess, InitializeWorkerProcessArgs)
239
240 # Start processing...
241 if re.match("^Lazy$", MPParams["InputDataMode"], re.I):
242 Results = ProcessPool.imap(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
243 elif re.match("^InMemory$", MPParams["InputDataMode"], re.I):
244 Results = ProcessPool.map(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
245 else:
246 MiscUtil.PrintError(
247 'The value, %s, specified for "--inputDataMode" is not supported.' % (MPParams["InputDataMode"])
248 )
249
250 # Print out Psi4 version in the main process...
251 MiscUtil.PrintInfo("\nInitializing Psi4...\n")
252 Psi4Handle = Psi4Util.InitializePsi4(PrintVersion=True, PrintHeader=False)
253 OptionsInfo["psi4"] = Psi4Handle
254
255 (MolCount, ValidMolCount, CoreScaffoldMissingCount, ConfGenFailedCount) = [0] * 4
256
257 for Result in Results:
258 MolCount += 1
259 (
260 MolIndex,
261 EncodedMol,
262 EncodedConfMols,
263 CoreScaffoldMissingStatus,
264 CalcStatus,
265 ConfIDs,
266 ConfEnergyValues,
267 ConfScaffoldEmbedRMSDValues,
268 ) = Result
269
270 if EncodedMol is None:
271 continue
272 ValidMolCount += 1
273
274 if CoreScaffoldMissingStatus:
275 CoreScaffoldMissingCount += 1
276 continue
277
278 if not CalcStatus:
279 ConfGenFailedCount += 1
280 continue
281
282 Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
283 ConfMols = [RDKitUtil.MolFromBase64EncodedMolString(EncodedConfMol) for EncodedConfMol in EncodedConfMols]
284
285 WriteMolConformers(Writer, Mol, MolCount, ConfMols, ConfIDs, ConfEnergyValues, ConfScaffoldEmbedRMSDValues)
286
287 return (MolCount, ValidMolCount, CoreScaffoldMissingCount, ConfGenFailedCount)
288
289
290 def InitializeWorkerProcess(*EncodedArgs):
291 """Initialize data for a worker process."""
292
293 global Options, OptionsInfo
294
295 if not OptionsInfo["QuietMode"]:
296 MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid())
297
298 # Decode Options and OptionInfo...
299 Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0])
300 OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1])
301
302 # Decode RefMol...
303 OptionsInfo["RefMol"] = RDKitUtil.MolFromBase64EncodedMolString(OptionsInfo["EncodedRefMol"])
304
305 # Psi4 is initialized in the worker process to avoid creation of redundant Psi4
306 # output files for each process...
307 OptionsInfo["Psi4Initialized"] = False
308
309
310 def WorkerProcess(EncodedMolInfo):
311 """Process data for a worker process."""
312
313 if not OptionsInfo["Psi4Initialized"]:
314 InitializePsi4ForWorkerProcess()
315
316 MolIndex, EncodedMol = EncodedMolInfo
317
318 MolNum = MolIndex + 1
319 ConfMols = None
320 CoreScaffoldMissingStatus = False
321 CalcStatus = False
322 ConfIDs = None
323 ConfEnergyValues = None
324 ConfScaffoldEmbedRMSDValues = None
325
326 if EncodedMol is None:
327 return [
328 MolIndex,
329 None,
330 ConfMols,
331 CoreScaffoldMissingStatus,
332 CalcStatus,
333 ConfIDs,
334 ConfEnergyValues,
335 ConfScaffoldEmbedRMSDValues,
336 ]
337
338 Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
339 if not CheckAndValidateMolecule(Mol, MolNum):
340 return [
341 MolIndex,
342 None,
343 ConfMols,
344 CoreScaffoldMissingStatus,
345 CalcStatus,
346 ConfIDs,
347 ConfEnergyValues,
348 ConfScaffoldEmbedRMSDValues,
349 ]
350
351 # Setup 2D coordinates for SMILES input file...
352 if OptionsInfo["SMILESInfileStatus"]:
353 AllChem.Compute2DCoords(Mol)
354
355 # Setup a reference molecule core containing common scaffold atoms...
356 RefMolCore = SetupCoreScaffold(OptionsInfo["RefMol"], Mol, MolNum)
357 if RefMolCore is None:
358 CoreScaffoldMissingStatus = True
359 return [
360 MolIndex,
361 EncodedMol,
362 ConfMols,
363 CoreScaffoldMissingStatus,
364 CalcStatus,
365 ConfIDs,
366 ConfEnergyValues,
367 ConfScaffoldEmbedRMSDValues,
368 ]
369
370 # Generate conformers...
371 ConfMols, CalcStatus, ConfIDs, ConfEnergyValues, ConfScaffoldEmbedRMSDValues = GenerateMolConformers(
372 OptionsInfo["psi4"], Mol, RefMolCore, MolNum
373 )
374
375 EncodedConfMols = None
376 if ConfMols is not None:
377 EncodedConfMols = [
378 RDKitUtil.MolToBase64EncodedMolString(
379 ConfMol,
380 PropertyPickleFlags=Chem.PropertyPickleOptions.MolProps | Chem.PropertyPickleOptions.PrivateProps,
381 )
382 for ConfMol in ConfMols
383 ]
384
385 return [
386 MolIndex,
387 EncodedMol,
388 EncodedConfMols,
389 CoreScaffoldMissingStatus,
390 CalcStatus,
391 ConfIDs,
392 ConfEnergyValues,
393 ConfScaffoldEmbedRMSDValues,
394 ]
395
396
397 def ProcessMoleculesUsingMultipleProcessesAtConformersLevel(RefMol, Mols, Writer):
398 """Process and minimize molecules using multiprocessing at conformers level."""
399
400 MiscUtil.PrintInfo(
401 "\nPerforming constrained minimization with generation of conformers using multiprocessing at conformers level..."
402 )
403
404 (MolCount, ValidMolCount, CoreScaffoldMissingCount, ConfGenFailedCount) = [0] * 4
405
406 for Mol in Mols:
407 MolCount += 1
408
409 if not CheckAndValidateMolecule(Mol, MolCount):
410 continue
411
412 # Setup 2D coordinates for SMILES input file...
413 if OptionsInfo["SMILESInfileStatus"]:
414 AllChem.Compute2DCoords(Mol)
415
416 ValidMolCount += 1
417
418 # Setup a reference molecule core containing common scaffold atoms...
419 RefMolCore = SetupCoreScaffold(RefMol, Mol, MolCount)
420 if RefMolCore is None:
421 CoreScaffoldMissingCount += 1
422 continue
423
424 ConfMols, CalcStatus, ConfIDs, ConfEnergyValues, ConfScaffoldEmbedRMSDValues = (
425 ProcessConformersUsingMultipleProcesses(Mol, RefMolCore, MolCount)
426 )
427
428 if not CalcStatus:
429 ConfGenFailedCount += 1
430 continue
431
432 WriteMolConformers(Writer, Mol, MolCount, ConfMols, ConfIDs, ConfEnergyValues, ConfScaffoldEmbedRMSDValues)
433
434 return (MolCount, ValidMolCount, CoreScaffoldMissingCount, ConfGenFailedCount)
435
436
437 def ProcessConformersUsingMultipleProcesses(Mol, RefMolCore, MolNum):
438 """Generate constrained conformers and minimize them using multiple processes."""
439
440 # Add hydrogens...
441 Mol = Chem.AddHs(Mol, addCoords=True)
442
443 MolName = RDKitUtil.GetMolName(Mol, MolNum)
444
445 # Setup constrained conformers...
446 MolConfs, MolConfsStatus, MolConfIDs = ConstrainEmbedAndMinimizeMoleculeUsingRDKit(Mol, RefMolCore, MolNum)
447 if not MolConfsStatus:
448 if not OptionsInfo["QuietMode"]:
449 MiscUtil.PrintWarning(
450 "Conformation generation couldn't be performed for molecule %s: Constrained embedding failed...\n"
451 % MolName
452 )
453 return (None, False, None, None, None)
454
455 MPParams = OptionsInfo["MPParams"]
456
457 # Setup data for initializing a worker process...
458 OptionsInfo["EncodedRefMolCore"] = RDKitUtil.MolToBase64EncodedMolString(RefMolCore)
459 InitializeWorkerProcessArgs = (
460 MiscUtil.ObjectToBase64EncodedString(Options),
461 MiscUtil.ObjectToBase64EncodedString(OptionsInfo),
462 )
463
464 # Setup a encoded mols data iterable for a worker process...
465 WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStringsWithIDs(MolConfs, MolConfIDs)
466
467 # Setup process pool along with data initialization for each process...
468 if not OptionsInfo["QuietMode"]:
469 MiscUtil.PrintInfo(
470 "\nConfiguring multiprocessing using %s method..."
471 % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()")
472 )
473 MiscUtil.PrintInfo(
474 "NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n"
475 % (
476 MPParams["NumProcesses"],
477 MPParams["InputDataMode"],
478 ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"]),
479 )
480 )
481
482 ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeConformerWorkerProcess, InitializeWorkerProcessArgs)
483
484 # Start processing...
485 if re.match("^Lazy$", MPParams["InputDataMode"], re.I):
486 Results = ProcessPool.imap(ConformerWorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
487 elif re.match("^InMemory$", MPParams["InputDataMode"], re.I):
488 Results = ProcessPool.map(ConformerWorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
489 else:
490 MiscUtil.PrintError(
491 'The value, %s, specified for "--inputDataMode" is not supported.' % (MPParams["InputDataMode"])
492 )
493
494 MolConfIDs = []
495 MolConfs = []
496 CalcEnergyMap = {}
497 ScaffoldEmbedRMSDMap = {}
498 CalcFailedCount = 0
499
500 for Result in Results:
501 ConfID, EncodedMolConf, CalcStatus, Energy, ScaffoldEmbedRMSD = Result
502
503 if EncodedMolConf is None:
504 CalcFailedCount += 1
505 continue
506
507 if not CalcStatus:
508 CalcFailedCount += 1
509 continue
510
511 MolConf = RDKitUtil.MolFromBase64EncodedMolString(EncodedMolConf)
512
513 MolConfIDs.append(ConfID)
514 MolConfs.append(MolConf)
515 CalcEnergyMap[ConfID] = Energy
516 ScaffoldEmbedRMSDMap[ConfID] = ScaffoldEmbedRMSD
517
518 if CalcFailedCount:
519 return (None, False, None, None, None)
520
521 # Filter conformers...
522 SelectedMolConfs, SelectedMolConfIDs, SelectedConfEnergies, SelectedConfScaffoldEmbedRMSDValues = (
523 FilterMolConformers(Mol, MolNum, MolConfs, MolConfIDs, CalcEnergyMap, ScaffoldEmbedRMSDMap)
524 )
525
526 return (SelectedMolConfs, True, SelectedMolConfIDs, SelectedConfEnergies, SelectedConfScaffoldEmbedRMSDValues)
527
528
529 def InitializeConformerWorkerProcess(*EncodedArgs):
530 """Initialize data for a conformer worker process."""
531
532 global Options, OptionsInfo
533
534 if not OptionsInfo["QuietMode"]:
535 MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid())
536
537 # Decode Options and OptionInfo...
538 Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0])
539 OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1])
540
541 # Decode RefMol...
542 OptionsInfo["RefMolCore"] = RDKitUtil.MolFromBase64EncodedMolString(OptionsInfo["EncodedRefMolCore"])
543
544 # Psi4 is initialized in the worker process to avoid creation of redundant Psi4
545 # output files for each process...
546 OptionsInfo["Psi4Initialized"] = False
547
548
549 def ConformerWorkerProcess(EncodedMolInfo):
550 """Process data for a conformer worker process."""
551
552 if not OptionsInfo["Psi4Initialized"]:
553 InitializePsi4ForWorkerProcess()
554
555 MolConfID, EncodedMolConf = EncodedMolInfo
556
557 MolConfNum = MolConfID
558 CalcStatus = False
559 Energy = None
560 ScaffoldEmbedRMSD = None
561
562 if EncodedMolConf is None:
563 return [MolConfNum, None, CalcStatus, Energy, ScaffoldEmbedRMSD]
564
565 MolConf = RDKitUtil.MolFromBase64EncodedMolString(EncodedMolConf)
566 MolConfName = RDKitUtil.GetMolName(MolConf, MolConfNum)
567
568 if not OptionsInfo["QuietMode"]:
569 MiscUtil.PrintInfo("Processing molecule %s conformer number %s..." % (MolConfName, MolConfNum))
570
571 # Perform Psi4 constrained minimization...
572 CalcStatus, Energy = ConstrainAndMinimizeMoleculeUsingPsi4(
573 OptionsInfo["psi4"], MolConf, OptionsInfo["RefMolCore"], MolConfNum
574 )
575 if not CalcStatus:
576 if not OptionsInfo["QuietMode"]:
577 MiscUtil.PrintWarning("Minimization couldn't be performed for molecule %s\n" % (MolConfName))
578 return [MolConfNum, None, CalcStatus, Energy, ScaffoldEmbedRMSD]
579
580 if OptionsInfo["ScaffoldRMSDOut"]:
581 ScaffoldEmbedRMSD = "%.4f" % float(MolConf.GetProp("EmbedRMS"))
582 MolConf.ClearProp("EmbedRMS")
583
584 return [
585 MolConfNum,
586 RDKitUtil.MolToBase64EncodedMolString(
587 MolConf, PropertyPickleFlags=Chem.PropertyPickleOptions.MolProps | Chem.PropertyPickleOptions.PrivateProps
588 ),
589 CalcStatus,
590 Energy,
591 ScaffoldEmbedRMSD,
592 ]
593
594
595 def InitializePsi4ForWorkerProcess():
596 """Initialize Psi4 for a worker process."""
597
598 if OptionsInfo["Psi4Initialized"]:
599 return
600
601 OptionsInfo["Psi4Initialized"] = True
602
603 if OptionsInfo["MPLevelConformersMode"] and re.match(
604 "auto", OptionsInfo["Psi4RunParams"]["OutputFileSpecified"], re.I
605 ):
606 # Run Psi4 in quiet mode during multiprocessing at Conformers level for 'auto' OutputFile...
607 OptionsInfo["Psi4RunParams"]["OutputFile"] = "quiet"
608 else:
609 # Update output file...
610 OptionsInfo["Psi4RunParams"]["OutputFile"] = Psi4Util.UpdatePsi4OutputFileUsingPID(
611 OptionsInfo["Psi4RunParams"]["OutputFile"], os.getpid()
612 )
613
614 # Intialize Psi4...
615 OptionsInfo["psi4"] = Psi4Util.InitializePsi4(
616 Psi4RunParams=OptionsInfo["Psi4RunParams"],
617 Psi4OptionsParams=OptionsInfo["Psi4OptionsParams"],
618 PrintVersion=False,
619 PrintHeader=True,
620 )
621
622 # Setup max iterations global variable...
623 Psi4Util.UpdatePsi4OptionsParameters(OptionsInfo["psi4"], {"GEOM_MAXITER": OptionsInfo["MaxIters"]})
624
625 # Setup conversion factor for energy units...
626 SetupEnergyConversionFactor(OptionsInfo["psi4"])
627
628
629 def RetrieveReferenceMolecule():
630 """Retrieve and validate reference molecule."""
631
632 RefFile = OptionsInfo["RefFile"]
633
634 MiscUtil.PrintInfo("\nProcessing file %s..." % (RefFile))
635 OptionsInfo["InfileParams"]["AllowEmptyMols"] = False
636 ValidRefMols, RefMolCount, ValidRefMolCount = RDKitUtil.ReadAndValidateMolecules(
637 RefFile, **OptionsInfo["InfileParams"]
638 )
639
640 if ValidRefMolCount == 0:
641 MiscUtil.PrintError("The reference file, %s, contains no valid molecules." % RefFile)
642 elif ValidRefMolCount > 1:
643 MiscUtil.PrintWarning(
644 "The reference file, %s, contains, %d, valid molecules. Using first molecule as the reference molecule..."
645 % (RefFile, ValidRefMolCount)
646 )
647
648 RefMol = ValidRefMols[0]
649
650 if OptionsInfo["UseScaffoldSMARTS"]:
651 ScaffoldPatternMol = Chem.MolFromSmarts(OptionsInfo["ScaffoldSMARTS"])
652 if ScaffoldPatternMol is None:
653 MiscUtil.PrintError(
654 'Failed to create scaffold pattern molecule. The scaffold SMARTS pattern, %s, specified using "-s, --scaffold" option is not valid.'
655 % (OptionsInfo["ScaffoldSMARTS"])
656 )
657
658 if not RefMol.HasSubstructMatch(ScaffoldPatternMol):
659 MiscUtil.PrintError(
660 'The scaffold SMARTS pattern, %s, specified using "-s, --scaffold" option, is missing in the first valid reference molecule.'
661 % (OptionsInfo["ScaffoldSMARTS"])
662 )
663
664 return RefMol
665
666
667 def SetupCoreScaffold(RefMol, Mol, MolCount):
668 """Setup a reference molecule core containing common scaffold atoms between
669 a pair of molecules."""
670
671 if OptionsInfo["UseScaffoldMCS"]:
672 return SetupCoreScaffoldByMCS(RefMol, Mol, MolCount)
673 elif OptionsInfo["UseScaffoldSMARTS"]:
674 return SetupCoreScaffoldBySMARTS(RefMol, Mol, MolCount)
675 else:
676 MiscUtil.PrintError(
677 'The value, %s, specified for "-s, --scaffold" option is not supported.' % (OptionsInfo["Scaffold"])
678 )
679
680
681 def SetupCoreScaffoldByMCS(RefMol, Mol, MolCount):
682 """Setup a reference molecule core containing common scaffold atoms between
683 a pair of molecules using MCS."""
684
685 MCSParams = OptionsInfo["MCSParams"]
686 Mols = [RefMol, Mol]
687
688 MCSResultObject = rdFMCS.FindMCS(
689 Mols,
690 maximizeBonds=MCSParams["MaximizeBonds"],
691 threshold=MCSParams["Threshold"],
692 timeout=MCSParams["TimeOut"],
693 verbose=MCSParams["Verbose"],
694 matchValences=MCSParams["MatchValences"],
695 ringMatchesRingOnly=MCSParams["RingMatchesRingOnly"],
696 completeRingsOnly=MCSParams["CompleteRingsOnly"],
697 matchChiralTag=MCSParams["MatchChiralTag"],
698 atomCompare=MCSParams["AtomCompare"],
699 bondCompare=MCSParams["BondCompare"],
700 seedSmarts=MCSParams["SeedSMARTS"],
701 )
702
703 if MCSResultObject.canceled:
704 if not OptionsInfo["QuietMode"]:
705 MiscUtil.PrintWarning(
706 'MCS failed to identify a common core scaffold between reference moecule and input molecule %s. Specify a different set of parameters using "-m, --mcsParams" option and try again.'
707 % (RDKitUtil.GetMolName(Mol, MolCount))
708 )
709 return None
710
711 CoreNumAtoms = MCSResultObject.numAtoms
712 CoreNumBonds = MCSResultObject.numBonds
713
714 SMARTSCore = MCSResultObject.smartsString
715
716 if not len(SMARTSCore):
717 if not OptionsInfo["QuietMode"]:
718 MiscUtil.PrintWarning(
719 'MCS failed to identify a common core scaffold between reference moecule and input molecule %s. Specify a different set of parameters using "-m, --mcsParams" option and try again.'
720 % (RDKitUtil.GetMolName(Mol, MolCount))
721 )
722 return None
723
724 if CoreNumAtoms < MCSParams["MinNumAtoms"]:
725 if not OptionsInfo["QuietMode"]:
726 MiscUtil.PrintWarning(
727 'Number of atoms, %d, in core scaffold identified by MCS is less than, %d, as specified by "minNumAtoms" parameter in "-m, --mcsParams" option.'
728 % (CoreNumAtoms, MCSParams["MinNumAtoms"])
729 )
730 return None
731
732 if CoreNumBonds < MCSParams["MinNumBonds"]:
733 if not OptionsInfo["QuietMode"]:
734 MiscUtil.PrintWarning(
735 'Number of bonds, %d, in core scaffold identified by MCS is less than, %d, as specified by "minNumBonds" parameter in "-m, --mcsParams" option.'
736 % (CoreNumBonds, MCSParams["MinNumBonds"])
737 )
738 return None
739
740 return GenerateCoreMol(RefMol, SMARTSCore)
741
742
743 def SetupCoreScaffoldBySMARTS(RefMol, Mol, MolCount):
744 """Setup a reference molecule core containing common scaffold atoms between
745 a pair of molecules using specified SMARTS."""
746
747 if OptionsInfo["ScaffoldPatternMol"] is None:
748 OptionsInfo["ScaffoldPatternMol"] = Chem.MolFromSmarts(OptionsInfo["ScaffoldSMARTS"])
749
750 if not Mol.HasSubstructMatch(OptionsInfo["ScaffoldPatternMol"]):
751 if not OptionsInfo["QuietMode"]:
752 MiscUtil.PrintWarning(
753 'The scaffold SMARTS pattern, %s, specified using "-s, --scaffold" option is missing in input molecule, %s.'
754 % (OptionsInfo["ScaffoldSMARTS"], RDKitUtil.GetMolName(Mol, MolCount))
755 )
756 return None
757
758 return GenerateCoreMol(RefMol, OptionsInfo["ScaffoldSMARTS"])
759
760
761 def GenerateCoreMol(RefMol, SMARTSCore):
762 """Generate core molecule for embedding."""
763
764 # Create a molecule corresponding to core atoms...
765 SMARTSCoreMol = Chem.MolFromSmarts(SMARTSCore)
766
767 # Setup a ref molecule containing core atoms with dummy atoms as
768 # attachment points for atoms around the core atoms...
769 Core = AllChem.ReplaceSidechains(Chem.RemoveHs(RefMol), SMARTSCoreMol)
770
771 # Delete any substructures containing dummy atoms..
772 RefMolCore = AllChem.DeleteSubstructs(Core, Chem.MolFromSmiles("*"))
773 RefMolCore.UpdatePropertyCache()
774
775 return RefMolCore
776
777
778 def GenerateMolConformers(Psi4Handle, Mol, RefMolCore, MolNum=None):
779 """Generate constrained conformers."""
780
781 # Add hydrogens...
782 Mol = Chem.AddHs(Mol, addCoords=True)
783
784 MolName = RDKitUtil.GetMolName(Mol, MolNum)
785
786 # Setup constrained conformers...
787 MolConfs, MolConfsStatus, MolConfIDs = ConstrainEmbedAndMinimizeMoleculeUsingRDKit(Mol, RefMolCore, MolNum)
788 if not MolConfsStatus:
789 if not OptionsInfo["QuietMode"]:
790 MiscUtil.PrintWarning(
791 "Conformation generation couldn't be performed for molecule %s: Constrained embedding failed...\n"
792 % MolName
793 )
794 return (None, False, None, None, None)
795
796 CalcEnergyMap = {}
797 ScaffoldEmbedRMSDMap = {}
798
799 for MolConfIndex, MolConf in enumerate(MolConfs):
800 MolConfID = MolConfIDs[MolConfIndex]
801
802 if not OptionsInfo["QuietMode"]:
803 MiscUtil.PrintInfo("\nProcessing molecule %s conformer number %s..." % (MolName, MolConfID))
804
805 # Perform Psi4 minimization...
806 CalcStatus, Energy = ConstrainAndMinimizeMoleculeUsingPsi4(Psi4Handle, MolConf, RefMolCore, MolNum)
807 if not CalcStatus:
808 if not OptionsInfo["QuietMode"]:
809 MiscUtil.PrintWarning("Minimization couldn't be performed for molecule %s\n" % (MolName))
810 return (None, False, None, None, None)
811
812 CalcEnergyMap[MolConfID] = Energy
813
814 if OptionsInfo["ScaffoldRMSDOut"]:
815 ScaffoldEmbedRMSDMap[MolConfID] = "%.4f" % float(MolConf.GetProp("EmbedRMS"))
816 MolConf.ClearProp("EmbedRMS")
817
818 # Filter conformers...
819 SelectedMolConfs, SelectedMolConfIDs, SelectedConfEnergies, SelectedConfScaffoldEmbedRMSDValues = (
820 FilterMolConformers(Mol, MolNum, MolConfs, MolConfIDs, CalcEnergyMap, ScaffoldEmbedRMSDMap)
821 )
822
823 return (SelectedMolConfs, True, SelectedMolConfIDs, SelectedConfEnergies, SelectedConfScaffoldEmbedRMSDValues)
824
825
826 def FilterMolConformers(Mol, MolNum, MolConfs, ConfIDs, CalcEnergyMap, ScaffoldEmbedRMSDMap):
827 """Filter conformers for a molecule."""
828
829 # Sort conformers by energy...
830 SortedConfIDs = sorted(ConfIDs, key=lambda ConfID: CalcEnergyMap[ConfID])
831
832 # Setup a map from ConfID to MolConf...
833 MolConfsMap = {}
834 for MolConfIndex, ConfID in enumerate(ConfIDs):
835 MolConfsMap[ConfID] = MolConfs[MolConfIndex]
836
837 MinEnergyConfID = SortedConfIDs[0]
838 MinConfEnergy = CalcEnergyMap[MinEnergyConfID]
839 MinEnergyMolConf = MolConfsMap[MinEnergyConfID]
840
841 EnergyWindow = OptionsInfo["EnergyWindow"]
842
843 EnergyRMSDCutoff = OptionsInfo["EnergyRMSDCutoff"]
844 ApplyEnergyRMSDCutoff = True if EnergyRMSDCutoff > 0 else False
845
846 EnergyRMSDCutoffLowest = OptionsInfo["EnergyRMSDCutoffModeLowest"]
847 EnergyRMSDCalcModeBest = OptionsInfo["EnergyRMSDCalcModeBest"]
848
849 SelectedConfIDs = []
850
851 ConfCount = 0
852 IgnoredByEnergyConfCount = 0
853 IgnoredByRMSDConfCount = 0
854
855 FirstConf = True
856
857 for ConfID in SortedConfIDs:
858 if FirstConf:
859 FirstConf = False
860 ConfCount += 1
861 SelectedConfIDs.append(ConfID)
862 continue
863
864 ConfEnergyDiff = abs(CalcEnergyMap[ConfID] - MinConfEnergy)
865 if ConfEnergyDiff > EnergyWindow:
866 IgnoredByEnergyConfCount += 1
867 continue
868
869 if ApplyEnergyRMSDCutoff:
870 IgnoreConf = False
871 ProbeMolConf = Chem.Mol(MolConfsMap[ConfID])
872
873 if EnergyRMSDCutoffLowest:
874 # Compare RMSD with the lowest energy conformation...
875 RefMolConf = Chem.Mol(MinEnergyMolConf)
876 if EnergyRMSDCalcModeBest:
877 CalcRMSD = AllChem.GetBestRMS(ProbeMolConf, RefMolConf)
878 else:
879 CalcRMSD = rdMolAlign.AlignMol(ProbeMolConf, RefMolConf)
880 if CalcRMSD < EnergyRMSDCutoff:
881 IgnoreConf = True
882 else:
883 for SelectedConfID in SelectedConfIDs:
884 RefMolConf = Chem.Mol(MolConfsMap[SelectedConfID])
885 if EnergyRMSDCalcModeBest:
886 CalcRMSD = AllChem.GetBestRMS(ProbeMolConf, RefMolConf)
887 else:
888 CalcRMSD = rdMolAlign.AlignMol(ProbeMolConf, RefMolConf)
889 if CalcRMSD < EnergyRMSDCutoff:
890 IgnoreConf = True
891 break
892 if IgnoreConf:
893 IgnoredByRMSDConfCount += 1
894 continue
895
896 ConfCount += 1
897 SelectedConfIDs.append(ConfID)
898
899 if not OptionsInfo["QuietMode"]:
900 MiscUtil.PrintInfo(
901 "\nTotal Number of conformations generated for %s: %d" % (RDKitUtil.GetMolName(Mol, MolNum), ConfCount)
902 )
903 MiscUtil.PrintInfo(
904 "Number of conformations ignored due to energy window cutoff: %d" % (IgnoredByEnergyConfCount)
905 )
906 if ApplyEnergyRMSDCutoff:
907 MiscUtil.PrintInfo(
908 "Number of conformations ignored due to energy RMSD cutoff: %d" % (IgnoredByRMSDConfCount)
909 )
910
911 # Setup selected conformer molecules...
912 SelectedConfMols = [MolConfsMap[ConfID] for ConfID in SelectedConfIDs]
913
914 # Setup energies...
915 SelectedConfEnergies = None
916 if OptionsInfo["EnergyOut"]:
917 SelectedConfEnergies = []
918 for ConfID in SelectedConfIDs:
919 Energy = "%.*f" % (OptionsInfo["Precision"], CalcEnergyMap[ConfID])
920 SelectedConfEnergies.append(Energy)
921
922 # Setup scaffold RMSD values...
923 SelectedConfScaffoldEmbedRMSDValues = None
924 if OptionsInfo["ScaffoldRMSDOut"]:
925 SelectedConfScaffoldEmbedRMSDValues = []
926 for ConfID in SelectedConfIDs:
927 # MolConf = MolConfsMap[ConfID]
928 SelectedConfScaffoldEmbedRMSDValues.append(ScaffoldEmbedRMSDMap[ConfID])
929
930 return (SelectedConfMols, SelectedConfIDs, SelectedConfEnergies, SelectedConfScaffoldEmbedRMSDValues)
931
932
933 def ConstrainAndMinimizeMoleculeUsingPsi4(Psi4Handle, Mol, RefMolCore, MolNum, ConfID=-1):
934 """Constrain and Minimize molecule using Psi4."""
935
936 # Setup a list for constrained atoms...
937 ConstrainedAtomIndices = SetupConstrainedAtomIndicesForPsi4(Mol, RefMolCore)
938 if len(ConstrainedAtomIndices) == 0:
939 return (False, None)
940
941 # Setup a Psi4Mol...
942 Psi4Mol = SetupPsi4Mol(Psi4Handle, Mol, MolNum, ConfID)
943 if Psi4Mol is None:
944 return (False, None)
945
946 # Setup reference wave function...
947 Reference = SetupReferenceWavefunction(Mol)
948 Psi4Handle.set_options({"Reference": Reference})
949
950 # Setup method name and basis set...
951 MethodName, BasisSet = SetupMethodNameAndBasisSet(Mol)
952
953 # Setup freeze list for constrained atoms...
954 FreezeXYZList = [("%s xyz" % AtomIdex) for AtomIdex in ConstrainedAtomIndices]
955 FreezeXYZString = " %s " % " ".join(FreezeXYZList)
956 Psi4Handle.set_options({"OPTKING__frozen_cartesian": FreezeXYZString})
957
958 # Optimize geometry...
959 Status, Energy, WaveFunction = Psi4Util.PerformGeometryOptimization(
960 Psi4Handle, Psi4Mol, MethodName, BasisSet, ReturnWaveFunction=True, Quiet=OptionsInfo["QuietMode"]
961 )
962
963 if not Status:
964 PerformPsi4Cleanup(Psi4Handle)
965 return (False, None)
966
967 # Update atom positions...
968 AtomPositions = Psi4Util.GetAtomPositions(Psi4Handle, WaveFunction, InAngstroms=True)
969 RDKitUtil.SetAtomPositions(Mol, AtomPositions, ConfID=ConfID)
970
971 # Convert energy units...
972 if OptionsInfo["ApplyEnergyConversionFactor"]:
973 Energy = Energy * OptionsInfo["EnergyConversionFactor"]
974
975 # Clean up
976 PerformPsi4Cleanup(Psi4Handle)
977
978 return (True, Energy)
979
980
981 def ConstrainEmbedAndMinimizeMoleculeUsingRDKit(Mol, RefMolCore, MolNum=None):
982 """Constrain, embed, and minimize molecule."""
983
984 # Setup forcefield function to use for constrained minimization...
985 ForceFieldFunction = None
986 ForceFieldName = None
987 if OptionsInfo["ConfGenerationParams"]["UseUFF"]:
988 ForceFieldFunction = lambda mol, confId=-1: AllChem.UFFGetMoleculeForceField(mol, confId=confId)
989 ForceFieldName = "UFF"
990 else:
991 ForceFieldFunction = lambda mol, confId=-1: AllChem.MMFFGetMoleculeForceField(
992 mol,
993 AllChem.MMFFGetMoleculeProperties(mol, mmffVariant=OptionsInfo["ConfGenerationParams"]["MMFFVariant"]),
994 confId=confId,
995 )
996 ForceFieldName = "MMFF"
997
998 if ForceFieldFunction is None:
999 if not OptionsInfo["QuietMode"]:
1000 MiscUtil.PrintWarning(
1001 "Failed to setup forcefield %s for molecule: %s\n" % (ForceFieldName, RDKitUtil.GetMolName(Mol, MolNum))
1002 )
1003 return (None, False, None)
1004
1005 MaxConfs = OptionsInfo["ConfGenerationParams"]["MaxConfs"]
1006 EnforceChirality = OptionsInfo["ConfGenerationParams"]["EnforceChirality"]
1007 UseExpTorsionAnglePrefs = OptionsInfo["ConfGenerationParams"]["UseExpTorsionAnglePrefs"]
1008 ETVersion = OptionsInfo["ConfGenerationParams"]["ETVersion"]
1009 UseBasicKnowledge = OptionsInfo["ConfGenerationParams"]["UseBasicKnowledge"]
1010 UseTethers = OptionsInfo["ConfGenerationParams"]["UseTethers"]
1011
1012 MolConfs = []
1013 ConfIDs = [ConfID for ConfID in range(0, MaxConfs)]
1014
1015 for ConfID in ConfIDs:
1016 try:
1017 MolConf = Chem.Mol(Mol)
1018 AllChem.ConstrainedEmbed(
1019 MolConf,
1020 RefMolCore,
1021 useTethers=UseTethers,
1022 coreConfId=-1,
1023 randomseed=ConfID,
1024 getForceField=ForceFieldFunction,
1025 enforceChirality=EnforceChirality,
1026 useExpTorsionAnglePrefs=UseExpTorsionAnglePrefs,
1027 useBasicKnowledge=UseBasicKnowledge,
1028 ETversion=ETVersion,
1029 )
1030 except (ValueError, RuntimeError, Chem.rdchem.KekulizeException) as ErrMsg:
1031 if not OptionsInfo["QuietMode"]:
1032 MiscUtil.PrintWarning(
1033 "Constrained embedding couldn't be performed for molecule %s:\n%s\n"
1034 % (RDKitUtil.GetMolName(Mol, MolNum), ErrMsg)
1035 )
1036 return (None, False, None)
1037
1038 MolConfs.append(MolConf)
1039
1040 return FilterConstrainedConformationsByRMSD(Mol, MolConfs, ConfIDs, MolNum)
1041
1042
1043 def FilterConstrainedConformationsByRMSD(Mol, MolConfs, MolConfIDs, MolNum=None):
1044 """Filter contarained conformations by RMSD."""
1045
1046 EmbedRMSDCutoff = OptionsInfo["ConfGenerationParams"]["EmbedRMSDCutoff"]
1047 if EmbedRMSDCutoff is None or EmbedRMSDCutoff <= 0:
1048 if not OptionsInfo["QuietMode"]:
1049 MiscUtil.PrintInfo(
1050 "\nGenerating initial ensemble of constrained conformations by distance geometry and forcefield for %s - EmbedRMSDCutoff: None; Size: %s"
1051 % (RDKitUtil.GetMolName(Mol, MolNum), len(MolConfs))
1052 )
1053 return (MolConfs, True, MolConfIDs)
1054
1055 FirstMolConf = True
1056 SelectedMolConfs = []
1057 SelectedMolConfIDs = []
1058 for MolConfIndex, MolConf in enumerate(MolConfs):
1059 if FirstMolConf:
1060 FirstMolConf = False
1061 SelectedMolConfs.append(MolConf)
1062 SelectedMolConfIDs.append(MolConfIDs[MolConfIndex])
1063 continue
1064
1065 # Compare RMSD against all selected conformers...
1066 IgnoreConf = False
1067 ProbeMolConf = Chem.RemoveHs(Chem.Mol(MolConf))
1068 for SelectedMolConfIndex, SelectedMolConf in enumerate(SelectedMolConfs):
1069 RefMolConf = Chem.RemoveHs(Chem.Mol(SelectedMolConf))
1070 CalcRMSD = rdMolAlign.AlignMol(ProbeMolConf, RefMolConf)
1071
1072 if CalcRMSD < EmbedRMSDCutoff:
1073 IgnoreConf = True
1074 break
1075
1076 if IgnoreConf:
1077 continue
1078
1079 SelectedMolConfs.append(MolConf)
1080 SelectedMolConfIDs.append(MolConfIDs[MolConfIndex])
1081
1082 if not OptionsInfo["QuietMode"]:
1083 MiscUtil.PrintInfo(
1084 "\nGenerating initial ensemble of constrained conformations by distance geometry and forcefield for %s - EmbedRMSDCutoff: %s; Size: %s; Size after RMSD filtering: %s"
1085 % (RDKitUtil.GetMolName(Mol, MolNum), EmbedRMSDCutoff, len(MolConfs), len(SelectedMolConfs))
1086 )
1087
1088 return (SelectedMolConfs, True, SelectedMolConfIDs)
1089
1090
1091 def SetupConstrainedAtomIndicesForPsi4(Mol, RefMolCore, ConstrainHydrogens=False):
1092 """Setup a list of atom indices to be constrained during Psi4 minimizaiton."""
1093
1094 AtomIndices = []
1095
1096 # Collect matched heavy atoms along with attached hydrogens...
1097 for AtomIndex in Mol.GetSubstructMatch(RefMolCore):
1098 Atom = Mol.GetAtomWithIdx(AtomIndex)
1099 if Atom.GetAtomicNum() == 1:
1100 continue
1101
1102 AtomIndices.append(AtomIndex)
1103 for AtomNbr in Atom.GetNeighbors():
1104 if AtomNbr.GetAtomicNum() == 1:
1105 if ConstrainHydrogens:
1106 AtomNbrIndex = AtomNbr.GetIdx()
1107 AtomIndices.append(AtomNbrIndex)
1108
1109 AtomIndices = sorted(AtomIndices)
1110
1111 # Atom indices start from 1 for Psi4 instead 0 for RDKit...
1112 AtomIndices = [AtomIndex + 1 for AtomIndex in AtomIndices]
1113
1114 return AtomIndices
1115
1116
1117 def SetupPsi4Mol(Psi4Handle, Mol, MolNum, ConfID=-1):
1118 """Setup a Psi4 molecule object."""
1119
1120 # Turn off recentering and reorientation to perform optimization in the
1121 # constrained coordinate space...
1122 MolGeometry = RDKitUtil.GetPsi4XYZFormatString(Mol, ConfID=ConfID, NoCom=True, NoReorient=True)
1123
1124 try:
1125 Psi4Mol = Psi4Handle.geometry(MolGeometry)
1126 except Exception as ErrMsg:
1127 Psi4Mol = None
1128 if not OptionsInfo["QuietMode"]:
1129 MiscUtil.PrintWarning("Failed to create Psi4 molecule from geometry string: %s\n" % ErrMsg)
1130 MolName = RDKitUtil.GetMolName(Mol, MolNum)
1131 MiscUtil.PrintWarning("Ignoring molecule: %s" % MolName)
1132
1133 return Psi4Mol
1134
1135
1136 def PerformPsi4Cleanup(Psi4Handle):
1137 """Perform clean up."""
1138
1139 # Clean up after Psi4 run...
1140 Psi4Handle.core.clean()
1141
1142 # Clean up any leftover scratch files...
1143 if OptionsInfo["MPMode"]:
1144 Psi4Util.RemoveScratchFiles(Psi4Handle, OptionsInfo["Psi4RunParams"]["OutputFile"])
1145
1146
1147 def CheckAndValidateMolecule(Mol, MolCount=None):
1148 """Validate molecule for Psi4 calculations."""
1149
1150 if Mol is None:
1151 if not OptionsInfo["QuietMode"]:
1152 MiscUtil.PrintInfo("\nProcessing molecule number %s..." % MolCount)
1153 return False
1154
1155 MolName = RDKitUtil.GetMolName(Mol, MolCount)
1156 if not OptionsInfo["QuietMode"]:
1157 MiscUtil.PrintInfo("\nProcessing molecule %s..." % MolName)
1158
1159 if RDKitUtil.IsMolEmpty(Mol):
1160 if not OptionsInfo["QuietMode"]:
1161 MiscUtil.PrintWarning("Ignoring empty molecule: %s\n" % MolName)
1162 return False
1163
1164 if not RDKitUtil.ValidateElementSymbols(RDKitUtil.GetAtomSymbols(Mol)):
1165 if not OptionsInfo["QuietMode"]:
1166 MiscUtil.PrintWarning("Ignoring molecule containing invalid element symbols: %s\n" % MolName)
1167 return False
1168
1169 return True
1170
1171
1172 def SetupMethodNameAndBasisSet(Mol):
1173 """Setup method name and basis set."""
1174
1175 MethodName = OptionsInfo["MethodName"]
1176 if OptionsInfo["MethodNameAuto"]:
1177 MethodName = "B3LYP"
1178
1179 BasisSet = OptionsInfo["BasisSet"]
1180 if OptionsInfo["BasisSetAuto"]:
1181 BasisSet = "6-31+G**" if RDKitUtil.IsAtomSymbolPresentInMol(Mol, "S") else "6-31G**"
1182
1183 return (MethodName, BasisSet)
1184
1185
1186 def SetupReferenceWavefunction(Mol):
1187 """Setup reference wavefunction."""
1188
1189 Reference = OptionsInfo["Reference"]
1190 if OptionsInfo["ReferenceAuto"]:
1191 Reference = "UHF" if (RDKitUtil.GetSpinMultiplicity(Mol) > 1) else "RHF"
1192
1193 return Reference
1194
1195
1196 def SetupEnergyConversionFactor(Psi4Handle):
1197 """Setup converstion factor for energt units. The Psi4 energy units are Hartrees."""
1198
1199 EnergyUnits = OptionsInfo["EnergyUnits"]
1200
1201 ApplyConversionFactor = True
1202 if re.match(r"^kcal\/mol$", EnergyUnits, re.I):
1203 ConversionFactor = Psi4Handle.constants.hartree2kcalmol
1204 elif re.match(r"^kJ\/mol$", EnergyUnits, re.I):
1205 ConversionFactor = Psi4Handle.constants.hartree2kJmol
1206 elif re.match("^eV$", EnergyUnits, re.I):
1207 ConversionFactor = Psi4Handle.constants.hartree2ev
1208 else:
1209 ApplyConversionFactor = False
1210 ConversionFactor = 1.0
1211
1212 OptionsInfo["ApplyEnergyConversionFactor"] = ApplyConversionFactor
1213 OptionsInfo["EnergyConversionFactor"] = ConversionFactor
1214
1215
1216 def WriteMolConformers(Writer, Mol, MolNum, ConfMols, ConfIDs, ConfEnergyValues=None, ConfScaffoldEmbedRMSDValues=None):
1217 """Write molecule coformers."""
1218
1219 if ConfMols is None:
1220 return
1221
1222 for Index, ConfMol in enumerate(ConfMols):
1223 ConfMolName = RDKitUtil.GetMolName(Mol, MolNum)
1224 SetConfMolName(ConfMol, ConfMolName, ConfIDs[Index])
1225
1226 if ConfScaffoldEmbedRMSDValues is not None:
1227 ConfMol.SetProp("CoreScaffoldEmbedRMSD", ConfScaffoldEmbedRMSDValues[Index])
1228
1229 if ConfEnergyValues is not None:
1230 ConfMol.SetProp(OptionsInfo["EnergyDataFieldLabel"], ConfEnergyValues[Index])
1231
1232 Writer.write(ConfMol)
1233
1234
1235 def SetConfMolName(Mol, MolName, ConfCount):
1236 """Set conf mol name."""
1237
1238 ConfName = "%s_Conf%d" % (MolName, ConfCount)
1239 Mol.SetProp("_Name", ConfName)
1240
1241
1242 def ProcessMCSParameters():
1243 """Set up and process MCS parameters."""
1244
1245 SetupMCSParameters()
1246 ProcessSpecifiedMCSParameters()
1247
1248
1249 def SetupMCSParameters():
1250 """Set up default MCS parameters."""
1251
1252 OptionsInfo["MCSParams"] = {
1253 "MaximizeBonds": True,
1254 "Threshold": 0.9,
1255 "TimeOut": 3600,
1256 "Verbose": False,
1257 "MatchValences": True,
1258 "MatchChiralTag": False,
1259 "RingMatchesRingOnly": True,
1260 "CompleteRingsOnly": True,
1261 "AtomCompare": rdFMCS.AtomCompare.CompareElements,
1262 "BondCompare": rdFMCS.BondCompare.CompareOrder,
1263 "SeedSMARTS": "",
1264 "MinNumAtoms": 1,
1265 "MinNumBonds": 0,
1266 }
1267
1268
1269 def ProcessSpecifiedMCSParameters():
1270 """Process specified MCS parameters."""
1271
1272 if re.match("^auto$", OptionsInfo["SpecifiedMCSParams"], re.I):
1273 # Nothing to process...
1274 return
1275
1276 # Parse specified parameters...
1277 MCSParams = re.sub(" ", "", OptionsInfo["SpecifiedMCSParams"])
1278 if not MCSParams:
1279 MiscUtil.PrintError('No valid parameter name and value pairs specified using "-m, --mcsParams" option.')
1280
1281 MCSParamsWords = MCSParams.split(",")
1282 if len(MCSParamsWords) % 2:
1283 MiscUtil.PrintError(
1284 'The number of comma delimited paramater names and values, %d, specified using "-m, --mcsParams" option must be an even number.'
1285 % (len(MCSParamsWords))
1286 )
1287
1288 # Setup canonical parameter names...
1289 ValidParamNames = []
1290 CanonicalParamNamesMap = {}
1291 for ParamName in sorted(OptionsInfo["MCSParams"]):
1292 ValidParamNames.append(ParamName)
1293 CanonicalParamNamesMap[ParamName.lower()] = ParamName
1294
1295 # Validate and set paramater names and value...
1296 for Index in range(0, len(MCSParamsWords), 2):
1297 Name = MCSParamsWords[Index]
1298 Value = MCSParamsWords[Index + 1]
1299
1300 CanonicalName = Name.lower()
1301 if CanonicalName not in CanonicalParamNamesMap:
1302 MiscUtil.PrintError(
1303 'The parameter name, %s, specified using "-m, --mcsParams" option is not a valid name. Supported parameter names: %s'
1304 % (Name, " ".join(ValidParamNames))
1305 )
1306
1307 ParamName = CanonicalParamNamesMap[CanonicalName]
1308 if re.match("^Threshold$", ParamName, re.I):
1309 Value = float(Value)
1310 if Value <= 0.0 or Value > 1.0:
1311 MiscUtil.PrintError(
1312 'The parameter value, %s, specified using "-m, --mcsParams" option for parameter, %s, is not a valid value. Supported values: > 0 and <= 1.0'
1313 % (Value, Name)
1314 )
1315 ParamValue = Value
1316 elif re.match("^Timeout$", ParamName, re.I):
1317 Value = int(Value)
1318 if Value <= 0:
1319 MiscUtil.PrintError(
1320 'The parameter value, %s, specified using "-m, --mcsParams" option for parameter, %s, is not a valid value. Supported values: > 0'
1321 % (Value, Name)
1322 )
1323 ParamValue = Value
1324 elif re.match("^MinNumAtoms$", ParamName, re.I):
1325 Value = int(Value)
1326 if Value < 1:
1327 MiscUtil.PrintError(
1328 'The parameter value, %s, specified using "-m, --mcsParams" option for parameter, %s, is not a valid value. Supported values: >= 1'
1329 % (Value, Name)
1330 )
1331 ParamValue = Value
1332 elif re.match("^MinNumBonds$", ParamName, re.I):
1333 Value = int(Value)
1334 if Value < 0:
1335 MiscUtil.PrintError(
1336 'The parameter value, %s, specified using "-m, --mcsParams" option for parameter, %s, is not a valid value. Supported values: >=0 '
1337 % (Value, Name)
1338 )
1339 ParamValue = Value
1340 elif re.match("^AtomCompare$", ParamName, re.I):
1341 if re.match("^CompareAny$", Value, re.I):
1342 ParamValue = rdFMCS.AtomCompare.CompareAny
1343 elif re.match("^CompareElements$", Value, re.I):
1344 ParamValue = Chem.rdFMCS.AtomCompare.CompareElements
1345 elif re.match("^CompareIsotopes$", Value, re.I):
1346 ParamValue = Chem.rdFMCS.AtomCompare.CompareIsotopes
1347 else:
1348 MiscUtil.PrintError(
1349 'The parameter value, %s, specified using "-m, --mcsParams" option for parameter, %s, is not a valid value. Supported values: CompareAny CompareElements CompareIsotopes'
1350 % (Value, Name)
1351 )
1352 elif re.match("^BondCompare$", ParamName, re.I):
1353 if re.match("^CompareAny$", Value, re.I):
1354 ParamValue = Chem.rdFMCS.BondCompare.CompareAny
1355 elif re.match("^CompareOrder$", Value, re.I):
1356 ParamValue = rdFMCS.BondCompare.CompareOrder
1357 elif re.match("^CompareOrderExact$", Value, re.I):
1358 ParamValue = rdFMCS.BondCompare.CompareOrderExact
1359 else:
1360 MiscUtil.PrintError(
1361 'The parameter value, %s, specified using "-m, --mcsParams" option for parameter, %s, is not a valid value. Supported values: CompareAny CompareOrder CompareOrderExact'
1362 % (Value, Name)
1363 )
1364 elif re.match("^SeedSMARTS$", ParamName, re.I):
1365 if not len(Value):
1366 MiscUtil.PrintError(
1367 'The parameter value specified using "-m, --mcsParams" option for parameter, %s, is empty. '
1368 % (Name)
1369 )
1370 ParamValue = Value
1371 else:
1372 if not re.match("^(Yes|No|True|False)$", Value, re.I):
1373 MiscUtil.PrintError(
1374 'The parameter value, %s, specified using "-m, --mcsParams" option for parameter, %s, is not a valid value. Supported values: Yes No True False'
1375 % (Value, Name)
1376 )
1377 ParamValue = False
1378 if re.match("^(Yes|True)$", Value, re.I):
1379 ParamValue = True
1380
1381 # Set value...
1382 OptionsInfo["MCSParams"][ParamName] = ParamValue
1383
1384
1385 def ProcessOptions():
1386 """Process and validate command line arguments and options."""
1387
1388 MiscUtil.PrintInfo("Processing options...")
1389
1390 # Validate options...
1391 ValidateOptions()
1392
1393 OptionsInfo["Infile"] = Options["--infile"]
1394 OptionsInfo["SMILESInfileStatus"] = True if MiscUtil.CheckFileExt(Options["--infile"], "smi csv tsv txt") else False
1395 ParamsDefaultInfoOverride = {"RemoveHydrogens": False}
1396 OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters(
1397 "--infileParams",
1398 Options["--infileParams"],
1399 InfileName=Options["--infile"],
1400 ParamsDefaultInfo=ParamsDefaultInfoOverride,
1401 )
1402
1403 OptionsInfo["RefFile"] = Options["--reffile"]
1404
1405 OptionsInfo["Outfile"] = Options["--outfile"]
1406 OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters(
1407 "--outfileParams", Options["--outfileParams"]
1408 )
1409
1410 OptionsInfo["Overwrite"] = Options["--overwrite"]
1411
1412 OptionsInfo["Scaffold"] = Options["--scaffold"]
1413 if re.match("^auto$", Options["--scaffold"], re.I):
1414 UseScaffoldMCS = True
1415 UseScaffoldSMARTS = False
1416 ScaffoldSMARTS = None
1417 else:
1418 UseScaffoldMCS = False
1419 UseScaffoldSMARTS = True
1420 ScaffoldSMARTS = OptionsInfo["Scaffold"]
1421
1422 OptionsInfo["UseScaffoldMCS"] = UseScaffoldMCS
1423 OptionsInfo["UseScaffoldSMARTS"] = UseScaffoldSMARTS
1424 OptionsInfo["ScaffoldSMARTS"] = ScaffoldSMARTS
1425 OptionsInfo["ScaffoldPatternMol"] = None
1426
1427 OptionsInfo["SpecifiedMCSParams"] = Options["--mcsParams"]
1428 ProcessMCSParameters()
1429
1430 OptionsInfo["ScaffoldRMSDOut"] = True if re.match("^yes$", Options["--scaffoldRMSDOut"], re.I) else False
1431
1432 # Method, basis set, and reference wavefunction...
1433 OptionsInfo["BasisSet"] = Options["--basisSet"]
1434 OptionsInfo["BasisSetAuto"] = True if re.match("^auto$", Options["--basisSet"], re.I) else False
1435
1436 OptionsInfo["MethodName"] = Options["--methodName"]
1437 OptionsInfo["MethodNameAuto"] = True if re.match("^auto$", Options["--methodName"], re.I) else False
1438
1439 OptionsInfo["Reference"] = Options["--reference"]
1440 OptionsInfo["ReferenceAuto"] = True if re.match("^auto$", Options["--reference"], re.I) else False
1441
1442 # Run and options parameters...
1443 OptionsInfo["Psi4OptionsParams"] = Psi4Util.ProcessPsi4OptionsParameters(
1444 "--psi4OptionsParams", Options["--psi4OptionsParams"]
1445 )
1446 OptionsInfo["Psi4RunParams"] = Psi4Util.ProcessPsi4RunParameters(
1447 "--psi4RunParams", Options["--psi4RunParams"], InfileName=OptionsInfo["Infile"]
1448 )
1449
1450 # Conformer generation paramaters...
1451 ParamsDefaultInfoOverride = {"MaxConfs": 50}
1452 OptionsInfo["ConfGenerationParams"] = MiscUtil.ProcessOptionConformerParameters(
1453 "--confParams", Options["--confParams"], ParamsDefaultInfoOverride
1454 )
1455
1456 # Write energy parameters...
1457 OptionsInfo["EnergyOut"] = True if re.match("^yes$", Options["--energyOut"], re.I) else False
1458 OptionsInfo["EnergyUnits"] = Options["--energyUnits"]
1459
1460 EnergyDataFieldLabel = Options["--energyDataFieldLabel"]
1461 if re.match("^auto$", EnergyDataFieldLabel, re.I):
1462 EnergyDataFieldLabel = "Psi4_Energy (%s)" % Options["--energyUnits"]
1463 OptionsInfo["EnergyDataFieldLabel"] = EnergyDataFieldLabel
1464
1465 OptionsInfo["EnergyRMSDCalcMode"] = Options["--energyRMSDCalcMode"]
1466 OptionsInfo["EnergyRMSDCalcModeBest"] = (
1467 True if re.match("^BestRMSD$", Options["--energyRMSDCalcMode"], re.I) else False
1468 )
1469
1470 OptionsInfo["EnergyRMSDCutoff"] = float(Options["--energyRMSDCutoff"])
1471
1472 OptionsInfo["EnergyRMSDCutoffMode"] = Options["--energyRMSDCutoffMode"]
1473 OptionsInfo["EnergyRMSDCutoffModeLowest"] = (
1474 True if re.match("^Lowest$", Options["--energyRMSDCutoffMode"], re.I) else False
1475 )
1476
1477 # Process energy window...
1478 EnergyWindow = Options["--energyWindow"]
1479 if re.match("^auto$", EnergyWindow, re.I):
1480 # Set default energy window based on units...
1481 EnergyUnits = Options["--energyUnits"]
1482 if re.match(r"^kcal\/mol$", EnergyUnits, re.I):
1483 EnergyWindow = 20
1484 elif re.match(r"^kJ\/mol$", EnergyUnits, re.I):
1485 EnergyWindow = 83.68
1486 elif re.match("^eV$", EnergyUnits, re.I):
1487 EnergyWindow = 0.8673
1488 elif re.match("^Hartrees$", EnergyUnits, re.I):
1489 EnergyWindow = 0.03188
1490 else:
1491 MiscUtil.PrintError(
1492 'Failed to set default value for "--energyWindow". The value, %s, specified for option "--energyUnits" is not valid. '
1493 % EnergyUnits
1494 )
1495 else:
1496 EnergyWindow = float(EnergyWindow)
1497 OptionsInfo["EnergyWindow"] = EnergyWindow
1498
1499 OptionsInfo["MaxIters"] = int(Options["--maxIters"])
1500
1501 OptionsInfo["MPMode"] = True if re.match("^yes$", Options["--mp"], re.I) else False
1502 OptionsInfo["MPParams"] = MiscUtil.ProcessOptionMultiprocessingParameters("--mpParams", Options["--mpParams"])
1503
1504 # Multiprocessing level...
1505 MPLevelMoleculesMode = False
1506 MPLevelConformersMode = False
1507 MPLevel = Options["--mpLevel"]
1508 if re.match("^Molecules$", MPLevel, re.I):
1509 MPLevelMoleculesMode = True
1510 elif re.match("^Conformers$", MPLevel, re.I):
1511 MPLevelConformersMode = True
1512 else:
1513 MiscUtil.PrintError('The value, %s, specified for option "--mpLevel" is not valid. ' % MPLevel)
1514 OptionsInfo["MPLevel"] = MPLevel
1515 OptionsInfo["MPLevelMoleculesMode"] = MPLevelMoleculesMode
1516 OptionsInfo["MPLevelConformersMode"] = MPLevelConformersMode
1517
1518 OptionsInfo["Precision"] = int(Options["--precision"])
1519 OptionsInfo["QuietMode"] = True if re.match("^yes$", Options["--quiet"], re.I) else False
1520
1521
1522 def RetrieveOptions():
1523 """Retrieve command line arguments and options."""
1524
1525 # Get options...
1526 global Options
1527 Options = docopt(_docoptUsage_)
1528
1529 # Set current working directory to the specified directory...
1530 WorkingDir = Options["--workingdir"]
1531 if WorkingDir:
1532 os.chdir(WorkingDir)
1533
1534 # Handle examples option...
1535 if "--examples" in Options and Options["--examples"]:
1536 MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_))
1537 sys.exit(0)
1538
1539
1540 def ValidateOptions():
1541 """Validate option values."""
1542
1543 MiscUtil.ValidateOptionTextValue("--energyOut", Options["--energyOut"], "yes no")
1544 MiscUtil.ValidateOptionTextValue("--energyUnits", Options["--energyUnits"], "Hartrees kcal/mol kJ/mol eV")
1545
1546 MiscUtil.ValidateOptionTextValue(" --energyRMSDCalcMode", Options["--energyRMSDCalcMode"], "RMSD BestRMSD")
1547
1548 MiscUtil.ValidateOptionFloatValue("--energyRMSDCutoff", Options["--energyRMSDCutoff"], {">=": 0})
1549 MiscUtil.ValidateOptionTextValue(" --energyRMSDCutoffMode", Options["--energyRMSDCutoffMode"], "All Lowest")
1550
1551 if not re.match("^auto$", Options["--energyWindow"], re.I):
1552 MiscUtil.ValidateOptionFloatValue("--energyWindow", Options["--energyWindow"], {">": 0})
1553
1554 MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"])
1555 MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "sdf sd mol smi txt csv tsv")
1556
1557 MiscUtil.ValidateOptionFilePath("-r, --reffile", Options["--reffile"])
1558 MiscUtil.ValidateOptionFileExt("-r, --reffile", Options["--reffile"], "sdf sd mol")
1559
1560 MiscUtil.ValidateOptionIntegerValue("--maxIters", Options["--maxIters"], {">": 0})
1561
1562 MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "sdf sd")
1563 MiscUtil.ValidateOptionsOutputFileOverwrite(
1564 "-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"]
1565 )
1566 MiscUtil.ValidateOptionsDistinctFileNames(
1567 "-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"]
1568 )
1569
1570 MiscUtil.ValidateOptionTextValue("--mp", Options["--mp"], "yes no")
1571 MiscUtil.ValidateOptionTextValue("--mpLevel", Options["--mpLevel"], "Molecules Conformers")
1572
1573 MiscUtil.ValidateOptionIntegerValue("-p, --precision", Options["--precision"], {">": 0})
1574 MiscUtil.ValidateOptionTextValue("-q, --quiet", Options["--quiet"], "yes no")
1575
1576 MiscUtil.ValidateOptionTextValue("--scaffoldRMSDOut", Options["--scaffoldRMSDOut"], "yes no")
1577
1578
1579 # Setup a usage string for docopt...
1580 _docoptUsage_ = """
1581 Psi4GenerateConstrainedConformers.py - Generate constrained molecular conformations
1582
1583 Usage:
1584 Psi4GenerateConstrainedConformers.py [--basisSet <text>] [--confParams <Name,Value,...>] [--energyOut <yes or no>]
1585 [--energyDataFieldLabel <text>] [--energyUnits <text>] [--energyRMSDCalcMode <RMSD or BestRMSD>]
1586 [--energyRMSDCutoff <number>] [--energyRMSDCutoffMode <All or Lowest>] [--energyWindow <number>]
1587 [--infileParams <Name,Value,...>] [--maxIters <number>] [--methodName <text>] [--mcsParams <Name,Value,...>]
1588 [--mp <yes or no>] [--mpLevel <Molecules or Conformers>] [--mpParams <Name, Value,...>]
1589 [ --outfileParams <Name,Value,...> ] [--overwrite] [--precision <number> ]
1590 [--psi4OptionsParams <Name,Value,...>] [--psi4RunParams <Name,Value,...>]
1591 [--quiet <yes or no>] [--reference <text>] [--scaffold <auto or SMARTS>]
1592 [--scaffoldRMSDOut <yes or no>] [-w <dir>] -i <infile> -r <reffile> -o <outfile>
1593 Psi4GenerateConstrainedConformers.py -h | --help | -e | --examples
1594
1595 Description:
1596 Generate molecular conformations by performing a constrained energy
1597 minimization against a reference molecule. The molecular conformations
1598 are generated using a combination of distance geometry and forcefield
1599 minimization followed by geometry optimization using a quantum chemistry
1600 method.
1601
1602 An initial set of 3D conformers are generated for input molecules using
1603 distance geometry. A common core scaffold, corresponding to a Maximum
1604 Common Substructure (MCS) or an explicit SMARTS pattern, is identified
1605 between a pair of input and reference molecules. The core scaffold atoms in
1606 input molecules are aligned against the same atoms in the reference molecule.
1607 The energy of aligned structures are sequentially minimized using the forcefield
1608 and a quantum chemistry method to generate the final 3D structures.
1609
1610 A Psi4 XYZ format geometry string is automatically generated for each molecule
1611 in input file. It contains atom symbols and 3D coordinates for each atom in a
1612 molecule. In addition, the formal charge and spin multiplicity are present in the
1613 the geometry string. These values are either retrieved from molecule properties
1614 named 'FormalCharge' and 'SpinMultiplicty' or dynamically calculated for a
1615 molecule.
1616
1617 The supported input file formats are: Mol (.mol), SD (.sdf, .sd), SMILES (.smi,
1618 .csv, .tsv, .txt)
1619
1620 The supported output file formats are: SD (.sdf, .sd)
1621
1622 Options:
1623 -b, --basisSet <text> [default: auto]
1624 Basis set to use for constrained energy minimization. Default: 6-31+G**
1625 for sulfur containing molecules; Otherwise, 6-31G** [ Ref 150 ]. The specified
1626 value must be a valid Psi4 basis set. No validation is performed.
1627
1628 The following list shows a representative sample of basis sets available
1629 in Psi4:
1630
1631 STO-3G, 6-31G, 6-31+G, 6-31++G, 6-31G*, 6-31+G*, 6-31++G*,
1632 6-31G**, 6-31+G**, 6-31++G**, 6-311G, 6-311+G, 6-311++G,
1633 6-311G*, 6-311+G*, 6-311++G*, 6-311G**, 6-311+G**, 6-311++G**,
1634 cc-pVDZ, cc-pCVDZ, aug-cc-pVDZ, cc-pVDZ-DK, cc-pCVDZ-DK, def2-SVP,
1635 def2-SVPD, def2-TZVP, def2-TZVPD, def2-TZVPP, def2-TZVPPD
1636
1637 --confParams <Name,Value,...> [default: auto]
1638 A comma delimited list of parameter name and value pairs for generating
1639 initial 3D coordinates for molecules in input file. A common core scaffold is
1640 identified between a pair of input and reference molecules. The atoms in
1641 common core scaffold of input molecules are aligned against the reference
1642 molecule followed by constrained energy minimization using forcefield
1643 available in RDKit. The 3D structures are subsequently constrained and
1644 minimized by a quantum chemistry method available in Psi4.
1645
1646 The supported parameter names along with their default values are shown
1647 below:
1648
1649 confMethod,ETKDGv2,
1650 forceField,MMFF, forceFieldMMFFVariant,MMFF94,
1651 enforceChirality,yes,embedRMSDCutoff,0.5,maxConfs,50,
1652 useTethers,yes
1653
1654 confMethod,ETKDGv2 [ Possible values: SDG, KDG, ETDG,
1655 ETKDG , or ETKDGv2]
1656 forceField, MMFF [ Possible values: UFF or MMFF ]
1657 forceFieldMMFFVariant,MMFF94 [ Possible values: MMFF94 or MMFF94s ]
1658 enforceChirality,yes [ Possible values: yes or no ]
1659 useTethers,yes [ Possible values: yes or no ]
1660
1661 confMethod: Conformation generation methodology for generating initial 3D
1662 coordinates. Possible values: Standard Distance Geometry (SDG), Experimental
1663 Torsion-angle preference with Distance Geometry (ETDG), basic Knowledge-terms
1664 with Distance Geometry (KDG) and Experimental Torsion-angle preference
1665 along with basic Knowledge-terms and Distance Geometry (ETKDG or
1666 ETKDGv2) [Ref 129, 167] .
1667
1668 forceField: Forcefield method to use for constrained energy minimization.
1669 Possible values: Universal Force Field (UFF) [ Ref 81 ] or Merck Molecular
1670 Mechanics Force Field [ Ref 83-87 ] .
1671
1672 enforceChirality: Enforce chirality for defined chiral centers during
1673 forcefield minimization.
1674
1675 maxConfs: Maximum number of conformations to generate for each molecule
1676 during the generation of an initial 3D conformation ensemble using conformation
1677 generation methodology. The conformations are constrained and minimized using
1678 the specified forcefield and a quantum chemistry method. The lowest energy
1679 conformation is written to the output file.
1680
1681 embedRMSDCutoff: RMSD cutoff for retaining initial set of conformers embedded
1682 using distance geometry and forcefield minimization. All embedded conformers
1683 are kept for 'None' value. Otherwise, only those conformers which are different
1684 from each other by the specified RMSD cutoff, 0.5 by default, are kept. The first
1685 embedded conformer is always retained.
1686
1687 useTethers: Use tethers to optimize the final embedded conformation by
1688 applying a series of extra forces to align matching atoms to the positions of
1689 the core atoms. Otherwise, use simple distance constraints during the
1690 optimization.
1691 --energyOut <yes or no> [default: yes]
1692 Write out energy values.
1693 --energyDataFieldLabel <text> [default: auto]
1694 Energy data field label for writing energy values. Default: Psi4_Energy (<Units>).
1695 --energyUnits <text> [default: kcal/mol]
1696 Energy units. Possible values: Hartrees, kcal/mol, kJ/mol, or eV.
1697 --energyRMSDCalcMode <RMSD or BestRMSD> [default: RMSD]
1698 Methodology for calculating RMSD values during the application of RMSD
1699 cutoff for retaining conformations after the final energy minimization. Possible
1700 values: RMSD or BestRMSD. This option is ignore during 'None' value of
1701 '--energyRMSDCutoff' option.
1702
1703 During BestRMSMode mode, the RDKit 'function AllChem.GetBestRMS' is used to
1704 align and calculate RMSD. This function calculates optimal RMSD for aligning two
1705 molecules, taking symmetry into account. Otherwise, the RMSD value is calculated
1706 using 'AllChem.GetConformerRMS' without changing the atom order. A word to the
1707 wise from RDKit documentation: The AllChem.GetBestRMS function will attempt to
1708 align all permutations of matching atom orders in both molecules, for some molecules
1709 it will lead to 'combinatorial explosion'.
1710 --energyRMSDCutoff <number> [default: 0.5]
1711 RMSD cutoff for retaining conformations after the final energy minimization.
1712 By default, only those conformations which are different from other low
1713 energy conformation by the specified RMSD cutoff and are with in the
1714 specified energy window are kept. The lowest energy conformation is always
1715 retained. A value of zero keeps all minimized conformations with in the
1716 specified energy window from the lowest energy.
1717 --energyRMSDCutoffMode <All or Lowest> [default: All]
1718 RMSD cutoff mode for retaining conformations after the final energy
1719 minimization. Possible values: All or Lowest. The RMSD values are compared
1720 against all the selected conformations or the lowest energy conformation during
1721 'All' and 'Lowest' value of '--energyRMSDCutoffMode'. This option is ignored
1722 during zero value of '--energyRMSDCutoff'.
1723
1724 By default, only those conformations which all different from all selected
1725 low energy conformations by the specified RMSD cutoff and are with in the
1726 specified energy window are kept.
1727 --energyWindow <number> [default: auto]
1728 Psi4 Energy window for selecting conformers after the final energy minimization.
1729 The default value is dependent on '--energyUnits': 20 kcal/mol, 83.68 kJ/mol,
1730 0.8673 ev, or 0.03188 Hartrees. The specified value must be in '--energyUnits'.
1731 -e, --examples
1732 Print examples.
1733 -h, --help
1734 Print this help message.
1735 -i, --infile <infile>
1736 Input file name.
1737 --infileParams <Name,Value,...> [default: auto]
1738 A comma delimited list of parameter name and value pairs for reading
1739 molecules from files. The supported parameter names for different file
1740 formats, along with their default values, are shown below:
1741
1742 SD, MOL: removeHydrogens,no,sanitize,yes,strictParsing,yes
1743
1744 SMILES: smilesColumn,1,smilesNameColumn,2,smilesDelimiter,space,
1745 smilesTitleLine,auto,sanitize,yes
1746
1747 Possible values for smilesDelimiter: space, comma or tab.
1748 --maxIters <number> [default: 50]
1749 Maximum number of iterations to perform for each molecule or conformer
1750 during energy minimization by a quantum chemistry method.
1751 -m, --methodName <text> [default: auto]
1752 Method to use for constrained energy minimization. Default: B3LYP [ Ref 150 ].
1753 The specified value must be a valid Psi4 method name. No validation is
1754 performed.
1755
1756 The following list shows a representative sample of methods available
1757 in Psi4:
1758
1759 B1LYP, B2PLYP, B2PLYP-D3BJ, B2PLYP-D3MBJ, B3LYP, B3LYP-D3BJ,
1760 B3LYP-D3MBJ, CAM-B3LYP, CAM-B3LYP-D3BJ, HF, HF-D3BJ, HF3c, M05,
1761 M06, M06-2x, M06-HF, M06-L, MN12-L, MN15, MN15-D3BJ,PBE, PBE0,
1762 PBEH3c, PW6B95, PW6B95-D3BJ, WB97, WB97X, WB97X-D, WB97X-D3BJ
1763
1764 --mcsParams <Name,Value,...> [default: auto]
1765 Parameter values to use for identifying a maximum common substructure
1766 (MCS) in between a pair of reference and input molecules.In general, it is a
1767 comma delimited list of parameter name and value pairs. The supported
1768 parameter names along with their default values are shown below:
1769
1770 atomCompare,CompareElements,bondCompare,CompareOrder,
1771 maximizeBonds,yes,matchValences,yes,matchChiralTag,no,
1772 minNumAtoms,1,minNumBonds,0,ringMatchesRingOnly,yes,
1773 completeRingsOnly,yes,threshold,1.0,timeOut,3600,seedSMARTS,none
1774
1775 Possible values for atomCompare: CompareAny, CompareElements,
1776 CompareIsotopes. Possible values for bondCompare: CompareAny,
1777 CompareOrder, CompareOrderExact.
1778
1779 A brief description of MCS parameters taken from RDKit documentation is
1780 as follows:
1781
1782 atomCompare - Controls match between two atoms
1783 bondCompare - Controls match between two bonds
1784 maximizeBonds - Maximize number of bonds instead of atoms
1785 matchValences - Include atom valences in the MCS match
1786 matchChiralTag - Include atom chirality in the MCS match
1787 minNumAtoms - Minimum number of atoms in the MCS match
1788 minNumBonds - Minimum number of bonds in the MCS match
1789 ringMatchesRingOnly - Ring bonds only match other ring bonds
1790 completeRingsOnly - Partial rings not allowed during the match
1791 threshold - Fraction of the dataset that must contain the MCS
1792 seedSMARTS - SMARTS string as the seed of the MCS
1793 timeout - Timeout for the MCS calculation in seconds
1794
1795 --mp <yes or no> [default: no]
1796 Use multiprocessing.
1797
1798 By default, input data is retrieved in a lazy manner via mp.Pool.imap()
1799 function employing lazy RDKit data iterable. This allows processing of
1800 arbitrary large data sets without any additional requirements memory.
1801
1802 All input data may be optionally loaded into memory by mp.Pool.map()
1803 before starting worker processes in a process pool by setting the value
1804 of 'inputDataMode' to 'InMemory' in '--mpParams' option.
1805
1806 A word to the wise: The default 'chunkSize' value of 1 during 'Lazy' input
1807 data mode may adversely impact the performance. The '--mpParams' section
1808 provides additional information to tune the value of 'chunkSize'.
1809 --mpLevel <Molecules or Conformers> [default: Molecules]
1810 Perform multiprocessing at molecules or conformers level. Possible values:
1811 Molecules or Conformers. The 'Molecules' value starts a process pool at the
1812 molecules level. All conformers of a molecule are processed in a single
1813 process. The 'Conformers' value, however, starts a process pool at the
1814 conformers level. Each conformer of a molecule is processed in an individual
1815 process in the process pool. The default Psi4 'OutputFile' is set to 'quiet'
1816 using '--psi4RunParams' for 'Conformers' level. Otherwise, it may generate
1817 a large number of Psi4 output files.
1818 --mpParams <Name,Value,...> [default: auto]
1819 A comma delimited list of parameter name and value pairs to configure
1820 multiprocessing.
1821
1822 The supported parameter names along with their default and possible
1823 values are shown below:
1824
1825 chunkSize, auto
1826 inputDataMode, Lazy [ Possible values: InMemory or Lazy ]
1827 numProcesses, auto [ Default: mp.cpu_count() ]
1828
1829 These parameters are used by the following functions to configure and
1830 control the behavior of multiprocessing: mp.Pool(), mp.Pool.map(), and
1831 mp.Pool.imap().
1832
1833 The chunkSize determines chunks of input data passed to each worker
1834 process in a process pool by mp.Pool.map() and mp.Pool.imap() functions.
1835 The default value of chunkSize is dependent on the value of 'inputDataMode'.
1836
1837 The mp.Pool.map() function, invoked during 'InMemory' input data mode,
1838 automatically converts RDKit data iterable into a list, loads all data into
1839 memory, and calculates the default chunkSize using the following method
1840 as shown in its code:
1841
1842 chunkSize, extra = divmod(len(dataIterable), len(numProcesses) * 4)
1843 if extra: chunkSize += 1
1844
1845 For example, the default chunkSize will be 7 for a pool of 4 worker processes
1846 and 100 data items.
1847
1848 The mp.Pool.imap() function, invoked during 'Lazy' input data mode, employs
1849 'lazy' RDKit data iterable to retrieve data as needed, without loading all the
1850 data into memory. Consequently, the size of input data is not known a priori.
1851 It's not possible to estimate an optimal value for the chunkSize. The default
1852 chunkSize is set to 1.
1853
1854 The default value for the chunkSize during 'Lazy' data mode may adversely
1855 impact the performance due to the overhead associated with exchanging
1856 small chunks of data. It is generally a good idea to explicitly set chunkSize to
1857 a larger value during 'Lazy' input data mode, based on the size of your input
1858 data and number of processes in the process pool.
1859
1860 The mp.Pool.map() function waits for all worker processes to process all
1861 the data and return the results. The mp.Pool.imap() function, however,
1862 returns the the results obtained from worker processes as soon as the
1863 results become available for specified chunks of data.
1864
1865 The order of data in the results returned by both mp.Pool.map() and
1866 mp.Pool.imap() functions always corresponds to the input data.
1867 -o, --outfile <outfile>
1868 Output file name.
1869 --outfileParams <Name,Value,...> [default: auto]
1870 A comma delimited list of parameter name and value pairs for writing
1871 molecules to files. The supported parameter names for different file
1872 formats, along with their default values, are shown below:
1873
1874 SD: kekulize,yes,forceV3000,no
1875
1876 --overwrite
1877 Overwrite existing files.
1878 --precision <number> [default: 6]
1879 Floating point precision for writing energy values.
1880 --psi4OptionsParams <Name,Value,...> [default: none]
1881 A comma delimited list of Psi4 option name and value pairs for setting
1882 global and module options. The names are 'option_name' for global options
1883 and 'module_name__option_name' for options local to a module. The
1884 specified option names must be valid Psi4 names. No validation is
1885 performed.
1886
1887 The specified option name and value pairs are processed and passed to
1888 psi4.set_options() as a dictionary. The supported value types are float,
1889 integer, boolean, or string. The float value string is converted into a float.
1890 The valid values for a boolean string are yes, no, true, false, on, or off.
1891 --psi4RunParams <Name,Value,...> [default: auto]
1892 A comma delimited list of parameter name and value pairs for configuring
1893 Psi4 jobs.
1894
1895 The supported parameter names along with their default and possible
1896 values are shown below:
1897
1898 MemoryInGB, 1
1899 NumThreads, 1
1900 OutputFile, auto [ Possible values: stdout, quiet, or FileName ]
1901 ScratchDir, auto [ Possivle values: DirName]
1902 RemoveOutputFile, yes [ Possible values: yes, no, true, or false]
1903
1904 These parameters control the runtime behavior of Psi4.
1905
1906 The default file name for 'OutputFile' is <InFileRoot>_Psi4.out. The PID
1907 is appended to output file name during multiprocessing as shown:
1908 <InFileRoot>_Psi4_<PIDNum>.out. The 'stdout' value for 'OutputType'
1909 sends Psi4 output to stdout. The 'quiet' or 'devnull' value suppresses
1910 all Psi4 output. The 'OutputFile' is set to 'quiet' for 'auto' value during
1911 'Conformers' of '--mpLevel' option.
1912
1913 The default 'Yes' value of 'RemoveOutputFile' option forces the removal
1914 of any existing Psi4 before creating new files to append output from
1915 multiple Psi4 runs.
1916
1917 The option 'ScratchDir' is a directory path to the location of scratch
1918 files. The default value corresponds to Psi4 default. It may be used to
1919 override the deafult path.
1920 -q, --quiet <yes or no> [default: no]
1921 Use quiet mode. The warning and information messages will not be printed.
1922 -r, --reffile <reffile>
1923 Reference input file name containing a 3D reference molecule. A common
1924 core scaffold must be present in a pair of an input and reference molecules.
1925 Otherwise, no constrained minimization is performed on the input molecule.
1926 --reference <text> [default: auto]
1927 Reference wave function to use for energy calculation. Default: RHF or UHF.
1928 The default values are Restricted Hartree-Fock (RHF) for closed-shell molecules
1929 with all electrons paired and Unrestricted Hartree-Fock (UHF) for open-shell
1930 molecules with unpaired electrons.
1931
1932 The specified value must be a valid Psi4 reference wave function. No validation
1933 is performed. For example: ROHF, CUHF, RKS, etc.
1934
1935 The spin multiplicity determines the default value of reference wave function
1936 for input molecules. It is calculated from number of free radical electrons using
1937 Hund's rule of maximum multiplicity defined as 2S + 1 where S is the total
1938 electron spin. The total spin is 1/2 the number of free radical electrons in a
1939 molecule. The value of 'SpinMultiplicity' molecule property takes precedence
1940 over the calculated value of spin multiplicity.
1941 -s, --scaffold <auto or SMARTS> [default: auto]
1942 Common core scaffold between a pair of input and reference molecules used for
1943 constrained minimization of molecules in input file. Possible values: Auto or a
1944 valid SMARTS pattern. The common core scaffold is automatically detected
1945 corresponding to the Maximum Common Substructure (MCS) between a pair of
1946 reference and input molecules. A valid SMARTS pattern may be optionally specified
1947 for the common core scaffold.
1948 --scaffoldRMSDOut <yes or no> [default: No]
1949 Write out RMSD value for common core alignment between a pair of input and
1950 reference molecules.
1951 -w, --workingdir <dir>
1952 Location of working directory which defaults to the current directory.
1953
1954 Examples:
1955 To generate conformers by performing constrained energy minimization for molecules
1956 in a SMILES file against a reference 3D molecule in a SD file using a common core
1957 scaffold between pairs of input and reference molecules identified using MCS,
1958 generating up to 50 conformations using ETKDGv2 methodology followed by initial
1959 MMFF forcefield minimization and final energy minimization using B3LYP/6-31G**
1960 and B3LYP/6-31+G** for non-sulfur and sulfur containing molecules, applying
1961 energy RMSD cutoff of 0.5 and energy window value value of 20 kcal/mol, and
1962 write out a SD file:
1963
1964 % Psi4GenerateConstrainedConformers.py -i Psi4SampleAlkanes.smi
1965 -r Psi4SampleEthane3D.sdf -o Psi4SampleAlkanesOut.sdf
1966
1967 To run the first example in a quiet mode and write out a SD file, type:
1968
1969 % Psi4GenerateConstrainedConformers.py -q yes -i Psi4SampleAlkanes.smi
1970 -r Psi4SampleEthane3D.sdf -o Psi4SampleAlkanesOut.sdf
1971
1972 To rerun the first example in multiprocessing mode on all available CPUs
1973 without loading all data into memory and write out a SD file, type:
1974
1975 % Psi4GenerateConstrainedConformers.py --mp yes
1976 -i Psi4SampleAlkanes.smi -r Psi4SampleEthane3D.sdf
1977 -o Psi4SampleAlkanesOut.sdf
1978
1979 To run the first example in multiprocessing mode on all available CPUs
1980 by loading all data into memory and write out a SD file, type:
1981
1982 % Psi4GenerateConstrainedConformers.py --mp yes --mpParams
1983 "inputDataMode,InMemory"-i Psi4SampleAlkanes.smi
1984 -r Psi4SampleEthane3D.sdf -o Psi4SampleAlkanesOut.sdf
1985
1986 To rerun the first example in multiprocessing mode on specific number of
1987 CPUs and chunk size without loading all data into memory and write out a SD file,
1988 type:
1989
1990 % Psi4GenerateConstrainedConformers.py --mp yes --mpParams
1991 "inputDataMode,Lazy,numProcesses,4,chunkSize,8"
1992 -i Psi4SampleAlkanes.smi -r Psi4SampleEthane3D.sdf
1993 -o Psi4SampleAlkanesOut.sdf
1994
1995 To rerun the first example using an explicit SMARTS string for a common core
1996 scaffold and write out a SD file, type:
1997
1998 % Psi4GenerateConstrainedConformers.py --scaffold "CC"
1999 -i Psi4SampleAlkanes.smi -r Psi4SampleEthane3D.sdf
2000 -o Psi4SampleAlkanesOut.sdf
2001
2002 To run the first example using a specific set of parameters for generating
2003 an initial set of conformers followed by energy minimization using forcefield
2004 and a quantum chemistry method and write out a SD file type:
2005
2006 % Psi4GenerateConstrainedConformers.py --confParams "confMethod,
2007 ETKDGv2, forceField,MMFF, forceFieldMMFFVariant,MMFF94s, maxConfs,20,
2008 embedRMSDCutoff,0.5" --energyUnits "kJ/mol" -m B3LYP
2009 -b "6-31+G**" --maxIters 20 --energyRMSDCutoff 0.5
2010 --energyRMSDCutoffMode All -i Psi4SampleAlkanes.sdf
2011 -r Psi4SampleEthane3D.sdf -o Psi4SampleAlkanesOut.sdf
2012
2013 To rerun the first example using molecules in a CSV SMILES file, SMILES
2014 strings in column 1, name in column2, and write out a SD file, type:
2015
2016 % Psi4GenerateConstrainedConformers.py --infileParams
2017 "smilesDelimiter,comma,smilesTitleLine,yes,smilesColumn,1,
2018 smilesNameColumn,2" -i Psi4SampleAlkanes.csv
2019 -r Psi4SampleEthane3D.sdf -o Psi4SampleAlkanesOut.sdf
2020
2021 Author:
2022
2023 Manish Sud(msud@san.rr.com)
2024
2025 See also:
2026 Psi4CalculateEnergy.py, Psi4CalculatePartialCharges.py, Psi4GenerateConformers.py,
2027 Psi4PerformConstrainedMinimization.py, Psi4PerformMinimization.py
2028
2029 Copyright:
2030 Copyright (C) 2026 Manish Sud. All rights reserved.
2031
2032 The functionality available in this script is implemented using Psi4, an
2033 open source quantum chemistry software package, and RDKit, an open
2034 source toolkit for cheminformatics developed by Greg Landrum.
2035
2036 This file is part of MayaChemTools.
2037
2038 MayaChemTools is free software; you can redistribute it and/or modify it under
2039 the terms of the GNU Lesser General Public License as published by the Free
2040 Software Foundation; either version 3 of the License, or (at your option) any
2041 later version.
2042
2043 """
2044
2045 if __name__ == "__main__":
2046 main()