1 #!/bin/env python
2 #
3 # File: Psi4PerformConstrainedMinimization.py
4 # Author: Manish Sud <msud@san.rr.com>
5 #
6 # Copyright (C) 2026 Manish Sud. All rights reserved.
7 #
8 # The functionality available in this script is implemented using Psi4, an
9 # open source quantum chemistry software package, and RDKit, an open
10 # source toolkit for cheminformatics developed by Greg Landrum.
11 #
12 # This file is part of MayaChemTools.
13 #
14 # MayaChemTools is free software; you can redistribute it and/or modify it under
15 # the terms of the GNU Lesser General Public License as published by the Free
16 # Software Foundation; either version 3 of the License, or (at your option) any
17 # later version.
18 #
19 # MayaChemTools is distributed in the hope that it will be useful, but without
20 # any warranty; without even the implied warranty of merchantability of fitness
21 # for a particular purpose. See the GNU Lesser General Public License for more
22 # details.
23 #
24 # You should have received a copy of the GNU Lesser General Public License
25 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
26 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
27 # Boston, MA, 02111-1307, USA.
28 #
29
30 from __future__ import print_function
31
32 import os
33 import sys
34 import time
35 import re
36 import shutil
37 import multiprocessing as mp
38
39 # Psi4 imports...
40 if hasattr(shutil, "which") and shutil.which("psi4") is None:
41 sys.stderr.write("\nWarning: Failed to find 'psi4' in your PATH indicating potential issues with your\n")
42 sys.stderr.write("Psi4 environment. The 'import psi4' directive in the global scope of the script\n")
43 sys.stderr.write("interferes with the multiprocessing functionality. It is imported later in the\n")
44 sys.stderr.write("local scope during the execution of the script and may fail. Check/update your\n")
45 sys.stderr.write("Psi4 environment and try again.\n\n")
46
47 # RDKit imports...
48 try:
49 from rdkit import rdBase
50 from rdkit import Chem
51 from rdkit.Chem import AllChem, rdMolAlign
52 from rdkit.Chem import rdFMCS
53 except ImportError as ErrMsg:
54 sys.stderr.write("\nFailed to import RDKit module/package: %s\n" % ErrMsg)
55 sys.stderr.write("Check/update your RDKit environment and try again.\n\n")
56 sys.exit(1)
57
58 # MayaChemTools imports...
59 sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python"))
60 try:
61 from docopt import docopt
62 import MiscUtil
63 import Psi4Util
64 import RDKitUtil
65 except ImportError as ErrMsg:
66 sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg)
67 sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n")
68 sys.exit(1)
69
70 ScriptName = os.path.basename(sys.argv[0])
71 Options = {}
72 OptionsInfo = {}
73
74
75 def main():
76 """Start execution of the script."""
77
78 MiscUtil.PrintInfo(
79 "\n%s (Psi4: Imported later; RDKit v%s; MayaChemTools v%s; %s): Starting...\n"
80 % (ScriptName, rdBase.rdkitVersion, MiscUtil.GetMayaChemToolsVersion(), time.asctime())
81 )
82
83 (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime()
84
85 # Retrieve command line arguments and options...
86 RetrieveOptions()
87
88 # Process and validate command line arguments and options...
89 ProcessOptions()
90
91 # Perform actions required by the script...
92 PerformConstrainedMinimization()
93
94 MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName)
95 MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime))
96
97
98 def PerformConstrainedMinimization():
99 """Perform constrained minimization."""
100
101 # Read and validate reference molecule...
102 RefMol = RetrieveReferenceMolecule()
103
104 # Setup a molecule reader for input file...
105 MiscUtil.PrintInfo("\nProcessing file %s..." % OptionsInfo["Infile"])
106 Mols = RDKitUtil.ReadMolecules(OptionsInfo["Infile"], **OptionsInfo["InfileParams"])
107
108 # Set up a molecule writer...
109 Writer = RDKitUtil.MoleculesWriter(OptionsInfo["Outfile"], **OptionsInfo["OutfileParams"])
110 if Writer is None:
111 MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OptionsInfo["Outfile"])
112 MiscUtil.PrintInfo("Generating file %s..." % OptionsInfo["Outfile"])
113
114 MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount, WriteFailedCount = ProcessMolecules(
115 RefMol, Mols, Writer
116 )
117
118 if Writer is not None:
119 Writer.close()
120
121 MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount)
122 MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount)
123 MiscUtil.PrintInfo("Number of molecules with missing core scaffold: %d" % CoreScaffoldMissingCount)
124 MiscUtil.PrintInfo(
125 "Number of molecules failed during conformation generation or minimization: %d" % MinimizationFailedCount
126 )
127 MiscUtil.PrintInfo("Number of molecules failed during writing: %d" % WriteFailedCount)
128 MiscUtil.PrintInfo(
129 "Number of ignored molecules: %d"
130 % (MolCount - ValidMolCount + CoreScaffoldMissingCount + MinimizationFailedCount + WriteFailedCount)
131 )
132
133
134 def ProcessMolecules(RefMol, Mols, Writer):
135 """Process and minimize molecules."""
136
137 if OptionsInfo["MPMode"]:
138 return ProcessMoleculesUsingMultipleProcesses(RefMol, Mols, Writer)
139 else:
140 return ProcessMoleculesUsingSingleProcess(RefMol, Mols, Writer)
141
142
143 def ProcessMoleculesUsingSingleProcess(RefMol, Mols, Writer):
144 """Process and minimize molecules using a single process."""
145
146 # Intialize Psi4...
147 MiscUtil.PrintInfo("\nInitializing Psi4...")
148 Psi4Handle = Psi4Util.InitializePsi4(
149 Psi4RunParams=OptionsInfo["Psi4RunParams"],
150 Psi4OptionsParams=OptionsInfo["Psi4OptionsParams"],
151 PrintVersion=True,
152 PrintHeader=True,
153 )
154 OptionsInfo["psi4"] = Psi4Handle
155
156 # Setup max iterations global variable...
157 Psi4Util.UpdatePsi4OptionsParameters(Psi4Handle, {"GEOM_MAXITER": OptionsInfo["MaxIters"]})
158
159 # Setup conversion factor for energy units...
160 SetupEnergyConversionFactor(Psi4Handle)
161
162 (MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount, WriteFailedCount) = [0] * 5
163
164 for Mol in Mols:
165 MolCount += 1
166
167 if not CheckAndValidateMolecule(Mol, MolCount):
168 continue
169
170 # Setup 2D coordinates for SMILES input file...
171 if OptionsInfo["SMILESInfileStatus"]:
172 AllChem.Compute2DCoords(Mol)
173
174 ValidMolCount += 1
175
176 # Setup a reference molecule core containing common scaffold atoms...
177 RefMolCore = SetupCoreScaffold(RefMol, Mol, MolCount)
178 if RefMolCore is None:
179 CoreScaffoldMissingCount += 1
180 continue
181
182 Mol, CalcStatus, Energy, ScaffoldEmbedRMSD = ConstrainAndMinimizeMolecule(Psi4Handle, Mol, RefMolCore, MolCount)
183
184 if not CalcStatus:
185 MinimizationFailedCount += 1
186 continue
187
188 WriteStatus = WriteMolecule(Writer, Mol, MolCount, Energy, ScaffoldEmbedRMSD)
189
190 if not WriteStatus:
191 WriteFailedCount += 1
192
193 return (MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount, WriteFailedCount)
194
195
196 def ProcessMoleculesUsingMultipleProcesses(RefMol, Mols, Writer):
197 """Process and minimize molecules using multiprocessing."""
198
199 if OptionsInfo["MPLevelConformersMode"]:
200 return ProcessMoleculesUsingMultipleProcessesAtConformersLevel(RefMol, Mols, Writer)
201 elif OptionsInfo["MPLevelMoleculesMode"]:
202 return ProcessMoleculesUsingMultipleProcessesAtMoleculesLevel(RefMol, Mols, Writer)
203 else:
204 MiscUtil.PrintError('The value, %s, option "--mpLevel" is not supported.' % (OptionsInfo["MPLevel"]))
205
206
207 def ProcessMoleculesUsingMultipleProcessesAtMoleculesLevel(RefMol, Mols, Writer):
208 """Process and minimize molecules using multiprocessing at molecules level."""
209
210 MiscUtil.PrintInfo(
211 "\nPerforming constrained minimization with generation of conformers using multiprocessing at molecules level..."
212 )
213
214 MPParams = OptionsInfo["MPParams"]
215
216 # Setup data for initializing a worker process...
217 OptionsInfo["EncodedRefMol"] = RDKitUtil.MolToBase64EncodedMolString(RefMol)
218 InitializeWorkerProcessArgs = (
219 MiscUtil.ObjectToBase64EncodedString(Options),
220 MiscUtil.ObjectToBase64EncodedString(OptionsInfo),
221 )
222
223 # Setup a encoded mols data iterable for a worker process...
224 WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(Mols)
225
226 # Setup process pool along with data initialization for each process...
227 if not OptionsInfo["QuietMode"]:
228 MiscUtil.PrintInfo(
229 "\nConfiguring multiprocessing using %s method..."
230 % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()")
231 )
232 MiscUtil.PrintInfo(
233 "NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n"
234 % (
235 MPParams["NumProcesses"],
236 MPParams["InputDataMode"],
237 ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"]),
238 )
239 )
240
241 ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeWorkerProcess, InitializeWorkerProcessArgs)
242
243 # Start processing...
244 if re.match("^Lazy$", MPParams["InputDataMode"], re.I):
245 Results = ProcessPool.imap(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
246 elif re.match("^InMemory$", MPParams["InputDataMode"], re.I):
247 Results = ProcessPool.map(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
248 else:
249 MiscUtil.PrintError(
250 'The value, %s, specified for "--inputDataMode" is not supported.' % (MPParams["InputDataMode"])
251 )
252
253 # Print out Psi4 version in the main process...
254 MiscUtil.PrintInfo("\nInitializing Psi4...\n")
255 Psi4Handle = Psi4Util.InitializePsi4(PrintVersion=True, PrintHeader=False)
256 OptionsInfo["psi4"] = Psi4Handle
257
258 (MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount, WriteFailedCount) = [0] * 5
259
260 for Result in Results:
261 MolCount += 1
262 MolIndex, EncodedMol, CoreScaffoldMissingStatus, CalcStatus, Energy, ScaffoldEmbedRMSD = Result
263
264 if EncodedMol is None:
265 continue
266 ValidMolCount += 1
267
268 if CoreScaffoldMissingStatus:
269 CoreScaffoldMissingStatus += 1
270 continue
271
272 if not CalcStatus:
273 MinimizationFailedCount += 1
274 continue
275
276 Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
277 WriteStatus = WriteMolecule(Writer, Mol, MolCount, Energy, ScaffoldEmbedRMSD)
278
279 if not WriteStatus:
280 WriteFailedCount += 1
281
282 return (MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount, WriteFailedCount)
283
284
285 def InitializeWorkerProcess(*EncodedArgs):
286 """Initialize data for a worker process."""
287
288 global Options, OptionsInfo
289
290 if not OptionsInfo["QuietMode"]:
291 MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid())
292
293 # Decode Options and OptionInfo...
294 Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0])
295 OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1])
296
297 # Decode RefMol...
298 OptionsInfo["RefMol"] = RDKitUtil.MolFromBase64EncodedMolString(OptionsInfo["EncodedRefMol"])
299
300 # Psi4 is initialized in the worker process to avoid creation of redundant Psi4
301 # output files for each process...
302 OptionsInfo["Psi4Initialized"] = False
303
304
305 def WorkerProcess(EncodedMolInfo):
306 """Process data for a worker process."""
307
308 if not OptionsInfo["Psi4Initialized"]:
309 InitializePsi4ForWorkerProcess()
310
311 MolIndex, EncodedMol = EncodedMolInfo
312
313 MolNum = MolIndex + 1
314 CoreScaffoldMissingStatus = False
315 CalcStatus = False
316 Energy = None
317 ScaffoldEmbedRMSD = None
318
319 if EncodedMol is None:
320 return [MolIndex, None, CoreScaffoldMissingStatus, CalcStatus, Energy, ScaffoldEmbedRMSD]
321
322 Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
323 if not CheckAndValidateMolecule(Mol, MolNum):
324 return [MolIndex, None, CoreScaffoldMissingStatus, CalcStatus, Energy, ScaffoldEmbedRMSD]
325
326 # Setup 2D coordinates for SMILES input file...
327 if OptionsInfo["SMILESInfileStatus"]:
328 AllChem.Compute2DCoords(Mol)
329
330 # Setup a reference molecule core containing common scaffold atoms...
331 RefMolCore = SetupCoreScaffold(OptionsInfo["RefMol"], Mol, MolNum)
332 if RefMolCore is None:
333 CoreScaffoldMissingStatus = True
334 return [MolIndex, EncodedMol, CalcStatus, CoreScaffoldMissingStatus, Energy, ScaffoldEmbedRMSD]
335
336 Mol, CalcStatus, Energy, ScaffoldEmbedRMSD = ConstrainAndMinimizeMolecule(
337 OptionsInfo["psi4"], Mol, RefMolCore, MolNum
338 )
339
340 return [
341 MolIndex,
342 RDKitUtil.MolToBase64EncodedMolString(
343 Mol, PropertyPickleFlags=Chem.PropertyPickleOptions.MolProps | Chem.PropertyPickleOptions.PrivateProps
344 ),
345 CoreScaffoldMissingStatus,
346 CalcStatus,
347 Energy,
348 ScaffoldEmbedRMSD,
349 ]
350
351
352 def ProcessMoleculesUsingMultipleProcessesAtConformersLevel(RefMol, Mols, Writer):
353 """Process and minimize molecules using multiprocessing at conformers level."""
354
355 MiscUtil.PrintInfo(
356 "\nPerforming constrained minimization with generation of conformers using multiprocessing at conformers level..."
357 )
358
359 (MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount, WriteFailedCount) = [0] * 5
360
361 for Mol in Mols:
362 MolCount += 1
363
364 if not CheckAndValidateMolecule(Mol, MolCount):
365 continue
366
367 # Setup 2D coordinates for SMILES input file...
368 if OptionsInfo["SMILESInfileStatus"]:
369 AllChem.Compute2DCoords(Mol)
370
371 ValidMolCount += 1
372
373 # Setup a reference molecule core containing common scaffold atoms...
374 RefMolCore = SetupCoreScaffold(RefMol, Mol, MolCount)
375 if RefMolCore is None:
376 CoreScaffoldMissingCount += 1
377 continue
378
379 Mol, CalcStatus, Energy, ScaffoldEmbedRMSD = ProcessConformersUsingMultipleProcesses(Mol, RefMolCore, MolCount)
380
381 if not CalcStatus:
382 MinimizationFailedCount += 1
383 continue
384
385 WriteStatus = WriteMolecule(Writer, Mol, MolCount, Energy, ScaffoldEmbedRMSD)
386
387 if not WriteStatus:
388 WriteFailedCount += 1
389
390 return (MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount, WriteFailedCount)
391
392
393 def ProcessConformersUsingMultipleProcesses(Mol, RefMolCore, MolNum):
394 """Generate conformers and minimize them using multiple processes."""
395
396 # Add hydrogens...
397 Mol = Chem.AddHs(Mol, addCoords=True)
398
399 MolName = RDKitUtil.GetMolName(Mol, MolNum)
400
401 # Setup constrained conformers...
402 MolConfs, MolConfsStatus = ConstrainEmbedAndMinimizeMoleculeUsingRDKit(Mol, RefMolCore, MolNum)
403 if not MolConfsStatus:
404 if not OptionsInfo["QuietMode"]:
405 MiscUtil.PrintWarning(
406 "Conformation generation couldn't be performed for molecule %s: Constrained embedding failed...\n"
407 % MolName
408 )
409 return (Mol, False, None, None)
410
411 MPParams = OptionsInfo["MPParams"]
412
413 # Setup data for initializing a worker process...
414 OptionsInfo["EncodedRefMolCore"] = RDKitUtil.MolToBase64EncodedMolString(RefMolCore)
415 InitializeWorkerProcessArgs = (
416 MiscUtil.ObjectToBase64EncodedString(Options),
417 MiscUtil.ObjectToBase64EncodedString(OptionsInfo),
418 )
419
420 # Setup a encoded mols data iterable for a worker process...
421 WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(MolConfs)
422
423 # Setup process pool along with data initialization for each process...
424 if not OptionsInfo["QuietMode"]:
425 MiscUtil.PrintInfo(
426 "\nConfiguring multiprocessing using %s method..."
427 % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()")
428 )
429 MiscUtil.PrintInfo(
430 "NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n"
431 % (
432 MPParams["NumProcesses"],
433 MPParams["InputDataMode"],
434 ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"]),
435 )
436 )
437
438 ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeConformerWorkerProcess, InitializeWorkerProcessArgs)
439
440 # Start processing...
441 if re.match("^Lazy$", MPParams["InputDataMode"], re.I):
442 Results = ProcessPool.imap(ConformerWorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
443 elif re.match("^InMemory$", MPParams["InputDataMode"], re.I):
444 Results = ProcessPool.map(ConformerWorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
445 else:
446 MiscUtil.PrintError(
447 'The value, %s, specified for "--inputDataMode" is not supported.' % (MPParams["InputDataMode"])
448 )
449
450 ConfNums = []
451 MolConfsMap = {}
452 CalcEnergyMap = {}
453 CalcFailedCount = 0
454
455 for Result in Results:
456 ConfNum, EncodedMolConf, CalcStatus, Energy = Result
457
458 if EncodedMolConf is None:
459 CalcFailedCount += 1
460 continue
461
462 if not CalcStatus:
463 CalcFailedCount += 1
464 continue
465
466 MolConf = RDKitUtil.MolFromBase64EncodedMolString(EncodedMolConf)
467
468 ConfNums.append(ConfNum)
469 CalcEnergyMap[ConfNum] = Energy
470 MolConfsMap[ConfNum] = MolConf
471
472 if CalcFailedCount:
473 return (Mol, False, None, None)
474
475 SortedConfNums = sorted(ConfNums, key=lambda ConfNum: CalcEnergyMap[ConfNum])
476 MinEnergyConfNum = SortedConfNums[0]
477
478 MinEnergy = (
479 "%.*f" % (OptionsInfo["Precision"], CalcEnergyMap[MinEnergyConfNum]) if OptionsInfo["EnergyOut"] else None
480 )
481 MinEnergyMolConf = MolConfsMap[MinEnergyConfNum]
482
483 ScaffoldEmbedRMSD = "%.4f" % float(MinEnergyMolConf.GetProp("EmbedRMS")) if OptionsInfo["ScaffoldRMSDOut"] else None
484 MinEnergyMolConf.ClearProp("EmbedRMS")
485
486 return (MinEnergyMolConf, True, MinEnergy, ScaffoldEmbedRMSD)
487
488
489 def InitializeConformerWorkerProcess(*EncodedArgs):
490 """Initialize data for a conformer worker process."""
491
492 global Options, OptionsInfo
493
494 if not OptionsInfo["QuietMode"]:
495 MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid())
496
497 # Decode Options and OptionInfo...
498 Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0])
499 OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1])
500
501 # Decode RefMol...
502 OptionsInfo["RefMolCore"] = RDKitUtil.MolFromBase64EncodedMolString(OptionsInfo["EncodedRefMolCore"])
503
504 # Psi4 is initialized in the worker process to avoid creation of redundant Psi4
505 # output files for each process...
506 OptionsInfo["Psi4Initialized"] = False
507
508
509 def ConformerWorkerProcess(EncodedMolInfo):
510 """Process data for a conformer worker process."""
511
512 if not OptionsInfo["Psi4Initialized"]:
513 InitializePsi4ForWorkerProcess()
514
515 MolConfIndex, EncodedMolConf = EncodedMolInfo
516
517 MolConfNum = MolConfIndex
518 CalcStatus = False
519 Energy = None
520
521 if EncodedMolConf is None:
522 return [MolConfIndex, None, CalcStatus, Energy]
523
524 MolConf = RDKitUtil.MolFromBase64EncodedMolString(EncodedMolConf)
525 MolConfName = RDKitUtil.GetMolName(MolConf, MolConfNum)
526
527 if not OptionsInfo["QuietMode"]:
528 MiscUtil.PrintInfo("Processing molecule %s conformer number %s..." % (MolConfName, MolConfNum))
529
530 # Perform Psi4 constrained minimization...
531 CalcStatus, Energy = ConstrainAndMinimizeMoleculeUsingPsi4(
532 OptionsInfo["psi4"], MolConf, OptionsInfo["RefMolCore"], MolConfNum
533 )
534 if not CalcStatus:
535 if not OptionsInfo["QuietMode"]:
536 MiscUtil.PrintWarning("Minimization couldn't be performed for molecule %s\n" % (MolConfName))
537 return [MolConfIndex, EncodedMolConf, CalcStatus, Energy]
538
539 return [
540 MolConfIndex,
541 RDKitUtil.MolToBase64EncodedMolString(
542 MolConf, PropertyPickleFlags=Chem.PropertyPickleOptions.MolProps | Chem.PropertyPickleOptions.PrivateProps
543 ),
544 CalcStatus,
545 Energy,
546 ]
547
548
549 def InitializePsi4ForWorkerProcess():
550 """Initialize Psi4 for a worker process."""
551
552 if OptionsInfo["Psi4Initialized"]:
553 return
554
555 OptionsInfo["Psi4Initialized"] = True
556
557 if OptionsInfo["MPLevelConformersMode"] and re.match(
558 "auto", OptionsInfo["Psi4RunParams"]["OutputFileSpecified"], re.I
559 ):
560 # Run Psi4 in quiet mode during multiprocessing at Conformers level for 'auto' OutputFile...
561 OptionsInfo["Psi4RunParams"]["OutputFile"] = "quiet"
562 else:
563 # Update output file...
564 OptionsInfo["Psi4RunParams"]["OutputFile"] = Psi4Util.UpdatePsi4OutputFileUsingPID(
565 OptionsInfo["Psi4RunParams"]["OutputFile"], os.getpid()
566 )
567
568 # Intialize Psi4...
569 OptionsInfo["psi4"] = Psi4Util.InitializePsi4(
570 Psi4RunParams=OptionsInfo["Psi4RunParams"],
571 Psi4OptionsParams=OptionsInfo["Psi4OptionsParams"],
572 PrintVersion=False,
573 PrintHeader=True,
574 )
575
576 # Setup max iterations global variable...
577 Psi4Util.UpdatePsi4OptionsParameters(OptionsInfo["psi4"], {"GEOM_MAXITER": OptionsInfo["MaxIters"]})
578
579 # Setup conversion factor for energy units...
580 SetupEnergyConversionFactor(OptionsInfo["psi4"])
581
582
583 def RetrieveReferenceMolecule():
584 """Retrieve and validate reference molecule."""
585
586 RefFile = OptionsInfo["RefFile"]
587
588 MiscUtil.PrintInfo("\nProcessing file %s..." % (RefFile))
589 OptionsInfo["InfileParams"]["AllowEmptyMols"] = False
590 ValidRefMols, RefMolCount, ValidRefMolCount = RDKitUtil.ReadAndValidateMolecules(
591 RefFile, **OptionsInfo["InfileParams"]
592 )
593
594 if ValidRefMolCount == 0:
595 MiscUtil.PrintError("The reference file, %s, contains no valid molecules." % RefFile)
596 elif ValidRefMolCount > 1:
597 MiscUtil.PrintWarning(
598 "The reference file, %s, contains, %d, valid molecules. Using first molecule as the reference molecule..."
599 % (RefFile, ValidRefMolCount)
600 )
601
602 RefMol = ValidRefMols[0]
603
604 if OptionsInfo["UseScaffoldSMARTS"]:
605 ScaffoldPatternMol = Chem.MolFromSmarts(OptionsInfo["ScaffoldSMARTS"])
606 if ScaffoldPatternMol is None:
607 MiscUtil.PrintError(
608 'Failed to create scaffold pattern molecule. The scaffold SMARTS pattern, %s, specified using "-s, --scaffold" option is not valid.'
609 % (OptionsInfo["ScaffoldSMARTS"])
610 )
611
612 if not RefMol.HasSubstructMatch(ScaffoldPatternMol):
613 MiscUtil.PrintError(
614 'The scaffold SMARTS pattern, %s, specified using "-s, --scaffold" option, is missing in the first valid reference molecule.'
615 % (OptionsInfo["ScaffoldSMARTS"])
616 )
617
618 return RefMol
619
620
621 def SetupCoreScaffold(RefMol, Mol, MolCount):
622 """Setup a reference molecule core containing common scaffold atoms between
623 a pair of molecules."""
624
625 if OptionsInfo["UseScaffoldMCS"]:
626 return SetupCoreScaffoldByMCS(RefMol, Mol, MolCount)
627 elif OptionsInfo["UseScaffoldSMARTS"]:
628 return SetupCoreScaffoldBySMARTS(RefMol, Mol, MolCount)
629 else:
630 MiscUtil.PrintError(
631 'The value, %s, specified for "-s, --scaffold" option is not supported.' % (OptionsInfo["Scaffold"])
632 )
633
634
635 def SetupCoreScaffoldByMCS(RefMol, Mol, MolCount):
636 """Setup a reference molecule core containing common scaffold atoms between
637 a pair of molecules using MCS."""
638
639 MCSParams = OptionsInfo["MCSParams"]
640 Mols = [RefMol, Mol]
641
642 MCSResultObject = rdFMCS.FindMCS(
643 Mols,
644 maximizeBonds=MCSParams["MaximizeBonds"],
645 threshold=MCSParams["Threshold"],
646 timeout=MCSParams["TimeOut"],
647 verbose=MCSParams["Verbose"],
648 matchValences=MCSParams["MatchValences"],
649 ringMatchesRingOnly=MCSParams["RingMatchesRingOnly"],
650 completeRingsOnly=MCSParams["CompleteRingsOnly"],
651 matchChiralTag=MCSParams["MatchChiralTag"],
652 atomCompare=MCSParams["AtomCompare"],
653 bondCompare=MCSParams["BondCompare"],
654 seedSmarts=MCSParams["SeedSMARTS"],
655 )
656
657 if MCSResultObject.canceled:
658 if not OptionsInfo["QuietMode"]:
659 MiscUtil.PrintWarning(
660 'MCS failed to identify a common core scaffold between reference moecule and input molecule %s. Specify a different set of parameters using "-m, --mcsParams" option and try again.'
661 % (RDKitUtil.GetMolName(Mol, MolCount))
662 )
663 return None
664
665 CoreNumAtoms = MCSResultObject.numAtoms
666 CoreNumBonds = MCSResultObject.numBonds
667
668 SMARTSCore = MCSResultObject.smartsString
669
670 if not len(SMARTSCore):
671 if not OptionsInfo["QuietMode"]:
672 MiscUtil.PrintWarning(
673 'MCS failed to identify a common core scaffold between reference moecule and input molecule %s. Specify a different set of parameters using "-m, --mcsParams" option and try again.'
674 % (RDKitUtil.GetMolName(Mol, MolCount))
675 )
676 return None
677
678 if CoreNumAtoms < MCSParams["MinNumAtoms"]:
679 if not OptionsInfo["QuietMode"]:
680 MiscUtil.PrintWarning(
681 'Number of atoms, %d, in core scaffold identified by MCS is less than, %d, as specified by "minNumAtoms" parameter in "-m, --mcsParams" option.'
682 % (CoreNumAtoms, MCSParams["MinNumAtoms"])
683 )
684 return None
685
686 if CoreNumBonds < MCSParams["MinNumBonds"]:
687 if not OptionsInfo["QuietMode"]:
688 MiscUtil.PrintWarning(
689 'Number of bonds, %d, in core scaffold identified by MCS is less than, %d, as specified by "minNumBonds" parameter in "-m, --mcsParams" option.'
690 % (CoreNumBonds, MCSParams["MinNumBonds"])
691 )
692 return None
693
694 return GenerateCoreMol(RefMol, SMARTSCore)
695
696
697 def SetupCoreScaffoldBySMARTS(RefMol, Mol, MolCount):
698 """Setup a reference molecule core containing common scaffold atoms between
699 a pair of molecules using specified SMARTS."""
700
701 if OptionsInfo["ScaffoldPatternMol"] is None:
702 OptionsInfo["ScaffoldPatternMol"] = Chem.MolFromSmarts(OptionsInfo["ScaffoldSMARTS"])
703
704 if not Mol.HasSubstructMatch(OptionsInfo["ScaffoldPatternMol"]):
705 if not OptionsInfo["QuietMode"]:
706 MiscUtil.PrintWarning(
707 'The scaffold SMARTS pattern, %s, specified using "-s, --scaffold" option is missing in input molecule, %s.'
708 % (OptionsInfo["ScaffoldSMARTS"], RDKitUtil.GetMolName(Mol, MolCount))
709 )
710 return None
711
712 return GenerateCoreMol(RefMol, OptionsInfo["ScaffoldSMARTS"])
713
714
715 def GenerateCoreMol(RefMol, SMARTSCore):
716 """Generate core molecule for embedding."""
717
718 # Create a molecule corresponding to core atoms...
719 SMARTSCoreMol = Chem.MolFromSmarts(SMARTSCore)
720
721 # Setup a ref molecule containing core atoms with dummy atoms as
722 # attachment points for atoms around the core atoms...
723 Core = AllChem.ReplaceSidechains(Chem.RemoveHs(RefMol), SMARTSCoreMol)
724
725 # Delete any substructures containing dummy atoms..
726 RefMolCore = AllChem.DeleteSubstructs(Core, Chem.MolFromSmiles("*"))
727 RefMolCore.UpdatePropertyCache()
728
729 return RefMolCore
730
731
732 def ConstrainAndMinimizeMolecule(Psi4Handle, Mol, RefMolCore, MolNum=None):
733 """Constrain and minimize molecule."""
734
735 # Add hydrogens...
736 Mol = Chem.AddHs(Mol, addCoords=True)
737
738 MolName = RDKitUtil.GetMolName(Mol, MolNum)
739
740 # Setup constrained conformers...
741 MolConfs, MolConfsStatus = ConstrainEmbedAndMinimizeMoleculeUsingRDKit(Mol, RefMolCore, MolNum)
742 if not MolConfsStatus:
743 if not OptionsInfo["QuietMode"]:
744 MiscUtil.PrintWarning(
745 "Conformation generation couldn't be performed for molecule %s: Constrained embedding failed...\n"
746 % MolName
747 )
748 return (Mol, False, None, None)
749
750 # Minimize conformers...
751 ConfNums = []
752 CalcEnergyMap = {}
753 MolConfsMap = {}
754
755 for ConfNum, MolConf in enumerate(MolConfs):
756 if not OptionsInfo["QuietMode"]:
757 MiscUtil.PrintInfo("\nProcessing molecule %s conformer number %s..." % (MolName, ConfNum))
758
759 CalcStatus, Energy = ConstrainAndMinimizeMoleculeUsingPsi4(Psi4Handle, MolConf, RefMolCore, MolNum)
760 if not CalcStatus:
761 if not OptionsInfo["QuietMode"]:
762 MiscUtil.PrintWarning("Minimization couldn't be performed for molecule %s\n" % (MolName))
763 return (Mol, False, None, None)
764
765 ConfNums.append(ConfNum)
766 CalcEnergyMap[ConfNum] = Energy
767 MolConfsMap[ConfNum] = MolConf
768
769 SortedConfNums = sorted(ConfNums, key=lambda ConfNum: CalcEnergyMap[ConfNum])
770 MinEnergyConfNum = SortedConfNums[0]
771
772 MinEnergy = (
773 "%.*f" % (OptionsInfo["Precision"], CalcEnergyMap[MinEnergyConfNum]) if OptionsInfo["EnergyOut"] else None
774 )
775 MinEnergyMolConf = MolConfsMap[MinEnergyConfNum]
776
777 ScaffoldEmbedRMSD = "%.4f" % float(MinEnergyMolConf.GetProp("EmbedRMS")) if OptionsInfo["ScaffoldRMSDOut"] else None
778 MinEnergyMolConf.ClearProp("EmbedRMS")
779
780 return (MinEnergyMolConf, True, MinEnergy, ScaffoldEmbedRMSD)
781
782
783 def ConstrainAndMinimizeMoleculeUsingPsi4(Psi4Handle, Mol, RefMolCore, MolNum, ConfID=-1):
784 """Minimize molecule using Psi4."""
785
786 # Setup a list for constrained atoms...
787 ConstrainedAtomIndices = SetupConstrainedAtomIndicesForPsi4(Mol, RefMolCore)
788 if len(ConstrainedAtomIndices) == 0:
789 return (False, None)
790
791 # Setup a Psi4Mol...
792 Psi4Mol = SetupPsi4Mol(Psi4Handle, Mol, MolNum, ConfID)
793 if Psi4Mol is None:
794 return (False, None)
795
796 # Setup reference wave function...
797 Reference = SetupReferenceWavefunction(Mol)
798 Psi4Handle.set_options({"Reference": Reference})
799
800 # Setup method name and basis set...
801 MethodName, BasisSet = SetupMethodNameAndBasisSet(Mol)
802
803 # Setup freeze list for constrained atoms...
804 FreezeXYZList = [("%s xyz" % AtomIdex) for AtomIdex in ConstrainedAtomIndices]
805 FreezeXYZString = " %s " % " ".join(FreezeXYZList)
806 Psi4Handle.set_options({"OPTKING__frozen_cartesian": FreezeXYZString})
807
808 # Optimize geometry...
809 Status, Energy, WaveFunction = Psi4Util.PerformGeometryOptimization(
810 Psi4Handle, Psi4Mol, MethodName, BasisSet, ReturnWaveFunction=True, Quiet=OptionsInfo["QuietMode"]
811 )
812
813 if not Status:
814 PerformPsi4Cleanup(Psi4Handle)
815 return (False, None)
816
817 # Update atom positions...
818 AtomPositions = Psi4Util.GetAtomPositions(Psi4Handle, WaveFunction, InAngstroms=True)
819 RDKitUtil.SetAtomPositions(Mol, AtomPositions, ConfID=ConfID)
820
821 # Convert energy units...
822 if OptionsInfo["ApplyEnergyConversionFactor"]:
823 Energy = Energy * OptionsInfo["EnergyConversionFactor"]
824
825 # Clean up
826 PerformPsi4Cleanup(Psi4Handle)
827
828 return (True, Energy)
829
830
831 def ConstrainEmbedAndMinimizeMoleculeUsingRDKit(Mol, RefMolCore, MolNum=None):
832 """Constrain, embed, and minimize molecule."""
833
834 # Setup forcefield function to use for constrained minimization...
835 ForceFieldFunction = None
836 ForceFieldName = None
837 if OptionsInfo["ConfGenerationParams"]["UseUFF"]:
838 ForceFieldFunction = lambda mol, confId=-1: AllChem.UFFGetMoleculeForceField(mol, confId=confId)
839 ForceFieldName = "UFF"
840 else:
841 ForceFieldFunction = lambda mol, confId=-1: AllChem.MMFFGetMoleculeForceField(
842 mol,
843 AllChem.MMFFGetMoleculeProperties(mol, mmffVariant=OptionsInfo["ConfGenerationParams"]["MMFFVariant"]),
844 confId=confId,
845 )
846 ForceFieldName = "MMFF"
847
848 if ForceFieldFunction is None:
849 if not OptionsInfo["QuietMode"]:
850 MiscUtil.PrintWarning(
851 "Failed to setup forcefield %s for molecule: %s\n" % (ForceFieldName, RDKitUtil.GetMolName(Mol, MolNum))
852 )
853 return (None, False)
854
855 MaxConfs = OptionsInfo["ConfGenerationParams"]["MaxConfs"]
856 EnforceChirality = OptionsInfo["ConfGenerationParams"]["EnforceChirality"]
857 UseExpTorsionAnglePrefs = OptionsInfo["ConfGenerationParams"]["UseExpTorsionAnglePrefs"]
858 ETVersion = OptionsInfo["ConfGenerationParams"]["ETVersion"]
859 UseBasicKnowledge = OptionsInfo["ConfGenerationParams"]["UseBasicKnowledge"]
860 UseTethers = OptionsInfo["ConfGenerationParams"]["UseTethers"]
861
862 MolConfs = []
863 ConfIDs = [ConfID for ConfID in range(0, MaxConfs)]
864
865 for ConfID in ConfIDs:
866 try:
867 MolConf = Chem.Mol(Mol)
868 AllChem.ConstrainedEmbed(
869 MolConf,
870 RefMolCore,
871 useTethers=UseTethers,
872 coreConfId=-1,
873 randomseed=ConfID,
874 getForceField=ForceFieldFunction,
875 enforceChirality=EnforceChirality,
876 useExpTorsionAnglePrefs=UseExpTorsionAnglePrefs,
877 useBasicKnowledge=UseBasicKnowledge,
878 ETversion=ETVersion,
879 )
880 except (ValueError, RuntimeError, Chem.rdchem.KekulizeException) as ErrMsg:
881 if not OptionsInfo["QuietMode"]:
882 MiscUtil.PrintWarning(
883 "Constrained embedding couldn't be performed for molecule %s:\n%s\n"
884 % (RDKitUtil.GetMolName(Mol, MolNum), ErrMsg)
885 )
886 return (None, False)
887
888 MolConfs.append(MolConf)
889
890 return FilterConstrainedConformationsByRMSD(Mol, MolConfs, MolNum)
891
892
893 def FilterConstrainedConformationsByRMSD(Mol, MolConfs, MolNum=None):
894 """Filter contarained conformations by RMSD."""
895
896 EmbedRMSDCutoff = OptionsInfo["ConfGenerationParams"]["EmbedRMSDCutoff"]
897 if EmbedRMSDCutoff is None or EmbedRMSDCutoff <= 0:
898 if not OptionsInfo["QuietMode"]:
899 MiscUtil.PrintInfo(
900 "\nGenerating initial ensemble of constrained conformations by distance geometry and forcefield for %s - EmbedRMSDCutoff: None; Size: %s"
901 % (RDKitUtil.GetMolName(Mol, MolNum), len(MolConfs))
902 )
903 return (MolConfs, True)
904
905 FirstMolConf = True
906 SelectedMolConfs = []
907 for MolConfIndex, MolConf in enumerate(MolConfs):
908 if FirstMolConf:
909 FirstMolConf = False
910 SelectedMolConfs.append(MolConf)
911 continue
912
913 # Compare RMSD against all selected conformers...
914 ProbeMolConf = Chem.RemoveHs(Chem.Mol(MolConf))
915 IgnoreConf = False
916 for SelectedMolConfIndex, SelectedMolConf in enumerate(SelectedMolConfs):
917 RefMolConf = Chem.RemoveHs(Chem.Mol(SelectedMolConf))
918 CalcRMSD = rdMolAlign.AlignMol(ProbeMolConf, RefMolConf)
919
920 if CalcRMSD < EmbedRMSDCutoff:
921 IgnoreConf = True
922 break
923
924 if IgnoreConf:
925 continue
926
927 SelectedMolConfs.append(MolConf)
928
929 if not OptionsInfo["QuietMode"]:
930 MiscUtil.PrintInfo(
931 "\nGenerating initial ensemble of constrained conformations by distance geometry and forcefield for %s - EmbedRMSDCutoff: %s; Size: %s; Size after RMSD filtering: %s"
932 % (RDKitUtil.GetMolName(Mol, MolNum), EmbedRMSDCutoff, len(MolConfs), len(SelectedMolConfs))
933 )
934
935 return (SelectedMolConfs, True)
936
937
938 def SetupConstrainedAtomIndicesForPsi4(Mol, RefMolCore, ConstrainHydrogens=False):
939 """Setup a list of atom indices to be constrained during Psi4 minimizaiton."""
940
941 AtomIndices = []
942
943 # Collect matched heavy atoms along with attached hydrogens...
944 for AtomIndex in Mol.GetSubstructMatch(RefMolCore):
945 Atom = Mol.GetAtomWithIdx(AtomIndex)
946 if Atom.GetAtomicNum() == 1:
947 continue
948
949 AtomIndices.append(AtomIndex)
950 for AtomNbr in Atom.GetNeighbors():
951 if AtomNbr.GetAtomicNum() == 1:
952 if ConstrainHydrogens:
953 AtomNbrIndex = AtomNbr.GetIdx()
954 AtomIndices.append(AtomNbrIndex)
955
956 AtomIndices = sorted(AtomIndices)
957
958 # Atom indices start from 1 for Psi4 instead 0 for RDKit...
959 AtomIndices = [AtomIndex + 1 for AtomIndex in AtomIndices]
960
961 return AtomIndices
962
963
964 def SetupPsi4Mol(Psi4Handle, Mol, MolNum, ConfID=-1):
965 """Setup a Psi4 molecule object."""
966
967 # Turn off recentering and reorientation to perform optimization in the
968 # constrained coordinate space...
969 MolGeometry = RDKitUtil.GetPsi4XYZFormatString(Mol, ConfID=ConfID, NoCom=True, NoReorient=True)
970
971 try:
972 Psi4Mol = Psi4Handle.geometry(MolGeometry)
973 except Exception as ErrMsg:
974 Psi4Mol = None
975 if not OptionsInfo["QuietMode"]:
976 MiscUtil.PrintWarning("Failed to create Psi4 molecule from geometry string: %s\n" % ErrMsg)
977 MolName = RDKitUtil.GetMolName(Mol, MolNum)
978 MiscUtil.PrintWarning("Ignoring molecule: %s" % MolName)
979
980 return Psi4Mol
981
982
983 def PerformPsi4Cleanup(Psi4Handle):
984 """Perform clean up."""
985
986 # Clean up after Psi4 run...
987 Psi4Handle.core.clean()
988
989 # Clean up any leftover scratch files...
990 if OptionsInfo["MPMode"]:
991 Psi4Util.RemoveScratchFiles(Psi4Handle, OptionsInfo["Psi4RunParams"]["OutputFile"])
992
993
994 def CheckAndValidateMolecule(Mol, MolCount=None):
995 """Validate molecule for Psi4 calculations."""
996
997 if Mol is None:
998 if not OptionsInfo["QuietMode"]:
999 MiscUtil.PrintInfo("\nProcessing molecule number %s..." % MolCount)
1000 return False
1001
1002 MolName = RDKitUtil.GetMolName(Mol, MolCount)
1003 if not OptionsInfo["QuietMode"]:
1004 MiscUtil.PrintInfo("\nProcessing molecule %s..." % MolName)
1005
1006 if RDKitUtil.IsMolEmpty(Mol):
1007 if not OptionsInfo["QuietMode"]:
1008 MiscUtil.PrintWarning("Ignoring empty molecule: %s\n" % MolName)
1009 return False
1010
1011 if not RDKitUtil.ValidateElementSymbols(RDKitUtil.GetAtomSymbols(Mol)):
1012 if not OptionsInfo["QuietMode"]:
1013 MiscUtil.PrintWarning("Ignoring molecule containing invalid element symbols: %s\n" % MolName)
1014 return False
1015
1016 return True
1017
1018
1019 def SetupMethodNameAndBasisSet(Mol):
1020 """Setup method name and basis set."""
1021
1022 MethodName = OptionsInfo["MethodName"]
1023 if OptionsInfo["MethodNameAuto"]:
1024 MethodName = "B3LYP"
1025
1026 BasisSet = OptionsInfo["BasisSet"]
1027 if OptionsInfo["BasisSetAuto"]:
1028 BasisSet = "6-31+G**" if RDKitUtil.IsAtomSymbolPresentInMol(Mol, "S") else "6-31G**"
1029
1030 return (MethodName, BasisSet)
1031
1032
1033 def SetupReferenceWavefunction(Mol):
1034 """Setup reference wavefunction."""
1035
1036 Reference = OptionsInfo["Reference"]
1037 if OptionsInfo["ReferenceAuto"]:
1038 Reference = "UHF" if (RDKitUtil.GetSpinMultiplicity(Mol) > 1) else "RHF"
1039
1040 return Reference
1041
1042
1043 def SetupEnergyConversionFactor(Psi4Handle):
1044 """Setup converstion factor for energt units. The Psi4 energy units are Hartrees."""
1045
1046 EnergyUnits = OptionsInfo["EnergyUnits"]
1047
1048 ApplyConversionFactor = True
1049 if re.match(r"^kcal\/mol$", EnergyUnits, re.I):
1050 ConversionFactor = Psi4Handle.constants.hartree2kcalmol
1051 elif re.match(r"^kJ\/mol$", EnergyUnits, re.I):
1052 ConversionFactor = Psi4Handle.constants.hartree2kJmol
1053 elif re.match("^eV$", EnergyUnits, re.I):
1054 ConversionFactor = Psi4Handle.constants.hartree2ev
1055 else:
1056 ApplyConversionFactor = False
1057 ConversionFactor = 1.0
1058
1059 OptionsInfo["ApplyEnergyConversionFactor"] = ApplyConversionFactor
1060 OptionsInfo["EnergyConversionFactor"] = ConversionFactor
1061
1062
1063 def WriteMolecule(
1064 Writer,
1065 Mol,
1066 MolNum=None,
1067 Energy=None,
1068 ScaffoldEmbedRMSD=None,
1069 ConfID=None,
1070 ):
1071 """Write molecule."""
1072
1073 if ScaffoldEmbedRMSD is not None:
1074 Mol.SetProp("CoreScaffoldEmbedRMSD", ScaffoldEmbedRMSD)
1075
1076 if OptionsInfo["EnergyOut"] and Energy is not None:
1077 Mol.SetProp(OptionsInfo["EnergyDataFieldLabel"], Energy)
1078
1079 try:
1080 if ConfID is None:
1081 Writer.write(Mol)
1082 else:
1083 Writer.write(Mol, confId=ConfID)
1084 except (ValueError, RuntimeError) as ErrMsg:
1085 if not OptionsInfo["QuietMode"]:
1086 MolName = RDKitUtil.GetMolName(Mol, MolNum)
1087 MiscUtil.PrintWarning("Failed to write molecule %s:\n%s\n" % (MolName, ErrMsg))
1088 return False
1089
1090 return True
1091
1092
1093 def ProcessMCSParameters():
1094 """Set up and process MCS parameters."""
1095
1096 SetupMCSParameters()
1097 ProcessSpecifiedMCSParameters()
1098
1099
1100 def SetupMCSParameters():
1101 """Set up default MCS parameters."""
1102
1103 OptionsInfo["MCSParams"] = {
1104 "MaximizeBonds": True,
1105 "Threshold": 0.9,
1106 "TimeOut": 3600,
1107 "Verbose": False,
1108 "MatchValences": True,
1109 "MatchChiralTag": False,
1110 "RingMatchesRingOnly": True,
1111 "CompleteRingsOnly": True,
1112 "AtomCompare": rdFMCS.AtomCompare.CompareElements,
1113 "BondCompare": rdFMCS.BondCompare.CompareOrder,
1114 "SeedSMARTS": "",
1115 "MinNumAtoms": 1,
1116 "MinNumBonds": 0,
1117 }
1118
1119
1120 def ProcessSpecifiedMCSParameters():
1121 """Process specified MCS parameters."""
1122
1123 if re.match("^auto$", OptionsInfo["SpecifiedMCSParams"], re.I):
1124 # Nothing to process...
1125 return
1126
1127 # Parse specified parameters...
1128 MCSParams = re.sub(" ", "", OptionsInfo["SpecifiedMCSParams"])
1129 if not MCSParams:
1130 MiscUtil.PrintError('No valid parameter name and value pairs specified using "-m, --mcsParams" option.')
1131
1132 MCSParamsWords = MCSParams.split(",")
1133 if len(MCSParamsWords) % 2:
1134 MiscUtil.PrintError(
1135 'The number of comma delimited paramater names and values, %d, specified using "-m, --mcsParams" option must be an even number.'
1136 % (len(MCSParamsWords))
1137 )
1138
1139 # Setup canonical parameter names...
1140 ValidParamNames = []
1141 CanonicalParamNamesMap = {}
1142 for ParamName in sorted(OptionsInfo["MCSParams"]):
1143 ValidParamNames.append(ParamName)
1144 CanonicalParamNamesMap[ParamName.lower()] = ParamName
1145
1146 # Validate and set paramater names and value...
1147 for Index in range(0, len(MCSParamsWords), 2):
1148 Name = MCSParamsWords[Index]
1149 Value = MCSParamsWords[Index + 1]
1150
1151 CanonicalName = Name.lower()
1152 if CanonicalName not in CanonicalParamNamesMap:
1153 MiscUtil.PrintError(
1154 'The parameter name, %s, specified using "-m, --mcsParams" option is not a valid name. Supported parameter names: %s'
1155 % (Name, " ".join(ValidParamNames))
1156 )
1157
1158 ParamName = CanonicalParamNamesMap[CanonicalName]
1159 if re.match("^Threshold$", ParamName, re.I):
1160 Value = float(Value)
1161 if Value <= 0.0 or Value > 1.0:
1162 MiscUtil.PrintError(
1163 'The parameter value, %s, specified using "-m, --mcsParams" option for parameter, %s, is not a valid value. Supported values: > 0 and <= 1.0'
1164 % (Value, Name)
1165 )
1166 ParamValue = Value
1167 elif re.match("^Timeout$", ParamName, re.I):
1168 Value = int(Value)
1169 if Value <= 0:
1170 MiscUtil.PrintError(
1171 'The parameter value, %s, specified using "-m, --mcsParams" option for parameter, %s, is not a valid value. Supported values: > 0'
1172 % (Value, Name)
1173 )
1174 ParamValue = Value
1175 elif re.match("^MinNumAtoms$", ParamName, re.I):
1176 Value = int(Value)
1177 if Value < 1:
1178 MiscUtil.PrintError(
1179 'The parameter value, %s, specified using "-m, --mcsParams" option for parameter, %s, is not a valid value. Supported values: >= 1'
1180 % (Value, Name)
1181 )
1182 ParamValue = Value
1183 elif re.match("^MinNumBonds$", ParamName, re.I):
1184 Value = int(Value)
1185 if Value < 0:
1186 MiscUtil.PrintError(
1187 'The parameter value, %s, specified using "-m, --mcsParams" option for parameter, %s, is not a valid value. Supported values: >=0 '
1188 % (Value, Name)
1189 )
1190 ParamValue = Value
1191 elif re.match("^AtomCompare$", ParamName, re.I):
1192 if re.match("^CompareAny$", Value, re.I):
1193 ParamValue = rdFMCS.AtomCompare.CompareAny
1194 elif re.match("^CompareElements$", Value, re.I):
1195 ParamValue = Chem.rdFMCS.AtomCompare.CompareElements
1196 elif re.match("^CompareIsotopes$", Value, re.I):
1197 ParamValue = Chem.rdFMCS.AtomCompare.CompareIsotopes
1198 else:
1199 MiscUtil.PrintError(
1200 'The parameter value, %s, specified using "-m, --mcsParams" option for parameter, %s, is not a valid value. Supported values: CompareAny CompareElements CompareIsotopes'
1201 % (Value, Name)
1202 )
1203 elif re.match("^BondCompare$", ParamName, re.I):
1204 if re.match("^CompareAny$", Value, re.I):
1205 ParamValue = Chem.rdFMCS.BondCompare.CompareAny
1206 elif re.match("^CompareOrder$", Value, re.I):
1207 ParamValue = rdFMCS.BondCompare.CompareOrder
1208 elif re.match("^CompareOrderExact$", Value, re.I):
1209 ParamValue = rdFMCS.BondCompare.CompareOrderExact
1210 else:
1211 MiscUtil.PrintError(
1212 'The parameter value, %s, specified using "-m, --mcsParams" option for parameter, %s, is not a valid value. Supported values: CompareAny CompareOrder CompareOrderExact'
1213 % (Value, Name)
1214 )
1215 elif re.match("^SeedSMARTS$", ParamName, re.I):
1216 if not len(Value):
1217 MiscUtil.PrintError(
1218 'The parameter value specified using "-m, --mcsParams" option for parameter, %s, is empty. '
1219 % (Name)
1220 )
1221 ParamValue = Value
1222 else:
1223 if not re.match("^(Yes|No|True|False)$", Value, re.I):
1224 MiscUtil.PrintError(
1225 'The parameter value, %s, specified using "-m, --mcsParams" option for parameter, %s, is not a valid value. Supported values: Yes No True False'
1226 % (Value, Name)
1227 )
1228 ParamValue = False
1229 if re.match("^(Yes|True)$", Value, re.I):
1230 ParamValue = True
1231
1232 # Set value...
1233 OptionsInfo["MCSParams"][ParamName] = ParamValue
1234
1235
1236 def ProcessOptions():
1237 """Process and validate command line arguments and options."""
1238
1239 MiscUtil.PrintInfo("Processing options...")
1240
1241 # Validate options...
1242 ValidateOptions()
1243
1244 OptionsInfo["Infile"] = Options["--infile"]
1245 OptionsInfo["SMILESInfileStatus"] = True if MiscUtil.CheckFileExt(Options["--infile"], "smi csv tsv txt") else False
1246 ParamsDefaultInfoOverride = {"RemoveHydrogens": False}
1247 OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters(
1248 "--infileParams",
1249 Options["--infileParams"],
1250 InfileName=Options["--infile"],
1251 ParamsDefaultInfo=ParamsDefaultInfoOverride,
1252 )
1253
1254 OptionsInfo["RefFile"] = Options["--reffile"]
1255
1256 OptionsInfo["Outfile"] = Options["--outfile"]
1257 OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters(
1258 "--outfileParams", Options["--outfileParams"]
1259 )
1260
1261 OptionsInfo["Overwrite"] = Options["--overwrite"]
1262
1263 OptionsInfo["Scaffold"] = Options["--scaffold"]
1264 if re.match("^auto$", Options["--scaffold"], re.I):
1265 UseScaffoldMCS = True
1266 UseScaffoldSMARTS = False
1267 ScaffoldSMARTS = None
1268 else:
1269 UseScaffoldMCS = False
1270 UseScaffoldSMARTS = True
1271 ScaffoldSMARTS = OptionsInfo["Scaffold"]
1272
1273 OptionsInfo["UseScaffoldMCS"] = UseScaffoldMCS
1274 OptionsInfo["UseScaffoldSMARTS"] = UseScaffoldSMARTS
1275 OptionsInfo["ScaffoldSMARTS"] = ScaffoldSMARTS
1276 OptionsInfo["ScaffoldPatternMol"] = None
1277
1278 OptionsInfo["SpecifiedMCSParams"] = Options["--mcsParams"]
1279 ProcessMCSParameters()
1280
1281 OptionsInfo["ScaffoldRMSDOut"] = True if re.match("^yes$", Options["--scaffoldRMSDOut"], re.I) else False
1282
1283 # Method, basis set, and reference wavefunction...
1284 OptionsInfo["BasisSet"] = Options["--basisSet"]
1285 OptionsInfo["BasisSetAuto"] = True if re.match("^auto$", Options["--basisSet"], re.I) else False
1286
1287 OptionsInfo["MethodName"] = Options["--methodName"]
1288 OptionsInfo["MethodNameAuto"] = True if re.match("^auto$", Options["--methodName"], re.I) else False
1289
1290 OptionsInfo["Reference"] = Options["--reference"]
1291 OptionsInfo["ReferenceAuto"] = True if re.match("^auto$", Options["--reference"], re.I) else False
1292
1293 # Run and options parameters...
1294 OptionsInfo["Psi4OptionsParams"] = Psi4Util.ProcessPsi4OptionsParameters(
1295 "--psi4OptionsParams", Options["--psi4OptionsParams"]
1296 )
1297 OptionsInfo["Psi4RunParams"] = Psi4Util.ProcessPsi4RunParameters(
1298 "--psi4RunParams", Options["--psi4RunParams"], InfileName=OptionsInfo["Infile"]
1299 )
1300
1301 # Conformer generation paramaters...
1302 ParamsDefaultInfoOverride = {"MaxConfs": 50}
1303 OptionsInfo["ConfGenerationParams"] = MiscUtil.ProcessOptionConformerParameters(
1304 "--confParams", Options["--confParams"], ParamsDefaultInfoOverride
1305 )
1306
1307 # Write energy parameters...
1308 OptionsInfo["EnergyOut"] = True if re.match("^yes$", Options["--energyOut"], re.I) else False
1309 OptionsInfo["EnergyUnits"] = Options["--energyUnits"]
1310
1311 EnergyDataFieldLabel = Options["--energyDataFieldLabel"]
1312 if re.match("^auto$", EnergyDataFieldLabel, re.I):
1313 EnergyDataFieldLabel = "Psi4_Energy (%s)" % Options["--energyUnits"]
1314 OptionsInfo["EnergyDataFieldLabel"] = EnergyDataFieldLabel
1315
1316 OptionsInfo["MaxIters"] = int(Options["--maxIters"])
1317
1318 OptionsInfo["MPMode"] = True if re.match("^yes$", Options["--mp"], re.I) else False
1319 OptionsInfo["MPParams"] = MiscUtil.ProcessOptionMultiprocessingParameters("--mpParams", Options["--mpParams"])
1320
1321 # Multiprocessing level...
1322 MPLevelMoleculesMode = False
1323 MPLevelConformersMode = False
1324 MPLevel = Options["--mpLevel"]
1325 if re.match("^Molecules$", MPLevel, re.I):
1326 MPLevelMoleculesMode = True
1327 elif re.match("^Conformers$", MPLevel, re.I):
1328 MPLevelConformersMode = True
1329 else:
1330 MiscUtil.PrintError('The value, %s, specified for option "--mpLevel" is not valid. ' % MPLevel)
1331 OptionsInfo["MPLevel"] = MPLevel
1332 OptionsInfo["MPLevelMoleculesMode"] = MPLevelMoleculesMode
1333 OptionsInfo["MPLevelConformersMode"] = MPLevelConformersMode
1334
1335 OptionsInfo["Precision"] = int(Options["--precision"])
1336 OptionsInfo["QuietMode"] = True if re.match("^yes$", Options["--quiet"], re.I) else False
1337
1338
1339 def RetrieveOptions():
1340 """Retrieve command line arguments and options."""
1341
1342 # Get options...
1343 global Options
1344 Options = docopt(_docoptUsage_)
1345
1346 # Set current working directory to the specified directory...
1347 WorkingDir = Options["--workingdir"]
1348 if WorkingDir:
1349 os.chdir(WorkingDir)
1350
1351 # Handle examples option...
1352 if "--examples" in Options and Options["--examples"]:
1353 MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_))
1354 sys.exit(0)
1355
1356
1357 def ValidateOptions():
1358 """Validate option values."""
1359
1360 MiscUtil.ValidateOptionTextValue("--energyOut", Options["--energyOut"], "yes no")
1361 MiscUtil.ValidateOptionTextValue("--energyUnits", Options["--energyUnits"], "Hartrees kcal/mol kJ/mol eV")
1362
1363 MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"])
1364 MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "sdf sd mol smi txt csv tsv")
1365
1366 MiscUtil.ValidateOptionFilePath("-r, --reffile", Options["--reffile"])
1367 MiscUtil.ValidateOptionFileExt("-r, --reffile", Options["--reffile"], "sdf sd mol")
1368
1369 MiscUtil.ValidateOptionIntegerValue("--maxIters", Options["--maxIters"], {">": 0})
1370
1371 MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "sdf sd")
1372 MiscUtil.ValidateOptionsOutputFileOverwrite(
1373 "-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"]
1374 )
1375 MiscUtil.ValidateOptionsDistinctFileNames(
1376 "-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"]
1377 )
1378
1379 MiscUtil.ValidateOptionTextValue("--mp", Options["--mp"], "yes no")
1380 MiscUtil.ValidateOptionTextValue("--mpLevel", Options["--mpLevel"], "Molecules Conformers")
1381
1382 MiscUtil.ValidateOptionIntegerValue("-p, --precision", Options["--precision"], {">": 0})
1383 MiscUtil.ValidateOptionTextValue("-q, --quiet", Options["--quiet"], "yes no")
1384
1385 MiscUtil.ValidateOptionTextValue("--scaffoldRMSDOut", Options["--scaffoldRMSDOut"], "yes no")
1386
1387
1388 # Setup a usage string for docopt...
1389 _docoptUsage_ = """
1390 Psi4PerformConstrainedMinimization.py - Perform constrained minimization
1391
1392 Usage:
1393 Psi4PerformConstrainedMinimization.py [--basisSet <text>] [--confParams <Name,Value,...>] [--energyOut <yes or no>]
1394 [--energyDataFieldLabel <text>] [--energyUnits <text>] [--infileParams <Name,Value,...>]
1395 [--maxIters <number>] [--methodName <text>] [--mcsParams <Name,Value,...>]
1396 [--mp <yes or no>] [--mpLevel <Molecules or Conformers>] [--mpParams <Name, Value,...>]
1397 [ --outfileParams <Name,Value,...> ] [--overwrite] [--precision <number> ]
1398 [--psi4OptionsParams <Name,Value,...>] [--psi4RunParams <Name,Value,...>]
1399 [--quiet <yes or no>] [--reference <text>] [--scaffold <auto or SMARTS>]
1400 [--scaffoldRMSDOut <yes or no>] [-w <dir>] -i <infile> -r <reffile> -o <outfile>
1401 Psi4PerformConstrainedMinimization.py -h | --help | -e | --examples
1402
1403 Description:
1404 Generate 3D structures for molecules by performing a constrained energy
1405 minimization against a reference molecule. The 3D structures for molecules
1406 are generated using a combination of distance geometry and forcefield
1407 minimization followed by geometry optimization using a quantum chemistry
1408 method.
1409
1410 An initial set of 3D conformers are generated for input molecules using
1411 distance geometry. A common core scaffold, corresponding to a Maximum
1412 Common Substructure (MCS) or an explicit SMARTS pattern, is identified
1413 between a pair of input and reference molecules. The core scaffold atoms in
1414 input molecules are aligned against the same atoms in the reference molecule.
1415 The energy of aligned structures are sequentially minimized using the forcefield
1416 and a quantum chemistry method. The conformation with the lowest energy is
1417 selected to represent the final structure.
1418
1419 A Psi4 XYZ format geometry string is automatically generated for each molecule
1420 in input file. It contains atom symbols and 3D coordinates for each atom in a
1421 molecule. In addition, the formal charge and spin multiplicity are present in the
1422 the geometry string. These values are either retrieved from molecule properties
1423 named 'FormalCharge' and 'SpinMultiplicty' or dynamically calculated for a
1424 molecule.
1425
1426 The supported input file formats are: Mol (.mol), SD (.sdf, .sd), SMILES (.smi,
1427 .csv, .tsv, .txt)
1428
1429 The supported output file formats are: SD (.sdf, .sd)
1430
1431 Options:
1432 -b, --basisSet <text> [default: auto]
1433 Basis set to use for constrained energy minimization. Default: 6-31+G**
1434 for sulfur containing molecules; Otherwise, 6-31G** [ Ref 150 ]. The specified
1435 value must be a valid Psi4 basis set. No validation is performed.
1436
1437 The following list shows a representative sample of basis sets available
1438 in Psi4:
1439
1440 STO-3G, 6-31G, 6-31+G, 6-31++G, 6-31G*, 6-31+G*, 6-31++G*,
1441 6-31G**, 6-31+G**, 6-31++G**, 6-311G, 6-311+G, 6-311++G,
1442 6-311G*, 6-311+G*, 6-311++G*, 6-311G**, 6-311+G**, 6-311++G**,
1443 cc-pVDZ, cc-pCVDZ, aug-cc-pVDZ, cc-pVDZ-DK, cc-pCVDZ-DK, def2-SVP,
1444 def2-SVPD, def2-TZVP, def2-TZVPD, def2-TZVPP, def2-TZVPPD
1445
1446 --confParams <Name,Value,...> [default: auto]
1447 A comma delimited list of parameter name and value pairs for generating
1448 initial 3D coordinates for molecules in input file. A common core scaffold is
1449 identified between a pair of input and reference molecules. The atoms in
1450 common core scaffold of input molecules are aligned against the reference
1451 molecule followed by constrained energy minimization using forcefield
1452 available in RDKit. The 3D structures are subsequently constrained and
1453 minimized by a quantum chemistry method available in Psi4.
1454
1455 The supported parameter names along with their default values are shown
1456 below:
1457
1458 confMethod,ETKDGv2,
1459 forceField,MMFF, forceFieldMMFFVariant,MMFF94,
1460 enforceChirality,yes,embedRMSDCutoff,0.5,maxConfs,50,
1461 useTethers,yes
1462
1463 confMethod,ETKDGv2 [ Possible values: SDG, KDG, ETDG,
1464 ETKDG , or ETKDGv2]
1465 forceField, MMFF [ Possible values: UFF or MMFF ]
1466 forceFieldMMFFVariant,MMFF94 [ Possible values: MMFF94 or MMFF94s ]
1467 enforceChirality,yes [ Possible values: yes or no ]
1468 useTethers,yes [ Possible values: yes or no ]
1469
1470 confMethod: Conformation generation methodology for generating initial 3D
1471 coordinates. Possible values: Standard Distance Geometry (SDG), Experimental
1472 Torsion-angle preference with Distance Geometry (ETDG), basic Knowledge-terms
1473 with Distance Geometry (KDG) and Experimental Torsion-angle preference
1474 along with basic Knowledge-terms with Distance Geometry (ETKDG or
1475 ETKDGv2) [Ref 129, 167] .
1476
1477 forceField: Forcefield method to use for constrained energy minimization.
1478 Possible values: Universal Force Field (UFF) [ Ref 81 ] or Merck Molecular
1479 Mechanics Force Field [ Ref 83-87 ] .
1480
1481 enforceChirality: Enforce chirality for defined chiral centers during
1482 forcefield minimization.
1483
1484 maxConfs: Maximum number of conformations to generate for each molecule
1485 during the generation of an initial 3D conformation ensemble using conformation
1486 generation methodology. The conformations are constrained and minimized using
1487 the specified forcefield and a quantum chemistry method. The lowest energy
1488 conformation is written to the output file.
1489
1490 embedRMSDCutoff: RMSD cutoff for retaining initial set of conformers embedded
1491 using distance geometry and forcefield minimization. All embedded conformers
1492 are kept for 'None' value. Otherwise, only those conformers which are different
1493 from each other by the specified RMSD cutoff, 0.5 by default, are kept. The first
1494 embedded conformer is always retained.
1495
1496 useTethers: Use tethers to optimize the final embedded conformation by
1497 applying a series of extra forces to align matching atoms to the positions of
1498 the core atoms. Otherwise, use simple distance constraints during the
1499 optimization.
1500 --energyOut <yes or no> [default: yes]
1501 Write out energy values.
1502 --energyDataFieldLabel <text> [default: auto]
1503 Energy data field label for writing energy values. Default: Psi4_Energy (<Units>).
1504 --energyUnits <text> [default: kcal/mol]
1505 Energy units. Possible values: Hartrees, kcal/mol, kJ/mol, or eV.
1506 -e, --examples
1507 Print examples.
1508 -h, --help
1509 Print this help message.
1510 -i, --infile <infile>
1511 Input file name.
1512 --infileParams <Name,Value,...> [default: auto]
1513 A comma delimited list of parameter name and value pairs for reading
1514 molecules from files. The supported parameter names for different file
1515 formats, along with their default values, are shown below:
1516
1517 SD, MOL: removeHydrogens,no,sanitize,yes,strictParsing,yes
1518
1519 SMILES: smilesColumn,1,smilesNameColumn,2,smilesDelimiter,space,
1520 smilesTitleLine,auto,sanitize,yes
1521
1522 Possible values for smilesDelimiter: space, comma or tab.
1523 --maxIters <number> [default: 50]
1524 Maximum number of iterations to perform for each molecule or conformer
1525 during energy minimization by a quantum chemistry method.
1526 -m, --methodName <text> [default: auto]
1527 Method to use for constrained energy minimization. Default: B3LYP [ Ref 150 ].
1528 The specified value must be a valid Psi4 method name. No validation is
1529 performed.
1530
1531 The following list shows a representative sample of methods available
1532 in Psi4:
1533
1534 B1LYP, B2PLYP, B2PLYP-D3BJ, B2PLYP-D3MBJ, B3LYP, B3LYP-D3BJ,
1535 B3LYP-D3MBJ, CAM-B3LYP, CAM-B3LYP-D3BJ, HF, HF-D3BJ, HF3c, M05,
1536 M06, M06-2x, M06-HF, M06-L, MN12-L, MN15, MN15-D3BJ,PBE, PBE0,
1537 PBEH3c, PW6B95, PW6B95-D3BJ, WB97, WB97X, WB97X-D, WB97X-D3BJ
1538
1539 --mcsParams <Name,Value,...> [default: auto]
1540 Parameter values to use for identifying a maximum common substructure
1541 (MCS) in between a pair of reference and input molecules.In general, it is a
1542 comma delimited list of parameter name and value pairs. The supported
1543 parameter names along with their default values are shown below:
1544
1545 atomCompare,CompareElements,bondCompare,CompareOrder,
1546 maximizeBonds,yes,matchValences,yes,matchChiralTag,no,
1547 minNumAtoms,1,minNumBonds,0,ringMatchesRingOnly,yes,
1548 completeRingsOnly,yes,threshold,1.0,timeOut,3600,seedSMARTS,none
1549
1550 Possible values for atomCompare: CompareAny, CompareElements,
1551 CompareIsotopes. Possible values for bondCompare: CompareAny,
1552 CompareOrder, CompareOrderExact.
1553
1554 A brief description of MCS parameters taken from RDKit documentation is
1555 as follows:
1556
1557 atomCompare - Controls match between two atoms
1558 bondCompare - Controls match between two bonds
1559 maximizeBonds - Maximize number of bonds instead of atoms
1560 matchValences - Include atom valences in the MCS match
1561 matchChiralTag - Include atom chirality in the MCS match
1562 minNumAtoms - Minimum number of atoms in the MCS match
1563 minNumBonds - Minimum number of bonds in the MCS match
1564 ringMatchesRingOnly - Ring bonds only match other ring bonds
1565 completeRingsOnly - Partial rings not allowed during the match
1566 threshold - Fraction of the dataset that must contain the MCS
1567 seedSMARTS - SMARTS string as the seed of the MCS
1568 timeout - Timeout for the MCS calculation in seconds
1569
1570 --mp <yes or no> [default: no]
1571 Use multiprocessing.
1572
1573 By default, input data is retrieved in a lazy manner via mp.Pool.imap()
1574 function employing lazy RDKit data iterable. This allows processing of
1575 arbitrary large data sets without any additional requirements memory.
1576
1577 All input data may be optionally loaded into memory by mp.Pool.map()
1578 before starting worker processes in a process pool by setting the value
1579 of 'inputDataMode' to 'InMemory' in '--mpParams' option.
1580
1581 A word to the wise: The default 'chunkSize' value of 1 during 'Lazy' input
1582 data mode may adversely impact the performance. The '--mpParams' section
1583 provides additional information to tune the value of 'chunkSize'.
1584 --mpLevel <Molecules or Conformers> [default: Molecules]
1585 Perform multiprocessing at molecules or conformers level. Possible values:
1586 Molecules or Conformers. The 'Molecules' value starts a process pool at the
1587 molecules level. All conformers of a molecule are processed in a single
1588 process. The 'Conformers' value, however, starts a process pool at the
1589 conformers level. Each conformer of a molecule is processed in an individual
1590 process in the process pool. The default Psi4 'OutputFile' is set to 'quiet'
1591 using '--psi4RunParams' for 'Conformers' level. Otherwise, it may generate
1592 a large number of Psi4 output files.
1593 --mpParams <Name,Value,...> [default: auto]
1594 A comma delimited list of parameter name and value pairs to configure
1595 multiprocessing.
1596
1597 The supported parameter names along with their default and possible
1598 values are shown below:
1599
1600 chunkSize, auto
1601 inputDataMode, Lazy [ Possible values: InMemory or Lazy ]
1602 numProcesses, auto [ Default: mp.cpu_count() ]
1603
1604 These parameters are used by the following functions to configure and
1605 control the behavior of multiprocessing: mp.Pool(), mp.Pool.map(), and
1606 mp.Pool.imap().
1607
1608 The chunkSize determines chunks of input data passed to each worker
1609 process in a process pool by mp.Pool.map() and mp.Pool.imap() functions.
1610 The default value of chunkSize is dependent on the value of 'inputDataMode'.
1611
1612 The mp.Pool.map() function, invoked during 'InMemory' input data mode,
1613 automatically converts RDKit data iterable into a list, loads all data into
1614 memory, and calculates the default chunkSize using the following method
1615 as shown in its code:
1616
1617 chunkSize, extra = divmod(len(dataIterable), len(numProcesses) * 4)
1618 if extra: chunkSize += 1
1619
1620 For example, the default chunkSize will be 7 for a pool of 4 worker processes
1621 and 100 data items.
1622
1623 The mp.Pool.imap() function, invoked during 'Lazy' input data mode, employs
1624 'lazy' RDKit data iterable to retrieve data as needed, without loading all the
1625 data into memory. Consequently, the size of input data is not known a priori.
1626 It's not possible to estimate an optimal value for the chunkSize. The default
1627 chunkSize is set to 1.
1628
1629 The default value for the chunkSize during 'Lazy' data mode may adversely
1630 impact the performance due to the overhead associated with exchanging
1631 small chunks of data. It is generally a good idea to explicitly set chunkSize to
1632 a larger value during 'Lazy' input data mode, based on the size of your input
1633 data and number of processes in the process pool.
1634
1635 The mp.Pool.map() function waits for all worker processes to process all
1636 the data and return the results. The mp.Pool.imap() function, however,
1637 returns the the results obtained from worker processes as soon as the
1638 results become available for specified chunks of data.
1639
1640 The order of data in the results returned by both mp.Pool.map() and
1641 mp.Pool.imap() functions always corresponds to the input data.
1642 -o, --outfile <outfile>
1643 Output file name.
1644 --outfileParams <Name,Value,...> [default: auto]
1645 A comma delimited list of parameter name and value pairs for writing
1646 molecules to files. The supported parameter names for different file
1647 formats, along with their default values, are shown below:
1648
1649 SD: kekulize,yes,forceV3000,no
1650
1651 --overwrite
1652 Overwrite existing files.
1653 --precision <number> [default: 6]
1654 Floating point precision for writing energy values.
1655 --psi4OptionsParams <Name,Value,...> [default: none]
1656 A comma delimited list of Psi4 option name and value pairs for setting
1657 global and module options. The names are 'option_name' for global options
1658 and 'module_name__option_name' for options local to a module. The
1659 specified option names must be valid Psi4 names. No validation is
1660 performed.
1661
1662 The specified option name and value pairs are processed and passed to
1663 psi4.set_options() as a dictionary. The supported value types are float,
1664 integer, boolean, or string. The float value string is converted into a float.
1665 The valid values for a boolean string are yes, no, true, false, on, or off.
1666 --psi4RunParams <Name,Value,...> [default: auto]
1667 A comma delimited list of parameter name and value pairs for configuring
1668 Psi4 jobs.
1669
1670 The supported parameter names along with their default and possible
1671 values are shown below:
1672
1673 MemoryInGB, 1
1674 NumThreads, 1
1675 OutputFile, auto [ Possible values: stdout, quiet, or FileName ]
1676 ScratchDir, auto [ Possivle values: DirName]
1677 RemoveOutputFile, yes [ Possible values: yes, no, true, or false]
1678
1679 These parameters control the runtime behavior of Psi4.
1680
1681 The default file name for 'OutputFile' is <InFileRoot>_Psi4.out. The PID
1682 is appended to output file name during multiprocessing as shown:
1683 <InFileRoot>_Psi4_<PIDNum>.out. The 'stdout' value for 'OutputType'
1684 sends Psi4 output to stdout. The 'quiet' or 'devnull' value suppresses
1685 all Psi4 output. The 'OutputFile' is set to 'quiet' for 'auto' value during
1686 'Conformers' of '--mpLevel' option.
1687
1688 The default 'Yes' value of 'RemoveOutputFile' option forces the removal
1689 of any existing Psi4 before creating new files to append output from
1690 multiple Psi4 runs.
1691
1692 The option 'ScratchDir' is a directory path to the location of scratch
1693 files. The default value corresponds to Psi4 default. It may be used to
1694 override the deafult path.
1695 -q, --quiet <yes or no> [default: no]
1696 Use quiet mode. The warning and information messages will not be printed.
1697 -r, --reffile <reffile>
1698 Reference input file name containing a 3D reference molecule. A common
1699 core scaffold must be present in a pair of an input and reference molecules.
1700 Otherwise, no constrained minimization is performed on the input molecule.
1701 --reference <text> [default: auto]
1702 Reference wave function to use for energy calculation. Default: RHF or UHF.
1703 The default values are Restricted Hartree-Fock (RHF) for closed-shell molecules
1704 with all electrons paired and Unrestricted Hartree-Fock (UHF) for open-shell
1705 molecules with unpaired electrons.
1706
1707 The specified value must be a valid Psi4 reference wave function. No validation
1708 is performed. For example: ROHF, CUHF, RKS, etc.
1709
1710 The spin multiplicity determines the default value of reference wave function
1711 for input molecules. It is calculated from number of free radical electrons using
1712 Hund's rule of maximum multiplicity defined as 2S + 1 where S is the total
1713 electron spin. The total spin is 1/2 the number of free radical electrons in a
1714 molecule. The value of 'SpinMultiplicity' molecule property takes precedence
1715 over the calculated value of spin multiplicity.
1716 -s, --scaffold <auto or SMARTS> [default: auto]
1717 Common core scaffold between a pair of input and reference molecules used for
1718 constrained minimization of molecules in input file. Possible values: Auto or a
1719 valid SMARTS pattern. The common core scaffold is automatically detected
1720 corresponding to the Maximum Common Substructure (MCS) between a pair of
1721 reference and input molecules. A valid SMARTS pattern may be optionally specified
1722 for the common core scaffold.
1723 --scaffoldRMSDOut <yes or no> [default: No]
1724 Write out RMSD value for common core alignment between a pair of input and
1725 reference molecules.
1726 -w, --workingdir <dir>
1727 Location of working directory which defaults to the current directory.
1728
1729 Examples:
1730 To perform constrained energy minimization for molecules in a SMILES file against
1731 a reference 3D molecule in a SD file using a common core scaffold between pairs of
1732 input and reference molecules identified using MCS, generating up to 50 conformations
1733 using ETKDGv2 methodology followed by initial MMFF forcefield minimization and final
1734 energy minimization using B3LYP/6-31G** and B3LYP/6-31+G** for non-sulfur and
1735 sulfur containing molecules, and write out a SD file containing minimum energy
1736 structure corresponding to each constrained molecule, type:
1737
1738 % Psi4PerformConstrainedMinimization.py -i Psi4SampleAlkanes.smi
1739 -r Psi4SampleEthane3D.sdf -o Psi4SampleAlkanesOut.sdf
1740
1741 To run the first example in a quiet mode and write out a SD file, type:
1742
1743 % Psi4PerformConstrainedMinimization.py -q yes
1744 -i Psi4SampleAlkanes.smi -r Psi4SampleEthane3D.sdf
1745 -o Psi4SampleAlkanesOut.sdf
1746
1747 To run the first example in multiprocessing mode at molecules level on all
1748 available CPUs without loading all data into memory and write out a SD file,
1749 type:
1750
1751 % Psi4PerformConstrainedMinimization.py --mp yes
1752 -i Psi4SampleAlkanes.smi -r Psi4SampleEthane3D.sdf
1753 -o Psi4SampleAlkanesOut.sdf
1754
1755 To run the first example in multiprocessing mode at conformers level on all
1756 available CPUs without loading all data into memory and write out a SD file,
1757 type:
1758
1759 % Psi4PerformConstrainedMinimization.py --mp yes --mpLevel Conformers
1760 -i Psi4SampleAlkanes.smi -r Psi4SampleEthane3D.sdf
1761 -o Psi4SampleAlkanesOut.sdf
1762
1763 To run the first example in multiprocessing mode at molecules level on all
1764 available CPUs by loading all data into memory and write out a SD file, type:
1765
1766 % Psi4PerformConstrainedMinimization.py --mp yes --mpParams
1767 "inputDataMode,Lazy,numProcesses,4,chunkSize,8"
1768 -i Psi4SampleAlkanes.smi -r Psi4SampleEthane3D.sdf
1769 -o Psi4SampleAlkanesOut.sdf
1770
1771 To rerun the first example using an explicit SMARTS string for a common core
1772 scaffold and write out a SD file, type:
1773
1774 % Psi4PerformConstrainedMinimization.py --scaffold "CC"
1775 -i Psi4SampleAlkanes.smi -r Psi4SampleEthane3D.sdf
1776 -o Psi4SampleAlkanesOut.sdf
1777
1778 To run the first example using a specific set of parameters for generating
1779 an initial set of conformers followed by energy minimization using forcefield
1780 and a quantum chemistry method and write out a SD file type:
1781
1782 % Psi4PerformConstrainedMinimization.py --confParams "
1783 confMethod,ETKDGv2,forceField,MMFF, forceFieldMMFFVariant,MMFF94s,
1784 maxConfs,20,embedRMSDCutoff,0.5" --energyUnits "kJ/mol" -m B3LYP
1785 -b "6-31+G**" --maxIters 20 -i Psi4SampleAlkanes.smi -r Psi4SampleEthane3D.sdf
1786 -o Psi4SampleAlkanesOut.sdf
1787
1788 To run the first example for molecules in a CSV SMILES file, SMILES strings
1789 in column 1, name column 2, and write out a SD file, type:
1790
1791 % Psi4PerformConstrainedMinimization.py --infileParams
1792 "smilesDelimiter,comma,smilesTitleLine,yes,smilesColumn,1,
1793 smilesNameColumn,2" -i Psi4SampleAlkanes.csv
1794 -r Psi4SampleEthane3D.sdf -o Psi4SampleAlkanesOut.sdf
1795
1796 Author:
1797
1798 Manish Sud(msud@san.rr.com)
1799
1800 See also:
1801 Psi4CalculateEnergy.py, Psi4CalculatePartialCharges.py, Psi4GenerateConformers.py,
1802 Psi4GenerateConstrainedConformers.py, Psi4PerformMinimization.py
1803
1804 Copyright:
1805 Copyright (C) 2026 Manish Sud. All rights reserved.
1806
1807 The functionality available in this script is implemented using Psi4, an
1808 open source quantum chemistry software package, and RDKit, an open
1809 source toolkit for cheminformatics developed by Greg Landrum.
1810
1811 This file is part of MayaChemTools.
1812
1813 MayaChemTools is free software; you can redistribute it and/or modify it under
1814 the terms of the GNU Lesser General Public License as published by the Free
1815 Software Foundation; either version 3 of the License, or (at your option) any
1816 later version.
1817
1818 """
1819
1820 if __name__ == "__main__":
1821 main()