1 #!/bin/env python
2 #
3 # File: Psi4PerformMinimization.py
4 # Author: Manish Sud <msud@san.rr.com>
5 #
6 # Contributor: Francois Berenger
7 #
8 # Copyright (C) 2025 Manish Sud. All rights reserved.
9 #
10 # The functionality available in this script is implemented using Psi4, an
11 # open source quantum chemistry software package, and RDKit, an open
12 # source toolkit for cheminformatics developed by Greg Landrum.
13 #
14 # This file is part of MayaChemTools.
15 #
16 # MayaChemTools is free software; you can redistribute it and/or modify it under
17 # the terms of the GNU Lesser General Public License as published by the Free
18 # Software Foundation; either version 3 of the License, or (at your option) any
19 # later version.
20 #
21 # MayaChemTools is distributed in the hope that it will be useful, but without
22 # any warranty; without even the implied warranty of merchantability of fitness
23 # for a particular purpose. See the GNU Lesser General Public License for more
24 # details.
25 #
26 # You should have received a copy of the GNU Lesser General Public License
27 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
28 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
29 # Boston, MA, 02111-1307, USA.
30 #
31
32 from __future__ import print_function
33
34 import os
35 import sys
36 import time
37 import re
38 import shutil
39 import multiprocessing as mp
40
41 # Psi4 imports...
42 if hasattr(shutil, "which") and shutil.which("psi4") is None:
43 sys.stderr.write("\nWarning: Failed to find 'psi4' in your PATH indicating potential issues with your\n")
44 sys.stderr.write("Psi4 environment. The 'import psi4' directive in the global scope of the script\n")
45 sys.stderr.write("interferes with the multiprocessing functionality. It is imported later in the\n")
46 sys.stderr.write("local scope during the execution of the script and may fail. Check/update your\n")
47 sys.stderr.write("Psi4 environment and try again.\n\n")
48
49 # RDKit imports...
50 try:
51 from rdkit import rdBase
52 from rdkit import Chem
53 from rdkit.Chem import AllChem
54 except ImportError as ErrMsg:
55 sys.stderr.write("\nFailed to import RDKit module/package: %s\n" % ErrMsg)
56 sys.stderr.write("Check/update your RDKit environment and try again.\n\n")
57 sys.exit(1)
58
59 # MayaChemTools imports...
60 sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python"))
61 try:
62 from docopt import docopt
63 import MiscUtil
64 import Psi4Util
65 import RDKitUtil
66 except ImportError as ErrMsg:
67 sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg)
68 sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n")
69 sys.exit(1)
70
71 ScriptName = os.path.basename(sys.argv[0])
72 Options = {}
73 OptionsInfo = {}
74
75
76 def main():
77 """Start execution of the script."""
78
79 MiscUtil.PrintInfo(
80 "\n%s (Psi4: Imported later; RDKit v%s; MayaChemTools v%s; %s): Starting...\n"
81 % (ScriptName, rdBase.rdkitVersion, MiscUtil.GetMayaChemToolsVersion(), time.asctime())
82 )
83
84 (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime()
85
86 # Retrieve command line arguments and options...
87 RetrieveOptions()
88
89 # Process and validate command line arguments and options...
90 ProcessOptions()
91
92 # Perform actions required by the script...
93 PerformMinimization()
94
95 MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName)
96 MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime))
97
98
99 def PerformMinimization():
100 """Perform minimization."""
101
102 # Setup a molecule reader...
103 MiscUtil.PrintInfo("\nProcessing file %s..." % OptionsInfo["Infile"])
104 Mols = RDKitUtil.ReadMolecules(OptionsInfo["Infile"], **OptionsInfo["InfileParams"])
105
106 # Set up a molecule writer...
107 Writer = RDKitUtil.MoleculesWriter(OptionsInfo["Outfile"], **OptionsInfo["OutfileParams"])
108 if Writer is None:
109 MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OptionsInfo["Outfile"])
110 MiscUtil.PrintInfo("Generating file %s..." % OptionsInfo["Outfile"])
111
112 MolCount, ValidMolCount, MinimizationFailedCount, WriteFailedCount = ProcessMolecules(Mols, Writer)
113
114 if Writer is not None:
115 Writer.close()
116
117 MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount)
118 MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount)
119 MiscUtil.PrintInfo(
120 "Number of molecules failed during conformation generation or minimization: %d" % MinimizationFailedCount
121 )
122 MiscUtil.PrintInfo("Number of molecules failed during writing: %d" % WriteFailedCount)
123 MiscUtil.PrintInfo(
124 "Number of ignored molecules: %d" % (MolCount - ValidMolCount + MinimizationFailedCount + WriteFailedCount)
125 )
126
127
128 def ProcessMolecules(Mols, Writer):
129 """Process and minimize molecules."""
130
131 if OptionsInfo["MPMode"]:
132 return ProcessMoleculesUsingMultipleProcesses(Mols, Writer)
133 else:
134 return ProcessMoleculesUsingSingleProcess(Mols, Writer)
135
136
137 def ProcessMoleculesUsingSingleProcess(Mols, Writer):
138 """Process and minimize molecules using a single process."""
139
140 # Intialize Psi4...
141 MiscUtil.PrintInfo("\nInitializing Psi4...")
142 Psi4Handle = Psi4Util.InitializePsi4(
143 Psi4RunParams=OptionsInfo["Psi4RunParams"],
144 Psi4OptionsParams=OptionsInfo["Psi4OptionsParams"],
145 PrintVersion=True,
146 PrintHeader=True,
147 )
148 if OptionsInfo["Psi4DDXSolvationMode"]:
149 Psi4Util.UpdatePsi4OptionsParameters(Psi4Handle, OptionsInfo["Psi4DDXSolvationOptions"])
150 OptionsInfo["psi4"] = Psi4Handle
151
152 # Setup max iterations global variable...
153 Psi4Util.UpdatePsi4OptionsParameters(Psi4Handle, {"GEOM_MAXITER": OptionsInfo["MaxIters"]})
154
155 # Setup conversion factor for energy units...
156 SetupEnergyConversionFactor(Psi4Handle)
157
158 if OptionsInfo["SkipConformerGeneration"]:
159 if OptionsInfo["Psi4ConstrainTorsionsMode"]:
160 MiscUtil.PrintInfo("\nPerforming minimization without generation of conformers and constrained torsions...")
161 else:
162 MiscUtil.PrintInfo("\nPerforming minimization without generation of conformers...")
163 else:
164 MiscUtil.PrintInfo("\nPerforming minimization with generation of conformers...")
165
166 (MolCount, ValidMolCount, MinimizationFailedCount, WriteFailedCount) = [0] * 4
167 for Mol in Mols:
168 MolCount += 1
169
170 if not CheckAndValidateMolecule(Mol, MolCount):
171 continue
172
173 # Setup 2D coordinates for SMILES input file...
174 if OptionsInfo["SMILESInfileStatus"]:
175 AllChem.Compute2DCoords(Mol)
176
177 ValidMolCount += 1
178
179 Mol, CalcStatus, ConfID, Energy, SolvationEnergy = MinimizeMoleculeOrConformers(Psi4Handle, Mol, MolCount)
180
181 if not CalcStatus:
182 if not OptionsInfo["QuietMode"]:
183 MolName = RDKitUtil.GetMolName(Mol, MolCount)
184 MiscUtil.PrintWarning("Failed to calculate energy for molecule %s" % MolName)
185
186 MinimizationFailedCount += 1
187 continue
188
189 WriteStatus = WriteMolecule(Writer, Mol, MolCount, ConfID, Energy, SolvationEnergy)
190
191 if not WriteStatus:
192 WriteFailedCount += 1
193
194 return (MolCount, ValidMolCount, MinimizationFailedCount, WriteFailedCount)
195
196
197 def ProcessMoleculesUsingMultipleProcesses(Mols, Writer):
198 """Process and minimize molecules using multiprocessing."""
199
200 if OptionsInfo["MPLevelConformersMode"]:
201 return ProcessMoleculesUsingMultipleProcessesAtConformersLevel(Mols, Writer)
202 elif OptionsInfo["MPLevelMoleculesMode"]:
203 return ProcessMoleculesUsingMultipleProcessesAtMoleculesLevel(Mols, Writer)
204 else:
205 MiscUtil.PrintError('The value, %s, option "--mpLevel" is not supported.' % (OptionsInfo["MPLevel"]))
206
207
208 def ProcessMoleculesUsingMultipleProcessesAtMoleculesLevel(Mols, Writer):
209 """Process and minimize molecules using multiprocessing at molecules level."""
210
211 if OptionsInfo["SkipConformerGeneration"]:
212 if OptionsInfo["Psi4ConstrainTorsionsMode"]:
213 MiscUtil.PrintInfo(
214 "\nPerforming minimization without generation of conformers and constrained torsions using multiprocessing at molecules level..."
215 )
216 else:
217 MiscUtil.PrintInfo(
218 "\nPerforming minimization without generation of conformers using multiprocessing at molecules level..."
219 )
220 else:
221 MiscUtil.PrintInfo(
222 "\nPerforming minimization with generation of conformers using multiprocessing at molecules level..."
223 )
224
225 MPParams = OptionsInfo["MPParams"]
226
227 # Setup data for initializing a worker process...
228 InitializeWorkerProcessArgs = (
229 MiscUtil.ObjectToBase64EncodedString(Options),
230 MiscUtil.ObjectToBase64EncodedString(OptionsInfo),
231 )
232
233 # Setup a encoded mols data iterable for a worker process...
234 WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(Mols)
235
236 # Setup process pool along with data initialization for each process...
237 if not OptionsInfo["QuietMode"]:
238 MiscUtil.PrintInfo(
239 "\nConfiguring multiprocessing using %s method..."
240 % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()")
241 )
242 MiscUtil.PrintInfo(
243 "NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n"
244 % (
245 MPParams["NumProcesses"],
246 MPParams["InputDataMode"],
247 ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"]),
248 )
249 )
250
251 ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeWorkerProcess, InitializeWorkerProcessArgs)
252
253 # Start processing...
254 if re.match("^Lazy$", MPParams["InputDataMode"], re.I):
255 Results = ProcessPool.imap(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
256 elif re.match("^InMemory$", MPParams["InputDataMode"], re.I):
257 Results = ProcessPool.map(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
258 else:
259 MiscUtil.PrintError(
260 'The value, %s, specified for "--inputDataMode" is not supported.' % (MPParams["InputDataMode"])
261 )
262
263 # Print out Psi4 version in the main process...
264 MiscUtil.PrintInfo("\nInitializing Psi4...\n")
265 Psi4Handle = Psi4Util.InitializePsi4(PrintVersion=True, PrintHeader=False)
266 OptionsInfo["psi4"] = Psi4Handle
267
268 (MolCount, ValidMolCount, MinimizationFailedCount, WriteFailedCount) = [0] * 4
269 for Result in Results:
270 MolCount += 1
271 MolIndex, EncodedMol, CalcStatus, ConfID, Energy, SolvationEnergy = Result
272
273 if EncodedMol is None:
274 continue
275 ValidMolCount += 1
276
277 if not CalcStatus:
278 MinimizationFailedCount += 1
279 continue
280
281 Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
282 WriteStatus = WriteMolecule(Writer, Mol, MolCount, ConfID, Energy, SolvationEnergy)
283
284 if not WriteStatus:
285 WriteFailedCount += 1
286
287 return (MolCount, ValidMolCount, MinimizationFailedCount, WriteFailedCount)
288
289
290 def InitializeWorkerProcess(*EncodedArgs):
291 """Initialize data for a worker process."""
292
293 global Options, OptionsInfo
294
295 if not OptionsInfo["QuietMode"]:
296 MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid())
297
298 # Decode Options and OptionInfo...
299 Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0])
300 OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1])
301
302 # Psi4 is initialized in the worker process to avoid creation of redundant Psi4
303 # output files for each process...
304 OptionsInfo["Psi4Initialized"] = False
305
306
307 def WorkerProcess(EncodedMolInfo):
308 """Process data for a worker process."""
309
310 if not OptionsInfo["Psi4Initialized"]:
311 InitializePsi4ForWorkerProcess()
312
313 MolIndex, EncodedMol = EncodedMolInfo
314
315 MolNum = MolIndex + 1
316 CalcStatus = False
317 ConfID = None
318 Energy = None
319 SolvationEnergy = None
320
321 if EncodedMol is None:
322 return [MolIndex, None, CalcStatus, ConfID, Energy, SolvationEnergy]
323
324 Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
325
326 if not CheckAndValidateMolecule(Mol, MolNum):
327 return [MolIndex, None, CalcStatus, ConfID, Energy, SolvationEnergy]
328
329 # Setup 2D coordinates for SMILES input file...
330 if OptionsInfo["SMILESInfileStatus"]:
331 AllChem.Compute2DCoords(Mol)
332
333 Mol, CalcStatus, ConfID, Energy, SolvationEnergy = MinimizeMoleculeOrConformers(OptionsInfo["psi4"], Mol, MolNum)
334
335 return [
336 MolIndex,
337 RDKitUtil.MolToBase64EncodedMolString(
338 Mol, PropertyPickleFlags=Chem.PropertyPickleOptions.MolProps | Chem.PropertyPickleOptions.PrivateProps
339 ),
340 CalcStatus,
341 ConfID,
342 Energy,
343 SolvationEnergy,
344 ]
345
346
347 def ProcessMoleculesUsingMultipleProcessesAtConformersLevel(Mols, Writer):
348 """Process and minimize molecules using multiprocessing at conformers level."""
349
350 MiscUtil.PrintInfo(
351 "\nPerforming minimization with generation of conformers using multiprocessing at conformers level..."
352 )
353
354 (MolCount, ValidMolCount, MinimizationFailedCount, WriteFailedCount) = [0] * 4
355 for Mol in Mols:
356 MolCount += 1
357
358 if not CheckAndValidateMolecule(Mol, MolCount):
359 continue
360
361 # Setup 2D coordinates for SMILES input file...
362 if OptionsInfo["SMILESInfileStatus"]:
363 AllChem.Compute2DCoords(Mol)
364
365 ValidMolCount += 1
366
367 Mol, CalcStatus, ConfID, Energy, SolvationEnergy = ProcessConformersUsingMultipleProcesses(Mol, MolCount)
368
369 if not CalcStatus:
370 if not OptionsInfo["QuietMode"]:
371 MolName = RDKitUtil.GetMolName(Mol, MolCount)
372 MiscUtil.PrintWarning("Failed to calculate energy for molecule %s" % MolName)
373
374 MinimizationFailedCount += 1
375 continue
376
377 WriteStatus = WriteMolecule(Writer, Mol, MolCount, ConfID, Energy, SolvationEnergy)
378
379 if not WriteStatus:
380 WriteFailedCount += 1
381
382 return (MolCount, ValidMolCount, MinimizationFailedCount, WriteFailedCount)
383
384
385 def ProcessConformersUsingMultipleProcesses(Mol, MolNum):
386 """Generate conformers and minimize them using multiple processes."""
387
388 # Add hydrogens...
389 Mol = Chem.AddHs(Mol)
390
391 # Setup conformers...
392 ConfIDs = EmbedMolecule(Mol, MolNum)
393 if not len(ConfIDs):
394 if not OptionsInfo["QuietMode"]:
395 MolName = RDKitUtil.GetMolName(Mol, MolNum)
396 MiscUtil.PrintWarning("Minimization couldn't be performed for molecule %s: Embedding failed...\n" % MolName)
397 return (Mol, False, None, None, None)
398
399 MPParams = OptionsInfo["MPParams"]
400
401 # Setup data for initializing a worker process...
402 InitializeWorkerProcessArgs = (
403 MiscUtil.ObjectToBase64EncodedString(Options),
404 MiscUtil.ObjectToBase64EncodedString(OptionsInfo),
405 )
406
407 # Setup a encoded mols data iterable for a worker process...
408 MolIndex = MolNum - 1
409 WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStringWithConfIDs(Mol, MolIndex, ConfIDs)
410
411 # Setup process pool along with data initialization for each process...
412 if not OptionsInfo["QuietMode"]:
413 MiscUtil.PrintInfo(
414 "\nConfiguring multiprocessing using %s method..."
415 % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()")
416 )
417 MiscUtil.PrintInfo(
418 "NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n"
419 % (
420 MPParams["NumProcesses"],
421 MPParams["InputDataMode"],
422 ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"]),
423 )
424 )
425
426 ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeConformerWorkerProcess, InitializeWorkerProcessArgs)
427
428 # Start processing...
429 if re.match("^Lazy$", MPParams["InputDataMode"], re.I):
430 Results = ProcessPool.imap(ConformerWorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
431 elif re.match("^InMemory$", MPParams["InputDataMode"], re.I):
432 Results = ProcessPool.map(ConformerWorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
433 else:
434 MiscUtil.PrintError(
435 'The value, %s, specified for "--inputDataMode" is not supported.' % (MPParams["InputDataMode"])
436 )
437
438 CalcEnergyMap = {}
439 CalcSolvationEnergyMap = {}
440 CalcFailedCount = 0
441 for Result in Results:
442 MolIndex, EncodedMol, CalcStatus, ConfID, Energy, SolvationEnergy = Result
443
444 if EncodedMol is None:
445 CalcFailedCount += 1
446 continue
447
448 if not CalcStatus:
449 CalcFailedCount += 1
450 continue
451
452 # Retrieve minimized atom positions...
453 MinimizedMol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
454 AtomPositions = RDKitUtil.GetAtomPositions(MinimizedMol, ConfID=ConfID)
455
456 # Update atom positions...
457 RDKitUtil.SetAtomPositions(Mol, AtomPositions, ConfID=ConfID)
458
459 CalcEnergyMap[ConfID] = Energy
460 CalcSolvationEnergyMap[ConfID] = SolvationEnergy
461
462 if CalcFailedCount:
463 return (Mol, False, None, None, None)
464
465 SortedConfIDs = sorted(ConfIDs, key=lambda ConfID: CalcEnergyMap[ConfID])
466 MinEnergyConfID = SortedConfIDs[0]
467 MinEnergy = CalcEnergyMap[MinEnergyConfID]
468 SolvationEnergy = CalcSolvationEnergyMap[MinEnergyConfID]
469
470 return (Mol, True, MinEnergyConfID, MinEnergy, SolvationEnergy)
471
472
473 def InitializeConformerWorkerProcess(*EncodedArgs):
474 """Initialize data for a conformer worker process."""
475
476 global Options, OptionsInfo
477
478 if not OptionsInfo["QuietMode"]:
479 MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid())
480
481 # Decode Options and OptionInfo...
482 Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0])
483 OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1])
484
485 # Psi4 is initialized in the worker process to avoid creation of redundant Psi4
486 # output files for each process...
487 OptionsInfo["Psi4Initialized"] = False
488
489
490 def ConformerWorkerProcess(EncodedMolInfo):
491 """Process data for a conformer worker process."""
492
493 if not OptionsInfo["Psi4Initialized"]:
494 InitializePsi4ForWorkerProcess()
495
496 MolIndex, EncodedMol, ConfID = EncodedMolInfo
497
498 MolNum = MolIndex + 1
499
500 CalcStatus = False
501 Energy = None
502 SolvationEnergy = None
503
504 if EncodedMol is None:
505 return [MolIndex, None, CalcStatus, ConfID, Energy, SolvationEnergy]
506
507 Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
508 MolName = RDKitUtil.GetMolName(Mol, MolNum)
509
510 if not OptionsInfo["QuietMode"]:
511 MiscUtil.PrintInfo("Processing molecule %s conformer ID %s..." % (MolName, ConfID))
512
513 Status, ConvergeStatus = MinimizeMoleculeUsingForceField(Mol, MolNum, ConfID)
514 if not Status:
515 return [MolIndex, EncodedMol, CalcStatus, ConfID, Energy, SolvationEnergy]
516
517 if ConvergeStatus != 0:
518 if not OptionsInfo["QuietMode"]:
519 MolName = RDKitUtil.GetMolName(Mol, (MolIndex + 1))
520 MiscUtil.PrintWarning(
521 'Minimization using forcefield failed to converge for molecule %s in %d steps. Try using higher value for "maxIters" in "--confParams" option...\n'
522 % (MolName, OptionsInfo["ConfGenerationParams"]["MaxIters"])
523 )
524
525 # Perform Psi4 minimization...
526 CalcStatus, Energy, SolvationEnergy = MinimizeMoleculeUsingPsi4(OptionsInfo["psi4"], Mol, MolNum, ConfID)
527 if not CalcStatus:
528 if not OptionsInfo["QuietMode"]:
529 MiscUtil.PrintWarning("Minimization couldn't be performed for molecule %s\n" % (MolName))
530 return [MolIndex, EncodedMol, False, ConfID, Energy, SolvationEnergy]
531
532 return [
533 MolIndex,
534 RDKitUtil.MolToBase64EncodedMolString(
535 Mol, PropertyPickleFlags=Chem.PropertyPickleOptions.MolProps | Chem.PropertyPickleOptions.PrivateProps
536 ),
537 CalcStatus,
538 ConfID,
539 Energy,
540 SolvationEnergy,
541 ]
542
543
544 def InitializePsi4ForWorkerProcess():
545 """Initialize Psi4 for a worker process."""
546
547 if OptionsInfo["Psi4Initialized"]:
548 return
549
550 OptionsInfo["Psi4Initialized"] = True
551
552 if OptionsInfo["MPLevelConformersMode"] and re.match(
553 "auto", OptionsInfo["Psi4RunParams"]["OutputFileSpecified"], re.I
554 ):
555 # Run Psi4 in quiet mode during multiprocessing at Conformers level for 'auto' OutputFile...
556 OptionsInfo["Psi4RunParams"]["OutputFile"] = "quiet"
557 else:
558 # Update output file...
559 OptionsInfo["Psi4RunParams"]["OutputFile"] = Psi4Util.UpdatePsi4OutputFileUsingPID(
560 OptionsInfo["Psi4RunParams"]["OutputFile"], os.getpid()
561 )
562
563 # Intialize Psi4...
564 OptionsInfo["psi4"] = Psi4Util.InitializePsi4(
565 Psi4RunParams=OptionsInfo["Psi4RunParams"],
566 Psi4OptionsParams=OptionsInfo["Psi4OptionsParams"],
567 PrintVersion=False,
568 PrintHeader=True,
569 )
570
571 if OptionsInfo["Psi4DDXSolvationMode"]:
572 Psi4Util.UpdatePsi4OptionsParameters(OptionsInfo["psi4"], OptionsInfo["Psi4DDXSolvationOptions"])
573
574 # Setup max iterations global variable...
575 Psi4Util.UpdatePsi4OptionsParameters(OptionsInfo["psi4"], {"GEOM_MAXITER": OptionsInfo["MaxIters"]})
576
577 # Setup conversion factor for energy units...
578 SetupEnergyConversionFactor(OptionsInfo["psi4"])
579
580
581 def MinimizeMoleculeOrConformers(Psi4Handle, Mol, MolNum):
582 """Minimize molecule or conformers."""
583
584 ConfID = None
585 if OptionsInfo["SkipConformerGeneration"]:
586 Mol, CalcStatus, Energy, SolvationEnergy = MinimizeMolecule(Psi4Handle, Mol, MolNum)
587 else:
588 Mol, CalcStatus, ConfID, Energy, SolvationEnergy = GenerateAndMinimizeConformers(Psi4Handle, Mol, MolNum)
589
590 return (Mol, CalcStatus, ConfID, Energy, SolvationEnergy)
591
592
593 def GenerateAndMinimizeConformers(Psi4Handle, Mol, MolNum=None):
594 """Generate and minimize conformers for a molecule to get the lowest energy conformer."""
595
596 # Add hydrogens..
597 Mol = Chem.AddHs(Mol)
598
599 MolName = RDKitUtil.GetMolName(Mol, MolNum)
600
601 # Setup conformers...
602 ConfIDs = EmbedMolecule(Mol, MolNum)
603 if not len(ConfIDs):
604 if not OptionsInfo["QuietMode"]:
605 MiscUtil.PrintWarning("Minimization couldn't be performed for molecule %s: Embedding failed...\n" % MolName)
606 return (Mol, False, None, None, None)
607
608 # Minimize conformers...
609 CalcEnergyMap = {}
610 CalcSolvationEnergyMap = {}
611 for ConfID in ConfIDs:
612 if not OptionsInfo["QuietMode"]:
613 MiscUtil.PrintInfo("Processing molecule %s conformer ID %s..." % (MolName, ConfID))
614
615 # Perform forcefield minimization...
616 Status, ConvergeStatus = MinimizeMoleculeUsingForceField(Mol, MolNum, ConfID)
617 if not Status:
618 return (Mol, False, None, None, None)
619
620 if ConvergeStatus != 0:
621 if not OptionsInfo["QuietMode"]:
622 MiscUtil.PrintWarning(
623 'Minimization using forcefield failed to converge for molecule %s in %d steps. Try using higher value for "maxIters" in "--confParams" option...\n'
624 % (MolName, OptionsInfo["ConfGenerationParams"]["MaxIters"])
625 )
626
627 # Perform Psi4 minimization...
628 CalcStatus, Energy, SolvationEnergy = MinimizeMoleculeUsingPsi4(Psi4Handle, Mol, MolNum, ConfID)
629 if not CalcStatus:
630 if not OptionsInfo["QuietMode"]:
631 MiscUtil.PrintWarning("Minimization couldn't be performed for molecule %s\n" % (MolName))
632 return (Mol, False, None, None, None)
633
634 CalcEnergyMap[ConfID] = Energy
635 CalcSolvationEnergyMap[ConfID] = SolvationEnergy
636
637 # Select the lowest energy conformer..
638 SortedConfIDs = sorted(ConfIDs, key=lambda ConfID: CalcEnergyMap[ConfID])
639 MinEnergyConfID = SortedConfIDs[0]
640 Energy = CalcEnergyMap[MinEnergyConfID]
641 SolvationEnergy = CalcSolvationEnergyMap[MinEnergyConfID]
642
643 return (Mol, True, MinEnergyConfID, Energy, SolvationEnergy)
644
645
646 def MinimizeMolecule(Psi4Handle, Mol, MolNum):
647 """Minimize molecule."""
648
649 # Skip conformation generation and forcefield minimization...
650 CalcStatus, Energy, SolvationEnergy = MinimizeMoleculeUsingPsi4(Psi4Handle, Mol, MolNum)
651
652 return (Mol, CalcStatus, Energy, SolvationEnergy)
653
654
655 def MinimizeMoleculeUsingPsi4(Psi4Handle, Mol, MolNum, ConfID=-1):
656 """Minimize molecule using Psi4."""
657
658 # Setup a Psi4Mol...
659 Psi4Mol = SetupPsi4Mol(Psi4Handle, Mol, MolNum, ConfID)
660 if Psi4Mol is None:
661 return (False, None, None)
662
663 # Setup reference wave function...
664 Reference = SetupReferenceWavefunction(Mol)
665 Psi4Handle.set_options({"Reference": Reference})
666
667 # Setup method name and basis set...
668 MethodName, BasisSet = SetupMethodNameAndBasisSet(Mol)
669
670 # Setup constrained torsions...
671 if OptionsInfo["Psi4ConstrainTorsionsMode"]:
672 SetupConstrainedTorsionsAroundRotatableBonds(Psi4Handle, Mol)
673
674 # Optimize geometry...
675 Status, Energy, WaveFunction = Psi4Util.PerformGeometryOptimization(
676 Psi4Handle, Psi4Mol, MethodName, BasisSet, ReturnWaveFunction=True, Quiet=OptionsInfo["QuietMode"]
677 )
678
679 if not Status:
680 PerformPsi4Cleanup(Psi4Handle)
681 return (False, None, None)
682
683 SolvationEnergy = None
684 if OptionsInfo["Psi4DDXSolvationMode"]:
685 SolvationEnergy = RetrievePsi4DDXSolvationEnergy(WaveFunction)
686
687 # Update atom positions...
688 AtomPositions = Psi4Util.GetAtomPositions(Psi4Handle, WaveFunction, InAngstroms=True)
689 RDKitUtil.SetAtomPositions(Mol, AtomPositions, ConfID=ConfID)
690
691 # Convert energy units...
692 if OptionsInfo["ApplyEnergyConversionFactor"]:
693 Energy = Energy * OptionsInfo["EnergyConversionFactor"]
694 if SolvationEnergy is not None:
695 SolvationEnergy = SolvationEnergy * OptionsInfo["EnergyConversionFactor"]
696
697 # Clean up
698 PerformPsi4Cleanup(Psi4Handle)
699
700 return (True, Energy, SolvationEnergy)
701
702
703 def RetrievePsi4DDXSolvationEnergy(WaveFunction):
704 """Retrieve DDX solvation energy."""
705
706 SolvationEnergy = None
707 try:
708 SolvationEnergy = WaveFunction.variable("DD Solvation Energy")
709 except Exception as ErrMsg:
710 SolvationEnergy = None
711 if not OptionsInfo["QuietMode"]:
712 MiscUtil.PrintWarning("Failed to retrieve Psi4 DD Solvation Energy using wave function: %s" % ErrMsg)
713
714 return SolvationEnergy
715
716
717 def MinimizeMoleculeUsingForceField(Mol, MolNum, ConfID=-1):
718 """Minimize molecule using forcefield available in RDKit."""
719
720 try:
721 if OptionsInfo["ConfGenerationParams"]["UseUFF"]:
722 ConvergeStatus = AllChem.UFFOptimizeMolecule(
723 Mol, confId=ConfID, maxIters=OptionsInfo["ConfGenerationParams"]["MaxIters"]
724 )
725 elif OptionsInfo["ConfGenerationParams"]["UseMMFF"]:
726 ConvergeStatus = AllChem.MMFFOptimizeMolecule(
727 Mol,
728 confId=ConfID,
729 maxIters=OptionsInfo["ConfGenerationParams"]["MaxIters"],
730 mmffVariant=OptionsInfo["ConfGenerationParams"]["MMFFVariant"],
731 )
732 else:
733 MiscUtil.PrintError(
734 "Minimization couldn't be performed: Specified forcefield, %s, is not supported"
735 % OptionsInfo["ConfGenerationParams"]["ForceField"]
736 )
737 except (ValueError, RuntimeError, Chem.rdchem.KekulizeException) as ErrMsg:
738 if not OptionsInfo["QuietMode"]:
739 MolName = RDKitUtil.GetMolName(Mol, MolNum)
740 MiscUtil.PrintWarning(
741 "Minimization using forcefield couldn't be performed for molecule %s:\n%s\n" % (MolName, ErrMsg)
742 )
743 return (False, None)
744
745 return (True, ConvergeStatus)
746
747
748 def EmbedMolecule(Mol, MolNum=None):
749 """Embed conformations."""
750
751 ConfIDs = []
752
753 MaxConfs = OptionsInfo["ConfGenerationParams"]["MaxConfs"]
754 RandomSeed = OptionsInfo["ConfGenerationParams"]["RandomSeed"]
755 EnforceChirality = OptionsInfo["ConfGenerationParams"]["EnforceChirality"]
756 UseExpTorsionAnglePrefs = OptionsInfo["ConfGenerationParams"]["UseExpTorsionAnglePrefs"]
757 ETVersion = OptionsInfo["ConfGenerationParams"]["ETVersion"]
758 UseBasicKnowledge = OptionsInfo["ConfGenerationParams"]["UseBasicKnowledge"]
759 EmbedRMSDCutoff = OptionsInfo["ConfGenerationParams"]["EmbedRMSDCutoff"]
760
761 try:
762 ConfIDs = AllChem.EmbedMultipleConfs(
763 Mol,
764 numConfs=MaxConfs,
765 randomSeed=RandomSeed,
766 pruneRmsThresh=EmbedRMSDCutoff,
767 enforceChirality=EnforceChirality,
768 useExpTorsionAnglePrefs=UseExpTorsionAnglePrefs,
769 useBasicKnowledge=UseBasicKnowledge,
770 ETversion=ETVersion,
771 )
772 except ValueError as ErrMsg:
773 if not OptionsInfo["QuietMode"]:
774 MolName = RDKitUtil.GetMolName(Mol, MolNum)
775 MiscUtil.PrintWarning("Embedding failed for molecule %s:\n%s\n" % (MolName, ErrMsg))
776 ConfIDs = []
777
778 if not OptionsInfo["QuietMode"]:
779 if EmbedRMSDCutoff > 0:
780 MiscUtil.PrintInfo(
781 "Generating initial conformation ensemble by distance geometry for %s - EmbedRMSDCutoff: %s; Size: %s; Size after RMSD filtering: %s"
782 % (RDKitUtil.GetMolName(Mol, MolNum), EmbedRMSDCutoff, MaxConfs, len(ConfIDs))
783 )
784 else:
785 MiscUtil.PrintInfo(
786 "Generating initial conformation ensemble by distance geometry for %s - EmbedRMSDCutoff: None; Size: %s"
787 % (RDKitUtil.GetMolName(Mol, MolNum), len(ConfIDs))
788 )
789
790 return ConfIDs
791
792
793 def SetupPsi4Mol(Psi4Handle, Mol, MolNum, ConfID=-1):
794 """Setup a Psi4 molecule object."""
795
796 if OptionsInfo["RecenterAndReorient"]:
797 MolGeometry = RDKitUtil.GetPsi4XYZFormatString(Mol, ConfID=ConfID, NoCom=False, NoReorient=False)
798 else:
799 MolGeometry = RDKitUtil.GetPsi4XYZFormatString(Mol, ConfID=ConfID, NoCom=True, NoReorient=True)
800
801 try:
802 Psi4Mol = Psi4Handle.geometry(MolGeometry)
803 except Exception as ErrMsg:
804 Psi4Mol = None
805 if not OptionsInfo["QuietMode"]:
806 MiscUtil.PrintWarning("Failed to create Psi4 molecule from geometry string: %s\n" % ErrMsg)
807 MolName = RDKitUtil.GetMolName(Mol, MolNum)
808 MiscUtil.PrintWarning("Ignoring molecule: %s" % MolName)
809
810 if OptionsInfo["Symmetrize"]:
811 Psi4Mol.symmetrize(OptionsInfo["SymmetrizeTolerance"])
812
813 return Psi4Mol
814
815
816 def PerformPsi4Cleanup(Psi4Handle):
817 """Perform clean up."""
818
819 # Clean up after Psi4 run...
820 Psi4Handle.core.clean()
821
822 # Clean up any leftover scratch files...
823 if OptionsInfo["MPMode"]:
824 Psi4Util.RemoveScratchFiles(Psi4Handle, OptionsInfo["Psi4RunParams"]["OutputFile"])
825
826
827 def CheckAndValidateMolecule(Mol, MolCount=None):
828 """Validate molecule for Psi4 calculations."""
829
830 if Mol is None:
831 if not OptionsInfo["QuietMode"]:
832 MiscUtil.PrintInfo("\nProcessing molecule number %s..." % MolCount)
833 return False
834
835 MolName = RDKitUtil.GetMolName(Mol, MolCount)
836 if not OptionsInfo["QuietMode"]:
837 MiscUtil.PrintInfo("\nProcessing molecule %s..." % MolName)
838
839 if RDKitUtil.IsMolEmpty(Mol):
840 if not OptionsInfo["QuietMode"]:
841 MiscUtil.PrintWarning("Ignoring empty molecule: %s\n" % MolName)
842 return False
843
844 if not RDKitUtil.ValidateElementSymbols(RDKitUtil.GetAtomSymbols(Mol)):
845 if not OptionsInfo["QuietMode"]:
846 MiscUtil.PrintWarning("Ignoring molecule containing invalid element symbols: %s\n" % MolName)
847 return False
848
849 # Check for 3D flag...
850 if OptionsInfo["SkipConformerGeneration"]:
851 if not Mol.GetConformer().Is3D():
852 if not OptionsInfo["QuietMode"]:
853 MiscUtil.PrintWarning("3D tag is not set for molecule: %s\n" % MolName)
854
855 # Check for missing hydrogens...
856 if OptionsInfo["SkipConformerGeneration"]:
857 # Otherwise, Hydrogens are always added...
858 if RDKitUtil.AreHydrogensMissingInMolecule(Mol):
859 if not OptionsInfo["QuietMode"]:
860 MiscUtil.PrintWarning("Missing hydrogens in molecule: %s\n" % MolName)
861
862 return True
863
864
865 def SetupMethodNameAndBasisSet(Mol):
866 """Setup method name and basis set."""
867
868 MethodName = OptionsInfo["MethodName"]
869 if OptionsInfo["MethodNameAuto"]:
870 MethodName = "B3LYP"
871
872 BasisSet = OptionsInfo["BasisSet"]
873 if OptionsInfo["BasisSetAuto"]:
874 BasisSet = "6-31+G**" if RDKitUtil.IsAtomSymbolPresentInMol(Mol, "S") else "6-31G**"
875
876 return (MethodName, BasisSet)
877
878
879 def SetupReferenceWavefunction(Mol):
880 """Setup reference wavefunction."""
881
882 Reference = OptionsInfo["Reference"]
883 if OptionsInfo["ReferenceAuto"]:
884 Reference = "UHF" if (RDKitUtil.GetSpinMultiplicity(Mol) > 1) else "RHF"
885
886 return Reference
887
888
889 def SetupConstrainedTorsionsAroundRotatableBonds(Psi4Handle, Mol):
890 """Setup constrained torsions around rotatable bonds."""
891
892 if not OptionsInfo["Psi4ConstrainTorsionsMode"]:
893 return
894
895 # Get torsions around rotatable bonds...
896 TorsionAtomsIndicesList = RDKitUtil.GetTorsionsAroundRotatableBonds(
897 Mol, GetConstrainTorsionsRotBondsPatternMol(), OptionsInfo["Psi4ConstrainTorsionsParams"]["IgnoreHydrogens"]
898 )
899
900 # Freeze dihedrals corresponding to torsions around rotatable bonds...
901 FreezeDihedralsString = SetupFreezeDihedralsStringForPsi4(TorsionAtomsIndicesList)
902 if FreezeDihedralsString is None:
903 # Clear any frozen dihedral values
904 Psi4Handle.set_options({"OPTKING__frozen_dihedral": ""})
905 else:
906 # Setup frozen dihedral values...
907 Psi4Handle.set_options({"OPTKING__frozen_dihedral": FreezeDihedralsString})
908
909
910 def SetupFreezeDihedralsStringForPsi4(TorsionAtomsIndicesList):
911 """Setup freeze dihedrals string for Psi4."""
912
913 FreezeDihedralsString = None
914 if TorsionAtomsIndicesList is None or len(TorsionAtomsIndicesList) == 0:
915 return FreezeDihedralsString
916
917 TorsionsList = []
918 for TorsionAtomIndices in TorsionAtomsIndicesList:
919 AtomIndex1, AtomIndex2, AtomIndex3, AtomIndex4 = TorsionAtomIndices
920
921 # Atom indices start from 1 for Psi4 instead 0 for RDKit...
922 TorsionString = "%s %s %s %s" % (AtomIndex1 + 1, AtomIndex2 + 1, AtomIndex3 + 1, AtomIndex4 + 1)
923 TorsionsList.append(TorsionString)
924
925 if len(TorsionsList):
926 FreezeDihedralsString = "%s" % "\n".join(TorsionsList)
927
928 return FreezeDihedralsString
929
930
931 def GetConstrainTorsionsRotBondsPatternMol():
932 """Get constrain torsions rotatable bonds pattern molecule."""
933
934 if "RotBondsSMARTSPatternMol" not in OptionsInfo["Psi4ConstrainTorsionsParams"]:
935 OptionsInfo["Psi4ConstrainTorsionsParams"]["RotBondsSMARTSPatternMol"] = Chem.MolFromSmarts(
936 OptionsInfo["Psi4ConstrainTorsionsParams"]["RotBondsSMARTSPattern"]
937 )
938
939 return OptionsInfo["Psi4ConstrainTorsionsParams"]["RotBondsSMARTSPatternMol"]
940
941
942 def SetupEnergyConversionFactor(Psi4Handle):
943 """Setup converstion factor for energt units. The Psi4 energy units are Hartrees."""
944
945 EnergyUnits = OptionsInfo["EnergyUnits"]
946
947 ApplyConversionFactor = True
948 if re.match(r"^kcal\/mol$", EnergyUnits, re.I):
949 ConversionFactor = Psi4Handle.constants.hartree2kcalmol
950 elif re.match(r"^kJ\/mol$", EnergyUnits, re.I):
951 ConversionFactor = Psi4Handle.constants.hartree2kJmol
952 elif re.match("^eV$", EnergyUnits, re.I):
953 ConversionFactor = Psi4Handle.constants.hartree2ev
954 else:
955 ApplyConversionFactor = False
956 ConversionFactor = 1.0
957
958 OptionsInfo["ApplyEnergyConversionFactor"] = ApplyConversionFactor
959 OptionsInfo["EnergyConversionFactor"] = ConversionFactor
960
961
962 def WriteMolecule(Writer, Mol, MolNum=None, ConfID=None, Energy=None, SolvationEnergy=None):
963 """Write molecule."""
964
965 if OptionsInfo["EnergyOut"] and Energy is not None:
966 Energy = "%.*f" % (OptionsInfo["Precision"], Energy)
967 Mol.SetProp(OptionsInfo["EnergyDataFieldLabel"], Energy)
968
969 if OptionsInfo["Psi4DDXSolvationMode"] and SolvationEnergy is not None:
970 SolvationEnergy = "%.*f" % (OptionsInfo["Precision"], SolvationEnergy)
971 Mol.SetProp(OptionsInfo["SolvationEnergyDataFieldLabel"], SolvationEnergy)
972
973 try:
974 if ConfID is None:
975 Writer.write(Mol)
976 else:
977 Writer.write(Mol, confId=ConfID)
978 except (ValueError, RuntimeError) as ErrMsg:
979 if not OptionsInfo["QuietMode"]:
980 MolName = RDKitUtil.GetMolName(Mol, MolNum)
981 MiscUtil.PrintWarning("Failed to write molecule %s:\n%s\n" % (MolName, ErrMsg))
982 return False
983
984 return True
985
986
987 def CheckAndSetupDDX():
988 """Load pyddx and return its handle."""
989
990 try:
991 import pyddx
992 except ImportError as ErrMsg:
993 sys.stderr.write("\nFailed to import module/package pyddx for solvation: %s\n" % ErrMsg)
994 sys.stderr.write("Psi4 relies on pyddx module for DDX solvation calculation.\n")
995 sys.stderr.write("You may need to install it from pypi or conda-forge.\n")
996 sys.stderr.write("Check/update your environment and try again.\n\n")
997 sys.exit(1)
998
999 return pyddx
1000
1001
1002 def ProcessDDXListSolventsOption():
1003 """List solvent information provided by tge DDX module for the calculation
1004 of solvation energy."""
1005
1006 PyDDXHandle = CheckAndSetupDDX()
1007
1008 MiscUtil.PrintInfo("Solvent name and dielectric constant information avaibale in the DDX module:\n")
1009 MiscUtil.PrintInfo("Solvent\tEpsilon")
1010 Count = 0
1011 for Solvent in sorted(PyDDXHandle.data.solvent_epsilon.keys()):
1012 Count += 1
1013 Epsilon = PyDDXHandle.data.solvent_epsilon[Solvent]
1014 MiscUtil.PrintInfo("%s\t%s" % (Solvent, Epsilon))
1015
1016 MiscUtil.PrintInfo("\nTotal number of solvents: %s\n" % Count)
1017
1018
1019 def ProcessOptions():
1020 """Process and validate command line arguments and options."""
1021
1022 MiscUtil.PrintInfo("Processing options...")
1023
1024 # Validate options...
1025 ValidateOptions()
1026
1027 OptionsInfo["Infile"] = Options["--infile"]
1028 OptionsInfo["SMILESInfileStatus"] = True if MiscUtil.CheckFileExt(Options["--infile"], "smi csv tsv txt") else False
1029 ParamsDefaultInfoOverride = {"RemoveHydrogens": False}
1030 OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters(
1031 "--infileParams",
1032 Options["--infileParams"],
1033 InfileName=Options["--infile"],
1034 ParamsDefaultInfo=ParamsDefaultInfoOverride,
1035 )
1036
1037 OptionsInfo["Outfile"] = Options["--outfile"]
1038 OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters(
1039 "--outfileParams", Options["--outfileParams"]
1040 )
1041
1042 OptionsInfo["Overwrite"] = Options["--overwrite"]
1043
1044 # Method, basis set, and reference wavefunction...
1045 OptionsInfo["BasisSet"] = Options["--basisSet"]
1046 OptionsInfo["BasisSetAuto"] = True if re.match("^auto$", Options["--basisSet"], re.I) else False
1047
1048 OptionsInfo["MethodName"] = Options["--methodName"]
1049 OptionsInfo["MethodNameAuto"] = True if re.match("^auto$", Options["--methodName"], re.I) else False
1050
1051 OptionsInfo["Reference"] = Options["--reference"]
1052 OptionsInfo["ReferenceAuto"] = True if re.match("^auto$", Options["--reference"], re.I) else False
1053
1054 # Run and options parameters...
1055 OptionsInfo["Psi4OptionsParams"] = Psi4Util.ProcessPsi4OptionsParameters(
1056 "--psi4OptionsParams", Options["--psi4OptionsParams"]
1057 )
1058 OptionsInfo["Psi4RunParams"] = Psi4Util.ProcessPsi4RunParameters(
1059 "--psi4RunParams", Options["--psi4RunParams"], InfileName=OptionsInfo["Infile"]
1060 )
1061
1062 # Psi4 constrain torsions paramaters...
1063 OptionsInfo["Psi4ConstrainTorsionsMode"] = (
1064 True if re.match("^yes$", Options["--psi4ConstrainTorsions"], re.I) else False
1065 )
1066 ParamsDefaultInfoOverride = {"RotBondsSMARTSMode": "NonStrict"}
1067 Psi4ConstrainTorsionsParams = Psi4Util.ProcessPsi4ConstrainTorsionsParameters(
1068 "--psi4ConstrainTorsionsParams", Options["--psi4ConstrainTorsionsParams"], ParamsDefaultInfoOverride
1069 )
1070 OptionsInfo["Psi4ConstrainTorsionsParams"] = Psi4ConstrainTorsionsParams
1071
1072 if OptionsInfo["Psi4ConstrainTorsionsMode"]:
1073 RotBondsSMARTSPattern = OptionsInfo["Psi4ConstrainTorsionsParams"]["RotBondsSMARTSPattern"]
1074 RotBondsPatternMol = Chem.MolFromSmarts(RotBondsSMARTSPattern)
1075 if RotBondsPatternMol is None:
1076 MiscUtil.PrintError(
1077 'Failed to create rotatable bonds pattern molecule. The rotatable bonds SMARTS pattern, "%s", specified for parameter name "--rotBondsSMARTSPattern" using "--psi4ConstrainTorsions"option is not valid.'
1078 % (RotBondsSMARTSPattern)
1079 )
1080
1081 # DDX solvation parameters...
1082 OptionsInfo["Psi4DDXSolvationMode"] = True if re.match("^yes$", Options["--psi4DDXSolvation"], re.I) else False
1083 Psi4DDXSolvationParams = Psi4Util.ProcessPsi4DDXSolvationParameters(
1084 "--psi4DDXSolvationParams", Options["--psi4DDXSolvationParams"]
1085 )
1086 Psi4DDXSolvationOptions = Psi4Util.SetupPsi4DDXSolvationOptions(
1087 OptionsInfo["Psi4DDXSolvationMode"], Psi4DDXSolvationParams
1088 )
1089 OptionsInfo["Psi4DDXSolvationParams"] = Psi4DDXSolvationParams
1090 OptionsInfo["Psi4DDXSolvationOptions"] = Psi4DDXSolvationOptions
1091
1092 if OptionsInfo["Psi4DDXSolvationMode"]:
1093 CheckAndSetupDDX()
1094
1095 # Conformer generation paramaters...
1096 OptionsInfo["ConfGeneration"] = Options["--confGeneration"]
1097 OptionsInfo["SkipConformerGeneration"] = False if re.match("^yes$", Options["--confGeneration"], re.I) else True
1098
1099 if OptionsInfo["SkipConformerGeneration"]:
1100 if OptionsInfo["SMILESInfileStatus"]:
1101 MiscUtil.PrintError(
1102 'The value, %s, specified for option "-c --confGeneration" is not allowed during, %s, value for option "-i, --infile" . The input file must be a 3D SD or MOL file. '
1103 % (Options["--confGeneration"], Options["--infile"])
1104 )
1105
1106 if not OptionsInfo["SkipConformerGeneration"]:
1107 if OptionsInfo["Psi4ConstrainTorsionsMode"]:
1108 MiscUtil.PrintError(
1109 'The value, %s, specified for option "-c --confGeneration" is not allowed during, %s, value for option "--psi4ConstrainTorsions".'
1110 % (Options["--confGeneration"], Options["--psi4ConstrainTorsions"])
1111 )
1112
1113 ParamsDefaultInfoOverride = {"MaxConfs": 50, "MaxIters": 250}
1114 OptionsInfo["ConfGenerationParams"] = MiscUtil.ProcessOptionConformerParameters(
1115 "--confParams", Options["--confParams"], ParamsDefaultInfoOverride
1116 )
1117
1118 # Write energy parameters...
1119 OptionsInfo["EnergyOut"] = True if re.match("^yes$", Options["--energyOut"], re.I) else False
1120 OptionsInfo["EnergyUnits"] = Options["--energyUnits"]
1121
1122 EnergyDataFieldLabel = Options["--energyDataFieldLabel"]
1123 if re.match("^auto$", EnergyDataFieldLabel, re.I):
1124 EnergyDataFieldLabel = "Psi4_Energy (%s)" % Options["--energyUnits"]
1125 OptionsInfo["EnergyDataFieldLabel"] = EnergyDataFieldLabel
1126
1127 SolvationEnergyDataFieldLabel = "Psi4_DDX_Solvation_Energy (%s)" % Options["--energyUnits"]
1128 OptionsInfo["SolvationEnergyDataFieldLabel"] = SolvationEnergyDataFieldLabel
1129
1130 OptionsInfo["MaxIters"] = int(Options["--maxIters"])
1131
1132 OptionsInfo["MPMode"] = True if re.match("^yes$", Options["--mp"], re.I) else False
1133 OptionsInfo["MPParams"] = MiscUtil.ProcessOptionMultiprocessingParameters("--mpParams", Options["--mpParams"])
1134
1135 # Multiprocessing level...
1136 MPLevelMoleculesMode = False
1137 MPLevelConformersMode = False
1138 MPLevel = Options["--mpLevel"]
1139 if re.match("^Molecules$", MPLevel, re.I):
1140 MPLevelMoleculesMode = True
1141 elif re.match("^Conformers$", MPLevel, re.I):
1142 if OptionsInfo["SkipConformerGeneration"]:
1143 MiscUtil.PrintError(
1144 'The value, %s, specified for option "--mpLevel" is not allowed during, %s, value of option "--confGeneration" . '
1145 % (MPLevel, Options["--confGeneration"])
1146 )
1147 MPLevelConformersMode = True
1148 else:
1149 MiscUtil.PrintError('The value, %s, specified for option "--mpLevel" is not valid. ' % MPLevel)
1150 OptionsInfo["MPLevel"] = MPLevel
1151 OptionsInfo["MPLevelMoleculesMode"] = MPLevelMoleculesMode
1152 OptionsInfo["MPLevelConformersMode"] = MPLevelConformersMode
1153
1154 OptionsInfo["Precision"] = int(Options["--precision"])
1155 OptionsInfo["QuietMode"] = True if re.match("^yes$", Options["--quiet"], re.I) else False
1156
1157 RecenterAndReorient = Options["--recenterAndReorient"]
1158 if re.match("^auto$", RecenterAndReorient, re.I):
1159 RecenterAndReorient = False if OptionsInfo["SkipConformerGeneration"] else True
1160 else:
1161 RecenterAndReorient = True if re.match("^yes$", RecenterAndReorient, re.I) else False
1162 OptionsInfo["RecenterAndReorient"] = RecenterAndReorient
1163
1164 Symmetrize = Options["--symmetrize"]
1165 if re.match("^auto$", Symmetrize, re.I):
1166 Symmetrize = True if RecenterAndReorient else False
1167 else:
1168 Symmetrize = True if re.match("^yes$", Symmetrize, re.I) else False
1169 OptionsInfo["Symmetrize"] = Symmetrize
1170
1171 OptionsInfo["SymmetrizeTolerance"] = float(Options["--symmetrizeTolerance"])
1172
1173
1174 def RetrieveOptions():
1175 """Retrieve command line arguments and options."""
1176
1177 # Get options...
1178 global Options
1179 Options = docopt(_docoptUsage_)
1180
1181 # Set current working directory to the specified directory...
1182 WorkingDir = Options["--workingdir"]
1183 if WorkingDir:
1184 os.chdir(WorkingDir)
1185
1186 # Handle examples option...
1187 if "--examples" in Options and Options["--examples"]:
1188 MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_))
1189 sys.exit(0)
1190
1191 # Handle listing of solvent information...
1192 if Options and Options["--psi4DDXListSolvents"]:
1193 ProcessDDXListSolventsOption()
1194 sys.exit(0)
1195
1196
1197 def ValidateOptions():
1198 """Validate option values."""
1199
1200 MiscUtil.ValidateOptionTextValue("-c, --confGeneration", Options["--confGeneration"], "yes no")
1201
1202 MiscUtil.ValidateOptionTextValue("--energyOut", Options["--energyOut"], "yes no")
1203 MiscUtil.ValidateOptionTextValue("--energyUnits", Options["--energyUnits"], "Hartrees kcal/mol kJ/mol eV")
1204
1205 MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"])
1206 MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "sdf sd mol smi txt csv tsv")
1207
1208 MiscUtil.ValidateOptionIntegerValue("--maxIters", Options["--maxIters"], {">": 0})
1209
1210 MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "sdf sd")
1211 MiscUtil.ValidateOptionsOutputFileOverwrite(
1212 "-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"]
1213 )
1214 MiscUtil.ValidateOptionsDistinctFileNames(
1215 "-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"]
1216 )
1217
1218 MiscUtil.ValidateOptionTextValue("--mp", Options["--mp"], "yes no")
1219 MiscUtil.ValidateOptionTextValue("--mpLevel", Options["--mpLevel"], "Molecules Conformers")
1220
1221 MiscUtil.ValidateOptionIntegerValue("-p, --precision", Options["--precision"], {">": 0})
1222
1223 MiscUtil.ValidateOptionTextValue("--psi4ConstrainTorsions", Options["--psi4ConstrainTorsions"], "yes no")
1224
1225 MiscUtil.ValidateOptionTextValue("--psi4DDXSolvation", Options["--psi4DDXSolvation"], "yes no")
1226
1227 MiscUtil.ValidateOptionTextValue("-q, --quiet", Options["--quiet"], "yes no")
1228
1229 MiscUtil.ValidateOptionTextValue("--recenterAndReorient", Options["--recenterAndReorient"], "yes no auto")
1230 MiscUtil.ValidateOptionTextValue("--symmetrize", Options["--symmetrize"], "yes no auto")
1231 MiscUtil.ValidateOptionFloatValue("--symmetrizeTolerance", Options["--symmetrizeTolerance"], {">": 0})
1232
1233
1234 # Setup a usage string for docopt...
1235 _docoptUsage_ = """
1236 Psi4PerformMinimization.py - Perform structure minimization
1237
1238 Usage:
1239 Psi4PerformMinimization.py [--basisSet <text>] [--confGeneration <yes or no> ] [--confParams <Name,Value,...>]
1240 [--energyOut <yes or no>] [--energyDataFieldLabel <text>] [--energyUnits <text>]
1241 [--infileParams <Name,Value,...>] [--maxIters <number>] [--methodName <text>]
1242 [--mp <yes or no>] [--mpLevel <Molecules or Conformers>] [--mpParams <Name, Value,...>]
1243 [ --outfileParams <Name,Value,...> ] [--overwrite] [--precision <number> ]
1244 [--psi4OptionsParams <Name,Value,...>] [--psi4RunParams <Name,Value,...>]
1245 [--psi4ConstrainTorsions <yes or no>] [--psi4ConstrainTorsionsParams <Name,Value,...>]
1246 [--psi4DDXSolvation <yes or no>] [--psi4DDXSolvationParams <Name,Value,...>]
1247 [--quiet <yes or no>] [--reference <text>] [--recenterAndReorient <yes or no>]
1248 [--symmetrize <yes or no>] [--symmetrizeTolerance <number>] [-w <dir>] -i <infile> -o <outfile>
1249 Psi4PerformMinimization.py --psi4DDXListSolvents
1250 Psi4PerformMinimization.py -h | --help | -e | --examples
1251
1252 Description:
1253 Generate 3D structures for molecules using a combination of distance geometry
1254 and forcefield minimization followed by geometry optimization using a quantum
1255 chemistry method. A set of initial 3D structures are generated for a molecule
1256 employing distance geometry. The 3D structures in the conformation ensemble
1257 are sequentially minimized using forcefield and a quantum chemistry method.
1258 The conformation with lowest energy is selected to represent the final structure.
1259 An option is available to skip the generation of the conformation ensemble along
1260 with forcefield minimization and simply perform minimization on the initial 3D
1261 structure using a quantum chemistry method.
1262
1263 A Psi4 XYZ format geometry string is automatically generated for each molecule
1264 in input file. It contains atom symbols and 3D coordinates for each atom in a
1265 molecule. In addition, the formal charge and spin multiplicity are present in the
1266 the geometry string. These values are either retrieved from molecule properties
1267 named 'FormalCharge' and 'SpinMultiplicty' or dynamically calculated for a
1268 molecule.
1269
1270 The supported input file formats are: Mol (.mol), SD (.sdf, .sd), SMILES (.smi,
1271 .csv, .tsv, .txt)
1272
1273 The supported output file formats are: SD (.sdf, .sd)
1274
1275 Options:
1276 -b, --basisSet <text> [default: auto]
1277 Basis set to use for energy minimization. Default: 6-31+G** for sulfur
1278 containing molecules; Otherwise, 6-31G** [ Ref 150 ]. The specified
1279 value must be a valid Psi4 basis set. No validation is performed.
1280
1281 The following list shows a representative sample of basis sets available
1282 in Psi4:
1283
1284 STO-3G, 6-31G, 6-31+G, 6-31++G, 6-31G*, 6-31+G*, 6-31++G*,
1285 6-31G**, 6-31+G**, 6-31++G**, 6-311G, 6-311+G, 6-311++G,
1286 6-311G*, 6-311+G*, 6-311++G*, 6-311G**, 6-311+G**, 6-311++G**,
1287 cc-pVDZ, cc-pCVDZ, aug-cc-pVDZ, cc-pVDZ-DK, cc-pCVDZ-DK, def2-SVP,
1288 def2-SVPD, def2-TZVP, def2-TZVPD, def2-TZVPP, def2-TZVPPD
1289
1290 -c, --confGeneration <yes or no> [default: yes]
1291 Generate an initial 3D conformation ensemble using distance geometry and
1292 forcefield minimization before final geometry optimization by a specified
1293 method name and basis set. Possible values: yes or no.
1294
1295 The 'No' value skips the generation of conformations employing distance
1296 geometry and forcefield minimization. The 3D structures in input file are
1297 minimized by a quantum method. It is not allowed for SMILES input file.
1298 --confParams <Name,Value,...> [default: auto]
1299 A comma delimited list of parameter name and value pairs for generating
1300 initial sets of 3D conformations for molecules. The 3D conformation ensemble
1301 is generated using distance geometry and forcefield functionality available
1302 in RDKit. The 3D structures in the conformation ensemble are subsequently
1303 minimized by a quantum chemistry method available in Psi4.
1304
1305 The supported parameter names along with their default values are shown
1306 below:
1307
1308 confMethod,ETKDGv2,
1309 forceField,MMFF, forceFieldMMFFVariant,MMFF94,
1310 enforceChirality,yes,embedRMSDCutoff,0.5,maxConfs,50,
1311 maxIters,250,randomSeed,auto
1312
1313 confMethod,ETKDGv2 [ Possible values: SDG, KDG, ETDG,
1314 ETKDG , or ETKDGv2]
1315 forceField, MMFF [ Possible values: UFF or MMFF ]
1316 forceFieldMMFFVariant,MMFF94 [ Possible values: MMFF94 or MMFF94s ]
1317 enforceChirality,yes [ Possible values: yes or no ]
1318 embedRMSDCutoff,0.5 [ Possible values: number or None]
1319
1320 confMethod: Conformation generation methodology for generating initial 3D
1321 coordinates. Possible values: Standard Distance Geometry (SDG), Experimental
1322 Torsion-angle preference with Distance Geometry (ETDG), basic Knowledge-terms
1323 with Distance Geometry (KDG) and Experimental Torsion-angle preference
1324 along with basic Knowledge-terms and Distance Geometry (ETKDG or
1325 ETKDGv2) [Ref 129, 167] .
1326
1327 forceField: Forcefield method to use for energy minimization. Possible
1328 values: Universal Force Field (UFF) [ Ref 81 ] or Merck Molecular Mechanics
1329 Force Field [ Ref 83-87 ] .
1330
1331 enforceChirality: Enforce chirality for defined chiral centers during
1332 forcefield minimization.
1333
1334 maxConfs: Maximum number of conformations to generate for each molecule
1335 during the generation of an initial 3D conformation ensemble using
1336 conformation generation methodology. The conformations are minimized
1337 using the specified forcefield and a quantum chemistry method. The lowest
1338 energy conformation is written to the output file.
1339
1340 embedRMSDCutoff: RMSD cutoff for retaining initial set conformers embedded
1341 using distance geometry and before forcefield minimization. All embedded
1342 conformers are kept for 'None' value. Otherwise, only those conformers which
1343 are different from each other by the specified RMSD cutoff, 0.5 by default,
1344 are kept. The first embedded conformer is always retained.
1345
1346 maxIters: Maximum number of iterations to perform for each conformation
1347 during forcefield minimization.
1348
1349 randomSeed: Seed for the random number generator for reproducing initial
1350 3D coordinates in a conformation ensemble. Default is to use a random seed.
1351 --energyOut <yes or no> [default: yes]
1352 Write out energy values.
1353 --energyDataFieldLabel <text> [default: auto]
1354 Energy data field label for writing energy values. Default: Psi4_Energy (<Units>).
1355 --energyUnits <text> [default: kcal/mol]
1356 Energy units. Possible values: Hartrees, kcal/mol, kJ/mol, or eV.
1357 -e, --examples
1358 Print examples.
1359 -h, --help
1360 Print this help message.
1361 -i, --infile <infile>
1362 Input file name.
1363 --infileParams <Name,Value,...> [default: auto]
1364 A comma delimited list of parameter name and value pairs for reading
1365 molecules from files. The supported parameter names for different file
1366 formats, along with their default values, are shown below:
1367
1368 SD, MOL: removeHydrogens,no,sanitize,yes,strictParsing,yes
1369 SMILES: smilesColumn,1,smilesNameColumn,2,smilesDelimiter,space,
1370 smilesTitleLine,auto,sanitize,yes
1371
1372 Possible values for smilesDelimiter: space, comma or tab.
1373 --maxIters <number> [default: 50]
1374 Maximum number of iterations to perform for each molecule or conformer
1375 during energy minimization by a quantum chemistry method.
1376 -m, --methodName <text> [default: auto]
1377 Method to use for energy minimization. Default: B3LYP [ Ref 150 ]. The
1378 specified value must be a valid Psi4 method name. No validation is
1379 performed.
1380
1381 The following list shows a representative sample of methods available
1382 in Psi4:
1383
1384 B1LYP, B2PLYP, B2PLYP-D3BJ, B2PLYP-D3MBJ, B3LYP, B3LYP-D3BJ,
1385 B3LYP-D3MBJ, CAM-B3LYP, CAM-B3LYP-D3BJ, HF, HF-D3BJ, HF3c, M05,
1386 M06, M06-2x, M06-HF, M06-L, MN12-L, MN15, MN15-D3BJ,PBE, PBE0,
1387 PBEH3c, PW6B95, PW6B95-D3BJ, WB97, WB97X, WB97X-D, WB97X-D3BJ
1388
1389 --mp <yes or no> [default: no]
1390 Use multiprocessing.
1391
1392 By default, input data is retrieved in a lazy manner via mp.Pool.imap()
1393 function employing lazy RDKit data iterable. This allows processing of
1394 arbitrary large data sets without any additional requirements memory.
1395
1396 All input data may be optionally loaded into memory by mp.Pool.map()
1397 before starting worker processes in a process pool by setting the value
1398 of 'inputDataMode' to 'InMemory' in '--mpParams' option.
1399
1400 A word to the wise: The default 'chunkSize' value of 1 during 'Lazy' input
1401 data mode may adversely impact the performance. The '--mpParams' section
1402 provides additional information to tune the value of 'chunkSize'.
1403 --mpLevel <Molecules or Conformers> [default: Molecules]
1404 Perform multiprocessing at molecules or conformers level. Possible values:
1405 Molecules or Conformers. The 'Molecules' value starts a process pool at the
1406 molecules level. All conformers of a molecule are processed in a single
1407 process. The 'Conformers' value, however, starts a process pool at the
1408 conformers level. Each conformer of a molecule is processed in an individual
1409 process in the process pool. The default Psi4 'OutputFile' is set to 'quiet'
1410 using '--psi4RunParams' for 'Conformers' level. Otherwise, it may generate
1411 a large number of Psi4 output files.
1412 --mpParams <Name,Value,...> [default: auto]
1413 A comma delimited list of parameter name and value pairs to configure
1414 multiprocessing.
1415
1416 The supported parameter names along with their default and possible
1417 values are shown below:
1418
1419 chunkSize, auto
1420 inputDataMode, Lazy [ Possible values: InMemory or Lazy ]
1421 numProcesses, auto [ Default: mp.cpu_count() ]
1422
1423 These parameters are used by the following functions to configure and
1424 control the behavior of multiprocessing: mp.Pool(), mp.Pool.map(), and
1425 mp.Pool.imap().
1426
1427 The chunkSize determines chunks of input data passed to each worker
1428 process in a process pool by mp.Pool.map() and mp.Pool.imap() functions.
1429 The default value of chunkSize is dependent on the value of 'inputDataMode'.
1430
1431 The mp.Pool.map() function, invoked during 'InMemory' input data mode,
1432 automatically converts RDKit data iterable into a list, loads all data into
1433 memory, and calculates the default chunkSize using the following method
1434 as shown in its code:
1435
1436 chunkSize, extra = divmod(len(dataIterable), len(numProcesses) * 4)
1437 if extra: chunkSize += 1
1438
1439 For example, the default chunkSize will be 7 for a pool of 4 worker processes
1440 and 100 data items.
1441
1442 The mp.Pool.imap() function, invoked during 'Lazy' input data mode, employs
1443 'lazy' RDKit data iterable to retrieve data as needed, without loading all the
1444 data into memory. Consequently, the size of input data is not known a priori.
1445 It's not possible to estimate an optimal value for the chunkSize. The default
1446 chunkSize is set to 1.
1447
1448 The default value for the chunkSize during 'Lazy' data mode may adversely
1449 impact the performance due to the overhead associated with exchanging
1450 small chunks of data. It is generally a good idea to explicitly set chunkSize to
1451 a larger value during 'Lazy' input data mode, based on the size of your input
1452 data and number of processes in the process pool.
1453
1454 The mp.Pool.map() function waits for all worker processes to process all
1455 the data and return the results. The mp.Pool.imap() function, however,
1456 returns the the results obtained from worker processes as soon as the
1457 results become available for specified chunks of data.
1458
1459 The order of data in the results returned by both mp.Pool.map() and
1460 mp.Pool.imap() functions always corresponds to the input data.
1461 -o, --outfile <outfile>
1462 Output file name.
1463 --outfileParams <Name,Value,...> [default: auto]
1464 A comma delimited list of parameter name and value pairs for writing
1465 molecules to files. The supported parameter names for different file
1466 formats, along with their default values, are shown below:
1467
1468 SD: kekulize,yes,forceV3000,no
1469
1470 --overwrite
1471 Overwrite existing files.
1472 --precision <number> [default: 6]
1473 Floating point precision for writing energy values.
1474 --psi4OptionsParams <Name,Value,...> [default: none]
1475 A comma delimited list of Psi4 option name and value pairs for setting
1476 global and module options. The names are 'option_name' for global options
1477 and 'module_name__option_name' for options local to a module. The
1478 specified option names must be valid Psi4 names. No validation is
1479 performed.
1480
1481 The specified option name and value pairs are processed and passed to
1482 psi4.set_options() as a dictionary. The supported value types are float,
1483 integer, boolean, or string. The float value string is converted into a float.
1484 The valid values for a boolean string are yes, no, true, false, on, or off.
1485 --psi4RunParams <Name,Value,...> [default: auto]
1486 A comma delimited list of parameter name and value pairs for configuring
1487 Psi4 jobs.
1488
1489 The supported parameter names along with their default and possible
1490 values are shown below:
1491
1492 MemoryInGB, 1
1493 NumThreads, 1
1494 OutputFile, auto [ Possible values: stdout, quiet, or FileName ]
1495 ScratchDir, auto [ Possivle values: DirName]
1496 RemoveOutputFile, yes [ Possible values: yes, no, true, or false]
1497
1498 These parameters control the runtime behavior of Psi4.
1499
1500 The default file name for 'OutputFile' is <InFileRoot>_Psi4.out. The PID
1501 is appended to output file name during multiprocessing as shown:
1502 <InFileRoot>_Psi4_<PIDNum>.out. The 'stdout' value for 'OutputType'
1503 sends Psi4 output to stdout. The 'quiet' or 'devnull' value suppresses
1504 all Psi4 output. The 'OutputFile' is set to 'quiet' for 'auto' value during
1505 'Conformers' of '--mpLevel' option.
1506
1507 The default 'Yes' value of 'RemoveOutputFile' option forces the removal
1508 of any existing Psi4 before creating new files to append output from
1509 multiple Psi4 runs.
1510
1511 The option 'ScratchDir' is a directory path to the location of scratch
1512 files. The default value corresponds to Psi4 default. It may be used to
1513 override the deafult path.
1514 --psi4ConstrainTorsions <yes or no> [default: no]
1515 Constrain torsions around rotatable bonds during Psi4 minimization.
1516 --psi4ConstrainTorsionsParams <Name,Value,...> [default: auto]
1517 A space delimited list of parameter name and value pairs for constraining
1518 torsions around rotatable bonds during Psi4 minimization. The supported
1519 parameter names, along with their default values, are shown below:
1520
1521 ignoreHydrogens, yes, [ Possible values: yes or no ]
1522 rotBondsSMARTSMode, NonStrict, [ Possible values: NonStrict,
1523 SemiStrict, Strict, Specify ]
1524 rotBondsSMARTSPattern, auto [ Possible values: Valid SMARTS ]
1525
1526 ignoreHydrogens: Ignore torsions around rotatable bonds containing
1527 hydrogens during Psi4 minimization.
1528
1529 rotBondsSMARTSMode: SMARTS pattern to use for identifying rotatable bonds
1530 for constraining torsions during Psi4 minimization. Possible values: NonStrict,
1531 SemiStrict, Strict or Specify. The following SMARTS patterns are used to identify
1532 rotatable bonds for different modes:
1533
1534 NonStrict: [!$(*#*)&!D1]-&!@[!$(*#*)&!D1]
1535
1536 SemiStrict:
1537 [!$(*#*)&!D1&!$(C(F)(F)F)&!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)
1538 &!$(C([CH3])([CH3])[CH3])]-!@[!$(*#*)&!D1&!$(C(F)(F)F)
1539 &!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)&!$(C([CH3])([CH3])[CH3])]
1540
1541 Strict:
1542 [!$(*#*)&!D1&!$(C(F)(F)F)&!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)
1543 &!$(C([CH3])([CH3])[CH3])&!$([CD3](=[N,O,S])-!@[#7,O,S!D1])
1544 &!$([#7,O,S!D1]-!@[CD3]=[N,O,S])&!$([CD3](=[N+])-!@[#7!D1])
1545 &!$([#7!D1]-!@[CD3]=[N+])]-!@[!$(*#*)&!D1&!$(C(F)(F)F)
1546 &!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)&!$(C([CH3])([CH3])[CH3])]
1547
1548 The 'NonStrict' and 'Strict' SMARTS patterns are available in RDKit. The
1549 'NonStrict' SMARTS pattern corresponds to original Daylight SMARTS
1550 specification for rotatable bonds. The 'SemiStrict' SMARTS pattern is
1551 derived from 'Strict' SMARTS patterns for its usage in this script.
1552
1553 You may use any arbitrary SMARTS pattern to identify rotatable bonds by
1554 choosing 'Specify' value for 'rotBondsSMARTSMode' parameter and providing
1555 its value via '--rotBondsSMARTSPattern'.
1556
1557 rotBondsSMARTSPattern: SMARTS pattern for identifying rotatable bonds for
1558 constraining torsions during Psi4 minimization. This option is only valid for
1559 'Specify' value of 'rotBondsSMARTSMode' parameter.
1560 --psi4DDXSolvation <yes or no> [default: no]
1561 Perform minimization in solution using domain-decomposition-based
1562 continuum solvation models [ Ref 160-161 ]. The script relies on Psi4
1563 interface to the DDX module to perform these calculations. The DDX library
1564 provides a linear-scaling implementation of standard continuum solvation
1565 models using a domain- decomposition ansatz. Two solvation models are
1566 supported: COnductor-like Screening MOdel (COSMO) [ Ref 162-163] and
1567 Polarizable Continuum Model (PCM) [ Ref 164-165 ].
1568
1569 The solvation energy is included in the value of the total energy written to
1570 the output SD file. In addition, the value of solvation energy is written to the
1571 output file under its own data field label.
1572
1573 Psi4 relies on Python module PYDDX to calculate solvation energy. It must be
1574 present in your environment.
1575 --psi4DDXSolvationParams <Name,Value,...> [default: auto]
1576 A space delimited list of parameter name and value pairs for calculating
1577 solvation energy using the DDX module. The supported parameter names,
1578 along with their default values, are shown below:
1579
1580 solvationModel PCM [ Possible values: COSMO or PCM]
1581 solvent water [ Possible values: A valid solvent name]
1582 solventEpsilon None
1583 radiiSet UFF [ Possible values: Bondi or UFF]
1584 radiiScaling auto [ Default: 1.2 for Bondi; 1.1 for UFF]
1585
1586 solvationModel: Solvation model for calculating solvation energy.
1587 The corresponding Psi4 option is DDX_MODEL.
1588
1589 solvent: Solvent to use. The corresponding Ps4 option is
1590 DDX_SOLVENT
1591
1592 solventEpsilon: Dielectric constant of the solvent. The
1593 corresponding Psi4 option is DDX_SOLVENT_EPSILON.
1594
1595 radiiSet: Radius set for cavity spheres. The corresponding Psi4
1596 option is DDX_RADII_SET.
1597
1598 radiiScaling: Scaling factor for cavity spheres. The default value
1599 depends on radiiSet: 1.2 for Bondi; 1.1 for UFF. The corresponding
1600 Psi4 option is DDX_RADII_SCALING.
1601
1602 These parameter names are automatically mapped to appropriate DDX keywords.
1603
1604 You may specify the solvent either by directly providing a dielectric constant using
1605 'solventEpsilon' parameter or a solvent name corresponding to 'solvent' parameter.
1606 The solvent name must be a valid named supported by the DDX module. For example:
1607 water, ethanol, dimethylsulfoxide, cis-1,2-dimethylcyclohexane , etc. The 'solvent'
1608 parameter is ignored for non-zero value of 'solvent' option.
1609
1610 The DDX module contains a variety of addtional options to confgure the calculation
1611 of solvation energy. For example: DDX_MAXITER, DDX_RADII_SCALING, etc. You may
1612 use '--psi4OptionsParams' to modify values for additional DDX options.
1613 --psi4DDXListSolvents
1614 List solvent names, along with dielectric values, supported by the DDX module
1615 for the calculation of solvent energy without performing any calculation.
1616 -q, --quiet <yes or no> [default: no]
1617 Use quiet mode. The warning and information messages will not be printed.
1618 -r, --reference <text> [default: auto]
1619 Reference wave function to use for energy calculation. Default: RHF or UHF.
1620 The default values are Restricted Hartree-Fock (RHF) for closed-shell molecules
1621 with all electrons paired and Unrestricted Hartree-Fock (UHF) for open-shell
1622 molecules with unpaired electrons.
1623
1624 The specified value must be a valid Psi4 reference wave function. No validation
1625 is performed. For example: ROHF, CUHF, RKS, etc.
1626
1627 The spin multiplicity determines the default value of reference wave function
1628 for input molecules. It is calculated from number of free radical electrons using
1629 Hund's rule of maximum multiplicity defined as 2S + 1 where S is the total
1630 electron spin. The total spin is 1/2 the number of free radical electrons in a
1631 molecule. The value of 'SpinMultiplicity' molecule property takes precedence
1632 over the calculated value of spin multiplicity.
1633 --recenterAndReorient <yes or no> [default: auto]
1634 Recenter and reorient a molecule during creation of a Psi4 molecule from
1635 a geometry string. Default: 'No' during 'No' value of '--confGeneration';
1636 Otherwise, 'Yes'.
1637
1638 The 'No' values allows the minimization of a molecule in its initial 3D
1639 coordinate space in input file or conformer ensemble.
1640 --symmetrize <yes or no> [default: auto]
1641 Symmetrize molecules before energy minimization. Default: 'Yes' during
1642 'Yes' value of '--recenterAndReorient'; Otherwise, 'No'. The psi4 function,
1643 psi4mol.symmetrize( SymmetrizeTolerance), is called to symmetrize
1644 the molecule before calling psi4.optimize().
1645
1646 The 'No' value of '--symmetrize' during 'Yes' value of '--recenterAndReorient'
1647 may cause psi4.optimize() to fail with a 'Point group changed...' error
1648 message.
1649 --symmetrizeTolerance <number> [default: 0.01]
1650 Symmetry tolerance for '--symmetrize'.
1651 -w, --workingdir <dir>
1652 Location of working directory which defaults to the current directory.
1653
1654 Examples:
1655 To generate an initial conformer ensemble of up to 50 conformations using a
1656 combination of ETKDGv2 distance geometry methodology, applying RMSD cutoff
1657 of 0.5 and MMFF forcefield minimization, followed by energy minimization
1658 using B3LYP/6-31G** and B3LYP/6-31+G** for non-sulfur and sulfur containing
1659 molecules in a SMILES file and write out a SD file containing minimum energy
1660 structure corresponding to each molecule, type:
1661
1662 % Psi4PerformMinimization.py -i Psi4Sample.smi -o Psi4SampleOut.sdf
1663
1664 To run the first example in a quiet mode and write out a SD file, type:
1665
1666 % Psi4PerformMinimization.py -q yes -i Psi4Sample.smi -o
1667 Psi4SampleOut.sdf
1668
1669 To run the first example along with performing energy mimimization in solution
1670 using default DDX solvation parameters for water and write out a SD file, type:
1671
1672 % Psi4PerformMinimization.py -i Psi4Sample.smi -o Psi4SampleOut.sdf
1673 --psi4DDXSolvation yes
1674
1675 To run the first example in multiprocessing mode at molecules level on all
1676 available CPUs without loading all data into memory and write out a SD file,
1677 type:
1678
1679 % Psi4PerformMinimization.py --mp yes -i Psi4Sample.smi -o
1680 Psi4SampleOut.sdf
1681
1682 To run the first example in multiprocessing mode at conformers level on all
1683 available CPUs without loading all data into memory and write out a SD file,
1684 type:
1685
1686 % Psi4PerformMinimization.py --mp yes --mpLevel Conformers
1687 -i Psi4Sample.smi -o Psi4SampleOut.sdf
1688
1689 To run the first example in multiprocessing mode at molecules level on all
1690 available CPUs by loading all data into memory and write out a SD file, type:
1691
1692 % Psi4PerformMinimization.py --mp yes --mpParams "inputDataMode,
1693 InMemory" -i Psi4Sample.smi -o Psi4SampleOut.sdf
1694
1695 To run the first example in multiprocessing mode at molecules level on specific
1696 number of CPUs and chunk size without loading all data into memory and write
1697 out a SD file, type:
1698
1699 % Psi4PerformMinimization.py --mp yes --mpParams "inputDataMode,Lazy,
1700 numProcesses,4,chunkSize,8" -i Psi4Sample.smi -o Psi4SampleOut.sdf
1701
1702 To generate an initial conformer ensemble of up to 20 conformations using a
1703 combination of ETKDGv2 distance geometry methodology and MMFF94s forcefield
1704 minimization followed by energy minimization for a maxium of 20 iterations
1705 using B3LYP/6-31+G** molecules in a SMILES file and write out a SD file
1706 containing minimum energy structure along with energy in specific units,
1707 type:
1708
1709 % Psi4PerformMinimization.py --confGeneration yes --confParams
1710 "confMethod,ETKDGv2,forceField,MMFF, forceFieldMMFFVariant,MMFF94s,
1711 maxConfs,20,embedRMSDCutoff,0.5" --energyUnits "kJ/mol" -m B3LYP
1712 -b "6-31+G**" --maxIters 20 -i Psi4Sample.smi -o Psi4SampleOut.sdf
1713
1714 To minimize molecules in a 3D files using B3LYP/6-31G** and B3LYP/6-31+G**
1715 for non-sulfur and sulfur containing molecules for a maximum of 25 iterations
1716 without generating any conformations and write out a SD file containing
1717 minimum energy structures corresponding to each molecule, type:
1718
1719 % Psi4PerformMinimization.py --confGeneration no --maxIters 25
1720 -i Psi4Sample3D.sdf -o Psi4Sample3DOut.sdf
1721
1722 To run the first example for molecules in a CSV SMILES file, SMILES strings
1723 in column 1, name column 2, and write out a SD file, type:
1724
1725 % Psi4PerformMinimization.py --infileParams "smilesDelimiter,comma,
1726 smilesTitleLine,yes,smilesColumn,1,smilesNameColumn,2"
1727 -i Psi4Sample.csv -o Psi4SampleOut.sdf
1728
1729 Author:
1730
1731 Manish Sud(msud@san.rr.com)
1732
1733 Contributor:
1734 Francois Berenger
1735
1736 See also:
1737 Psi4CalculateEnergy.py, Psi4CalculatePartialCharges.py, Psi4GenerateConformers.py
1738
1739 Copyright:
1740 Copyright (C) 2025 Manish Sud. All rights reserved.
1741
1742 The functionality available in this script is implemented using Psi4, an
1743 open source quantum chemistry software package, and RDKit, an open
1744 source toolkit for cheminformatics developed by Greg Landrum.
1745
1746 This file is part of MayaChemTools.
1747
1748 MayaChemTools is free software; you can redistribute it and/or modify it under
1749 the terms of the GNU Lesser General Public License as published by the Free
1750 Software Foundation; either version 3 of the License, or (at your option) any
1751 later version.
1752
1753 """
1754
1755 if __name__ == "__main__":
1756 main()