1 #!/bin/env python
2 #
3 # File: Psi4CalculatePartialCharges.py
4 # Author: Manish Sud <msud@san.rr.com>
5 #
6 # Copyright (C) 2026 Manish Sud. All rights reserved.
7 #
8 # The functionality available in this script is implemented using Psi4, an
9 # open source quantum chemistry software package, and RDKit, an open
10 # source toolkit for cheminformatics developed by Greg Landrum.
11 #
12 # This file is part of MayaChemTools.
13 #
14 # MayaChemTools is free software; you can redistribute it and/or modify it under
15 # the terms of the GNU Lesser General Public License as published by the Free
16 # Software Foundation; either version 3 of the License, or (at your option) any
17 # later version.
18 #
19 # MayaChemTools is distributed in the hope that it will be useful, but without
20 # any warranty; without even the implied warranty of merchantability of fitness
21 # for a particular purpose. See the GNU Lesser General Public License for more
22 # details.
23 #
24 # You should have received a copy of the GNU Lesser General Public License
25 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
26 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
27 # Boston, MA, 02111-1307, USA.
28 #
29
30 from __future__ import print_function
31
32 import os
33 import sys
34 import time
35 import re
36 import shutil
37 import multiprocessing as mp
38
39 # Psi4 imports...
40 if hasattr(shutil, "which") and shutil.which("psi4") is None:
41 sys.stderr.write("\nWarning: Failed to find 'psi4' in your PATH indicating potential issues with your\n")
42 sys.stderr.write("Psi4 environment. The 'import psi4' directive in the global scope of the script\n")
43 sys.stderr.write("interferes with the multiprocessing functionality. It is imported later in the\n")
44 sys.stderr.write("local scope during the execution of the script and may fail. Check/update your\n")
45 sys.stderr.write("Psi4 environment and try again.\n\n")
46
47 # RDKit imports...
48 try:
49 from rdkit import rdBase
50 from rdkit import Chem
51 except ImportError as ErrMsg:
52 sys.stderr.write("\nFailed to import RDKit module/package: %s\n" % ErrMsg)
53 sys.stderr.write("Check/update your RDKit environment and try again.\n\n")
54 sys.exit(1)
55
56 # MayaChemTools imports...
57 sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python"))
58 try:
59 from docopt import docopt
60 import MiscUtil
61 import Psi4Util
62 import RDKitUtil
63 except ImportError as ErrMsg:
64 sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg)
65 sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n")
66 sys.exit(1)
67
68 ScriptName = os.path.basename(sys.argv[0])
69 Options = {}
70 OptionsInfo = {}
71
72
73 def main():
74 """Start execution of the script."""
75
76 MiscUtil.PrintInfo(
77 "\n%s (Psi4: Imported later; RDKit v%s; MayaChemTools v%s; %s): Starting...\n"
78 % (ScriptName, rdBase.rdkitVersion, MiscUtil.GetMayaChemToolsVersion(), time.asctime())
79 )
80
81 (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime()
82
83 # Retrieve command line arguments and options...
84 RetrieveOptions()
85
86 # Process and validate command line arguments and options...
87 ProcessOptions()
88
89 # Perform actions required by the script...
90 CalculatePartialCharges()
91
92 MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName)
93 MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime))
94
95
96 def CalculatePartialCharges():
97 """Calculate partial atomic charges."""
98
99 CheckSupportForRESPCalculations()
100
101 # Setup a molecule reader...
102 MiscUtil.PrintInfo("\nProcessing file %s..." % OptionsInfo["Infile"])
103 Mols = RDKitUtil.ReadMolecules(OptionsInfo["Infile"], **OptionsInfo["InfileParams"])
104
105 # Set up a molecule writer...
106 Writer = RDKitUtil.MoleculesWriter(OptionsInfo["Outfile"], **OptionsInfo["OutfileParams"])
107 if Writer is None:
108 MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OptionsInfo["Outfile"])
109 MiscUtil.PrintInfo("Generating file %s..." % OptionsInfo["Outfile"])
110
111 MolCount, ValidMolCount, CalcFailedCount = ProcessMolecules(Mols, Writer)
112
113 if Writer is not None:
114 Writer.close()
115
116 MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount)
117 MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount)
118 MiscUtil.PrintInfo("Number of molecules failed during calculation of partial charges: %d" % CalcFailedCount)
119 MiscUtil.PrintInfo("Number of ignored molecules: %d" % (MolCount - ValidMolCount + CalcFailedCount))
120
121
122 def ProcessMolecules(Mols, Writer):
123 """Process molecules and calculate partial charges."""
124
125 if OptionsInfo["MPMode"]:
126 return ProcessMoleculesUsingMultipleProcesses(Mols, Writer)
127 else:
128 return ProcessMoleculesUsingSingleProcess(Mols, Writer)
129
130
131 def ProcessMoleculesUsingSingleProcess(Mols, Writer):
132 """Process molecules and calculate partial charges using a single process."""
133
134 # Intialize Psi4...
135 MiscUtil.PrintInfo("\nInitializing Psi4...")
136 Psi4Handle = Psi4Util.InitializePsi4(
137 Psi4RunParams=OptionsInfo["Psi4RunParams"],
138 Psi4OptionsParams=OptionsInfo["Psi4OptionsParams"],
139 PrintVersion=True,
140 PrintHeader=True,
141 )
142 OptionsInfo["psi4"] = Psi4Handle
143
144 # Initialize RESP...
145 OptionsInfo["resp"] = SetupRESP()
146
147 MiscUtil.PrintInfo("\nCalculating partial atomic charges...")
148
149 (MolCount, ValidMolCount, CalcFailedCount) = [0] * 3
150 for Mol in Mols:
151 MolCount += 1
152
153 if not CheckAndValidateMolecule(Mol, MolCount):
154 continue
155
156 # Setup a Psi4 molecule...
157 Psi4Mol = SetupPsi4Mol(Psi4Handle, Mol, MolCount)
158 if Psi4Mol is None:
159 continue
160
161 ValidMolCount += 1
162
163 # Retrieve charges...
164 CalcStatus, PartialCharges = CalculateMolPartialCharges(Psi4Handle, Psi4Mol, Mol, MolCount)
165
166 if not CalcStatus:
167 if not OptionsInfo["QuietMode"]:
168 MolName = RDKitUtil.GetMolName(Mol, MolCount)
169 MiscUtil.PrintWarning("Failed to calculate partial atomic charges for molecule %s" % MolName)
170
171 CalcFailedCount += 1
172 continue
173
174 WriteMolPartialCharges(Writer, Mol, PartialCharges)
175
176 return (MolCount, ValidMolCount, CalcFailedCount)
177
178
179 def ProcessMoleculesUsingMultipleProcesses(Mols, Writer):
180 """Process molecules and calculate partial charges using a multiprocessing."""
181
182 MiscUtil.PrintInfo("\nCalculating partial atomic charges using multiprocessing...")
183
184 MPParams = OptionsInfo["MPParams"]
185
186 # Setup data for initializing a worker process...
187 InitializeWorkerProcessArgs = (
188 MiscUtil.ObjectToBase64EncodedString(Options),
189 MiscUtil.ObjectToBase64EncodedString(OptionsInfo),
190 )
191
192 # Setup a encoded mols data iterable for a worker process...
193 WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(Mols)
194
195 # Setup process pool along with data initialization for each process...
196 if not OptionsInfo["QuietMode"]:
197 MiscUtil.PrintInfo(
198 "\nConfiguring multiprocessing using %s method..."
199 % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()")
200 )
201 MiscUtil.PrintInfo(
202 "NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n"
203 % (
204 MPParams["NumProcesses"],
205 MPParams["InputDataMode"],
206 ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"]),
207 )
208 )
209
210 ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeWorkerProcess, InitializeWorkerProcessArgs)
211
212 # Start processing...
213 if re.match("^Lazy$", MPParams["InputDataMode"], re.I):
214 Results = ProcessPool.imap(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
215 elif re.match("^InMemory$", MPParams["InputDataMode"], re.I):
216 Results = ProcessPool.map(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
217 else:
218 MiscUtil.PrintError(
219 'The value, %s, specified for "--inputDataMode" is not supported.' % (MPParams["InputDataMode"])
220 )
221
222 # Print out Psi4 version in the main process...
223 MiscUtil.PrintInfo("\nInitializing Psi4...\n")
224 Psi4Handle = Psi4Util.InitializePsi4(PrintVersion=True, PrintHeader=False)
225 OptionsInfo["psi4"] = Psi4Handle
226
227 (MolCount, ValidMolCount, CalcFailedCount) = [0] * 3
228 for Result in Results:
229 MolCount += 1
230 MolIndex, EncodedMol, CalcStatus, PartialCharges = Result
231
232 if EncodedMol is None:
233 continue
234
235 ValidMolCount += 1
236
237 Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
238
239 if not CalcStatus:
240 if not OptionsInfo["QuietMode"]:
241 MolName = RDKitUtil.GetMolName(Mol, MolCount)
242 MiscUtil.PrintWarning("Failed to calculate partial atomic charges for molecule %s" % MolName)
243
244 CalcFailedCount += 1
245 continue
246
247 WriteMolPartialCharges(Writer, Mol, PartialCharges)
248
249 return (MolCount, ValidMolCount, CalcFailedCount)
250
251
252 def InitializeWorkerProcess(*EncodedArgs):
253 """Initialize data for a worker process."""
254
255 global Options, OptionsInfo
256
257 if not OptionsInfo["QuietMode"]:
258 MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid())
259
260 # Decode Options and OptionInfo...
261 Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0])
262 OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1])
263
264 # Psi4 is initialized in the worker process to avoid creation of redundant Psi4
265 # output files for each process...
266 OptionsInfo["Psi4Initialized"] = False
267
268
269 def InitializePsi4ForWorkerProcess():
270 """Initialize Psi4 for a worker process."""
271
272 if OptionsInfo["Psi4Initialized"]:
273 return
274
275 OptionsInfo["Psi4Initialized"] = True
276
277 # Update output file...
278 OptionsInfo["Psi4RunParams"]["OutputFile"] = Psi4Util.UpdatePsi4OutputFileUsingPID(
279 OptionsInfo["Psi4RunParams"]["OutputFile"], os.getpid()
280 )
281
282 # Intialize Psi4...
283 OptionsInfo["psi4"] = Psi4Util.InitializePsi4(
284 Psi4RunParams=OptionsInfo["Psi4RunParams"],
285 Psi4OptionsParams=OptionsInfo["Psi4OptionsParams"],
286 PrintVersion=False,
287 PrintHeader=True,
288 )
289
290
291 def WorkerProcess(EncodedMolInfo):
292 """Process data for a worker process."""
293
294 if not OptionsInfo["Psi4Initialized"]:
295 InitializePsi4ForWorkerProcess()
296
297 MolIndex, EncodedMol = EncodedMolInfo
298
299 CalcStatus = False
300 PartialCharges = None
301
302 if EncodedMol is None:
303 return [MolIndex, None, CalcStatus, PartialCharges]
304
305 Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
306 MolCount = MolIndex + 1
307
308 if not CheckAndValidateMolecule(Mol, MolCount):
309 return [MolIndex, None, CalcStatus, PartialCharges]
310
311 # Setup a Psi4 molecule...
312 Psi4Mol = SetupPsi4Mol(OptionsInfo["psi4"], Mol, MolCount)
313 if Psi4Mol is None:
314 return [MolIndex, None, CalcStatus, PartialCharges]
315
316 CalcStatus, PartialCharges = CalculateMolPartialCharges(OptionsInfo["psi4"], Psi4Mol, Mol, MolCount)
317
318 return [
319 MolIndex,
320 RDKitUtil.MolToBase64EncodedMolString(
321 Mol, PropertyPickleFlags=Chem.PropertyPickleOptions.MolProps | Chem.PropertyPickleOptions.PrivateProps
322 ),
323 CalcStatus,
324 PartialCharges,
325 ]
326
327
328 def CalculateMolPartialCharges(Psi4Handle, Psi4Mol, Mol, MolNum):
329 """Calculate partial atomic charges for a molecule."""
330
331 if OptionsInfo["RESPChargesTypeMode"]:
332 return CalculateRespMolPartialCharges(Psi4Handle, Psi4Mol, Mol, MolNum)
333 else:
334 return CalculateNonRespMolPartialCharges(Psi4Handle, Psi4Mol, Mol, MolNum)
335
336
337 def CalculateNonRespMolPartialCharges(Psi4Handle, Psi4Mol, Mol, MolNum):
338 """Calculate non-RESP partial atomic charges for a molecule."""
339
340 Status = False
341 PartialCharges = []
342
343 # Setup reference wave function...
344 Reference = SetupReferenceWavefunction(Mol)
345 Psi4Handle.set_options({"Reference": Reference})
346
347 # Setup method name and basis set...
348 MethodName, BasisSet = SetupMethodNameAndBasisSet(Mol)
349
350 # Calculate single point energy to setup a wavefunction...
351 Status, Energy, WaveFunction = Psi4Util.CalculateSinglePointEnergy(
352 Psi4Handle, Psi4Mol, MethodName, BasisSet, ReturnWaveFunction=True, Quiet=OptionsInfo["QuietMode"]
353 )
354
355 if not Status:
356 PerformPsi4Cleanup(Psi4Handle)
357 return (False, PartialCharges)
358
359 # Calculate atomic point charges...
360 try:
361 Psi4Handle.oeprop(WaveFunction, OptionsInfo["ChargesPropType"])
362 except Exception as ErrMsg:
363 if not OptionsInfo["QuietMode"]:
364 MiscUtil.PrintWarning("Failed to calculate partial atomic charges:\n%s" % ErrMsg)
365 PerformPsi4Cleanup(Psi4Handle)
366 return (False, PartialCharges)
367
368 AtomicPointCharges = WaveFunction.atomic_point_charges().np.tolist()
369
370 # Clean up...
371 PerformPsi4Cleanup(Psi4Handle)
372
373 # Format charges...
374 PartialCharges = ["%.*f" % (OptionsInfo["Precision"], float(Value)) for Value in AtomicPointCharges]
375
376 return (Status, PartialCharges)
377
378
379 def CalculateRespMolPartialCharges(Psi4Handle, Psi4Mol, Mol, MolNum):
380 """Calculate RESP partial atomic charges for a molecule."""
381
382 PartialCharges = []
383
384 RESPHandle = OptionsInfo["resp"]
385 RESPOptions = OptionsInfo["ChargesRespOtions"]
386
387 # Setup method name and basis set...
388 MethodName, BasisSet = SetupMethodNameAndBasisSet(Mol)
389 RESPOptions["METHOD_ESP"] = MethodName
390 RESPOptions["BASIS_ESP"] = BasisSet
391
392 # Calculate RESP charges...
393 try:
394 RESPCalcResults = RESPHandle.resp([Psi4Mol], RESPOptions)
395 except Exception as ErrMsg:
396 if not OptionsInfo["QuietMode"]:
397 MiscUtil.PrintWarning("Failed to calculate RESP partial atomic charges:\n%s" % ErrMsg)
398 RemoveRESPGridFiles(MolNum)
399 return (False, PartialCharges)
400
401 # ESPCharges = RESPCalcResults[0]
402 RESPCharges = RESPCalcResults[1]
403
404 PerformRESPCleanup(MolNum)
405
406 # Format charges...
407 PartialCharges = ["%.*f" % (OptionsInfo["Precision"], float(Value)) for Value in RESPCharges]
408
409 return (True, PartialCharges)
410
411
412 def PerformRESPCleanup(MolNum):
413 """Peform RESP cleanup."""
414
415 if OptionsInfo["ChargesRespParams"]["RemoveGridFiles"]:
416 RemoveRESPGridFiles(MolNum)
417 else:
418 RenameRESPGridFiles(MolNum)
419
420
421 def RemoveRESPGridFiles(MolNum):
422 """Remove RESP grid files."""
423
424 try:
425 for GridFile in ["1_default_grid.dat", "1_default_grid_esp.dat", "results.out"]:
426 if os.path.isfile(GridFile):
427 os.remove(GridFile)
428 except Exception as ErrMsg:
429 if not OptionsInfo["QuietMode"]:
430 MiscUtil.PrintWarning("Failed to remove RESP results/grid files: %s\n" % ErrMsg)
431
432
433 def RenameRESPGridFiles(MolNum):
434 """Rename RESP grid files."""
435
436 try:
437 MolPrefix = "Mol%s" % MolNum
438 GridFiles = ["1_default_grid.dat", "1_default_grid_esp.dat", "results.out"]
439 NewGridFiles = ["%s_grid.dat" % MolPrefix, "%s_grid_esp.dat" % MolPrefix, "%s_resp_results.out" % MolPrefix]
440 for GridFile, NewGridFile in zip(GridFiles, NewGridFiles):
441 if os.path.isfile(GridFile):
442 shutil.move(GridFile, NewGridFile)
443 except Exception as ErrMsg:
444 if not OptionsInfo["QuietMode"]:
445 MiscUtil.PrintWarning("Failed to move RESP results/grid files: %s\n" % ErrMsg)
446
447
448 def WriteMolPartialCharges(Writer, Mol, PartialCharges):
449 """Write out partial atomic charges for a molecule."""
450
451 if PartialCharges is None:
452 return
453
454 if OptionsInfo["AtomAliasesFormatMode"]:
455 for Atom, PartialCharge in zip(Mol.GetAtoms(), PartialCharges):
456 Atom.SetProp("molFileAlias", PartialCharge)
457 else:
458 ChargesValues = "\n".join(PartialCharges)
459 Mol.SetProp(OptionsInfo["DataFieldLabel"], ChargesValues)
460
461 Writer.write(Mol)
462
463
464 def SetupPsi4Mol(Psi4Handle, Mol, MolCount=None):
465 """Setup a Psi4 molecule object."""
466
467 MolGeometry = RDKitUtil.GetPsi4XYZFormatString(Mol, NoCom=True, NoReorient=True)
468
469 try:
470 Psi4Mol = Psi4Handle.geometry(MolGeometry)
471 except Exception as ErrMsg:
472 Psi4Mol = None
473 if not OptionsInfo["QuietMode"]:
474 MiscUtil.PrintWarning("Failed to create Psi4 molecule from geometry string: %s\n" % ErrMsg)
475 MolName = RDKitUtil.GetMolName(Mol, MolCount)
476 MiscUtil.PrintWarning("Ignoring molecule: %s" % MolName)
477
478 return Psi4Mol
479
480
481 def PerformPsi4Cleanup(Psi4Handle):
482 """Perform clean up."""
483
484 # Clean up after Psi4 run ...
485 Psi4Handle.core.clean()
486
487 # Clean up any leftover scratch files...
488 if OptionsInfo["MPMode"]:
489 Psi4Util.RemoveScratchFiles(Psi4Handle, OptionsInfo["Psi4RunParams"]["OutputFile"])
490
491
492 def SetupRESP():
493 """Load resp and return its handle."""
494
495 if not OptionsInfo["RESPChargesTypeMode"]:
496 return None
497
498 try:
499 import resp
500 except ImportError as ErrMsg:
501 sys.stderr.write("\nFailed to import Psi4 module/package resp: %s\n" % ErrMsg)
502 sys.stderr.write("Check/update your Psi4 environment and try again.\n\n")
503 sys.exit(1)
504
505 return resp
506
507
508 def CheckAndValidateMolecule(Mol, MolCount=None):
509 """Validate molecule for Psi4 calculations."""
510
511 if Mol is None:
512 if not OptionsInfo["QuietMode"]:
513 MiscUtil.PrintInfo("\nProcessing molecule number %s..." % MolCount)
514 return False
515
516 MolName = RDKitUtil.GetMolName(Mol, MolCount)
517 if not OptionsInfo["QuietMode"]:
518 MiscUtil.PrintInfo("\nProcessing molecule %s..." % MolName)
519
520 if RDKitUtil.IsMolEmpty(Mol):
521 if not OptionsInfo["QuietMode"]:
522 MiscUtil.PrintWarning("Ignoring empty molecule: %s\n" % MolName)
523 return False
524
525 if not RDKitUtil.ValidateElementSymbols(RDKitUtil.GetAtomSymbols(Mol)):
526 if not OptionsInfo["QuietMode"]:
527 MiscUtil.PrintWarning("Ignoring molecule containing invalid element symbols: %s\n" % MolName)
528 return False
529
530 # Check for 3D flag...
531 if not Mol.GetConformer().Is3D():
532 if not OptionsInfo["QuietMode"]:
533 MiscUtil.PrintWarning("3D tag is not set for molecule: %s\n" % MolName)
534
535 # Check for missing hydrogens...
536 if RDKitUtil.AreHydrogensMissingInMolecule(Mol):
537 if not OptionsInfo["QuietMode"]:
538 MiscUtil.PrintWarning("Missing hydrogens in molecule: %s\n" % MolName)
539
540 return True
541
542
543 def SetupMethodNameAndBasisSet(Mol):
544 """Setup method name and basis set."""
545
546 MethodName = OptionsInfo["MethodName"]
547 if OptionsInfo["MethodNameAuto"]:
548 MethodName = "B3LYP"
549
550 BasisSet = OptionsInfo["BasisSet"]
551 if OptionsInfo["BasisSetAuto"]:
552 BasisSet = "6-31+G**" if RDKitUtil.IsAtomSymbolPresentInMol(Mol, "S") else "6-31G**"
553
554 return (MethodName, BasisSet)
555
556
557 def SetupReferenceWavefunction(Mol):
558 """Setup reference wavefunction."""
559
560 Reference = OptionsInfo["Reference"]
561 if OptionsInfo["ReferenceAuto"]:
562 Reference = "UHF" if (RDKitUtil.GetSpinMultiplicity(Mol) > 1) else "RHF"
563
564 return Reference
565
566
567 def ProcessOptionChargesRespParameters():
568 """Process charges RSEP parameters option."""
569
570 ParamsOptionName = "--chargesRespParams"
571 ParamsOptionValue = Options["--chargesRespParams"]
572
573 VDWScaleFactors = [1.4, 1.6, 1.8, 2.0]
574 VDWRadii = {
575 "H": 1.20,
576 "HE": 1.20,
577 "LI": 1.37,
578 "BE": 1.45,
579 "B": 1.45,
580 "C": 1.50,
581 "N": 1.50,
582 "O": 1.40,
583 "F": 1.35,
584 "NE": 1.30,
585 "NA": 1.57,
586 "MG": 1.36,
587 "AL": 1.24,
588 "SI": 1.17,
589 "P": 1.80,
590 "S": 1.75,
591 "CL": 1.70,
592 }
593
594 ParamsInfo = {
595 "MaxIter": 25,
596 "RestrainHydrogens": False,
597 "RemoveGridFiles": True,
598 "RespA": 0.0005,
599 "RespB": 0.1,
600 "Tolerance": 1e-5,
601 "VDWRadii": VDWRadii,
602 "VDWScaleFactors": VDWScaleFactors,
603 "VDWPointDensity": 1.0,
604 }
605
606 if re.match("^auto$", ParamsOptionValue, re.I):
607 SetupChargesRespOptions(ParamsInfo)
608 return
609
610 # Setup a canonical paramater names...
611 ValidParamNames = []
612 CanonicalParamNamesMap = {}
613 for ParamName in sorted(ParamsInfo):
614 ValidParamNames.append(ParamName)
615 CanonicalParamNamesMap[ParamName.lower()] = ParamName
616
617 ParamsOptionValue = ParamsOptionValue.strip()
618 if not ParamsOptionValue:
619 MiscUtil.PrintError('No valid parameter name and value pairs specified using "%s" option' % ParamsOptionName)
620
621 ParamsOptionValueWords = ParamsOptionValue.split(",")
622 if len(ParamsOptionValueWords) % 2:
623 MiscUtil.PrintError(
624 'The number of comma delimited paramater names and values, %d, specified using "%s" option must be an even number.'
625 % (len(ParamsOptionValueWords), ParamsOptionName)
626 )
627
628 # Validate paramater name and value pairs...
629 for Index in range(0, len(ParamsOptionValueWords), 2):
630 Name = ParamsOptionValueWords[Index].strip()
631 Value = ParamsOptionValueWords[Index + 1].strip()
632
633 CanonicalName = Name.lower()
634 if CanonicalName not in CanonicalParamNamesMap:
635 MiscUtil.PrintError(
636 'The parameter name, %s, specified using "%s" is not a valid name. Supported parameter names: %s'
637 % (Name, ParamsOptionName, " ".join(ValidParamNames))
638 )
639
640 ParamName = CanonicalParamNamesMap[CanonicalName]
641 ParamValue = Value
642
643 if re.match("^MaxIter$", ParamName, re.I):
644 if not MiscUtil.IsInteger(Value):
645 MiscUtil.PrintError(
646 'The parameter value, %s, specified for parameter name, %s, using "%s" option must be an integer.'
647 % (Value, Name, ParamsOptionName)
648 )
649 Value = int(Value)
650 if Value <= 0:
651 MiscUtil.PrintError(
652 'The parameter value, %s, specified for parameter name, %s, using "%s" option is not a valid value. Supported values: > 0'
653 % (Value, Name, ParamsOptionName)
654 )
655 ParamValue = Value
656 elif re.match("^(RestrainHydrogens|RemoveGridFiles)$", ParamName, re.I):
657 if not re.match("^(yes|no)$", Value, re.I):
658 MiscUtil.PrintError(
659 'The parameter value, %s, specified for parameter name, %s, using "%s" option is not a valid value. Supported values: yes or no'
660 % (Value, Name, ParamsOptionName)
661 )
662 ParamValue = True if re.match("^yes$", Value, re.I) else False
663 elif re.match("^(RespA|RespB|VDWPointDensity)$", ParamName, re.I):
664 if not MiscUtil.IsNumber(Value):
665 MiscUtil.PrintError(
666 'The parameter value, %s, specified for parameter name, %s, using "%s" option must be a float.'
667 % (Value, Name, ParamsOptionName)
668 )
669 Value = float(Value)
670 if Value <= 0:
671 MiscUtil.PrintError(
672 'The parameter value, %s, specified for parameter name, %s, using "%s" option is not a valid value. Supported values: > 0'
673 % (Value, Name, ParamsOptionName)
674 )
675 ParamValue = Value
676 elif re.match("^Tolerance$", ParamName, re.I):
677 if not MiscUtil.IsNumber(Value):
678 MiscUtil.PrintError(
679 'The parameter value, %s, specified for parameter name, %s, using "%s" option must be a float.'
680 % (Value, Name, ParamsOptionName)
681 )
682 Value = float(Value)
683 if Value < 0:
684 MiscUtil.PrintError(
685 'The parameter value, %s, specified for parameter name, %s, using "%s" option is not a valid value. Supported values: >= 0'
686 % (Value, Name, ParamsOptionName)
687 )
688 ParamValue = Value
689 elif re.match("^VDWScaleFactors$", ParamName, re.I):
690 ScaleFactorsValue = Value.strip()
691 if not ScaleFactorsValue:
692 MiscUtil.PrintError(
693 'No parameter value specified for parameter name, %s, using "%s" option.' % (Name, ParamsOptionName)
694 )
695 ScaleFactorsWords = ScaleFactorsValue.split()
696
697 ScaleFactors = []
698 LastScaleFactor = 0.0
699 for ScaleFactor in ScaleFactorsWords:
700 if not MiscUtil.IsNumber(ScaleFactor):
701 MiscUtil.PrintError(
702 'The value, %s, in parameter value, %s, specified for parameter name, %s, using "%s" option must be a float.'
703 % (ScaleFactor, Value, Name, ParamsOptionName)
704 )
705 ScaleFactor = float(ScaleFactor)
706 if ScaleFactor <= 0:
707 MiscUtil.PrintError(
708 'The value, %s, in parameter value, %s, specified for parameter name, %s, using "%s" option is not a valid value. Supported values: > 0'
709 % (ScaleFactor, Value, Name, ParamsOptionName)
710 )
711 if len(ScaleFactors):
712 if ScaleFactor <= LastScaleFactor:
713 MiscUtil.PrintError(
714 'The value, %s, in parameter value, %s, specified for parameter name, %s, using "%s" option is not a valid value. It must be greater than the previous value, %s, specified for the scale factor.'
715 % (ScaleFactor, Value, Name, ParamsOptionName, LastScaleFactor)
716 )
717
718 LastScaleFactor = ScaleFactor
719 ScaleFactors.append(ScaleFactor)
720
721 ParamValue = ScaleFactors
722 elif re.match("^VDWRadii$", ParamName, re.I):
723 RadiiValue = Value.strip()
724 if not RadiiValue:
725 MiscUtil.PrintError(
726 'No parameter value specified for parameter name, %s, using "%s" option.' % (Name, ParamsOptionName)
727 )
728 RadiiWords = RadiiValue.split()
729 if len(RadiiWords) % 2:
730 MiscUtil.PrintError(
731 'The number of space delimited values, %s, in parameter value, %s, specified for parameter name, %s, using "%s" option is not valid. It must be an even number.'
732 % (len(RadiiWords), Value, Name, ParamsOptionName)
733 )
734
735 for RadiiWordsIndex in range(0, len(RadiiWords), 2):
736 ElementSymbol = RadiiWords[RadiiWordsIndex].upper()
737 VDWRadius = RadiiWords[RadiiWordsIndex + 1]
738
739 if not MiscUtil.IsNumber(VDWRadius):
740 MiscUtil.PrintError(
741 'The vdw radius value, %s, in parameter value, %s, specified for parameter name, %s, using "%s" option is not a valid.'
742 % (VDWRadius, Value, Name, ParamsOptionName)
743 )
744
745 if not RDKitUtil.IsValidElementSymbol(ElementSymbol.capitalize()):
746 MiscUtil.PrintWarning(
747 'The element symbol, %s, in parameter value, %s, specified for parameter name, %s, using "%s" option is not a valid.'
748 % (ElementSymbol, Value, Name, ParamsOptionName)
749 )
750 VDWRadii[ElementSymbol] = float(VDWRadius)
751
752 ParamValue = VDWRadii
753 else:
754 ParamValue = Value
755
756 # Set value...
757 ParamsInfo[ParamName] = ParamValue
758
759 SetupChargesRespOptions(ParamsInfo)
760
761
762 def SetupChargesRespOptions(ParamsInfo):
763 """Setup options for calculating RESP charges."""
764
765 # Initialize ESP options...
766 ChargesRespOtions = {}
767
768 ChargesRespOtions["METHOD_ESP"] = None
769 ChargesRespOtions["BASIS_ESP"] = None
770
771 # Setup ESP options...
772 ParamNameToESPOptionID = {
773 "MaxIter": "MAX_IT",
774 "RespA": "RESP_A",
775 "RespB": "RESP_B",
776 "Tolerance": "TOLER",
777 "VDWRadii": "VDW_RADII",
778 "VDWScaleFactors": "VDW_SCALE_FACTORS",
779 "VDWPointDensity": "VDW_POINT_DENSITY",
780 }
781
782 for ParamName in ParamNameToESPOptionID:
783 ESPOptionID = ParamNameToESPOptionID[ParamName]
784 ChargesRespOtions[ESPOptionID] = ParamsInfo[ParamName]
785
786 # Setup IHFREE option...
787 ChargesRespOtions["IHFREE"] = False if ParamsInfo["RestrainHydrogens"] else True
788
789 OptionsInfo["ChargesRespParams"] = ParamsInfo
790 OptionsInfo["ChargesRespOtions"] = ChargesRespOtions
791
792
793 def CheckSupportForRESPCalculations():
794 """Check support for RESP calculations."""
795
796 if not OptionsInfo["MPMode"]:
797 return
798
799 if not OptionsInfo["RESPChargesTypeMode"]:
800 return
801
802 MiscUtil.PrintInfo("")
803 MiscUtil.PrintError(
804 "Multiprocessing is not supported during the calculation of RSEP partial atomic\ncharges. The RESP module is not conducive for multiprocessing. The names of the results\nand grid output files are not unique for molecules during the RESP calculations spread\nacross multiple processes."
805 )
806
807
808 def ProcessOptions():
809 """Process and validate command line arguments and options."""
810
811 MiscUtil.PrintInfo("Processing options...")
812
813 # Validate options...
814 ValidateOptions()
815
816 OptionsInfo["Infile"] = Options["--infile"]
817 ParamsDefaultInfoOverride = {"RemoveHydrogens": False}
818 OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters(
819 "--infileParams",
820 Options["--infileParams"],
821 InfileName=Options["--infile"],
822 ParamsDefaultInfo=ParamsDefaultInfoOverride,
823 )
824
825 OptionsInfo["Outfile"] = Options["--outfile"]
826 OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters(
827 "--outfileParams", Options["--outfileParams"]
828 )
829
830 OptionsInfo["Overwrite"] = Options["--overwrite"]
831
832 # Method, basis set, and reference wavefunction...
833 OptionsInfo["BasisSet"] = Options["--basisSet"]
834 OptionsInfo["BasisSetAuto"] = True if re.match("^auto$", Options["--basisSet"], re.I) else False
835
836 OptionsInfo["MethodName"] = Options["--methodName"]
837 OptionsInfo["MethodNameAuto"] = True if re.match("^auto$", Options["--methodName"], re.I) else False
838
839 OptionsInfo["Reference"] = Options["--reference"]
840 OptionsInfo["ReferenceAuto"] = True if re.match("^auto$", Options["--reference"], re.I) else False
841
842 # Run and options parameters...
843 OptionsInfo["Psi4OptionsParams"] = Psi4Util.ProcessPsi4OptionsParameters(
844 "--psi4OptionsParams", Options["--psi4OptionsParams"]
845 )
846 OptionsInfo["Psi4RunParams"] = Psi4Util.ProcessPsi4RunParameters(
847 "--psi4RunParams", Options["--psi4RunParams"], InfileName=OptionsInfo["Infile"]
848 )
849
850 ChargesType = Options["--chargesType"]
851 ChargesPropType = None
852 RESPChargesTypeMode = False
853 if re.match("^Mulliken$", ChargesType, re.I):
854 ChargesType = "Mulliken"
855 ChargesPropType = "MULLIKEN_CHARGES"
856 elif re.match("^Lowdin$", ChargesType, re.I):
857 ChargesType = "Lowdin"
858 ChargesPropType = "LOWDIN_CHARGES"
859 elif re.match("^RESP$", ChargesType, re.I):
860 ChargesType = "RESP"
861 ChargesPropType = None
862 RESPChargesTypeMode = True
863 else:
864 MiscUtil.PrintError("The value, %s, specified for charge mode is not supported. " % Options["--chargesType"])
865
866 OptionsInfo["ChargesType"] = ChargesType
867 OptionsInfo["ChargesPropType"] = ChargesPropType
868 OptionsInfo["RESPChargesTypeMode"] = RESPChargesTypeMode
869
870 ProcessOptionChargesRespParameters()
871
872 AtomAliasesFormatMode = True
873 if re.match("^DataField", Options["--chargesSDFormat"], re.I):
874 AtomAliasesFormatMode = False
875 OptionsInfo["AtomAliasesFormatMode"] = AtomAliasesFormatMode
876
877 DataFieldLabel = Options["--dataFieldLabel"]
878 if re.match("^auto$", DataFieldLabel, re.I):
879 DataFieldLabel = "Psi4_%s_Charges (a.u.)" % Options["--chargesType"]
880 OptionsInfo["DataFieldLabel"] = DataFieldLabel
881
882 OptionsInfo["MPMode"] = True if re.match("^yes$", Options["--mp"], re.I) else False
883 OptionsInfo["MPParams"] = MiscUtil.ProcessOptionMultiprocessingParameters("--mpParams", Options["--mpParams"])
884
885 OptionsInfo["Precision"] = int(Options["--precision"])
886
887 OptionsInfo["QuietMode"] = True if re.match("^yes$", Options["--quiet"], re.I) else False
888
889
890 def RetrieveOptions():
891 """Retrieve command line arguments and options."""
892
893 # Get options...
894 global Options
895 Options = docopt(_docoptUsage_)
896
897 # Set current working directory to the specified directory...
898 WorkingDir = Options["--workingdir"]
899 if WorkingDir:
900 os.chdir(WorkingDir)
901
902 # Handle examples option...
903 if "--examples" in Options and Options["--examples"]:
904 MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_))
905 sys.exit(0)
906
907
908 def ValidateOptions():
909 """Validate option values."""
910
911 MiscUtil.ValidateOptionTextValue("-c, --chargesType", Options["--chargesType"], "Mulliken Lowdin RESP")
912 MiscUtil.ValidateOptionTextValue("--chargesSDFormat", Options["--chargesSDFormat"], "AtomAliases DataField")
913
914 MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"])
915 MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "sdf sd mol")
916
917 MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "sdf sd")
918 MiscUtil.ValidateOptionsOutputFileOverwrite(
919 "-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"]
920 )
921 MiscUtil.ValidateOptionsDistinctFileNames(
922 "-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"]
923 )
924
925 MiscUtil.ValidateOptionTextValue("--mp", Options["--mp"], "yes no")
926 MiscUtil.ValidateOptionTextValue("-q, --quiet", Options["--quiet"], "yes no")
927
928 MiscUtil.ValidateOptionIntegerValue("-p, --precision", Options["--precision"], {">": 0})
929
930
931 # Setup a usage string for docopt...
932 _docoptUsage_ = """
933 Psi4CalculatePartialCharges.py - Calculate partial atomic charges
934
935 Usage:
936 Psi4CalculatePartialCharges.py [--basisSet <text>] [--chargesType <Mulliken or Lowdin>] [--chargesRespParams <Name,Value,...>]
937 [--chargesSDFormat <AtomAliases or DataField>] [--dataFieldLabel <text>] [--infileParams <Name,Value,...>]
938 [--methodName <text>] [--mp <yes or no>] [--mpParams <Name, Value,...>] [ --outfileParams <Name,Value,...> ]
939 [--overwrite] [--precision <number>] [--psi4OptionsParams <Name,Value,...>] [--psi4RunParams <Name,Value,...>]
940 [--quiet <yes or no>] [--reference <text>] [-w <dir>] -i <infile> -o <outfile>
941 Psi4CalculatePartialCharges.py -h | --help | -e | --examples
942
943 Description:
944 Calculate partial atomic charges for molecules using a specified method name
945 and basis set. The molecules must have 3D coordinates in input file. The molecular
946 geometry is not optimized before the calculation. In addition, hydrogens must
947 be present for all molecules in input file. A single point energy calculation is
948 performed before calculating the partial atomic charges. The 3D coordinates
949 are not modified during the calculation.
950
951 A Psi4 XYZ format geometry string is automatically generated for each molecule
952 in input file. It contains atom symbols and 3D coordinates for each atom in a
953 molecule. In addition, the formal charge and spin multiplicity are present in the
954 the geometry string. These values are either retrieved from molecule properties
955 named 'FormalCharge' and 'SpinMultiplicty' or dynamically calculated for a
956 molecule.
957
958 The supported input file formats are: Mol (.mol), SD (.sdf, .sd)
959
960 The supported output file formats are: SD (.sdf, .sd)
961
962 Options:
963 -b, --basisSet <text> [default: auto]
964 Basis set to use for calculating single point energy before partial atomic
965 charges. Default: 6-31+G** for sulfur containing molecules; Otherwise,
966 6-31G** [ Ref 150 ]. The specified value must be a valid Psi4 basis set.
967 No validation is performed.
968
969 The following list shows a representative sample of basis sets available
970 in Psi4:
971
972 STO-3G, 6-31G, 6-31+G, 6-31++G, 6-31G*, 6-31+G*, 6-31++G*,
973 6-31G**, 6-31+G**, 6-31++G**, 6-311G, 6-311+G, 6-311++G,
974 6-311G*, 6-311+G*, 6-311++G*, 6-311G**, 6-311+G**, 6-311++G**,
975 cc-pVDZ, cc-pCVDZ, aug-cc-pVDZ, cc-pVDZ-DK, cc-pCVDZ-DK, def2-SVP,
976 def2-SVPD, def2-TZVP, def2-TZVPD, def2-TZVPP, def2-TZVPPD
977
978 -c, --chargesType <Mulliken, Lowdin, or RESP> [default: Mulliken]
979 Type of partial atomic charges to calculate. Possible values: Mulliken, Lowdin,
980 or RESP [ Ref 158 ]. Multiprocessing is not supported during the calculation
981 of RSEP charges. In addition, the RSEP calculation relies on the presence of
982 the RESP Psi4 Plugin in your environment.
983 --chargesRespParams <Name,Value,...> [default: auto]
984 A comma delimited list of parameter name and value pairs for calculating
985 RESP [ Ref 158 ] charges. A space is used as a delimiter for multiple values in
986 a name and value pair. The supported parameter names, along with
987 their default values, are shown below:
988
989 maxIter, 25
990 restrainHydrogens, no
991 removeGridFiles, yes
992 respA, 0.0005
993 respB, 0.1
994 tolerance, 1e-5
995 vdwRadii, auto
996 vdwScaleFactors, 1.4 1.6 1.8 2.0
997 vdwPointDensity, 1.0
998
999 maxIter: Maximum number of iterations to perform during charge fitting.
1000
1001 restrainHydrogens: Restrain hydrogens during charge fitting.
1002
1003 removeGridFiles: Keep or remove the following ESP grid and output files:
1004 1_default_grid.dat, 1_default_grid_esp.dat, results.out. The output
1005 files are removed by default. You may optionally keep the output files. The
1006 output files are automatically renamed to the following file for 'No' value of
1007 'removeGridFiles': Mol<MolNum>_grid.dat, Mol<MolNum>_grid_esp.dat,
1008 Mol<MolNum>_resp_results.out.
1009
1010 respA: Scale factor defining the asymptotic limits of the strength of the
1011 restraint.
1012
1013 respB: The 'tightness' of the hyperbola around its minimum for the
1014 restraint.
1015
1016 tolerance: Tolerance for charges during charge fitting to the ESP.
1017
1018 vdwRadii: vdw radii for elements in angstroms. It's a space delimited list of
1019 element symbol and radius value pairs. The default list is shown below:
1020
1021 H 1.20 He 1.20 Li 1.37 Be 1.45 B 1.45 C 1.50 N 1.50 O 1.40 F 1.35
1022 Ne 1.30 Na 1.57 Mg 1.36 Al 1.24 Si 1.17P 1.80 S 1.75 Cl 1.7
1023
1024 You may specify all or a subset of element symbol and vdw radius pairs to
1025 update the default values.
1026
1027 vdwScaleFactors: The vdw radii are scaled by the scale factors to set the
1028 grid points at the shells for calculating the ESP using quantum methodology.
1029 The default number of shells is 4 and corresponds to the number of vdw
1030 scale factors.The 'shell' points are written to a grid file for calculating the ESP.
1031
1032 vdwPointDensity: Approximate number of points to generate per square
1033 angstrom surface area.
1034 --chargesSDFormat <AtomAliases or DataField> [default: AtomAliases]
1035 Format for writing out partial atomic charges to SD file. Possible values:
1036 AtomAliases or DataField.
1037
1038 The charges are stored as atom property named 'molFileAlias' for
1039 'AtomAliases' format and may be retrieved using the RDKit function
1040 'GetProp' for atoms: Aotm.GetProp('molFileAliases').
1041
1042 The charges are stored under a data field label specified using
1043 '-d, --dataFieldLabel' for 'DataField' format and may be retrieved using the
1044 RDKit function 'GetProp' for molecules.
1045 -d, --dataFieldLabel <text> [default: auto]
1046 Data field label to use for storing charged in SD file during 'DataField' value
1047 of '-c, --chargesSDFormat'. Default: Psi4_<ChargesType>_Charges (a.u.)
1048 -e, --examples
1049 Print examples.
1050 -h, --help
1051 Print this help message.
1052 -i, --infile <infile>
1053 Input file name.
1054 --infileParams <Name,Value,...> [default: auto]
1055 A comma delimited list of parameter name and value pairs for reading
1056 molecules from files. The supported parameter names for different file
1057 formats, along with their default values, are shown below:
1058
1059 SD, MOL: removeHydrogens,no,sanitize,yes,strictParsing,yes
1060
1061 -m, --methodName <text> [default: auto]
1062 Method to use for calculating single point energy before partial atomic
1063 charges. Default: B3LYP [ Ref 150 ]. The specified value must be a valid
1064 Psi4 method name. No validation is performed.
1065
1066 The following list shows a representative sample of methods available
1067 in Psi4:
1068
1069 B1LYP, B2PLYP, B2PLYP-D3BJ, B2PLYP-D3MBJ, B3LYP, B3LYP-D3BJ,
1070 B3LYP-D3MBJ, CAM-B3LYP, CAM-B3LYP-D3BJ, HF, HF-D3BJ, HF3c, M05,
1071 M06, M06-2x, M06-HF, M06-L, MN12-L, MN15, MN15-D3BJ,PBE, PBE0,
1072 PBEH3c, PW6B95, PW6B95-D3BJ, WB97, WB97X, WB97X-D, WB97X-D3BJ
1073
1074 --mp <yes or no> [default: no]
1075 Use multiprocessing.
1076
1077 By default, input data is retrieved in a lazy manner via mp.Pool.imap()
1078 function employing lazy RDKit data iterable. This allows processing of
1079 arbitrary large data sets without any additional requirements memory.
1080
1081 All input data may be optionally loaded into memory by mp.Pool.map()
1082 before starting worker processes in a process pool by setting the value
1083 of 'inputDataMode' to 'InMemory' in '--mpParams' option.
1084
1085 A word to the wise: The default 'chunkSize' value of 1 during 'Lazy' input
1086 data mode may adversely impact the performance. The '--mpParams' section
1087 provides additional information to tune the value of 'chunkSize'.
1088 --mpParams <Name,Value,...> [default: auto]
1089 A comma delimited list of parameter name and value pairs to configure
1090 multiprocessing.
1091
1092 The supported parameter names along with their default and possible
1093 values are shown below:
1094
1095 chunkSize, auto
1096 inputDataMode, Lazy [ Possible values: InMemory or Lazy ]
1097 numProcesses, auto [ Default: mp.cpu_count() ]
1098
1099 These parameters are used by the following functions to configure and
1100 control the behavior of multiprocessing: mp.Pool(), mp.Pool.map(), and
1101 mp.Pool.imap().
1102
1103 The chunkSize determines chunks of input data passed to each worker
1104 process in a process pool by mp.Pool.map() and mp.Pool.imap() functions.
1105 The default value of chunkSize is dependent on the value of 'inputDataMode'.
1106
1107 The mp.Pool.map() function, invoked during 'InMemory' input data mode,
1108 automatically converts RDKit data iterable into a list, loads all data into
1109 memory, and calculates the default chunkSize using the following method
1110 as shown in its code:
1111
1112 chunkSize, extra = divmod(len(dataIterable), len(numProcesses) * 4)
1113 if extra: chunkSize += 1
1114
1115 For example, the default chunkSize will be 7 for a pool of 4 worker processes
1116 and 100 data items.
1117
1118 The mp.Pool.imap() function, invoked during 'Lazy' input data mode, employs
1119 'lazy' RDKit data iterable to retrieve data as needed, without loading all the
1120 data into memory. Consequently, the size of input data is not known a priori.
1121 It's not possible to estimate an optimal value for the chunkSize. The default
1122 chunkSize is set to 1.
1123
1124 The default value for the chunkSize during 'Lazy' data mode may adversely
1125 impact the performance due to the overhead associated with exchanging
1126 small chunks of data. It is generally a good idea to explicitly set chunkSize to
1127 a larger value during 'Lazy' input data mode, based on the size of your input
1128 data and number of processes in the process pool.
1129
1130 The mp.Pool.map() function waits for all worker processes to process all
1131 the data and return the results. The mp.Pool.imap() function, however,
1132 returns the the results obtained from worker processes as soon as the
1133 results become available for specified chunks of data.
1134
1135 The order of data in the results returned by both mp.Pool.map() and
1136 mp.Pool.imap() functions always corresponds to the input data.
1137 -o, --outfile <outfile>
1138 Output file name.
1139 --outfileParams <Name,Value,...> [default: auto]
1140 A comma delimited list of parameter name and value pairs for writing
1141 molecules to files. The supported parameter names for different file
1142 formats, along with their default values, are shown below:
1143
1144 SD: kekulize,yes,forceV3000,no
1145
1146 --overwrite
1147 Overwrite existing files.
1148 --precision <number> [default: 4]
1149 Floating point precision for writing energy values.
1150 --psi4OptionsParams <Name,Value,...> [default: none]
1151 A comma delimited list of Psi4 option name and value pairs for setting
1152 global and module options. The names are 'option_name' for global options
1153 and 'module_name__option_name' for options local to a module. The
1154 specified option names must be valid Psi4 names. No validation is
1155 performed.
1156
1157 The specified option name and value pairs are processed and passed to
1158 psi4.set_options() as a dictionary. The supported value types are float,
1159 integer, boolean, or string. The float value string is converted into a float.
1160 The valid values for a boolean string are yes, no, true, false, on, or off.
1161 --psi4RunParams <Name,Value,...> [default: auto]
1162 A comma delimited list of parameter name and value pairs for configuring
1163 Psi4 jobs.
1164
1165 The supported parameter names along with their default and possible
1166 values are shown below:
1167
1168 MemoryInGB, 1
1169 NumThreads, 1
1170 OutputFile, auto [ Possible values: stdout, quiet, or FileName ]
1171 ScratchDir, auto [ Possivle values: DirName]
1172 RemoveOutputFile, yes [ Possible values: yes, no, true, or false]
1173
1174 These parameters control the runtime behavior of Psi4.
1175
1176 The default file name for 'OutputFile' is <InFileRoot>_Psi4.out. The PID
1177 is appended to output file name during multiprocessing as shown:
1178 <InFileRoot>_Psi4_<PIDNum>.out. The 'stdout' value for 'OutputType'
1179 sends Psi4 output to stdout. The 'quiet' or 'devnull' value suppresses
1180 all Psi4 output.
1181
1182 The default 'Yes' value of 'RemoveOutputFile' option forces the removal
1183 of any existing Psi4 before creating new files to append output from
1184 multiple Psi4 runs.
1185
1186 The option 'ScratchDir' is a directory path to the location of scratch
1187 files. The default value corresponds to Psi4 default. It may be used to
1188 override the deafult path.
1189 -q, --quiet <yes or no> [default: no]
1190 Use quiet mode. The warning and information messages will not be printed.
1191 -r, --reference <text> [default: auto]
1192 Reference wave function to use for calculating single point energy before
1193 partial atomic charges. Default: RHF or UHF. The default values are Restricted
1194 Hartree-Fock (RHF) for closed-shell molecules with all electrons paired and
1195 Unrestricted Hartree-Fock (UHF) for open-shell molecules with unpaired electrons.
1196
1197 The specified value must be a valid Psi4 reference wave function. No validation
1198 is performed. For example: ROHF, CUHF, RKS, etc.
1199
1200 The spin multiplicity determines the default value of reference wave function
1201 for input molecules. It is calculated from number of free radical electrons using
1202 Hund's rule of maximum multiplicity defined as 2S + 1 where S is the total
1203 electron spin. The total spin is 1/2 the number of free radical electrons in a
1204 molecule. The value of 'SpinMultiplicity' molecule property takes precedence
1205 over the calculated value of spin multiplicity.
1206 -w, --workingdir <dir>
1207 Location of working directory which defaults to the current directory.
1208
1209 Examples:
1210 To calculate Mulliken partial atomic charges using B3LYP/6-31G** and
1211 B3LYP/6-31+G** for non-sulfur and sulfur containing molecules in a SD
1212 file with 3D structures, use RHF and UHF for closed-shell and open-shell
1213 molecules, and write a new SD file, type:
1214
1215 % Psi4CalculatePartialCharges.py -i Psi4Sample3D.sdf
1216 -o Psi4Sample3DOut.sdf
1217
1218 To run the first example for calculating RESP charges using a default set of
1219 parameters for the RESP calculation and write out a SD file, type:
1220
1221 % Psi4CalculatePartialCharges.py --chargesType RESP
1222 -i Psi4Sample3D.sdf -o Psi4Sample3DOut.sdf
1223
1224 To run the first example for calculating RESP charges using an explicit set
1225 of specific parameters for the RESP calculation and write out a SD file, type:
1226
1227 % Psi4CalculatePartialCharges.py --chargesType RESP
1228 --chargesRespParams "respA, 0.0005, respB, 0.1, vdwScaleFactors,
1229 1.4 1.6 1.8 2.0" -i Psi4Sample3D.sdf -o Psi4Sample3DOut.sdf
1230
1231 To run the first example in multiprocessing mode on all available CPUs
1232 without loading all data into memory and write out a SD file, type:
1233
1234 % Psi4CalculatePartialCharges.py --mp yes -i Psi4Sample3D.sdf
1235 -o Psi4Sample3DOut.sdf
1236
1237 To run the first example in multiprocessing mode on all available CPUs
1238 by loading all data into memory and write out a SD file, type:
1239
1240 % Psi4CalculatePartialCharges.py --mp yes --mpParams "inputDataMode,
1241 InMemory" -i Psi4Sample3D.sdf -o Psi4Sample3DOut.sdf
1242
1243 To run the first example in multiprocessing mode on all available CPUs
1244 without loading all data into memory along with multiple threads for each
1245 Psi4 run and write out a SD file, type:
1246
1247 % Psi4CalculatePartialCharges.py --mp yes --psi4RunParams "NumThreads,2"
1248 -i Psi4Sample3D.sdf -o Psi4Sample3DOut.sdf
1249
1250 To run the first example for writing out charges to a new SD file under a
1251 datafield instead of storing them as atom property, type:
1252
1253 % Psi4CalculatePartialCharges.py --chargesSDFormat DataField
1254 -i Psi4Sample3D.sdf -o Psi4Sample3DOut.sdf
1255
1256 To calculate specific partial atomic charges using a specific method and basis
1257 set for molecules in a SD ontaining 3D structures and write them out to a specific
1258 datafield in a new SD file, type:
1259
1260 % Psi4CalculatePartialCharges.py -c Lowdin -m SCF -b aug-cc-pVDZ
1261 --chargesSDFormat DataField --dataFieldLabel "Lowdin_Charges"
1262 -i Psi4Sample3D.sdf -o Psi4Sample3DOut.sdf
1263
1264 Author:
1265 Manish Sud(msud@san.rr.com)
1266
1267 See also:
1268 Psi4CalculateEnergy.py, Psi4PerformMinimization.py, Psi4GenerateConformers.py
1269
1270 Copyright:
1271 Copyright (C) 2026 Manish Sud. All rights reserved.
1272
1273 The functionality available in this script is implemented using Psi4, an
1274 open source quantum chemistry software package, and RDKit, an open
1275 source toolkit for cheminformatics developed by Greg Landrum.
1276
1277 This file is part of MayaChemTools.
1278
1279 MayaChemTools is free software; you can redistribute it and/or modify it under
1280 the terms of the GNU Lesser General Public License as published by the Free
1281 Software Foundation; either version 3 of the License, or (at your option) any
1282 later version.
1283
1284 """
1285
1286 if __name__ == "__main__":
1287 main()