MayaChemTools

    1 #!/bin/env python
    2 #
    3 # File: Psi4CalculatePartialCharges.py
    4 # Author: Manish Sud <msud@san.rr.com>
    5 #
    6 # Copyright (C) 2026 Manish Sud. All rights reserved.
    7 #
    8 # The functionality available in this script is implemented using Psi4, an
    9 # open source quantum chemistry software package, and RDKit, an open
   10 # source toolkit for cheminformatics developed by Greg Landrum.
   11 #
   12 # This file is part of MayaChemTools.
   13 #
   14 # MayaChemTools is free software; you can redistribute it and/or modify it under
   15 # the terms of the GNU Lesser General Public License as published by the Free
   16 # Software Foundation; either version 3 of the License, or (at your option) any
   17 # later version.
   18 #
   19 # MayaChemTools is distributed in the hope that it will be useful, but without
   20 # any warranty; without even the implied warranty of merchantability of fitness
   21 # for a particular purpose.  See the GNU Lesser General Public License for more
   22 # details.
   23 #
   24 # You should have received a copy of the GNU Lesser General Public License
   25 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
   26 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
   27 # Boston, MA, 02111-1307, USA.
   28 #
   29 
   30 from __future__ import print_function
   31 
   32 import os
   33 import sys
   34 import time
   35 import re
   36 import shutil
   37 import multiprocessing as mp
   38 
   39 # Psi4 imports...
   40 if hasattr(shutil, "which") and shutil.which("psi4") is None:
   41     sys.stderr.write("\nWarning: Failed to find 'psi4' in your PATH indicating potential issues with your\n")
   42     sys.stderr.write("Psi4 environment. The 'import psi4' directive in the global scope of the script\n")
   43     sys.stderr.write("interferes with the multiprocessing functionality. It is imported later in the\n")
   44     sys.stderr.write("local scope during the execution of the script and may fail. Check/update your\n")
   45     sys.stderr.write("Psi4 environment and try again.\n\n")
   46 
   47 # RDKit imports...
   48 try:
   49     from rdkit import rdBase
   50     from rdkit import Chem
   51 except ImportError as ErrMsg:
   52     sys.stderr.write("\nFailed to import RDKit module/package: %s\n" % ErrMsg)
   53     sys.stderr.write("Check/update your RDKit environment and try again.\n\n")
   54     sys.exit(1)
   55 
   56 # MayaChemTools imports...
   57 sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python"))
   58 try:
   59     from docopt import docopt
   60     import MiscUtil
   61     import Psi4Util
   62     import RDKitUtil
   63 except ImportError as ErrMsg:
   64     sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg)
   65     sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n")
   66     sys.exit(1)
   67 
   68 ScriptName = os.path.basename(sys.argv[0])
   69 Options = {}
   70 OptionsInfo = {}
   71 
   72 
   73 def main():
   74     """Start execution of the script."""
   75 
   76     MiscUtil.PrintInfo(
   77         "\n%s (Psi4: Imported later; RDKit v%s; MayaChemTools v%s; %s): Starting...\n"
   78         % (ScriptName, rdBase.rdkitVersion, MiscUtil.GetMayaChemToolsVersion(), time.asctime())
   79     )
   80 
   81     (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime()
   82 
   83     # Retrieve command line arguments and options...
   84     RetrieveOptions()
   85 
   86     # Process and validate command line arguments and options...
   87     ProcessOptions()
   88 
   89     # Perform actions required by the script...
   90     CalculatePartialCharges()
   91 
   92     MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName)
   93     MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime))
   94 
   95 
   96 def CalculatePartialCharges():
   97     """Calculate partial atomic charges."""
   98 
   99     CheckSupportForRESPCalculations()
  100 
  101     # Setup a molecule reader...
  102     MiscUtil.PrintInfo("\nProcessing file %s..." % OptionsInfo["Infile"])
  103     Mols = RDKitUtil.ReadMolecules(OptionsInfo["Infile"], **OptionsInfo["InfileParams"])
  104 
  105     # Set up a molecule writer...
  106     Writer = RDKitUtil.MoleculesWriter(OptionsInfo["Outfile"], **OptionsInfo["OutfileParams"])
  107     if Writer is None:
  108         MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OptionsInfo["Outfile"])
  109     MiscUtil.PrintInfo("Generating file %s..." % OptionsInfo["Outfile"])
  110 
  111     MolCount, ValidMolCount, CalcFailedCount = ProcessMolecules(Mols, Writer)
  112 
  113     if Writer is not None:
  114         Writer.close()
  115 
  116     MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount)
  117     MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount)
  118     MiscUtil.PrintInfo("Number of molecules failed during calculation of partial charges: %d" % CalcFailedCount)
  119     MiscUtil.PrintInfo("Number of ignored molecules: %d" % (MolCount - ValidMolCount + CalcFailedCount))
  120 
  121 
  122 def ProcessMolecules(Mols, Writer):
  123     """Process molecules and calculate partial charges."""
  124 
  125     if OptionsInfo["MPMode"]:
  126         return ProcessMoleculesUsingMultipleProcesses(Mols, Writer)
  127     else:
  128         return ProcessMoleculesUsingSingleProcess(Mols, Writer)
  129 
  130 
  131 def ProcessMoleculesUsingSingleProcess(Mols, Writer):
  132     """Process molecules and calculate partial charges using a single process."""
  133 
  134     # Intialize Psi4...
  135     MiscUtil.PrintInfo("\nInitializing Psi4...")
  136     Psi4Handle = Psi4Util.InitializePsi4(
  137         Psi4RunParams=OptionsInfo["Psi4RunParams"],
  138         Psi4OptionsParams=OptionsInfo["Psi4OptionsParams"],
  139         PrintVersion=True,
  140         PrintHeader=True,
  141     )
  142     OptionsInfo["psi4"] = Psi4Handle
  143 
  144     # Initialize RESP...
  145     OptionsInfo["resp"] = SetupRESP()
  146 
  147     MiscUtil.PrintInfo("\nCalculating partial atomic charges...")
  148 
  149     (MolCount, ValidMolCount, CalcFailedCount) = [0] * 3
  150     for Mol in Mols:
  151         MolCount += 1
  152 
  153         if not CheckAndValidateMolecule(Mol, MolCount):
  154             continue
  155 
  156         # Setup a Psi4 molecule...
  157         Psi4Mol = SetupPsi4Mol(Psi4Handle, Mol, MolCount)
  158         if Psi4Mol is None:
  159             continue
  160 
  161         ValidMolCount += 1
  162 
  163         # Retrieve charges...
  164         CalcStatus, PartialCharges = CalculateMolPartialCharges(Psi4Handle, Psi4Mol, Mol, MolCount)
  165 
  166         if not CalcStatus:
  167             if not OptionsInfo["QuietMode"]:
  168                 MolName = RDKitUtil.GetMolName(Mol, MolCount)
  169                 MiscUtil.PrintWarning("Failed to calculate partial atomic charges for molecule %s" % MolName)
  170 
  171             CalcFailedCount += 1
  172             continue
  173 
  174         WriteMolPartialCharges(Writer, Mol, PartialCharges)
  175 
  176     return (MolCount, ValidMolCount, CalcFailedCount)
  177 
  178 
  179 def ProcessMoleculesUsingMultipleProcesses(Mols, Writer):
  180     """Process molecules and calculate partial charges using a multiprocessing."""
  181 
  182     MiscUtil.PrintInfo("\nCalculating partial atomic charges using multiprocessing...")
  183 
  184     MPParams = OptionsInfo["MPParams"]
  185 
  186     # Setup data for initializing a worker process...
  187     InitializeWorkerProcessArgs = (
  188         MiscUtil.ObjectToBase64EncodedString(Options),
  189         MiscUtil.ObjectToBase64EncodedString(OptionsInfo),
  190     )
  191 
  192     # Setup a encoded mols data iterable for a worker process...
  193     WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(Mols)
  194 
  195     # Setup process pool along with data initialization for each process...
  196     if not OptionsInfo["QuietMode"]:
  197         MiscUtil.PrintInfo(
  198             "\nConfiguring multiprocessing using %s method..."
  199             % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()")
  200         )
  201         MiscUtil.PrintInfo(
  202             "NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n"
  203             % (
  204                 MPParams["NumProcesses"],
  205                 MPParams["InputDataMode"],
  206                 ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"]),
  207             )
  208         )
  209 
  210     ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeWorkerProcess, InitializeWorkerProcessArgs)
  211 
  212     # Start processing...
  213     if re.match("^Lazy$", MPParams["InputDataMode"], re.I):
  214         Results = ProcessPool.imap(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
  215     elif re.match("^InMemory$", MPParams["InputDataMode"], re.I):
  216         Results = ProcessPool.map(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
  217     else:
  218         MiscUtil.PrintError(
  219             'The value, %s, specified for "--inputDataMode" is not supported.' % (MPParams["InputDataMode"])
  220         )
  221 
  222     # Print out Psi4 version in the main process...
  223     MiscUtil.PrintInfo("\nInitializing Psi4...\n")
  224     Psi4Handle = Psi4Util.InitializePsi4(PrintVersion=True, PrintHeader=False)
  225     OptionsInfo["psi4"] = Psi4Handle
  226 
  227     (MolCount, ValidMolCount, CalcFailedCount) = [0] * 3
  228     for Result in Results:
  229         MolCount += 1
  230         MolIndex, EncodedMol, CalcStatus, PartialCharges = Result
  231 
  232         if EncodedMol is None:
  233             continue
  234 
  235         ValidMolCount += 1
  236 
  237         Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
  238 
  239         if not CalcStatus:
  240             if not OptionsInfo["QuietMode"]:
  241                 MolName = RDKitUtil.GetMolName(Mol, MolCount)
  242                 MiscUtil.PrintWarning("Failed to calculate partial atomic charges for molecule %s" % MolName)
  243 
  244             CalcFailedCount += 1
  245             continue
  246 
  247         WriteMolPartialCharges(Writer, Mol, PartialCharges)
  248 
  249     return (MolCount, ValidMolCount, CalcFailedCount)
  250 
  251 
  252 def InitializeWorkerProcess(*EncodedArgs):
  253     """Initialize data for a worker process."""
  254 
  255     global Options, OptionsInfo
  256 
  257     if not OptionsInfo["QuietMode"]:
  258         MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid())
  259 
  260     # Decode Options and OptionInfo...
  261     Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0])
  262     OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1])
  263 
  264     # Psi4 is initialized in the worker process to avoid creation of redundant Psi4
  265     # output files for each process...
  266     OptionsInfo["Psi4Initialized"] = False
  267 
  268 
  269 def InitializePsi4ForWorkerProcess():
  270     """Initialize Psi4 for a worker process."""
  271 
  272     if OptionsInfo["Psi4Initialized"]:
  273         return
  274 
  275     OptionsInfo["Psi4Initialized"] = True
  276 
  277     # Update output file...
  278     OptionsInfo["Psi4RunParams"]["OutputFile"] = Psi4Util.UpdatePsi4OutputFileUsingPID(
  279         OptionsInfo["Psi4RunParams"]["OutputFile"], os.getpid()
  280     )
  281 
  282     # Intialize Psi4...
  283     OptionsInfo["psi4"] = Psi4Util.InitializePsi4(
  284         Psi4RunParams=OptionsInfo["Psi4RunParams"],
  285         Psi4OptionsParams=OptionsInfo["Psi4OptionsParams"],
  286         PrintVersion=False,
  287         PrintHeader=True,
  288     )
  289 
  290 
  291 def WorkerProcess(EncodedMolInfo):
  292     """Process data for a worker process."""
  293 
  294     if not OptionsInfo["Psi4Initialized"]:
  295         InitializePsi4ForWorkerProcess()
  296 
  297     MolIndex, EncodedMol = EncodedMolInfo
  298 
  299     CalcStatus = False
  300     PartialCharges = None
  301 
  302     if EncodedMol is None:
  303         return [MolIndex, None, CalcStatus, PartialCharges]
  304 
  305     Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
  306     MolCount = MolIndex + 1
  307 
  308     if not CheckAndValidateMolecule(Mol, MolCount):
  309         return [MolIndex, None, CalcStatus, PartialCharges]
  310 
  311     # Setup a Psi4 molecule...
  312     Psi4Mol = SetupPsi4Mol(OptionsInfo["psi4"], Mol, MolCount)
  313     if Psi4Mol is None:
  314         return [MolIndex, None, CalcStatus, PartialCharges]
  315 
  316     CalcStatus, PartialCharges = CalculateMolPartialCharges(OptionsInfo["psi4"], Psi4Mol, Mol, MolCount)
  317 
  318     return [
  319         MolIndex,
  320         RDKitUtil.MolToBase64EncodedMolString(
  321             Mol, PropertyPickleFlags=Chem.PropertyPickleOptions.MolProps | Chem.PropertyPickleOptions.PrivateProps
  322         ),
  323         CalcStatus,
  324         PartialCharges,
  325     ]
  326 
  327 
  328 def CalculateMolPartialCharges(Psi4Handle, Psi4Mol, Mol, MolNum):
  329     """Calculate partial atomic charges for a molecule."""
  330 
  331     if OptionsInfo["RESPChargesTypeMode"]:
  332         return CalculateRespMolPartialCharges(Psi4Handle, Psi4Mol, Mol, MolNum)
  333     else:
  334         return CalculateNonRespMolPartialCharges(Psi4Handle, Psi4Mol, Mol, MolNum)
  335 
  336 
  337 def CalculateNonRespMolPartialCharges(Psi4Handle, Psi4Mol, Mol, MolNum):
  338     """Calculate non-RESP partial atomic charges for a molecule."""
  339 
  340     Status = False
  341     PartialCharges = []
  342 
  343     #  Setup reference wave function...
  344     Reference = SetupReferenceWavefunction(Mol)
  345     Psi4Handle.set_options({"Reference": Reference})
  346 
  347     # Setup method name and basis set...
  348     MethodName, BasisSet = SetupMethodNameAndBasisSet(Mol)
  349 
  350     # Calculate single point energy to setup a wavefunction...
  351     Status, Energy, WaveFunction = Psi4Util.CalculateSinglePointEnergy(
  352         Psi4Handle, Psi4Mol, MethodName, BasisSet, ReturnWaveFunction=True, Quiet=OptionsInfo["QuietMode"]
  353     )
  354 
  355     if not Status:
  356         PerformPsi4Cleanup(Psi4Handle)
  357         return (False, PartialCharges)
  358 
  359     # Calculate atomic point charges...
  360     try:
  361         Psi4Handle.oeprop(WaveFunction, OptionsInfo["ChargesPropType"])
  362     except Exception as ErrMsg:
  363         if not OptionsInfo["QuietMode"]:
  364             MiscUtil.PrintWarning("Failed to calculate partial atomic charges:\n%s" % ErrMsg)
  365         PerformPsi4Cleanup(Psi4Handle)
  366         return (False, PartialCharges)
  367 
  368     AtomicPointCharges = WaveFunction.atomic_point_charges().np.tolist()
  369 
  370     # Clean up...
  371     PerformPsi4Cleanup(Psi4Handle)
  372 
  373     # Format charges...
  374     PartialCharges = ["%.*f" % (OptionsInfo["Precision"], float(Value)) for Value in AtomicPointCharges]
  375 
  376     return (Status, PartialCharges)
  377 
  378 
  379 def CalculateRespMolPartialCharges(Psi4Handle, Psi4Mol, Mol, MolNum):
  380     """Calculate RESP partial atomic charges for a molecule."""
  381 
  382     PartialCharges = []
  383 
  384     RESPHandle = OptionsInfo["resp"]
  385     RESPOptions = OptionsInfo["ChargesRespOtions"]
  386 
  387     # Setup method name and basis set...
  388     MethodName, BasisSet = SetupMethodNameAndBasisSet(Mol)
  389     RESPOptions["METHOD_ESP"] = MethodName
  390     RESPOptions["BASIS_ESP"] = BasisSet
  391 
  392     # Calculate RESP charges...
  393     try:
  394         RESPCalcResults = RESPHandle.resp([Psi4Mol], RESPOptions)
  395     except Exception as ErrMsg:
  396         if not OptionsInfo["QuietMode"]:
  397             MiscUtil.PrintWarning("Failed to calculate RESP partial atomic charges:\n%s" % ErrMsg)
  398         RemoveRESPGridFiles(MolNum)
  399         return (False, PartialCharges)
  400 
  401     # ESPCharges = RESPCalcResults[0]
  402     RESPCharges = RESPCalcResults[1]
  403 
  404     PerformRESPCleanup(MolNum)
  405 
  406     # Format charges...
  407     PartialCharges = ["%.*f" % (OptionsInfo["Precision"], float(Value)) for Value in RESPCharges]
  408 
  409     return (True, PartialCharges)
  410 
  411 
  412 def PerformRESPCleanup(MolNum):
  413     """Peform RESP cleanup."""
  414 
  415     if OptionsInfo["ChargesRespParams"]["RemoveGridFiles"]:
  416         RemoveRESPGridFiles(MolNum)
  417     else:
  418         RenameRESPGridFiles(MolNum)
  419 
  420 
  421 def RemoveRESPGridFiles(MolNum):
  422     """Remove RESP grid files."""
  423 
  424     try:
  425         for GridFile in ["1_default_grid.dat", "1_default_grid_esp.dat", "results.out"]:
  426             if os.path.isfile(GridFile):
  427                 os.remove(GridFile)
  428     except Exception as ErrMsg:
  429         if not OptionsInfo["QuietMode"]:
  430             MiscUtil.PrintWarning("Failed to remove RESP results/grid files: %s\n" % ErrMsg)
  431 
  432 
  433 def RenameRESPGridFiles(MolNum):
  434     """Rename RESP grid files."""
  435 
  436     try:
  437         MolPrefix = "Mol%s" % MolNum
  438         GridFiles = ["1_default_grid.dat", "1_default_grid_esp.dat", "results.out"]
  439         NewGridFiles = ["%s_grid.dat" % MolPrefix, "%s_grid_esp.dat" % MolPrefix, "%s_resp_results.out" % MolPrefix]
  440         for GridFile, NewGridFile in zip(GridFiles, NewGridFiles):
  441             if os.path.isfile(GridFile):
  442                 shutil.move(GridFile, NewGridFile)
  443     except Exception as ErrMsg:
  444         if not OptionsInfo["QuietMode"]:
  445             MiscUtil.PrintWarning("Failed to move RESP results/grid files: %s\n" % ErrMsg)
  446 
  447 
  448 def WriteMolPartialCharges(Writer, Mol, PartialCharges):
  449     """Write out partial atomic charges for a molecule."""
  450 
  451     if PartialCharges is None:
  452         return
  453 
  454     if OptionsInfo["AtomAliasesFormatMode"]:
  455         for Atom, PartialCharge in zip(Mol.GetAtoms(), PartialCharges):
  456             Atom.SetProp("molFileAlias", PartialCharge)
  457     else:
  458         ChargesValues = "\n".join(PartialCharges)
  459         Mol.SetProp(OptionsInfo["DataFieldLabel"], ChargesValues)
  460 
  461     Writer.write(Mol)
  462 
  463 
  464 def SetupPsi4Mol(Psi4Handle, Mol, MolCount=None):
  465     """Setup a Psi4 molecule object."""
  466 
  467     MolGeometry = RDKitUtil.GetPsi4XYZFormatString(Mol, NoCom=True, NoReorient=True)
  468 
  469     try:
  470         Psi4Mol = Psi4Handle.geometry(MolGeometry)
  471     except Exception as ErrMsg:
  472         Psi4Mol = None
  473         if not OptionsInfo["QuietMode"]:
  474             MiscUtil.PrintWarning("Failed to create Psi4 molecule from geometry string: %s\n" % ErrMsg)
  475             MolName = RDKitUtil.GetMolName(Mol, MolCount)
  476             MiscUtil.PrintWarning("Ignoring molecule: %s" % MolName)
  477 
  478     return Psi4Mol
  479 
  480 
  481 def PerformPsi4Cleanup(Psi4Handle):
  482     """Perform clean up."""
  483 
  484     # Clean up after Psi4 run ...
  485     Psi4Handle.core.clean()
  486 
  487     # Clean up any leftover scratch files...
  488     if OptionsInfo["MPMode"]:
  489         Psi4Util.RemoveScratchFiles(Psi4Handle, OptionsInfo["Psi4RunParams"]["OutputFile"])
  490 
  491 
  492 def SetupRESP():
  493     """Load resp and return its handle."""
  494 
  495     if not OptionsInfo["RESPChargesTypeMode"]:
  496         return None
  497 
  498     try:
  499         import resp
  500     except ImportError as ErrMsg:
  501         sys.stderr.write("\nFailed to import Psi4 module/package resp: %s\n" % ErrMsg)
  502         sys.stderr.write("Check/update your Psi4 environment and try again.\n\n")
  503         sys.exit(1)
  504 
  505     return resp
  506 
  507 
  508 def CheckAndValidateMolecule(Mol, MolCount=None):
  509     """Validate molecule for Psi4 calculations."""
  510 
  511     if Mol is None:
  512         if not OptionsInfo["QuietMode"]:
  513             MiscUtil.PrintInfo("\nProcessing molecule number %s..." % MolCount)
  514         return False
  515 
  516     MolName = RDKitUtil.GetMolName(Mol, MolCount)
  517     if not OptionsInfo["QuietMode"]:
  518         MiscUtil.PrintInfo("\nProcessing molecule %s..." % MolName)
  519 
  520     if RDKitUtil.IsMolEmpty(Mol):
  521         if not OptionsInfo["QuietMode"]:
  522             MiscUtil.PrintWarning("Ignoring empty molecule: %s\n" % MolName)
  523         return False
  524 
  525     if not RDKitUtil.ValidateElementSymbols(RDKitUtil.GetAtomSymbols(Mol)):
  526         if not OptionsInfo["QuietMode"]:
  527             MiscUtil.PrintWarning("Ignoring molecule containing invalid element symbols: %s\n" % MolName)
  528         return False
  529 
  530     # Check for 3D flag...
  531     if not Mol.GetConformer().Is3D():
  532         if not OptionsInfo["QuietMode"]:
  533             MiscUtil.PrintWarning("3D tag is not set for molecule: %s\n" % MolName)
  534 
  535     # Check for missing hydrogens...
  536     if RDKitUtil.AreHydrogensMissingInMolecule(Mol):
  537         if not OptionsInfo["QuietMode"]:
  538             MiscUtil.PrintWarning("Missing hydrogens in molecule: %s\n" % MolName)
  539 
  540     return True
  541 
  542 
  543 def SetupMethodNameAndBasisSet(Mol):
  544     """Setup method name and basis set."""
  545 
  546     MethodName = OptionsInfo["MethodName"]
  547     if OptionsInfo["MethodNameAuto"]:
  548         MethodName = "B3LYP"
  549 
  550     BasisSet = OptionsInfo["BasisSet"]
  551     if OptionsInfo["BasisSetAuto"]:
  552         BasisSet = "6-31+G**" if RDKitUtil.IsAtomSymbolPresentInMol(Mol, "S") else "6-31G**"
  553 
  554     return (MethodName, BasisSet)
  555 
  556 
  557 def SetupReferenceWavefunction(Mol):
  558     """Setup reference wavefunction."""
  559 
  560     Reference = OptionsInfo["Reference"]
  561     if OptionsInfo["ReferenceAuto"]:
  562         Reference = "UHF" if (RDKitUtil.GetSpinMultiplicity(Mol) > 1) else "RHF"
  563 
  564     return Reference
  565 
  566 
  567 def ProcessOptionChargesRespParameters():
  568     """Process charges RSEP parameters option."""
  569 
  570     ParamsOptionName = "--chargesRespParams"
  571     ParamsOptionValue = Options["--chargesRespParams"]
  572 
  573     VDWScaleFactors = [1.4, 1.6, 1.8, 2.0]
  574     VDWRadii = {
  575         "H": 1.20,
  576         "HE": 1.20,
  577         "LI": 1.37,
  578         "BE": 1.45,
  579         "B": 1.45,
  580         "C": 1.50,
  581         "N": 1.50,
  582         "O": 1.40,
  583         "F": 1.35,
  584         "NE": 1.30,
  585         "NA": 1.57,
  586         "MG": 1.36,
  587         "AL": 1.24,
  588         "SI": 1.17,
  589         "P": 1.80,
  590         "S": 1.75,
  591         "CL": 1.70,
  592     }
  593 
  594     ParamsInfo = {
  595         "MaxIter": 25,
  596         "RestrainHydrogens": False,
  597         "RemoveGridFiles": True,
  598         "RespA": 0.0005,
  599         "RespB": 0.1,
  600         "Tolerance": 1e-5,
  601         "VDWRadii": VDWRadii,
  602         "VDWScaleFactors": VDWScaleFactors,
  603         "VDWPointDensity": 1.0,
  604     }
  605 
  606     if re.match("^auto$", ParamsOptionValue, re.I):
  607         SetupChargesRespOptions(ParamsInfo)
  608         return
  609 
  610     # Setup a canonical paramater names...
  611     ValidParamNames = []
  612     CanonicalParamNamesMap = {}
  613     for ParamName in sorted(ParamsInfo):
  614         ValidParamNames.append(ParamName)
  615         CanonicalParamNamesMap[ParamName.lower()] = ParamName
  616 
  617     ParamsOptionValue = ParamsOptionValue.strip()
  618     if not ParamsOptionValue:
  619         MiscUtil.PrintError('No valid parameter name and value pairs specified using "%s" option' % ParamsOptionName)
  620 
  621     ParamsOptionValueWords = ParamsOptionValue.split(",")
  622     if len(ParamsOptionValueWords) % 2:
  623         MiscUtil.PrintError(
  624             'The number of comma delimited paramater names and values, %d, specified using "%s" option must be an even number.'
  625             % (len(ParamsOptionValueWords), ParamsOptionName)
  626         )
  627 
  628     # Validate paramater name and value pairs...
  629     for Index in range(0, len(ParamsOptionValueWords), 2):
  630         Name = ParamsOptionValueWords[Index].strip()
  631         Value = ParamsOptionValueWords[Index + 1].strip()
  632 
  633         CanonicalName = Name.lower()
  634         if CanonicalName not in CanonicalParamNamesMap:
  635             MiscUtil.PrintError(
  636                 'The parameter name, %s, specified using "%s" is not a valid name. Supported parameter names: %s'
  637                 % (Name, ParamsOptionName, " ".join(ValidParamNames))
  638             )
  639 
  640         ParamName = CanonicalParamNamesMap[CanonicalName]
  641         ParamValue = Value
  642 
  643         if re.match("^MaxIter$", ParamName, re.I):
  644             if not MiscUtil.IsInteger(Value):
  645                 MiscUtil.PrintError(
  646                     'The parameter value, %s, specified for parameter name, %s, using "%s" option must be an integer.'
  647                     % (Value, Name, ParamsOptionName)
  648                 )
  649             Value = int(Value)
  650             if Value <= 0:
  651                 MiscUtil.PrintError(
  652                     'The parameter value, %s, specified for parameter name, %s, using "%s" option is not a valid value. Supported values: > 0'
  653                     % (Value, Name, ParamsOptionName)
  654                 )
  655             ParamValue = Value
  656         elif re.match("^(RestrainHydrogens|RemoveGridFiles)$", ParamName, re.I):
  657             if not re.match("^(yes|no)$", Value, re.I):
  658                 MiscUtil.PrintError(
  659                     'The parameter value, %s, specified for parameter name, %s, using "%s" option is not a valid value. Supported values: yes or no'
  660                     % (Value, Name, ParamsOptionName)
  661                 )
  662             ParamValue = True if re.match("^yes$", Value, re.I) else False
  663         elif re.match("^(RespA|RespB|VDWPointDensity)$", ParamName, re.I):
  664             if not MiscUtil.IsNumber(Value):
  665                 MiscUtil.PrintError(
  666                     'The parameter value, %s, specified for parameter name, %s, using "%s" option must be a float.'
  667                     % (Value, Name, ParamsOptionName)
  668                 )
  669             Value = float(Value)
  670             if Value <= 0:
  671                 MiscUtil.PrintError(
  672                     'The parameter value, %s, specified for parameter name, %s, using "%s" option is not a valid value. Supported values: > 0'
  673                     % (Value, Name, ParamsOptionName)
  674                 )
  675             ParamValue = Value
  676         elif re.match("^Tolerance$", ParamName, re.I):
  677             if not MiscUtil.IsNumber(Value):
  678                 MiscUtil.PrintError(
  679                     'The parameter value, %s, specified for parameter name, %s, using "%s" option must be a float.'
  680                     % (Value, Name, ParamsOptionName)
  681                 )
  682             Value = float(Value)
  683             if Value < 0:
  684                 MiscUtil.PrintError(
  685                     'The parameter value, %s, specified for parameter name, %s, using "%s" option is not a valid value. Supported values: >= 0'
  686                     % (Value, Name, ParamsOptionName)
  687                 )
  688             ParamValue = Value
  689         elif re.match("^VDWScaleFactors$", ParamName, re.I):
  690             ScaleFactorsValue = Value.strip()
  691             if not ScaleFactorsValue:
  692                 MiscUtil.PrintError(
  693                     'No parameter value specified for parameter name, %s, using "%s" option.' % (Name, ParamsOptionName)
  694                 )
  695             ScaleFactorsWords = ScaleFactorsValue.split()
  696 
  697             ScaleFactors = []
  698             LastScaleFactor = 0.0
  699             for ScaleFactor in ScaleFactorsWords:
  700                 if not MiscUtil.IsNumber(ScaleFactor):
  701                     MiscUtil.PrintError(
  702                         'The value, %s, in parameter value, %s, specified for parameter name, %s, using "%s" option must be a float.'
  703                         % (ScaleFactor, Value, Name, ParamsOptionName)
  704                     )
  705                 ScaleFactor = float(ScaleFactor)
  706                 if ScaleFactor <= 0:
  707                     MiscUtil.PrintError(
  708                         'The value, %s, in parameter value, %s, specified for parameter name, %s, using "%s" option is not a valid value. Supported values: > 0'
  709                         % (ScaleFactor, Value, Name, ParamsOptionName)
  710                     )
  711                 if len(ScaleFactors):
  712                     if ScaleFactor <= LastScaleFactor:
  713                         MiscUtil.PrintError(
  714                             'The value, %s, in parameter value, %s, specified for parameter name, %s, using "%s" option is not a valid value. It must be greater than the previous value, %s, specified for the scale factor.'
  715                             % (ScaleFactor, Value, Name, ParamsOptionName, LastScaleFactor)
  716                         )
  717 
  718                 LastScaleFactor = ScaleFactor
  719                 ScaleFactors.append(ScaleFactor)
  720 
  721             ParamValue = ScaleFactors
  722         elif re.match("^VDWRadii$", ParamName, re.I):
  723             RadiiValue = Value.strip()
  724             if not RadiiValue:
  725                 MiscUtil.PrintError(
  726                     'No parameter value specified for parameter name, %s, using "%s" option.' % (Name, ParamsOptionName)
  727                 )
  728             RadiiWords = RadiiValue.split()
  729             if len(RadiiWords) % 2:
  730                 MiscUtil.PrintError(
  731                     'The number of space delimited values, %s, in parameter value, %s, specified for parameter name, %s, using "%s" option is not valid. It must be an even number.'
  732                     % (len(RadiiWords), Value, Name, ParamsOptionName)
  733                 )
  734 
  735             for RadiiWordsIndex in range(0, len(RadiiWords), 2):
  736                 ElementSymbol = RadiiWords[RadiiWordsIndex].upper()
  737                 VDWRadius = RadiiWords[RadiiWordsIndex + 1]
  738 
  739                 if not MiscUtil.IsNumber(VDWRadius):
  740                     MiscUtil.PrintError(
  741                         'The vdw radius value, %s, in parameter value, %s, specified for parameter name, %s, using "%s" option is not a valid.'
  742                         % (VDWRadius, Value, Name, ParamsOptionName)
  743                     )
  744 
  745                 if not RDKitUtil.IsValidElementSymbol(ElementSymbol.capitalize()):
  746                     MiscUtil.PrintWarning(
  747                         'The element symbol, %s, in parameter value, %s, specified for parameter name, %s, using "%s" option is not a valid.'
  748                         % (ElementSymbol, Value, Name, ParamsOptionName)
  749                     )
  750                 VDWRadii[ElementSymbol] = float(VDWRadius)
  751 
  752             ParamValue = VDWRadii
  753         else:
  754             ParamValue = Value
  755 
  756         # Set value...
  757         ParamsInfo[ParamName] = ParamValue
  758 
  759     SetupChargesRespOptions(ParamsInfo)
  760 
  761 
  762 def SetupChargesRespOptions(ParamsInfo):
  763     """Setup options for calculating RESP charges."""
  764 
  765     # Initialize ESP options...
  766     ChargesRespOtions = {}
  767 
  768     ChargesRespOtions["METHOD_ESP"] = None
  769     ChargesRespOtions["BASIS_ESP"] = None
  770 
  771     # Setup ESP options...
  772     ParamNameToESPOptionID = {
  773         "MaxIter": "MAX_IT",
  774         "RespA": "RESP_A",
  775         "RespB": "RESP_B",
  776         "Tolerance": "TOLER",
  777         "VDWRadii": "VDW_RADII",
  778         "VDWScaleFactors": "VDW_SCALE_FACTORS",
  779         "VDWPointDensity": "VDW_POINT_DENSITY",
  780     }
  781 
  782     for ParamName in ParamNameToESPOptionID:
  783         ESPOptionID = ParamNameToESPOptionID[ParamName]
  784         ChargesRespOtions[ESPOptionID] = ParamsInfo[ParamName]
  785 
  786     # Setup IHFREE option...
  787     ChargesRespOtions["IHFREE"] = False if ParamsInfo["RestrainHydrogens"] else True
  788 
  789     OptionsInfo["ChargesRespParams"] = ParamsInfo
  790     OptionsInfo["ChargesRespOtions"] = ChargesRespOtions
  791 
  792 
  793 def CheckSupportForRESPCalculations():
  794     """Check support for RESP calculations."""
  795 
  796     if not OptionsInfo["MPMode"]:
  797         return
  798 
  799     if not OptionsInfo["RESPChargesTypeMode"]:
  800         return
  801 
  802     MiscUtil.PrintInfo("")
  803     MiscUtil.PrintError(
  804         "Multiprocessing is not supported during the calculation of RSEP partial atomic\ncharges. The RESP module is not conducive for multiprocessing. The names of the results\nand grid output files are not unique for molecules during the RESP calculations spread\nacross multiple processes."
  805     )
  806 
  807 
  808 def ProcessOptions():
  809     """Process and validate command line arguments and options."""
  810 
  811     MiscUtil.PrintInfo("Processing options...")
  812 
  813     # Validate options...
  814     ValidateOptions()
  815 
  816     OptionsInfo["Infile"] = Options["--infile"]
  817     ParamsDefaultInfoOverride = {"RemoveHydrogens": False}
  818     OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters(
  819         "--infileParams",
  820         Options["--infileParams"],
  821         InfileName=Options["--infile"],
  822         ParamsDefaultInfo=ParamsDefaultInfoOverride,
  823     )
  824 
  825     OptionsInfo["Outfile"] = Options["--outfile"]
  826     OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters(
  827         "--outfileParams", Options["--outfileParams"]
  828     )
  829 
  830     OptionsInfo["Overwrite"] = Options["--overwrite"]
  831 
  832     # Method, basis set, and reference wavefunction...
  833     OptionsInfo["BasisSet"] = Options["--basisSet"]
  834     OptionsInfo["BasisSetAuto"] = True if re.match("^auto$", Options["--basisSet"], re.I) else False
  835 
  836     OptionsInfo["MethodName"] = Options["--methodName"]
  837     OptionsInfo["MethodNameAuto"] = True if re.match("^auto$", Options["--methodName"], re.I) else False
  838 
  839     OptionsInfo["Reference"] = Options["--reference"]
  840     OptionsInfo["ReferenceAuto"] = True if re.match("^auto$", Options["--reference"], re.I) else False
  841 
  842     # Run and options parameters...
  843     OptionsInfo["Psi4OptionsParams"] = Psi4Util.ProcessPsi4OptionsParameters(
  844         "--psi4OptionsParams", Options["--psi4OptionsParams"]
  845     )
  846     OptionsInfo["Psi4RunParams"] = Psi4Util.ProcessPsi4RunParameters(
  847         "--psi4RunParams", Options["--psi4RunParams"], InfileName=OptionsInfo["Infile"]
  848     )
  849 
  850     ChargesType = Options["--chargesType"]
  851     ChargesPropType = None
  852     RESPChargesTypeMode = False
  853     if re.match("^Mulliken$", ChargesType, re.I):
  854         ChargesType = "Mulliken"
  855         ChargesPropType = "MULLIKEN_CHARGES"
  856     elif re.match("^Lowdin$", ChargesType, re.I):
  857         ChargesType = "Lowdin"
  858         ChargesPropType = "LOWDIN_CHARGES"
  859     elif re.match("^RESP$", ChargesType, re.I):
  860         ChargesType = "RESP"
  861         ChargesPropType = None
  862         RESPChargesTypeMode = True
  863     else:
  864         MiscUtil.PrintError("The value, %s, specified for charge mode is not supported. " % Options["--chargesType"])
  865 
  866     OptionsInfo["ChargesType"] = ChargesType
  867     OptionsInfo["ChargesPropType"] = ChargesPropType
  868     OptionsInfo["RESPChargesTypeMode"] = RESPChargesTypeMode
  869 
  870     ProcessOptionChargesRespParameters()
  871 
  872     AtomAliasesFormatMode = True
  873     if re.match("^DataField", Options["--chargesSDFormat"], re.I):
  874         AtomAliasesFormatMode = False
  875     OptionsInfo["AtomAliasesFormatMode"] = AtomAliasesFormatMode
  876 
  877     DataFieldLabel = Options["--dataFieldLabel"]
  878     if re.match("^auto$", DataFieldLabel, re.I):
  879         DataFieldLabel = "Psi4_%s_Charges (a.u.)" % Options["--chargesType"]
  880     OptionsInfo["DataFieldLabel"] = DataFieldLabel
  881 
  882     OptionsInfo["MPMode"] = True if re.match("^yes$", Options["--mp"], re.I) else False
  883     OptionsInfo["MPParams"] = MiscUtil.ProcessOptionMultiprocessingParameters("--mpParams", Options["--mpParams"])
  884 
  885     OptionsInfo["Precision"] = int(Options["--precision"])
  886 
  887     OptionsInfo["QuietMode"] = True if re.match("^yes$", Options["--quiet"], re.I) else False
  888 
  889 
  890 def RetrieveOptions():
  891     """Retrieve command line arguments and options."""
  892 
  893     # Get options...
  894     global Options
  895     Options = docopt(_docoptUsage_)
  896 
  897     # Set current working directory to the specified directory...
  898     WorkingDir = Options["--workingdir"]
  899     if WorkingDir:
  900         os.chdir(WorkingDir)
  901 
  902     # Handle examples option...
  903     if "--examples" in Options and Options["--examples"]:
  904         MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_))
  905         sys.exit(0)
  906 
  907 
  908 def ValidateOptions():
  909     """Validate option values."""
  910 
  911     MiscUtil.ValidateOptionTextValue("-c, --chargesType", Options["--chargesType"], "Mulliken Lowdin RESP")
  912     MiscUtil.ValidateOptionTextValue("--chargesSDFormat", Options["--chargesSDFormat"], "AtomAliases DataField")
  913 
  914     MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"])
  915     MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "sdf sd mol")
  916 
  917     MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "sdf sd")
  918     MiscUtil.ValidateOptionsOutputFileOverwrite(
  919         "-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"]
  920     )
  921     MiscUtil.ValidateOptionsDistinctFileNames(
  922         "-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"]
  923     )
  924 
  925     MiscUtil.ValidateOptionTextValue("--mp", Options["--mp"], "yes no")
  926     MiscUtil.ValidateOptionTextValue("-q, --quiet", Options["--quiet"], "yes no")
  927 
  928     MiscUtil.ValidateOptionIntegerValue("-p, --precision", Options["--precision"], {">": 0})
  929 
  930 
  931 # Setup a usage string for docopt...
  932 _docoptUsage_ = """
  933 Psi4CalculatePartialCharges.py - Calculate partial atomic charges
  934 
  935 Usage:
  936     Psi4CalculatePartialCharges.py [--basisSet <text>] [--chargesType <Mulliken or Lowdin>] [--chargesRespParams <Name,Value,...>]
  937                                    [--chargesSDFormat <AtomAliases or DataField>] [--dataFieldLabel <text>] [--infileParams <Name,Value,...>]
  938                                    [--methodName <text>] [--mp <yes or no>] [--mpParams <Name, Value,...>] [ --outfileParams <Name,Value,...> ]
  939                                    [--overwrite] [--precision <number>] [--psi4OptionsParams <Name,Value,...>] [--psi4RunParams <Name,Value,...>] 
  940                                    [--quiet <yes or no>] [--reference <text>] [-w <dir>] -i <infile> -o <outfile> 
  941     Psi4CalculatePartialCharges.py -h | --help | -e | --examples
  942 
  943 Description:
  944     Calculate partial atomic charges for molecules using a specified method name
  945     and basis set. The molecules must have 3D coordinates in input file. The molecular
  946     geometry is not optimized before the calculation. In addition, hydrogens must
  947     be present for all molecules in input file. A single point energy calculation is 
  948     performed before calculating the partial atomic charges. The 3D coordinates
  949     are not modified during the calculation.
  950     
  951     A Psi4 XYZ format geometry string is automatically generated for each molecule
  952     in input file. It contains atom symbols and 3D coordinates for each atom in a
  953     molecule. In addition, the formal charge and spin multiplicity are present in the
  954     the geometry string. These values are either retrieved from molecule properties
  955     named 'FormalCharge' and 'SpinMultiplicty' or dynamically calculated for a
  956     molecule.
  957 
  958     The supported input file formats are: Mol (.mol), SD (.sdf, .sd)
  959 
  960     The supported output file formats are: SD (.sdf, .sd)
  961 
  962 Options:
  963     -b, --basisSet <text>  [default: auto]
  964         Basis set to use for calculating single point energy before partial atomic
  965         charges. Default: 6-31+G** for sulfur containing molecules; Otherwise,
  966         6-31G** [ Ref 150 ]. The specified value must be a valid Psi4 basis set.
  967         No validation is performed.
  968         
  969         The following list shows a representative sample of basis sets available
  970         in Psi4:
  971             
  972             STO-3G, 6-31G, 6-31+G, 6-31++G, 6-31G*, 6-31+G*,  6-31++G*, 
  973             6-31G**, 6-31+G**, 6-31++G**, 6-311G, 6-311+G, 6-311++G,
  974             6-311G*, 6-311+G*, 6-311++G*, 6-311G**, 6-311+G**, 6-311++G**,
  975             cc-pVDZ, cc-pCVDZ, aug-cc-pVDZ, cc-pVDZ-DK, cc-pCVDZ-DK, def2-SVP,
  976             def2-SVPD, def2-TZVP, def2-TZVPD, def2-TZVPP, def2-TZVPPD
  977             
  978     -c, --chargesType <Mulliken, Lowdin, or RESP>  [default: Mulliken]
  979         Type of partial atomic charges to calculate. Possible values: Mulliken, Lowdin,
  980         or RESP [ Ref 158 ]. Multiprocessing is not supported during the calculation
  981         of RSEP charges. In addition, the RSEP calculation relies on the presence of
  982         the RESP Psi4 Plugin in your environment.
  983     --chargesRespParams <Name,Value,...>  [default: auto]
  984         A comma delimited list of parameter name and value  pairs for calculating
  985         RESP [ Ref 158 ] charges. A space is used as a delimiter for multiple values in
  986         a name and value pair. The supported parameter names, along with
  987         their default values, are shown below:
  988             
  989             maxIter, 25
  990             restrainHydrogens, no
  991             removeGridFiles, yes
  992             respA, 0.0005
  993             respB, 0.1
  994             tolerance, 1e-5
  995             vdwRadii, auto
  996             vdwScaleFactors, 1.4 1.6 1.8 2.0
  997             vdwPointDensity, 1.0
  998             
  999         maxIter: Maximum number of iterations to perform during charge fitting.
 1000         
 1001         restrainHydrogens: Restrain hydrogens during charge fitting.
 1002         
 1003         removeGridFiles: Keep or remove the following ESP grid and output files:
 1004         1_default_grid.dat, 1_default_grid_esp.dat, results.out. The output
 1005         files are removed by default. You may optionally keep the output files. The
 1006         output files are automatically renamed to the following file for 'No' value of
 1007         'removeGridFiles': Mol<MolNum>_grid.dat, Mol<MolNum>_grid_esp.dat,
 1008         Mol<MolNum>_resp_results.out.
 1009         
 1010         respA: Scale factor defining the  asymptotic limits of the strength of the
 1011         restraint.
 1012         
 1013         respB: The 'tightness' of the hyperbola around its minimum for the
 1014         restraint.
 1015         
 1016         tolerance: Tolerance for charges during charge fitting to the ESP.
 1017         
 1018         vdwRadii: vdw radii for elements in angstroms. It's a space delimited list of
 1019         element symbol and radius value pairs. The default list is shown below:
 1020             
 1021             H 1.20 He 1.20 Li 1.37 Be 1.45 B 1.45 C 1.50 N 1.50 O 1.40 F 1.35
 1022             Ne 1.30 Na 1.57 Mg 1.36 Al 1.24 Si 1.17P 1.80 S 1.75 Cl 1.7
 1023             
 1024         You may specify all or a subset of element symbol and vdw radius pairs to
 1025         update the default values.
 1026         
 1027         vdwScaleFactors: The vdw radii are scaled by the scale factors to set the
 1028         grid points at the shells for calculating the ESP using quantum methodology.
 1029         The default number of shells is 4 and corresponds to the number of vdw
 1030         scale factors.The 'shell' points are written to a grid file for calculating the ESP.
 1031         
 1032         vdwPointDensity: Approximate number of points to generate per square
 1033         angstrom surface area.
 1034     --chargesSDFormat <AtomAliases or DataField>  [default: AtomAliases]
 1035         Format for writing out partial atomic charges to SD file. Possible values:
 1036         AtomAliases or DataField.
 1037         
 1038         The charges are stored as atom property named 'molFileAlias' for
 1039         'AtomAliases' format and may be retrieved using the RDKit function
 1040         'GetProp' for atoms: Aotm.GetProp('molFileAliases').
 1041         
 1042         The charges are stored under a data field label specified using
 1043         '-d, --dataFieldLabel' for 'DataField' format and may be retrieved using the
 1044         RDKit function 'GetProp' for molecules.
 1045     -d, --dataFieldLabel <text>  [default: auto]
 1046         Data field label to use for storing charged in SD file during 'DataField' value
 1047         of '-c, --chargesSDFormat'. Default: Psi4_<ChargesType>_Charges (a.u.)
 1048     -e, --examples
 1049         Print examples.
 1050     -h, --help
 1051         Print this help message.
 1052     -i, --infile <infile>
 1053         Input file name.
 1054     --infileParams <Name,Value,...>  [default: auto]
 1055         A comma delimited list of parameter name and value pairs for reading
 1056         molecules from files. The supported parameter names for different file
 1057         formats, along with their default values, are shown below:
 1058             
 1059             SD, MOL: removeHydrogens,no,sanitize,yes,strictParsing,yes
 1060             
 1061     -m, --methodName <text>  [default: auto]
 1062         Method to use for calculating single point energy before partial atomic
 1063         charges. Default: B3LYP [ Ref 150 ]. The specified value must be a valid
 1064         Psi4 method name. No validation is performed.
 1065         
 1066         The following list shows a representative sample of methods available
 1067         in Psi4:
 1068             
 1069             B1LYP, B2PLYP, B2PLYP-D3BJ, B2PLYP-D3MBJ, B3LYP, B3LYP-D3BJ,
 1070             B3LYP-D3MBJ, CAM-B3LYP, CAM-B3LYP-D3BJ, HF, HF-D3BJ,  HF3c, M05,
 1071             M06, M06-2x, M06-HF, M06-L, MN12-L, MN15, MN15-D3BJ,PBE, PBE0,
 1072             PBEH3c, PW6B95, PW6B95-D3BJ, WB97, WB97X, WB97X-D, WB97X-D3BJ
 1073             
 1074     --mp <yes or no>  [default: no]
 1075         Use multiprocessing.
 1076          
 1077         By default, input data is retrieved in a lazy manner via mp.Pool.imap()
 1078         function employing lazy RDKit data iterable. This allows processing of
 1079         arbitrary large data sets without any additional requirements memory.
 1080         
 1081         All input data may be optionally loaded into memory by mp.Pool.map()
 1082         before starting worker processes in a process pool by setting the value
 1083         of 'inputDataMode' to 'InMemory' in '--mpParams' option.
 1084         
 1085         A word to the wise: The default 'chunkSize' value of 1 during 'Lazy' input
 1086         data mode may adversely impact the performance. The '--mpParams' section
 1087         provides additional information to tune the value of 'chunkSize'.
 1088     --mpParams <Name,Value,...>  [default: auto]
 1089         A comma delimited list of parameter name and value pairs to configure
 1090         multiprocessing.
 1091         
 1092         The supported parameter names along with their default and possible
 1093         values are shown below:
 1094         
 1095             chunkSize, auto
 1096             inputDataMode, Lazy   [ Possible values: InMemory or Lazy ]
 1097             numProcesses, auto   [ Default: mp.cpu_count() ]
 1098         
 1099         These parameters are used by the following functions to configure and
 1100         control the behavior of multiprocessing: mp.Pool(), mp.Pool.map(), and
 1101         mp.Pool.imap().
 1102         
 1103         The chunkSize determines chunks of input data passed to each worker
 1104         process in a process pool by mp.Pool.map() and mp.Pool.imap() functions.
 1105         The default value of chunkSize is dependent on the value of 'inputDataMode'.
 1106         
 1107         The mp.Pool.map() function, invoked during 'InMemory' input data mode,
 1108         automatically converts RDKit data iterable into a list, loads all data into
 1109         memory, and calculates the default chunkSize using the following method
 1110         as shown in its code:
 1111         
 1112             chunkSize, extra = divmod(len(dataIterable), len(numProcesses) * 4)
 1113             if extra: chunkSize += 1
 1114         
 1115         For example, the default chunkSize will be 7 for a pool of 4 worker processes
 1116         and 100 data items.
 1117         
 1118         The mp.Pool.imap() function, invoked during 'Lazy' input data mode, employs
 1119         'lazy' RDKit data iterable to retrieve data as needed, without loading all the
 1120         data into memory. Consequently, the size of input data is not known a priori.
 1121         It's not possible to estimate an optimal value for the chunkSize. The default 
 1122         chunkSize is set to 1.
 1123         
 1124         The default value for the chunkSize during 'Lazy' data mode may adversely
 1125         impact the performance due to the overhead associated with exchanging
 1126         small chunks of data. It is generally a good idea to explicitly set chunkSize to
 1127         a larger value during 'Lazy' input data mode, based on the size of your input
 1128         data and number of processes in the process pool.
 1129         
 1130         The mp.Pool.map() function waits for all worker processes to process all
 1131         the data and return the results. The mp.Pool.imap() function, however,
 1132         returns the the results obtained from worker processes as soon as the
 1133         results become available for specified chunks of data.
 1134         
 1135         The order of data in the results returned by both mp.Pool.map() and 
 1136         mp.Pool.imap() functions always corresponds to the input data.
 1137     -o, --outfile <outfile>
 1138         Output file name.
 1139     --outfileParams <Name,Value,...>  [default: auto]
 1140         A comma delimited list of parameter name and value pairs for writing
 1141         molecules to files. The supported parameter names for different file
 1142         formats, along with their default values, are shown below:
 1143             
 1144             SD: kekulize,yes,forceV3000,no
 1145             
 1146     --overwrite
 1147         Overwrite existing files.
 1148     --precision <number>  [default: 4]
 1149         Floating point precision for writing energy values.
 1150     --psi4OptionsParams <Name,Value,...>  [default: none]
 1151         A comma delimited list of Psi4 option name and value pairs for setting
 1152         global and module options. The names are 'option_name' for global options
 1153         and 'module_name__option_name' for options local to a module. The
 1154         specified option names must be valid Psi4 names. No validation is
 1155         performed.
 1156         
 1157         The specified option name and  value pairs are processed and passed to
 1158         psi4.set_options() as a dictionary. The supported value types are float,
 1159         integer, boolean, or string. The float value string is converted into a float.
 1160         The valid values for a boolean string are yes, no, true, false, on, or off. 
 1161     --psi4RunParams <Name,Value,...>  [default: auto]
 1162         A comma delimited list of parameter name and value pairs for configuring
 1163         Psi4 jobs.
 1164         
 1165         The supported parameter names along with their default and possible
 1166         values are shown below:
 1167              
 1168             MemoryInGB, 1
 1169             NumThreads, 1
 1170             OutputFile, auto   [ Possible  values: stdout, quiet, or FileName ]
 1171             ScratchDir, auto   [ Possivle values: DirName]
 1172             RemoveOutputFile, yes   [ Possible values: yes, no, true, or false]
 1173             
 1174         These parameters control the runtime behavior of Psi4.
 1175         
 1176         The default file name for 'OutputFile' is <InFileRoot>_Psi4.out. The PID
 1177         is appended to output file name during multiprocessing as shown:
 1178         <InFileRoot>_Psi4_<PIDNum>.out. The 'stdout' value for 'OutputType'
 1179         sends Psi4 output to stdout. The 'quiet' or 'devnull' value suppresses
 1180         all Psi4 output.
 1181         
 1182         The default 'Yes' value of 'RemoveOutputFile' option forces the removal
 1183         of any existing Psi4 before creating new files to append output from
 1184         multiple Psi4 runs.
 1185         
 1186         The option 'ScratchDir' is a directory path to the location of scratch
 1187         files. The default value corresponds to Psi4 default. It may be used to
 1188         override the deafult path.
 1189     -q, --quiet <yes or no>  [default: no]
 1190         Use quiet mode. The warning and information messages will not be printed.
 1191     -r, --reference <text>  [default: auto]
 1192         Reference wave function to use for calculating single point energy before
 1193         partial atomic charges. Default: RHF or UHF. The default values are Restricted
 1194         Hartree-Fock (RHF) for closed-shell molecules with all electrons paired and
 1195         Unrestricted Hartree-Fock (UHF) for open-shell molecules with unpaired electrons.
 1196         
 1197         The specified value must be a valid Psi4 reference wave function. No validation
 1198         is performed. For example: ROHF, CUHF, RKS, etc.
 1199         
 1200         The spin multiplicity determines the default value of reference wave function
 1201         for input molecules. It is calculated from number of free radical electrons using
 1202         Hund's rule of maximum multiplicity defined as 2S + 1 where S is the total
 1203         electron spin. The total spin is 1/2 the number of free radical electrons in a 
 1204         molecule. The value of 'SpinMultiplicity' molecule property takes precedence
 1205         over the calculated value of spin multiplicity.
 1206     -w, --workingdir <dir>
 1207         Location of working directory which defaults to the current directory.
 1208 
 1209 Examples:
 1210     To calculate Mulliken partial atomic charges using  B3LYP/6-31G** and
 1211     B3LYP/6-31+G** for non-sulfur and sulfur containing molecules in a SD
 1212     file with 3D structures, use RHF and UHF for closed-shell and open-shell
 1213     molecules, and write a new SD file, type:
 1214 
 1215         % Psi4CalculatePartialCharges.py  -i Psi4Sample3D.sdf 
 1216           -o Psi4Sample3DOut.sdf
 1217 
 1218     To run the first example for calculating RESP charges using a default set of
 1219     parameters for the RESP calculation and write out a SD file, type:
 1220 
 1221         % Psi4CalculatePartialCharges.py  --chargesType RESP
 1222            -i Psi4Sample3D.sdf -o Psi4Sample3DOut.sdf
 1223 
 1224     To run the first example for calculating RESP charges using an explicit set
 1225     of specific parameters for the RESP calculation and write out a SD file, type:
 1226 
 1227         % Psi4CalculatePartialCharges.py  --chargesType RESP
 1228            --chargesRespParams "respA, 0.0005, respB, 0.1, vdwScaleFactors,
 1229            1.4 1.6 1.8 2.0" -i Psi4Sample3D.sdf -o Psi4Sample3DOut.sdf
 1230 
 1231     To run the first example in multiprocessing mode on all available CPUs
 1232     without loading all data into memory and write out a SD file, type:
 1233 
 1234         % Psi4CalculatePartialCharges.py --mp yes -i Psi4Sample3D.sdf
 1235           -o Psi4Sample3DOut.sdf
 1236 
 1237     To run the first example in multiprocessing mode on all available CPUs
 1238     by loading all data into memory and write out a SD file, type:
 1239 
 1240         % Psi4CalculatePartialCharges.py  --mp yes --mpParams "inputDataMode,
 1241           InMemory" -i Psi4Sample3D.sdf  -o Psi4Sample3DOut.sdf
 1242 
 1243     To run the first example in multiprocessing mode on all available CPUs
 1244     without loading all data into memory along with multiple threads for each
 1245     Psi4 run and write out a SD file, type:
 1246 
 1247         % Psi4CalculatePartialCharges.py --mp yes --psi4RunParams "NumThreads,2"
 1248            -i Psi4Sample3D.sdf -o Psi4Sample3DOut.sdf
 1249 
 1250     To run the first example for writing out charges to a new SD file under a
 1251     datafield instead of storing them as atom property, type:
 1252 
 1253         % Psi4CalculatePartialCharges.py  --chargesSDFormat DataField
 1254           -i Psi4Sample3D.sdf  -o Psi4Sample3DOut.sdf
 1255 
 1256     To calculate specific partial atomic charges using a specific method  and basis
 1257     set for molecules in a SD ontaining 3D structures and write them out to a specific
 1258     datafield in a new SD file, type:
 1259 
 1260         % Psi4CalculatePartialCharges.py  -c Lowdin -m SCF -b aug-cc-pVDZ
 1261           --chargesSDFormat DataField --dataFieldLabel "Lowdin_Charges"
 1262           -i Psi4Sample3D.sdf  -o Psi4Sample3DOut.sdf
 1263 
 1264 Author:
 1265     Manish Sud(msud@san.rr.com)
 1266 
 1267 See also:
 1268     Psi4CalculateEnergy.py, Psi4PerformMinimization.py, Psi4GenerateConformers.py
 1269 
 1270 Copyright:
 1271     Copyright (C) 2026 Manish Sud. All rights reserved.
 1272 
 1273     The functionality available in this script is implemented using Psi4, an
 1274     open source quantum chemistry software package, and RDKit, an open
 1275     source toolkit for cheminformatics developed by Greg Landrum.
 1276 
 1277     This file is part of MayaChemTools.
 1278 
 1279     MayaChemTools is free software; you can redistribute it and/or modify it under
 1280     the terms of the GNU Lesser General Public License as published by the Free
 1281     Software Foundation; either version 3 of the License, or (at your option) any
 1282     later version.
 1283 
 1284 """
 1285 
 1286 if __name__ == "__main__":
 1287     main()