MayaChemTools

    1 #!/bin/env python
    2 #
    3 # File: Psi4CalculatePartialCharges.py
    4 # Author: Manish Sud <msud@san.rr.com>
    5 #
    6 # Copyright (C) 2024 Manish Sud. All rights reserved.
    7 #
    8 # The functionality available in this script is implemented using Psi4, an
    9 # open source quantum chemistry software package, and RDKit, an open
   10 # source toolkit for cheminformatics developed by Greg Landrum.
   11 #
   12 # This file is part of MayaChemTools.
   13 #
   14 # MayaChemTools is free software; you can redistribute it and/or modify it under
   15 # the terms of the GNU Lesser General Public License as published by the Free
   16 # Software Foundation; either version 3 of the License, or (at your option) any
   17 # later version.
   18 #
   19 # MayaChemTools is distributed in the hope that it will be useful, but without
   20 # any warranty; without even the implied warranty of merchantability of fitness
   21 # for a particular purpose.  See the GNU Lesser General Public License for more
   22 # details.
   23 #
   24 # You should have received a copy of the GNU Lesser General Public License
   25 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
   26 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
   27 # Boston, MA, 02111-1307, USA.
   28 #
   29 
   30 from __future__ import print_function
   31 
   32 # Add local python path to the global path and import standard library modules...
   33 import os
   34 import sys;  sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python"))
   35 import time
   36 import re
   37 import shutil
   38 import multiprocessing as mp
   39 
   40 # Psi4 imports...
   41 if (hasattr(shutil, 'which') and shutil.which("psi4") is None):
   42     sys.stderr.write("\nWarning: Failed to find 'psi4' in your PATH indicating potential issues with your\n")
   43     sys.stderr.write("Psi4 environment. The 'import psi4' directive in the global scope of the script\n")
   44     sys.stderr.write("interferes with the multiprocessing functionality. It is imported later in the\n")
   45     sys.stderr.write("local scope during the execution of the script and may fail. Check/update your\n")
   46     sys.stderr.write("Psi4 environment and try again.\n\n")
   47 
   48 # RDKit imports...
   49 try:
   50     from rdkit import rdBase
   51     from rdkit import Chem
   52     from rdkit.Chem import AllChem
   53 except ImportError as ErrMsg:
   54     sys.stderr.write("\nFailed to import RDKit module/package: %s\n" % ErrMsg)
   55     sys.stderr.write("Check/update your RDKit environment and try again.\n\n")
   56     sys.exit(1)
   57 
   58 # MayaChemTools imports...
   59 try:
   60     from docopt import docopt
   61     import MiscUtil
   62     import Psi4Util
   63     import RDKitUtil
   64 except ImportError as ErrMsg:
   65     sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg)
   66     sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n")
   67     sys.exit(1)
   68 
   69 ScriptName = os.path.basename(sys.argv[0])
   70 Options = {}
   71 OptionsInfo = {}
   72 
   73 def main():
   74     """Start execution of the script."""
   75     
   76     MiscUtil.PrintInfo("\n%s (Psi4: Imported later; RDKit v%s; MayaChemTools v%s; %s): Starting...\n" % (ScriptName, rdBase.rdkitVersion, MiscUtil.GetMayaChemToolsVersion(), time.asctime()))
   77     
   78     (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime()
   79     
   80     # Retrieve command line arguments and options...
   81     RetrieveOptions()
   82     
   83     # Process and validate command line arguments and options...
   84     ProcessOptions()
   85     
   86     # Perform actions required by the script...
   87     CalculatePartialCharges()
   88     
   89     MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName)
   90     MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime))
   91 
   92 def CalculatePartialCharges():
   93     """Calculate partial atomic charges."""
   94     
   95     CheckSupportForRESPCalculations()
   96     
   97     # Setup a molecule reader...
   98     MiscUtil.PrintInfo("\nProcessing file %s..." % OptionsInfo["Infile"])
   99     Mols  = RDKitUtil.ReadMolecules(OptionsInfo["Infile"], **OptionsInfo["InfileParams"])
  100     
  101     # Set up a molecule writer...
  102     Writer = RDKitUtil.MoleculesWriter(OptionsInfo["Outfile"], **OptionsInfo["OutfileParams"])
  103     if Writer is None:
  104         MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OptionsInfo["Outfile"])
  105     MiscUtil.PrintInfo("Generating file %s..." % OptionsInfo["Outfile"])
  106 
  107     MolCount, ValidMolCount, CalcFailedCount = ProcessMolecules(Mols, Writer)
  108 
  109     if Writer is not None:
  110         Writer.close()
  111     
  112     MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount)
  113     MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount)
  114     MiscUtil.PrintInfo("Number of molecules failed during calculation of partial charges: %d" % CalcFailedCount)
  115     MiscUtil.PrintInfo("Number of ignored molecules: %d" % (MolCount - ValidMolCount + CalcFailedCount))
  116 
  117 def ProcessMolecules(Mols, Writer):
  118     """Process molecules and calculate partial charges."""
  119     
  120     if OptionsInfo["MPMode"]:
  121         return ProcessMoleculesUsingMultipleProcesses(Mols, Writer)
  122     else:
  123         return ProcessMoleculesUsingSingleProcess(Mols, Writer)
  124 
  125 def ProcessMoleculesUsingSingleProcess(Mols, Writer):
  126     """Process molecules and calculate partial charges using a single process."""
  127     
  128     # Intialize Psi4...
  129     MiscUtil.PrintInfo("\nInitializing Psi4...")
  130     Psi4Handle = Psi4Util.InitializePsi4(Psi4RunParams = OptionsInfo["Psi4RunParams"], Psi4OptionsParams = OptionsInfo["Psi4OptionsParams"], PrintVersion = True, PrintHeader = True)
  131     OptionsInfo["psi4"] = Psi4Handle
  132 
  133     # Initialize RESP...
  134     OptionsInfo["resp"] = SetupRESP()
  135     
  136     MiscUtil.PrintInfo("\nCalculating partial atomic charges...")
  137     
  138     (MolCount, ValidMolCount, CalcFailedCount) = [0] * 3
  139     for Mol in Mols:
  140         MolCount += 1
  141         
  142         if not CheckAndValidateMolecule(Mol, MolCount):
  143             continue
  144         
  145         # Setup a Psi4 molecule...
  146         Psi4Mol = SetupPsi4Mol(Psi4Handle, Mol, MolCount)
  147         if Psi4Mol is None:
  148             continue
  149         
  150         ValidMolCount += 1
  151         
  152         # Retrieve charges...
  153         CalcStatus, PartialCharges = CalculateMolPartialCharges(Psi4Handle, Psi4Mol, Mol, MolCount)
  154 
  155         if not CalcStatus:
  156             if not OptionsInfo["QuietMode"]:
  157                 MolName = RDKitUtil.GetMolName(Mol, MolCount)
  158                 MiscUtil.PrintWarning("Failed to calculate partial atomic charges for molecule %s" % MolName)
  159             
  160             CalcFailedCount += 1
  161             continue
  162         
  163         WriteMolPartialCharges(Writer, Mol, PartialCharges)
  164     
  165     return (MolCount, ValidMolCount, CalcFailedCount)
  166 
  167 def ProcessMoleculesUsingMultipleProcesses(Mols, Writer):
  168     """Process molecules and calculate partial charges using a multiprocessing."""
  169 
  170     MiscUtil.PrintInfo("\nCalculating partial atomic charges using multiprocessing...")
  171     
  172     MPParams = OptionsInfo["MPParams"]
  173     
  174     # Setup data for initializing a worker process...
  175     InitializeWorkerProcessArgs = (MiscUtil.ObjectToBase64EncodedString(Options), MiscUtil.ObjectToBase64EncodedString(OptionsInfo))
  176     
  177     # Setup a encoded mols data iterable for a worker process...
  178     WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(Mols)
  179     
  180     # Setup process pool along with data initialization for each process...
  181     if not OptionsInfo["QuietMode"]:
  182         MiscUtil.PrintInfo("\nConfiguring multiprocessing using %s method..." % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()"))
  183         MiscUtil.PrintInfo("NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n" % (MPParams["NumProcesses"], MPParams["InputDataMode"], ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"])))
  184     
  185     ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeWorkerProcess, InitializeWorkerProcessArgs)
  186     
  187     # Start processing...
  188     if re.match("^Lazy$", MPParams["InputDataMode"], re.I):
  189         Results = ProcessPool.imap(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
  190     elif re.match("^InMemory$", MPParams["InputDataMode"], re.I):
  191         Results = ProcessPool.map(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
  192     else:
  193         MiscUtil.PrintError("The value, %s, specified for \"--inputDataMode\" is not supported." % (MPParams["InputDataMode"]))
  194 
  195     # Print out Psi4 version in the main process...
  196     MiscUtil.PrintInfo("\nInitializing Psi4...\n")
  197     Psi4Handle  = Psi4Util.InitializePsi4(PrintVersion = True, PrintHeader = False)
  198     OptionsInfo["psi4"] = Psi4Handle
  199     
  200     (MolCount, ValidMolCount, CalcFailedCount) = [0] * 3
  201     for Result in Results:
  202         MolCount += 1
  203         MolIndex, EncodedMol, CalcStatus, PartialCharges = Result
  204         
  205         if EncodedMol is None:
  206             continue
  207         
  208         ValidMolCount += 1
  209 
  210         Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
  211         
  212         if not CalcStatus:
  213             if not OptionsInfo["QuietMode"]:
  214                 MolName = RDKitUtil.GetMolName(Mol, MolCount)
  215                 MiscUtil.PrintWarning("Failed to calculate partial atomic charges for molecule %s" % MolName)
  216             
  217             CalcFailedCount += 1
  218             continue
  219         
  220         WriteMolPartialCharges(Writer, Mol, PartialCharges)
  221     
  222     return (MolCount, ValidMolCount, CalcFailedCount)
  223 
  224 def InitializeWorkerProcess(*EncodedArgs):
  225     """Initialize data for a worker process."""
  226     
  227     global Options, OptionsInfo
  228     
  229     if not OptionsInfo["QuietMode"]:
  230         MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid())
  231     
  232     # Decode Options and OptionInfo...
  233     Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0])
  234     OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1])
  235 
  236     # Psi4 is initialized in the worker process to avoid creation of redundant Psi4
  237     # output files for each process...
  238     OptionsInfo["Psi4Initialized"]  = False
  239     
  240 def InitializePsi4ForWorkerProcess():
  241     """Initialize Psi4 for a worker process."""
  242     
  243     if OptionsInfo["Psi4Initialized"]:
  244         return
  245 
  246     OptionsInfo["Psi4Initialized"] = True
  247     
  248     # Update output file...
  249     OptionsInfo["Psi4RunParams"]["OutputFile"] = Psi4Util.UpdatePsi4OutputFileUsingPID(OptionsInfo["Psi4RunParams"]["OutputFile"], os.getpid())
  250     
  251     # Intialize Psi4...
  252     OptionsInfo["psi4"] = Psi4Util.InitializePsi4(Psi4RunParams = OptionsInfo["Psi4RunParams"], Psi4OptionsParams = OptionsInfo["Psi4OptionsParams"], PrintVersion = False, PrintHeader = True)
  253     
  254 def WorkerProcess(EncodedMolInfo):
  255     """Process data for a worker process."""
  256     
  257     if not OptionsInfo["Psi4Initialized"]:
  258         InitializePsi4ForWorkerProcess()
  259     
  260     MolIndex, EncodedMol = EncodedMolInfo
  261     
  262     CalcStatus = False
  263     PartialCharges = None
  264     
  265     if EncodedMol is None:
  266         return [MolIndex, None, CalcStatus, PartialCharges]
  267     
  268     Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
  269     MolCount = MolIndex + 1
  270     
  271     if not CheckAndValidateMolecule(Mol, MolCount):
  272         return [MolIndex, None, CalcStatus, PartialCharges]
  273     
  274     # Setup a Psi4 molecule...
  275     Psi4Mol = SetupPsi4Mol(OptionsInfo["psi4"], Mol, MolCount)
  276     if Psi4Mol is None:
  277         return [MolIndex, None, CalcStatus, PartialCharges]
  278     
  279     CalcStatus, PartialCharges = CalculateMolPartialCharges(OptionsInfo["psi4"], Psi4Mol, Mol, MolCount)
  280     
  281     return [MolIndex, RDKitUtil.MolToBase64EncodedMolString(Mol, PropertyPickleFlags = Chem.PropertyPickleOptions.MolProps | Chem.PropertyPickleOptions.PrivateProps), CalcStatus, PartialCharges]
  282 
  283 def CalculateMolPartialCharges(Psi4Handle, Psi4Mol, Mol, MolNum):
  284     """Calculate partial atomic charges for a molecule."""
  285 
  286     if OptionsInfo["RESPChargesTypeMode"]:
  287         return CalculateRespMolPartialCharges(Psi4Handle, Psi4Mol, Mol, MolNum)
  288     else:
  289         return CalculateNonRespMolPartialCharges(Psi4Handle, Psi4Mol, Mol, MolNum)
  290     
  291 def CalculateNonRespMolPartialCharges(Psi4Handle, Psi4Mol, Mol, MolNum):
  292     """Calculate non-RESP partial atomic charges for a molecule."""
  293     
  294     Status = False
  295     PartialCharges = []
  296     
  297     #  Setup reference wave function...
  298     Reference = SetupReferenceWavefunction(Mol)
  299     Psi4Handle.set_options({'Reference': Reference})
  300     
  301     # Setup method name and basis set...
  302     MethodName, BasisSet = SetupMethodNameAndBasisSet(Mol)
  303 
  304     # Calculate single point energy to setup a wavefunction...
  305     Status, Energy, WaveFunction = Psi4Util.CalculateSinglePointEnergy(Psi4Handle, Psi4Mol, MethodName, BasisSet, ReturnWaveFunction = True, Quiet = OptionsInfo["QuietMode"])
  306 
  307     if not Status:
  308         PerformPsi4Cleanup(Psi4Handle)
  309         return (False, PartialCharges)
  310     
  311     # Calculate atomic point charges...
  312     try:
  313         Psi4Handle.oeprop(WaveFunction, OptionsInfo["ChargesPropType"])
  314     except Exception as ErrMsg:
  315         if not OptionsInfo["QuietMode"]:
  316             MiscUtil.PrintWarning("Failed to calculate partial atomic charges:\n%s" % ErrMsg)
  317         PerformPsi4Cleanup(Psi4Handle)
  318         return (False, PartialCharges)
  319     
  320     AtomicPointCharges = WaveFunction.atomic_point_charges().np.tolist()
  321     
  322     # Clean up...
  323     PerformPsi4Cleanup(Psi4Handle)
  324     
  325     # Format charges...
  326     PartialCharges = ["%.*f" % (OptionsInfo["Precision"], float(Value)) for Value in AtomicPointCharges]
  327 
  328     return (Status, PartialCharges)
  329 
  330 def CalculateRespMolPartialCharges(Psi4Handle, Psi4Mol, Mol, MolNum):
  331     """Calculate RESP partial atomic charges for a molecule."""
  332 
  333     PartialCharges = []
  334 
  335     RESPHandle = OptionsInfo["resp"]
  336     RESPOptions = OptionsInfo["ChargesRespOtions"]
  337     
  338     # Setup method name and basis set...
  339     MethodName, BasisSet = SetupMethodNameAndBasisSet(Mol)
  340     RESPOptions["METHOD_ESP"] = MethodName
  341     RESPOptions["BASIS_ESP"] = BasisSet
  342 
  343     # Calculate RESP charges...
  344     try:
  345         RESPCalcResults =  RESPHandle.resp([Psi4Mol], RESPOptions)
  346     except Exception as ErrMsg:
  347         if not OptionsInfo["QuietMode"]:
  348             MiscUtil.PrintWarning("Failed to calculate RESP partial atomic charges:\n%s" % ErrMsg)
  349         RemoveRESPGridFiles(MolNum)
  350         return (False, PartialCharges)
  351     
  352     ESPCharges = RESPCalcResults[0]
  353     RESPCharges = RESPCalcResults[1]
  354     
  355     PerformRESPCleanup(MolNum)
  356     
  357     # Format charges...
  358     PartialCharges = ["%.*f" % (OptionsInfo["Precision"], float(Value)) for Value in RESPCharges]
  359     
  360     return (True, PartialCharges)
  361 
  362 def PerformRESPCleanup(MolNum):
  363     """Peform RESP cleanup."""
  364 
  365     if OptionsInfo["ChargesRespParams"]["RemoveGridFiles"]:
  366         RemoveRESPGridFiles(MolNum)
  367     else:
  368         RenameRESPGridFiles(MolNum)
  369 
  370 def RemoveRESPGridFiles(MolNum):
  371     """Remove RESP grid files."""
  372 
  373     try:
  374         for GridFile in ["1_default_grid.dat", "1_default_grid_esp.dat", "results.out"]:
  375             if os.path.isfile(GridFile):
  376                 os.remove(GridFile)
  377     except Exception as ErrMsg:
  378         if not OptionsInfo["QuietMode"]:
  379             MiscUtil.PrintWarning("Failed to remove RESP results/grid files: %s\n" % ErrMsg)
  380 
  381 def RenameRESPGridFiles(MolNum):
  382     """Rename RESP grid files."""
  383 
  384     try:
  385         MolPrefix = "Mol%s" % MolNum
  386         GridFiles = ["1_default_grid.dat", "1_default_grid_esp.dat", "results.out"]
  387         NewGridFiles = ["%s_grid.dat" % MolPrefix, "%s_grid_esp.dat" % MolPrefix, "%s_resp_results.out" % MolPrefix]
  388         for GridFile, NewGridFile in zip(GridFiles, NewGridFiles):
  389             if os.path.isfile(GridFile):
  390                 shutil.move(GridFile, NewGridFile)
  391     except Exception as ErrMsg:
  392         if not OptionsInfo["QuietMode"]:
  393             MiscUtil.PrintWarning("Failed to move RESP results/grid files: %s\n" % ErrMsg)
  394 
  395 def WriteMolPartialCharges(Writer, Mol, PartialCharges):
  396     """Write out partial atomic charges for a molecule."""
  397 
  398     if PartialCharges is None:
  399         return
  400     
  401     if OptionsInfo["AtomAliasesFormatMode"]:
  402         for Atom, PartialCharge in zip(Mol.GetAtoms(), PartialCharges):
  403             Atom.SetProp('molFileAlias', PartialCharge)
  404     else:
  405         ChargesValues = "\n".join(PartialCharges)
  406         Mol.SetProp(OptionsInfo["DataFieldLabel"], ChargesValues)
  407     
  408     Writer.write(Mol)
  409     
  410 def SetupPsi4Mol(Psi4Handle, Mol, MolCount = None):
  411     """Setup a Psi4 molecule object."""
  412     
  413     MolGeometry = RDKitUtil.GetPsi4XYZFormatString(Mol, NoCom = True, NoReorient = True)
  414     
  415     try:
  416         Psi4Mol = Psi4Handle.geometry(MolGeometry)
  417     except Exception as ErrMsg:
  418         Psi4Mol = None
  419         if not OptionsInfo["QuietMode"]:
  420             MiscUtil.PrintWarning("Failed to create Psi4 molecule from geometry string: %s\n" % ErrMsg)
  421             MolName = RDKitUtil.GetMolName(Mol, MolCount)
  422             MiscUtil.PrintWarning("Ignoring molecule: %s" % MolName)
  423     
  424     return Psi4Mol
  425 
  426 def PerformPsi4Cleanup(Psi4Handle):
  427     """Perform clean up."""
  428     
  429     # Clean up after Psi4 run ...
  430     Psi4Handle.core.clean()
  431     
  432     # Clean up any leftover scratch files...
  433     if OptionsInfo["MPMode"]:
  434         Psi4Util.RemoveScratchFiles(Psi4Handle, OptionsInfo["Psi4RunParams"]["OutputFile"])
  435 
  436 def SetupRESP():
  437     """Load resp and return its handle."""
  438     
  439     if not OptionsInfo["RESPChargesTypeMode"]:
  440         return None
  441         
  442     try:
  443         import resp
  444     except ImportError as ErrMsg:
  445         sys.stderr.write("\nFailed to import Psi4 module/package resp: %s\n" % ErrMsg)
  446         sys.stderr.write("Check/update your Psi4 environment and try again.\n\n")
  447         sys.exit(1)
  448 
  449     return resp
  450     
  451 def CheckAndValidateMolecule(Mol, MolCount = None):
  452     """Validate molecule for Psi4 calculations."""
  453     
  454     if Mol is None:
  455         if not OptionsInfo["QuietMode"]:
  456             MiscUtil.PrintInfo("\nProcessing molecule number %s..." % MolCount)
  457         return False
  458     
  459     MolName = RDKitUtil.GetMolName(Mol, MolCount)
  460     if not OptionsInfo["QuietMode"]:
  461         MiscUtil.PrintInfo("\nProcessing molecule %s..." % MolName)
  462     
  463     if RDKitUtil.IsMolEmpty(Mol):
  464         if not OptionsInfo["QuietMode"]:
  465             MiscUtil.PrintWarning("Ignoring empty molecule: %s\n" % MolName)
  466         return False
  467     
  468     if not RDKitUtil.ValidateElementSymbols(RDKitUtil.GetAtomSymbols(Mol)):
  469         if not OptionsInfo["QuietMode"]:
  470             MiscUtil.PrintWarning("Ignoring molecule containing invalid element symbols: %s\n" % MolName)
  471         return False
  472     
  473     # Check for 3D flag...
  474     if not Mol.GetConformer().Is3D():
  475         if not OptionsInfo["QuietMode"]:
  476             MiscUtil.PrintWarning("3D tag is not set for molecule: %s\n" % MolName)
  477     
  478     # Check for missing hydrogens...
  479     if RDKitUtil.AreHydrogensMissingInMolecule(Mol):
  480         if not OptionsInfo["QuietMode"]:
  481             MiscUtil.PrintWarning("Missing hydrogens in molecule: %s\n" % MolName)
  482     
  483     return True
  484 
  485 def SetupMethodNameAndBasisSet(Mol):
  486     """Setup method name and basis set."""
  487     
  488     MethodName = OptionsInfo["MethodName"]
  489     if OptionsInfo["MethodNameAuto"]:
  490         MethodName = "B3LYP"
  491     
  492     BasisSet = OptionsInfo["BasisSet"]
  493     if OptionsInfo["BasisSetAuto"]:
  494         BasisSet = "6-31+G**" if RDKitUtil.IsAtomSymbolPresentInMol(Mol, "S") else "6-31G**"
  495     
  496     return (MethodName, BasisSet)
  497 
  498 def SetupReferenceWavefunction(Mol):
  499     """Setup reference wavefunction."""
  500     
  501     Reference = OptionsInfo["Reference"]
  502     if OptionsInfo["ReferenceAuto"]:
  503         Reference = 'UHF' if (RDKitUtil.GetSpinMultiplicity(Mol) > 1) else 'RHF'
  504     
  505     return Reference
  506 
  507 def ProcessOptionChargesRespParameters():
  508     """Process charges RSEP parameters option."""
  509 
  510     ParamsOptionName = "--chargesRespParams"
  511     ParamsOptionValue = Options["--chargesRespParams"]
  512 
  513     VDWScaleFactors = [1.4, 1.6, 1.8, 2.0]
  514     VDWRadii = {'H': 1.20, 'HE': 1.20, 'LI': 1.37, 'BE': 1.45, 'B': 1.45, 'C': 1.50, 'N': 1.50, 'O': 1.40, 'F': 1.35, 'NE': 1.30, 'NA': 1.57, 'MG': 1.36, 'AL': 1.24, 'SI': 1.17, 'P': 1.80, 'S': 1.75, 'CL': 1.70}
  515     
  516     ParamsInfo = {"MaxIter": 25, "RestrainHydrogens": False, "RemoveGridFiles": True, "RespA": 0.0005, "RespB": 0.1, "Tolerance": 1e-5, "VDWRadii": VDWRadii, "VDWScaleFactors": VDWScaleFactors, "VDWPointDensity": 1.0}
  517 
  518     if re.match("^auto$", ParamsOptionValue, re.I):
  519         SetupChargesRespOptions(ParamsInfo)
  520         return
  521     
  522     # Setup a canonical paramater names...
  523     ValidParamNames = []
  524     CanonicalParamNamesMap = {}
  525     for ParamName in sorted(ParamsInfo):
  526         ValidParamNames.append(ParamName)
  527         CanonicalParamNamesMap[ParamName.lower()] = ParamName
  528     
  529     ParamsOptionValue = ParamsOptionValue.strip()
  530     if not ParamsOptionValue:
  531         MiscUtil.PrintError("No valid parameter name and value pairs specified using \"%s\" option" % ParamsOptionName)
  532     
  533     ParamsOptionValueWords = ParamsOptionValue.split(",")
  534     if len(ParamsOptionValueWords) % 2:
  535         MiscUtil.PrintError("The number of comma delimited paramater names and values, %d, specified using \"%s\" option must be an even number." % (len(ParamsOptionValueWords), ParamsOptionName))
  536     
  537     # Validate paramater name and value pairs...
  538     for Index in range(0, len(ParamsOptionValueWords), 2):
  539         Name = ParamsOptionValueWords[Index].strip()
  540         Value = ParamsOptionValueWords[Index + 1].strip()
  541 
  542         CanonicalName = Name.lower()
  543         if  not CanonicalName in CanonicalParamNamesMap:
  544             MiscUtil.PrintError("The parameter name, %s, specified using \"%s\" is not a valid name. Supported parameter names: %s" % (Name, ParamsOptionName, " ".join(ValidParamNames)))
  545 
  546         ParamName = CanonicalParamNamesMap[CanonicalName]
  547         ParamValue = Value
  548         
  549         if re.match("^MaxIter$", ParamName, re.I):
  550             if not MiscUtil.IsInteger(Value):
  551                 MiscUtil.PrintError("The parameter value, %s, specified for parameter name, %s, using \"%s\" option must be an integer." % (Value, Name, ParamsOptionName))
  552             Value = int(Value)
  553             if Value <= 0:
  554                 MiscUtil.PrintError("The parameter value, %s, specified for parameter name, %s, using \"%s\" option is not a valid value. Supported values: > 0" % (Value, Name, ParamsOptionName))
  555             ParamValue = Value
  556         elif re.match("^(RestrainHydrogens|RemoveGridFiles)$", ParamName, re.I):
  557             if not re.match("^(yes|no)$", Value, re.I):
  558                 MiscUtil.PrintError("The parameter value, %s, specified for parameter name, %s, using \"%s\" option is not a valid value. Supported values: yes or no" % (Value, Name, ParamsOptionName))
  559             ParamValue = True if re.match("^yes$", Value, re.I) else False
  560         elif re.match("^(RespA|RespB|VDWPointDensity)$", ParamName, re.I):
  561             if not MiscUtil.IsNumber(Value):
  562                 MiscUtil.PrintError("The parameter value, %s, specified for parameter name, %s, using \"%s\" option must be a float." % (Value, Name, ParamsOptionName))
  563             Value = float(Value)
  564             if Value <= 0:
  565                 MiscUtil.PrintError("The parameter value, %s, specified for parameter name, %s, using \"%s\" option is not a valid value. Supported values: > 0" % (Value, Name, ParamsOptionName))
  566             ParamValue = Value
  567         elif re.match("^Tolerance$", ParamName, re.I):
  568             if not MiscUtil.IsNumber(Value):
  569                 MiscUtil.PrintError("The parameter value, %s, specified for parameter name, %s, using \"%s\" option must be a float." % (Value, Name, ParamsOptionName))
  570             Value = float(Value)
  571             if Value < 0:
  572                 MiscUtil.PrintError("The parameter value, %s, specified for parameter name, %s, using \"%s\" option is not a valid value. Supported values: >= 0" % (Value, Name, ParamsOptionName))
  573             ParamValue = Value
  574         elif re.match("^VDWScaleFactors$", ParamName, re.I):
  575             ScaleFactorsValue = Value.strip()
  576             if not ScaleFactorsValue:
  577                 MiscUtil.PrintError("No parameter value specified for parameter name, %s, using \"%s\" option." % (Name, ParamsOptionName))
  578             ScaleFactorsWords = ScaleFactorsValue.split()
  579             
  580             ScaleFactors = []
  581             LastScaleFactor = 0.0
  582             for ScaleFactor in ScaleFactorsWords:
  583                 if not MiscUtil.IsNumber(ScaleFactor):
  584                     MiscUtil.PrintError("The value, %s, in parameter value, %s, specified for parameter name, %s, using \"%s\" option must be a float." % (ScaleFactor, Value, Name, ParamsOptionName))
  585                 ScaleFactor = float(ScaleFactor)
  586                 if ScaleFactor <= 0:
  587                     MiscUtil.PrintError("The value, %s, in parameter value, %s, specified for parameter name, %s, using \"%s\" option is not a valid value. Supported values: > 0" % (ScaleFactor, Value, Name, ParamsOptionName))
  588                 if len(ScaleFactors):
  589                     if ScaleFactor <= LastScaleFactor:
  590                         MiscUtil.PrintError("The value, %s, in parameter value, %s, specified for parameter name, %s, using \"%s\" option is not a valid value. It must be greater than the previous value, %s, specified for the scale factor." % (ScaleFactor, Value, Name, ParamsOptionName, LastScaleFactor))
  591                         
  592                 LastScaleFactor = ScaleFactor
  593                 ScaleFactors.append(ScaleFactor)
  594             
  595             ParamValue = ScaleFactors
  596         elif re.match("^VDWRadii$", ParamName, re.I):
  597             RadiiValue = Value.strip()
  598             if not RadiiValue:
  599                 MiscUtil.PrintError("No parameter value specified for parameter name, %s, using \"%s\" option." % (Name, ParamsOptionName))
  600             RadiiWords = RadiiValue.split()
  601             if len(RadiiWords) % 2:
  602                 MiscUtil.PrintError("The number of space delimited values, %s, in parameter value, %s, specified for parameter name, %s, using \"%s\" option is not valid. It must be an even number." % (len(RadiiWords), Value, Name, ParamsOptionName))
  603             
  604             for RadiiWordsIndex in range(0, len(RadiiWords), 2):
  605                 ElementSymbol = RadiiWords[RadiiWordsIndex].upper()
  606                 VDWRadius = RadiiWords[RadiiWordsIndex + 1]
  607 
  608                 if not MiscUtil.IsNumber(VDWRadius):
  609                     MiscUtil.PrintError("The vdw radius value, %s, in parameter value, %s, specified for parameter name, %s, using \"%s\" option is not a valid." % (VDWRadius, Value, Name, ParamsOptionName))
  610                     
  611                 if not RDKitUtil.IsValidElementSymbol(ElementSymbol.capitalize()):
  612                     MiscUtil.PrintWarning("The element symbol, %s, in parameter value, %s, specified for parameter name, %s, using \"%s\" option is not a valid." % (ElementSymbol, Value, Name, ParamsOptionName))
  613                 VDWRadii[ElementSymbol] = float(VDWRadius)
  614             
  615             ParamValue = VDWRadii
  616         else:
  617             ParamValue = Value
  618             
  619         # Set value...
  620         ParamsInfo[ParamName] = ParamValue
  621     
  622     SetupChargesRespOptions(ParamsInfo)
  623 
  624 def SetupChargesRespOptions(ParamsInfo):
  625     """Setup options for calculating RESP charges."""
  626 
  627     # Initialize ESP options...
  628     ChargesRespOtions = {}
  629     
  630     ChargesRespOtions["METHOD_ESP"] = None
  631     ChargesRespOtions["BASIS_ESP"] = None
  632 
  633     # Setup ESP options...
  634     ParamNameToESPOptionID = {"MaxIter": "MAX_IT", "RespA": "RESP_A", "RespB": "RESP_B", "Tolerance": "TOLER", "VDWRadii": "VDW_RADII", "VDWScaleFactors": "VDW_SCALE_FACTORS", "VDWPointDensity": "VDW_POINT_DENSITY"}
  635     
  636     for ParamName in ParamNameToESPOptionID:
  637         ESPOptionID = ParamNameToESPOptionID[ParamName]
  638         ChargesRespOtions[ESPOptionID] = ParamsInfo[ParamName]
  639 
  640     # Setup IHFREE option...
  641     ChargesRespOtions["IHFREE"] = False if ParamsInfo["RestrainHydrogens"] else True
  642 
  643     OptionsInfo["ChargesRespParams"] = ParamsInfo
  644     OptionsInfo["ChargesRespOtions"] = ChargesRespOtions
  645 
  646 def CheckSupportForRESPCalculations():
  647     """Check support for RESP calculations."""
  648 
  649     if not OptionsInfo["MPMode"]:
  650         return
  651     
  652     if not OptionsInfo["RESPChargesTypeMode"]:
  653         return
  654     
  655     MiscUtil.PrintInfo("")
  656     MiscUtil.PrintError("Multiprocessing is not supported during the calculation of RSEP partial atomic\ncharges. The RESP module is not conducive for multiprocessing. The names of the results\nand grid output files are not unique for molecules during the RESP calculations spread\nacross multiple processes.")
  657 
  658 def ProcessOptions():
  659     """Process and validate command line arguments and options."""
  660     
  661     MiscUtil.PrintInfo("Processing options...")
  662     
  663     # Validate options...
  664     ValidateOptions()
  665     
  666     OptionsInfo["Infile"] = Options["--infile"]
  667     ParamsDefaultInfoOverride = {"RemoveHydrogens": False}
  668     OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters("--infileParams", Options["--infileParams"], InfileName = Options["--infile"], ParamsDefaultInfo = ParamsDefaultInfoOverride)
  669     
  670     OptionsInfo["Outfile"] = Options["--outfile"]
  671     OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters("--outfileParams", Options["--outfileParams"])
  672     
  673     OptionsInfo["Overwrite"] = Options["--overwrite"]
  674     
  675     # Method, basis set, and reference wavefunction...
  676     OptionsInfo["BasisSet"] = Options["--basisSet"]
  677     OptionsInfo["BasisSetAuto"] = True if re.match("^auto$", Options["--basisSet"], re.I) else False
  678     
  679     OptionsInfo["MethodName"] = Options["--methodName"]
  680     OptionsInfo["MethodNameAuto"] = True if re.match("^auto$", Options["--methodName"], re.I) else False
  681     
  682     OptionsInfo["Reference"] = Options["--reference"]
  683     OptionsInfo["ReferenceAuto"] = True if re.match("^auto$", Options["--reference"], re.I) else False
  684     
  685     # Run and options parameters...
  686     OptionsInfo["Psi4OptionsParams"] = Psi4Util.ProcessPsi4OptionsParameters("--psi4OptionsParams", Options["--psi4OptionsParams"])
  687     OptionsInfo["Psi4RunParams"] = Psi4Util.ProcessPsi4RunParameters("--psi4RunParams", Options["--psi4RunParams"], InfileName = OptionsInfo["Infile"])
  688 
  689     ChargesType = Options["--chargesType"]
  690     ChargesPropType = None
  691     RESPChargesTypeMode = False
  692     if re.match("^Mulliken$", ChargesType, re.I):
  693         ChargesType = 'Mulliken'
  694         ChargesPropType = 'MULLIKEN_CHARGES'
  695     elif re.match("^Lowdin$", ChargesType, re.I):
  696         ChargesType = 'Lowdin'
  697         ChargesPropType = 'LOWDIN_CHARGES'
  698     elif re.match("^RESP$", ChargesType, re.I):
  699         ChargesType = 'RESP'
  700         ChargesPropType = None
  701         RESPChargesTypeMode = True
  702     else:
  703         MiscUtil.PrintError("The value, %s, specified for charge mode is not supported. " % Options["--chargesType"])
  704         
  705     OptionsInfo["ChargesType"] = ChargesType
  706     OptionsInfo["ChargesPropType"] = ChargesPropType
  707     OptionsInfo["RESPChargesTypeMode"] = RESPChargesTypeMode
  708 
  709     ProcessOptionChargesRespParameters()
  710     
  711     AtomAliasesFormatMode = True
  712     if re.match("^DataField", Options["--chargesSDFormat"], re.I):
  713         AtomAliasesFormatMode = False
  714     OptionsInfo["AtomAliasesFormatMode"] = AtomAliasesFormatMode
  715 
  716     DataFieldLabel = Options["--dataFieldLabel"]
  717     if re.match("^auto$", DataFieldLabel, re.I):
  718         DataFieldLabel = "Psi4_%s_Charges (a.u.)" % Options["--chargesType"]
  719     OptionsInfo["DataFieldLabel"] = DataFieldLabel
  720     
  721     OptionsInfo["MPMode"] = True if re.match("^yes$", Options["--mp"], re.I) else False
  722     OptionsInfo["MPParams"] = MiscUtil.ProcessOptionMultiprocessingParameters("--mpParams", Options["--mpParams"])
  723     
  724     OptionsInfo["Precision"] = int(Options["--precision"])
  725     
  726     OptionsInfo["QuietMode"] = True if re.match("^yes$", Options["--quiet"], re.I) else False
  727 
  728 def RetrieveOptions():
  729     """Retrieve command line arguments and options."""
  730     
  731     # Get options...
  732     global Options
  733     Options = docopt(_docoptUsage_)
  734     
  735     # Set current working directory to the specified directory...
  736     WorkingDir = Options["--workingdir"]
  737     if WorkingDir:
  738         os.chdir(WorkingDir)
  739     
  740     # Handle examples option...
  741     if "--examples" in Options and Options["--examples"]:
  742         MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_))
  743         sys.exit(0)
  744 
  745 def ValidateOptions():
  746     """Validate option values."""
  747 
  748     MiscUtil.ValidateOptionTextValue("-c, --chargesType", Options["--chargesType"], "Mulliken Lowdin RESP")
  749     MiscUtil.ValidateOptionTextValue("--chargesSDFormat", Options["--chargesSDFormat"], "AtomAliases DataField")
  750     
  751     MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"])
  752     MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "sdf sd mol")
  753     
  754     MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "sdf sd")
  755     MiscUtil.ValidateOptionsOutputFileOverwrite("-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"])
  756     MiscUtil.ValidateOptionsDistinctFileNames("-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"])
  757     
  758     MiscUtil.ValidateOptionTextValue("--mp", Options["--mp"], "yes no")
  759     MiscUtil.ValidateOptionTextValue("-q, --quiet", Options["--quiet"], "yes no")
  760     
  761     MiscUtil.ValidateOptionIntegerValue("-p, --precision", Options["--precision"], {">": 0})
  762     
  763 # Setup a usage string for docopt...
  764 _docoptUsage_ = """
  765 Psi4CalculatePartialCharges.py - Calculate partial atomic charges
  766 
  767 Usage:
  768     Psi4CalculatePartialCharges.py [--basisSet <text>] [--chargesType <Mulliken or Lowdin>] [--chargesRespParams <Name,Value,...>]
  769                                    [--chargesSDFormat <AtomAliases or DataField>] [--dataFieldLabel <text>] [--infileParams <Name,Value,...>]
  770                                    [--methodName <text>] [--mp <yes or no>] [--mpParams <Name, Value,...>] [ --outfileParams <Name,Value,...> ]
  771                                    [--overwrite] [--precision <number>] [--psi4OptionsParams <Name,Value,...>] [--psi4RunParams <Name,Value,...>] 
  772                                    [--quiet <yes or no>] [--reference <text>] [-w <dir>] -i <infile> -o <outfile> 
  773     Psi4CalculatePartialCharges.py -h | --help | -e | --examples
  774 
  775 Description:
  776     Calculate partial atomic charges for molecules using a specified method name
  777     and basis set. The molecules must have 3D coordinates in input file. The molecular
  778     geometry is not optimized before the calculation. In addition, hydrogens must
  779     be present for all molecules in input file. A single point energy calculation is 
  780     performed before calculating the partial atomic charges. The 3D coordinates
  781     are not modified during the calculation.
  782     
  783     A Psi4 XYZ format geometry string is automatically generated for each molecule
  784     in input file. It contains atom symbols and 3D coordinates for each atom in a
  785     molecule. In addition, the formal charge and spin multiplicity are present in the
  786     the geometry string. These values are either retrieved from molecule properties
  787     named 'FormalCharge' and 'SpinMultiplicty' or dynamically calculated for a
  788     molecule.
  789 
  790     The supported input file formats are: Mol (.mol), SD (.sdf, .sd)
  791 
  792     The supported output file formats are: SD (.sdf, .sd)
  793 
  794 Options:
  795     -b, --basisSet <text>  [default: auto]
  796         Basis set to use for calculating single point energy before partial atomic
  797         charges. Default: 6-31+G** for sulfur containing molecules; Otherwise,
  798         6-31G** [ Ref 150 ]. The specified value must be a valid Psi4 basis set.
  799         No validation is performed.
  800         
  801         The following list shows a representative sample of basis sets available
  802         in Psi4:
  803             
  804             STO-3G, 6-31G, 6-31+G, 6-31++G, 6-31G*, 6-31+G*,  6-31++G*, 
  805             6-31G**, 6-31+G**, 6-31++G**, 6-311G, 6-311+G, 6-311++G,
  806             6-311G*, 6-311+G*, 6-311++G*, 6-311G**, 6-311+G**, 6-311++G**,
  807             cc-pVDZ, cc-pCVDZ, aug-cc-pVDZ, cc-pVDZ-DK, cc-pCVDZ-DK, def2-SVP,
  808             def2-SVPD, def2-TZVP, def2-TZVPD, def2-TZVPP, def2-TZVPPD
  809             
  810     -c, --chargesType <Mulliken, Lowdin, or RESP>  [default: Mulliken]
  811         Type of partial atomic charges to calculate. Possible values: Mulliken, Lowdin,
  812         or RESP [ Ref 158 ]. Multiprocessing is not supported during the calculation
  813         of RSEP charges. In addition, the RSEP calculation relies on the presence of
  814         the RESP Psi4 Plugin in your environment.
  815     --chargesRespParams <Name,Value,...>  [default: auto]
  816         A comma delimited list of parameter name and value  pairs for calculating
  817         RESP [ Ref 158 ] charges. A space is used as a delimiter for multiple values in
  818         a name and value pair. The supported parameter names, along with
  819         their default values, are shown below:
  820             
  821             maxIter, 25
  822             restrainHydrogens, no
  823             removeGridFiles, yes
  824             respA, 0.0005
  825             respB, 0.1
  826             tolerance, 1e-5
  827             vdwRadii, auto
  828             vdwScaleFactors, 1.4 1.6 1.8 2.0
  829             vdwPointDensity, 1.0
  830             
  831         maxIter: Maximum number of iterations to perform during charge fitting.
  832         
  833         restrainHydrogens: Restrain hydrogens during charge fitting.
  834         
  835         removeGridFiles: Keep or remove the following ESP grid and output files:
  836         1_default_grid.dat, 1_default_grid_esp.dat, results.out. The output
  837         files are removed by default. You may optionally keep the output files. The
  838         output files are automatically renamed to the following file for 'No' value of
  839         'removeGridFiles': Mol<MolNum>_grid.dat, Mol<MolNum>_grid_esp.dat,
  840         Mol<MolNum>_resp_results.out.
  841         
  842         respA: Scale factor defining the  asymptotic limits of the strength of the
  843         restraint.
  844         
  845         respB: The 'tightness' of the hyperbola around its minimum for the
  846         restraint.
  847         
  848         tolerance: Tolerance for charges during charge fitting to the ESP.
  849         
  850         vdwRadii: vdw radii for elements in angstroms. It's a space delimited list of
  851         element symbol and radius value pairs. The default list is shown below:
  852             
  853             H 1.20 He 1.20 Li 1.37 Be 1.45 B 1.45 C 1.50 N 1.50 O 1.40 F 1.35
  854             Ne 1.30 Na 1.57 Mg 1.36 Al 1.24 Si 1.17P 1.80 S 1.75 Cl 1.7
  855             
  856         You may specify all or a subset of element symbol and vdw radius pairs to
  857         update the default values.
  858         
  859         vdwScaleFactors: The vdw radii are scaled by the scale factors to set the
  860         grid points at the shells for calculating the ESP using quantum methodology.
  861         The default number of shells is 4 and corresponds to the number of vdw
  862         scale factors.The 'shell' points are written to a grid file for calculating the ESP.
  863         
  864         vdwPointDensity: Approximate number of points to generate per square
  865         angstrom surface area.
  866     --chargesSDFormat <AtomAliases or DataField>  [default: AtomAliases]
  867         Format for writing out partial atomic charges to SD file. Possible values:
  868         AtomAliases or DataField.
  869         
  870         The charges are stored as atom property named 'molFileAlias' for
  871         'AtomAliases' format and may be retrieved using the RDKit function
  872         'GetProp' for atoms: Aotm.GetProp('molFileAliases').
  873         
  874         The charges are stored under a data field label specified using
  875         '-d, --dataFieldLabel' for 'DataField' format and may be retrieved using the
  876         RDKit function 'GetProp' for molecules.
  877     -d, --dataFieldLabel <text>  [default: auto]
  878         Data field label to use for storing charged in SD file during 'DataField' value
  879         of '-c, --chargesSDFormat'. Default: Psi4_<ChargesType>_Charges (a.u.)
  880     -e, --examples
  881         Print examples.
  882     -h, --help
  883         Print this help message.
  884     -i, --infile <infile>
  885         Input file name.
  886     --infileParams <Name,Value,...>  [default: auto]
  887         A comma delimited list of parameter name and value pairs for reading
  888         molecules from files. The supported parameter names for different file
  889         formats, along with their default values, are shown below:
  890             
  891             SD, MOL: removeHydrogens,no,sanitize,yes,strictParsing,yes
  892             
  893     -m, --methodName <text>  [default: auto]
  894         Method to use for calculating single point energy before partial atomic
  895         charges. Default: B3LYP [ Ref 150 ]. The specified value must be a valid
  896         Psi4 method name. No validation is performed.
  897         
  898         The following list shows a representative sample of methods available
  899         in Psi4:
  900             
  901             B1LYP, B2PLYP, B2PLYP-D3BJ, B2PLYP-D3MBJ, B3LYP, B3LYP-D3BJ,
  902             B3LYP-D3MBJ, CAM-B3LYP, CAM-B3LYP-D3BJ, HF, HF-D3BJ,  HF3c, M05,
  903             M06, M06-2x, M06-HF, M06-L, MN12-L, MN15, MN15-D3BJ,PBE, PBE0,
  904             PBEH3c, PW6B95, PW6B95-D3BJ, WB97, WB97X, WB97X-D, WB97X-D3BJ
  905             
  906     --mp <yes or no>  [default: no]
  907         Use multiprocessing.
  908          
  909         By default, input data is retrieved in a lazy manner via mp.Pool.imap()
  910         function employing lazy RDKit data iterable. This allows processing of
  911         arbitrary large data sets without any additional requirements memory.
  912         
  913         All input data may be optionally loaded into memory by mp.Pool.map()
  914         before starting worker processes in a process pool by setting the value
  915         of 'inputDataMode' to 'InMemory' in '--mpParams' option.
  916         
  917         A word to the wise: The default 'chunkSize' value of 1 during 'Lazy' input
  918         data mode may adversely impact the performance. The '--mpParams' section
  919         provides additional information to tune the value of 'chunkSize'.
  920     --mpParams <Name,Value,...>  [default: auto]
  921         A comma delimited list of parameter name and value pairs to configure
  922         multiprocessing.
  923         
  924         The supported parameter names along with their default and possible
  925         values are shown below:
  926         
  927             chunkSize, auto
  928             inputDataMode, Lazy   [ Possible values: InMemory or Lazy ]
  929             numProcesses, auto   [ Default: mp.cpu_count() ]
  930         
  931         These parameters are used by the following functions to configure and
  932         control the behavior of multiprocessing: mp.Pool(), mp.Pool.map(), and
  933         mp.Pool.imap().
  934         
  935         The chunkSize determines chunks of input data passed to each worker
  936         process in a process pool by mp.Pool.map() and mp.Pool.imap() functions.
  937         The default value of chunkSize is dependent on the value of 'inputDataMode'.
  938         
  939         The mp.Pool.map() function, invoked during 'InMemory' input data mode,
  940         automatically converts RDKit data iterable into a list, loads all data into
  941         memory, and calculates the default chunkSize using the following method
  942         as shown in its code:
  943         
  944             chunkSize, extra = divmod(len(dataIterable), len(numProcesses) * 4)
  945             if extra: chunkSize += 1
  946         
  947         For example, the default chunkSize will be 7 for a pool of 4 worker processes
  948         and 100 data items.
  949         
  950         The mp.Pool.imap() function, invoked during 'Lazy' input data mode, employs
  951         'lazy' RDKit data iterable to retrieve data as needed, without loading all the
  952         data into memory. Consequently, the size of input data is not known a priori.
  953         It's not possible to estimate an optimal value for the chunkSize. The default 
  954         chunkSize is set to 1.
  955         
  956         The default value for the chunkSize during 'Lazy' data mode may adversely
  957         impact the performance due to the overhead associated with exchanging
  958         small chunks of data. It is generally a good idea to explicitly set chunkSize to
  959         a larger value during 'Lazy' input data mode, based on the size of your input
  960         data and number of processes in the process pool.
  961         
  962         The mp.Pool.map() function waits for all worker processes to process all
  963         the data and return the results. The mp.Pool.imap() function, however,
  964         returns the the results obtained from worker processes as soon as the
  965         results become available for specified chunks of data.
  966         
  967         The order of data in the results returned by both mp.Pool.map() and 
  968         mp.Pool.imap() functions always corresponds to the input data.
  969     -o, --outfile <outfile>
  970         Output file name.
  971     --outfileParams <Name,Value,...>  [default: auto]
  972         A comma delimited list of parameter name and value pairs for writing
  973         molecules to files. The supported parameter names for different file
  974         formats, along with their default values, are shown below:
  975             
  976             SD: kekulize,yes,forceV3000,no
  977             
  978     --overwrite
  979         Overwrite existing files.
  980     --precision <number>  [default: 4]
  981         Floating point precision for writing energy values.
  982     --psi4OptionsParams <Name,Value,...>  [default: none]
  983         A comma delimited list of Psi4 option name and value pairs for setting
  984         global and module options. The names are 'option_name' for global options
  985         and 'module_name__option_name' for options local to a module. The
  986         specified option names must be valid Psi4 names. No validation is
  987         performed.
  988         
  989         The specified option name and  value pairs are processed and passed to
  990         psi4.set_options() as a dictionary. The supported value types are float,
  991         integer, boolean, or string. The float value string is converted into a float.
  992         The valid values for a boolean string are yes, no, true, false, on, or off. 
  993     --psi4RunParams <Name,Value,...>  [default: auto]
  994         A comma delimited list of parameter name and value pairs for configuring
  995         Psi4 jobs.
  996         
  997         The supported parameter names along with their default and possible
  998         values are shown below:
  999              
 1000             MemoryInGB, 1
 1001             NumThreads, 1
 1002             OutputFile, auto   [ Possible  values: stdout, quiet, or FileName ]
 1003             ScratchDir, auto   [ Possivle values: DirName]
 1004             RemoveOutputFile, yes   [ Possible values: yes, no, true, or false]
 1005             
 1006         These parameters control the runtime behavior of Psi4.
 1007         
 1008         The default file name for 'OutputFile' is <InFileRoot>_Psi4.out. The PID
 1009         is appended to output file name during multiprocessing as shown:
 1010         <InFileRoot>_Psi4_<PIDNum>.out. The 'stdout' value for 'OutputType'
 1011         sends Psi4 output to stdout. The 'quiet' or 'devnull' value suppresses
 1012         all Psi4 output.
 1013         
 1014         The default 'Yes' value of 'RemoveOutputFile' option forces the removal
 1015         of any existing Psi4 before creating new files to append output from
 1016         multiple Psi4 runs.
 1017         
 1018         The option 'ScratchDir' is a directory path to the location of scratch
 1019         files. The default value corresponds to Psi4 default. It may be used to
 1020         override the deafult path.
 1021     -q, --quiet <yes or no>  [default: no]
 1022         Use quiet mode. The warning and information messages will not be printed.
 1023     -r, --reference <text>  [default: auto]
 1024         Reference wave function to use for calculating single point energy before
 1025         partial atomic charges. Default: RHF or UHF. The default values are Restricted
 1026         Hartree-Fock (RHF) for closed-shell molecules with all electrons paired and
 1027         Unrestricted Hartree-Fock (UHF) for open-shell molecules with unpaired electrons.
 1028         
 1029         The specified value must be a valid Psi4 reference wave function. No validation
 1030         is performed. For example: ROHF, CUHF, RKS, etc.
 1031         
 1032         The spin multiplicity determines the default value of reference wave function
 1033         for input molecules. It is calculated from number of free radical electrons using
 1034         Hund's rule of maximum multiplicity defined as 2S + 1 where S is the total
 1035         electron spin. The total spin is 1/2 the number of free radical electrons in a 
 1036         molecule. The value of 'SpinMultiplicity' molecule property takes precedence
 1037         over the calculated value of spin multiplicity.
 1038     -w, --workingdir <dir>
 1039         Location of working directory which defaults to the current directory.
 1040 
 1041 Examples:
 1042     To calculate Mulliken partial atomic charges using  B3LYP/6-31G** and
 1043     B3LYP/6-31+G** for non-sulfur and sulfur containing molecules in a SD
 1044     file with 3D structures, use RHF and UHF for closed-shell and open-shell
 1045     molecules, and write a new SD file, type:
 1046 
 1047         % Psi4CalculatePartialCharges.py  -i Psi4Sample3D.sdf 
 1048           -o Psi4Sample3DOut.sdf
 1049 
 1050     To run the first example for calculating RESP charges using a default set of
 1051     parameters for the RESP calculation and write out a SD file, type:
 1052 
 1053         % Psi4CalculatePartialCharges.py  --chargesType RESP
 1054            -i Psi4Sample3D.sdf -o Psi4Sample3DOut.sdf
 1055 
 1056     To run the first example for calculating RESP charges using an explicit set
 1057     of specific parameters for the RESP calculation and write out a SD file, type:
 1058 
 1059         % Psi4CalculatePartialCharges.py  --chargesType RESP
 1060            --chargesRespParams "respA, 0.0005, respB, 0.1, vdwScaleFactors,
 1061            1.4 1.6 1.8 2.0" -i Psi4Sample3D.sdf -o Psi4Sample3DOut.sdf
 1062 
 1063     To run the first example in multiprocessing mode on all available CPUs
 1064     without loading all data into memory and write out a SD file, type:
 1065 
 1066         % Psi4CalculatePartialCharges.py --mp yes -i Psi4Sample3D.sdf
 1067           -o Psi4Sample3DOut.sdf
 1068 
 1069     To run the first example in multiprocessing mode on all available CPUs
 1070     by loading all data into memory and write out a SD file, type:
 1071 
 1072         % Psi4CalculatePartialCharges.py  --mp yes --mpParams "inputDataMode,
 1073           InMemory" -i Psi4Sample3D.sdf  -o Psi4Sample3DOut.sdf
 1074 
 1075     To run the first example in multiprocessing mode on all available CPUs
 1076     without loading all data into memory along with multiple threads for each
 1077     Psi4 run and write out a SD file, type:
 1078 
 1079         % Psi4CalculatePartialCharges.py --mp yes --psi4RunParams "NumThreads,2"
 1080            -i Psi4Sample3D.sdf -o Psi4Sample3DOut.sdf
 1081 
 1082     To run the first example for writing out charges to a new SD file under a
 1083     datafield instead of storing them as atom property, type:
 1084 
 1085         % Psi4CalculatePartialCharges.py  --chargesSDFormat DataField
 1086           -i Psi4Sample3D.sdf  -o Psi4Sample3DOut.sdf
 1087 
 1088     To calculate specific partial atomic charges using a specific method  and basis
 1089     set for molecules in a SD ontaining 3D structures and write them out to a specific
 1090     datafield in a new SD file, type:
 1091 
 1092         % Psi4CalculatePartialCharges.py  -c Lowdin -m SCF -b aug-cc-pVDZ
 1093           --chargesSDFormat DataField --dataFieldLabel "Lowdin_Charges"
 1094           -i Psi4Sample3D.sdf  -o Psi4Sample3DOut.sdf
 1095 
 1096 Author:
 1097     Manish Sud(msud@san.rr.com)
 1098 
 1099 See also:
 1100     Psi4CalculateEnergy.py, Psi4PerformMinimization.py, Psi4GenerateConformers.py
 1101 
 1102 Copyright:
 1103     Copyright (C) 2024 Manish Sud. All rights reserved.
 1104 
 1105     The functionality available in this script is implemented using Psi4, an
 1106     open source quantum chemistry software package, and RDKit, an open
 1107     source toolkit for cheminformatics developed by Greg Landrum.
 1108 
 1109     This file is part of MayaChemTools.
 1110 
 1111     MayaChemTools is free software; you can redistribute it and/or modify it under
 1112     the terms of the GNU Lesser General Public License as published by the Free
 1113     Software Foundation; either version 3 of the License, or (at your option) any
 1114     later version.
 1115 
 1116 """
 1117 
 1118 if __name__ == "__main__":
 1119     main()