MayaChemTools

   1 #!/bin/env python
   2 #
   3 # File: RDKitCalculatePartialCharges.py
   4 # Author: Manish Sud <msud@san.rr.com>
   5 #
   6 # Copyright (C) 2024 Manish Sud. All rights reserved.
   7 #
   8 # The functionality available in this script is implemented using RDKit, an
   9 # open source toolkit for cheminformatics developed by Greg Landrum.
  10 #
  11 # This file is part of MayaChemTools.
  12 #
  13 # MayaChemTools is free software; you can redistribute it and/or modify it under
  14 # the terms of the GNU Lesser General Public License as published by the Free
  15 # Software Foundation; either version 3 of the License, or (at your option) any
  16 # later version.
  17 #
  18 # MayaChemTools is distributed in the hope that it will be useful, but without
  19 # any warranty; without even the implied warranty of merchantability of fitness
  20 # for a particular purpose.  See the GNU Lesser General Public License for more
  21 # details.
  22 #
  23 # You should have received a copy of the GNU Lesser General Public License
  24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  26 # Boston, MA, 02111-1307, USA.
  27 #
  28 
  29 from __future__ import print_function
  30 
  31 # Add local python path to the global path and import standard library modules...
  32 import os
  33 import sys;  sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python"))
  34 import time
  35 import re
  36 import multiprocessing as mp
  37 
  38 # RDKit imports...
  39 try:
  40     from rdkit import rdBase
  41     from rdkit import Chem
  42     from rdkit.Chem import AllChem
  43     from rdkit.Chem import rdPartialCharges
  44 except ImportError as ErrMsg:
  45     sys.stderr.write("\nFailed to import RDKit module/package: %s\n" % ErrMsg)
  46     sys.stderr.write("Check/update your RDKit environment and try again.\n\n")
  47     sys.exit(1)
  48 
  49 # RDKit dependency imports...
  50 import numpy
  51 
  52 # MayaChemTools imports...
  53 try:
  54     from docopt import docopt
  55     import MiscUtil
  56     import RDKitUtil
  57 except ImportError as ErrMsg:
  58     sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg)
  59     sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n")
  60     sys.exit(1)
  61 
  62 ScriptName = os.path.basename(sys.argv[0])
  63 Options = {}
  64 OptionsInfo = {}
  65 
  66 def main():
  67     """Start execution of the script."""
  68     
  69     MiscUtil.PrintInfo("\n%s (RDKit v%s; MayaChemTools v%s; %s): Starting...\n" % (ScriptName, rdBase.rdkitVersion, MiscUtil.GetMayaChemToolsVersion(), time.asctime()))
  70     
  71     (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime()
  72     
  73     # Retrieve command line arguments and options...
  74     RetrieveOptions()
  75     
  76     # Process and validate command line arguments and options...
  77     ProcessOptions()
  78     
  79     # Perform actions required by the script...
  80     CalculatePartialCharges()
  81     
  82     MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName)
  83     MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime))
  84 
  85 def CalculatePartialCharges():
  86     """Calculate partial atomic charges."""
  87 
  88     # Setup a molecule reader...
  89     MiscUtil.PrintInfo("\nProcessing file %s..." % OptionsInfo["Infile"])
  90     Mols  = RDKitUtil.ReadMolecules(OptionsInfo["Infile"], **OptionsInfo["InfileParams"])
  91     
  92     # Set up a molecule writer...
  93     Writer = SetupMoleculeWriter()
  94         
  95     MolCount, ValidMolCount, CalcFailedCount = ProcessMolecules(Mols, Writer)
  96     
  97     if Writer is not None:
  98         Writer.close()
  99         
 100     MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount)
 101     MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount)
 102     MiscUtil.PrintInfo("Number of molecules failed during calculation of partial charges: %d" % CalcFailedCount)
 103     MiscUtil.PrintInfo("Number of ignored molecules: %d" % (MolCount - ValidMolCount + CalcFailedCount))
 104 
 105 def ProcessMolecules(Mols, Writer):
 106     """Process molecules and calculate partial charges."""
 107 
 108     if OptionsInfo["MPMode"]:
 109         return ProcessMoleculesUsingMultipleProcesses(Mols, Writer)
 110     else:
 111         return ProcessMoleculesUsingSingleProcess(Mols, Writer)
 112 
 113 def ProcessMoleculesUsingSingleProcess(Mols,  Writer):
 114     """Process molecules and calculate partial charges using a single process."""
 115     
 116     MiscUtil.PrintInfo("Calculating partial atomic charges...")
 117     
 118     Compute2DCoords = OptionsInfo["OutfileParams"]["Compute2DCoords"]
 119     
 120     MolCount, ValidMolCount, CalcFailedCount = [0] * 3
 121     for Mol in Mols:
 122         MolCount += 1
 123         if Mol is None:
 124             continue
 125         
 126         if RDKitUtil.IsMolEmpty(Mol):
 127             MolName = RDKitUtil.GetMolName(Mol, MolCount)
 128             MiscUtil.PrintWarning("Ignoring empty molecule: %s" % MolName)
 129             continue
 130         ValidMolCount += 1
 131 
 132         MolWithHs = Chem.AddHs(Mol)
 133         
 134         # Retrieve charges...
 135         CalcStatus, PartialCharges = CalculateMolPartialCharges(MolWithHs, MolCount)
 136         if not CalcStatus:
 137             CalcFailedCount += 1
 138             continue
 139         
 140         # Write out charges...
 141         WriteMolPartialCharges(Writer, MolWithHs, PartialCharges, Compute2DCoords)
 142         
 143     return (MolCount, ValidMolCount, CalcFailedCount)
 144     
 145 def ProcessMoleculesUsingMultipleProcesses(Mols,  Writer):
 146     """Process molecules and calculate partial charges using a multiprocessing."""
 147     
 148     MiscUtil.PrintInfo("Calculating partial atomic charges using multiprocessing...")
 149     
 150     MPParams = OptionsInfo["MPParams"]
 151     Compute2DCoords = OptionsInfo["OutfileParams"]["Compute2DCoords"]
 152     
 153     # Setup data for initializing a worker process...
 154     InitializeWorkerProcessArgs = (MiscUtil.ObjectToBase64EncodedString(Options), MiscUtil.ObjectToBase64EncodedString(OptionsInfo))
 155 
 156     # Setup a encoded mols data iterable for a worker process...
 157     WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(Mols)
 158 
 159     # Setup process pool along with data initialization for each process...
 160     MiscUtil.PrintInfo("\nConfiguring multiprocessing using %s method..." % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()"))
 161     MiscUtil.PrintInfo("NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n" % (MPParams["NumProcesses"], MPParams["InputDataMode"], ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"])))
 162     
 163     ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeWorkerProcess, InitializeWorkerProcessArgs)
 164     
 165     # Start processing...
 166     if re.match("^Lazy$", MPParams["InputDataMode"], re.I):
 167         Results = ProcessPool.imap(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
 168     elif re.match("^InMemory$", MPParams["InputDataMode"], re.I):
 169         Results = ProcessPool.map(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
 170     else:
 171         MiscUtil.PrintError("The value, %s, specified for \"--inputDataMode\" is not supported." % (MPParams["InputDataMode"]))
 172     
 173     (MolCount, ValidMolCount, CalcFailedCount) = [0] * 3
 174     for Result in Results:
 175         MolCount += 1
 176         MolIndex, EncodedMol, CalcStatus, PartialCharges = Result
 177         
 178         if EncodedMol is None:
 179             continue
 180         ValidMolCount += 1
 181         
 182         if not CalcStatus:
 183             CalcFailedCount += 1
 184             continue
 185 
 186         MolWithHs = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
 187         
 188         # Write out charges...
 189         WriteMolPartialCharges(Writer, MolWithHs, PartialCharges, Compute2DCoords)
 190     
 191     return (MolCount, ValidMolCount, CalcFailedCount)
 192     
 193 def InitializeWorkerProcess(*EncodedArgs):
 194     """Initialize data for a worker process."""
 195 
 196     global Options, OptionsInfo
 197     
 198     MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid())
 199 
 200     # Decode Options and OptionInfo...
 201     Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0])
 202     OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1])
 203 
 204 def WorkerProcess(EncodedMolInfo):
 205     """Process data for a worker process."""
 206     
 207     MolIndex, EncodedMol = EncodedMolInfo
 208 
 209     if EncodedMol is None:
 210         return [MolIndex, None, False, None]
 211         
 212     Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
 213     if RDKitUtil.IsMolEmpty(Mol):
 214         MolName = RDKitUtil.GetMolName(Mol, (MolIndex + 1))
 215         MiscUtil.PrintWarning("Ignoring empty molecule: %s" % MolName)
 216         return [MolIndex, None, False, None]
 217         
 218     MolWithHs = Chem.AddHs(Mol)
 219     EncodedMolWithHs = RDKitUtil.MolToBase64EncodedMolString(MolWithHs, PropertyPickleFlags = Chem.PropertyPickleOptions.MolProps | Chem.PropertyPickleOptions.PrivateProps)
 220     
 221     # Retrieve charges...
 222     CalcStatus, PartialCharges = CalculateMolPartialCharges(MolWithHs, (MolIndex + 1))
 223     
 224     return [MolIndex, EncodedMolWithHs, CalcStatus, PartialCharges]
 225 
 226 def CalculateMolPartialCharges(Mol, MolCount):
 227     """Calculate partial atomic charges for a molecule."""
 228     
 229     PartialCharges = []
 230     if OptionsInfo["MMFFChargesMode"]:
 231         if  AllChem.MMFFHasAllMoleculeParams(Mol):
 232             MMFFProp = AllChem.MMFFGetMoleculeProperties(Mol, mmffVariant = OptionsInfo["MMFFVariant"])
 233             PartialCharges = [MMFFProp.GetMMFFPartialCharge(AtomIndex) for AtomIndex in range(Mol.GetNumAtoms())]
 234         else:
 235             MolName = RDKitUtil.GetMolName(Mol, MolCount)
 236             MiscUtil.PrintWarning("Failed to calculate MMFF partial charges for molecule, %s: Missing forcefield parameters" % MolName)
 237             return (False, PartialCharges)
 238     else:
 239         rdPartialCharges.ComputeGasteigerCharges(Mol, nIter = OptionsInfo["NumIters"], throwOnParamFailure = OptionsInfo["AllowParamFailure"])
 240         PartialCharges = [Atom.GetProp("_GasteigerCharge") for Atom in Mol.GetAtoms()]
 241     
 242     # Format charges...
 243     PartialCharges = ["%.*f" % (OptionsInfo["Precision"], float(Value)) for Value in PartialCharges]
 244     
 245     return (True, PartialCharges)
 246     
 247 def WriteMolPartialCharges(Writer, Mol, PartialCharges, Compute2DCoords):
 248     """Write out partial atomic charges for a molecule."""
 249 
 250     if PartialCharges is None:
 251         return
 252     
 253     if OptionsInfo["AtomAliasesFormatMode"]:
 254         for Atom, PartialCharge in zip(Mol.GetAtoms(), PartialCharges):
 255             Atom.SetProp('molFileAlias', PartialCharge)
 256     else:
 257         ChargesValues = "\n".join(PartialCharges)
 258         Mol.SetProp(OptionsInfo["DataFieldLabel"], ChargesValues)
 259     
 260     if Compute2DCoords:
 261         AllChem.Compute2DCoords(Mol)
 262     
 263     Writer.write(Mol)
 264     
 265 def SetupMoleculeWriter():
 266     """Setup a molecule writer."""
 267     
 268     Writer = RDKitUtil.MoleculesWriter(OptionsInfo["Outfile"], **OptionsInfo["OutfileParams"])
 269     if Writer is None:
 270         MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OptionsInfo["Outfile"])
 271     MiscUtil.PrintInfo("Generating file %s..." % OptionsInfo["Outfile"])
 272     
 273     return Writer
 274 
 275 def ProcessOptions():
 276     """Process and validate command line arguments and options."""
 277     
 278     MiscUtil.PrintInfo("Processing options...")
 279     
 280     # Validate options...
 281     ValidateOptions()
 282 
 283     AllowParamFailure = True
 284     if re.match("^No", Options["--allowParamFailure"], re.I):
 285         AllowParamFailure = False
 286     OptionsInfo["AllowParamFailure"] = AllowParamFailure
 287     
 288     AtomAliasesFormatMode = True
 289     if re.match("^DataField", Options["--chargesSDFormat"], re.I):
 290         AtomAliasesFormatMode = False
 291     OptionsInfo["AtomAliasesFormatMode"] = AtomAliasesFormatMode
 292 
 293     OptionsInfo["DataFieldLabel"] = Options["--dataFieldLabel"]
 294     
 295     MMFFChargesMode = False
 296     if re.match("^MMFF", Options["--mode"], re.I):
 297         MMFFChargesMode = True
 298     OptionsInfo["Mode"] = Options["--mode"]
 299     OptionsInfo["MMFFChargesMode"] = MMFFChargesMode
 300     
 301     MMFFVariant = "MMFF94" if re.match("^MMFF94$", Options["--mmffVariant"], re.I) else "MMFF94s"
 302     OptionsInfo["MMFFVariant"] = MMFFVariant
 303     
 304     OptionsInfo["MPMode"] = True if re.match("^yes$", Options["--mp"], re.I) else False
 305     OptionsInfo["MPParams"] = MiscUtil.ProcessOptionMultiprocessingParameters("--mpParams", Options["--mpParams"])
 306     
 307     OptionsInfo["Infile"] = Options["--infile"]
 308     OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters("--infileParams", Options["--infileParams"], Options["--infile"])
 309     
 310     OptionsInfo["Outfile"] = Options["--outfile"]
 311     OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters("--outfileParams", Options["--outfileParams"], Options["--infile"], Options["--outfile"])
 312     
 313     OptionsInfo["Overwrite"] = Options["--overwrite"]
 314     
 315     OptionsInfo["NumIters"] = int(Options["--numIters"])
 316     OptionsInfo["Precision"] = int(Options["--precision"])
 317 
 318 def RetrieveOptions():
 319     """Retrieve command line arguments and options."""
 320     
 321     # Get options...
 322     global Options
 323     Options = docopt(_docoptUsage_)
 324     
 325     # Set current working directory to the specified directory...
 326     WorkingDir = Options["--workingdir"]
 327     if WorkingDir:
 328         os.chdir(WorkingDir)
 329     
 330     # Handle examples option...
 331     if "--examples" in Options and Options["--examples"]:
 332         MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_))
 333         sys.exit(0)
 334         
 335 def ValidateOptions():
 336     """Validate option values."""
 337 
 338     MiscUtil.ValidateOptionTextValue("-a, --allowParamFailure", Options["--allowParamFailure"], "yes no")
 339     MiscUtil.ValidateOptionTextValue("-c, --chargesSDFormat", Options["--chargesSDFormat"], "AtomAliases DataField")
 340     
 341     MiscUtil.ValidateOptionTextValue("-m, --mode", Options["--mode"], "Gasteiger MMFF")
 342     MiscUtil.ValidateOptionTextValue(" --mmffVariant", Options["--mmffVariant"], "MMFF94 MMFF94s")
 343     
 344     MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"])
 345     MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "sdf sd mol smi csv tsv txt")
 346     
 347     MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "sdf sd")
 348     MiscUtil.ValidateOptionsOutputFileOverwrite("-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"])
 349     MiscUtil.ValidateOptionsDistinctFileNames("-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"])
 350 
 351     MiscUtil.ValidateOptionTextValue("--mp", Options["--mp"], "yes no")
 352     
 353     MiscUtil.ValidateOptionIntegerValue("-n, --numIters", Options["--numIters"], {">": 0})
 354     MiscUtil.ValidateOptionIntegerValue("-p, --precision", Options["--precision"], {">": 0})
 355         
 356 # Setup a usage string for docopt...
 357 _docoptUsage_ = """
 358 RDKitCalculatePartialCharges.py - Calculate partial atomic charges
 359 
 360 Usage:
 361     RDKitCalculatePartialCharges.py [--allowParamFailure <yes or no>]
 362                                           [--chargesSDFormat <AtomAliases or DataField>]  [--dataFieldLabel <text>]
 363                                           [--infileParams <Name,Value,...>] [--mode <Gasteiger or MMFF>]
 364                                           [--mmffVariant <MMFF94 or MMFF94s>] [--mp <yes or no>]
 365                                           [--mpParams <Name,Value,...>] [--numIters <number>]
 366                                           [--outfileParams <Name,Value,...>] [--precision <number>] [--overwrite]
 367                                           [-w <dir>] -i <infile> -o <outfile> 
 368     RDKitCalculatePartialCharges.py -h | --help | -e | --examples
 369 
 370 Description:
 371     Calculate partial charges for atoms in molecules and write them out to a SD file.
 372     The hydrogens are automatically added to molecules before calculating partial
 373     charges.
 374 
 375     The supported input file formats are: Mol (.mol), SD (.sdf, .sd), SMILES (.smi,
 376     .txt, .csv, .tsv)
 377 
 378     The supported output file format are: SD File (.sdf, .sd)
 379 
 380 Options:
 381     -a, --allowParamFailure <yes or no>  [default: yes]
 382         Allow calculation of Gasteiger partial charges to proceed for molecules
 383         containing atoms with unknown parameters. The atoms with unknown
 384         parameters are removed from the calculations by setting their values to
 385         zero.
 386     -c, --chargesSDFormat <AtomAliases or DataField>  [default: AtomAliases]
 387         Format for writing out partial atomic charges to SD file. Possible values:
 388         AtomAliases or DataField.
 389         
 390         The charges are stored as atom property named 'molFileAlias' for
 391         'AtomAliases' format and may be retrieved using the RDKit function
 392         'GetProp' for atoms: Aotm.GetProp('molFileAliases').
 393         
 394         The charges are stored under a data field label specified using
 395         '-d, --dataFieldLabel' for 'DataField' format and may be retrieved using the
 396         RDKit function 'GetProp' for molecules.
 397     -d, --dataFieldLabel <text>  [default: PartialCharges]
 398         Data field label to use for storing charged in SD file during 'DataField' value
 399         of '-c, --chargesSDFormat'.
 400     -e, --examples
 401         Print examples.
 402     -h, --help
 403         Print this help message.
 404     -i, --infile <infile>
 405         Input file name.
 406     --infileParams <Name,Value,...>  [default: auto]
 407         A comma delimited list of parameter name and value pairs for reading
 408         molecules from files. The supported parameter names for different file
 409         formats, along with their default values, are shown below:
 410             
 411             SD, MOL: sanitize,yes,strictParsing,yes
 412             SMILES: smilesColumn,1,smilesNameColumn,2,smilesDelimiter,space,
 413                 smilesTitleLine,auto,sanitize,yes
 414             
 415         Possible values for smilesDelimiter: space, comma or tab.
 416     -m, --mode <Gasteiger or MMFF>  [default: Gasteiger]
 417         Type of partial atomic charges to calculate. Possible values: Gasteiger
 418         [ Ref 138 ] or Merk Molecular Mechanics Fore Field (MMFF) [ Ref 83-87 ].
 419     --mmffVariant <MMFF94 or MMFF94s>  [default: MMFF94]
 420         Variant of MMFF forcefield to use for calculating charges.
 421     --mp <yes or no>  [default: no]
 422         Use multiprocessing.
 423          
 424         By default, input data is retrieved in a lazy manner via mp.Pool.imap()
 425         function employing lazy RDKit data iterable. This allows processing of
 426         arbitrary large data sets without any additional requirements memory.
 427         
 428         All input data may be optionally loaded into memory by mp.Pool.map()
 429         before starting worker processes in a process pool by setting the value
 430         of 'inputDataMode' to 'InMemory' in '--mpParams' option.
 431         
 432         A word to the wise: The default 'chunkSize' value of 1 during 'Lazy' input
 433         data mode may adversely impact the performance. The '--mpParams' section
 434         provides additional information to tune the value of 'chunkSize'.
 435     --mpParams <Name,Value,...>  [default: auto]
 436         A comma delimited list of parameter name and value pairs to configure
 437         multiprocessing.
 438         
 439         The supported parameter names along with their default and possible
 440         values are shown below:
 441         
 442             chunkSize, auto
 443             inputDataMode, Lazy   [ Possible values: InMemory or Lazy ]
 444             numProcesses, auto   [ Default: mp.cpu_count() ]
 445         
 446         These parameters are used by the following functions to configure and
 447         control the behavior of multiprocessing: mp.Pool(), mp.Pool.map(), and
 448         mp.Pool.imap().
 449         
 450         The chunkSize determines chunks of input data passed to each worker
 451         process in a process pool by mp.Pool.map() and mp.Pool.imap() functions.
 452         The default value of chunkSize is dependent on the value of 'inputDataMode'.
 453         
 454         The mp.Pool.map() function, invoked during 'InMemory' input data mode,
 455         automatically converts RDKit data iterable into a list, loads all data into
 456         memory, and calculates the default chunkSize using the following method
 457         as shown in its code:
 458         
 459             chunkSize, extra = divmod(len(dataIterable), len(numProcesses) * 4)
 460             if extra: chunkSize += 1
 461         
 462         For example, the default chunkSize will be 7 for a pool of 4 worker processes
 463         and 100 data items.
 464         
 465         The mp.Pool.imap() function, invoked during 'Lazy' input data mode, employs
 466         'lazy' RDKit data iterable to retrieve data as needed, without loading all the
 467         data into memory. Consequently, the size of input data is not known a priori.
 468         It's not possible to estimate an optimal value for the chunkSize. The default 
 469         chunkSize is set to 1.
 470         
 471         The default value for the chunkSize during 'Lazy' data mode may adversely
 472         impact the performance due to the overhead associated with exchanging
 473         small chunks of data. It is generally a good idea to explicitly set chunkSize to
 474         a larger value during 'Lazy' input data mode, based on the size of your input
 475         data and number of processes in the process pool.
 476         
 477         The mp.Pool.map() function waits for all worker processes to process all
 478         the data and return the results. The mp.Pool.imap() function, however,
 479         returns the the results obtained from worker processes as soon as the
 480         results become available for specified chunks of data.
 481         
 482         The order of data in the results returned by both mp.Pool.map() and 
 483         mp.Pool.imap() functions always corresponds to the input data.
 484     -n, --numIters <number>  [default: 12]
 485         Number of iterations to perform during calculation of Gasteiger charges.
 486     -o, --outfile <outfile>
 487         Output file name.
 488     --outfileParams <Name,Value,...>  [default: auto]
 489         A comma delimited list of parameter name and value pairs for writing
 490         molecules to files. The supported parameter names for different file
 491         formats, along with their default values, are shown below:
 492             
 493             SD: compute2DCoords,auto,kekulize,yes,forceV3000,no
 494             
 495         Default value for compute2DCoords: yes for SMILES input file; no for all other
 496         file types.
 497     -p, --precision <number>  [default: 3]
 498         Floating point precision for writing the calculated partial atomic charges.
 499     --overwrite
 500         Overwrite existing files.
 501     -w, --workingdir <dir>
 502         Location of working directory which defaults to the current directory.
 503 
 504 Examples:
 505     To calculate Gasteiger partial atomic charges for molecules in a SMILES
 506     file and write them out to a SD file as atom aliases, type:
 507 
 508         % RDKitCalculatePartialCharges.py  -i Sample.smi -o SampleOut.sdf
 509 
 510     To calculate Gasteiger partial atomic charges for molecules in a SMILES
 511     file in multiprocessing mode on all available CPUs without loading all data
 512     into memory, and and write them out to a SD file as atom aliases, type:
 513 
 514         % RDKitCalculatePartialCharges.py  --mp yes -i Sample.smi
 515           -o SampleOut.sdf
 516 
 517     To calculate Gasteiger partial atomic charges for molecules in a SMILES
 518     file in multiprocessing mode on all available CPUs by loading all data
 519     into memory, and and write them out to a SD file as atom aliases, type:
 520 
 521         % RDKitCalculatePartialCharges.py  --mp yes --mpParams
 522           "inputDataMode,InMemory" -i Sample.smi -o SampleOut.sdf
 523 
 524     To calculate Gasteiger partial atomic charges for molecules in a SMILES
 525     file in multiprocessing mode on specific number of CPUs without loading
 526     all data into memory, and and write them out to a SD file as atom aliases,
 527     type:
 528 
 529         % RDKitCalculatePartialCharges.py  --mp yes --mpParams
 530           "inputDataMode,InMemory,numProcesses,4,chunkSize,8"
 531           -i Sample.smi -o SampleOut.sdf
 532 
 533     To calculate MMFF forcefield partial atomic charges for molecules in a SD
 534     file and write them out to a SD file under 'PartialCharges' data field, type:
 535 
 536         % RDKitCalculatePartialCharges.py  -m MMFF -c DataField -i Sample.sdf
 537           -o SampleOut.sdf
 538 
 539     To calculate Gasteiger partial atomic charges for molecules in a SMILES
 540     file and write them out to a SD file under a data field named 'GasteigerCharges',
 541     type:
 542 
 543         % RDKitCalculatePartialCharges.py  -m Gasteiger -c DataField
 544           -d GasteigerCharges -p 4 -i Sample.smi -o SampleOut.sdf
 545 
 546     To calculate Gasteiger partial atomic charges for molecules in a CSV SMILES
 547     file, SMILES strings in column 1, name in column 2, and write out a SD file
 548     containing charges as atom aliases, type:
 549 
 550         % RDKitCalculatePartialCharges.py --infileParams
 551           "smilesDelimiter,comma,smilesTitleLine,yes,smilesColumn,1,
 552           smilesNameColumn,2" --outfileParams "compute2DCoords,yes"
 553           -i SampleSMILES.csv -o SampleOut.sdf
 554 
 555 Author:
 556     Manish Sud(msud@san.rr.com)
 557 
 558 See also:
 559     RDKitCalculateMolecularDescriptors.py, RDKitCalculateRMSD.py,
 560     RDKitCompareMoleculeShapes.py, RDKitConvertFileFormat.py,
 561 
 562 Copyright:
 563     Copyright (C) 2024 Manish Sud. All rights reserved.
 564 
 565     The functionality available in this script is implemented using RDKit, an
 566     open source toolkit for cheminformatics developed by Greg Landrum.
 567 
 568     This file is part of MayaChemTools.
 569 
 570     MayaChemTools is free software; you can redistribute it and/or modify it under
 571     the terms of the GNU Lesser General Public License as published by the Free
 572     Software Foundation; either version 3 of the License, or (at your option) any
 573     later version.
 574 
 575 """
 576 
 577 if __name__ == "__main__":
 578     main()