MayaChemTools

   1 #!/bin/env python
   2 #
   3 # File: RDKitCalculatePartialCharges.py
   4 # Author: Manish Sud <msud@san.rr.com>
   5 #
   6 # Copyright (C) 2026 Manish Sud. All rights reserved.
   7 #
   8 # The functionality available in this script is implemented using RDKit, an
   9 # open source toolkit for cheminformatics developed by Greg Landrum.
  10 #
  11 # This file is part of MayaChemTools.
  12 #
  13 # MayaChemTools is free software; you can redistribute it and/or modify it under
  14 # the terms of the GNU Lesser General Public License as published by the Free
  15 # Software Foundation; either version 3 of the License, or (at your option) any
  16 # later version.
  17 #
  18 # MayaChemTools is distributed in the hope that it will be useful, but without
  19 # any warranty; without even the implied warranty of merchantability of fitness
  20 # for a particular purpose.  See the GNU Lesser General Public License for more
  21 # details.
  22 #
  23 # You should have received a copy of the GNU Lesser General Public License
  24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  26 # Boston, MA, 02111-1307, USA.
  27 #
  28 
  29 from __future__ import print_function
  30 
  31 import os
  32 import sys
  33 import time
  34 import re
  35 import multiprocessing as mp
  36 
  37 # RDKit imports...
  38 try:
  39     from rdkit import rdBase
  40     from rdkit import Chem
  41     from rdkit.Chem import AllChem
  42     from rdkit.Chem import rdPartialCharges
  43 except ImportError as ErrMsg:
  44     sys.stderr.write("\nFailed to import RDKit module/package: %s\n" % ErrMsg)
  45     sys.stderr.write("Check/update your RDKit environment and try again.\n\n")
  46     sys.exit(1)
  47 
  48 # MayaChemTools imports...
  49 sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python"))
  50 try:
  51     from docopt import docopt
  52     import MiscUtil
  53     import RDKitUtil
  54 except ImportError as ErrMsg:
  55     sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg)
  56     sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n")
  57     sys.exit(1)
  58 
  59 ScriptName = os.path.basename(sys.argv[0])
  60 Options = {}
  61 OptionsInfo = {}
  62 
  63 
  64 def main():
  65     """Start execution of the script."""
  66 
  67     MiscUtil.PrintInfo(
  68         "\n%s (RDKit v%s; MayaChemTools v%s; %s): Starting...\n"
  69         % (ScriptName, rdBase.rdkitVersion, MiscUtil.GetMayaChemToolsVersion(), time.asctime())
  70     )
  71 
  72     (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime()
  73 
  74     # Retrieve command line arguments and options...
  75     RetrieveOptions()
  76 
  77     # Process and validate command line arguments and options...
  78     ProcessOptions()
  79 
  80     # Perform actions required by the script...
  81     CalculatePartialCharges()
  82 
  83     MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName)
  84     MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime))
  85 
  86 
  87 def CalculatePartialCharges():
  88     """Calculate partial atomic charges."""
  89 
  90     # Setup a molecule reader...
  91     MiscUtil.PrintInfo("\nProcessing file %s..." % OptionsInfo["Infile"])
  92     Mols = RDKitUtil.ReadMolecules(OptionsInfo["Infile"], **OptionsInfo["InfileParams"])
  93 
  94     # Set up a molecule writer...
  95     Writer = SetupMoleculeWriter()
  96 
  97     MolCount, ValidMolCount, CalcFailedCount = ProcessMolecules(Mols, Writer)
  98 
  99     if Writer is not None:
 100         Writer.close()
 101 
 102     MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount)
 103     MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount)
 104     MiscUtil.PrintInfo("Number of molecules failed during calculation of partial charges: %d" % CalcFailedCount)
 105     MiscUtil.PrintInfo("Number of ignored molecules: %d" % (MolCount - ValidMolCount + CalcFailedCount))
 106 
 107 
 108 def ProcessMolecules(Mols, Writer):
 109     """Process molecules and calculate partial charges."""
 110 
 111     if OptionsInfo["MPMode"]:
 112         return ProcessMoleculesUsingMultipleProcesses(Mols, Writer)
 113     else:
 114         return ProcessMoleculesUsingSingleProcess(Mols, Writer)
 115 
 116 
 117 def ProcessMoleculesUsingSingleProcess(Mols, Writer):
 118     """Process molecules and calculate partial charges using a single process."""
 119 
 120     MiscUtil.PrintInfo("Calculating partial atomic charges...")
 121 
 122     Compute2DCoords = OptionsInfo["OutfileParams"]["Compute2DCoords"]
 123 
 124     MolCount, ValidMolCount, CalcFailedCount = [0] * 3
 125     for Mol in Mols:
 126         MolCount += 1
 127         if Mol is None:
 128             continue
 129 
 130         if RDKitUtil.IsMolEmpty(Mol):
 131             MolName = RDKitUtil.GetMolName(Mol, MolCount)
 132             MiscUtil.PrintWarning("Ignoring empty molecule: %s" % MolName)
 133             continue
 134         ValidMolCount += 1
 135 
 136         MolWithHs = Chem.AddHs(Mol)
 137 
 138         # Retrieve charges...
 139         CalcStatus, PartialCharges = CalculateMolPartialCharges(MolWithHs, MolCount)
 140         if not CalcStatus:
 141             CalcFailedCount += 1
 142             continue
 143 
 144         # Write out charges...
 145         WriteMolPartialCharges(Writer, MolWithHs, PartialCharges, Compute2DCoords)
 146 
 147     return (MolCount, ValidMolCount, CalcFailedCount)
 148 
 149 
 150 def ProcessMoleculesUsingMultipleProcesses(Mols, Writer):
 151     """Process molecules and calculate partial charges using a multiprocessing."""
 152 
 153     MiscUtil.PrintInfo("Calculating partial atomic charges using multiprocessing...")
 154 
 155     MPParams = OptionsInfo["MPParams"]
 156     Compute2DCoords = OptionsInfo["OutfileParams"]["Compute2DCoords"]
 157 
 158     # Setup data for initializing a worker process...
 159     InitializeWorkerProcessArgs = (
 160         MiscUtil.ObjectToBase64EncodedString(Options),
 161         MiscUtil.ObjectToBase64EncodedString(OptionsInfo),
 162     )
 163 
 164     # Setup a encoded mols data iterable for a worker process...
 165     WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(Mols)
 166 
 167     # Setup process pool along with data initialization for each process...
 168     MiscUtil.PrintInfo(
 169         "\nConfiguring multiprocessing using %s method..."
 170         % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()")
 171     )
 172     MiscUtil.PrintInfo(
 173         "NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n"
 174         % (
 175             MPParams["NumProcesses"],
 176             MPParams["InputDataMode"],
 177             ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"]),
 178         )
 179     )
 180 
 181     ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeWorkerProcess, InitializeWorkerProcessArgs)
 182 
 183     # Start processing...
 184     if re.match("^Lazy$", MPParams["InputDataMode"], re.I):
 185         Results = ProcessPool.imap(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
 186     elif re.match("^InMemory$", MPParams["InputDataMode"], re.I):
 187         Results = ProcessPool.map(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
 188     else:
 189         MiscUtil.PrintError(
 190             'The value, %s, specified for "--inputDataMode" is not supported.' % (MPParams["InputDataMode"])
 191         )
 192 
 193     (MolCount, ValidMolCount, CalcFailedCount) = [0] * 3
 194     for Result in Results:
 195         MolCount += 1
 196         MolIndex, EncodedMol, CalcStatus, PartialCharges = Result
 197 
 198         if EncodedMol is None:
 199             continue
 200         ValidMolCount += 1
 201 
 202         if not CalcStatus:
 203             CalcFailedCount += 1
 204             continue
 205 
 206         MolWithHs = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
 207 
 208         # Write out charges...
 209         WriteMolPartialCharges(Writer, MolWithHs, PartialCharges, Compute2DCoords)
 210 
 211     return (MolCount, ValidMolCount, CalcFailedCount)
 212 
 213 
 214 def InitializeWorkerProcess(*EncodedArgs):
 215     """Initialize data for a worker process."""
 216 
 217     global Options, OptionsInfo
 218 
 219     MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid())
 220 
 221     # Decode Options and OptionInfo...
 222     Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0])
 223     OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1])
 224 
 225 
 226 def WorkerProcess(EncodedMolInfo):
 227     """Process data for a worker process."""
 228 
 229     MolIndex, EncodedMol = EncodedMolInfo
 230 
 231     if EncodedMol is None:
 232         return [MolIndex, None, False, None]
 233 
 234     Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
 235     if RDKitUtil.IsMolEmpty(Mol):
 236         MolName = RDKitUtil.GetMolName(Mol, (MolIndex + 1))
 237         MiscUtil.PrintWarning("Ignoring empty molecule: %s" % MolName)
 238         return [MolIndex, None, False, None]
 239 
 240     MolWithHs = Chem.AddHs(Mol)
 241     EncodedMolWithHs = RDKitUtil.MolToBase64EncodedMolString(
 242         MolWithHs, PropertyPickleFlags=Chem.PropertyPickleOptions.MolProps | Chem.PropertyPickleOptions.PrivateProps
 243     )
 244 
 245     # Retrieve charges...
 246     CalcStatus, PartialCharges = CalculateMolPartialCharges(MolWithHs, (MolIndex + 1))
 247 
 248     return [MolIndex, EncodedMolWithHs, CalcStatus, PartialCharges]
 249 
 250 
 251 def CalculateMolPartialCharges(Mol, MolCount):
 252     """Calculate partial atomic charges for a molecule."""
 253 
 254     PartialCharges = []
 255     if OptionsInfo["MMFFChargesMode"]:
 256         if AllChem.MMFFHasAllMoleculeParams(Mol):
 257             MMFFProp = AllChem.MMFFGetMoleculeProperties(Mol, mmffVariant=OptionsInfo["MMFFVariant"])
 258             PartialCharges = [MMFFProp.GetMMFFPartialCharge(AtomIndex) for AtomIndex in range(Mol.GetNumAtoms())]
 259         else:
 260             MolName = RDKitUtil.GetMolName(Mol, MolCount)
 261             MiscUtil.PrintWarning(
 262                 "Failed to calculate MMFF partial charges for molecule, %s: Missing forcefield parameters" % MolName
 263             )
 264             return (False, PartialCharges)
 265     else:
 266         rdPartialCharges.ComputeGasteigerCharges(
 267             Mol, nIter=OptionsInfo["NumIters"], throwOnParamFailure=OptionsInfo["AllowParamFailure"]
 268         )
 269         PartialCharges = [Atom.GetProp("_GasteigerCharge") for Atom in Mol.GetAtoms()]
 270 
 271     # Format charges...
 272     PartialCharges = ["%.*f" % (OptionsInfo["Precision"], float(Value)) for Value in PartialCharges]
 273 
 274     return (True, PartialCharges)
 275 
 276 
 277 def WriteMolPartialCharges(Writer, Mol, PartialCharges, Compute2DCoords):
 278     """Write out partial atomic charges for a molecule."""
 279 
 280     if PartialCharges is None:
 281         return
 282 
 283     if OptionsInfo["AtomAliasesFormatMode"]:
 284         for Atom, PartialCharge in zip(Mol.GetAtoms(), PartialCharges):
 285             Atom.SetProp("molFileAlias", PartialCharge)
 286     else:
 287         ChargesValues = "\n".join(PartialCharges)
 288         Mol.SetProp(OptionsInfo["DataFieldLabel"], ChargesValues)
 289 
 290     if Compute2DCoords:
 291         AllChem.Compute2DCoords(Mol)
 292 
 293     Writer.write(Mol)
 294 
 295 
 296 def SetupMoleculeWriter():
 297     """Setup a molecule writer."""
 298 
 299     Writer = RDKitUtil.MoleculesWriter(OptionsInfo["Outfile"], **OptionsInfo["OutfileParams"])
 300     if Writer is None:
 301         MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OptionsInfo["Outfile"])
 302     MiscUtil.PrintInfo("Generating file %s..." % OptionsInfo["Outfile"])
 303 
 304     return Writer
 305 
 306 
 307 def ProcessOptions():
 308     """Process and validate command line arguments and options."""
 309 
 310     MiscUtil.PrintInfo("Processing options...")
 311 
 312     # Validate options...
 313     ValidateOptions()
 314 
 315     AllowParamFailure = True
 316     if re.match("^No", Options["--allowParamFailure"], re.I):
 317         AllowParamFailure = False
 318     OptionsInfo["AllowParamFailure"] = AllowParamFailure
 319 
 320     AtomAliasesFormatMode = True
 321     if re.match("^DataField", Options["--chargesSDFormat"], re.I):
 322         AtomAliasesFormatMode = False
 323     OptionsInfo["AtomAliasesFormatMode"] = AtomAliasesFormatMode
 324 
 325     OptionsInfo["DataFieldLabel"] = Options["--dataFieldLabel"]
 326 
 327     MMFFChargesMode = False
 328     if re.match("^MMFF", Options["--mode"], re.I):
 329         MMFFChargesMode = True
 330     OptionsInfo["Mode"] = Options["--mode"]
 331     OptionsInfo["MMFFChargesMode"] = MMFFChargesMode
 332 
 333     MMFFVariant = "MMFF94" if re.match("^MMFF94$", Options["--mmffVariant"], re.I) else "MMFF94s"
 334     OptionsInfo["MMFFVariant"] = MMFFVariant
 335 
 336     OptionsInfo["MPMode"] = True if re.match("^yes$", Options["--mp"], re.I) else False
 337     OptionsInfo["MPParams"] = MiscUtil.ProcessOptionMultiprocessingParameters("--mpParams", Options["--mpParams"])
 338 
 339     OptionsInfo["Infile"] = Options["--infile"]
 340     OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters(
 341         "--infileParams", Options["--infileParams"], Options["--infile"]
 342     )
 343 
 344     OptionsInfo["Outfile"] = Options["--outfile"]
 345     OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters(
 346         "--outfileParams", Options["--outfileParams"], Options["--infile"], Options["--outfile"]
 347     )
 348 
 349     OptionsInfo["Overwrite"] = Options["--overwrite"]
 350 
 351     OptionsInfo["NumIters"] = int(Options["--numIters"])
 352     OptionsInfo["Precision"] = int(Options["--precision"])
 353 
 354 
 355 def RetrieveOptions():
 356     """Retrieve command line arguments and options."""
 357 
 358     # Get options...
 359     global Options
 360     Options = docopt(_docoptUsage_)
 361 
 362     # Set current working directory to the specified directory...
 363     WorkingDir = Options["--workingdir"]
 364     if WorkingDir:
 365         os.chdir(WorkingDir)
 366 
 367     # Handle examples option...
 368     if "--examples" in Options and Options["--examples"]:
 369         MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_))
 370         sys.exit(0)
 371 
 372 
 373 def ValidateOptions():
 374     """Validate option values."""
 375 
 376     MiscUtil.ValidateOptionTextValue("-a, --allowParamFailure", Options["--allowParamFailure"], "yes no")
 377     MiscUtil.ValidateOptionTextValue("-c, --chargesSDFormat", Options["--chargesSDFormat"], "AtomAliases DataField")
 378 
 379     MiscUtil.ValidateOptionTextValue("-m, --mode", Options["--mode"], "Gasteiger MMFF")
 380     MiscUtil.ValidateOptionTextValue(" --mmffVariant", Options["--mmffVariant"], "MMFF94 MMFF94s")
 381 
 382     MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"])
 383     MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "sdf sd mol smi csv tsv txt")
 384 
 385     MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "sdf sd")
 386     MiscUtil.ValidateOptionsOutputFileOverwrite(
 387         "-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"]
 388     )
 389     MiscUtil.ValidateOptionsDistinctFileNames(
 390         "-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"]
 391     )
 392 
 393     MiscUtil.ValidateOptionTextValue("--mp", Options["--mp"], "yes no")
 394 
 395     MiscUtil.ValidateOptionIntegerValue("-n, --numIters", Options["--numIters"], {">": 0})
 396     MiscUtil.ValidateOptionIntegerValue("-p, --precision", Options["--precision"], {">": 0})
 397 
 398 
 399 # Setup a usage string for docopt...
 400 _docoptUsage_ = """
 401 RDKitCalculatePartialCharges.py - Calculate partial atomic charges
 402 
 403 Usage:
 404     RDKitCalculatePartialCharges.py [--allowParamFailure <yes or no>]
 405                                           [--chargesSDFormat <AtomAliases or DataField>]  [--dataFieldLabel <text>]
 406                                           [--infileParams <Name,Value,...>] [--mode <Gasteiger or MMFF>]
 407                                           [--mmffVariant <MMFF94 or MMFF94s>] [--mp <yes or no>]
 408                                           [--mpParams <Name,Value,...>] [--numIters <number>]
 409                                           [--outfileParams <Name,Value,...>] [--precision <number>] [--overwrite]
 410                                           [-w <dir>] -i <infile> -o <outfile> 
 411     RDKitCalculatePartialCharges.py -h | --help | -e | --examples
 412 
 413 Description:
 414     Calculate partial charges for atoms in molecules and write them out to a SD file.
 415     The hydrogens are automatically added to molecules before calculating partial
 416     charges.
 417 
 418     The supported input file formats are: Mol (.mol), SD (.sdf, .sd), SMILES (.smi,
 419     .txt, .csv, .tsv)
 420 
 421     The supported output file format are: SD File (.sdf, .sd)
 422 
 423 Options:
 424     -a, --allowParamFailure <yes or no>  [default: yes]
 425         Allow calculation of Gasteiger partial charges to proceed for molecules
 426         containing atoms with unknown parameters. The atoms with unknown
 427         parameters are removed from the calculations by setting their values to
 428         zero.
 429     -c, --chargesSDFormat <AtomAliases or DataField>  [default: AtomAliases]
 430         Format for writing out partial atomic charges to SD file. Possible values:
 431         AtomAliases or DataField.
 432         
 433         The charges are stored as atom property named 'molFileAlias' for
 434         'AtomAliases' format and may be retrieved using the RDKit function
 435         'GetProp' for atoms: Aotm.GetProp('molFileAliases').
 436         
 437         The charges are stored under a data field label specified using
 438         '-d, --dataFieldLabel' for 'DataField' format and may be retrieved using the
 439         RDKit function 'GetProp' for molecules.
 440     -d, --dataFieldLabel <text>  [default: PartialCharges]
 441         Data field label to use for storing charged in SD file during 'DataField' value
 442         of '-c, --chargesSDFormat'.
 443     -e, --examples
 444         Print examples.
 445     -h, --help
 446         Print this help message.
 447     -i, --infile <infile>
 448         Input file name.
 449     --infileParams <Name,Value,...>  [default: auto]
 450         A comma delimited list of parameter name and value pairs for reading
 451         molecules from files. The supported parameter names for different file
 452         formats, along with their default values, are shown below:
 453             
 454             SD, MOL: sanitize,yes,strictParsing,yes
 455             SMILES: smilesColumn,1,smilesNameColumn,2,smilesDelimiter,space,
 456                 smilesTitleLine,auto,sanitize,yes
 457             
 458         Possible values for smilesDelimiter: space, comma or tab.
 459     -m, --mode <Gasteiger or MMFF>  [default: Gasteiger]
 460         Type of partial atomic charges to calculate. Possible values: Gasteiger
 461         [ Ref 138 ] or Merk Molecular Mechanics Fore Field (MMFF) [ Ref 83-87 ].
 462     --mmffVariant <MMFF94 or MMFF94s>  [default: MMFF94]
 463         Variant of MMFF forcefield to use for calculating charges.
 464     --mp <yes or no>  [default: no]
 465         Use multiprocessing.
 466          
 467         By default, input data is retrieved in a lazy manner via mp.Pool.imap()
 468         function employing lazy RDKit data iterable. This allows processing of
 469         arbitrary large data sets without any additional requirements memory.
 470         
 471         All input data may be optionally loaded into memory by mp.Pool.map()
 472         before starting worker processes in a process pool by setting the value
 473         of 'inputDataMode' to 'InMemory' in '--mpParams' option.
 474         
 475         A word to the wise: The default 'chunkSize' value of 1 during 'Lazy' input
 476         data mode may adversely impact the performance. The '--mpParams' section
 477         provides additional information to tune the value of 'chunkSize'.
 478     --mpParams <Name,Value,...>  [default: auto]
 479         A comma delimited list of parameter name and value pairs to configure
 480         multiprocessing.
 481         
 482         The supported parameter names along with their default and possible
 483         values are shown below:
 484         
 485             chunkSize, auto
 486             inputDataMode, Lazy   [ Possible values: InMemory or Lazy ]
 487             numProcesses, auto   [ Default: mp.cpu_count() ]
 488         
 489         These parameters are used by the following functions to configure and
 490         control the behavior of multiprocessing: mp.Pool(), mp.Pool.map(), and
 491         mp.Pool.imap().
 492         
 493         The chunkSize determines chunks of input data passed to each worker
 494         process in a process pool by mp.Pool.map() and mp.Pool.imap() functions.
 495         The default value of chunkSize is dependent on the value of 'inputDataMode'.
 496         
 497         The mp.Pool.map() function, invoked during 'InMemory' input data mode,
 498         automatically converts RDKit data iterable into a list, loads all data into
 499         memory, and calculates the default chunkSize using the following method
 500         as shown in its code:
 501         
 502             chunkSize, extra = divmod(len(dataIterable), len(numProcesses) * 4)
 503             if extra: chunkSize += 1
 504         
 505         For example, the default chunkSize will be 7 for a pool of 4 worker processes
 506         and 100 data items.
 507         
 508         The mp.Pool.imap() function, invoked during 'Lazy' input data mode, employs
 509         'lazy' RDKit data iterable to retrieve data as needed, without loading all the
 510         data into memory. Consequently, the size of input data is not known a priori.
 511         It's not possible to estimate an optimal value for the chunkSize. The default 
 512         chunkSize is set to 1.
 513         
 514         The default value for the chunkSize during 'Lazy' data mode may adversely
 515         impact the performance due to the overhead associated with exchanging
 516         small chunks of data. It is generally a good idea to explicitly set chunkSize to
 517         a larger value during 'Lazy' input data mode, based on the size of your input
 518         data and number of processes in the process pool.
 519         
 520         The mp.Pool.map() function waits for all worker processes to process all
 521         the data and return the results. The mp.Pool.imap() function, however,
 522         returns the the results obtained from worker processes as soon as the
 523         results become available for specified chunks of data.
 524         
 525         The order of data in the results returned by both mp.Pool.map() and 
 526         mp.Pool.imap() functions always corresponds to the input data.
 527     -n, --numIters <number>  [default: 12]
 528         Number of iterations to perform during calculation of Gasteiger charges.
 529     -o, --outfile <outfile>
 530         Output file name.
 531     --outfileParams <Name,Value,...>  [default: auto]
 532         A comma delimited list of parameter name and value pairs for writing
 533         molecules to files. The supported parameter names for different file
 534         formats, along with their default values, are shown below:
 535             
 536             SD: compute2DCoords,auto,kekulize,yes,forceV3000,no
 537             
 538         Default value for compute2DCoords: yes for SMILES input file; no for all other
 539         file types.
 540     -p, --precision <number>  [default: 3]
 541         Floating point precision for writing the calculated partial atomic charges.
 542     --overwrite
 543         Overwrite existing files.
 544     -w, --workingdir <dir>
 545         Location of working directory which defaults to the current directory.
 546 
 547 Examples:
 548     To calculate Gasteiger partial atomic charges for molecules in a SMILES
 549     file and write them out to a SD file as atom aliases, type:
 550 
 551         % RDKitCalculatePartialCharges.py  -i Sample.smi -o SampleOut.sdf
 552 
 553     To calculate Gasteiger partial atomic charges for molecules in a SMILES
 554     file in multiprocessing mode on all available CPUs without loading all data
 555     into memory, and and write them out to a SD file as atom aliases, type:
 556 
 557         % RDKitCalculatePartialCharges.py  --mp yes -i Sample.smi
 558           -o SampleOut.sdf
 559 
 560     To calculate Gasteiger partial atomic charges for molecules in a SMILES
 561     file in multiprocessing mode on all available CPUs by loading all data
 562     into memory, and and write them out to a SD file as atom aliases, type:
 563 
 564         % RDKitCalculatePartialCharges.py  --mp yes --mpParams
 565           "inputDataMode,InMemory" -i Sample.smi -o SampleOut.sdf
 566 
 567     To calculate Gasteiger partial atomic charges for molecules in a SMILES
 568     file in multiprocessing mode on specific number of CPUs without loading
 569     all data into memory, and and write them out to a SD file as atom aliases,
 570     type:
 571 
 572         % RDKitCalculatePartialCharges.py  --mp yes --mpParams
 573           "inputDataMode,InMemory,numProcesses,4,chunkSize,8"
 574           -i Sample.smi -o SampleOut.sdf
 575 
 576     To calculate MMFF forcefield partial atomic charges for molecules in a SD
 577     file and write them out to a SD file under 'PartialCharges' data field, type:
 578 
 579         % RDKitCalculatePartialCharges.py  -m MMFF -c DataField -i Sample.sdf
 580           -o SampleOut.sdf
 581 
 582     To calculate Gasteiger partial atomic charges for molecules in a SMILES
 583     file and write them out to a SD file under a data field named 'GasteigerCharges',
 584     type:
 585 
 586         % RDKitCalculatePartialCharges.py  -m Gasteiger -c DataField
 587           -d GasteigerCharges -p 4 -i Sample.smi -o SampleOut.sdf
 588 
 589     To calculate Gasteiger partial atomic charges for molecules in a CSV SMILES
 590     file, SMILES strings in column 1, name in column 2, and write out a SD file
 591     containing charges as atom aliases, type:
 592 
 593         % RDKitCalculatePartialCharges.py --infileParams
 594           "smilesDelimiter,comma,smilesTitleLine,yes,smilesColumn,1,
 595           smilesNameColumn,2" --outfileParams "compute2DCoords,yes"
 596           -i SampleSMILES.csv -o SampleOut.sdf
 597 
 598 Author:
 599     Manish Sud(msud@san.rr.com)
 600 
 601 See also:
 602     RDKitCalculateMolecularDescriptors.py, RDKitCalculateRMSD.py,
 603     RDKitCompareMoleculeShapes.py, RDKitConvertFileFormat.py,
 604 
 605 Copyright:
 606     Copyright (C) 2026 Manish Sud. All rights reserved.
 607 
 608     The functionality available in this script is implemented using RDKit, an
 609     open source toolkit for cheminformatics developed by Greg Landrum.
 610 
 611     This file is part of MayaChemTools.
 612 
 613     MayaChemTools is free software; you can redistribute it and/or modify it under
 614     the terms of the GNU Lesser General Public License as published by the Free
 615     Software Foundation; either version 3 of the License, or (at your option) any
 616     later version.
 617 
 618 """
 619 
 620 if __name__ == "__main__":
 621     main()