1 #!/bin/env python 2 # 3 # File: RDKitCalculatePartialCharges.py 4 # Author: Manish Sud <msud@san.rr.com> 5 # 6 # Copyright (C) 2026 Manish Sud. All rights reserved. 7 # 8 # The functionality available in this script is implemented using RDKit, an 9 # open source toolkit for cheminformatics developed by Greg Landrum. 10 # 11 # This file is part of MayaChemTools. 12 # 13 # MayaChemTools is free software; you can redistribute it and/or modify it under 14 # the terms of the GNU Lesser General Public License as published by the Free 15 # Software Foundation; either version 3 of the License, or (at your option) any 16 # later version. 17 # 18 # MayaChemTools is distributed in the hope that it will be useful, but without 19 # any warranty; without even the implied warranty of merchantability of fitness 20 # for a particular purpose. See the GNU Lesser General Public License for more 21 # details. 22 # 23 # You should have received a copy of the GNU Lesser General Public License 24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or 25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, 26 # Boston, MA, 02111-1307, USA. 27 # 28 29 from __future__ import print_function 30 31 import os 32 import sys 33 import time 34 import re 35 import multiprocessing as mp 36 37 # RDKit imports... 38 try: 39 from rdkit import rdBase 40 from rdkit import Chem 41 from rdkit.Chem import AllChem 42 from rdkit.Chem import rdPartialCharges 43 except ImportError as ErrMsg: 44 sys.stderr.write("\nFailed to import RDKit module/package: %s\n" % ErrMsg) 45 sys.stderr.write("Check/update your RDKit environment and try again.\n\n") 46 sys.exit(1) 47 48 # MayaChemTools imports... 49 sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python")) 50 try: 51 from docopt import docopt 52 import MiscUtil 53 import RDKitUtil 54 except ImportError as ErrMsg: 55 sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg) 56 sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n") 57 sys.exit(1) 58 59 ScriptName = os.path.basename(sys.argv[0]) 60 Options = {} 61 OptionsInfo = {} 62 63 64 def main(): 65 """Start execution of the script.""" 66 67 MiscUtil.PrintInfo( 68 "\n%s (RDKit v%s; MayaChemTools v%s; %s): Starting...\n" 69 % (ScriptName, rdBase.rdkitVersion, MiscUtil.GetMayaChemToolsVersion(), time.asctime()) 70 ) 71 72 (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime() 73 74 # Retrieve command line arguments and options... 75 RetrieveOptions() 76 77 # Process and validate command line arguments and options... 78 ProcessOptions() 79 80 # Perform actions required by the script... 81 CalculatePartialCharges() 82 83 MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName) 84 MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime)) 85 86 87 def CalculatePartialCharges(): 88 """Calculate partial atomic charges.""" 89 90 # Setup a molecule reader... 91 MiscUtil.PrintInfo("\nProcessing file %s..." % OptionsInfo["Infile"]) 92 Mols = RDKitUtil.ReadMolecules(OptionsInfo["Infile"], **OptionsInfo["InfileParams"]) 93 94 # Set up a molecule writer... 95 Writer = SetupMoleculeWriter() 96 97 MolCount, ValidMolCount, CalcFailedCount = ProcessMolecules(Mols, Writer) 98 99 if Writer is not None: 100 Writer.close() 101 102 MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount) 103 MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount) 104 MiscUtil.PrintInfo("Number of molecules failed during calculation of partial charges: %d" % CalcFailedCount) 105 MiscUtil.PrintInfo("Number of ignored molecules: %d" % (MolCount - ValidMolCount + CalcFailedCount)) 106 107 108 def ProcessMolecules(Mols, Writer): 109 """Process molecules and calculate partial charges.""" 110 111 if OptionsInfo["MPMode"]: 112 return ProcessMoleculesUsingMultipleProcesses(Mols, Writer) 113 else: 114 return ProcessMoleculesUsingSingleProcess(Mols, Writer) 115 116 117 def ProcessMoleculesUsingSingleProcess(Mols, Writer): 118 """Process molecules and calculate partial charges using a single process.""" 119 120 MiscUtil.PrintInfo("Calculating partial atomic charges...") 121 122 Compute2DCoords = OptionsInfo["OutfileParams"]["Compute2DCoords"] 123 124 MolCount, ValidMolCount, CalcFailedCount = [0] * 3 125 for Mol in Mols: 126 MolCount += 1 127 if Mol is None: 128 continue 129 130 if RDKitUtil.IsMolEmpty(Mol): 131 MolName = RDKitUtil.GetMolName(Mol, MolCount) 132 MiscUtil.PrintWarning("Ignoring empty molecule: %s" % MolName) 133 continue 134 ValidMolCount += 1 135 136 MolWithHs = Chem.AddHs(Mol) 137 138 # Retrieve charges... 139 CalcStatus, PartialCharges = CalculateMolPartialCharges(MolWithHs, MolCount) 140 if not CalcStatus: 141 CalcFailedCount += 1 142 continue 143 144 # Write out charges... 145 WriteMolPartialCharges(Writer, MolWithHs, PartialCharges, Compute2DCoords) 146 147 return (MolCount, ValidMolCount, CalcFailedCount) 148 149 150 def ProcessMoleculesUsingMultipleProcesses(Mols, Writer): 151 """Process molecules and calculate partial charges using a multiprocessing.""" 152 153 MiscUtil.PrintInfo("Calculating partial atomic charges using multiprocessing...") 154 155 MPParams = OptionsInfo["MPParams"] 156 Compute2DCoords = OptionsInfo["OutfileParams"]["Compute2DCoords"] 157 158 # Setup data for initializing a worker process... 159 InitializeWorkerProcessArgs = ( 160 MiscUtil.ObjectToBase64EncodedString(Options), 161 MiscUtil.ObjectToBase64EncodedString(OptionsInfo), 162 ) 163 164 # Setup a encoded mols data iterable for a worker process... 165 WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(Mols) 166 167 # Setup process pool along with data initialization for each process... 168 MiscUtil.PrintInfo( 169 "\nConfiguring multiprocessing using %s method..." 170 % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()") 171 ) 172 MiscUtil.PrintInfo( 173 "NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n" 174 % ( 175 MPParams["NumProcesses"], 176 MPParams["InputDataMode"], 177 ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"]), 178 ) 179 ) 180 181 ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeWorkerProcess, InitializeWorkerProcessArgs) 182 183 # Start processing... 184 if re.match("^Lazy$", MPParams["InputDataMode"], re.I): 185 Results = ProcessPool.imap(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"]) 186 elif re.match("^InMemory$", MPParams["InputDataMode"], re.I): 187 Results = ProcessPool.map(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"]) 188 else: 189 MiscUtil.PrintError( 190 'The value, %s, specified for "--inputDataMode" is not supported.' % (MPParams["InputDataMode"]) 191 ) 192 193 (MolCount, ValidMolCount, CalcFailedCount) = [0] * 3 194 for Result in Results: 195 MolCount += 1 196 MolIndex, EncodedMol, CalcStatus, PartialCharges = Result 197 198 if EncodedMol is None: 199 continue 200 ValidMolCount += 1 201 202 if not CalcStatus: 203 CalcFailedCount += 1 204 continue 205 206 MolWithHs = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol) 207 208 # Write out charges... 209 WriteMolPartialCharges(Writer, MolWithHs, PartialCharges, Compute2DCoords) 210 211 return (MolCount, ValidMolCount, CalcFailedCount) 212 213 214 def InitializeWorkerProcess(*EncodedArgs): 215 """Initialize data for a worker process.""" 216 217 global Options, OptionsInfo 218 219 MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid()) 220 221 # Decode Options and OptionInfo... 222 Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0]) 223 OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1]) 224 225 226 def WorkerProcess(EncodedMolInfo): 227 """Process data for a worker process.""" 228 229 MolIndex, EncodedMol = EncodedMolInfo 230 231 if EncodedMol is None: 232 return [MolIndex, None, False, None] 233 234 Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol) 235 if RDKitUtil.IsMolEmpty(Mol): 236 MolName = RDKitUtil.GetMolName(Mol, (MolIndex + 1)) 237 MiscUtil.PrintWarning("Ignoring empty molecule: %s" % MolName) 238 return [MolIndex, None, False, None] 239 240 MolWithHs = Chem.AddHs(Mol) 241 EncodedMolWithHs = RDKitUtil.MolToBase64EncodedMolString( 242 MolWithHs, PropertyPickleFlags=Chem.PropertyPickleOptions.MolProps | Chem.PropertyPickleOptions.PrivateProps 243 ) 244 245 # Retrieve charges... 246 CalcStatus, PartialCharges = CalculateMolPartialCharges(MolWithHs, (MolIndex + 1)) 247 248 return [MolIndex, EncodedMolWithHs, CalcStatus, PartialCharges] 249 250 251 def CalculateMolPartialCharges(Mol, MolCount): 252 """Calculate partial atomic charges for a molecule.""" 253 254 PartialCharges = [] 255 if OptionsInfo["MMFFChargesMode"]: 256 if AllChem.MMFFHasAllMoleculeParams(Mol): 257 MMFFProp = AllChem.MMFFGetMoleculeProperties(Mol, mmffVariant=OptionsInfo["MMFFVariant"]) 258 PartialCharges = [MMFFProp.GetMMFFPartialCharge(AtomIndex) for AtomIndex in range(Mol.GetNumAtoms())] 259 else: 260 MolName = RDKitUtil.GetMolName(Mol, MolCount) 261 MiscUtil.PrintWarning( 262 "Failed to calculate MMFF partial charges for molecule, %s: Missing forcefield parameters" % MolName 263 ) 264 return (False, PartialCharges) 265 else: 266 rdPartialCharges.ComputeGasteigerCharges( 267 Mol, nIter=OptionsInfo["NumIters"], throwOnParamFailure=OptionsInfo["AllowParamFailure"] 268 ) 269 PartialCharges = [Atom.GetProp("_GasteigerCharge") for Atom in Mol.GetAtoms()] 270 271 # Format charges... 272 PartialCharges = ["%.*f" % (OptionsInfo["Precision"], float(Value)) for Value in PartialCharges] 273 274 return (True, PartialCharges) 275 276 277 def WriteMolPartialCharges(Writer, Mol, PartialCharges, Compute2DCoords): 278 """Write out partial atomic charges for a molecule.""" 279 280 if PartialCharges is None: 281 return 282 283 if OptionsInfo["AtomAliasesFormatMode"]: 284 for Atom, PartialCharge in zip(Mol.GetAtoms(), PartialCharges): 285 Atom.SetProp("molFileAlias", PartialCharge) 286 else: 287 ChargesValues = "\n".join(PartialCharges) 288 Mol.SetProp(OptionsInfo["DataFieldLabel"], ChargesValues) 289 290 if Compute2DCoords: 291 AllChem.Compute2DCoords(Mol) 292 293 Writer.write(Mol) 294 295 296 def SetupMoleculeWriter(): 297 """Setup a molecule writer.""" 298 299 Writer = RDKitUtil.MoleculesWriter(OptionsInfo["Outfile"], **OptionsInfo["OutfileParams"]) 300 if Writer is None: 301 MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OptionsInfo["Outfile"]) 302 MiscUtil.PrintInfo("Generating file %s..." % OptionsInfo["Outfile"]) 303 304 return Writer 305 306 307 def ProcessOptions(): 308 """Process and validate command line arguments and options.""" 309 310 MiscUtil.PrintInfo("Processing options...") 311 312 # Validate options... 313 ValidateOptions() 314 315 AllowParamFailure = True 316 if re.match("^No", Options["--allowParamFailure"], re.I): 317 AllowParamFailure = False 318 OptionsInfo["AllowParamFailure"] = AllowParamFailure 319 320 AtomAliasesFormatMode = True 321 if re.match("^DataField", Options["--chargesSDFormat"], re.I): 322 AtomAliasesFormatMode = False 323 OptionsInfo["AtomAliasesFormatMode"] = AtomAliasesFormatMode 324 325 OptionsInfo["DataFieldLabel"] = Options["--dataFieldLabel"] 326 327 MMFFChargesMode = False 328 if re.match("^MMFF", Options["--mode"], re.I): 329 MMFFChargesMode = True 330 OptionsInfo["Mode"] = Options["--mode"] 331 OptionsInfo["MMFFChargesMode"] = MMFFChargesMode 332 333 MMFFVariant = "MMFF94" if re.match("^MMFF94$", Options["--mmffVariant"], re.I) else "MMFF94s" 334 OptionsInfo["MMFFVariant"] = MMFFVariant 335 336 OptionsInfo["MPMode"] = True if re.match("^yes$", Options["--mp"], re.I) else False 337 OptionsInfo["MPParams"] = MiscUtil.ProcessOptionMultiprocessingParameters("--mpParams", Options["--mpParams"]) 338 339 OptionsInfo["Infile"] = Options["--infile"] 340 OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters( 341 "--infileParams", Options["--infileParams"], Options["--infile"] 342 ) 343 344 OptionsInfo["Outfile"] = Options["--outfile"] 345 OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters( 346 "--outfileParams", Options["--outfileParams"], Options["--infile"], Options["--outfile"] 347 ) 348 349 OptionsInfo["Overwrite"] = Options["--overwrite"] 350 351 OptionsInfo["NumIters"] = int(Options["--numIters"]) 352 OptionsInfo["Precision"] = int(Options["--precision"]) 353 354 355 def RetrieveOptions(): 356 """Retrieve command line arguments and options.""" 357 358 # Get options... 359 global Options 360 Options = docopt(_docoptUsage_) 361 362 # Set current working directory to the specified directory... 363 WorkingDir = Options["--workingdir"] 364 if WorkingDir: 365 os.chdir(WorkingDir) 366 367 # Handle examples option... 368 if "--examples" in Options and Options["--examples"]: 369 MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_)) 370 sys.exit(0) 371 372 373 def ValidateOptions(): 374 """Validate option values.""" 375 376 MiscUtil.ValidateOptionTextValue("-a, --allowParamFailure", Options["--allowParamFailure"], "yes no") 377 MiscUtil.ValidateOptionTextValue("-c, --chargesSDFormat", Options["--chargesSDFormat"], "AtomAliases DataField") 378 379 MiscUtil.ValidateOptionTextValue("-m, --mode", Options["--mode"], "Gasteiger MMFF") 380 MiscUtil.ValidateOptionTextValue(" --mmffVariant", Options["--mmffVariant"], "MMFF94 MMFF94s") 381 382 MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"]) 383 MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "sdf sd mol smi csv tsv txt") 384 385 MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "sdf sd") 386 MiscUtil.ValidateOptionsOutputFileOverwrite( 387 "-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"] 388 ) 389 MiscUtil.ValidateOptionsDistinctFileNames( 390 "-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"] 391 ) 392 393 MiscUtil.ValidateOptionTextValue("--mp", Options["--mp"], "yes no") 394 395 MiscUtil.ValidateOptionIntegerValue("-n, --numIters", Options["--numIters"], {">": 0}) 396 MiscUtil.ValidateOptionIntegerValue("-p, --precision", Options["--precision"], {">": 0}) 397 398 399 # Setup a usage string for docopt... 400 _docoptUsage_ = """ 401 RDKitCalculatePartialCharges.py - Calculate partial atomic charges 402 403 Usage: 404 RDKitCalculatePartialCharges.py [--allowParamFailure <yes or no>] 405 [--chargesSDFormat <AtomAliases or DataField>] [--dataFieldLabel <text>] 406 [--infileParams <Name,Value,...>] [--mode <Gasteiger or MMFF>] 407 [--mmffVariant <MMFF94 or MMFF94s>] [--mp <yes or no>] 408 [--mpParams <Name,Value,...>] [--numIters <number>] 409 [--outfileParams <Name,Value,...>] [--precision <number>] [--overwrite] 410 [-w <dir>] -i <infile> -o <outfile> 411 RDKitCalculatePartialCharges.py -h | --help | -e | --examples 412 413 Description: 414 Calculate partial charges for atoms in molecules and write them out to a SD file. 415 The hydrogens are automatically added to molecules before calculating partial 416 charges. 417 418 The supported input file formats are: Mol (.mol), SD (.sdf, .sd), SMILES (.smi, 419 .txt, .csv, .tsv) 420 421 The supported output file format are: SD File (.sdf, .sd) 422 423 Options: 424 -a, --allowParamFailure <yes or no> [default: yes] 425 Allow calculation of Gasteiger partial charges to proceed for molecules 426 containing atoms with unknown parameters. The atoms with unknown 427 parameters are removed from the calculations by setting their values to 428 zero. 429 -c, --chargesSDFormat <AtomAliases or DataField> [default: AtomAliases] 430 Format for writing out partial atomic charges to SD file. Possible values: 431 AtomAliases or DataField. 432 433 The charges are stored as atom property named 'molFileAlias' for 434 'AtomAliases' format and may be retrieved using the RDKit function 435 'GetProp' for atoms: Aotm.GetProp('molFileAliases'). 436 437 The charges are stored under a data field label specified using 438 '-d, --dataFieldLabel' for 'DataField' format and may be retrieved using the 439 RDKit function 'GetProp' for molecules. 440 -d, --dataFieldLabel <text> [default: PartialCharges] 441 Data field label to use for storing charged in SD file during 'DataField' value 442 of '-c, --chargesSDFormat'. 443 -e, --examples 444 Print examples. 445 -h, --help 446 Print this help message. 447 -i, --infile <infile> 448 Input file name. 449 --infileParams <Name,Value,...> [default: auto] 450 A comma delimited list of parameter name and value pairs for reading 451 molecules from files. The supported parameter names for different file 452 formats, along with their default values, are shown below: 453 454 SD, MOL: sanitize,yes,strictParsing,yes 455 SMILES: smilesColumn,1,smilesNameColumn,2,smilesDelimiter,space, 456 smilesTitleLine,auto,sanitize,yes 457 458 Possible values for smilesDelimiter: space, comma or tab. 459 -m, --mode <Gasteiger or MMFF> [default: Gasteiger] 460 Type of partial atomic charges to calculate. Possible values: Gasteiger 461 [ Ref 138 ] or Merk Molecular Mechanics Fore Field (MMFF) [ Ref 83-87 ]. 462 --mmffVariant <MMFF94 or MMFF94s> [default: MMFF94] 463 Variant of MMFF forcefield to use for calculating charges. 464 --mp <yes or no> [default: no] 465 Use multiprocessing. 466 467 By default, input data is retrieved in a lazy manner via mp.Pool.imap() 468 function employing lazy RDKit data iterable. This allows processing of 469 arbitrary large data sets without any additional requirements memory. 470 471 All input data may be optionally loaded into memory by mp.Pool.map() 472 before starting worker processes in a process pool by setting the value 473 of 'inputDataMode' to 'InMemory' in '--mpParams' option. 474 475 A word to the wise: The default 'chunkSize' value of 1 during 'Lazy' input 476 data mode may adversely impact the performance. The '--mpParams' section 477 provides additional information to tune the value of 'chunkSize'. 478 --mpParams <Name,Value,...> [default: auto] 479 A comma delimited list of parameter name and value pairs to configure 480 multiprocessing. 481 482 The supported parameter names along with their default and possible 483 values are shown below: 484 485 chunkSize, auto 486 inputDataMode, Lazy [ Possible values: InMemory or Lazy ] 487 numProcesses, auto [ Default: mp.cpu_count() ] 488 489 These parameters are used by the following functions to configure and 490 control the behavior of multiprocessing: mp.Pool(), mp.Pool.map(), and 491 mp.Pool.imap(). 492 493 The chunkSize determines chunks of input data passed to each worker 494 process in a process pool by mp.Pool.map() and mp.Pool.imap() functions. 495 The default value of chunkSize is dependent on the value of 'inputDataMode'. 496 497 The mp.Pool.map() function, invoked during 'InMemory' input data mode, 498 automatically converts RDKit data iterable into a list, loads all data into 499 memory, and calculates the default chunkSize using the following method 500 as shown in its code: 501 502 chunkSize, extra = divmod(len(dataIterable), len(numProcesses) * 4) 503 if extra: chunkSize += 1 504 505 For example, the default chunkSize will be 7 for a pool of 4 worker processes 506 and 100 data items. 507 508 The mp.Pool.imap() function, invoked during 'Lazy' input data mode, employs 509 'lazy' RDKit data iterable to retrieve data as needed, without loading all the 510 data into memory. Consequently, the size of input data is not known a priori. 511 It's not possible to estimate an optimal value for the chunkSize. The default 512 chunkSize is set to 1. 513 514 The default value for the chunkSize during 'Lazy' data mode may adversely 515 impact the performance due to the overhead associated with exchanging 516 small chunks of data. It is generally a good idea to explicitly set chunkSize to 517 a larger value during 'Lazy' input data mode, based on the size of your input 518 data and number of processes in the process pool. 519 520 The mp.Pool.map() function waits for all worker processes to process all 521 the data and return the results. The mp.Pool.imap() function, however, 522 returns the the results obtained from worker processes as soon as the 523 results become available for specified chunks of data. 524 525 The order of data in the results returned by both mp.Pool.map() and 526 mp.Pool.imap() functions always corresponds to the input data. 527 -n, --numIters <number> [default: 12] 528 Number of iterations to perform during calculation of Gasteiger charges. 529 -o, --outfile <outfile> 530 Output file name. 531 --outfileParams <Name,Value,...> [default: auto] 532 A comma delimited list of parameter name and value pairs for writing 533 molecules to files. The supported parameter names for different file 534 formats, along with their default values, are shown below: 535 536 SD: compute2DCoords,auto,kekulize,yes,forceV3000,no 537 538 Default value for compute2DCoords: yes for SMILES input file; no for all other 539 file types. 540 -p, --precision <number> [default: 3] 541 Floating point precision for writing the calculated partial atomic charges. 542 --overwrite 543 Overwrite existing files. 544 -w, --workingdir <dir> 545 Location of working directory which defaults to the current directory. 546 547 Examples: 548 To calculate Gasteiger partial atomic charges for molecules in a SMILES 549 file and write them out to a SD file as atom aliases, type: 550 551 % RDKitCalculatePartialCharges.py -i Sample.smi -o SampleOut.sdf 552 553 To calculate Gasteiger partial atomic charges for molecules in a SMILES 554 file in multiprocessing mode on all available CPUs without loading all data 555 into memory, and and write them out to a SD file as atom aliases, type: 556 557 % RDKitCalculatePartialCharges.py --mp yes -i Sample.smi 558 -o SampleOut.sdf 559 560 To calculate Gasteiger partial atomic charges for molecules in a SMILES 561 file in multiprocessing mode on all available CPUs by loading all data 562 into memory, and and write them out to a SD file as atom aliases, type: 563 564 % RDKitCalculatePartialCharges.py --mp yes --mpParams 565 "inputDataMode,InMemory" -i Sample.smi -o SampleOut.sdf 566 567 To calculate Gasteiger partial atomic charges for molecules in a SMILES 568 file in multiprocessing mode on specific number of CPUs without loading 569 all data into memory, and and write them out to a SD file as atom aliases, 570 type: 571 572 % RDKitCalculatePartialCharges.py --mp yes --mpParams 573 "inputDataMode,InMemory,numProcesses,4,chunkSize,8" 574 -i Sample.smi -o SampleOut.sdf 575 576 To calculate MMFF forcefield partial atomic charges for molecules in a SD 577 file and write them out to a SD file under 'PartialCharges' data field, type: 578 579 % RDKitCalculatePartialCharges.py -m MMFF -c DataField -i Sample.sdf 580 -o SampleOut.sdf 581 582 To calculate Gasteiger partial atomic charges for molecules in a SMILES 583 file and write them out to a SD file under a data field named 'GasteigerCharges', 584 type: 585 586 % RDKitCalculatePartialCharges.py -m Gasteiger -c DataField 587 -d GasteigerCharges -p 4 -i Sample.smi -o SampleOut.sdf 588 589 To calculate Gasteiger partial atomic charges for molecules in a CSV SMILES 590 file, SMILES strings in column 1, name in column 2, and write out a SD file 591 containing charges as atom aliases, type: 592 593 % RDKitCalculatePartialCharges.py --infileParams 594 "smilesDelimiter,comma,smilesTitleLine,yes,smilesColumn,1, 595 smilesNameColumn,2" --outfileParams "compute2DCoords,yes" 596 -i SampleSMILES.csv -o SampleOut.sdf 597 598 Author: 599 Manish Sud(msud@san.rr.com) 600 601 See also: 602 RDKitCalculateMolecularDescriptors.py, RDKitCalculateRMSD.py, 603 RDKitCompareMoleculeShapes.py, RDKitConvertFileFormat.py, 604 605 Copyright: 606 Copyright (C) 2026 Manish Sud. All rights reserved. 607 608 The functionality available in this script is implemented using RDKit, an 609 open source toolkit for cheminformatics developed by Greg Landrum. 610 611 This file is part of MayaChemTools. 612 613 MayaChemTools is free software; you can redistribute it and/or modify it under 614 the terms of the GNU Lesser General Public License as published by the Free 615 Software Foundation; either version 3 of the License, or (at your option) any 616 later version. 617 618 """ 619 620 if __name__ == "__main__": 621 main()