1 #!/bin/env python 2 # 3 # File: RDKitCalculatePartialCharges.py 4 # Author: Manish Sud <msud@san.rr.com> 5 # 6 # Copyright (C) 2024 Manish Sud. All rights reserved. 7 # 8 # The functionality available in this script is implemented using RDKit, an 9 # open source toolkit for cheminformatics developed by Greg Landrum. 10 # 11 # This file is part of MayaChemTools. 12 # 13 # MayaChemTools is free software; you can redistribute it and/or modify it under 14 # the terms of the GNU Lesser General Public License as published by the Free 15 # Software Foundation; either version 3 of the License, or (at your option) any 16 # later version. 17 # 18 # MayaChemTools is distributed in the hope that it will be useful, but without 19 # any warranty; without even the implied warranty of merchantability of fitness 20 # for a particular purpose. See the GNU Lesser General Public License for more 21 # details. 22 # 23 # You should have received a copy of the GNU Lesser General Public License 24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or 25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, 26 # Boston, MA, 02111-1307, USA. 27 # 28 29 from __future__ import print_function 30 31 # Add local python path to the global path and import standard library modules... 32 import os 33 import sys; sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python")) 34 import time 35 import re 36 import multiprocessing as mp 37 38 # RDKit imports... 39 try: 40 from rdkit import rdBase 41 from rdkit import Chem 42 from rdkit.Chem import AllChem 43 from rdkit.Chem import rdPartialCharges 44 except ImportError as ErrMsg: 45 sys.stderr.write("\nFailed to import RDKit module/package: %s\n" % ErrMsg) 46 sys.stderr.write("Check/update your RDKit environment and try again.\n\n") 47 sys.exit(1) 48 49 # RDKit dependency imports... 50 import numpy 51 52 # MayaChemTools imports... 53 try: 54 from docopt import docopt 55 import MiscUtil 56 import RDKitUtil 57 except ImportError as ErrMsg: 58 sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg) 59 sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n") 60 sys.exit(1) 61 62 ScriptName = os.path.basename(sys.argv[0]) 63 Options = {} 64 OptionsInfo = {} 65 66 def main(): 67 """Start execution of the script.""" 68 69 MiscUtil.PrintInfo("\n%s (RDKit v%s; MayaChemTools v%s; %s): Starting...\n" % (ScriptName, rdBase.rdkitVersion, MiscUtil.GetMayaChemToolsVersion(), time.asctime())) 70 71 (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime() 72 73 # Retrieve command line arguments and options... 74 RetrieveOptions() 75 76 # Process and validate command line arguments and options... 77 ProcessOptions() 78 79 # Perform actions required by the script... 80 CalculatePartialCharges() 81 82 MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName) 83 MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime)) 84 85 def CalculatePartialCharges(): 86 """Calculate partial atomic charges.""" 87 88 # Setup a molecule reader... 89 MiscUtil.PrintInfo("\nProcessing file %s..." % OptionsInfo["Infile"]) 90 Mols = RDKitUtil.ReadMolecules(OptionsInfo["Infile"], **OptionsInfo["InfileParams"]) 91 92 # Set up a molecule writer... 93 Writer = SetupMoleculeWriter() 94 95 MolCount, ValidMolCount, CalcFailedCount = ProcessMolecules(Mols, Writer) 96 97 if Writer is not None: 98 Writer.close() 99 100 MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount) 101 MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount) 102 MiscUtil.PrintInfo("Number of molecules failed during calculation of partial charges: %d" % CalcFailedCount) 103 MiscUtil.PrintInfo("Number of ignored molecules: %d" % (MolCount - ValidMolCount + CalcFailedCount)) 104 105 def ProcessMolecules(Mols, Writer): 106 """Process molecules and calculate partial charges.""" 107 108 if OptionsInfo["MPMode"]: 109 return ProcessMoleculesUsingMultipleProcesses(Mols, Writer) 110 else: 111 return ProcessMoleculesUsingSingleProcess(Mols, Writer) 112 113 def ProcessMoleculesUsingSingleProcess(Mols, Writer): 114 """Process molecules and calculate partial charges using a single process.""" 115 116 MiscUtil.PrintInfo("Calculating partial atomic charges...") 117 118 Compute2DCoords = OptionsInfo["OutfileParams"]["Compute2DCoords"] 119 120 MolCount, ValidMolCount, CalcFailedCount = [0] * 3 121 for Mol in Mols: 122 MolCount += 1 123 if Mol is None: 124 continue 125 126 if RDKitUtil.IsMolEmpty(Mol): 127 MolName = RDKitUtil.GetMolName(Mol, MolCount) 128 MiscUtil.PrintWarning("Ignoring empty molecule: %s" % MolName) 129 continue 130 ValidMolCount += 1 131 132 MolWithHs = Chem.AddHs(Mol) 133 134 # Retrieve charges... 135 CalcStatus, PartialCharges = CalculateMolPartialCharges(MolWithHs, MolCount) 136 if not CalcStatus: 137 CalcFailedCount += 1 138 continue 139 140 # Write out charges... 141 WriteMolPartialCharges(Writer, MolWithHs, PartialCharges, Compute2DCoords) 142 143 return (MolCount, ValidMolCount, CalcFailedCount) 144 145 def ProcessMoleculesUsingMultipleProcesses(Mols, Writer): 146 """Process molecules and calculate partial charges using a multiprocessing.""" 147 148 MiscUtil.PrintInfo("Calculating partial atomic charges using multiprocessing...") 149 150 MPParams = OptionsInfo["MPParams"] 151 Compute2DCoords = OptionsInfo["OutfileParams"]["Compute2DCoords"] 152 153 # Setup data for initializing a worker process... 154 InitializeWorkerProcessArgs = (MiscUtil.ObjectToBase64EncodedString(Options), MiscUtil.ObjectToBase64EncodedString(OptionsInfo)) 155 156 # Setup a encoded mols data iterable for a worker process... 157 WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(Mols) 158 159 # Setup process pool along with data initialization for each process... 160 MiscUtil.PrintInfo("\nConfiguring multiprocessing using %s method..." % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()")) 161 MiscUtil.PrintInfo("NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n" % (MPParams["NumProcesses"], MPParams["InputDataMode"], ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"]))) 162 163 ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeWorkerProcess, InitializeWorkerProcessArgs) 164 165 # Start processing... 166 if re.match("^Lazy$", MPParams["InputDataMode"], re.I): 167 Results = ProcessPool.imap(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"]) 168 elif re.match("^InMemory$", MPParams["InputDataMode"], re.I): 169 Results = ProcessPool.map(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"]) 170 else: 171 MiscUtil.PrintError("The value, %s, specified for \"--inputDataMode\" is not supported." % (MPParams["InputDataMode"])) 172 173 (MolCount, ValidMolCount, CalcFailedCount) = [0] * 3 174 for Result in Results: 175 MolCount += 1 176 MolIndex, EncodedMol, CalcStatus, PartialCharges = Result 177 178 if EncodedMol is None: 179 continue 180 ValidMolCount += 1 181 182 if not CalcStatus: 183 CalcFailedCount += 1 184 continue 185 186 MolWithHs = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol) 187 188 # Write out charges... 189 WriteMolPartialCharges(Writer, MolWithHs, PartialCharges, Compute2DCoords) 190 191 return (MolCount, ValidMolCount, CalcFailedCount) 192 193 def InitializeWorkerProcess(*EncodedArgs): 194 """Initialize data for a worker process.""" 195 196 global Options, OptionsInfo 197 198 MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid()) 199 200 # Decode Options and OptionInfo... 201 Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0]) 202 OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1]) 203 204 def WorkerProcess(EncodedMolInfo): 205 """Process data for a worker process.""" 206 207 MolIndex, EncodedMol = EncodedMolInfo 208 209 if EncodedMol is None: 210 return [MolIndex, None, False, None] 211 212 Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol) 213 if RDKitUtil.IsMolEmpty(Mol): 214 MolName = RDKitUtil.GetMolName(Mol, (MolIndex + 1)) 215 MiscUtil.PrintWarning("Ignoring empty molecule: %s" % MolName) 216 return [MolIndex, None, False, None] 217 218 MolWithHs = Chem.AddHs(Mol) 219 EncodedMolWithHs = RDKitUtil.MolToBase64EncodedMolString(MolWithHs, PropertyPickleFlags = Chem.PropertyPickleOptions.MolProps | Chem.PropertyPickleOptions.PrivateProps) 220 221 # Retrieve charges... 222 CalcStatus, PartialCharges = CalculateMolPartialCharges(MolWithHs, (MolIndex + 1)) 223 224 return [MolIndex, EncodedMolWithHs, CalcStatus, PartialCharges] 225 226 def CalculateMolPartialCharges(Mol, MolCount): 227 """Calculate partial atomic charges for a molecule.""" 228 229 PartialCharges = [] 230 if OptionsInfo["MMFFChargesMode"]: 231 if AllChem.MMFFHasAllMoleculeParams(Mol): 232 MMFFProp = AllChem.MMFFGetMoleculeProperties(Mol, mmffVariant = OptionsInfo["MMFFVariant"]) 233 PartialCharges = [MMFFProp.GetMMFFPartialCharge(AtomIndex) for AtomIndex in range(Mol.GetNumAtoms())] 234 else: 235 MolName = RDKitUtil.GetMolName(Mol, MolCount) 236 MiscUtil.PrintWarning("Failed to calculate MMFF partial charges for molecule, %s: Missing forcefield parameters" % MolName) 237 return (False, PartialCharges) 238 else: 239 rdPartialCharges.ComputeGasteigerCharges(Mol, nIter = OptionsInfo["NumIters"], throwOnParamFailure = OptionsInfo["AllowParamFailure"]) 240 PartialCharges = [Atom.GetProp("_GasteigerCharge") for Atom in Mol.GetAtoms()] 241 242 # Format charges... 243 PartialCharges = ["%.*f" % (OptionsInfo["Precision"], float(Value)) for Value in PartialCharges] 244 245 return (True, PartialCharges) 246 247 def WriteMolPartialCharges(Writer, Mol, PartialCharges, Compute2DCoords): 248 """Write out partial atomic charges for a molecule.""" 249 250 if PartialCharges is None: 251 return 252 253 if OptionsInfo["AtomAliasesFormatMode"]: 254 for Atom, PartialCharge in zip(Mol.GetAtoms(), PartialCharges): 255 Atom.SetProp('molFileAlias', PartialCharge) 256 else: 257 ChargesValues = "\n".join(PartialCharges) 258 Mol.SetProp(OptionsInfo["DataFieldLabel"], ChargesValues) 259 260 if Compute2DCoords: 261 AllChem.Compute2DCoords(Mol) 262 263 Writer.write(Mol) 264 265 def SetupMoleculeWriter(): 266 """Setup a molecule writer.""" 267 268 Writer = RDKitUtil.MoleculesWriter(OptionsInfo["Outfile"], **OptionsInfo["OutfileParams"]) 269 if Writer is None: 270 MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OptionsInfo["Outfile"]) 271 MiscUtil.PrintInfo("Generating file %s..." % OptionsInfo["Outfile"]) 272 273 return Writer 274 275 def ProcessOptions(): 276 """Process and validate command line arguments and options.""" 277 278 MiscUtil.PrintInfo("Processing options...") 279 280 # Validate options... 281 ValidateOptions() 282 283 AllowParamFailure = True 284 if re.match("^No", Options["--allowParamFailure"], re.I): 285 AllowParamFailure = False 286 OptionsInfo["AllowParamFailure"] = AllowParamFailure 287 288 AtomAliasesFormatMode = True 289 if re.match("^DataField", Options["--chargesSDFormat"], re.I): 290 AtomAliasesFormatMode = False 291 OptionsInfo["AtomAliasesFormatMode"] = AtomAliasesFormatMode 292 293 OptionsInfo["DataFieldLabel"] = Options["--dataFieldLabel"] 294 295 MMFFChargesMode = False 296 if re.match("^MMFF", Options["--mode"], re.I): 297 MMFFChargesMode = True 298 OptionsInfo["Mode"] = Options["--mode"] 299 OptionsInfo["MMFFChargesMode"] = MMFFChargesMode 300 301 MMFFVariant = "MMFF94" if re.match("^MMFF94$", Options["--mmffVariant"], re.I) else "MMFF94s" 302 OptionsInfo["MMFFVariant"] = MMFFVariant 303 304 OptionsInfo["MPMode"] = True if re.match("^yes$", Options["--mp"], re.I) else False 305 OptionsInfo["MPParams"] = MiscUtil.ProcessOptionMultiprocessingParameters("--mpParams", Options["--mpParams"]) 306 307 OptionsInfo["Infile"] = Options["--infile"] 308 OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters("--infileParams", Options["--infileParams"], Options["--infile"]) 309 310 OptionsInfo["Outfile"] = Options["--outfile"] 311 OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters("--outfileParams", Options["--outfileParams"], Options["--infile"], Options["--outfile"]) 312 313 OptionsInfo["Overwrite"] = Options["--overwrite"] 314 315 OptionsInfo["NumIters"] = int(Options["--numIters"]) 316 OptionsInfo["Precision"] = int(Options["--precision"]) 317 318 def RetrieveOptions(): 319 """Retrieve command line arguments and options.""" 320 321 # Get options... 322 global Options 323 Options = docopt(_docoptUsage_) 324 325 # Set current working directory to the specified directory... 326 WorkingDir = Options["--workingdir"] 327 if WorkingDir: 328 os.chdir(WorkingDir) 329 330 # Handle examples option... 331 if "--examples" in Options and Options["--examples"]: 332 MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_)) 333 sys.exit(0) 334 335 def ValidateOptions(): 336 """Validate option values.""" 337 338 MiscUtil.ValidateOptionTextValue("-a, --allowParamFailure", Options["--allowParamFailure"], "yes no") 339 MiscUtil.ValidateOptionTextValue("-c, --chargesSDFormat", Options["--chargesSDFormat"], "AtomAliases DataField") 340 341 MiscUtil.ValidateOptionTextValue("-m, --mode", Options["--mode"], "Gasteiger MMFF") 342 MiscUtil.ValidateOptionTextValue(" --mmffVariant", Options["--mmffVariant"], "MMFF94 MMFF94s") 343 344 MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"]) 345 MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "sdf sd mol smi csv tsv txt") 346 347 MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "sdf sd") 348 MiscUtil.ValidateOptionsOutputFileOverwrite("-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"]) 349 MiscUtil.ValidateOptionsDistinctFileNames("-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"]) 350 351 MiscUtil.ValidateOptionTextValue("--mp", Options["--mp"], "yes no") 352 353 MiscUtil.ValidateOptionIntegerValue("-n, --numIters", Options["--numIters"], {">": 0}) 354 MiscUtil.ValidateOptionIntegerValue("-p, --precision", Options["--precision"], {">": 0}) 355 356 # Setup a usage string for docopt... 357 _docoptUsage_ = """ 358 RDKitCalculatePartialCharges.py - Calculate partial atomic charges 359 360 Usage: 361 RDKitCalculatePartialCharges.py [--allowParamFailure <yes or no>] 362 [--chargesSDFormat <AtomAliases or DataField>] [--dataFieldLabel <text>] 363 [--infileParams <Name,Value,...>] [--mode <Gasteiger or MMFF>] 364 [--mmffVariant <MMFF94 or MMFF94s>] [--mp <yes or no>] 365 [--mpParams <Name,Value,...>] [--numIters <number>] 366 [--outfileParams <Name,Value,...>] [--precision <number>] [--overwrite] 367 [-w <dir>] -i <infile> -o <outfile> 368 RDKitCalculatePartialCharges.py -h | --help | -e | --examples 369 370 Description: 371 Calculate partial charges for atoms in molecules and write them out to a SD file. 372 The hydrogens are automatically added to molecules before calculating partial 373 charges. 374 375 The supported input file formats are: Mol (.mol), SD (.sdf, .sd), SMILES (.smi, 376 .txt, .csv, .tsv) 377 378 The supported output file format are: SD File (.sdf, .sd) 379 380 Options: 381 -a, --allowParamFailure <yes or no> [default: yes] 382 Allow calculation of Gasteiger partial charges to proceed for molecules 383 containing atoms with unknown parameters. The atoms with unknown 384 parameters are removed from the calculations by setting their values to 385 zero. 386 -c, --chargesSDFormat <AtomAliases or DataField> [default: AtomAliases] 387 Format for writing out partial atomic charges to SD file. Possible values: 388 AtomAliases or DataField. 389 390 The charges are stored as atom property named 'molFileAlias' for 391 'AtomAliases' format and may be retrieved using the RDKit function 392 'GetProp' for atoms: Aotm.GetProp('molFileAliases'). 393 394 The charges are stored under a data field label specified using 395 '-d, --dataFieldLabel' for 'DataField' format and may be retrieved using the 396 RDKit function 'GetProp' for molecules. 397 -d, --dataFieldLabel <text> [default: PartialCharges] 398 Data field label to use for storing charged in SD file during 'DataField' value 399 of '-c, --chargesSDFormat'. 400 -e, --examples 401 Print examples. 402 -h, --help 403 Print this help message. 404 -i, --infile <infile> 405 Input file name. 406 --infileParams <Name,Value,...> [default: auto] 407 A comma delimited list of parameter name and value pairs for reading 408 molecules from files. The supported parameter names for different file 409 formats, along with their default values, are shown below: 410 411 SD, MOL: sanitize,yes,strictParsing,yes 412 SMILES: smilesColumn,1,smilesNameColumn,2,smilesDelimiter,space, 413 smilesTitleLine,auto,sanitize,yes 414 415 Possible values for smilesDelimiter: space, comma or tab. 416 -m, --mode <Gasteiger or MMFF> [default: Gasteiger] 417 Type of partial atomic charges to calculate. Possible values: Gasteiger 418 [ Ref 138 ] or Merk Molecular Mechanics Fore Field (MMFF) [ Ref 83-87 ]. 419 --mmffVariant <MMFF94 or MMFF94s> [default: MMFF94] 420 Variant of MMFF forcefield to use for calculating charges. 421 --mp <yes or no> [default: no] 422 Use multiprocessing. 423 424 By default, input data is retrieved in a lazy manner via mp.Pool.imap() 425 function employing lazy RDKit data iterable. This allows processing of 426 arbitrary large data sets without any additional requirements memory. 427 428 All input data may be optionally loaded into memory by mp.Pool.map() 429 before starting worker processes in a process pool by setting the value 430 of 'inputDataMode' to 'InMemory' in '--mpParams' option. 431 432 A word to the wise: The default 'chunkSize' value of 1 during 'Lazy' input 433 data mode may adversely impact the performance. The '--mpParams' section 434 provides additional information to tune the value of 'chunkSize'. 435 --mpParams <Name,Value,...> [default: auto] 436 A comma delimited list of parameter name and value pairs to configure 437 multiprocessing. 438 439 The supported parameter names along with their default and possible 440 values are shown below: 441 442 chunkSize, auto 443 inputDataMode, Lazy [ Possible values: InMemory or Lazy ] 444 numProcesses, auto [ Default: mp.cpu_count() ] 445 446 These parameters are used by the following functions to configure and 447 control the behavior of multiprocessing: mp.Pool(), mp.Pool.map(), and 448 mp.Pool.imap(). 449 450 The chunkSize determines chunks of input data passed to each worker 451 process in a process pool by mp.Pool.map() and mp.Pool.imap() functions. 452 The default value of chunkSize is dependent on the value of 'inputDataMode'. 453 454 The mp.Pool.map() function, invoked during 'InMemory' input data mode, 455 automatically converts RDKit data iterable into a list, loads all data into 456 memory, and calculates the default chunkSize using the following method 457 as shown in its code: 458 459 chunkSize, extra = divmod(len(dataIterable), len(numProcesses) * 4) 460 if extra: chunkSize += 1 461 462 For example, the default chunkSize will be 7 for a pool of 4 worker processes 463 and 100 data items. 464 465 The mp.Pool.imap() function, invoked during 'Lazy' input data mode, employs 466 'lazy' RDKit data iterable to retrieve data as needed, without loading all the 467 data into memory. Consequently, the size of input data is not known a priori. 468 It's not possible to estimate an optimal value for the chunkSize. The default 469 chunkSize is set to 1. 470 471 The default value for the chunkSize during 'Lazy' data mode may adversely 472 impact the performance due to the overhead associated with exchanging 473 small chunks of data. It is generally a good idea to explicitly set chunkSize to 474 a larger value during 'Lazy' input data mode, based on the size of your input 475 data and number of processes in the process pool. 476 477 The mp.Pool.map() function waits for all worker processes to process all 478 the data and return the results. The mp.Pool.imap() function, however, 479 returns the the results obtained from worker processes as soon as the 480 results become available for specified chunks of data. 481 482 The order of data in the results returned by both mp.Pool.map() and 483 mp.Pool.imap() functions always corresponds to the input data. 484 -n, --numIters <number> [default: 12] 485 Number of iterations to perform during calculation of Gasteiger charges. 486 -o, --outfile <outfile> 487 Output file name. 488 --outfileParams <Name,Value,...> [default: auto] 489 A comma delimited list of parameter name and value pairs for writing 490 molecules to files. The supported parameter names for different file 491 formats, along with their default values, are shown below: 492 493 SD: compute2DCoords,auto,kekulize,yes,forceV3000,no 494 495 Default value for compute2DCoords: yes for SMILES input file; no for all other 496 file types. 497 -p, --precision <number> [default: 3] 498 Floating point precision for writing the calculated partial atomic charges. 499 --overwrite 500 Overwrite existing files. 501 -w, --workingdir <dir> 502 Location of working directory which defaults to the current directory. 503 504 Examples: 505 To calculate Gasteiger partial atomic charges for molecules in a SMILES 506 file and write them out to a SD file as atom aliases, type: 507 508 % RDKitCalculatePartialCharges.py -i Sample.smi -o SampleOut.sdf 509 510 To calculate Gasteiger partial atomic charges for molecules in a SMILES 511 file in multiprocessing mode on all available CPUs without loading all data 512 into memory, and and write them out to a SD file as atom aliases, type: 513 514 % RDKitCalculatePartialCharges.py --mp yes -i Sample.smi 515 -o SampleOut.sdf 516 517 To calculate Gasteiger partial atomic charges for molecules in a SMILES 518 file in multiprocessing mode on all available CPUs by loading all data 519 into memory, and and write them out to a SD file as atom aliases, type: 520 521 % RDKitCalculatePartialCharges.py --mp yes --mpParams 522 "inputDataMode,InMemory" -i Sample.smi -o SampleOut.sdf 523 524 To calculate Gasteiger partial atomic charges for molecules in a SMILES 525 file in multiprocessing mode on specific number of CPUs without loading 526 all data into memory, and and write them out to a SD file as atom aliases, 527 type: 528 529 % RDKitCalculatePartialCharges.py --mp yes --mpParams 530 "inputDataMode,InMemory,numProcesses,4,chunkSize,8" 531 -i Sample.smi -o SampleOut.sdf 532 533 To calculate MMFF forcefield partial atomic charges for molecules in a SD 534 file and write them out to a SD file under 'PartialCharges' data field, type: 535 536 % RDKitCalculatePartialCharges.py -m MMFF -c DataField -i Sample.sdf 537 -o SampleOut.sdf 538 539 To calculate Gasteiger partial atomic charges for molecules in a SMILES 540 file and write them out to a SD file under a data field named 'GasteigerCharges', 541 type: 542 543 % RDKitCalculatePartialCharges.py -m Gasteiger -c DataField 544 -d GasteigerCharges -p 4 -i Sample.smi -o SampleOut.sdf 545 546 To calculate Gasteiger partial atomic charges for molecules in a CSV SMILES 547 file, SMILES strings in column 1, name in column 2, and write out a SD file 548 containing charges as atom aliases, type: 549 550 % RDKitCalculatePartialCharges.py --infileParams 551 "smilesDelimiter,comma,smilesTitleLine,yes,smilesColumn,1, 552 smilesNameColumn,2" --outfileParams "compute2DCoords,yes" 553 -i SampleSMILES.csv -o SampleOut.sdf 554 555 Author: 556 Manish Sud(msud@san.rr.com) 557 558 See also: 559 RDKitCalculateMolecularDescriptors.py, RDKitCalculateRMSD.py, 560 RDKitCompareMoleculeShapes.py, RDKitConvertFileFormat.py, 561 562 Copyright: 563 Copyright (C) 2024 Manish Sud. All rights reserved. 564 565 The functionality available in this script is implemented using RDKit, an 566 open source toolkit for cheminformatics developed by Greg Landrum. 567 568 This file is part of MayaChemTools. 569 570 MayaChemTools is free software; you can redistribute it and/or modify it under 571 the terms of the GNU Lesser General Public License as published by the Free 572 Software Foundation; either version 3 of the License, or (at your option) any 573 later version. 574 575 """ 576 577 if __name__ == "__main__": 578 main()