MayaChemTools

   1 #!/bin/env python
   2 #
   3 # File: RDKitCalculatePartialCharges.py
   4 # Author: Manish Sud <msud@san.rr.com>
   5 #
   6 # Copyright (C) 2019 Manish Sud. All rights reserved.
   7 #
   8 # The functionality available in this script is implemented using RDKit, an
   9 # open source toolkit for cheminformatics developed by Greg Landrum.
  10 #
  11 # This file is part of MayaChemTools.
  12 #
  13 # MayaChemTools is free software; you can redistribute it and/or modify it under
  14 # the terms of the GNU Lesser General Public License as published by the Free
  15 # Software Foundation; either version 3 of the License, or (at your option) any
  16 # later version.
  17 #
  18 # MayaChemTools is distributed in the hope that it will be useful, but without
  19 # any warranty; without even the implied warranty of merchantability or fitness
  20 # for a particular purpose.  See the GNU Lesser General Public License for more
  21 # details.
  22 #
  23 # You should have received a copy of the GNU Lesser General Public License
  24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  26 # Boston, MA, 02111-1307, USA.
  27 #
  28 
  29 from __future__ import print_function
  30 
  31 # Add local python path to the global path and import standard library modules...
  32 import os
  33 import sys;  sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python"))
  34 import time
  35 import re
  36 import multiprocessing as mp
  37 
  38 # RDKit imports...
  39 try:
  40     from rdkit import rdBase
  41     from rdkit import Chem
  42     from rdkit.Chem import AllChem
  43     from rdkit.Chem import rdPartialCharges
  44 except ImportError as ErrMsg:
  45     sys.stderr.write("\nFailed to import RDKit module/package: %s\n" % ErrMsg)
  46     sys.stderr.write("Check/update your RDKit environment and try again.\n\n")
  47     sys.exit(1)
  48 
  49 # RDKit dependency imports...
  50 import numpy
  51 
  52 # MayaChemTools imports...
  53 try:
  54     from docopt import docopt
  55     import MiscUtil
  56     import RDKitUtil
  57 except ImportError as ErrMsg:
  58     sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg)
  59     sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n")
  60     sys.exit(1)
  61 
# Base name of the invoked script; used in the start and end status messages.
ScriptName = os.path.basename(sys.argv[0])
# Raw command line options; replaced by the docopt result in RetrieveOptions()
# and by the decoded copy in InitializeWorkerProcess() for worker processes.
Options = {}
# Processed option values; filled in by ProcessOptions() and replaced by the
# decoded copy in InitializeWorkerProcess() for worker processes.
OptionsInfo = {}

# NOTE(review): not referenced anywhere else in the visible code - confirm
# whether this is still needed.
DescriptorNamesMap = {}
  67 
  68 def main():
  69     """Start execution of the script"""
  70     
  71     MiscUtil.PrintInfo("\n%s (RDK v%s; %s): Starting...\n" % (ScriptName, rdBase.rdkitVersion, time.asctime()))
  72     
  73     (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime()
  74     
  75     # Retrieve command line arguments and options...
  76     RetrieveOptions()
  77     
  78     # Process and validate command line arguments and options...
  79     ProcessOptions()
  80     
  81     # Perform actions required by the script...
  82     CalculatePartialCharges()
  83     
  84     MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName)
  85     MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime))
  86 
  87 def CalculatePartialCharges():
  88     """Calculate partial atomic charges."""
  89 
  90     # Setup a molecule reader...
  91     MiscUtil.PrintInfo("\nProcessing file %s..." % OptionsInfo["Infile"])
  92     Mols  = RDKitUtil.ReadMolecules(OptionsInfo["Infile"], **OptionsInfo["InfileParams"])
  93     
  94     # Set up a molecule writer...
  95     Writer = SetupMoleculeWriter()
  96         
  97     MolCount, ValidMolCount, CalcFailedCount = ProcessMolecules(Mols, Writer)
  98     
  99     if Writer is not None:
 100         Writer.close()
 101         
 102     MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount)
 103     MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount)
 104     MiscUtil.PrintInfo("Number of molecules failed during calculation of partial charges: %d" % CalcFailedCount)
 105     MiscUtil.PrintInfo("Number of ignored molecules: %d" % (MolCount - ValidMolCount + CalcFailedCount))
 106 
 107 def ProcessMolecules(Mols, Writer):
 108     """Process molecules and calculate partial charges."""
 109 
 110     if OptionsInfo["MPMode"]:
 111         return ProcessMoleculesUsingMultipleProcesses(Mols, Writer)
 112     else:
 113         return ProcessMoleculesUsingSingleProcess(Mols, Writer)
 114 
 115 def ProcessMoleculesUsingSingleProcess(Mols,  Writer):
 116     """Process molecules and calculate partial charges using a single process. """
 117     
 118     MiscUtil.PrintInfo("Calculating partial atomic charges...")
 119     
 120     Compute2DCoords = OptionsInfo["OutfileParams"]["Compute2DCoords"]
 121     
 122     MolCount, ValidMolCount, CalcFailedCount = [0] * 3
 123     for Mol in Mols:
 124         MolCount += 1
 125         if Mol is None:
 126             continue
 127         
 128         if RDKitUtil.IsMolEmpty(Mol):
 129             MolName = RDKitUtil.GetMolName(Mol, MolCount)
 130             MiscUtil.PrintWarning("Ignoring empty molecule: %s" % MolName)
 131             continue
 132         ValidMolCount += 1
 133 
 134         MolWithHs = Chem.AddHs(Mol)
 135         
 136         # Retrieve charges...
 137         CalcStatus, PartialCharges = CalculateMolPartialCharges(MolWithHs, MolCount)
 138         if not CalcStatus:
 139             CalcFailedCount += 1
 140             continue
 141         
 142         # Write out charges...
 143         WriteMolPartialCharges(Writer, MolWithHs, PartialCharges, Compute2DCoords)
 144         
 145     return (MolCount, ValidMolCount, CalcFailedCount)
 146     
 147 def ProcessMoleculesUsingMultipleProcesses(Mols,  Writer):
 148     """Process molecules and calculate partial charges using a multiprocessing. """
 149     
 150     MiscUtil.PrintInfo("Calculating partial atomic charges using multiprocessing...")
 151     
 152     MPParams = OptionsInfo["MPParams"]
 153     Compute2DCoords = OptionsInfo["OutfileParams"]["Compute2DCoords"]
 154     
 155     # Setup data for initializing a worker process...
 156     InitializeWorkerProcessArgs = (MiscUtil.ObjectToBase64EncodedString(Options), MiscUtil.ObjectToBase64EncodedString(OptionsInfo))
 157 
 158     # Setup a encoded mols data iterable for a worker process...
 159     WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(Mols)
 160 
 161     # Setup process pool along with data initialization for each process...
 162     MiscUtil.PrintInfo("\nConfiguring multiprocessing using %s method..." % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()"))
 163     MiscUtil.PrintInfo("NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n" % (MPParams["NumProcesses"], MPParams["InputDataMode"], ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"])))
 164     
 165     ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeWorkerProcess, InitializeWorkerProcessArgs)
 166     
 167     # Start processing...
 168     if re.match("^Lazy$", MPParams["InputDataMode"], re.I):
 169         Results = ProcessPool.imap(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
 170     elif re.match("^InMemory$", MPParams["InputDataMode"], re.I):
 171         Results = ProcessPool.map(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
 172     else:
 173         MiscUtil.PrintError("The value, %s, specified for \"--inputDataMode\" is not supported." % (MPParams["InputDataMode"]))
 174     
 175     (MolCount, ValidMolCount, CalcFailedCount) = [0] * 3
 176     for Result in Results:
 177         MolCount += 1
 178         MolIndex, EncodedMol, CalcStatus, PartialCharges = Result
 179         
 180         if EncodedMol is None:
 181             continue
 182         ValidMolCount += 1
 183         
 184         if not CalcStatus:
 185             CalcFailedCount += 1
 186             continue
 187 
 188         MolWithHs = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
 189         
 190         # Write out charges...
 191         WriteMolPartialCharges(Writer, MolWithHs, PartialCharges, Compute2DCoords)
 192     
 193     return (MolCount, ValidMolCount, CalcFailedCount)
 194     
 195 def InitializeWorkerProcess(*EncodedArgs):
 196     """Initialize data for a worker process."""
 197 
 198     global Options, OptionsInfo
 199     
 200     MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid())
 201 
 202     # Decode Options and OptionInfo...
 203     Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0])
 204     OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1])
 205 
 206 def WorkerProcess(EncodedMolInfo):
 207     """Process data for a worker process."""
 208     
 209     MolIndex, EncodedMol = EncodedMolInfo
 210 
 211     if EncodedMol is None:
 212         return [MolIndex, None, False, None]
 213         
 214     Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
 215     if RDKitUtil.IsMolEmpty(Mol):
 216         MolName = RDKitUtil.GetMolName(Mol, (MolIndex + 1))
 217         MiscUtil.PrintWarning("Ignoring empty molecule: %s" % MolName)
 218         return [MolIndex, None, False, None]
 219         
 220     MolWithHs = Chem.AddHs(Mol)
 221     EncodedMolWithHs = RDKitUtil.MolToBase64EncodedMolString(MolWithHs, PropertyPickleFlags = Chem.PropertyPickleOptions.MolProps | Chem.PropertyPickleOptions.PrivateProps)
 222     
 223     # Retrieve charges...
 224     CalcStatus, PartialCharges = CalculateMolPartialCharges(MolWithHs, (MolIndex + 1))
 225     
 226     return [MolIndex, EncodedMolWithHs, CalcStatus, PartialCharges]
 227 
 228 def CalculateMolPartialCharges(Mol, MolCount):
 229     """Calculate partial atomic charges for a molecule."""
 230     
 231     PartialCharges = []
 232     if OptionsInfo["MMFFChargesMode"]:
 233         if  AllChem.MMFFHasAllMoleculeParams(Mol):
 234             MMFFProp = AllChem.MMFFGetMoleculeProperties(Mol)
 235             PartialCharges = [MMFFProp.GetMMFFPartialCharge(AtomIndex) for AtomIndex in range(Mol.GetNumAtoms())]
 236         else:
 237             MolName = RDKitUtil.GetMolName(Mol, MolCount)
 238             MiscUtil.PrintWarning("Failed to calculate MMFF partial charges for molecule, %s: Missing forcefield parameters" % MolName)
 239             return (False, PartialCharges)
 240     else:
 241         rdPartialCharges.ComputeGasteigerCharges(Mol, nIter = OptionsInfo["NumIters"], throwOnParamFailure = OptionsInfo["AllowParamFailure"])
 242         PartialCharges = [Atom.GetProp("_GasteigerCharge") for Atom in Mol.GetAtoms()]
 243     
 244     # Format charges...
 245     PartialCharges = ["%.*f" % (OptionsInfo["Precision"], float(Value)) for Value in PartialCharges]
 246     
 247     return (True, PartialCharges)
 248     
 249 def WriteMolPartialCharges(Writer, Mol, PartialCharges, Compute2DCoords):
 250     """Write out partial atomic charges for a molecule."""
 251 
 252     if PartialCharges is None:
 253         return
 254     
 255     if OptionsInfo["AtomAliasesFormatMode"]:
 256         for Atom, PartialCharge in zip(Mol.GetAtoms(), PartialCharges):
 257             Atom.SetProp('molFileAlias', PartialCharge)
 258     else:
 259         ChargesValues = "\n".join(PartialCharges)
 260         Mol.SetProp(OptionsInfo["DataFieldLabel"], ChargesValues)
 261     
 262     if Compute2DCoords:
 263         AllChem.Compute2DCoords(Mol)
 264     
 265     Writer.write(Mol)
 266     
 267 def SetupMoleculeWriter():
 268     """Setup a molecule writer."""
 269     
 270     Writer = RDKitUtil.MoleculesWriter(OptionsInfo["Outfile"], **OptionsInfo["OutfileParams"])
 271     if Writer is None:
 272         MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OptionsInfo["Outfile"])
 273     MiscUtil.PrintInfo("Generating file %s..." % OptionsInfo["Outfile"])
 274     
 275     return Writer
 276 
 277 def ProcessOptions():
 278     """Process and validate command line arguments and options"""
 279     
 280     MiscUtil.PrintInfo("Processing options...")
 281     
 282     # Validate options...
 283     ValidateOptions()
 284 
 285     AllowParamFailure = True
 286     if re.match("^No", Options["--allowParamFailure"], re.I):
 287         AllowParamFailure = False
 288     OptionsInfo["AllowParamFailure"] = AllowParamFailure
 289     
 290     AtomAliasesFormatMode = True
 291     if re.match("^DataField", Options["--chargesSDFormat"], re.I):
 292         AtomAliasesFormatMode = False
 293     OptionsInfo["AtomAliasesFormatMode"] = AtomAliasesFormatMode
 294 
 295     OptionsInfo["DataFieldLabel"] = Options["--dataFieldLabel"]
 296     
 297     MMFFChargesMode = False
 298     if re.match("^MMFF", Options["--mode"], re.I):
 299         MMFFChargesMode = True
 300     OptionsInfo["Mode"] = Options["--mode"]
 301     OptionsInfo["MMFFChargesMode"] = MMFFChargesMode
 302     
 303     OptionsInfo["MPMode"] = True if re.match("^yes$", Options["--mp"], re.I) else False
 304     OptionsInfo["MPParams"] = MiscUtil.ProcessOptionMultiprocessingParameters("--mpParams", Options["--mpParams"])
 305     
 306     OptionsInfo["Infile"] = Options["--infile"]
 307     OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters("--infileParams", Options["--infileParams"], Options["--infile"])
 308     
 309     OptionsInfo["Outfile"] = Options["--outfile"]
 310     OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters("--outfileParams", Options["--outfileParams"], Options["--infile"], Options["--outfile"])
 311     
 312     OptionsInfo["Overwrite"] = Options["--overwrite"]
 313     
 314     OptionsInfo["NumIters"] = int(Options["--numIters"])
 315     OptionsInfo["Precision"] = int(Options["--precision"])
 316 
 317 def RetrieveOptions():
 318     """Retrieve command line arguments and options"""
 319     
 320     # Get options...
 321     global Options
 322     Options = docopt(_docoptUsage_)
 323     
 324     # Set current working directory to the specified directory...
 325     WorkingDir = Options["--workingdir"]
 326     if WorkingDir:
 327         os.chdir(WorkingDir)
 328     
 329     # Handle examples option...
 330     if "--examples" in Options and Options["--examples"]:
 331         MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_))
 332         sys.exit(0)
 333         
 334 def ValidateOptions():
 335     """Validate option values"""
 336 
 337     MiscUtil.ValidateOptionTextValue("-a, --allowParamFailure", Options["--allowParamFailure"], "yes no")
 338     MiscUtil.ValidateOptionTextValue("-c, --chargesSDFormat", Options["--chargesSDFormat"], "AtomAliases DataField")
 339     
 340     MiscUtil.ValidateOptionTextValue("-m, --mode", Options["--mode"], "Gasteiger MMFF")
 341     
 342     MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"])
 343     MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "sdf sd mol smi csv tsv txt")
 344     
 345     MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "sdf sd")
 346     MiscUtil.ValidateOptionsOutputFileOverwrite("-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"])
 347     MiscUtil.ValidateOptionsDistinctFileNames("-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"])
 348 
 349     MiscUtil.ValidateOptionTextValue("--mp", Options["--mp"], "yes no")
 350     
 351     MiscUtil.ValidateOptionIntegerValue("-n, --numIters", Options["--numIters"], {">": 0})
 352     MiscUtil.ValidateOptionIntegerValue("-p, --precision", Options["--precision"], {">": 0})
 353         
 354 # Setup a usage string for docopt...
 355 _docoptUsage_ = """
 356 RDKitCalculatePartialCharges.py - Calculate partial atomic charges
 357 
 358 Usage:
 359     RDKitCalculatePartialCharges.py [--allowParamFailure <yes or no>]
 360                                           [--chargesSDFormat <AtomAliases or DataField>]  [--dataFieldLabel <text>]
 361                                           [--infileParams <Name,Value,...>] [--mode <Gasteiger or MMFF>]
 362                                           [--mp <yes or no>] [--mpParams <Name.Value,...>] [--numIters <number>]
 363                                           [--outfileParams <Name,Value,...>] [--precision <number>] [--overwrite]
 364                                           [-w <dir>] -i <infile> -o <outfile> 
 365     RDKitCalculatePartialCharges.py -h | --help | -e | --examples
 366 
 367 Description:
 368     Calculate partial charges for atoms in molecules and write them out to a SD file.
 369     The hydrogens are automatically added to molecules before calculating partial
 370     charges.
 371 
 372     The supported input file formats are: Mol (.mol), SD (.sdf, .sd), SMILES (.smi,
 373     .txt, .csv, .tsv)
 374 
 375     The supported output file format are: SD File (.sdf, .sd)
 376 
 377 Options:
 378     -a, --allowParamFailure <yes or no>  [default: yes]
 379         Allow calculation of Gasteiger partial charges to proceed for molecules
 380         containing atoms with unknown parameters. The atoms with unknown
 381         parameters are removed from the calculations by setting their values to
 382         zero.
 383     -c, --chargesSDFormat <AtomAliases or DataField>  [default: AtomAliases]
 384         Format for writing out partial atomic charges to SD file. Possible values:
 385         AtomAliases or DataField.
 386         
 387         The charges are stored as atom property named 'molFileAlias' for
 388         'AtomAliases' format and may be retrieved using the RDKit function
 389         'GetProp' for atoms: Aotm.GetProp('molFileAliases').
 390         
 391         The charges are stored under a data field label speciefied using
 392         '-d, --dataFieldLabel' for 'DataField' format and may be retrieved using the
 393         RDKit function 'GetProp' for molecues.
 394     -d, --dataFieldLabel <text>  [default: PartialCharges]
 395         Data field label to use for storing charged in SD file during 'DataField' value
 396         of '-c, --chargesSDFormat'.
 397     -e, --examples
 398         Print examples.
 399     -h, --help
 400         Print this help message.
 401     -i, --infile <infile>
 402         Input file name.
 403     --infileParams <Name,Value,...>  [default: auto]
 404         A comma delimited list of parameter name and value pairs for reading
 405         molecules from files. The supported parameter names for different file
 406         formats, along with their default values, are shown below:
 407             
 408             SD, MOL: removeHydrogens,yes,sanitize,yes,strictParsing,yes
 409             SMILES: smilesColumn,1,smilesNameColumn,2,smilesDelimiter,space,
 410                 smilesTitleLine,auto,sanitize,yes
 411             
 412         Possible values for smilesDelimiter: space, comma or tab.
 413     -m, --mode <Gasteiger or MMFF>  [default: Gasteiger]
 414         Type of partial atomic charges to calculate. Possible values: Gasteiger
 415         [ Ref 138 ] or Merk Molecular Mechanics Fore Field (MMFF) [ Ref 83-87 ].
 416     --mp <yes or no>  [default: no]
 417         Use multiprocessing.
 418          
 419         By default, input data is retrieved in a lazy manner via mp.Pool.imap()
 420         function employing lazy RDKit data iterable. This allows processing of
 421         arbitrary large data sets without any additional requirements memory.
 422         
 423         All input data may be optionally loaded into memory by mp.Pool.map()
 424         before starting worker processes in a process pool by setting the value
 425         of 'inputDataMode' to 'InMemory' in '--mpParams' option.
 426         
 427         A word to the wise: The default 'chunkSize' value of 1 during 'Lazy' input
 428         data mode may adversely impact the performance. The '--mpParams' section
 429         provides additional information to tune the value of 'chunkSize'.
 430     --mpParams <Name,Value,...>  [default: auto]
 431         A comma delimited list of parameter name and value pairs for to
 432         configure multiprocessing.
 433         
 434         The supported parameter names along with their default and possible
 435         values are shown below:
 436         
 437             chunkSize, auto
 438             inputDataMode, Lazy   [ Possible values: InMemory or Lazy ]
 439             numProcesses, auto   [ Default: mp.cpu_count() ]
 440         
 441         These parameters are used by the following functions to configure and
 442         control the behavior of multiprocessing: mp.Pool(), mp.Pool.map(), and
 443         mp.Pool.imap().
 444         
 445         The chunkSize determines chunks of input data passed to each worker
 446         process in a process pool by mp.Pool.map() and mp.Pool.imap() functions.
 447         The default value of chunkSize is dependent on the value of 'inputDataMode'.
 448         
 449         The mp.Pool.map() function, invoked during 'InMemory' input data mode,
 450         automatically converts RDKit data iterable into a list, loads all data into
 451         memory, and calculates the default chunkSize using the following method
 452         as shown in its code:
 453         
 454             chunkSize, extra = divmod(len(dataIterable), len(numProcesses) * 4)
 455             if extra: chunkSize += 1
 456         
 457         For example, the default chunkSize will be 7 for a pool of 4 worker processes
 458         and 100 data items.
 459         
 460         The mp.Pool.imap() function, invoked during 'Lazy' input data mode, employs
 461         'lazy' RDKit data iterable to retrieve data as needed, without loading all the
 462         data into memory. Consequently, the size of input data is not known a priori.
 463         It's not possible to estimate an optimal value for the chunkSize. The default 
 464         chunkSize is set to 1.
 465         
 466         The default value for the chunkSize during 'Lazy' data mode may adversely
 467         impact the performance due to the overhead associated with exchanging
 468         small chunks of data. It is generally a good idea to explicitly set chunkSize to
 469         a larger value during 'Lazy' input data mode, based on the size of your input
 470         data and number of processes in the process pool.
 471         
 472         The mp.Pool.map() function waits for all worker processes to process all
 473         the data and return the results. The mp.Pool.imap() function, however,
 474         returns the the results obtained from worker processes as soon as the
 475         results become available for specified chunks of data.
 476         
 477         The order of data in the results returned by both mp.Pool.map() and 
 478         mp.Pool.imap() functions always corresponds to the input data.
 479     -n, --numIters <number>  [default: 12]
 480         Number of iterations to perform during calculation of Gasteiger charges.
 481     -o, --outfile <outfile>
 482         Output file name.
 483     --outfileParams <Name,Value,...>  [default: auto]
 484         A comma delimited list of parameter name and value pairs for writing
 485         molecules to files. The supported parameter names for different file
 486         formats, along with their default values, are shown below:
 487             
 488             SD: compute2DCoords,auto,kekulize,no
 489             
 490         Default value for compute2DCoords: yes for SMILES input file; no for all other
 491         file types.
 492     -p, --precision <number>  [default: 3]
 493         Floating point precision for writing the calculated partial atomic charges.
 494     --overwrite
 495         Overwrite existing files.
 496     -w, --workingdir <dir>
 497         Location of working directory which defaults to the current directory.
 498 
 499 Examples:
 500     To calculate Gasteiger partial atomic charges for molecules in a SMILES
 501     file and write them out to a SD file as atom aliases, type:
 502 
 503         % RDKitCalculatePartialCharges.py  -i Sample.smi -o SampleOut.sdf
 504 
 505     To calculate Gasteiger partial atomic charges for molecules in a SMILES
 506     file in multiprocessing mode on all available CPUs without loading all data
 507     into memory, and and write them out to a SD file as atom aliases, type:
 508 
 509         % RDKitCalculatePartialCharges.py  --mp yes -i Sample.smi
 510           -o SampleOut.sdf
 511 
 512     To calculate Gasteiger partial atomic charges for molecules in a SMILES
 513     file in multiprocessing mode on all available CPUs by loading all data
 514     into memory, and and write them out to a SD file as atom aliases, type:
 515 
 516         % RDKitCalculatePartialCharges.py  --mp yes --mpParams
 517           "inputDataMode,InMemory" -i Sample.smi -o SampleOut.sdf
 518 
 519     To calculate Gasteiger partial atomic charges for molecules in a SMILES
 520     file in multiprocessing mode on specific number of CPUs without loading
 521     all data into memory, and and write them out to a SD file as atom aliases,
 522     type:
 523 
 524         % RDKitCalculatePartialCharges.py  --mp yes --mpParams
 525           "inputDataMode,InMemory,numProcesses,4,chunkSize,8"
 526           -i Sample.smi -o SampleOut.sdf
 527 
 528     To calculate MMFF forcefield partial atomic charges for molecules in a SD
 529     file and write them out to a SD file under 'PartialCharges' data field, type:
 530 
 531         % RDKitCalculatePartialCharges.py  -m MMFF -c DataField -i Sample.sdf
 532           -o SampleOut.sdf
 533 
 534     To calculate Gasteiger partial atomic charges for molecules in a SMILES
 535     file and write them out to a SD file under a data field named 'GasteigerCharges',
 536     type:
 537 
 538         % RDKitCalculatePartialCharges.py  -m Gasteiger -c DataField
 539           -d GasteigerCharges -p 4 -i Sample.smi -o SampleOut.sdf
 540 
 541     To calculate Gasteiger partial atomic charges for molecules in a CSV SMILES
 542     file, SMILES strings in column 1, name in column 2, and write out a SD file
 543     containing charges as atom aliases, type:
 544 
 545         % RDKitCalculatePartialCharges.py --infileParams
 546           "smilesDelimiter,comma,smilesTitleLine,yes,smilesColumn,1,
 547           smilesNameColumn,2" --outfileParams "compute2DCoords,yes"
 548           -i SampleSMILES.csv -o SampleOut.sdf
 549 
 550 Author:
 551     Manish Sud(msud@san.rr.com)
 552 
 553 See also:
 554     RDKitCalculateMolecularDescriptors.py, RDKitCalculateRMSD.py,
 555     RDKitCompareMoleculeShapes.py, RDKitConvertFileFormat.py,
 556 
 557 Copyright:
 558     Copyright (C) 2019 Manish Sud. All rights reserved.
 559 
 560     The functionality available in this script is implemented using RDKit, an
 561     open source toolkit for cheminformatics developed by Greg Landrum.
 562 
 563     This file is part of MayaChemTools.
 564 
 565     MayaChemTools is free software; you can redistribute it and/or modify it under
 566     the terms of the GNU Lesser General Public License as published by the Free
 567     Software Foundation; either version 3 of the License, or (at your option) any
 568     later version.
 569 
 570 """
 571 
# Run the script only when this file is executed directly, not when it is
# imported as a module.
if __name__ == "__main__":
    main()