MayaChemTools

   1 #!/bin/env python
   2 #
   3 # File: RDKitConvertFileFormat.py
   4 # Author: Manish Sud <msud@san.rr.com>
   5 #
   6 # Copyright (C) 2019 Manish Sud. All rights reserved.
   7 #
   8 # The functionality available in this script is implemented using RDKit, an
   9 # open source toolkit for cheminformatics developed by Greg Landrum.
  10 #
  11 # This file is part of MayaChemTools.
  12 #
  13 # MayaChemTools is free software; you can redistribute it and/or modify it under
  14 # the terms of the GNU Lesser General Public License as published by the Free
  15 # Software Foundation; either version 3 of the License, or (at your option) any
  16 # later version.
  17 #
  18 # MayaChemTools is distributed in the hope that it will be useful, but without
  19 # any warranty; without even the implied warranty of merchantability or fitness
  20 # for a particular purpose.  See the GNU Lesser General Public License for more
  21 # details.
  22 #
  23 # You should have received a copy of the GNU Lesser General Public License
  24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  26 # Boston, MA, 02111-1307, USA.
  27 #
  28 
  29 from __future__ import print_function
  30 
  31 # Add local python path to the global path and import standard library modules...
  32 import os
  33 import sys;  sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python"))
  34 import time
  35 import re
  36 
  37 # RDKit imports...
  38 try:
  39     from rdkit import rdBase
  40     from rdkit import Chem
  41 except ImportError as ErrMsg:
  42     sys.stderr.write("\nFailed to import RDKit module/package: %s\n" % ErrMsg)
  43     sys.stderr.write("Check/update your RDKit environment and try again.\n\n")
  44     sys.exit(1)
  45 
  46 # MayaChemTools imports...
  47 try:
  48     from docopt import docopt
  49     import MiscUtil
  50     import RDKitUtil
  51 except ImportError as ErrMsg:
  52     sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg)
  53     sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n")
  54     sys.exit(1)
  55 
  56 ScriptName = os.path.basename(sys.argv[0])
  57 Options = {}
  58 OptionsInfo = {}
  59 
  60 def main():
  61     """Start execution of the script"""
  62     
  63     MiscUtil.PrintInfo("\n%s (RDK v%s; %s): Starting...\n" % (ScriptName, rdBase.rdkitVersion, time.asctime()))
  64     
  65     (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime()
  66     
  67     # Retrieve command line arguments and options...
  68     RetrieveOptions()
  69     
  70     # Process and validate command line arguments and options...
  71     ProcessOptions()
  72     
  73     # Perform actions required by the script...
  74     ConvertFileFormat()
  75     
  76     MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName)
  77     MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime))
  78 
  79 def ConvertFileFormat():
  80     """Convert between  file formats"""
  81     
  82     Infile = OptionsInfo["Infile"]
  83     Outfile = OptionsInfo["Outfile"]
  84     
  85     # Read molecules...
  86     MiscUtil.PrintInfo("\nReading file %s..." % Infile)
  87     Mols = RDKitUtil.ReadMolecules(Infile, **OptionsInfo["InfileParams"])
  88     MiscUtil.PrintInfo("Total number of molecules: %d" % len(Mols))
  89     
  90     # Write molecules...
  91     MiscUtil.PrintInfo("\nGenerating file %s..." % Outfile)
  92     MolCount, ProcessedMolCount = RDKitUtil.WriteMolecules(Outfile, Mols, **OptionsInfo["OutfileParams"])
  93     
  94     MiscUtil.PrintInfo("Total number of molecules: %d" % MolCount)
  95     MiscUtil.PrintInfo("Number of molecules processed: %d" % ProcessedMolCount)
  96     MiscUtil.PrintInfo("Number of molecules ignored: %d" % (MolCount - ProcessedMolCount))
  97 
  98 def ProcessOptions():
  99     """Process and validate command line arguments and options"""
 100     
 101     MiscUtil.PrintInfo("Processing options...")
 102     
 103     # Validate options...
 104     ValidateOptions()
 105     
 106     # Process and setup options for RDKit functions...
 107     OptionsInfo["Infile"] = Options["--infile"]
 108     OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters("--infileParams", Options["--infileParams"], Options["--infile"])
 109     
 110     OptionsInfo["Outfile"] = Options["--outfile"]
 111     OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters("--outfileParams", Options["--outfileParams"], Options["--infile"], Options["--outfile"])
 112     
 113     OptionsInfo["Overwrite"] = Options["--overwrite"]
 114 
 115 def RetrieveOptions():
 116     """Retrieve command line arguments and options"""
 117     
 118     # Get options...
 119     global Options
 120     Options = docopt(_docoptUsage_)
 121     
 122     # Set current working directory to the specified directory...
 123     WorkingDir = Options["--workingdir"]
 124     if WorkingDir:
 125         os.chdir(WorkingDir)
 126     
 127     # Handle examples option...
 128     if "--examples" in Options and Options["--examples"]:
 129         MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_))
 130         sys.exit(0)
 131 
 132 def ValidateOptions():
 133     """Validate option values"""
 134     
 135     MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"])
 136     MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "sdf sd mol smi txt csv tsv mol2 pdb")
 137     
 138     MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "sdf sd mol smi pdb")
 139     MiscUtil.ValidateOptionsOutputFileOverwrite("-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"])
 140     MiscUtil.ValidateOptionsDistinctFileNames("-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"])
 141     
# Usage text for docopt. RetrieveOptions() passes this string to docopt() to
# parse the command line and to MiscUtil.GetExamplesTextFromDocOptText() to
# print the examples for the -e, --examples option. The text is used at run
# time, so edit it with care...
_docoptUsage_ = """
RDKitConvertFileFormat.py - Convert between molecular file formats

Usage:
    RDKitConvertFileFormat.py [--infileParams <Name,Value,...>]
                              [ --outfileParams <Name,Value,...> ] [--overwrite]
                              [-w <dir>] -i <infile> -o <outfile>
    RDKitConvertFileFormat.py -h | --help | -e | --examples

Description:
    Convert between molecular file formats.

    The supported input file formats are: Mol (.mol), SD (.sdf, .sd), SMILES (.smi,
    .txt, .csv, .tsv), MOL2 (.mol2), PDB (.pdb)

    The supported output file formats are: SD (.sdf, .sd), SMILES (.smi), PDB (.pdb)

Options:
    -e, --examples
        Print examples.
    -h, --help
        Print this help message.
    -i, --infile <infile>
        Input file name.
    --infileParams <Name,Value,...>  [default: auto]
        A comma delimited list of parameter name and value pairs for reading
        molecules from files. The supported parameter names for different file
        formats, along with their default values, are shown below:
            
            SD, MOL: removeHydrogens,yes,sanitize,yes,strictParsing,yes
            MOL2: removeHydrogens,yes,sanitize,yes
            SMILES: smilesColumn,1,smilesNameColumn,2,smilesDelimiter,space,
                smilesTitleLine,auto,sanitize,yes
            PDB: removeHydrogens,yes,sanitize,yes
            
        Possible values for smilesDelimiter: space, comma or tab.
    -o, --outfile <outfile>
        Output file name.
    --outfileParams <Name,Value,...>  [default: auto]
        A comma delimited list of parameter name and value pairs for writing
        molecules to files. The supported parameter names for different file
        formats, along with their default values, are shown below:
            
            SD: compute2DCoords,auto,kekulize,no
            SMILES: kekulize,no,smilesDelimiter,space, smilesIsomeric,yes,
                smilesTitleLine,yes
            
        Default value for compute2DCoords: yes for SMILES input file; no for all other
        file types.
    --overwrite
        Overwrite existing files.
    -w, --workingdir <dir>
        Location of working directory which defaults to the current directory.

Examples:
    To convert a SD file  into a isomeric SMILES file, type:

        % RDKitConvertFileFormat.py -i Sample.sdf -o SampleOut.smi

    To convert a SD file into a non isomeric SMILES file, type

        % RDKitConvertFileFormat.py --outfileParams "smilesIsomeric,no"
          -i Sample.sdf -o SampleOut.smi

    To convert a SMILES file into a SD file along with calculation of 2D
    coordinates, type:

        % RDKitConvertFileFormat.py -i Sample.smi -o SampleOut.sdf

    To convert a MDL MOL file into a PDB file, type:

        % RDKitConvertFileFormat.py -i Sample.mol -o SampleOut.pdb

    To convert a CSV SMILES file  with column headers, SMILES strings
    in column 1, and name in column 2 into a SD file containing 2D coordinates, type:

        % RDKitConvertFileFormat.py --infileParams "smilesDelimiter,comma,
          smilesTitleLine,yes,smilesColumn,1,smilesNameColumn,2" -i Sample.csv
          -o SampleOut.sdf

Author:
    Manish Sud(msud@san.rr.com)

See also:
    RDKitDrawMolecules.py, RDKitRemoveDuplicateMolecules.py, RDKitSearchFunctionalGroups.py,
    RDKitSearchSMARTS.py

Copyright:
    Copyright (C) 2019 Manish Sud. All rights reserved.

    The functionality available in this script is implemented using RDKit, an
    open source toolkit for cheminformatics developed by Greg Landrum.

    This file is part of MayaChemTools.

    MayaChemTools is free software; you can redistribute it and/or modify it under
    the terms of the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your option) any
    later version.

"""
 244 
# Start execution only when this file is run as a script, not when it is
# imported as a module...
if __name__ == "__main__":
    main()