MayaChemTools

   1 #!/bin/env python
   2 #
   3 # File: RDKitConvertFileFormat.py
   4 # Author: Manish Sud <msud@san.rr.com>
   5 #
   6 # Copyright (C) 2024 Manish Sud. All rights reserved.
   7 #
   8 # The functionality available in this script is implemented using RDKit, an
   9 # open source toolkit for cheminformatics developed by Greg Landrum.
  10 #
  11 # This file is part of MayaChemTools.
  12 #
  13 # MayaChemTools is free software; you can redistribute it and/or modify it under
  14 # the terms of the GNU Lesser General Public License as published by the Free
  15 # Software Foundation; either version 3 of the License, or (at your option) any
  16 # later version.
  17 #
  18 # MayaChemTools is distributed in the hope that it will be useful, but without
  19 # any warranty; without even the implied warranty of merchantability or fitness
  20 # for a particular purpose.  See the GNU Lesser General Public License for more
  21 # details.
  22 #
  23 # You should have received a copy of the GNU Lesser General Public License
  24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  26 # Boston, MA, 02111-1307, USA.
  27 #
  28 
  29 from __future__ import print_function
  30 
  31 # Add local python path to the global path and import standard library modules...
  32 import os
  33 import sys;  sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python"))
  34 import time
  35 import re
  36 
  37 # RDKit imports...
  38 try:
  39     from rdkit import rdBase
  40     from rdkit import Chem
  41 except ImportError as ErrMsg:
  42     sys.stderr.write("\nFailed to import RDKit module/package: %s\n" % ErrMsg)
  43     sys.stderr.write("Check/update your RDKit environment and try again.\n\n")
  44     sys.exit(1)
  45 
  46 # MayaChemTools imports...
  47 try:
  48     from docopt import docopt
  49     import MiscUtil
  50     import RDKitUtil
  51 except ImportError as ErrMsg:
  52     sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg)
  53     sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n")
  54     sys.exit(1)
  55 
# Base name of the script as invoked; used in the status messages printed by main()...
ScriptName = os.path.basename(sys.argv[0])
# Command line options as returned by docopt; rebound in RetrieveOptions()...
Options = {}
# Processed option values populated by ProcessOptions() and read by ConvertFileFormat()...
OptionsInfo = {}
  59 
  60 def main():
  61     """Start execution of the script."""
  62     
  63     MiscUtil.PrintInfo("\n%s (RDKit v%s; MayaChemTools v%s; %s): Starting...\n" % (ScriptName, rdBase.rdkitVersion, MiscUtil.GetMayaChemToolsVersion(), time.asctime()))
  64     
  65     (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime()
  66     
  67     # Retrieve command line arguments and options...
  68     RetrieveOptions()
  69     
  70     # Process and validate command line arguments and options...
  71     ProcessOptions()
  72     
  73     # Perform actions required by the script...
  74     ConvertFileFormat()
  75     
  76     MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName)
  77     MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime))
  78 
  79 def ConvertFileFormat():
  80     """Convert between  file formats."""
  81     
  82     Infile = OptionsInfo["Infile"]
  83     Outfile = OptionsInfo["Outfile"]
  84     
  85     # Read molecules...
  86     MiscUtil.PrintInfo("\nReading file %s..." % Infile)
  87     Mols = RDKitUtil.ReadMolecules(Infile, **OptionsInfo["InfileParams"])
  88     
  89     # Write molecules...
  90     MiscUtil.PrintInfo("\nGenerating file %s..." % Outfile)
  91     MolCount, ProcessedMolCount = RDKitUtil.WriteMolecules(Outfile, Mols, **OptionsInfo["OutfileParams"])
  92     
  93     MiscUtil.PrintInfo("Total number of molecules: %d" % MolCount)
  94     MiscUtil.PrintInfo("Number of molecules processed: %d" % ProcessedMolCount)
  95     MiscUtil.PrintInfo("Number of molecules ignored: %d" % (MolCount - ProcessedMolCount))
  96 
  97 def ProcessOptions():
  98     """Process and validate command line arguments and options."""
  99     
 100     MiscUtil.PrintInfo("Processing options...")
 101     
 102     # Validate options...
 103     ValidateOptions()
 104     
 105     # Process and setup options for RDKit functions...
 106     OptionsInfo["Infile"] = Options["--infile"]
 107     OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters("--infileParams", Options["--infileParams"], Options["--infile"])
 108     
 109     OptionsInfo["Outfile"] = Options["--outfile"]
 110     OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters("--outfileParams", Options["--outfileParams"], Options["--infile"], Options["--outfile"])
 111     
 112     OptionsInfo["Overwrite"] = Options["--overwrite"]
 113 
 114 def RetrieveOptions():
 115     """Retrieve command line arguments and options."""
 116     
 117     # Get options...
 118     global Options
 119     Options = docopt(_docoptUsage_)
 120     
 121     # Set current working directory to the specified directory...
 122     WorkingDir = Options["--workingdir"]
 123     if WorkingDir:
 124         os.chdir(WorkingDir)
 125     
 126     # Handle examples option...
 127     if "--examples" in Options and Options["--examples"]:
 128         MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_))
 129         sys.exit(0)
 130 
 131 def ValidateOptions():
 132     """Validate option values."""
 133     
 134     MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"])
 135     MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "sdf sd mol smi txt csv tsv mol2 pdb")
 136     
 137     MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "sdf sd mol smi pdb")
 138     MiscUtil.ValidateOptionsOutputFileOverwrite("-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"])
 139     MiscUtil.ValidateOptionsDistinctFileNames("-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"])
 140     
# Usage text for docopt: RetrieveOptions() passes it to docopt() to parse the
# command line and to MiscUtil.GetExamplesTextFromDocOptText() for the examples
# option. The text is used at runtime, so any edit to it changes behavior...
_docoptUsage_ = """
RDKitConvertFileFormat.py - Convert between molecular file formats

Usage:
    RDKitConvertFileFormat.py [--infileParams <Name,Value,...>]
                              [ --outfileParams <Name,Value,...> ] [--overwrite]
                              [-w <dir>] -i <infile> -o <outfile>
    RDKitConvertFileFormat.py -h | --help | -e | --examples

Description:
    Convert between molecular file formats.

    The supported input file formats are: Mol (.mol), SD (.sdf, .sd), SMILES (.smi,
    .txt, .csv, .tsv), MOL2 (.mol2), PDB (.pdb)

    The supported output file formats are: SD (.sdf, .sd), SMILES (.smi), PDB (.pdb)

Options:
    -e, --examples
        Print examples.
    -h, --help
        Print this help message.
    -i, --infile <infile>
        Input file name.
    --infileParams <Name,Value,...>  [default: auto]
        A comma delimited list of parameter name and value pairs for reading
        molecules from files. The supported parameter names for different file
        formats, along with their default values, are shown below:
            
            SD, MOL: removeHydrogens,yes,sanitize,yes,strictParsing,yes
            MOL2: removeHydrogens,yes,sanitize,yes
            SMILES: smilesColumn,1,smilesNameColumn,2,smilesDelimiter,space,
                smilesTitleLine,auto,sanitize,yes
            PDB: removeHydrogens,yes,sanitize,yes
            
        Possible values for smilesDelimiter: space, comma or tab.
    -o, --outfile <outfile>
        Output file name.
    --outfileParams <Name,Value,...>  [default: auto]
        A comma delimited list of parameter name and value pairs for writing
        molecules to files. The supported parameter names for different file
        formats, along with their default values, are shown below:
            
            SD: compute2DCoords,auto,kekulize,yes,forceV3000,no
            SMILES: smilesKekulize,no,smilesDelimiter,space, smilesIsomeric,yes,
                smilesTitleLine,yes,smilesMolName,yes,smilesMolProps,no
            
        Default value for compute2DCoords: yes for SMILES input file; no for all other
        file types.
    --overwrite
        Overwrite existing files.
    -w, --workingdir <dir>
        Location of working directory which defaults to the current directory.

Examples:
    To convert a SD file  into a isomeric SMILES file, type:

        % RDKitConvertFileFormat.py -i Sample.sdf -o SampleOut.smi

    To convert a SD file into a non isomeric SMILES file, type

        % RDKitConvertFileFormat.py --outfileParams "smilesIsomeric,no"
          -i Sample.sdf -o SampleOut.smi

    To convert a SMILES file into a SD file along with calculation of 2D
    coordinates, type:

        % RDKitConvertFileFormat.py -i Sample.smi -o SampleOut.sdf

    To convert a MDL MOL file into a PDB file, type:

        % RDKitConvertFileFormat.py -i Sample.mol -o SampleOut.pdb

    To convert a CSV SMILES file  with column headers, SMILES strings
    in column 1, and name in column 2 into a SD file containing 2D coordinates, type:

        % RDKitConvertFileFormat.py --infileParams "smilesDelimiter,comma,
          smilesTitleLine,yes,smilesColumn,1,smilesNameColumn,2" -i Sample.csv
          -o SampleOut.sdf

Author:
    Manish Sud(msud@san.rr.com)

See also:
    RDKitDrawMolecules.py, RDKitRemoveDuplicateMolecules.py, RDKitSearchFunctionalGroups.py,
    RDKitSearchSMARTS.py

Copyright:
    Copyright (C) 2024 Manish Sud. All rights reserved.

    The functionality available in this script is implemented using RDKit, an
    open source toolkit for cheminformatics developed by Greg Landrum.

    This file is part of MayaChemTools.

    MayaChemTools is free software; you can redistribute it and/or modify it under
    the terms of the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your option) any
    later version.

"""
 243 
# Run main() only when the file is executed as a script, not when imported...
if __name__ == "__main__":
    main()