1 #!/bin/env python 2 # 3 # File: RDKitConvertFileFormat.py 4 # Author: Manish Sud <msud@san.rr.com> 5 # 6 # Copyright (C) 2024 Manish Sud. All rights reserved. 7 # 8 # The functionality available in this script is implemented using RDKit, an 9 # open source toolkit for cheminformatics developed by Greg Landrum. 10 # 11 # This file is part of MayaChemTools. 12 # 13 # MayaChemTools is free software; you can redistribute it and/or modify it under 14 # the terms of the GNU Lesser General Public License as published by the Free 15 # Software Foundation; either version 3 of the License, or (at your option) any 16 # later version. 17 # 18 # MayaChemTools is distributed in the hope that it will be useful, but without 19 # any warranty; without even the implied warranty of merchantability of fitness 20 # for a particular purpose. See the GNU Lesser General Public License for more 21 # details. 22 # 23 # You should have received a copy of the GNU Lesser General Public License 24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or 25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, 26 # Boston, MA, 02111-1307, USA. 27 # 28 29 from __future__ import print_function 30 31 # Add local python path to the global path and import standard library modules... 32 import os 33 import sys; sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python")) 34 import time 35 import re 36 37 # RDKit imports... 38 try: 39 from rdkit import rdBase 40 from rdkit import Chem 41 except ImportError as ErrMsg: 42 sys.stderr.write("\nFailed to import RDKit module/package: %s\n" % ErrMsg) 43 sys.stderr.write("Check/update your RDKit environment and try again.\n\n") 44 sys.exit(1) 45 46 # MayaChemTools imports... 
try:
    from docopt import docopt
    import MiscUtil
    import RDKitUtil
except ImportError as ErrMsg:
    sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg)
    sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n")
    sys.exit(1)

# Name of the running script, used in the start/done messages...
ScriptName = os.path.basename(sys.argv[0])

# Options holds the raw dictionary returned by docopt; OptionsInfo holds the
# processed values set up by ProcessOptions() and used by ConvertFileFormat()...
Options = {}
OptionsInfo = {}

def main():
    """Start execution of the script.

    Prints a start message containing the RDKit and MayaChemTools versions,
    runs RetrieveOptions(), ProcessOptions() and ConvertFileFormat() in that
    order, and finishes by printing the total elapsed time.
    """

    MiscUtil.PrintInfo("\n%s (RDKit v%s; MayaChemTools v%s; %s): Starting...\n" % (ScriptName, rdBase.rdkitVersion, MiscUtil.GetMayaChemToolsVersion(), time.asctime()))

    # Capture start times to report the total elapsed time at the end...
    (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime()

    # Retrieve command line arguments and options...
    RetrieveOptions()

    # Process and validate command line arguments and options...
    ProcessOptions()

    # Perform actions required by the script...
    ConvertFileFormat()

    MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName)
    MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime))

def ConvertFileFormat():
    """Convert between file formats.

    Reads molecules from OptionsInfo["Infile"] using OptionsInfo["InfileParams"],
    writes them to OptionsInfo["Outfile"] using OptionsInfo["OutfileParams"],
    and prints the total, processed and ignored molecule counts.
    """

    Infile = OptionsInfo["Infile"]
    Outfile = OptionsInfo["Outfile"]

    # Read molecules...
    MiscUtil.PrintInfo("\nReading file %s..." % Infile)
    Mols = RDKitUtil.ReadMolecules(Infile, **OptionsInfo["InfileParams"])

    # Write molecules...
    MiscUtil.PrintInfo("\nGenerating file %s..." % Outfile)
    MolCount, ProcessedMolCount = RDKitUtil.WriteMolecules(Outfile, Mols, **OptionsInfo["OutfileParams"])

    # Molecules counted but not processed are reported as ignored...
    IgnoredMolCount = MolCount - ProcessedMolCount

    MiscUtil.PrintInfo("Total number of molecules: %d" % MolCount)
    MiscUtil.PrintInfo("Number of molecules processed: %d" % ProcessedMolCount)
    MiscUtil.PrintInfo("Number of molecules ignored: %d" % IgnoredMolCount)

def ProcessOptions():
    """Process and validate command line arguments and options.

    Calls ValidateOptions() first and then fills the module-level OptionsInfo
    dictionary with the keys Infile, InfileParams, Outfile, OutfileParams and
    Overwrite.
    """

    MiscUtil.PrintInfo("Processing options...")

    # Validate options...
    ValidateOptions()

    # Process and setup options for RDKit functions...
    OptionsInfo["Infile"] = Options["--infile"]
    OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters("--infileParams", Options["--infileParams"], Options["--infile"])

    # The input file name is passed along with the output file name; presumably
    # it is needed because the usage text makes the compute2DCoords default
    # depend on the input file type - confirm against MiscUtil...
    OptionsInfo["Outfile"] = Options["--outfile"]
    OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters("--outfileParams", Options["--outfileParams"], Options["--infile"], Options["--outfile"])

    OptionsInfo["Overwrite"] = Options["--overwrite"]

def RetrieveOptions():
    """Retrieve command line arguments and options.

    Parses the command line with docopt using _docoptUsage_ and stores the
    result in the module-level Options dictionary. Changes the current
    working directory when a working directory is specified. Prints the
    examples text and exits with status 0 when examples are requested.

    Exits with status 1, after writing a message to stderr, when the
    specified working directory can't be made the current directory.
    """

    # Get options...
    global Options
    Options = docopt(_docoptUsage_)

    # Set current working directory to the specified directory...
    WorkingDir = Options["--workingdir"]
    if WorkingDir:
        try:
            os.chdir(WorkingDir)
        except OSError as ErrMsg:
            # A missing or inaccessible directory is a user input error; report
            # it the same way as the import failures at the top of the script
            # instead of letting a traceback escape...
            sys.stderr.write("\nFailed to set working directory specified using \"-w, --workingdir\" option: %s\n\n" % ErrMsg)
            sys.exit(1)

    # Handle examples option...
    if Options.get("--examples"):
        MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_))
        sys.exit(0)

def ValidateOptions():
    """Validate option values.

    Checks the input file path and extension, the output file extension, the
    overwrite setting for the output file, and that the input and output file
    names are distinct. The checks are delegated to MiscUtil.
    """

    MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"])
    MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "sdf sd mol smi txt csv tsv mol2 pdb")

    # NOTE(review): "mol" is accepted as an output extension here, but the usage
    # text lists only SD, SMILES and PDB as supported output formats - confirm
    # which one is intended...
    MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "sdf sd mol smi pdb")
    MiscUtil.ValidateOptionsOutputFileOverwrite("-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"])
    MiscUtil.ValidateOptionsDistinctFileNames("-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"])

# Setup a usage string for docopt...
# The layout of this text is significant to docopt:
#
#   o An option specification and its "[default: ...]" must be separated by
#     two or more spaces; otherwise, the default value is not picked up.
#   o The "Options:" section ends at the first completely empty line. The
#     separator lines inside that section are written as \x20, a single space,
#     so that they never become empty lines after trailing whitespace is
#     stripped by an editor.
#
_docoptUsage_ = """
RDKitConvertFileFormat.py - Convert between molecular file formats

Usage:
    RDKitConvertFileFormat.py [--infileParams <Name,Value,...>]
                              [ --outfileParams <Name,Value,...> ] [--overwrite]
                              [-w <dir>] -i <infile> -o <outfile>
    RDKitConvertFileFormat.py -h | --help | -e | --examples

Description:
    Convert between molecular file formats.

    The supported input file formats are: Mol (.mol), SD (.sdf, .sd), SMILES (.smi,
    .txt, .csv, .tsv), MOL2 (.mol2), PDB (.pdb)

    The supported output file formats are: SD (.sdf, .sd), SMILES (.smi), PDB (.pdb)

Options:
    -e, --examples
        Print examples.
    -h, --help
        Print this help message.
    -i, --infile <infile>
        Input file name.
    --infileParams <Name,Value,...>  [default: auto]
        A comma delimited list of parameter name and value pairs for reading
        molecules from files. The supported parameter names for different file
        formats, along with their default values, are shown below:
        \x20
            SD, MOL: removeHydrogens,yes,sanitize,yes,strictParsing,yes
            MOL2: removeHydrogens,yes,sanitize,yes
            SMILES: smilesColumn,1,smilesNameColumn,2,smilesDelimiter,space,
                smilesTitleLine,auto,sanitize,yes
            PDB: removeHydrogens,yes,sanitize,yes
        \x20
        Possible values for smilesDelimiter: space, comma or tab.
    -o, --outfile <outfile>
        Output file name.
    --outfileParams <Name,Value,...>  [default: auto]
        A comma delimited list of parameter name and value pairs for writing
        molecules to files. The supported parameter names for different file
        formats, along with their default values, are shown below:
        \x20
            SD: compute2DCoords,auto,kekulize,yes,forceV3000,no
            SMILES: smilesKekulize,no,smilesDelimiter,space, smilesIsomeric,yes,
                smilesTitleLine,yes,smilesMolName,yes,smilesMolProps,no
        \x20
        Default value for compute2DCoords: yes for SMILES input file; no for all other
        file types.
    --overwrite
        Overwrite existing files.
    -w, --workingdir <dir>
        Location of working directory which defaults to the current directory.

Examples:
    To convert a SD file into a isomeric SMILES file, type:

        % RDKitConvertFileFormat.py -i Sample.sdf -o SampleOut.smi

    To convert a SD file into a non isomeric SMILES file, type

        % RDKitConvertFileFormat.py --outfileParams "smilesIsomeric,no"
          -i Sample.sdf -o SampleOut.smi

    To convert a SMILES file into a SD file along with calculation of 2D
    coordinates, type:

        % RDKitConvertFileFormat.py -i Sample.smi -o SampleOut.sdf

    To convert a MDL MOL file into a PDB file, type:

        % RDKitConvertFileFormat.py -i Sample.mol -o SampleOut.pdb

    To convert a CSV SMILES file with column headers, SMILES strings
    in column 1, and name in column 2 into a SD file containing 2D coordinates, type:

        % RDKitConvertFileFormat.py --infileParams "smilesDelimiter,comma,
          smilesTitleLine,yes,smilesColumn,1,smilesNameColumn,2" -i Sample.csv
          -o SampleOut.sdf

Author:
    Manish Sud(msud@san.rr.com)

See also:
    RDKitDrawMolecules.py, RDKitRemoveDuplicateMolecules.py, RDKitSearchFunctionalGroups.py,
    RDKitSearchSMARTS.py

Copyright:
    Copyright (C) 2024 Manish Sud. All rights reserved.

    The functionality available in this script is implemented using RDKit, an
    open source toolkit for cheminformatics developed by Greg Landrum.

    This file is part of MayaChemTools.

    MayaChemTools is free software; you can redistribute it and/or modify it under
    the terms of the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your option) any
    later version.

"""

# Run the script only when it is executed directly, not when it is imported...
if __name__ == "__main__":
    main()