#!/bin/env python
#
# File: PyMOLSplitChainsAndLigands.py
# Author: Manish Sud <msud@san.rr.com>
#
# Copyright (C) 2024 Manish Sud. All rights reserved.
#
# The functionality available in this script is implemented using PyMOL, a
# molecular visualization system on an open source foundation originally
# developed by Warren DeLano.
#
# This file is part of MayaChemTools.
#
# MayaChemTools is free software; you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation; either version 3 of the License, or (at your option) any
# later version.
#
# MayaChemTools is distributed in the hope that it will be useful, but without
# any warranty; without even the implied warranty of merchantability of fitness
# for a particular purpose.  See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
# write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
# Boston, MA, 02111-1307, USA.
#

from __future__ import print_function

# Add local python path to the global path and import standard library modules...
import os
import sys;  sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python"))
import time
import re

# PyMOL imports...
try:
    import pymol
    # Finish launching PyMOL in a command line mode for batch processing (-c)
    # along with the following options: disable loading of pymolrc and plugins (-k);
    # suppress start up messages (-q)
    pymol.finish_launching(['pymol', '-ckq'])
except ImportError as ErrMsg:
    sys.stderr.write("\nFailed to import PyMOL module/package: %s\n" % ErrMsg)
    sys.stderr.write("Check/update your PyMOL environment and try again.\n\n")
    sys.exit(1)

# MayaChemTools imports...
try:
    from docopt import docopt
    import MiscUtil
    import PyMOLUtil
except ImportError as ErrMsg:
    sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg)
    sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n")
    sys.exit(1)

ScriptName = os.path.basename(sys.argv[0])

# Raw docopt result (filled in by RetrieveOptions) and the processed values
# derived from it (filled in by ProcessOptions and its helpers).
Options = {}
OptionsInfo = {}


def main():
    """Start execution of the script.

    Retrieves and processes the command line options, generates the chain
    and ligand output files, and reports the elapsed time.
    """

    MiscUtil.PrintInfo("\n%s (PyMOL v%s; MayaChemTools v%s; %s): Starting...\n" % (ScriptName, pymol.cmd.get_version()[0], MiscUtil.GetMayaChemToolsVersion(), time.asctime()))

    (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime()

    # Retrieve command line arguments and options...
    RetrieveOptions()

    # Process and validate command line arguments and options...
    ProcessOptions()

    # Perform actions required by the script...
    SplitChainsAndLigands()

    MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName)
    MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime))


def SplitChainsAndLigands():
    """Split input file into output files corresponding to chains and ligands.

    Loads the input file into PyMOL under the name OptionsInfo["InfileRoot"],
    writes one file per specified chain and, in 'ChainsLigands' mode, one
    file per specified ligand in each chain. The loaded object is deleted
    at the end.
    """

    MiscUtil.PrintInfo("\nGenerating output files...")

    SpecifiedInfo = OptionsInfo["SpecifiedChainsAndLigandsInfo"]

    # Load macromolecule from input file...
    MolName = OptionsInfo["InfileRoot"]
    pymol.cmd.load(OptionsInfo["Infile"], MolName)

    for ChainID in SpecifiedInfo["ChainIDs"]:
        WriteChainFile(MolName, ChainID, SpecifiedInfo["ChainOutfiles"][ChainID])

        # In 'Chains' mode the ligands are kept inside the chain file and, as
        # documented for '--mode' and '--ligandIDs', no ligand files are written.
        if OptionsInfo["ChainsMode"]:
            continue

        for LigandID in SpecifiedInfo["LigandIDs"][ChainID]:
            LigandFile = SpecifiedInfo["LigandOutfiles"][ChainID][LigandID]
            WriteLigandFile(MolName, ChainID, LigandID, LigandFile)

    # Delete macromolecule...
    pymol.cmd.delete(MolName)


def WriteChainFile(MolName, ChainID, ChainFile):
    """Write chain file.

    Arguments:
        MolName (str): Name of the PyMOL object holding the macromolecule.
        ChainID (str): ID of the chain to write out.
        ChainFile (str): Name of the output file.

    The selection always covers the chain itself. Organic molecules are
    excluded unless running in 'Chains' mode; solvents and inorganics are
    excluded when the corresponding 'keep' option is off. A warning is
    printed when the output file doesn't exist after saving.
    """

    MiscUtil.PrintInfo("\nGenerating output file %s..." % ChainFile)

    ChainName = "%s_Chain%s" % (MolName, ChainID)

    SelectionClauses = [MolName, "(chain %s)" % ChainID]
    if not OptionsInfo["ChainsMode"]:
        SelectionClauses.append("(not organic)")
    if not OptionsInfo["KeepSolvents"]:
        SelectionClauses.append("(not solvent)")
    if not OptionsInfo["KeepInorganics"]:
        SelectionClauses.append("(not inorganic)")

    ChainSelection = "(%s)" % " and ".join(SelectionClauses)
    MiscUtil.PrintInfo("Chain selection: %s" % ChainSelection)

    # Copy the selection into a temporary object, save it, and clean up...
    pymol.cmd.create(ChainName, ChainSelection)
    pymol.cmd.save(ChainFile, ChainName)
    pymol.cmd.delete(ChainName)

    if not os.path.exists(ChainFile):
        MiscUtil.PrintWarning("Failed to generate Chain file, %s..." % (ChainFile))


def WriteLigandFile(MolName, ChainID, LigandID, LigandFile):
    """Write ligand file.

    Arguments:
        MolName (str): Name of the PyMOL object holding the macromolecule.
        ChainID (str): ID of the chain containing the ligand.
        LigandID (str): Residue name used to select the ligand.
        LigandFile (str): Name of the output file.

    The ligand is selected as the organic atoms in the chain whose residue
    name matches LigandID. A warning is printed when the output file doesn't
    exist after saving.
    """

    MiscUtil.PrintInfo("\nGenerating output file %s..." % LigandFile)

    LigandName = "%s_Chain%s_%s" % (MolName, ChainID, LigandID)
    LigandSelection = "(%s and (chain %s) and organic and (resn %s))" % (MolName, ChainID, LigandID)
    MiscUtil.PrintInfo("Ligand selection: %s" % LigandSelection)

    # Copy the selection into a temporary object, save it, and clean up...
    pymol.cmd.create(LigandName, LigandSelection)
    pymol.cmd.save(LigandFile, LigandName)
    pymol.cmd.delete(LigandName)

    if not os.path.exists(LigandFile):
        MiscUtil.PrintWarning("Failed to generate ligand file, %s..." % (LigandFile))


def ProcessChainAndLigandIDs():
    """Process chain and ligand IDs.

    Retrieves chains and ligands information for the input file, resolves the
    '--chainIDs' and '--ligandIDs' option values against it, and stores both
    results in OptionsInfo under "ChainsAndLigandsInfo" and
    "SpecifiedChainsAndLigandsInfo".
    """

    MolName = OptionsInfo["InfileRoot"]
    ChainsAndLigandsInfo = PyMOLUtil.GetChainsAndLigandsInfo(OptionsInfo["Infile"], MolName)
    OptionsInfo["ChainsAndLigandsInfo"] = ChainsAndLigandsInfo

    MiscUtil.PrintInfo("\nProcessing specified chain and ligand IDs for input file %s..." % OptionsInfo["Infile"])

    SpecifiedChainsAndLigandsInfo = PyMOLUtil.ProcessChainsAndLigandsOptionsInfo(ChainsAndLigandsInfo, "-c, --chainIDs", OptionsInfo["ChainIDs"], "-l, --ligandIDs", OptionsInfo["LigandIDs"])
    OptionsInfo["SpecifiedChainsAndLigandsInfo"] = SpecifiedChainsAndLigandsInfo

    CheckPresenceOfValidLigandIDs(ChainsAndLigandsInfo, SpecifiedChainsAndLigandsInfo)


def CheckPresenceOfValidLigandIDs(ChainsAndLigandsInfo, SpecifiedChainsAndLigandsInfo):
    """Check presence of valid ligand IDs.

    Arguments:
        ChainsAndLigandsInfo (dict): Chains and ligands information for the
            input file. Not used by this function.
        SpecifiedChainsAndLigandsInfo (dict): Specified chains and ligands
            information; its "ChainIDs" list and per chain "LigandIDs" are
            reported.

    Prints the specified chain IDs and, for each chain, its ligand IDs. A
    warning is printed for each chain without any ligand IDs.
    """

    MiscUtil.PrintInfo("\nSpecified chain IDs: %s" % (", ".join(SpecifiedChainsAndLigandsInfo["ChainIDs"])))

    for ChainID in SpecifiedChainsAndLigandsInfo["ChainIDs"]:
        LigandIDs = SpecifiedChainsAndLigandsInfo["LigandIDs"][ChainID]
        if LigandIDs:
            MiscUtil.PrintInfo("Chain ID: %s; Specified LigandIDs: %s" % (ChainID, ", ".join(LigandIDs)))
        else:
            MiscUtil.PrintInfo("Chain ID: %s; Specified LigandIDs: None" % (ChainID))
            MiscUtil.PrintWarning("No valid ligand IDs found for chain ID, %s." % (ChainID))


def _CheckOutfileOverwrite(FileType, Outfile):
    """Report an error for an existing output file unless overwriting is on.

    Arguments:
        FileType (str): Kind of file used in the message: "chain" or "ligand".
        Outfile (str): Name of the output file to check.
    """

    if os.path.exists(Outfile) and not OptionsInfo["Overwrite"]:
        MiscUtil.PrintError("The %s output file, %s, already exists. Use option \"--ov\" or \"--overwrite\" and try again.\n" % (FileType, Outfile))


def SetupChainAndLigandOutfiles():
    """Setup output file names for chains and ligands.

    Stores the generated names in OptionsInfo["SpecifiedChainsAndLigandsInfo"]
    under "ChainOutfiles" (keyed by chain ID) and "LigandOutfiles" (keyed by
    chain ID and ligand ID):

        Chains: <InfileRoot>_Chain<ChainID>.pdb
        Ligands: <InfileRoot>_Chain<ChainID>_<LigandID>.<LigandFileExt>

    An error is reported through MiscUtil.PrintError for any file about to
    be written which already exists, unless '--overwrite' is specified.
    """

    SpecifiedInfo = OptionsInfo["SpecifiedChainsAndLigandsInfo"]
    SpecifiedInfo["ChainOutfiles"] = {}
    SpecifiedInfo["LigandOutfiles"] = {}

    InfileRoot = OptionsInfo["InfileRoot"]
    LigandFileExt = OptionsInfo["LigandFileExt"]

    for ChainID in SpecifiedInfo["ChainIDs"]:
        ChainOutfileRoot = "%s_Chain%s" % (InfileRoot, ChainID)
        ChainOutfile = "%s.pdb" % (ChainOutfileRoot)
        SpecifiedInfo["ChainOutfiles"][ChainID] = ChainOutfile
        _CheckOutfileOverwrite("chain", ChainOutfile)

        SpecifiedInfo["LigandOutfiles"][ChainID] = {}
        for LigandID in SpecifiedInfo["LigandIDs"][ChainID]:
            LigandOutfile = "%s_%s.%s" % (ChainOutfileRoot, LigandID, LigandFileExt)
            SpecifiedInfo["LigandOutfiles"][ChainID][LigandID] = LigandOutfile

            # Ligand files are not written in 'Chains' mode, so an existing
            # one is left alone and must not block the run.
            if not OptionsInfo["ChainsMode"]:
                _CheckOutfileOverwrite("ligand", LigandOutfile)


def ProcessOptions():
    """Process and validate command line arguments and options.

    Validates the raw docopt values in Options and fills in OptionsInfo with
    the processed values used by the rest of the script.
    """

    MiscUtil.PrintInfo("Processing options...")

    # Validate options...
    ValidateOptions()

    OptionsInfo["Mode"] = Options["--mode"]
    OptionsInfo["ChainsMode"] = bool(re.match("^Chains$", OptionsInfo["Mode"], re.I))

    # Map ligand file format onto the output file extension; "mol" is used
    # for MDLMOL and as the fallback...
    OptionsInfo["LigandFileFormat"] = Options["--ligandFileFormat"]
    LigandFileExt = "mol"
    if re.match("^PDB$", OptionsInfo["LigandFileFormat"], re.I):
        LigandFileExt = "pdb"
    elif re.match("^(SD|SDF)$", OptionsInfo["LigandFileFormat"], re.I):
        LigandFileExt = "sdf"
    elif re.match("^MDLMOL$", OptionsInfo["LigandFileFormat"], re.I):
        LigandFileExt = "mol"
    OptionsInfo["LigandFileExt"] = LigandFileExt

    OptionsInfo["KeepInorganics"] = bool(re.match("^Yes$", Options["--keepInorganics"], re.I))
    OptionsInfo["KeepSolvents"] = bool(re.match("^Yes$", Options["--keepSolvents"], re.I))

    OptionsInfo["Infile"] = Options["--infile"]
    FileDir, FileName, FileExt = MiscUtil.ParseFileName(OptionsInfo["Infile"])
    OptionsInfo["InfileRoot"] = FileName

    OptionsInfo["Overwrite"] = Options["--overwrite"]

    OptionsInfo["ChainIDs"] = Options["--chainIDs"]
    OptionsInfo["LigandIDs"] = Options["--ligandIDs"]
    ProcessChainAndLigandIDs()

    # Relies on "ChainsMode", "LigandFileExt" and "Overwrite" set above...
    SetupChainAndLigandOutfiles()


def RetrieveOptions():
    """Retrieve command line arguments and options.

    Parses the command line using docopt into the global Options, changes
    to the working directory when one is specified, and prints the examples
    and exits for the '--examples' option.
    """

    # Get options...
    global Options
    Options = docopt(_docoptUsage_)

    # Set current working directory to the specified directory...
    WorkingDir = Options["--workingdir"]
    if WorkingDir:
        os.chdir(WorkingDir)

    # Handle examples option...
    if Options.get("--examples"):
        MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_))
        sys.exit(0)


def ValidateOptions():
    """Validate option values."""

    MiscUtil.ValidateOptionTextValue("--ligandFileFormat", Options["--ligandFileFormat"], "PDB SDF SD MDLMOL")

    MiscUtil.ValidateOptionTextValue("-m, --mode", Options["--mode"], "Chains ChainsLigands")

    MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"])
    MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "pdb cif")

    MiscUtil.ValidateOptionTextValue("--keepInorganics", Options["--keepInorganics"], "yes no")
    MiscUtil.ValidateOptionTextValue("--keepSolvents", Options["--keepSolvents"], "yes no")


# Setup a usage string for docopt...
#
# NOTE: docopt (0.6.2) appears to stop reading the "Options:" section at the
# first empty line and needs at least two spaces between an option
# specification and its "[default: ...]" text. Keep the section free of empty
# lines and keep those separators intact.
_docoptUsage_ = """
PyMOLSplitChainsAndLigands.py - Split macromolecule into chains and ligands

Usage:
    PyMOLSplitChainsAndLigands.py [--chainIDs <First, All or ID1,ID2...>]
                                  [--ligandIDs <Largest, All or ID1,ID2...>] [--ligandFileFormat <PDB, SDF, MDLMOL>]
                                  [--mode <Chains or ChainsLigands>] [--keepInorganics <yes or no>]
                                  [--keepSolvents <yes or no>] [--overwrite] [-w <dir>] -i <infile>
    PyMOLSplitChainsAndLigands.py -h | --help | -e | --examples

Description:
    Split a macromolecule into chains and ligands, and write them out to different
    files. The solvents and inorganic molecules may be optionally removed from
    chains. You may also skip the generation of ligand files and write out a chain
    along with associated ligands into the same chain file.

    The supported input file format is: PDB (.pdb), CIF (.cif)

    The supported output file formats are: Chains - PDB (.pdb); Ligands: PDB (.pdb),
    SD file (.sdf), MDL MOL (.mol)

    The names of the output files are automatically generated from the name of
    input file as shown below:

        Chains: <InfileRoot>_Chain<ChainID>.pdb
        Ligands: <InfileRoot>_Chain<ChainID>_<LigandID>.{pdb,sdf,mol}

Options:
    -c, --chainIDs <First, All or ID1,ID2...>  [default: All]
        List of chain IDs for splitting input file. Possible values: First, All,
        or a comma delimited list of chain IDs. The default is to use
        all chain IDs in input file.
    -e, --examples
        Print examples.
    -h, --help
        Print this help message.
    -i, --infile <infile>
        Input file name.
    -l, --ligandIDs <Largest, All or ID1,ID2...>  [default: Largest]
        List of ligand IDs present in chains for splitting input file. Possible
        values: Largest, All, or a comma delimited list of ligand IDs. The default
        is to use the largest ligand present in all or specified chains in input file.
        This option is ignored during 'Chains' value of '--mode' option.
        Ligands are identified using organic selection operator available in PyMOL.
        It'll also identify buffer molecules as ligands. The largest ligand contains
        the highest number of heavy atoms.
    --ligandFileFormat <PDB, SDF, MDLMOL>  [default: SDF]
        Ligand file format.
    -m, --mode <Chains or ChainsLigands>  [default: ChainsLigands]
        Split input file into chains or chains and ligands. The ligands are kept
        together with chains in the output files for 'Chains' mode. Separate files
        are generated for ligands during 'ChainsLigands' mode.
    --keepInorganics <yes or no>  [default: yes]
        Keep inorganic molecules during splitting of input file and write them to
        output files. The inorganic molecules are identified using inorganic selection
        operator available in PyMOL.
    --keepSolvents <yes or no>  [default: yes]
        Keep solvent molecules during splitting of input file and write them to
        output files. The solvent molecules are identified using solvent selection
        operator available in PyMOL.
    --overwrite
        Overwrite existing files.
    -w, --workingdir <dir>
        Location of working directory which defaults to the current directory.

Examples:
    To split a macromolecule into all chains and the largest ligand in each
    chain along with solvent and inorganic molecules, and write chain PDB
    and ligand SDF files, type:

        % PyMOLSplitChainsAndLigands.py -i Sample3.pdb

    To split a macromolecule into all chains and all ligands across all chains
    along with solvent and inorganic molecules, and write out corresponding
    chain and ligand files, type:

        % PyMOLSplitChainsAndLigands.py -i Sample3.pdb -c All -l All

    To split a macromolecule into all chains along with any associated ligands
    without any solvent and inorganic molecules, and write corresponding
    PDB files for chains and skipping generation of any ligand files, type:

        % PyMOLSplitChainsAndLigands.py -c all -m Chains --keepSolvents no
          --keepInorganics no -i Sample3.pdb

    To split a macromolecule into a specific chain and a specific ligand in the
    chain along with solvent and inorganic molecules, and write chain PDB
    and ligand MDLMOL files, type:

        % PyMOLSplitChainsAndLigands.py -c E -l ADP --ligandFileFormat MDLMOL
          -i Sample3.pdb

Author:
    Manish Sud(msud@san.rr.com)

See also:
    PyMOLAlignChains.py, PyMOLVisualizeMacromolecules.py

Copyright:
    Copyright (C) 2024 Manish Sud. All rights reserved.

    The functionality available in this script is implemented using PyMOL, a
    molecular visualization system on an open source foundation originally
    developed by Warren DeLano.

    This file is part of MayaChemTools.

    MayaChemTools is free software; you can redistribute it and/or modify it under
    the terms of the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your option) any
    later version.

"""

if __name__ == "__main__":
    main()