MayaChemTools

   1 #!/bin/env python
   2 #
   3 # File: PyMOLSplitChainsAndLigands.py
   4 # Author: Manish Sud <msud@san.rr.com>
   5 #
   6 # Copyright (C) 2024 Manish Sud. All rights reserved.
   7 #
   8 # The functionality available in this script is implemented using PyMOL, a
   9 # molecular visualization system on an open source foundation originally
  10 # developed by Warren DeLano.
  11 #
  12 # This file is part of MayaChemTools.
  13 #
  14 # MayaChemTools is free software; you can redistribute it and/or modify it under
  15 # the terms of the GNU Lesser General Public License as published by the Free
  16 # Software Foundation; either version 3 of the License, or (at your option) any
  17 # later version.
  18 #
  19 # MayaChemTools is distributed in the hope that it will be useful, but without
  20 # any warranty; without even the implied warranty of merchantability of fitness
  21 # for a particular purpose.  See the GNU Lesser General Public License for more
  22 # details.
  23 #
  24 # You should have received a copy of the GNU Lesser General Public License
  25 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  26 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  27 # Boston, MA, 02111-1307, USA.
  28 #
  29 
  30 from __future__ import print_function
  31 
  32 # Add local python path to the global path and import standard library modules...
  33 import os
  34 import sys;  sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python"))
  35 import time
  36 import re
  37 
  38 # PyMOL imports...
  39 try:
  40     import pymol
  41     # Finish launching PyMOL in  a command line mode for batch processing (-c)
  42     # along with the following options:  disable loading of pymolrc and plugins (-k);
  43     # suppress start up messages (-q)
  44     pymol.finish_launching(['pymol', '-ckq'])
  45 except ImportError as ErrMsg:
  46     sys.stderr.write("\nFailed to import PyMOL module/package: %s\n" % ErrMsg)
  47     sys.stderr.write("Check/update your PyMOL environment and try again.\n\n")
  48     sys.exit(1)
  49 
  50 # MayaChemTools imports...
  51 try:
  52     from docopt import docopt
  53     import MiscUtil
  54     import PyMOLUtil
  55 except ImportError as ErrMsg:
  56     sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg)
  57     sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n")
  58     sys.exit(1)
  59 
# Base name of the running script; used as a prefix in start/done messages.
ScriptName = os.path.basename(sys.argv[0])
# Raw command line option values; replaced by the docopt result in RetrieveOptions().
Options = {}
# Processed option values derived from Options; filled in by ProcessOptions().
OptionsInfo = {}
  63 
  64 def main():
  65     """Start execution of the script."""
  66     
  67     MiscUtil.PrintInfo("\n%s (PyMOL v%s; MayaChemTools v%s; %s): Starting...\n" % (ScriptName, pymol.cmd.get_version()[0], MiscUtil.GetMayaChemToolsVersion(), time.asctime()))
  68     
  69     (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime()
  70     
  71     # Retrieve command line arguments and options...
  72     RetrieveOptions()
  73     
  74     # Process and validate command line arguments and options...
  75     ProcessOptions()
  76 
  77     # Perform actions required by the script...
  78     SplitChainsAndLigands()
  79     
  80     MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName)
  81     MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime))
  82 
  83 def SplitChainsAndLigands():
  84     """Split input file into output files corresponding to chains and ligands."""
  85 
  86     MiscUtil.PrintInfo("\nGenerating output files...")
  87 
  88     # Load macromolecule from input file...
  89     MolName = OptionsInfo["InfileRoot"]
  90     pymol.cmd.load(OptionsInfo["Infile"], MolName)
  91     
  92     for ChainID in OptionsInfo["SpecifiedChainsAndLigandsInfo"]["ChainIDs"]:
  93         ChainFile = OptionsInfo["SpecifiedChainsAndLigandsInfo"]["ChainOutfiles"][ChainID]
  94         WriteChainFile(MolName, ChainID, ChainFile)
  95         
  96         for LigandID in OptionsInfo["SpecifiedChainsAndLigandsInfo"]["LigandIDs"][ChainID]:
  97             LigandFile = OptionsInfo["SpecifiedChainsAndLigandsInfo"]["LigandOutfiles"][ChainID][LigandID]
  98             WriteLigandFile(MolName, ChainID, LigandID, LigandFile)
  99     
 100     # Delete macromolecule...
 101     pymol.cmd.delete(MolName)
 102 
 103 def WriteChainFile(MolName, ChainID, ChainFile):
 104     """Write chain file."""
 105 
 106     MiscUtil.PrintInfo("\nGenerating output file %s..." % ChainFile)
 107     
 108     ChainName = "%s_Chain%s" % (MolName, ChainID)
 109 
 110     ChainSelection = "%s and (chain %s)" % (MolName, ChainID)
 111     if not OptionsInfo["ChainsMode"]:
 112         ChainSelection += " and (not organic)"
 113     
 114     if not OptionsInfo["KeepSolvents"]:
 115         ChainSelection += " and (not solvent)"
 116         
 117     if not OptionsInfo["KeepInorganics"]:
 118         ChainSelection += " and (not inorganic)"
 119 
 120     ChainSelection = "(%s)" % ChainSelection
 121     MiscUtil.PrintInfo("Chain selection: %s" % ChainSelection)
 122     
 123     pymol.cmd.create(ChainName, ChainSelection)
 124     pymol.cmd.save(ChainFile, ChainName)
 125     pymol.cmd.delete(ChainName)
 126     
 127     if not os.path.exists(ChainFile):
 128         MiscUtil.PrintWarning("Failed to generate Chain file, %s..." % (ChainFile))
 129 
 130 def WriteLigandFile(MolName, ChainID, LigandID, LigandFile):
 131     """Write ligand file."""
 132 
 133     MiscUtil.PrintInfo("\nGenerating output file %s..." % LigandFile)
 134     
 135     LigandName = "%s_Chain%s_%s" % (MolName, ChainID, LigandID)
 136     LigandSelection = "(%s and (chain %s) and organic and (resn %s))" % (MolName, ChainID, LigandID)
 137     MiscUtil.PrintInfo("Ligand selection: %s" % LigandSelection)
 138 
 139     pymol.cmd.create(LigandName, LigandSelection)
 140     pymol.cmd.save(LigandFile, LigandName)
 141     pymol.cmd.delete(LigandName)
 142     
 143     if not os.path.exists(LigandFile):
 144         MiscUtil.PrintWarning("Failed to generate ligand file, %s..." % (LigandFile))
 145     
 146 def ProcessChainAndLigandIDs():
 147     """Process chain and ligand IDs."""
 148     
 149     MolName = OptionsInfo["InfileRoot"]
 150     ChainsAndLigandsInfo = PyMOLUtil.GetChainsAndLigandsInfo(OptionsInfo["Infile"], MolName)
 151     OptionsInfo["ChainsAndLigandsInfo"] = ChainsAndLigandsInfo
 152     
 153     MiscUtil.PrintInfo("\nProcessing specified chain and ligand IDs for input file %s..." % OptionsInfo["Infile"])
 154     
 155     SpecifiedChainsAndLigandsInfo = PyMOLUtil.ProcessChainsAndLigandsOptionsInfo(ChainsAndLigandsInfo, "-c, --chainIDs", OptionsInfo["ChainIDs"], "-l, --ligandIDs", OptionsInfo["LigandIDs"])
 156     OptionsInfo["SpecifiedChainsAndLigandsInfo"] = SpecifiedChainsAndLigandsInfo
 157     
 158     CheckPresenceOfValidLigandIDs(ChainsAndLigandsInfo, SpecifiedChainsAndLigandsInfo)
 159     
 160 def CheckPresenceOfValidLigandIDs(ChainsAndLigandsInfo, SpecifiedChainsAndLigandsInfo):
 161     """Check presence of valid ligand IDs."""
 162 
 163     MiscUtil.PrintInfo("\nSpecified chain IDs: %s" % (", ".join(SpecifiedChainsAndLigandsInfo["ChainIDs"])))
 164     
 165     for ChainID in SpecifiedChainsAndLigandsInfo["ChainIDs"]:
 166         if len (SpecifiedChainsAndLigandsInfo["LigandIDs"][ChainID]):
 167             MiscUtil.PrintInfo("Chain ID: %s; Specified LigandIDs: %s" % (ChainID, ", ".join(SpecifiedChainsAndLigandsInfo["LigandIDs"][ChainID])))
 168         else:
 169             MiscUtil.PrintInfo("Chain IDs: %s; Specified LigandIDs: None" % (ChainID))
 170             MiscUtil.PrintWarning("No valid ligand IDs found for chain ID, %s." % (ChainID))
 171 
 172 def SetupChainAndLigandOutfiles():
 173     """Setup output file names for chains and ligands."""
 174 
 175     OptionsInfo["SpecifiedChainsAndLigandsInfo"]["ChainOutfiles"] = {}
 176     OptionsInfo["SpecifiedChainsAndLigandsInfo"]["LigandOutfiles"] = {}
 177 
 178     InfileRoot = OptionsInfo["InfileRoot"]
 179     LigandFileExt = OptionsInfo["LigandFileExt"]
 180     
 181     for ChainID in OptionsInfo["SpecifiedChainsAndLigandsInfo"]["ChainIDs"]:
 182         ChainOutfileRoot = "%s_Chain%s" % (InfileRoot, ChainID)
 183         ChainOutfile = "%s.pdb" % (ChainOutfileRoot)
 184         OptionsInfo["SpecifiedChainsAndLigandsInfo"]["ChainOutfiles"][ChainID] = ChainOutfile
 185         if os.path.exists(ChainOutfile):
 186             if not OptionsInfo["Overwrite"]:
 187                 MiscUtil.PrintError("The chain output file, %s, already exist. Use option \"--ov\" or \"--overwrite\" and try again.\n" % (ChainOutfile))
 188         
 189         OptionsInfo["SpecifiedChainsAndLigandsInfo"]["LigandOutfiles"][ChainID] = {}
 190         for LigandID in OptionsInfo["SpecifiedChainsAndLigandsInfo"]["LigandIDs"][ChainID]:
 191             LigandOutfile = "%s_%s.%s" % (ChainOutfileRoot, LigandID, LigandFileExt)
 192             OptionsInfo["SpecifiedChainsAndLigandsInfo"]["LigandOutfiles"][ChainID][LigandID] = LigandOutfile
 193             if os.path.exists(LigandOutfile):
 194                 if not OptionsInfo["Overwrite"]:
 195                     MiscUtil.PrintError("The ligand output file, %s, already exist. Use option \"--ov\" or \"--overwrite\" and try again.\n" % (LigandOutfile))
 196     
 197 def ProcessOptions():
 198     """Process and validate command line arguments and options."""
 199     
 200     MiscUtil.PrintInfo("Processing options...")
 201     
 202     # Validate options...
 203     ValidateOptions()
 204 
 205     OptionsInfo["Mode"] = Options["--mode"]
 206     OptionsInfo["ChainsMode"] = False
 207     if re.match("^Chains$", OptionsInfo["Mode"], re.I):
 208         OptionsInfo["ChainsMode"] = True
 209     
 210     OptionsInfo["LigandFileFormat"] = Options["--ligandFileFormat"]
 211     LigandFileExt = "mol"
 212     if re.match("^PDB$", OptionsInfo["LigandFileFormat"], re.I):
 213         LigandFileExt = "pdb"
 214     elif re.match("^(SD|SDF)$", OptionsInfo["LigandFileFormat"], re.I):
 215         LigandFileExt = "sdf"
 216     elif re.match("^MOL$", OptionsInfo["LigandFileFormat"], re.I):
 217         LigandFileExt = "mol"
 218     OptionsInfo["LigandFileExt"] = LigandFileExt
 219     
 220     OptionsInfo["KeepInorganics"] = True if re.match("^Yes$", Options["--keepInorganics"], re.I) else False
 221     OptionsInfo["KeepSolvents"] = True if re.match("^Yes$", Options["--keepSolvents"], re.I) else False
 222     
 223     OptionsInfo["Infile"] = Options["--infile"]
 224     FileDir, FileName, FileExt = MiscUtil.ParseFileName(OptionsInfo["Infile"])
 225     OptionsInfo["InfileRoot"] = FileName
 226 
 227     OptionsInfo["Overwrite"] = Options["--overwrite"]
 228 
 229     OptionsInfo["ChainIDs"] = Options["--chainIDs"]
 230     OptionsInfo["LigandIDs"] = Options["--ligandIDs"]
 231     ProcessChainAndLigandIDs()
 232 
 233     SetupChainAndLigandOutfiles()
 234 
 235 def RetrieveOptions(): 
 236     """Retrieve command line arguments and options."""
 237     
 238     # Get options...
 239     global Options
 240     Options = docopt(_docoptUsage_)
 241 
 242     # Set current working directory to the specified directory...
 243     WorkingDir = Options["--workingdir"]
 244     if WorkingDir:
 245         os.chdir(WorkingDir)
 246     
 247     # Handle examples option...
 248     if "--examples" in Options and Options["--examples"]:
 249         MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_))
 250         sys.exit(0)
 251 
 252 def ValidateOptions():
 253     """Validate option value.s"""
 254     
 255     MiscUtil.ValidateOptionTextValue("--ligandFileFormat", Options["--ligandFileFormat"], "PDB SDF SD MDLMOL")
 256     
 257     MiscUtil.ValidateOptionTextValue("-m, --mode", Options["--mode"], "Chains ChainsLigands")
 258     
 259     MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"])
 260     MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "pdb cif")
 261 
 262     MiscUtil.ValidateOptionTextValue("--keepInorganics", Options["--keepInorganics"], "yes no")
 263     MiscUtil.ValidateOptionTextValue("--keepSolvents", Options["--keepSolvents"], "yes no")
 264 
 265 # Setup a usage string for docopt...
 266 _docoptUsage_ = """
 267 PyMOLSplitChainsAndLigands.py - Split macromolecule into chains and ligands
 268 
 269 Usage:
 270     PyMOLSplitChainsAndLigands.py [--chainIDs <First, All or ID1,ID2...>]
 271                                   [--ligandIDs <Largest, All or ID1,ID2...>] [--ligandFileFormat <PDB, SDF, MDLMOL>]
 272                                   [--mode <Chains or ChainsLigands>] [--keepInorganics <yes or no>]
 273                                   [--keepSolvents <yes or no>] [--overwrite] [-w <dir>] -i <infile>
 274     PyMOLSplitChainsAndLigands.py -h | --help | -e | --examples
 275 
 276 Description:
 277     Spit a macromolecule into chains and ligands, and write them out to different
 278     files. The solvents and inorganic molecules may be optionally removed from
 279     chains. You may also skip the generation of ligand files and write out a chain
 280     along with associated ligands into the same chain file.
 281  
 282     The supported input file format is:  PDB (.pdb), CIF (.cif)
 283  
 284     The supported output file formats are: Chains - PDB (.pdb); Ligands: PDB (.pdb),
 285     SD file (.sdf, .sd), MDL MOL (.mol)
 286 
 287     The names of the output files are automatically generated from the name of
 288     input file as shown below:
 289     
 290         Chains: <InfileRoot>_<ChainID>.pdb
 291         Ligands: <InfileRoot>_<ChainID>.{pdb,sdf,sd,mol}
 292     
 293 Options:
 294     -c, --chainIDs <First, All or ID1,ID2...>  [default: All]
 295         List of chain IDs for splitting input file. Possible values: First, All,
 296         or a comma delimited list of chain IDs. The default is to use
 297         all chain IDs in input file.
 298     -e, --examples
 299         Print examples.
 300     -h, --help
 301         Print this help message.
 302     -i, --infile <infile>
 303         Input file name.
 304     -l, --ligandIDs <Largest, All or ID1,ID2...>  [default: Largest]
 305         List of ligand IDs present in chains for splitting input file. Possible
 306         values: Largest, All, or a comma delimited list of ligand IDs. The default
 307         is to use the largest ligand present in all or specified chains in input file.
 308         This option is ignored during 'Chains' value of '--mode' option.
 309         
 310         Ligands are identified using organic selection operator available in PyMOL.
 311         It'll also  identify buffer molecules as ligands. The largest ligand contains
 312         the highest number of heavy atoms.
 313     --ligandFileFormat <PDB, SDF, MDLMOL>  [default: SDF]
 314         Ligand file format.
 315     -m, --mode <Chains or ChainsLigands>  [default: ChainsLigands]
 316         Split input file into chains or chains and ligands. The ligands are kept
 317         together with chains in the output files for 'Chains' mode. Separate files
 318         are generated for ligands during 'ChainsAndLigands' mode.
 319     --keepInorganics <yes or no>  [default: yes]
 320         Keep inorganic molecules during splitting of input file and write them to
 321         output files. The inorganic molecules are identified using inorganic selection
 322         operator available in PyMOL.
 323     --keepSolvents <yes or no>  [default: yes]
 324         Keep solvent molecules during splitting of input file and write them to
 325         output files. The solvent molecules are identified using solvent selection
 326         operator available in PyMOL.
 327     --overwrite
 328         Overwrite existing files.
 329     -w, --workingdir <dir>
 330         Location of working directory which defaults to the current directory.
 331 
 332 Examples:
 333     To split a macromolecule into the first chain and the largest ligand in the
 334     first chain along with solvent and inorganic molecules, and write chain PDB
 335     and ligand SDF files, type:
 336 
 337         % PyMOLSplitChainsAndLigands.py -i Sample3.pdb
 338 
 339     To split a macromolecule into all chains and all ligands across all chains
 340     along with solvent and inorganic molecules, and write out corresponding
 341     chain and ligand files, type:
 342 
 343         % PyMOLSplitChainsAndLigands.py -i Sample3.pdb -c All -l All
 344 
 345     To split a macromolecule into all chains along with any associated ligands
 346     without any solvent and inorganic molecules, and write corresponding
 347     PDB files for chains and skipping generation of any ligand files, type:
 348 
 349         % PyMOLSplitChainsAndLigands.py -c all -m Chains --keepSolvents no
 350           --keepInorganics no -i Sample3.pdb
 351 
 352     To split a macromolecule into a specific chain and a specific ligand in the
 353     chain along with solvent and inorganic molecules, and write chain PDB
 354     and ligand MDLMOL files, type:
 355 
 356         % PyMOLSplitChainsAndLigands.py -c E -l ADP --ligandFileFormat MDLMOL
 357           -i Sample3.pdb 
 358 
 359 Author:
 360     Manish Sud(msud@san.rr.com)
 361 
 362 See also:
 363     PyMOLAlignChains.py, PyMOLVisualizeMacromolecules.py
 364 
 365 Copyright:
 366     Copyright (C) 2024 Manish Sud. All rights reserved.
 367 
 368     The functionality available in this script is implemented using PyMOL, a
 369     molecular visualization system on an open source foundation originally
 370     developed by Warren DeLano.
 371 
 372     This file is part of MayaChemTools.
 373 
 374     MayaChemTools is free software; you can redistribute it and/or modify it under
 375     the terms of the GNU Lesser General Public License as published by the Free
 376     Software Foundation; either version 3 of the License, or (at your option) any
 377     later version.
 378 
 379 """
 380 
# Start execution only when the file is run as a script, not when it is imported.
if __name__ == "__main__":
    main()