MayaChemTools

   1 #!/bin/env python
   2 #
   3 # File: PyMOLMutateAminoAcids.py
   4 # Author: Manish Sud <msud@san.rr.com>
   5 #
   6 # Copyright (C) 2021 Manish Sud. All rights reserved.
   7 #
   8 # The functionality available in this script is implemented using PyMOL, a
   9 # molecular visualization system on an open source foundation originally
  10 # developed by Warren DeLano.
  11 #
  12 # This file is part of MayaChemTools.
  13 #
  14 # MayaChemTools is free software; you can redistribute it and/or modify it under
  15 # the terms of the GNU Lesser General Public License as published by the Free
  16 # Software Foundation; either version 3 of the License, or (at your option) any
  17 # later version.
  18 #
  19 # MayaChemTools is distributed in the hope that it will be useful, but without
  20 # any warranty; without even the implied warranty of merchantability of fitness
  21 # for a particular purpose.  See the GNU Lesser General Public License for more
  22 # details.
  23 #
  24 # You should have received a copy of the GNU Lesser General Public License
  25 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  26 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  27 # Boston, MA, 02111-1307, USA.
  28 #
  29 
  30 from __future__ import print_function
  31 
  32 # Add local python path to the global path and import standard library modules...
  33 import os
  34 import sys;  sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python"))
  35 import time
  36 import re
  37 
  38 # PyMOL imports...
  39 try:
  40     import pymol
  41     # Finish launching PyMOL in  a command line mode for batch processing (-c)
  42     # along with the following options:  disable loading of pymolrc and plugins (-k);
  43     # suppress start up messages (-q)
  44     pymol.finish_launching(['pymol', '-ckq'])
  45 except ImportError as ErrMsg:
  46     sys.stderr.write("\nFailed to import PyMOL module/package: %s\n" % ErrMsg)
  47     sys.stderr.write("Check/update your PyMOL environment and try again.\n\n")
  48     sys.exit(1)
  49 
  50 # MayaChemTools imports...
  51 try:
  52     from docopt import docopt
  53     import MiscUtil
  54     import PyMOLUtil
  55 except ImportError as ErrMsg:
  56     sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg)
  57     sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n")
  58     sys.exit(1)
  59 
  60 ScriptName = os.path.basename(sys.argv[0])
  61 Options = {}
  62 OptionsInfo = {}
  63 
  64 def main():
  65     """Start execution of the script"""
  66     
  67     MiscUtil.PrintInfo("\n%s (PyMOL v%s; %s) Starting...\n" % (ScriptName, pymol.cmd.get_version()[0], time.asctime()))
  68     
  69     (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime()
  70     
  71     # Retrieve command line arguments and options...
  72     RetrieveOptions()
  73     
  74     # Process and validate command line arguments and options...
  75     ProcessOptions()
  76 
  77     # Perform actions required by the script...
  78     PerformMutagenesis()
  79     
  80     MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName)
  81     MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime))
  82 
  83 def PerformMutagenesis():
  84     """Mutate specified residues across chains and generate an output file."""
  85 
  86     MiscUtil.PrintInfo("\nApplying mutations...")
  87 
  88     # Load macromolecule from input file...
  89     MolName = OptionsInfo["InfileRoot"]
  90     LoadMolecule(OptionsInfo["Infile"], MolName)
  91     
  92     # Apply mutations...
  93     for Mutation, ChainID, ResName, ResNum, NewResName in OptionsInfo["SpecifiedMutationsInfo"] :
  94         ApplyMutation(Mutation, MolName, ChainID, ResName, ResNum, NewResName)
  95 
  96     #  Generate output file...
  97     Outfile = OptionsInfo["Outfile"]
  98     MiscUtil.PrintInfo("\nGenerating output file %s..." % Outfile)
  99     pymol.cmd.save(Outfile, MolName)
 100     
 101     # Delete macromolecule...
 102     DeleteMolecule(MolName)
 103 
 104 def ApplyMutation(Mutation, MolName, ChainID, ResName, ResNum, NewResName):
 105     """Apply mutatation. """
 106 
 107     MiscUtil.PrintInfo("\nApplying mutation %s" % Mutation)
 108     
 109     # Setup wizard for amino acid mutagenesis...
 110     pymol.cmd.wizard("mutagenesis")
 111     
 112     pymol.cmd.refresh_wizard()
 113 
 114     # Setup residue to be mutated...
 115     ResSelection = "/%s//%s/%s" % (MolName, ChainID, ResNum)
 116     pymol.cmd.get_wizard().do_select(ResSelection)
 117 
 118     # Setup new mutated residue...
 119     pymol.cmd.get_wizard().set_mode("%s" % NewResName)
 120     
 121     # Mutate...
 122     pymol.cmd.get_wizard().apply()
 123     
 124     # Quit wizard...
 125     pymol.cmd.set_wizard()
 126     
 127 def RetrieveChainsIDs():
 128     """Retrieve chain IDs. """
 129 
 130     MolName = OptionsInfo["InfileRoot"]
 131     Infile = OptionsInfo["Infile"]
 132     
 133     MiscUtil.PrintInfo("\nRetrieving chains information for input file %s..." % Infile)
 134 
 135     LoadMolecule(Infile, MolName)
 136 
 137     ChainIDs = PyMOLUtil.GetChains(MolName)
 138     
 139     DeleteMolecule(MolName)
 140 
 141     if ChainIDs is None:
 142         ChainIDs = []
 143 
 144     # Print out chain and ligand IDs...
 145     ChainInfo = ", ".join(ChainIDs) if len(ChainIDs) else "None"
 146     MiscUtil.PrintInfo("Chain IDs: %s" % ChainInfo)
 147                          
 148     OptionsInfo["ChainIDs"] = ChainIDs
 149     
 150 def ProcessSpecifiedMutations():
 151     """Process specified mutations"""
 152     
 153     MiscUtil.PrintInfo("\nProcessing specified mutations...")
 154 
 155     SpecifiedMutationsInfo = []
 156 
 157     Mutations = re.sub(" ", "", OptionsInfo["Mutations"])
 158     MutationsWords = Mutations.split(",")
 159     if not len(MutationsWords):
 160         MiscUtil.PrintError("The number of comma delimited mutations specified using \"-m, --mutations\" option, \"%s\",  must be > 0." % (OptionsInfo["Mutations"]))
 161 
 162     # Load macromolecule from input file...
 163     MolName = OptionsInfo["InfileRoot"]
 164     LoadMolecule(OptionsInfo["Infile"], MolName)
 165     
 166     FirstMutation = True
 167     CurrentChainID = None
 168     CanonicalMutationMap = {}
 169     MutationsCount, ValidMutationsCount = [0] * 2
 170     
 171     for Mutation in MutationsWords:
 172         MutationsCount += 1
 173         if not len(Mutation):
 174             MiscUtil.PrintWarning("The mutation, \"%s\", specified using \"-m, --mutations\" option is empty.\nIgnoring mutation..." % (Mutation))
 175             continue
 176 
 177         CanonicalMutation = Mutation.lower()
 178         if CanonicalMutation in CanonicalMutationMap:
 179             MiscUtil.PrintWarning("The mutation, \"%s\", specified using \"-m, --mutations\" option already exist.\nIgnoring mutation..." % (Mutation))
 180             continue
 181         CanonicalMutationMap[CanonicalMutation] = Mutation
 182             
 183         # Match with a chain ID...
 184         MatchedResults = re.match(r"^([a-z0-9]+):([a-z]+)([0-9]+)([a-z]+)$", Mutation, re.I)
 185         if not MatchedResults:
 186             # Match without a chain ID...
 187             MatchedResults = re.match(r"^([a-z]+)([0-9]+)([a-z]+)$", Mutation, re.I)
 188             
 189         if not MatchedResults:
 190             MiscUtil.PrintWarning("The format of mutation, \"%s\", specified using \"-m, --mutations\" option is not valid. Supported format: <ChainID>:<ResName><ResNum><ResName> or <ResName><ResNum><ResName>\nIgnoring mutation..." % (Mutation))
 191             continue
 192 
 193         NumOfMatchedGroups =  len(MatchedResults.groups())
 194         if NumOfMatchedGroups == 3:
 195             ResName, ResNum, NewResName = MatchedResults.groups()
 196         elif NumOfMatchedGroups == 4:
 197             CurrentChainID, ResName, ResNum, NewResName = MatchedResults.groups()
 198         else:
 199             MiscUtil.PrintWarning("The format of mutation, \"%s\", specified using \"-m, --mutations\" option is not valid. Supported format: <ChainID>:<ResName><ResNum><ResName> or <ResName><ResNum><ResName>\nIgnoring mutation..." % (Mutation))
 200             continue
 201         
 202         if FirstMutation:
 203             FirstMutation = False
 204             if  CurrentChainID is None:
 205                 MiscUtil.PrintError("The first mutation, \"%s\", specified using \"-m, --mutations\" option must be colon delimited and contain only two values, the first value corresponding to chain ID" % (Mutation))
 206         
 207         ResName = ResName.upper()
 208         NewResName = NewResName.upper()
 209         
 210         # Is ResNum and ResName present in input file?
 211         SelectionCmd = "%s and chain %s and resi %s and resn %s" % (MolName, CurrentChainID, ResNum, ResName)
 212         ResiduesInfo = PyMOLUtil.GetSelectionResiduesInfo(SelectionCmd)
 213         if (ResiduesInfo is None) or (not len(ResiduesInfo["ResNames"])):
 214             MiscUtil.PrintWarning("The residue name, %s, and residue number, %s, in mutation, \"%s\", specified using \"-m, --mutations\" option appears to be missing in input file.\nIgnoring mutation..." % (ResName, ResNum, Mutation))
 215             continue
 216 
 217         ValidMutationsCount += 1
 218         
 219         # Track mutation information...
 220         SpecifiedMutationsInfo.append([Mutation, CurrentChainID, ResName, ResNum, NewResName])
 221         
 222     # Delete macromolecule...
 223     DeleteMolecule(MolName)
 224 
 225     MiscUtil.PrintInfo("\nTotal number of mutations: %d" % MutationsCount)
 226     MiscUtil.PrintInfo("Number of valid mutations: %d" % ValidMutationsCount)
 227     MiscUtil.PrintInfo("Number of ignored mutations: %d" % (MutationsCount - ValidMutationsCount))
 228     
 229     if not len(SpecifiedMutationsInfo):
 230         MiscUtil.PrintError("No valid mutations, \"%s\" specified using \"-m, --mutations\" option." % (OptionsInfo["Mutations"]))
 231         
 232     OptionsInfo["SpecifiedMutationsInfo"] = SpecifiedMutationsInfo
 233     
 234 def LoadMolecule(Infile, MolName):
 235     """Load input file. """
 236     
 237     pymol.cmd.reinitialize()
 238     pymol.cmd.load(Infile, MolName)
 239     
 240 def DeleteMolecule(MolName):
 241     """Delete molecule."""
 242     
 243     pymol.cmd.delete(MolName)
 244     
 245 def ProcessOptions():
 246     """Process and validate command line arguments and options"""
 247     
 248     MiscUtil.PrintInfo("Processing options...")
 249     
 250     # Validate options...
 251     ValidateOptions()
 252 
 253     OptionsInfo["Infile"] = Options["--infile"]
 254     FileDir, FileName, FileExt = MiscUtil.ParseFileName(OptionsInfo["Infile"])
 255     OptionsInfo["InfileRoot"] = FileName
 256 
 257     OptionsInfo["Overwrite"] = Options["--overwrite"]
 258     OptionsInfo["Outfile"] = Options["--outfile"]
 259 
 260     RetrieveChainsIDs()
 261     
 262     Mutations = Options["--mutations"]
 263     if re.match("^None$", Mutations, re.I):
 264         MiscUtil.PrintError("No mutations specified using \"-m, --mutations\" option.")
 265     
 266     OptionsInfo["Mutations"] = Options["--mutations"]
 267     ProcessSpecifiedMutations()
 268 
 269 def RetrieveOptions(): 
 270     """Retrieve command line arguments and options"""
 271     
 272     # Get options...
 273     global Options
 274     Options = docopt(_docoptUsage_)
 275 
 276     # Set current working directory to the specified directory...
 277     WorkingDir = Options["--workingdir"]
 278     if WorkingDir:
 279         os.chdir(WorkingDir)
 280     
 281     # Handle examples option...
 282     if "--examples" in Options and Options["--examples"]:
 283         MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_))
 284         sys.exit(0)
 285 
 286 def ValidateOptions():
 287     """Validate option values"""
 288     
 289     MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"])
 290     MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "pdb")
 291     
 292     MiscUtil.ValidateOptionsDistinctFileNames("-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"])
 293     MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "pdb")
 294     MiscUtil.ValidateOptionsOutputFileOverwrite("-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"])
 295 
 296 # Setup a usage string for docopt...
 297 _docoptUsage_ = """
 298 PyMOLMutateAminoAcids.py - Mutate amino acids
 299 
 300 Usage:
 301     PyMOLMutateAminoAcids.py [--mutations <Spec1,Spec2,...>]
 302                             [--overwrite] [-w <dir>] -i <infile> -o <outfile>
 303     PyMOLMutateAminoAcids.py -h | --help | -e | --examples
 304 
 305 Description:
 306     Mutate amino acids in macromolecules. The mutations are performed using
 307     protein mutagenesis wizard available in PyMOL.
 308  
 309     The supported input and output file format is: PDB (.pdb)
 310  
 311 Options:
 312     -m, --mutations <Spec1,Spec2,...>  [default: None]
 313         Comma delimited list of specifications for mutating amino acid residues
 314         in proteins.
 315         
 316         The format of mutation specification is as follows:
 317         
 318             <ChainID>:<ResName><ResNum><ResName>,...
 319         
 320         A chain ID in the first specification of a mutation is required. It may be
 321         skipped in subsequent specifications. The most recent chain ID is used
 322         for the missing chain ID. The first reside name corresponds to the residue
 323         to be mutated. The second residue name represents the new residue.
 324         The residue number corresponds to the first residue name and must be
 325         present in the current chain.
 326         
 327         Examples:
 328         
 329             E:LEU49CYS, E:SER53TYR
 330             E:LEU49CYS, SER53TYR
 331             E:LEU49CYS, SER53TYR, I:TYR7SER, ILE11VAL
 332               
 333         The residue names must be valid amino acid names. No validation is
 334         performed before mutating residues via protein mutagenesis wizard
 335         available in PyMOL.
 336     -e, --examples
 337         Print examples.
 338     -h, --help
 339         Print this help message.
 340     -i, --infile <infile>
 341         Input file name.
 342     -o, --outfile <outfile>
 343         Output file name.
 344     --overwrite
 345         Overwrite existing files.
 346     -w, --workingdir <dir>
 347         Location of working directory which defaults to the current directory.
 348 
 349 Examples:
 350     To mutate a single residue in a specific chain and write a PDB file, type:
 351 
 352         % PyMOLMutateAminoAcids.py -m "I:TYR7SER" -i Sample3.pdb
 353           -o Sample3Out.pdb
 354 
 355     To mutate multiple residues in a single chain and write a PDB file, type:
 356 
 357         % PyMOLMutateAminoAcids.py -m "I:TYR7SER, ILE11VAL" -i Sample3.pdb
 358           -o Sample3Out.pdb
 359 
 360     To mutate multiple residues across multiple chains and write a PDB file, type:
 361 
 362         % PyMOLMutateAminoAcids.py -m "E:LEU49CYS,SER53TYR,I:TYR7SER,ILE11VAL"
 363           -i Sample3.pdb -o Sample3Out.pdb
 364 
 365 Author:
 366     Manish Sud(msud@san.rr.com)
 367 
 368 See also:
 369     DownloadPDBFiles.pl, PyMOLMutateNucleicAcids.py,
 370     PyMOLVisualizeMacromolecules.py
 371 
 372 Copyright:
 373     Copyright (C) 2021 Manish Sud. All rights reserved.
 374 
 375     The functionality available in this script is implemented using PyMOL, a
 376     molecular visualization system on an open source foundation originally
 377     developed by Warren DeLano.
 378 
 379     This file is part of MayaChemTools.
 380 
 381     MayaChemTools is free software; you can redistribute it and/or modify it under
 382     the terms of the GNU Lesser General Public License as published by the Free
 383     Software Foundation; either version 3 of the License, or (at your option) any
 384     later version.
 385 
 386 """
 387 
 388 if __name__ == "__main__":
 389     main()