MayaChemTools

   1 #!/bin/env python
   2 #
   3 # File: PyMOLMutateAminoAcids.py
   4 # Author: Manish Sud <msud@san.rr.com>
   5 #
   6 # Copyright (C) 2024 Manish Sud. All rights reserved.
   7 #
   8 # The functionality available in this script is implemented using PyMOL, a
   9 # molecular visualization system on an open source foundation originally
  10 # developed by Warren DeLano.
  11 #
  12 # This file is part of MayaChemTools.
  13 #
  14 # MayaChemTools is free software; you can redistribute it and/or modify it under
  15 # the terms of the GNU Lesser General Public License as published by the Free
  16 # Software Foundation; either version 3 of the License, or (at your option) any
  17 # later version.
  18 #
  19 # MayaChemTools is distributed in the hope that it will be useful, but without
  20 # any warranty; without even the implied warranty of merchantability of fitness
  21 # for a particular purpose.  See the GNU Lesser General Public License for more
  22 # details.
  23 #
  24 # You should have received a copy of the GNU Lesser General Public License
  25 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  26 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  27 # Boston, MA, 02111-1307, USA.
  28 #
  29 
  30 from __future__ import print_function
  31 
  32 # Add local python path to the global path and import standard library modules...
  33 import os
  34 import sys;  sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python"))
  35 import time
  36 import re
  37 
  38 # PyMOL imports...
  39 try:
  40     import pymol
  41     # Finish launching PyMOL in  a command line mode for batch processing (-c)
  42     # along with the following options:  disable loading of pymolrc and plugins (-k);
  43     # suppress start up messages (-q)
  44     pymol.finish_launching(['pymol', '-ckq'])
  45 except ImportError as ErrMsg:
  46     sys.stderr.write("\nFailed to import PyMOL module/package: %s\n" % ErrMsg)
  47     sys.stderr.write("Check/update your PyMOL environment and try again.\n\n")
  48     sys.exit(1)
  49 
  50 # MayaChemTools imports...
  51 try:
  52     from docopt import docopt
  53     import MiscUtil
  54     import PyMOLUtil
  55 except ImportError as ErrMsg:
  56     sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg)
  57     sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n")
  58     sys.exit(1)
  59 
  60 ScriptName = os.path.basename(sys.argv[0])
  61 Options = {}
  62 OptionsInfo = {}
  63 
  64 def main():
  65     """Start execution of the script."""
  66     
  67     MiscUtil.PrintInfo("\n%s (PyMOL v%s; MayaChemTools v%s; %s): Starting...\n" % (ScriptName, pymol.cmd.get_version()[0], MiscUtil.GetMayaChemToolsVersion(), time.asctime()))
  68     
  69     (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime()
  70     
  71     # Retrieve command line arguments and options...
  72     RetrieveOptions()
  73     
  74     # Process and validate command line arguments and options...
  75     ProcessOptions()
  76 
  77     # Perform actions required by the script...
  78     PerformMutagenesis()
  79     
  80     MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName)
  81     MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime))
  82 
  83 def PerformMutagenesis():
  84     """Mutate specified residues across chains and generate an output file."""
  85 
  86     MiscUtil.PrintInfo("\nApplying mutations...")
  87 
  88     # Load macromolecule from input file...
  89     MolName = OptionsInfo["InfileRoot"]
  90     LoadMolecule(OptionsInfo["Infile"], MolName)
  91     
  92     # Apply mutations...
  93     for Mutation, ChainID, ResName, ResNum, NewResName in OptionsInfo["SpecifiedMutationsInfo"] :
  94         ApplyMutation(Mutation, MolName, ChainID, ResName, ResNum, NewResName)
  95 
  96     #  Generate output file...
  97     Outfile = OptionsInfo["Outfile"]
  98     MiscUtil.PrintInfo("\nGenerating output file %s..." % Outfile)
  99     pymol.cmd.save(Outfile, MolName)
 100     
 101     # Delete macromolecule...
 102     DeleteMolecule(MolName)
 103 
 104 def ApplyMutation(Mutation, MolName, ChainID, ResName, ResNum, NewResName):
 105     """Apply mutatation."""
 106 
 107     MiscUtil.PrintInfo("\nApplying mutation %s" % Mutation)
 108     
 109     # Setup wizard for amino acid mutagenesis...
 110     pymol.cmd.wizard("mutagenesis")
 111     
 112     pymol.cmd.refresh_wizard()
 113 
 114     # Setup residue to be mutated...
 115     ResSelection = "/%s//%s/%s" % (MolName, ChainID, ResNum)
 116     pymol.cmd.get_wizard().do_select(ResSelection)
 117 
 118     # Setup new mutated residue...
 119     pymol.cmd.get_wizard().set_mode("%s" % NewResName)
 120     
 121     # Mutate...
 122     pymol.cmd.get_wizard().apply()
 123     
 124     # Quit wizard...
 125     pymol.cmd.set_wizard()
 126     
 127 def RetrieveChainsIDs():
 128     """Retrieve chain IDs. """
 129 
 130     MolName = OptionsInfo["InfileRoot"]
 131     Infile = OptionsInfo["Infile"]
 132     
 133     MiscUtil.PrintInfo("\nRetrieving chains information for input file %s..." % Infile)
 134 
 135     LoadMolecule(Infile, MolName)
 136 
 137     ChainIDs = PyMOLUtil.GetChains(MolName)
 138     
 139     DeleteMolecule(MolName)
 140 
 141     if ChainIDs is None:
 142         ChainIDs = []
 143 
 144     # Print out chain and ligand IDs...
 145     ChainInfo = ", ".join(ChainIDs) if len(ChainIDs) else "None"
 146     MiscUtil.PrintInfo("Chain IDs: %s" % ChainInfo)
 147                          
 148     OptionsInfo["ChainIDs"] = ChainIDs
 149     
 150 def ProcessSpecifiedMutations():
 151     """Process specified mutations."""
 152     
 153     MiscUtil.PrintInfo("\nProcessing specified mutations...")
 154 
 155     SpecifiedMutationsInfo = []
 156 
 157     Mutations = re.sub(" ", "", OptionsInfo["Mutations"])
 158     MutationsWords = Mutations.split(",")
 159     if not len(MutationsWords):
 160         MiscUtil.PrintError("The number of comma delimited mutations specified using \"-m, --mutations\" option, \"%s\",  must be > 0." % (OptionsInfo["Mutations"]))
 161 
 162     # Load macromolecule from input file...
 163     MolName = OptionsInfo["InfileRoot"]
 164     LoadMolecule(OptionsInfo["Infile"], MolName)
 165     
 166     FirstMutation = True
 167     CurrentChainID = None
 168     CanonicalMutationMap = {}
 169     MutationsCount, ValidMutationsCount = [0] * 2
 170     
 171     for Mutation in MutationsWords:
 172         MutationsCount += 1
 173         if not len(Mutation):
 174             MiscUtil.PrintWarning("The mutation, \"%s\", specified using \"-m, --mutations\" option is empty.\nIgnoring mutation..." % (Mutation))
 175             continue
 176         
 177         # Match with a chain ID...
 178         MatchedResults = re.match(r"^([a-z0-9]+):([a-z]+)([0-9]+)([a-z]+)$", Mutation, re.I)
 179         if not MatchedResults:
 180             # Match without a chain ID...
 181             MatchedResults = re.match(r"^([a-z]+)([0-9]+)([a-z]+)$", Mutation, re.I)
 182             
 183         if not MatchedResults:
 184             MiscUtil.PrintWarning("The format of mutation, \"%s\", specified using \"-m, --mutations\" option is not valid. Supported format: <ChainID>:<ResName><ResNum><ResName> or <ResName><ResNum><ResName>\nIgnoring mutation..." % (Mutation))
 185             continue
 186 
 187         NumOfMatchedGroups =  len(MatchedResults.groups())
 188         if NumOfMatchedGroups == 3:
 189             ResName, ResNum, NewResName = MatchedResults.groups()
 190         elif NumOfMatchedGroups == 4:
 191             CurrentChainID, ResName, ResNum, NewResName = MatchedResults.groups()
 192         else:
 193             MiscUtil.PrintWarning("The format of mutation, \"%s\", specified using \"-m, --mutations\" option is not valid. Supported format: <ChainID>:<ResName><ResNum><ResName> or <ResName><ResNum><ResName>\nIgnoring mutation..." % (Mutation))
 194             continue
 195         
 196         ResName = ResName.upper()
 197         NewResName = NewResName.upper()
 198         
 199         if FirstMutation:
 200             FirstMutation = False
 201             if  CurrentChainID is None:
 202                 MiscUtil.PrintError("The first mutation, \"%s\", specified using \"-m, --mutations\" option must be colon delimited and contain only two values, the first value corresponding to chain ID" % (Mutation))
 203         
 204         # Check for duplicate mutation specifications...
 205         MutationSpec = "%s:%s%s%s" % (CurrentChainID, ResName, ResNum, NewResName)
 206         CanonicalMutation = MutationSpec.lower()
 207         if CanonicalMutation in CanonicalMutationMap:
 208             MiscUtil.PrintWarning("The mutation, \"%s\", specified using \"-m, --mutations\" option already exist for the current chain ID %s.\nIgnoring mutation..." % (Mutation, CurrentChainID))
 209             continue
 210         CanonicalMutationMap[CanonicalMutation] = CanonicalMutation
 211         
 212         # Is ResNum and ResName present in input file?
 213         SelectionCmd = "%s and chain %s and resi %s and resn %s" % (MolName, CurrentChainID, ResNum, ResName)
 214         ResiduesInfo = PyMOLUtil.GetSelectionResiduesInfo(SelectionCmd)
 215         if (ResiduesInfo is None) or (not len(ResiduesInfo["ResNames"])):
 216             MiscUtil.PrintWarning("The residue name, %s, and residue number, %s, in mutation, \"%s\", specified using \"-m, --mutations\" option appears to be missing in input file.\nIgnoring mutation..." % (ResName, ResNum, Mutation))
 217             continue
 218 
 219         ValidMutationsCount += 1
 220         
 221         # Track mutation information...
 222         SpecifiedMutationsInfo.append([Mutation, CurrentChainID, ResName, ResNum, NewResName])
 223         
 224     # Delete macromolecule...
 225     DeleteMolecule(MolName)
 226 
 227     MiscUtil.PrintInfo("\nTotal number of mutations: %d" % MutationsCount)
 228     MiscUtil.PrintInfo("Number of valid mutations: %d" % ValidMutationsCount)
 229     MiscUtil.PrintInfo("Number of ignored mutations: %d" % (MutationsCount - ValidMutationsCount))
 230     
 231     if not len(SpecifiedMutationsInfo):
 232         MiscUtil.PrintError("No valid mutations, \"%s\" specified using \"-m, --mutations\" option." % (OptionsInfo["Mutations"]))
 233         
 234     OptionsInfo["SpecifiedMutationsInfo"] = SpecifiedMutationsInfo
 235 
 236 def LoadMolecule(Infile, MolName):
 237     """Load input file."""
 238     
 239     pymol.cmd.reinitialize()
 240     pymol.cmd.load(Infile, MolName)
 241     
 242 def DeleteMolecule(MolName):
 243     """Delete molecule."""
 244     
 245     pymol.cmd.delete(MolName)
 246     
 247 def ProcessOptions():
 248     """Process and validate command line arguments and options."""
 249     
 250     MiscUtil.PrintInfo("Processing options...")
 251     
 252     # Validate options...
 253     ValidateOptions()
 254 
 255     OptionsInfo["Infile"] = Options["--infile"]
 256     FileDir, FileName, FileExt = MiscUtil.ParseFileName(OptionsInfo["Infile"])
 257     OptionsInfo["InfileRoot"] = FileName
 258 
 259     OptionsInfo["Overwrite"] = Options["--overwrite"]
 260     OptionsInfo["Outfile"] = Options["--outfile"]
 261 
 262     RetrieveChainsIDs()
 263     
 264     Mutations = Options["--mutations"]
 265     if re.match("^None$", Mutations, re.I):
 266         MiscUtil.PrintError("No mutations specified using \"-m, --mutations\" option.")
 267     
 268     OptionsInfo["Mutations"] = Options["--mutations"]
 269     ProcessSpecifiedMutations()
 270 
 271 def RetrieveOptions(): 
 272     """Retrieve command line arguments and options."""
 273     
 274     # Get options...
 275     global Options
 276     Options = docopt(_docoptUsage_)
 277 
 278     # Set current working directory to the specified directory...
 279     WorkingDir = Options["--workingdir"]
 280     if WorkingDir:
 281         os.chdir(WorkingDir)
 282     
 283     # Handle examples option...
 284     if "--examples" in Options and Options["--examples"]:
 285         MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_))
 286         sys.exit(0)
 287 
 288 def ValidateOptions():
 289     """Validate option values"""
 290     
 291     MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"])
 292     MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "pdb")
 293     
 294     MiscUtil.ValidateOptionsDistinctFileNames("-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"])
 295     MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "pdb")
 296     MiscUtil.ValidateOptionsOutputFileOverwrite("-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"])
 297 
 298 # Setup a usage string for docopt...
 299 _docoptUsage_ = """
 300 PyMOLMutateAminoAcids.py - Mutate amino acids
 301 
 302 Usage:
 303     PyMOLMutateAminoAcids.py [--mutations <Spec1,Spec2,...>]
 304                             [--overwrite] [-w <dir>] -i <infile> -o <outfile>
 305     PyMOLMutateAminoAcids.py -h | --help | -e | --examples
 306 
 307 Description:
 308     Mutate amino acids in macromolecules. The mutations are performed using
 309     protein mutagenesis wizard available in PyMOL.
 310  
 311     The supported input and output file format is: PDB (.pdb)
 312  
 313 Options:
 314     -m, --mutations <Spec1,Spec2,...>  [default: None]
 315         Comma delimited list of specifications for mutating amino acid residues
 316         in proteins.
 317         
 318         The format of mutation specification is as follows:
 319         
 320             <ChainID>:<ResName><ResNum><ResName>,...
 321         
 322         A chain ID in the first specification of a mutation is required. It may be
 323         skipped in subsequent specifications. The most recent chain ID is used
 324         for the missing chain ID. The first reside name corresponds to the residue
 325         to be mutated. The second residue name represents the new residue.
 326         The residue number corresponds to the first residue name and must be
 327         present in the current chain.
 328         
 329         Examples:
 330         
 331             E:LEU49CYS, E:SER53TYR
 332             E:LEU49CYS, SER53TYR
 333             E:LEU49CYS, SER53TYR, I:TYR7SER, ILE11VAL
 334               
 335         The residue names must be valid amino acid names. No validation is
 336         performed before mutating residues via protein mutagenesis wizard
 337         available in PyMOL.
 338     -e, --examples
 339         Print examples.
 340     -h, --help
 341         Print this help message.
 342     -i, --infile <infile>
 343         Input file name.
 344     -o, --outfile <outfile>
 345         Output file name.
 346     --overwrite
 347         Overwrite existing files.
 348     -w, --workingdir <dir>
 349         Location of working directory which defaults to the current directory.
 350 
 351 Examples:
 352     To mutate a single residue in a specific chain and write a PDB file, type:
 353 
 354         % PyMOLMutateAminoAcids.py -m "I:TYR7SER" -i Sample3.pdb
 355           -o Sample3Out.pdb
 356 
 357     To mutate multiple residues in a single chain and write a PDB file, type:
 358 
 359         % PyMOLMutateAminoAcids.py -m "I:TYR7SER, ILE11VAL" -i Sample3.pdb
 360           -o Sample3Out.pdb
 361 
 362     To mutate multiple residues across multiple chains and write a PDB file, type:
 363 
 364         % PyMOLMutateAminoAcids.py -m "E:LEU49CYS,SER53TYR,I:TYR7SER,ILE11VAL"
 365           -i Sample3.pdb -o Sample3Out.pdb
 366 
 367 Author:
 368     Manish Sud(msud@san.rr.com)
 369 
 370 See also:
 371     DownloadPDBFiles.pl, PyMOLMutateNucleicAcids.py,
 372     PyMOLVisualizeMacromolecules.py
 373 
 374 Copyright:
 375     Copyright (C) 2024 Manish Sud. All rights reserved.
 376 
 377     The functionality available in this script is implemented using PyMOL, a
 378     molecular visualization system on an open source foundation originally
 379     developed by Warren DeLano.
 380 
 381     This file is part of MayaChemTools.
 382 
 383     MayaChemTools is free software; you can redistribute it and/or modify it under
 384     the terms of the GNU Lesser General Public License as published by the Free
 385     Software Foundation; either version 3 of the License, or (at your option) any
 386     later version.
 387 
 388 """
 389 
 390 if __name__ == "__main__":
 391     main()