MayaChemTools

   1 #!/bin/env python
   2 #
   3 # File: PyMOLMutateAminoAcids.py
   4 # Author: Manish Sud <msud@san.rr.com>
   5 #
   6 # Copyright (C) 2025 Manish Sud. All rights reserved.
   7 #
   8 # The functionality available in this script is implemented using PyMOL, a
   9 # molecular visualization system on an open source foundation originally
  10 # developed by Warren DeLano.
  11 #
  12 # This file is part of MayaChemTools.
  13 #
  14 # MayaChemTools is free software; you can redistribute it and/or modify it under
  15 # the terms of the GNU Lesser General Public License as published by the Free
  16 # Software Foundation; either version 3 of the License, or (at your option) any
  17 # later version.
  18 #
  19 # MayaChemTools is distributed in the hope that it will be useful, but without
  20 # any warranty; without even the implied warranty of merchantability or fitness
  21 # for a particular purpose.  See the GNU Lesser General Public License for more
  22 # details.
  23 #
  24 # You should have received a copy of the GNU Lesser General Public License
  25 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  26 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  27 # Boston, MA, 02111-1307, USA.
  28 #
  29 
  30 from __future__ import print_function
  31 
  32 import os
  33 import sys
  34 import time
  35 import re
  36 
  37 # PyMOL imports...
  38 try:
  39     import pymol
  40 
  41     # Finish launching PyMOL in  a command line mode for batch processing (-c)
  42     # along with the following options:  disable loading of pymolrc and plugins (-k);
  43     # suppress start up messages (-q)
  44     pymol.finish_launching(["pymol", "-ckq"])
  45 except ImportError as ErrMsg:
  46     sys.stderr.write("\nFailed to import PyMOL module/package: %s\n" % ErrMsg)
  47     sys.stderr.write("Check/update your PyMOL environment and try again.\n\n")
  48     sys.exit(1)
  49 
  50 # MayaChemTools imports...
  51 sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python"))
  52 try:
  53     from docopt import docopt
  54     import MiscUtil
  55     import PyMOLUtil
  56 except ImportError as ErrMsg:
  57     sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg)
  58     sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n")
  59     sys.exit(1)
  60 
# Base name of the script as invoked; used in the start-up and completion messages.
ScriptName = os.path.basename(sys.argv[0])
# Command line options; replaced by the docopt result in RetrieveOptions().
Options = {}
# Processed option values shared across functions; filled in by ProcessOptions()
# and the functions it calls.
OptionsInfo = {}
  64 
  65 
  66 def main():
  67     """Start execution of the script."""
  68 
  69     MiscUtil.PrintInfo(
  70         "\n%s (PyMOL v%s; MayaChemTools v%s; %s): Starting...\n"
  71         % (ScriptName, pymol.cmd.get_version()[0], MiscUtil.GetMayaChemToolsVersion(), time.asctime())
  72     )
  73 
  74     (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime()
  75 
  76     # Retrieve command line arguments and options...
  77     RetrieveOptions()
  78 
  79     # Process and validate command line arguments and options...
  80     ProcessOptions()
  81 
  82     # Perform actions required by the script...
  83     PerformMutagenesis()
  84 
  85     MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName)
  86     MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime))
  87 
  88 
  89 def PerformMutagenesis():
  90     """Mutate specified residues across chains and generate an output file."""
  91 
  92     MiscUtil.PrintInfo("\nApplying mutations...")
  93 
  94     # Load macromolecule from input file...
  95     MolName = OptionsInfo["InfileRoot"]
  96     LoadMolecule(OptionsInfo["Infile"], MolName)
  97 
  98     # Apply mutations...
  99     for Mutation, ChainID, ResName, ResNum, NewResName in OptionsInfo["SpecifiedMutationsInfo"]:
 100         ApplyMutation(Mutation, MolName, ChainID, ResName, ResNum, NewResName)
 101 
 102     #  Generate output file...
 103     Outfile = OptionsInfo["Outfile"]
 104     MiscUtil.PrintInfo("\nGenerating output file %s..." % Outfile)
 105     pymol.cmd.save(Outfile, MolName)
 106 
 107     # Delete macromolecule...
 108     DeleteMolecule(MolName)
 109 
 110 
 111 def ApplyMutation(Mutation, MolName, ChainID, ResName, ResNum, NewResName):
 112     """Apply mutatation."""
 113 
 114     MiscUtil.PrintInfo("\nApplying mutation %s" % Mutation)
 115 
 116     # Setup wizard for amino acid mutagenesis...
 117     pymol.cmd.wizard("mutagenesis")
 118 
 119     pymol.cmd.refresh_wizard()
 120 
 121     # Setup residue to be mutated...
 122     ResSelection = "/%s//%s/%s" % (MolName, ChainID, ResNum)
 123     pymol.cmd.get_wizard().do_select(ResSelection)
 124 
 125     # Setup new mutated residue...
 126     pymol.cmd.get_wizard().set_mode("%s" % NewResName)
 127 
 128     # Mutate...
 129     pymol.cmd.get_wizard().apply()
 130 
 131     # Quit wizard...
 132     pymol.cmd.set_wizard()
 133 
 134 
 135 def RetrieveChainsIDs():
 136     """Retrieve chain IDs."""
 137 
 138     MolName = OptionsInfo["InfileRoot"]
 139     Infile = OptionsInfo["Infile"]
 140 
 141     MiscUtil.PrintInfo("\nRetrieving chains information for input file %s..." % Infile)
 142 
 143     LoadMolecule(Infile, MolName)
 144 
 145     ChainIDs = PyMOLUtil.GetChains(MolName)
 146 
 147     DeleteMolecule(MolName)
 148 
 149     if ChainIDs is None:
 150         ChainIDs = []
 151 
 152     # Print out chain and ligand IDs...
 153     ChainInfo = ", ".join(ChainIDs) if len(ChainIDs) else "None"
 154     MiscUtil.PrintInfo("Chain IDs: %s" % ChainInfo)
 155 
 156     OptionsInfo["ChainIDs"] = ChainIDs
 157 
 158 
 159 def ProcessSpecifiedMutations():
 160     """Process specified mutations."""
 161 
 162     MiscUtil.PrintInfo("\nProcessing specified mutations...")
 163 
 164     SpecifiedMutationsInfo = []
 165 
 166     Mutations = re.sub(" ", "", OptionsInfo["Mutations"])
 167     MutationsWords = Mutations.split(",")
 168     if not len(MutationsWords):
 169         MiscUtil.PrintError(
 170             'The number of comma delimited mutations specified using "-m, --mutations" option, "%s",  must be > 0.'
 171             % (OptionsInfo["Mutations"])
 172         )
 173 
 174     # Load macromolecule from input file...
 175     MolName = OptionsInfo["InfileRoot"]
 176     LoadMolecule(OptionsInfo["Infile"], MolName)
 177 
 178     FirstMutation = True
 179     CurrentChainID = None
 180     CanonicalMutationMap = {}
 181     MutationsCount, ValidMutationsCount = [0] * 2
 182 
 183     for Mutation in MutationsWords:
 184         MutationsCount += 1
 185         if not len(Mutation):
 186             MiscUtil.PrintWarning(
 187                 'The mutation, "%s", specified using "-m, --mutations" option is empty.\nIgnoring mutation...'
 188                 % (Mutation)
 189             )
 190             continue
 191 
 192         # Match with a chain ID...
 193         MatchedResults = re.match(r"^([a-z0-9]+):([a-z]+)([0-9]+)([a-z]+)$", Mutation, re.I)
 194         if not MatchedResults:
 195             # Match without a chain ID...
 196             MatchedResults = re.match(r"^([a-z]+)([0-9]+)([a-z]+)$", Mutation, re.I)
 197 
 198         if not MatchedResults:
 199             MiscUtil.PrintWarning(
 200                 'The format of mutation, "%s", specified using "-m, --mutations" option is not valid. Supported format: <ChainID>:<ResName><ResNum><ResName> or <ResName><ResNum><ResName>\nIgnoring mutation...'
 201                 % (Mutation)
 202             )
 203             continue
 204 
 205         NumOfMatchedGroups = len(MatchedResults.groups())
 206         if NumOfMatchedGroups == 3:
 207             ResName, ResNum, NewResName = MatchedResults.groups()
 208         elif NumOfMatchedGroups == 4:
 209             CurrentChainID, ResName, ResNum, NewResName = MatchedResults.groups()
 210         else:
 211             MiscUtil.PrintWarning(
 212                 'The format of mutation, "%s", specified using "-m, --mutations" option is not valid. Supported format: <ChainID>:<ResName><ResNum><ResName> or <ResName><ResNum><ResName>\nIgnoring mutation...'
 213                 % (Mutation)
 214             )
 215             continue
 216 
 217         ResName = ResName.upper()
 218         NewResName = NewResName.upper()
 219 
 220         if FirstMutation:
 221             FirstMutation = False
 222             if CurrentChainID is None:
 223                 MiscUtil.PrintError(
 224                     'The first mutation, "%s", specified using "-m, --mutations" option must be colon delimited and contain only two values, the first value corresponding to chain ID'
 225                     % (Mutation)
 226                 )
 227 
 228         # Check for duplicate mutation specifications...
 229         MutationSpec = "%s:%s%s%s" % (CurrentChainID, ResName, ResNum, NewResName)
 230         CanonicalMutation = MutationSpec.lower()
 231         if CanonicalMutation in CanonicalMutationMap:
 232             MiscUtil.PrintWarning(
 233                 'The mutation, "%s", specified using "-m, --mutations" option already exist for the current chain ID %s.\nIgnoring mutation...'
 234                 % (Mutation, CurrentChainID)
 235             )
 236             continue
 237         CanonicalMutationMap[CanonicalMutation] = CanonicalMutation
 238 
 239         # Is ResNum and ResName present in input file?
 240         SelectionCmd = "%s and chain %s and resi %s and resn %s" % (MolName, CurrentChainID, ResNum, ResName)
 241         ResiduesInfo = PyMOLUtil.GetSelectionResiduesInfo(SelectionCmd)
 242         if (ResiduesInfo is None) or (not len(ResiduesInfo["ResNames"])):
 243             MiscUtil.PrintWarning(
 244                 'The residue name, %s, and residue number, %s, in mutation, "%s", specified using "-m, --mutations" option appears to be missing in input file.\nIgnoring mutation...'
 245                 % (ResName, ResNum, Mutation)
 246             )
 247             continue
 248 
 249         ValidMutationsCount += 1
 250 
 251         # Track mutation information...
 252         SpecifiedMutationsInfo.append([Mutation, CurrentChainID, ResName, ResNum, NewResName])
 253 
 254     # Delete macromolecule...
 255     DeleteMolecule(MolName)
 256 
 257     MiscUtil.PrintInfo("\nTotal number of mutations: %d" % MutationsCount)
 258     MiscUtil.PrintInfo("Number of valid mutations: %d" % ValidMutationsCount)
 259     MiscUtil.PrintInfo("Number of ignored mutations: %d" % (MutationsCount - ValidMutationsCount))
 260 
 261     if not len(SpecifiedMutationsInfo):
 262         MiscUtil.PrintError(
 263             'No valid mutations, "%s" specified using "-m, --mutations" option.' % (OptionsInfo["Mutations"])
 264         )
 265 
 266     OptionsInfo["SpecifiedMutationsInfo"] = SpecifiedMutationsInfo
 267 
 268 
 269 def LoadMolecule(Infile, MolName):
 270     """Load input file."""
 271 
 272     pymol.cmd.reinitialize()
 273     pymol.cmd.load(Infile, MolName)
 274 
 275 
 276 def DeleteMolecule(MolName):
 277     """Delete molecule."""
 278 
 279     pymol.cmd.delete(MolName)
 280 
 281 
 282 def ProcessOptions():
 283     """Process and validate command line arguments and options."""
 284 
 285     MiscUtil.PrintInfo("Processing options...")
 286 
 287     # Validate options...
 288     ValidateOptions()
 289 
 290     OptionsInfo["Infile"] = Options["--infile"]
 291     FileDir, FileName, FileExt = MiscUtil.ParseFileName(OptionsInfo["Infile"])
 292     OptionsInfo["InfileRoot"] = FileName
 293 
 294     OptionsInfo["Overwrite"] = Options["--overwrite"]
 295     OptionsInfo["Outfile"] = Options["--outfile"]
 296 
 297     RetrieveChainsIDs()
 298 
 299     Mutations = Options["--mutations"]
 300     if re.match("^None$", Mutations, re.I):
 301         MiscUtil.PrintError('No mutations specified using "-m, --mutations" option.')
 302 
 303     OptionsInfo["Mutations"] = Options["--mutations"]
 304     ProcessSpecifiedMutations()
 305 
 306 
 307 def RetrieveOptions():
 308     """Retrieve command line arguments and options."""
 309 
 310     # Get options...
 311     global Options
 312     Options = docopt(_docoptUsage_)
 313 
 314     # Set current working directory to the specified directory...
 315     WorkingDir = Options["--workingdir"]
 316     if WorkingDir:
 317         os.chdir(WorkingDir)
 318 
 319     # Handle examples option...
 320     if "--examples" in Options and Options["--examples"]:
 321         MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_))
 322         sys.exit(0)
 323 
 324 
 325 def ValidateOptions():
 326     """Validate option values"""
 327 
 328     MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"])
 329     MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "pdb")
 330 
 331     MiscUtil.ValidateOptionsDistinctFileNames(
 332         "-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"]
 333     )
 334     MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "pdb")
 335     MiscUtil.ValidateOptionsOutputFileOverwrite(
 336         "-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"]
 337     )
 338 
 339 
 340 # Setup a usage string for docopt...
 341 _docoptUsage_ = """
 342 PyMOLMutateAminoAcids.py - Mutate amino acids
 343 
 344 Usage:
 345     PyMOLMutateAminoAcids.py [--mutations <Spec1,Spec2,...>]
 346                             [--overwrite] [-w <dir>] -i <infile> -o <outfile>
 347     PyMOLMutateAminoAcids.py -h | --help | -e | --examples
 348 
 349 Description:
 350     Mutate amino acids in macromolecules. The mutations are performed using
 351     protein mutagenesis wizard available in PyMOL.
 352  
 353     The supported input and output file format is: PDB (.pdb)
 354  
 355 Options:
 356     -m, --mutations <Spec1,Spec2,...>  [default: None]
 357         Comma delimited list of specifications for mutating amino acid residues
 358         in proteins.
 359         
 360         The format of mutation specification is as follows:
 361         
 362             <ChainID>:<ResName><ResNum><ResName>,...
 363         
 364         A chain ID in the first specification of a mutation is required. It may be
 365         skipped in subsequent specifications. The most recent chain ID is used
 366         for the missing chain ID. The first reside name corresponds to the residue
 367         to be mutated. The second residue name represents the new residue.
 368         The residue number corresponds to the first residue name and must be
 369         present in the current chain.
 370         
 371         Examples:
 372         
 373             E:LEU49CYS, E:SER53TYR
 374             E:LEU49CYS, SER53TYR
 375             E:LEU49CYS, SER53TYR, I:TYR7SER, ILE11VAL
 376               
 377         The residue names must be valid amino acid names. No validation is
 378         performed before mutating residues via protein mutagenesis wizard
 379         available in PyMOL.
 380     -e, --examples
 381         Print examples.
 382     -h, --help
 383         Print this help message.
 384     -i, --infile <infile>
 385         Input file name.
 386     -o, --outfile <outfile>
 387         Output file name.
 388     --overwrite
 389         Overwrite existing files.
 390     -w, --workingdir <dir>
 391         Location of working directory which defaults to the current directory.
 392 
 393 Examples:
 394     To mutate a single residue in a specific chain and write a PDB file, type:
 395 
 396         % PyMOLMutateAminoAcids.py -m "I:TYR7SER" -i Sample3.pdb
 397           -o Sample3Out.pdb
 398 
 399     To mutate multiple residues in a single chain and write a PDB file, type:
 400 
 401         % PyMOLMutateAminoAcids.py -m "I:TYR7SER, ILE11VAL" -i Sample3.pdb
 402           -o Sample3Out.pdb
 403 
 404     To mutate multiple residues across multiple chains and write a PDB file, type:
 405 
 406         % PyMOLMutateAminoAcids.py -m "E:LEU49CYS,SER53TYR,I:TYR7SER,ILE11VAL"
 407           -i Sample3.pdb -o Sample3Out.pdb
 408 
 409 Author:
 410     Manish Sud(msud@san.rr.com)
 411 
 412 See also:
 413     DownloadPDBFiles.pl, PyMOLMutateNucleicAcids.py,
 414     PyMOLVisualizeMacromolecules.py
 415 
 416 Copyright:
 417     Copyright (C) 2025 Manish Sud. All rights reserved.
 418 
 419     The functionality available in this script is implemented using PyMOL, a
 420     molecular visualization system on an open source foundation originally
 421     developed by Warren DeLano.
 422 
 423     This file is part of MayaChemTools.
 424 
 425     MayaChemTools is free software; you can redistribute it and/or modify it under
 426     the terms of the GNU Lesser General Public License as published by the Free
 427     Software Foundation; either version 3 of the License, or (at your option) any
 428     later version.
 429 
 430 """
 431 
# Run the script only when this file is executed directly...
if __name__ == "__main__":
    main()