MayaChemTools

   1 #!/bin/env python
   2 #
   3 # File: PyMOLMutateNucleicAcids.py
   4 # Author: Manish Sud <msud@san.rr.com>
   5 #
   6 # Copyright (C) 2024 Manish Sud. All rights reserved.
   7 #
   8 # The functionality available in this script is implemented using PyMOL, a
   9 # molecular visualization system on an open source foundation originally
  10 # developed by Warren DeLano.
  11 #
  12 # This file is part of MayaChemTools.
  13 #
  14 # MayaChemTools is free software; you can redistribute it and/or modify it under
  15 # the terms of the GNU Lesser General Public License as published by the Free
  16 # Software Foundation; either version 3 of the License, or (at your option) any
  17 # later version.
  18 #
  19 # MayaChemTools is distributed in the hope that it will be useful, but without
  20 # any warranty; without even the implied warranty of merchantability or fitness
  21 # for a particular purpose.  See the GNU Lesser General Public License for more
  22 # details.
  23 #
  24 # You should have received a copy of the GNU Lesser General Public License
  25 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  26 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  27 # Boston, MA, 02111-1307, USA.
  28 #
  29 
  30 from __future__ import print_function
  31 
  32 # Add local python path to the global path and import standard library modules...
  33 import os
  34 import sys;  sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python"))
  35 import time
  36 import re
  37 
  38 # PyMOL imports...
  39 try:
  40     import pymol
  41     # Finish launching PyMOL in  a command line mode for batch processing (-c)
  42     # along with the following options:  disable loading of pymolrc and plugins (-k);
  43     # suppress start up messages (-q)
  44     pymol.finish_launching(['pymol', '-ckq'])
  45 except ImportError as ErrMsg:
  46     sys.stderr.write("\nFailed to import PyMOL module/package: %s\n" % ErrMsg)
  47     sys.stderr.write("Check/update your PyMOL environment and try again.\n\n")
  48     sys.exit(1)
  49 
  50 # MayaChemTools imports...
  51 try:
  52     from docopt import docopt
  53     import MiscUtil
  54     import PyMOLUtil
  55 except ImportError as ErrMsg:
  56     sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg)
  57     sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n")
  58     sys.exit(1)
  59 
  60 ScriptName = os.path.basename(sys.argv[0])
  61 Options = {}
  62 OptionsInfo = {}
  63 
  64 def main():
  65     """Start execution of the script."""
  66     
  67     MiscUtil.PrintInfo("\n%s (PyMOL v%s; MayaChemTools v%s; %s): Starting...\n" % (ScriptName, pymol.cmd.get_version()[0], MiscUtil.GetMayaChemToolsVersion(), time.asctime()))
  68     
  69     (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime()
  70     
  71     # Retrieve command line arguments and options...
  72     RetrieveOptions()
  73     
  74     # Process and validate command line arguments and options...
  75     ProcessOptions()
  76 
  77     # Perform actions required by the script...
  78     PerformMutagenesis()
  79     
  80     MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName)
  81     MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime))
  82 
  83 def PerformMutagenesis():
  84     """Mutate specified residues across chains and generate an output file."""
  85 
  86     MiscUtil.PrintInfo("\nApplying mutations...")
  87 
  88     # Load macromolecule from input file...
  89     MolName = OptionsInfo["InfileRoot"]
  90     LoadMolecule(OptionsInfo["Infile"], MolName)
  91 
  92     # Apply mutations...
  93     for Mutation, ChainID, ResNum, NewBaseName in OptionsInfo["SpecifiedMutationsInfo"] :
  94         ApplyMutation(Mutation, MolName, ChainID, ResNum, NewBaseName)
  95 
  96     #  Generate output file...
  97     Outfile = OptionsInfo["Outfile"]
  98     MiscUtil.PrintInfo("\nGenerating output file %s..." % Outfile)
  99     pymol.cmd.save(Outfile, MolName)
 100     
 101     # Delete macromolecule...
 102     DeleteMolecule(MolName)
 103 
 104 def ApplyMutation(Mutation, MolName, ChainID, ResNum, NewBaseName):
 105     """Apply mutatation."""
 106 
 107     MiscUtil.PrintInfo("\nApplying mutation %s" % Mutation)
 108     
 109     # Setup wizard for nucleic acids mutagenesis...
 110     try:
 111         pymol.cmd.wizard("nucmutagenesis")
 112     except pymol.CmdException as ErrMsg:
 113         MiscUtil.PrintError("The nucleic acids mutageneis wizard is not available in your PyMOL environment.")
 114     
 115     pymol.cmd.refresh_wizard()
 116 
 117     # Setup residue to be mutated...
 118     ResSelection = "/%s//%s/%s" % (MolName, ChainID, ResNum)
 119     pymol.cmd.get_wizard().do_select(ResSelection)
 120 
 121     # Setup new mutated residue...
 122     pymol.cmd.get_wizard().set_mode("%s" % NewBaseName)
 123     
 124     # Mutate...
 125     pymol.cmd.get_wizard().apply()
 126     
 127     # Quit wizard...
 128     pymol.cmd.set_wizard()
 129     
 130 def RetrieveChainsIDs():
 131     """Retrieve chain IDs."""
 132 
 133     MolName = OptionsInfo["InfileRoot"]
 134     Infile = OptionsInfo["Infile"]
 135     
 136     MiscUtil.PrintInfo("\nRetrieving chains information for input file %s..." % Infile)
 137 
 138     LoadMolecule(Infile, MolName)
 139 
 140     ChainIDs = PyMOLUtil.GetChains(MolName)
 141     
 142     DeleteMolecule(MolName)
 143 
 144     if ChainIDs is None:
 145         ChainIDs = []
 146 
 147     # Print out chain and ligand IDs...
 148     ChainInfo = ", ".join(ChainIDs) if len(ChainIDs) else "None"
 149     MiscUtil.PrintInfo("Chain IDs: %s" % ChainInfo)
 150                          
 151     OptionsInfo["ChainIDs"] = ChainIDs
 152     
 153 def ProcessSpecifiedMutations():
 154     """Process specified mutations."""
 155     
 156     MiscUtil.PrintInfo("\nProcessing specified mutations...")
 157 
 158     CanonicalBaseNameMap = {'ADENINE': 'Adenine', 'CYTOSINE': 'Cytosine', 'GUANINE': 'Guanine', 'THYMINE': 'Thymine', 'URACIL': 'Uracil', 'ADE': 'Adenine', 'CYT': 'Cytosine', 'GUA': 'Guanine', 'THY': 'Thymine', 'URA': 'Uracil'}
 159     
 160     SpecifiedMutationsInfo = []
 161 
 162     Mutations = re.sub(" ", "", OptionsInfo["Mutations"])
 163     MutationsWords = Mutations.split(",")
 164     if not len(MutationsWords):
 165         MiscUtil.PrintError("The number of comma delimited mutations specified using \"-m, --mutations\" option, \"%s\",  must be > 0." % (OptionsInfo["Mutations"]))
 166 
 167     # Load macromolecule from input file...
 168     MolName = OptionsInfo["InfileRoot"]
 169     LoadMolecule(OptionsInfo["Infile"], MolName)
 170     
 171     FirstMutation = True
 172     CurrentChainID = None
 173     CanonicalMutationMap = {}
 174     MutationsCount, ValidMutationsCount = [0] * 2
 175     
 176     for Mutation in MutationsWords:
 177         MutationsCount += 1
 178         if not len(Mutation):
 179             MiscUtil.PrintWarning("The mutation, \"%s\", specified using \"-m, --mutations\" option is empty.\nIgnoring mutation..." % (Mutation))
 180             continue
 181         
 182         # Match with a chain ID...
 183         MatchedResults = re.match(r"^([a-z0-9]+):([0-9]+)([a-z]+)$", Mutation, re.I)
 184         if not MatchedResults:
 185             # Match without a chain ID...
 186             MatchedResults = re.match(r"^([0-9]+)([a-z]+)$", Mutation, re.I)
 187             
 188         if not MatchedResults:
 189             MiscUtil.PrintWarning("The format of mutation, \"%s\", specified using \"-m, --mutations\" option is not valid. Supported format: <ChainID>:<ResNum><BaseName> or <ResNum><BaseName>\nIgnoring mutation..." % (Mutation))
 190             continue
 191 
 192         NumOfMatchedGroups =  len(MatchedResults.groups())
 193         if NumOfMatchedGroups == 2:
 194             ResNum, NewBaseName = MatchedResults.groups()
 195         elif NumOfMatchedGroups == 3:
 196             CurrentChainID, ResNum, NewBaseName = MatchedResults.groups()
 197         else:
 198             MiscUtil.PrintWarning("The format of mutation, \"%s\", specified using \"-m, --mutations\" option is not valid. Supported format: <ChainID>:<ResNum><BaseName> or <ResNum><BaseName>\nIgnoring mutation..." % (Mutation))
 199             continue
 200         
 201         if FirstMutation:
 202             FirstMutation = False
 203             if  CurrentChainID is None:
 204                 MiscUtil.PrintError("The first mutation, \"%s\", specified using \"-m, --mutations\" option must be colon delimited and contain only two values, the first value corresponding to chain ID" % (Mutation))
 205 
 206         CanonicalBaseName = NewBaseName.upper()
 207         if CanonicalBaseName in CanonicalBaseNameMap:
 208             NewBaseName = CanonicalBaseNameMap[CanonicalBaseName]
 209         
 210         # Check for duplicate mutation specifications...
 211         MutationSpec = "%s:%s%s" % (CurrentChainID, ResNum, NewBaseName)
 212         CanonicalMutation = MutationSpec.lower()
 213         if CanonicalMutation in CanonicalMutationMap:
 214             MiscUtil.PrintWarning("The mutation, \"%s\", specified using \"-m, --mutations\" option already exist for the current chain ID %s.\nIgnoring mutation..." % (Mutation, CurrentChainID))
 215             continue
 216         CanonicalMutationMap[CanonicalMutation] = Mutation
 217         
 218         # Is ResNum and BaseName present in input file?
 219         SelectionCmd = "%s and chain %s and resi %s" % (MolName, CurrentChainID, ResNum)
 220         ResiduesInfo = PyMOLUtil.GetSelectionResiduesInfo(SelectionCmd)
 221         if (ResiduesInfo is None) or (not len(ResiduesInfo["ResNames"])):
 222             MiscUtil.PrintWarning("The residue number, %s, in mutation, \"%s\", specified using \"-m, --mutations\" option appears to be missing in input file.\nIgnoring mutation..." % (ResNum, Mutation))
 223             continue
 224 
 225         ValidMutationsCount += 1
 226         
 227         # Track mutation information...
 228         SpecifiedMutationsInfo.append([Mutation, CurrentChainID, ResNum, NewBaseName])
 229         
 230     # Delete macromolecule...
 231     DeleteMolecule(MolName)
 232 
 233     MiscUtil.PrintInfo("\nTotal number of mutations: %d" % MutationsCount)
 234     MiscUtil.PrintInfo("Number of valid mutations: %d" % ValidMutationsCount)
 235     MiscUtil.PrintInfo("Number of ignored mutations: %d" % (MutationsCount - ValidMutationsCount))
 236     
 237     if not len(SpecifiedMutationsInfo):
 238         MiscUtil.PrintError("No valid mutations, \"%s\" specified using \"-m, --mutations\" option." % (OptionsInfo["Mutations"]))
 239         
 240     OptionsInfo["SpecifiedMutationsInfo"] = SpecifiedMutationsInfo
 241 
 242 def LoadMolecule(Infile, MolName):
 243     """Load input file."""
 244     
 245     pymol.cmd.reinitialize()
 246     pymol.cmd.load(Infile, MolName)
 247     
 248 def DeleteMolecule(MolName):
 249     """Delete molecule."""
 250     
 251     pymol.cmd.delete(MolName)
 252     
 253 def ProcessOptions():
 254     """Process and validate command line arguments and options."""
 255     
 256     MiscUtil.PrintInfo("Processing options...")
 257     
 258     # Validate options...
 259     ValidateOptions()
 260 
 261     OptionsInfo["Infile"] = Options["--infile"]
 262     FileDir, FileName, FileExt = MiscUtil.ParseFileName(OptionsInfo["Infile"])
 263     OptionsInfo["InfileRoot"] = FileName
 264 
 265     OptionsInfo["Overwrite"] = Options["--overwrite"]
 266     OptionsInfo["Outfile"] = Options["--outfile"]
 267 
 268     RetrieveChainsIDs()
 269     
 270     Mutations = Options["--mutations"]
 271     if re.match("^None$", Mutations, re.I):
 272         MiscUtil.PrintError("No mutations specified using \"-m, --mutations\" option.")
 273     
 274     OptionsInfo["Mutations"] = Options["--mutations"]
 275     ProcessSpecifiedMutations()
 276 
 277 def RetrieveOptions(): 
 278     """Retrieve command line arguments and options."""
 279     
 280     # Get options...
 281     global Options
 282     Options = docopt(_docoptUsage_)
 283 
 284     # Set current working directory to the specified directory...
 285     WorkingDir = Options["--workingdir"]
 286     if WorkingDir:
 287         os.chdir(WorkingDir)
 288     
 289     # Handle examples option...
 290     if "--examples" in Options and Options["--examples"]:
 291         MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_))
 292         sys.exit(0)
 293 
 294 def ValidateOptions():
 295     """Validate option values."""
 296     
 297     MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"])
 298     MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "pdb")
 299     
 300     MiscUtil.ValidateOptionsDistinctFileNames("-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"])
 301     MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "pdb")
 302     MiscUtil.ValidateOptionsOutputFileOverwrite("-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"])
 303 
 304 # Setup a usage string for docopt...
 305 _docoptUsage_ = """
 306 PyMOLMutateNucleicAcids.py - Mutate nucleic acids
 307 
 308 Usage:
 309     PyMOLMutateNucleicAcids.py [--mutations <Spec1,Spec2,...>]
 310                             [--overwrite] [-w <dir>] -i <infile> -o <outfile>
 311     PyMOLMutateNucleicAcids.py -h | --help | -e | --examples
 312 
 313 Description:
 314     Mutate nucleic acids in macromolecules. The mutations are performed using
 315     nucleic acids mutagenesis wizard available in PyMOL starting V2.2.
 316 
 317     The supported input and output file format is: PDB (.pdb)
 318  
 319 Options:
 320     -m, --mutations <Spec1,Spec2,...>  [default: None]
 321         Comma delimited list of specifications for mutating nucleic acids.
 322         
 323         The format of mutation specification is as follows:
 324         
 325             <ChainID>:<ResNum><BaseName>,...
 326         
 327         A chain ID in the first specification of a mutation is required. It may be
 328         skipped in subsequent specifications. The most recent chain ID is used
 329         for the missing chain ID. The residue number corresponds to the residue
 330         to be mutated and must be present in the current chain. The base name
 331         represents the new base.
 332         
 333         Examples:
 334         
 335             A:9Thy, A:10Thy
 336             A:9Thy,10Thy,11Thy
 337             A:9Thy,10Thy,B:5Ade,6Ade
 338         
 339         The base names must be valid for mutating nucleic acids. No validation
 340         is performed before mutating residues via nucleic acids
 341         mutagenesis wizard available in PyMOL. The current version of the
 342         wizard supports the following base names:
 343         
 344             Adenine, Ade
 345             Cytosine, Cyt
 346             Guanine, Gua
 347             Thymine, Thy
 348             Uracil, Ura
 349         
 350     -e, --examples
 351         Print examples.
 352     -h, --help
 353         Print this help message.
 354     -i, --infile <infile>
 355         Input file name.
 356     -o, --outfile <outfile>
 357         Output file name.
 358     --overwrite
 359         Overwrite existing files.
 360     -w, --workingdir <dir>
 361         Location of working directory which defaults to the current directory.
 362 
 363 Examples:
 364     To mutate a single residue in a specific chain and write a PDB file, type:
 365 
 366         % PyMOLMutateNucleicAcids.py -m "A:9Thy" -i Sample9.pdb
 367           -o Sample9Out.pdb
 368 
 369     To mutate multiple residues in a single chain and write a PDB file, type:
 370 
 371         % PyMOLMutateNucleicAcids.py -m "A:9Thy,10Thy,11Thy" -i Sample9.pdb
 372           -o Sample9Out.pdb
 373 
 374     To mutate multiple residues across multiple chains and write a PDB file, type:
 375 
 376         % PyMOLMutateNucleicAcids.py -m "A:9Thy,10Thy,B:5Ade,6Ade"
 377           -i Sample9.pdb -o Sample9Out.pdb
 378 
 379 Author:
 380     Manish Sud(msud@san.rr.com)
 381 
 382 See also:
 383     DownloadPDBFiles.pl, PyMOLMutateAminoAcids.py,
 384     PyMOLVisualizeMacromolecules.py
 385 
 386 Copyright:
 387     Copyright (C) 2024 Manish Sud. All rights reserved.
 388 
 389     The functionality available in this script is implemented using PyMOL, a
 390     molecular visualization system on an open source foundation originally
 391     developed by Warren DeLano.
 392 
 393     This file is part of MayaChemTools.
 394 
 395     MayaChemTools is free software; you can redistribute it and/or modify it under
 396     the terms of the GNU Lesser General Public License as published by the Free
 397     Software Foundation; either version 3 of the License, or (at your option) any
 398     later version.
 399 
 400 """
 401 
 402 if __name__ == "__main__":
 403     main()