1 #!/bin/env python 2 # 3 # File: PyMOLMutateAminoAcids.py 4 # Author: Manish Sud <msud@san.rr.com> 5 # 6 # Copyright (C) 2025 Manish Sud. All rights reserved. 7 # 8 # The functionality available in this script is implemented using PyMOL, a 9 # molecular visualization system on an open source foundation originally 10 # developed by Warren DeLano. 11 # 12 # This file is part of MayaChemTools. 13 # 14 # MayaChemTools is free software; you can redistribute it and/or modify it under 15 # the terms of the GNU Lesser General Public License as published by the Free 16 # Software Foundation; either version 3 of the License, or (at your option) any 17 # later version. 18 # 19 # MayaChemTools is distributed in the hope that it will be useful, but without 20 # any warranty; without even the implied warranty of merchantability of fitness 21 # for a particular purpose. See the GNU Lesser General Public License for more 22 # details. 23 # 24 # You should have received a copy of the GNU Lesser General Public License 25 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or 26 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, 27 # Boston, MA, 02111-1307, USA. 28 # 29 30 from __future__ import print_function 31 32 # Add local python path to the global path and import standard library modules... 33 import os 34 import sys; sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python")) 35 import time 36 import re 37 38 # PyMOL imports... 39 try: 40 import pymol 41 # Finish launching PyMOL in a command line mode for batch processing (-c) 42 # along with the following options: disable loading of pymolrc and plugins (-k); 43 # suppress start up messages (-q) 44 pymol.finish_launching(['pymol', '-ckq']) 45 except ImportError as ErrMsg: 46 sys.stderr.write("\nFailed to import PyMOL module/package: %s\n" % ErrMsg) 47 sys.stderr.write("Check/update your PyMOL environment and try again.\n\n") 48 sys.exit(1) 49 50 # MayaChemTools imports... 51 try: 52 from docopt import docopt 53 import MiscUtil 54 import PyMOLUtil 55 except ImportError as ErrMsg: 56 sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg) 57 sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n") 58 sys.exit(1) 59 60 ScriptName = os.path.basename(sys.argv[0]) 61 Options = {} 62 OptionsInfo = {} 63 64 def main(): 65 """Start execution of the script.""" 66 67 MiscUtil.PrintInfo("\n%s (PyMOL v%s; MayaChemTools v%s; %s): Starting...\n" % (ScriptName, pymol.cmd.get_version()[0], MiscUtil.GetMayaChemToolsVersion(), time.asctime())) 68 69 (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime() 70 71 # Retrieve command line arguments and options... 72 RetrieveOptions() 73 74 # Process and validate command line arguments and options... 75 ProcessOptions() 76 77 # Perform actions required by the script... 78 PerformMutagenesis() 79 80 MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName) 81 MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime)) 82 83 def PerformMutagenesis(): 84 """Mutate specified residues across chains and generate an output file.""" 85 86 MiscUtil.PrintInfo("\nApplying mutations...") 87 88 # Load macromolecule from input file... 89 MolName = OptionsInfo["InfileRoot"] 90 LoadMolecule(OptionsInfo["Infile"], MolName) 91 92 # Apply mutations... 93 for Mutation, ChainID, ResName, ResNum, NewResName in OptionsInfo["SpecifiedMutationsInfo"] : 94 ApplyMutation(Mutation, MolName, ChainID, ResName, ResNum, NewResName) 95 96 # Generate output file... 97 Outfile = OptionsInfo["Outfile"] 98 MiscUtil.PrintInfo("\nGenerating output file %s..." % Outfile) 99 pymol.cmd.save(Outfile, MolName) 100 101 # Delete macromolecule... 102 DeleteMolecule(MolName) 103 104 def ApplyMutation(Mutation, MolName, ChainID, ResName, ResNum, NewResName): 105 """Apply mutatation.""" 106 107 MiscUtil.PrintInfo("\nApplying mutation %s" % Mutation) 108 109 # Setup wizard for amino acid mutagenesis... 110 pymol.cmd.wizard("mutagenesis") 111 112 pymol.cmd.refresh_wizard() 113 114 # Setup residue to be mutated... 115 ResSelection = "/%s//%s/%s" % (MolName, ChainID, ResNum) 116 pymol.cmd.get_wizard().do_select(ResSelection) 117 118 # Setup new mutated residue... 119 pymol.cmd.get_wizard().set_mode("%s" % NewResName) 120 121 # Mutate... 122 pymol.cmd.get_wizard().apply() 123 124 # Quit wizard... 125 pymol.cmd.set_wizard() 126 127 def RetrieveChainsIDs(): 128 """Retrieve chain IDs. """ 129 130 MolName = OptionsInfo["InfileRoot"] 131 Infile = OptionsInfo["Infile"] 132 133 MiscUtil.PrintInfo("\nRetrieving chains information for input file %s..." % Infile) 134 135 LoadMolecule(Infile, MolName) 136 137 ChainIDs = PyMOLUtil.GetChains(MolName) 138 139 DeleteMolecule(MolName) 140 141 if ChainIDs is None: 142 ChainIDs = [] 143 144 # Print out chain and ligand IDs... 145 ChainInfo = ", ".join(ChainIDs) if len(ChainIDs) else "None" 146 MiscUtil.PrintInfo("Chain IDs: %s" % ChainInfo) 147 148 OptionsInfo["ChainIDs"] = ChainIDs 149 150 def ProcessSpecifiedMutations(): 151 """Process specified mutations.""" 152 153 MiscUtil.PrintInfo("\nProcessing specified mutations...") 154 155 SpecifiedMutationsInfo = [] 156 157 Mutations = re.sub(" ", "", OptionsInfo["Mutations"]) 158 MutationsWords = Mutations.split(",") 159 if not len(MutationsWords): 160 MiscUtil.PrintError("The number of comma delimited mutations specified using \"-m, --mutations\" option, \"%s\", must be > 0." % (OptionsInfo["Mutations"])) 161 162 # Load macromolecule from input file... 163 MolName = OptionsInfo["InfileRoot"] 164 LoadMolecule(OptionsInfo["Infile"], MolName) 165 166 FirstMutation = True 167 CurrentChainID = None 168 CanonicalMutationMap = {} 169 MutationsCount, ValidMutationsCount = [0] * 2 170 171 for Mutation in MutationsWords: 172 MutationsCount += 1 173 if not len(Mutation): 174 MiscUtil.PrintWarning("The mutation, \"%s\", specified using \"-m, --mutations\" option is empty.\nIgnoring mutation..." % (Mutation)) 175 continue 176 177 # Match with a chain ID... 178 MatchedResults = re.match(r"^([a-z0-9]+):([a-z]+)([0-9]+)([a-z]+)$", Mutation, re.I) 179 if not MatchedResults: 180 # Match without a chain ID... 181 MatchedResults = re.match(r"^([a-z]+)([0-9]+)([a-z]+)$", Mutation, re.I) 182 183 if not MatchedResults: 184 MiscUtil.PrintWarning("The format of mutation, \"%s\", specified using \"-m, --mutations\" option is not valid. Supported format: <ChainID>:<ResName><ResNum><ResName> or <ResName><ResNum><ResName>\nIgnoring mutation..." % (Mutation)) 185 continue 186 187 NumOfMatchedGroups = len(MatchedResults.groups()) 188 if NumOfMatchedGroups == 3: 189 ResName, ResNum, NewResName = MatchedResults.groups() 190 elif NumOfMatchedGroups == 4: 191 CurrentChainID, ResName, ResNum, NewResName = MatchedResults.groups() 192 else: 193 MiscUtil.PrintWarning("The format of mutation, \"%s\", specified using \"-m, --mutations\" option is not valid. Supported format: <ChainID>:<ResName><ResNum><ResName> or <ResName><ResNum><ResName>\nIgnoring mutation..." % (Mutation)) 194 continue 195 196 ResName = ResName.upper() 197 NewResName = NewResName.upper() 198 199 if FirstMutation: 200 FirstMutation = False 201 if CurrentChainID is None: 202 MiscUtil.PrintError("The first mutation, \"%s\", specified using \"-m, --mutations\" option must be colon delimited and contain only two values, the first value corresponding to chain ID" % (Mutation)) 203 204 # Check for duplicate mutation specifications... 205 MutationSpec = "%s:%s%s%s" % (CurrentChainID, ResName, ResNum, NewResName) 206 CanonicalMutation = MutationSpec.lower() 207 if CanonicalMutation in CanonicalMutationMap: 208 MiscUtil.PrintWarning("The mutation, \"%s\", specified using \"-m, --mutations\" option already exist for the current chain ID %s.\nIgnoring mutation..." % (Mutation, CurrentChainID)) 209 continue 210 CanonicalMutationMap[CanonicalMutation] = CanonicalMutation 211 212 # Is ResNum and ResName present in input file? 213 SelectionCmd = "%s and chain %s and resi %s and resn %s" % (MolName, CurrentChainID, ResNum, ResName) 214 ResiduesInfo = PyMOLUtil.GetSelectionResiduesInfo(SelectionCmd) 215 if (ResiduesInfo is None) or (not len(ResiduesInfo["ResNames"])): 216 MiscUtil.PrintWarning("The residue name, %s, and residue number, %s, in mutation, \"%s\", specified using \"-m, --mutations\" option appears to be missing in input file.\nIgnoring mutation..." % (ResName, ResNum, Mutation)) 217 continue 218 219 ValidMutationsCount += 1 220 221 # Track mutation information... 222 SpecifiedMutationsInfo.append([Mutation, CurrentChainID, ResName, ResNum, NewResName]) 223 224 # Delete macromolecule... 225 DeleteMolecule(MolName) 226 227 MiscUtil.PrintInfo("\nTotal number of mutations: %d" % MutationsCount) 228 MiscUtil.PrintInfo("Number of valid mutations: %d" % ValidMutationsCount) 229 MiscUtil.PrintInfo("Number of ignored mutations: %d" % (MutationsCount - ValidMutationsCount)) 230 231 if not len(SpecifiedMutationsInfo): 232 MiscUtil.PrintError("No valid mutations, \"%s\" specified using \"-m, --mutations\" option." % (OptionsInfo["Mutations"])) 233 234 OptionsInfo["SpecifiedMutationsInfo"] = SpecifiedMutationsInfo 235 236 def LoadMolecule(Infile, MolName): 237 """Load input file.""" 238 239 pymol.cmd.reinitialize() 240 pymol.cmd.load(Infile, MolName) 241 242 def DeleteMolecule(MolName): 243 """Delete molecule.""" 244 245 pymol.cmd.delete(MolName) 246 247 def ProcessOptions(): 248 """Process and validate command line arguments and options.""" 249 250 MiscUtil.PrintInfo("Processing options...") 251 252 # Validate options... 253 ValidateOptions() 254 255 OptionsInfo["Infile"] = Options["--infile"] 256 FileDir, FileName, FileExt = MiscUtil.ParseFileName(OptionsInfo["Infile"]) 257 OptionsInfo["InfileRoot"] = FileName 258 259 OptionsInfo["Overwrite"] = Options["--overwrite"] 260 OptionsInfo["Outfile"] = Options["--outfile"] 261 262 RetrieveChainsIDs() 263 264 Mutations = Options["--mutations"] 265 if re.match("^None$", Mutations, re.I): 266 MiscUtil.PrintError("No mutations specified using \"-m, --mutations\" option.") 267 268 OptionsInfo["Mutations"] = Options["--mutations"] 269 ProcessSpecifiedMutations() 270 271 def RetrieveOptions(): 272 """Retrieve command line arguments and options.""" 273 274 # Get options... 275 global Options 276 Options = docopt(_docoptUsage_) 277 278 # Set current working directory to the specified directory... 279 WorkingDir = Options["--workingdir"] 280 if WorkingDir: 281 os.chdir(WorkingDir) 282 283 # Handle examples option... 284 if "--examples" in Options and Options["--examples"]: 285 MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_)) 286 sys.exit(0) 287 288 def ValidateOptions(): 289 """Validate option values""" 290 291 MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"]) 292 MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "pdb") 293 294 MiscUtil.ValidateOptionsDistinctFileNames("-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"]) 295 MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "pdb") 296 MiscUtil.ValidateOptionsOutputFileOverwrite("-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"]) 297 298 # Setup a usage string for docopt... 299 _docoptUsage_ = """ 300 PyMOLMutateAminoAcids.py - Mutate amino acids 301 302 Usage: 303 PyMOLMutateAminoAcids.py [--mutations <Spec1,Spec2,...>] 304 [--overwrite] [-w <dir>] -i <infile> -o <outfile> 305 PyMOLMutateAminoAcids.py -h | --help | -e | --examples 306 307 Description: 308 Mutate amino acids in macromolecules. The mutations are performed using 309 protein mutagenesis wizard available in PyMOL. 310 311 The supported input and output file format is: PDB (.pdb) 312 313 Options: 314 -m, --mutations <Spec1,Spec2,...> [default: None] 315 Comma delimited list of specifications for mutating amino acid residues 316 in proteins. 317 318 The format of mutation specification is as follows: 319 320 <ChainID>:<ResName><ResNum><ResName>,... 321 322 A chain ID in the first specification of a mutation is required. It may be 323 skipped in subsequent specifications. The most recent chain ID is used 324 for the missing chain ID. The first reside name corresponds to the residue 325 to be mutated. The second residue name represents the new residue. 326 The residue number corresponds to the first residue name and must be 327 present in the current chain. 328 329 Examples: 330 331 E:LEU49CYS, E:SER53TYR 332 E:LEU49CYS, SER53TYR 333 E:LEU49CYS, SER53TYR, I:TYR7SER, ILE11VAL 334 335 The residue names must be valid amino acid names. No validation is 336 performed before mutating residues via protein mutagenesis wizard 337 available in PyMOL. 338 -e, --examples 339 Print examples. 340 -h, --help 341 Print this help message. 342 -i, --infile <infile> 343 Input file name. 344 -o, --outfile <outfile> 345 Output file name. 346 --overwrite 347 Overwrite existing files. 348 -w, --workingdir <dir> 349 Location of working directory which defaults to the current directory. 350 351 Examples: 352 To mutate a single residue in a specific chain and write a PDB file, type: 353 354 % PyMOLMutateAminoAcids.py -m "I:TYR7SER" -i Sample3.pdb 355 -o Sample3Out.pdb 356 357 To mutate multiple residues in a single chain and write a PDB file, type: 358 359 % PyMOLMutateAminoAcids.py -m "I:TYR7SER, ILE11VAL" -i Sample3.pdb 360 -o Sample3Out.pdb 361 362 To mutate multiple residues across multiple chains and write a PDB file, type: 363 364 % PyMOLMutateAminoAcids.py -m "E:LEU49CYS,SER53TYR,I:TYR7SER,ILE11VAL" 365 -i Sample3.pdb -o Sample3Out.pdb 366 367 Author: 368 Manish Sud(msud@san.rr.com) 369 370 See also: 371 DownloadPDBFiles.pl, PyMOLMutateNucleicAcids.py, 372 PyMOLVisualizeMacromolecules.py 373 374 Copyright: 375 Copyright (C) 2025 Manish Sud. All rights reserved. 376 377 The functionality available in this script is implemented using PyMOL, a 378 molecular visualization system on an open source foundation originally 379 developed by Warren DeLano. 380 381 This file is part of MayaChemTools. 382 383 MayaChemTools is free software; you can redistribute it and/or modify it under 384 the terms of the GNU Lesser General Public License as published by the Free 385 Software Foundation; either version 3 of the License, or (at your option) any 386 later version. 387 388 """ 389 390 if __name__ == "__main__": 391 main()