1 #!/bin/env python 2 # 3 # File: PyMOLMutateAminoAcids.py 4 # Author: Manish Sud <msud@san.rr.com> 5 # 6 # Copyright (C) 2025 Manish Sud. All rights reserved. 7 # 8 # The functionality available in this script is implemented using PyMOL, a 9 # molecular visualization system on an open source foundation originally 10 # developed by Warren DeLano. 11 # 12 # This file is part of MayaChemTools. 13 # 14 # MayaChemTools is free software; you can redistribute it and/or modify it under 15 # the terms of the GNU Lesser General Public License as published by the Free 16 # Software Foundation; either version 3 of the License, or (at your option) any 17 # later version. 18 # 19 # MayaChemTools is distributed in the hope that it will be useful, but without 20 # any warranty; without even the implied warranty of merchantability of fitness 21 # for a particular purpose. See the GNU Lesser General Public License for more 22 # details. 23 # 24 # You should have received a copy of the GNU Lesser General Public License 25 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or 26 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, 27 # Boston, MA, 02111-1307, USA. 28 # 29 30 from __future__ import print_function 31 32 import os 33 import sys 34 import time 35 import re 36 37 # PyMOL imports... 38 try: 39 import pymol 40 41 # Finish launching PyMOL in a command line mode for batch processing (-c) 42 # along with the following options: disable loading of pymolrc and plugins (-k); 43 # suppress start up messages (-q) 44 pymol.finish_launching(["pymol", "-ckq"]) 45 except ImportError as ErrMsg: 46 sys.stderr.write("\nFailed to import PyMOL module/package: %s\n" % ErrMsg) 47 sys.stderr.write("Check/update your PyMOL environment and try again.\n\n") 48 sys.exit(1) 49 50 # MayaChemTools imports... 51 sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python")) 52 try: 53 from docopt import docopt 54 import MiscUtil 55 import PyMOLUtil 56 except ImportError as ErrMsg: 57 sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg) 58 sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n") 59 sys.exit(1) 60 61 ScriptName = os.path.basename(sys.argv[0]) 62 Options = {} 63 OptionsInfo = {} 64 65 66 def main(): 67 """Start execution of the script.""" 68 69 MiscUtil.PrintInfo( 70 "\n%s (PyMOL v%s; MayaChemTools v%s; %s): Starting...\n" 71 % (ScriptName, pymol.cmd.get_version()[0], MiscUtil.GetMayaChemToolsVersion(), time.asctime()) 72 ) 73 74 (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime() 75 76 # Retrieve command line arguments and options... 77 RetrieveOptions() 78 79 # Process and validate command line arguments and options... 80 ProcessOptions() 81 82 # Perform actions required by the script... 83 PerformMutagenesis() 84 85 MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName) 86 MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime)) 87 88 89 def PerformMutagenesis(): 90 """Mutate specified residues across chains and generate an output file.""" 91 92 MiscUtil.PrintInfo("\nApplying mutations...") 93 94 # Load macromolecule from input file... 95 MolName = OptionsInfo["InfileRoot"] 96 LoadMolecule(OptionsInfo["Infile"], MolName) 97 98 # Apply mutations... 99 for Mutation, ChainID, ResName, ResNum, NewResName in OptionsInfo["SpecifiedMutationsInfo"]: 100 ApplyMutation(Mutation, MolName, ChainID, ResName, ResNum, NewResName) 101 102 # Generate output file... 103 Outfile = OptionsInfo["Outfile"] 104 MiscUtil.PrintInfo("\nGenerating output file %s..." % Outfile) 105 pymol.cmd.save(Outfile, MolName) 106 107 # Delete macromolecule... 108 DeleteMolecule(MolName) 109 110 111 def ApplyMutation(Mutation, MolName, ChainID, ResName, ResNum, NewResName): 112 """Apply mutatation.""" 113 114 MiscUtil.PrintInfo("\nApplying mutation %s" % Mutation) 115 116 # Setup wizard for amino acid mutagenesis... 117 pymol.cmd.wizard("mutagenesis") 118 119 pymol.cmd.refresh_wizard() 120 121 # Setup residue to be mutated... 122 ResSelection = "/%s//%s/%s" % (MolName, ChainID, ResNum) 123 pymol.cmd.get_wizard().do_select(ResSelection) 124 125 # Setup new mutated residue... 126 pymol.cmd.get_wizard().set_mode("%s" % NewResName) 127 128 # Mutate... 129 pymol.cmd.get_wizard().apply() 130 131 # Quit wizard... 132 pymol.cmd.set_wizard() 133 134 135 def RetrieveChainsIDs(): 136 """Retrieve chain IDs.""" 137 138 MolName = OptionsInfo["InfileRoot"] 139 Infile = OptionsInfo["Infile"] 140 141 MiscUtil.PrintInfo("\nRetrieving chains information for input file %s..." % Infile) 142 143 LoadMolecule(Infile, MolName) 144 145 ChainIDs = PyMOLUtil.GetChains(MolName) 146 147 DeleteMolecule(MolName) 148 149 if ChainIDs is None: 150 ChainIDs = [] 151 152 # Print out chain and ligand IDs... 153 ChainInfo = ", ".join(ChainIDs) if len(ChainIDs) else "None" 154 MiscUtil.PrintInfo("Chain IDs: %s" % ChainInfo) 155 156 OptionsInfo["ChainIDs"] = ChainIDs 157 158 159 def ProcessSpecifiedMutations(): 160 """Process specified mutations.""" 161 162 MiscUtil.PrintInfo("\nProcessing specified mutations...") 163 164 SpecifiedMutationsInfo = [] 165 166 Mutations = re.sub(" ", "", OptionsInfo["Mutations"]) 167 MutationsWords = Mutations.split(",") 168 if not len(MutationsWords): 169 MiscUtil.PrintError( 170 'The number of comma delimited mutations specified using "-m, --mutations" option, "%s", must be > 0.' 171 % (OptionsInfo["Mutations"]) 172 ) 173 174 # Load macromolecule from input file... 175 MolName = OptionsInfo["InfileRoot"] 176 LoadMolecule(OptionsInfo["Infile"], MolName) 177 178 FirstMutation = True 179 CurrentChainID = None 180 CanonicalMutationMap = {} 181 MutationsCount, ValidMutationsCount = [0] * 2 182 183 for Mutation in MutationsWords: 184 MutationsCount += 1 185 if not len(Mutation): 186 MiscUtil.PrintWarning( 187 'The mutation, "%s", specified using "-m, --mutations" option is empty.\nIgnoring mutation...' 188 % (Mutation) 189 ) 190 continue 191 192 # Match with a chain ID... 193 MatchedResults = re.match(r"^([a-z0-9]+):([a-z]+)([0-9]+)([a-z]+)$", Mutation, re.I) 194 if not MatchedResults: 195 # Match without a chain ID... 196 MatchedResults = re.match(r"^([a-z]+)([0-9]+)([a-z]+)$", Mutation, re.I) 197 198 if not MatchedResults: 199 MiscUtil.PrintWarning( 200 'The format of mutation, "%s", specified using "-m, --mutations" option is not valid. Supported format: <ChainID>:<ResName><ResNum><ResName> or <ResName><ResNum><ResName>\nIgnoring mutation...' 201 % (Mutation) 202 ) 203 continue 204 205 NumOfMatchedGroups = len(MatchedResults.groups()) 206 if NumOfMatchedGroups == 3: 207 ResName, ResNum, NewResName = MatchedResults.groups() 208 elif NumOfMatchedGroups == 4: 209 CurrentChainID, ResName, ResNum, NewResName = MatchedResults.groups() 210 else: 211 MiscUtil.PrintWarning( 212 'The format of mutation, "%s", specified using "-m, --mutations" option is not valid. Supported format: <ChainID>:<ResName><ResNum><ResName> or <ResName><ResNum><ResName>\nIgnoring mutation...' 213 % (Mutation) 214 ) 215 continue 216 217 ResName = ResName.upper() 218 NewResName = NewResName.upper() 219 220 if FirstMutation: 221 FirstMutation = False 222 if CurrentChainID is None: 223 MiscUtil.PrintError( 224 'The first mutation, "%s", specified using "-m, --mutations" option must be colon delimited and contain only two values, the first value corresponding to chain ID' 225 % (Mutation) 226 ) 227 228 # Check for duplicate mutation specifications... 229 MutationSpec = "%s:%s%s%s" % (CurrentChainID, ResName, ResNum, NewResName) 230 CanonicalMutation = MutationSpec.lower() 231 if CanonicalMutation in CanonicalMutationMap: 232 MiscUtil.PrintWarning( 233 'The mutation, "%s", specified using "-m, --mutations" option already exist for the current chain ID %s.\nIgnoring mutation...' 234 % (Mutation, CurrentChainID) 235 ) 236 continue 237 CanonicalMutationMap[CanonicalMutation] = CanonicalMutation 238 239 # Is ResNum and ResName present in input file? 240 SelectionCmd = "%s and chain %s and resi %s and resn %s" % (MolName, CurrentChainID, ResNum, ResName) 241 ResiduesInfo = PyMOLUtil.GetSelectionResiduesInfo(SelectionCmd) 242 if (ResiduesInfo is None) or (not len(ResiduesInfo["ResNames"])): 243 MiscUtil.PrintWarning( 244 'The residue name, %s, and residue number, %s, in mutation, "%s", specified using "-m, --mutations" option appears to be missing in input file.\nIgnoring mutation...' 245 % (ResName, ResNum, Mutation) 246 ) 247 continue 248 249 ValidMutationsCount += 1 250 251 # Track mutation information... 252 SpecifiedMutationsInfo.append([Mutation, CurrentChainID, ResName, ResNum, NewResName]) 253 254 # Delete macromolecule... 255 DeleteMolecule(MolName) 256 257 MiscUtil.PrintInfo("\nTotal number of mutations: %d" % MutationsCount) 258 MiscUtil.PrintInfo("Number of valid mutations: %d" % ValidMutationsCount) 259 MiscUtil.PrintInfo("Number of ignored mutations: %d" % (MutationsCount - ValidMutationsCount)) 260 261 if not len(SpecifiedMutationsInfo): 262 MiscUtil.PrintError( 263 'No valid mutations, "%s" specified using "-m, --mutations" option.' % (OptionsInfo["Mutations"]) 264 ) 265 266 OptionsInfo["SpecifiedMutationsInfo"] = SpecifiedMutationsInfo 267 268 269 def LoadMolecule(Infile, MolName): 270 """Load input file.""" 271 272 pymol.cmd.reinitialize() 273 pymol.cmd.load(Infile, MolName) 274 275 276 def DeleteMolecule(MolName): 277 """Delete molecule.""" 278 279 pymol.cmd.delete(MolName) 280 281 282 def ProcessOptions(): 283 """Process and validate command line arguments and options.""" 284 285 MiscUtil.PrintInfo("Processing options...") 286 287 # Validate options... 288 ValidateOptions() 289 290 OptionsInfo["Infile"] = Options["--infile"] 291 FileDir, FileName, FileExt = MiscUtil.ParseFileName(OptionsInfo["Infile"]) 292 OptionsInfo["InfileRoot"] = FileName 293 294 OptionsInfo["Overwrite"] = Options["--overwrite"] 295 OptionsInfo["Outfile"] = Options["--outfile"] 296 297 RetrieveChainsIDs() 298 299 Mutations = Options["--mutations"] 300 if re.match("^None$", Mutations, re.I): 301 MiscUtil.PrintError('No mutations specified using "-m, --mutations" option.') 302 303 OptionsInfo["Mutations"] = Options["--mutations"] 304 ProcessSpecifiedMutations() 305 306 307 def RetrieveOptions(): 308 """Retrieve command line arguments and options.""" 309 310 # Get options... 311 global Options 312 Options = docopt(_docoptUsage_) 313 314 # Set current working directory to the specified directory... 315 WorkingDir = Options["--workingdir"] 316 if WorkingDir: 317 os.chdir(WorkingDir) 318 319 # Handle examples option... 320 if "--examples" in Options and Options["--examples"]: 321 MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_)) 322 sys.exit(0) 323 324 325 def ValidateOptions(): 326 """Validate option values""" 327 328 MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"]) 329 MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "pdb") 330 331 MiscUtil.ValidateOptionsDistinctFileNames( 332 "-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"] 333 ) 334 MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "pdb") 335 MiscUtil.ValidateOptionsOutputFileOverwrite( 336 "-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"] 337 ) 338 339 340 # Setup a usage string for docopt... 341 _docoptUsage_ = """ 342 PyMOLMutateAminoAcids.py - Mutate amino acids 343 344 Usage: 345 PyMOLMutateAminoAcids.py [--mutations <Spec1,Spec2,...>] 346 [--overwrite] [-w <dir>] -i <infile> -o <outfile> 347 PyMOLMutateAminoAcids.py -h | --help | -e | --examples 348 349 Description: 350 Mutate amino acids in macromolecules. The mutations are performed using 351 protein mutagenesis wizard available in PyMOL. 352 353 The supported input and output file format is: PDB (.pdb) 354 355 Options: 356 -m, --mutations <Spec1,Spec2,...> [default: None] 357 Comma delimited list of specifications for mutating amino acid residues 358 in proteins. 359 360 The format of mutation specification is as follows: 361 362 <ChainID>:<ResName><ResNum><ResName>,... 363 364 A chain ID in the first specification of a mutation is required. It may be 365 skipped in subsequent specifications. The most recent chain ID is used 366 for the missing chain ID. The first reside name corresponds to the residue 367 to be mutated. The second residue name represents the new residue. 368 The residue number corresponds to the first residue name and must be 369 present in the current chain. 370 371 Examples: 372 373 E:LEU49CYS, E:SER53TYR 374 E:LEU49CYS, SER53TYR 375 E:LEU49CYS, SER53TYR, I:TYR7SER, ILE11VAL 376 377 The residue names must be valid amino acid names. No validation is 378 performed before mutating residues via protein mutagenesis wizard 379 available in PyMOL. 380 -e, --examples 381 Print examples. 382 -h, --help 383 Print this help message. 384 -i, --infile <infile> 385 Input file name. 386 -o, --outfile <outfile> 387 Output file name. 388 --overwrite 389 Overwrite existing files. 390 -w, --workingdir <dir> 391 Location of working directory which defaults to the current directory. 392 393 Examples: 394 To mutate a single residue in a specific chain and write a PDB file, type: 395 396 % PyMOLMutateAminoAcids.py -m "I:TYR7SER" -i Sample3.pdb 397 -o Sample3Out.pdb 398 399 To mutate multiple residues in a single chain and write a PDB file, type: 400 401 % PyMOLMutateAminoAcids.py -m "I:TYR7SER, ILE11VAL" -i Sample3.pdb 402 -o Sample3Out.pdb 403 404 To mutate multiple residues across multiple chains and write a PDB file, type: 405 406 % PyMOLMutateAminoAcids.py -m "E:LEU49CYS,SER53TYR,I:TYR7SER,ILE11VAL" 407 -i Sample3.pdb -o Sample3Out.pdb 408 409 Author: 410 Manish Sud(msud@san.rr.com) 411 412 See also: 413 DownloadPDBFiles.pl, PyMOLMutateNucleicAcids.py, 414 PyMOLVisualizeMacromolecules.py 415 416 Copyright: 417 Copyright (C) 2025 Manish Sud. All rights reserved. 418 419 The functionality available in this script is implemented using PyMOL, a 420 molecular visualization system on an open source foundation originally 421 developed by Warren DeLano. 422 423 This file is part of MayaChemTools. 424 425 MayaChemTools is free software; you can redistribute it and/or modify it under 426 the terms of the GNU Lesser General Public License as published by the Free 427 Software Foundation; either version 3 of the License, or (at your option) any 428 later version. 429 430 """ 431 432 if __name__ == "__main__": 433 main()