#!/bin/env python
#
# File: PyMOLMutateNucleicAcids.py
# Author: Manish Sud <msud@san.rr.com>
#
# Copyright (C) 2025 Manish Sud. All rights reserved.
#
# The functionality available in this script is implemented using PyMOL, a
# molecular visualization system on an open source foundation originally
# developed by Warren DeLano.
#
# This file is part of MayaChemTools.
#
# MayaChemTools is free software; you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation; either version 3 of the License, or (at your option) any
# later version.
#
# MayaChemTools is distributed in the hope that it will be useful, but without
# any warranty; without even the implied warranty of merchantability of fitness
# for a particular purpose. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
# write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
# Boston, MA, 02111-1307, USA.
#

from __future__ import print_function

# Import standard library modules...
import os
import sys
import time
import re

# Add local python path to the global path; this must happen before the
# MayaChemTools imports below so that they can be resolved...
sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python"))

# PyMOL imports...
try:
    import pymol
    # Finish launching PyMOL in a command line mode for batch processing (-c)
    # along with the following options: disable loading of pymolrc and plugins (-k);
    # suppress start up messages (-q)
    pymol.finish_launching(['pymol', '-ckq'])
except ImportError as ErrMsg:
    sys.stderr.write("\nFailed to import PyMOL module/package: %s\n" % ErrMsg)
    sys.stderr.write("Check/update your PyMOL environment and try again.\n\n")
    sys.exit(1)

# MayaChemTools imports...
try:
    from docopt import docopt
    import MiscUtil
    import PyMOLUtil
except ImportError as ErrMsg:
    sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg)
    sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n")
    sys.exit(1)

ScriptName = os.path.basename(sys.argv[0])

# Raw docopt results, filled in by RetrieveOptions()...
Options = {}

# Processed/validated option values, filled in by ProcessOptions()...
OptionsInfo = {}


def main():
    """Start execution of the script.

    Retrieves and processes the command line options, applies the requested
    mutations, and reports the elapsed time.
    """

    MiscUtil.PrintInfo("\n%s (PyMOL v%s; MayaChemTools v%s; %s): Starting...\n" % (ScriptName, pymol.cmd.get_version()[0], MiscUtil.GetMayaChemToolsVersion(), time.asctime()))

    (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime()

    # Retrieve command line arguments and options...
    RetrieveOptions()

    # Process and validate command line arguments and options...
    ProcessOptions()

    # Perform actions required by the script...
    PerformMutagenesis()

    MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName)
    MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime))


def PerformMutagenesis():
    """Mutate specified residues across chains and generate an output file.

    Reads the input file, output file and the list of mutations from
    OptionsInfo, applies each mutation in the order it was specified, and
    saves the mutated molecule to the output file.
    """

    MiscUtil.PrintInfo("\nApplying mutations...")

    # Load macromolecule from input file...
    MolName = OptionsInfo["InfileRoot"]
    LoadMolecule(OptionsInfo["Infile"], MolName)

    # Apply mutations...
    for Mutation, ChainID, ResNum, NewBaseName in OptionsInfo["SpecifiedMutationsInfo"]:
        ApplyMutation(Mutation, MolName, ChainID, ResNum, NewBaseName)

    # Generate output file...
    Outfile = OptionsInfo["Outfile"]
    MiscUtil.PrintInfo("\nGenerating output file %s..." % Outfile)
    pymol.cmd.save(Outfile, MolName)

    # Delete macromolecule...
    DeleteMolecule(MolName)


def ApplyMutation(Mutation, MolName, ChainID, ResNum, NewBaseName):
    """Apply a single mutation using the PyMOL nucleic acids mutagenesis wizard.

    Arguments:
        Mutation (str): Mutation specification as typed by the user; only
            used for reporting.
        MolName (str): Name of the loaded PyMOL object.
        ChainID (str): Chain containing the residue to mutate.
        ResNum (str): Number of the residue to mutate.
        NewBaseName (str): Name of the new base passed to the wizard.
    """

    MiscUtil.PrintInfo("\nApplying mutation %s" % Mutation)

    WizardMissingMsg = "The nucleic acids mutagenesis wizard is not available in your PyMOL environment."

    # Setup wizard for nucleic acids mutagenesis...
    try:
        pymol.cmd.wizard("nucmutagenesis")
    except pymol.CmdException:
        MiscUtil.PrintError(WizardMissingMsg)

    pymol.cmd.refresh_wizard()

    # NOTE(review): cmd.wizard appears to be able to report a failure without
    # raising, in which case no wizard is active; guard against that instead
    # of failing with an AttributeError on None below - confirm against the
    # PyMOL version in use.
    Wizard = pymol.cmd.get_wizard()
    if Wizard is None:
        MiscUtil.PrintError(WizardMissingMsg)

    # Setup residue to be mutated...
    ResSelection = "/%s//%s/%s" % (MolName, ChainID, ResNum)
    Wizard.do_select(ResSelection)

    # Setup new mutated residue...
    Wizard.set_mode("%s" % NewBaseName)

    # Mutate...
    Wizard.apply()

    # Quit wizard...
    pymol.cmd.set_wizard()


def RetrieveChainsIDs():
    """Retrieve chain IDs from the input file and store them in OptionsInfo.

    The input file is loaded, its chains are listed using PyMOLUtil.GetChains,
    and the molecule is deleted again. The result is saved as
    OptionsInfo["ChainIDs"]; a missing result is stored as an empty list.
    """

    MolName = OptionsInfo["InfileRoot"]
    Infile = OptionsInfo["Infile"]

    MiscUtil.PrintInfo("\nRetrieving chains information for input file %s..." % Infile)

    LoadMolecule(Infile, MolName)

    ChainIDs = PyMOLUtil.GetChains(MolName)

    DeleteMolecule(MolName)

    if ChainIDs is None:
        ChainIDs = []

    # Print out chain IDs...
    ChainInfo = ", ".join(ChainIDs) if ChainIDs else "None"
    MiscUtil.PrintInfo("Chain IDs: %s" % ChainInfo)

    OptionsInfo["ChainIDs"] = ChainIDs


def ProcessSpecifiedMutations():
    """Parse and validate the mutations specified using "-m, --mutations".

    Each comma delimited specification has the format
    <ChainID>:<ResNum><BaseName> or <ResNum><BaseName>; a specification
    without a chain ID reuses the most recent chain ID. Empty, malformed,
    duplicate, and missing-residue specifications are reported and ignored.

    The valid mutations are stored in OptionsInfo["SpecifiedMutationsInfo"]
    as a list of [Mutation, ChainID, ResNum, NewBaseName] entries. An error
    is reported through MiscUtil.PrintError when the first parsed mutation
    has no chain ID or when no valid mutation remains.
    """

    MiscUtil.PrintInfo("\nProcessing specified mutations...")

    # Map of upper case base names and abbreviations to the names used for
    # the mutation; other base names are passed through unchanged...
    CanonicalBaseNameMap = {'ADENINE': 'Adenine', 'CYTOSINE': 'Cytosine', 'GUANINE': 'Guanine', 'THYMINE': 'Thymine', 'URACIL': 'Uracil', 'ADE': 'Adenine', 'CYT': 'Cytosine', 'GUA': 'Guanine', 'THY': 'Thymine', 'URA': 'Uracil'}

    SpecifiedMutationsInfo = []

    # str.split always returns at least one item, so there is nothing to
    # check here; empty items are reported one at a time in the loop below...
    Mutations = re.sub(" ", "", OptionsInfo["Mutations"])
    MutationsWords = Mutations.split(",")

    # Load macromolecule from input file...
    MolName = OptionsInfo["InfileRoot"]
    LoadMolecule(OptionsInfo["Infile"], MolName)

    FirstMutation = True
    CurrentChainID = None
    CanonicalMutationMap = {}
    MutationsCount, ValidMutationsCount = 0, 0

    for Mutation in MutationsWords:
        MutationsCount += 1
        if not Mutation:
            MiscUtil.PrintWarning("The mutation, \"%s\", specified using \"-m, --mutations\" option is empty.\nIgnoring mutation..." % (Mutation))
            continue

        # Match with a chain ID...
        MatchedResults = re.match(r"^([a-z0-9]+):([0-9]+)([a-z]+)$", Mutation, re.I)
        if MatchedResults:
            CurrentChainID, ResNum, NewBaseName = MatchedResults.groups()
        else:
            # Match without a chain ID; the most recent chain ID is reused...
            MatchedResults = re.match(r"^([0-9]+)([a-z]+)$", Mutation, re.I)
            if not MatchedResults:
                MiscUtil.PrintWarning("The format of mutation, \"%s\", specified using \"-m, --mutations\" option is not valid. Supported format: <ChainID>:<ResNum><BaseName> or <ResNum><BaseName>\nIgnoring mutation..." % (Mutation))
                continue
            ResNum, NewBaseName = MatchedResults.groups()

        if FirstMutation:
            FirstMutation = False
            if CurrentChainID is None:
                MiscUtil.PrintError("The first mutation, \"%s\", specified using \"-m, --mutations\" option must be colon delimited and contain only two values, the first value corresponding to chain ID" % (Mutation))

        CanonicalBaseName = NewBaseName.upper()
        if CanonicalBaseName in CanonicalBaseNameMap:
            NewBaseName = CanonicalBaseNameMap[CanonicalBaseName]

        # Check for duplicate mutation specifications...
        MutationSpec = "%s:%s%s" % (CurrentChainID, ResNum, NewBaseName)
        CanonicalMutation = MutationSpec.lower()
        if CanonicalMutation in CanonicalMutationMap:
            MiscUtil.PrintWarning("The mutation, \"%s\", specified using \"-m, --mutations\" option already exist for the current chain ID %s.\nIgnoring mutation..." % (Mutation, CurrentChainID))
            continue
        CanonicalMutationMap[CanonicalMutation] = Mutation

        # Is ResNum present in input file?
        SelectionCmd = "%s and chain %s and resi %s" % (MolName, CurrentChainID, ResNum)
        ResiduesInfo = PyMOLUtil.GetSelectionResiduesInfo(SelectionCmd)
        if (ResiduesInfo is None) or (not ResiduesInfo["ResNames"]):
            MiscUtil.PrintWarning("The residue number, %s, in mutation, \"%s\", specified using \"-m, --mutations\" option appears to be missing in input file.\nIgnoring mutation..." % (ResNum, Mutation))
            continue

        ValidMutationsCount += 1

        # Track mutation information...
        SpecifiedMutationsInfo.append([Mutation, CurrentChainID, ResNum, NewBaseName])

    # Delete macromolecule...
    DeleteMolecule(MolName)

    MiscUtil.PrintInfo("\nTotal number of mutations: %d" % MutationsCount)
    MiscUtil.PrintInfo("Number of valid mutations: %d" % ValidMutationsCount)
    MiscUtil.PrintInfo("Number of ignored mutations: %d" % (MutationsCount - ValidMutationsCount))

    if not SpecifiedMutationsInfo:
        MiscUtil.PrintError("No valid mutations, \"%s\" specified using \"-m, --mutations\" option." % (OptionsInfo["Mutations"]))

    OptionsInfo["SpecifiedMutationsInfo"] = SpecifiedMutationsInfo


def LoadMolecule(Infile, MolName):
    """Reinitialize PyMOL and load the input file as an object named MolName."""

    pymol.cmd.reinitialize()
    pymol.cmd.load(Infile, MolName)


def DeleteMolecule(MolName):
    """Delete the PyMOL object named MolName."""

    pymol.cmd.delete(MolName)


def ProcessOptions():
    """Process and validate command line arguments and options.

    Validates the raw docopt values in Options and fills in OptionsInfo with
    the input/output file information, the chain IDs found in the input
    file, and the parsed mutations.
    """

    MiscUtil.PrintInfo("Processing options...")

    # Validate options...
    ValidateOptions()

    OptionsInfo["Infile"] = Options["--infile"]
    FileDir, FileName, FileExt = MiscUtil.ParseFileName(OptionsInfo["Infile"])
    OptionsInfo["InfileRoot"] = FileName

    OptionsInfo["Overwrite"] = Options["--overwrite"]
    OptionsInfo["Outfile"] = Options["--outfile"]

    RetrieveChainsIDs()

    # The docopt default for "--mutations" is the string "None"...
    Mutations = Options["--mutations"]
    if re.match("^None$", Mutations, re.I):
        MiscUtil.PrintError("No mutations specified using \"-m, --mutations\" option.")

    OptionsInfo["Mutations"] = Options["--mutations"]
    ProcessSpecifiedMutations()


def RetrieveOptions():
    """Retrieve command line arguments and options.

    Parses the command line using docopt into the global Options, changes to
    the working directory when one is specified, and prints the examples
    and exits when "-e, --examples" is specified.
    """

    # Get options...
    global Options
    Options = docopt(_docoptUsage_)

    # Set current working directory to the specified directory...
    WorkingDir = Options["--workingdir"]
    if WorkingDir:
        os.chdir(WorkingDir)

    # Handle examples option...
    if "--examples" in Options and Options["--examples"]:
        MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_))
        sys.exit(0)


def ValidateOptions():
    """Validate option values for the input and output files."""

    MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"])
    MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "pdb")

    MiscUtil.ValidateOptionsDistinctFileNames("-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"])
    MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "pdb")
    MiscUtil.ValidateOptionsOutputFileOverwrite("-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"])


# Setup a usage string for docopt...
#
# NOTE: docopt only recognizes "[default: ...]" when it is separated from the
# option names by at least two spaces.
_docoptUsage_ = """
PyMOLMutateNucleicAcids.py - Mutate nucleic acids

Usage:
    PyMOLMutateNucleicAcids.py [--mutations <Spec1,Spec2,...>]
                               [--overwrite] [-w <dir>] -i <infile> -o <outfile>
    PyMOLMutateNucleicAcids.py -h | --help | -e | --examples

Description:
    Mutate nucleic acids in macromolecules. The mutations are performed using
    nucleic acids mutagenesis wizard available in PyMOL starting V2.2.

    The supported input and output file format is: PDB (.pdb)

Options:
    -m, --mutations <Spec1,Spec2,...>  [default: None]
        Comma delimited list of specifications for mutating nucleic acids.

        The format of mutation specification is as follows:

            <ChainID>:<ResNum><BaseName>,...

        A chain ID in the first specification of a mutation is required. It may be
        skipped in subsequent specifications. The most recent chain ID is used
        for the missing chain ID. The residue number corresponds to the residue
        to be mutated and must be present in the current chain. The base name
        represents the new base.

        Examples:

            A:9Thy, A:10Thy
            A:9Thy,10Thy,11Thy
            A:9Thy,10Thy,B:5Ade,6Ade

        The base names must be valid for mutating nucleic acids. No validation
        is performed before mutating residues via nucleic acids
        mutagenesis wizard available in PyMOL. The current version of the
        wizard supports the following base names:

            Adenine, Ade
            Cytosine, Cyt
            Guanine, Gua
            Thymine, Thy
            Uracil, Ura

    -e, --examples
        Print examples.
    -h, --help
        Print this help message.
    -i, --infile <infile>
        Input file name.
    -o, --outfile <outfile>
        Output file name.
    --overwrite
        Overwrite existing files.
    -w, --workingdir <dir>
        Location of working directory which defaults to the current directory.

Examples:
    To mutate a single residue in a specific chain and write a PDB file, type:

        % PyMOLMutateNucleicAcids.py -m "A:9Thy" -i Sample9.pdb
          -o Sample9Out.pdb

    To mutate multiple residues in a single chain and write a PDB file, type:

        % PyMOLMutateNucleicAcids.py -m "A:9Thy,10Thy,11Thy" -i Sample9.pdb
          -o Sample9Out.pdb

    To mutate multiple residues across multiple chains and write a PDB file, type:

        % PyMOLMutateNucleicAcids.py -m "A:9Thy,10Thy,B:5Ade,6Ade"
          -i Sample9.pdb -o Sample9Out.pdb

Author:
    Manish Sud(msud@san.rr.com)

See also:
    DownloadPDBFiles.pl, PyMOLMutateAminoAcids.py,
    PyMOLVisualizeMacromolecules.py

Copyright:
    Copyright (C) 2025 Manish Sud. All rights reserved.

    The functionality available in this script is implemented using PyMOL, a
    molecular visualization system on an open source foundation originally
    developed by Warren DeLano.

    This file is part of MayaChemTools.

    MayaChemTools is free software; you can redistribute it and/or modify it under
    the terms of the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your option) any
    later version.

"""

# docopt ends a section at the first completely empty line, which would cut
# the "Options:" section short after the first paragraph of the description
# of "-m, --mutations". Indent blank lines so that the section stays
# contiguous even when an editor strips trailing whitespace; the displayed
# text is unchanged.
_docoptUsage_ = "\n".join(Line if Line.strip() else "    " for Line in _docoptUsage_.split("\n"))

if __name__ == "__main__":
    main()