1 #!/bin/env python 2 # 3 # File: PyMOLCalculatePhiPsiAngles.py 4 # Author: Manish Sud <msud@san.rr.com> 5 # 6 # Copyright (C) 2024 Manish Sud. All rights reserved. 7 # 8 # The functionality available in this script is implemented using PyMOL, a 9 # molecular visualization system on an open source foundation originally 10 # developed by Warren DeLano. 11 # 12 # This file is part of MayaChemTools. 13 # 14 # MayaChemTools is free software; you can redistribute it and/or modify it under 15 # the terms of the GNU Lesser General Public License as published by the Free 16 # Software Foundation; either version 3 of the License, or (at your option) any 17 # later version. 18 # 19 # MayaChemTools is distributed in the hope that it will be useful, but without 20 # any warranty; without even the implied warranty of merchantability of fitness 21 # for a particular purpose. See the GNU Lesser General Public License for more 22 # details. 23 # 24 # You should have received a copy of the GNU Lesser General Public License 25 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or 26 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, 27 # Boston, MA, 02111-1307, USA. 28 # 29 30 from __future__ import print_function 31 32 # Add local python path to the global path and import standard library modules... 33 import os 34 import sys; sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python")) 35 import time 36 import re 37 38 # PyMOL imports... 39 try: 40 import pymol 41 # Finish launching PyMOL in a command line mode for batch processing (-c) 42 # along with the following options: disable loading of pymolrc and plugins (-k); 43 # suppress start up messages (-q) 44 pymol.finish_launching(['pymol', '-ckq']) 45 except ImportError as ErrMsg: 46 sys.stderr.write("\nFailed to import PyMOL module/package: %s\n" % ErrMsg) 47 sys.stderr.write("Check/update your PyMOL environment and try again.\n\n") 48 sys.exit(1) 49 50 # MayaChemTools imports... 51 try: 52 from docopt import docopt 53 import MiscUtil 54 import PyMOLUtil 55 except ImportError as ErrMsg: 56 sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg) 57 sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n") 58 sys.exit(1) 59 60 ScriptName = os.path.basename(sys.argv[0]) 61 Options = {} 62 OptionsInfo = {} 63 64 def main(): 65 """Start execution of the script.""" 66 67 MiscUtil.PrintInfo("\n%s (PyMOL v%s; MayaChemTools v%s; %s): Starting...\n" % (ScriptName, pymol.cmd.get_version()[0], MiscUtil.GetMayaChemToolsVersion(), time.asctime())) 68 69 (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime() 70 71 # Retrieve command line arguments and options... 72 RetrieveOptions() 73 74 # Process and validate command line arguments and options... 75 ProcessOptions() 76 77 # Perform actions required by the script... 78 CalculatePhiPsiAngles() 79 80 MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName) 81 MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime)) 82 83 def CalculatePhiPsiAngles(): 84 """Calculate phi and psi angles for macromolecules containing amino acids.""" 85 86 SetupOutputFiles() 87 WriteColumnLabels() 88 89 Infile = OptionsInfo["Infile"] 90 MolName = OptionsInfo["InfileRoot"] 91 92 MiscUtil.PrintInfo("\nCalculating phi and psi torsion angles for input file %s..." % Infile) 93 94 # Load infile 95 pymol.cmd.load(Infile, MolName) 96 97 OutDelim = OptionsInfo["OutDelim"] 98 Precision = OptionsInfo["Precision"] 99 100 # Go over specified chain IDs.. 101 for ChainID in OptionsInfo["SpecifiedChainsAndLigandsInfo"]["ChainIDs"]: 102 # Write out information for combined file... 103 PhiPsiInfo = PyMOLUtil.GetPhiPsiResiduesInfo(MolName, ChainID, Categorize = True) 104 OptionsInfo["OutfileResCount"] += len(PhiPsiInfo["ResNums"]) 105 WritePhiPsiInfo(OptionsInfo["OutFH"], MolName, ChainID, PhiPsiInfo, OutDelim, Precision) 106 107 # Write out information for category fies... 108 if OptionsInfo["MultipleOutFiles"]: 109 PhiPsiInfoList = [] 110 GeneralPhiPsiInfo, GlycinePhiPsiInfo, ProlinePhiPsiInfo, PreProlinePhiPsiInfo = PyMOLUtil.GetPhiPsiCategoriesResiduesInfo(MolName, ChainID) 111 PhiPsiInfoList.extend([GeneralPhiPsiInfo, GlycinePhiPsiInfo, ProlinePhiPsiInfo, PreProlinePhiPsiInfo]) 112 113 for Index, Category in enumerate(OptionsInfo["Categories"]): 114 OptionsInfo["CategoriesResCount"][Category] += len(PhiPsiInfoList[Index]["ResNums"]) 115 WritePhiPsiInfo(OptionsInfo["CategoriesOutFHs"][Category], MolName, ChainID, PhiPsiInfoList[Index], OutDelim, Precision) 116 117 # Delete MolName object 118 pymol.cmd.delete(MolName) 119 120 # Close all files... 121 CloseOutputFiles() 122 123 # List number of phi and psi angles in output files... 124 MiscUtil.PrintInfo("\nNumber of phi and psi angles in output file %s: %d" % (OptionsInfo["Outfile"], OptionsInfo["OutfileResCount"])) 125 if OptionsInfo["MultipleOutFiles"]: 126 MiscUtil.PrintInfo("") 127 for Index, Category in enumerate(OptionsInfo["Categories"]): 128 MiscUtil.PrintInfo("Number of phi and psi angles in output file %s: %d" % (OptionsInfo["CategoriesOutfiles"][Category], OptionsInfo["CategoriesResCount"][Category])) 129 130 def WritePhiPsiInfo(OutFH, MolName, ChainID, PhiPsiResiduesInfo, OutDelim, Precision): 131 """Write out phi and psi information.""" 132 133 for ResNum in PhiPsiResiduesInfo["ResNums"]: 134 ResName = PhiPsiResiduesInfo["ResName"][ResNum] 135 Phi = "%.*f" % (Precision, PhiPsiResiduesInfo["Phi"][ResNum]) 136 Psi = "%.*f" % (Precision, PhiPsiResiduesInfo["Psi"][ResNum]) 137 Category = PhiPsiResiduesInfo["Category"][ResNum] 138 139 LineWords = [] 140 if OptionsInfo["OutChainID"]: 141 LineWords.append(ChainID) 142 143 LineWords.extend([ResNum, ResName, Phi, Psi]) 144 145 if OptionsInfo["OutCategory"]: 146 LineWords.append(Category) 147 148 Line = OutDelim.join(LineWords) 149 150 OutFH.write("%s\n" % Line) 151 152 def WriteColumnLabels(): 153 """Write out column labels.""" 154 155 OutDelim = OptionsInfo["OutDelim"] 156 157 ColLabels = [] 158 if OptionsInfo["OutChainID"]: 159 ColLabels.append("ChainID") 160 161 ColLabels.extend(["ResNum", "ResName", "Phi", "Psi"]) 162 163 if OptionsInfo["OutCategory"]: 164 ColLabels.append("Category") 165 166 Line = OutDelim.join(ColLabels) 167 168 # Write column labels for combined file... 169 OutFH = OptionsInfo["OutFH"] 170 OutFH.write("%s\n" % Line) 171 172 if not OptionsInfo["MultipleOutFiles"]: 173 return 174 175 # Write columns labels for category files... 176 for Category in OptionsInfo["Categories"]: 177 CategoryOutFH = OptionsInfo["CategoriesOutFHs"][Category] 178 CategoryOutFH.write("%s\n" % Line) 179 180 def SetupOutputFiles(): 181 """Open output files.""" 182 183 if OptionsInfo["MultipleOutFiles"]: 184 MiscUtil.PrintInfo("\nGenerating output files: %s" % (", ".join(OptionsInfo["OutfilesList"]))) 185 else: 186 MiscUtil.PrintInfo("\nGenerating output file %s..." % (OptionsInfo["Outfile"])) 187 188 # Open combined output file... 189 Outfile = OptionsInfo["Outfile"] 190 OutFH = open(Outfile, "w") 191 if OutFH is None: 192 MiscUtil.PrintError("Couldn't open output file: %s.\n" % (Outfile)) 193 OptionsInfo["OutFH"] = OutFH 194 OptionsInfo["OutfileResCount"] = 0 195 196 if not OptionsInfo["MultipleOutFiles"]: 197 return 198 199 # Open output files for different categories... 200 OptionsInfo["CategoriesOutFHs"] = {} 201 OptionsInfo["CategoriesResCount"] = {} 202 for Category in OptionsInfo["Categories"]: 203 CategoryOutfile = OptionsInfo["CategoriesOutfiles"][Category] 204 CategoryOutFH = open(CategoryOutfile, "w") 205 if CategoryOutfile is None: 206 MiscUtil.PrintError("Couldn't open output file: %s.\n" % (CategoryOutfile)) 207 208 OptionsInfo["CategoriesOutFHs"][Category] = CategoryOutFH 209 OptionsInfo["CategoriesResCount"][Category] = 0 210 211 def CloseOutputFiles(): 212 """Close output files.""" 213 214 OptionsInfo["OutFH"].close() 215 216 if not OptionsInfo["MultipleOutFiles"]: 217 return 218 219 for Category in OptionsInfo["Categories"]: 220 CategoryOutFH = OptionsInfo["CategoriesOutFHs"][Category] 221 CategoryOutFH.close() 222 223 def RetrieveInfileInfo(): 224 """Retrieve information for input file.""" 225 226 Infile = OptionsInfo["Infile"] 227 InfileRoot = OptionsInfo["InfileRoot"] 228 229 ChainsAndLigandsInfo = PyMOLUtil.GetChainsAndLigandsInfo(Infile, InfileRoot) 230 OptionsInfo["ChainsAndLigandsInfo"] = ChainsAndLigandsInfo 231 232 def ProcessChainIDs(): 233 """Process specified chain IDs for infile.""" 234 235 MiscUtil.PrintInfo("\nProcessing specified chain IDs for input file %s..." % OptionsInfo["Infile"]) 236 ChainsAndLigandsInfo = OptionsInfo["ChainsAndLigandsInfo"] 237 SpecifiedChainsAndLigandsInfo = PyMOLUtil.ProcessChainsAndLigandsOptionsInfo(ChainsAndLigandsInfo, "-c, --chainIDs", OptionsInfo["ChainIDs"], None, None) 238 239 OptionsInfo["SpecifiedChainsAndLigandsInfo"] = SpecifiedChainsAndLigandsInfo 240 241 MiscUtil.PrintInfo("Specified chain IDs: %s" % (", ".join(SpecifiedChainsAndLigandsInfo["ChainIDs"]))) 242 243 def SetupCategoryOutfiles(): 244 """Setup output file names for different categories of phi and psi angles.""" 245 246 # Initialize... 247 OptionsInfo["OutfilesList"] = [] 248 OptionsInfo["OutfilesList"].append(OptionsInfo["Outfile"]) 249 250 OptionsInfo["Categories"] = ["General", "Glycine", "Proline", "PreProline"] 251 OptionsInfo["CategoriesOutfiles"] = {} 252 for Category in OptionsInfo["Categories"]: 253 OptionsInfo["CategoriesOutfiles"][Category] = None 254 255 if not OptionsInfo["MultipleOutFiles"]: 256 return 257 258 FileDir, FileName, FileExt = MiscUtil.ParseFileName(OptionsInfo["Outfile"]) 259 OutfileRoot = FileName 260 OutfileExt = FileExt 261 262 for Category in OptionsInfo["Categories"]: 263 CategoryOutfile = "%s_%s.%s" % (OutfileRoot, Category, OutfileExt) 264 if os.path.exists(CategoryOutfile): 265 if not OptionsInfo["Overwrite"]: 266 MiscUtil.PrintError("The category output file, %s, already exist. Use option \"--ov\" or \"--overwrite\" and try again.\n" % (CategoryOutfile)) 267 268 OptionsInfo["CategoriesOutfiles"][Category] = CategoryOutfile 269 OptionsInfo["OutfilesList"].append(CategoryOutfile) 270 271 def ProcessOptions(): 272 """Process and validate command line arguments and options.""" 273 274 MiscUtil.PrintInfo("Processing options...") 275 276 # Validate options... 277 ValidateOptions() 278 279 OptionsInfo["Infile"] = Options["--infile"] 280 FileDir, FileName, FileExt = MiscUtil.ParseFileName(OptionsInfo["Infile"]) 281 OptionsInfo["InfileRoot"] = FileName 282 283 OptionsInfo["Outfile"] = Options["--outfile"] 284 FileDir, FileName, FileExt = MiscUtil.ParseFileName(OptionsInfo["Outfile"]) 285 OptionsInfo["OutfileRoot"] = FileName 286 287 OptionsInfo["Overwrite"] = Options["--overwrite"] 288 289 OptionsInfo["OutDelim"] = " " 290 if MiscUtil.CheckFileExt(OptionsInfo["Outfile"], "csv"): 291 OptionsInfo["OutDelim"] = "," 292 elif MiscUtil.CheckFileExt(OptionsInfo["Outfile"], "tsv txt"): 293 OptionsInfo["OutDelim"] = "\t" 294 else: 295 MiscUtil.PrintError("The file name specified , %s, for option \"--outfile\" is not valid. Supported file formats: csv tsv txt\n" % (OptionsInfo["Outfile"])) 296 297 OptionsInfo["OutMode"] = Options["--outMode"] 298 OptionsInfo["MultipleOutFiles"] = True if re.match("^MultipleFiles$", OptionsInfo["OutMode"], re.I) else False 299 300 OptionsInfo["OutChainID"] = True if re.match("^Yes$", Options["--outChainID"], re.I) else False 301 OptionsInfo["OutCategory"] = True if re.match("^Yes$", Options["--outCategory"], re.I) else False 302 303 OptionsInfo["Overwrite"] = Options["--overwrite"] 304 OptionsInfo["Precision"] = int(Options["--precision"]) 305 306 RetrieveInfileInfo() 307 308 OptionsInfo["ChainIDs"] = Options["--chainIDs"] 309 ProcessChainIDs() 310 311 SetupCategoryOutfiles() 312 313 def RetrieveOptions(): 314 """Retrieve command line arguments and options.""" 315 316 # Get options... 317 global Options 318 Options = docopt(_docoptUsage_) 319 320 # Set current working directory to the specified directory... 321 WorkingDir = Options["--workingdir"] 322 if WorkingDir: 323 os.chdir(WorkingDir) 324 325 # Handle examples option... 326 if "--examples" in Options and Options["--examples"]: 327 MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_)) 328 sys.exit(0) 329 330 def ValidateOptions(): 331 """Validate option values.""" 332 333 MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"]) 334 MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "pdb cif") 335 336 MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "csv tsv txt") 337 MiscUtil.ValidateOptionsOutputFileOverwrite("-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"]) 338 339 MiscUtil.ValidateOptionTextValue("--outMode", Options["--outMode"], "SingleFile MultipleFiles") 340 MiscUtil.ValidateOptionTextValue("--outChainID", Options["--outChainID"], "yes no") 341 MiscUtil.ValidateOptionTextValue("--outCategory", Options["--outCategory"], "yes no") 342 MiscUtil.ValidateOptionIntegerValue("-p, --precision", Options["--precision"], {">": 0}) 343 344 # Setup a usage string for docopt... 345 _docoptUsage_ = """ 346 PyMOLCalculatePhiPsiAngles.py - Calculate phi and psi torsion angles 347 348 Usage: 349 PyMOLCalculatePhiPsiAngles.py [--chainIDs <First, All or ID1,ID2...>] 350 [--outMode <SingleFile or MultipleFies>] [--outChainID <yes or no>] 351 [--outCategory <yes or no>] [--overwrite] [--precision <number>] 352 [-w <dir>] -i <infile> -o <outfile> 353 PyMOLCalculatePhiPsiAngles.py -h | --help | -e | --examples 354 355 Description: 356 Calculate phi and psi torsion angels for amino acid residues present 357 in macromolecules. 358 359 The phi and psi angles are categorized into the following groups 360 corresponding to four types of Ramachandran plots: 361 362 General: All residues except glycine, proline, or pre-proline 363 Glycine: Only glycine residues 364 Proline: Only proline residues 365 Pre-Proline: Only residues before proline not including glycine or 366 proline 367 368 The supported input file format are: PDB (.pdb), mmCIF (.cif) 369 370 The supported output file formats are: CSV (.csv), TSV (.tsv, .txt) 371 372 Options: 373 -c, --chainIDs <First, All or ID1,ID2...> [default: All] 374 List of chain IDs to use for calculating phi and psi angles for residues 375 in chains. Possible values: First, All, or a comma delimited list of chain 376 IDs. The default is to use all chain IDs in input file. 377 -e, --examples 378 Print examples. 379 -h, --help 380 Print this help message. 381 -i, --infile <infile> 382 Input file name. 383 -o, --outfile <outfile> 384 Output file name for writing out calculated values. Supported text file 385 extensions: csv, tsv or txt. 386 387 In addition to the specified outfile containing phi and psi angles for all 388 residues, a set of additional output files is generated for 'MultipleFiles' 389 value of '--outMode' option. The names of these output files are 390 automatically generated from the the name of the specified output 391 file as shown below: 392 393 General: <OutfileRoot>_General.<OutfileExt> 394 Glycine: <OutfileRoot>_Glycine.<OutfileExt> 395 Proline: <OutfileRoot>_Proline.<OutfileExt> 396 Pre-Proline: <OutfileRoot>_PreProline.<OutfileExt> 397 398 --outMode <SingleFile or MultipleFiles> [default: SingleFile] 399 A single output file containing phi and psi angles for all residues or 400 multiple output files corresponding to different categories of angles. 401 402 The phi and psi angles are categorized into the following groups 403 corresponding to four types of Ramachandran plots: 404 405 General: All residues except glycine, proline, or pre-proline 406 Glycine: Only glycine residues 407 Proline: Only proline residues 408 Pre-Proline: Only residues before proline not including glycine or 409 proline 410 411 The output files contain the following information: 412 413 ChainID ResNum ResName Phi Psi Category 414 415 --outChainID <yes or no> [default: yes] 416 Write chain IDs to output file. 417 --outCategory <yes or no> [default: yes] 418 Write phi and psi category to output file. 419 --overwrite 420 Overwrite existing files. 421 -p, --precision <number> [default: 2] 422 Floating point precision for writing the calculated phi and psi angles. 423 -w, --workingdir <dir> 424 Location of working directory which defaults to the current directory. 425 426 Examples: 427 To calculate phi and psi angles for all residues across all chains in input 428 file and write out a single CSV file containing calculated values along with 429 chain IDs, residue names and numbers, and category of angles corresponding 430 to Ramachandran plots, type: 431 432 % PyMOLCalculatePhiPsiAngles.py -i Sample3.pdb -o Sample3Out.csv 433 434 To calculate phi and psi angles for all residues across all chains in input 435 file and write out a multiple CSV files corresponding to categories of angles 436 for Ramachandran plots along with other relevant information, type: 437 438 % PyMOLCalculatePhiPsiAngles.py --outMode MultipleFiles -i Sample3.pdb 439 -o Sample3Out.csv 440 441 To calculate phi and psi angles for all residues in a specific chain in input 442 file and write out a single TSV file containing calculated values along with 443 other relevant information, type: 444 445 % PyMOLCalculatePhiPsiAngles.py -c E -i Sample3.pdb -o Sample3Out.csv 446 447 To calculate phi and psi angles for all residues in a specific chain in input 448 file and write out a multiple TSV files containing calculated values at a specific 449 precision along with other relevant information, type: 450 451 % PyMOLCalculatePhiPsiAngles.py --outMode MultipleFiles --chainIDs I 452 -i Sample3.pdb -o Sample3Out.csv 453 454 Author: 455 Manish Sud(msud@san.rr.com) 456 457 See also: 458 DownloadPDBFiles.pl, PyMOLCalculateRMSD.py, PyMOLCalculateProperties.py, 459 PyMOLGenerateRamachandranPlots.py 460 461 Copyright: 462 Copyright (C) 2024 Manish Sud. All rights reserved. 463 464 The functionality available in this script is implemented using PyMOL, a 465 molecular visualization system on an open source foundation originally 466 developed by Warren DeLano. 467 468 This file is part of MayaChemTools. 469 470 MayaChemTools is free software; you can redistribute it and/or modify it under 471 the terms of the GNU Lesser General Public License as published by the Free 472 Software Foundation; either version 3 of the License, or (at your option) any 473 later version. 474 475 """ 476 477 if __name__ == "__main__": 478 main()