MayaChemTools

   1 #!/bin/env python
   2 #
   3 # File: PyMOLCalculatePhiPsiAngles.py
   4 # Author: Manish Sud <msud@san.rr.com>
   5 #
   6 # Copyright (C) 2024 Manish Sud. All rights reserved.
   7 #
   8 # The functionality available in this script is implemented using PyMOL, a
   9 # molecular visualization system on an open source foundation originally
  10 # developed by Warren DeLano.
  11 #
  12 # This file is part of MayaChemTools.
  13 #
  14 # MayaChemTools is free software; you can redistribute it and/or modify it under
  15 # the terms of the GNU Lesser General Public License as published by the Free
  16 # Software Foundation; either version 3 of the License, or (at your option) any
  17 # later version.
  18 #
  19 # MayaChemTools is distributed in the hope that it will be useful, but without
  20 # any warranty; without even the implied warranty of merchantability of fitness
  21 # for a particular purpose.  See the GNU Lesser General Public License for more
  22 # details.
  23 #
  24 # You should have received a copy of the GNU Lesser General Public License
  25 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  26 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  27 # Boston, MA, 02111-1307, USA.
  28 #
  29 
  30 from __future__ import print_function
  31 
  32 # Add local python path to the global path and import standard library modules...
  33 import os
  34 import sys;  sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python"))
  35 import time
  36 import re
  37 
  38 # PyMOL imports...
  39 try:
  40     import pymol
  41     # Finish launching PyMOL in  a command line mode for batch processing (-c)
  42     # along with the following options:  disable loading of pymolrc and plugins (-k);
  43     # suppress start up messages (-q)
  44     pymol.finish_launching(['pymol', '-ckq'])
  45 except ImportError as ErrMsg:
  46     sys.stderr.write("\nFailed to import PyMOL module/package: %s\n" % ErrMsg)
  47     sys.stderr.write("Check/update your PyMOL environment and try again.\n\n")
  48     sys.exit(1)
  49 
  50 # MayaChemTools imports...
  51 try:
  52     from docopt import docopt
  53     import MiscUtil
  54     import PyMOLUtil
  55 except ImportError as ErrMsg:
  56     sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg)
  57     sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n")
  58     sys.exit(1)
  59 
  60 ScriptName = os.path.basename(sys.argv[0])
  61 Options = {}
  62 OptionsInfo = {}
  63 
  64 def main():
  65     """Start execution of the script."""
  66     
  67     MiscUtil.PrintInfo("\n%s (PyMOL v%s; MayaChemTools v%s; %s): Starting...\n" % (ScriptName, pymol.cmd.get_version()[0], MiscUtil.GetMayaChemToolsVersion(), time.asctime()))
  68     
  69     (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime()
  70     
  71     # Retrieve command line arguments and options...
  72     RetrieveOptions()
  73     
  74     # Process and validate command line arguments and options...
  75     ProcessOptions()
  76     
  77     # Perform actions required by the script...
  78     CalculatePhiPsiAngles()
  79     
  80     MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName)
  81     MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime))
  82 
  83 def CalculatePhiPsiAngles():
  84     """Calculate phi and psi angles for macromolecules containing amino acids."""
  85     
  86     SetupOutputFiles()
  87     WriteColumnLabels()
  88     
  89     Infile = OptionsInfo["Infile"]
  90     MolName = OptionsInfo["InfileRoot"]
  91     
  92     MiscUtil.PrintInfo("\nCalculating phi and psi torsion angles for input file %s..." % Infile)
  93     
  94     # Load infile
  95     pymol.cmd.load(Infile, MolName)
  96 
  97     OutDelim = OptionsInfo["OutDelim"]
  98     Precision = OptionsInfo["Precision"]
  99 
 100     # Go over specified chain IDs..
 101     for ChainID in OptionsInfo["SpecifiedChainsAndLigandsInfo"]["ChainIDs"]:
 102         # Write out information for combined file...
 103         PhiPsiInfo = PyMOLUtil.GetPhiPsiResiduesInfo(MolName, ChainID, Categorize = True)
 104         OptionsInfo["OutfileResCount"] += len(PhiPsiInfo["ResNums"])
 105         WritePhiPsiInfo(OptionsInfo["OutFH"], MolName, ChainID, PhiPsiInfo, OutDelim, Precision)
 106 
 107         # Write out information for category fies...
 108         if OptionsInfo["MultipleOutFiles"]:
 109             PhiPsiInfoList = []
 110             GeneralPhiPsiInfo, GlycinePhiPsiInfo, ProlinePhiPsiInfo, PreProlinePhiPsiInfo = PyMOLUtil.GetPhiPsiCategoriesResiduesInfo(MolName, ChainID)
 111             PhiPsiInfoList.extend([GeneralPhiPsiInfo, GlycinePhiPsiInfo, ProlinePhiPsiInfo, PreProlinePhiPsiInfo])
 112             
 113             for Index, Category in enumerate(OptionsInfo["Categories"]):
 114                 OptionsInfo["CategoriesResCount"][Category] += len(PhiPsiInfoList[Index]["ResNums"])
 115                 WritePhiPsiInfo(OptionsInfo["CategoriesOutFHs"][Category], MolName, ChainID, PhiPsiInfoList[Index], OutDelim, Precision)
 116     
 117     # Delete MolName object
 118     pymol.cmd.delete(MolName)
 119 
 120     # Close all files...
 121     CloseOutputFiles()
 122 
 123     # List number of phi and psi angles in output files...
 124     MiscUtil.PrintInfo("\nNumber of phi and psi angles in output file %s: %d" % (OptionsInfo["Outfile"],  OptionsInfo["OutfileResCount"]))
 125     if OptionsInfo["MultipleOutFiles"]:
 126         MiscUtil.PrintInfo("")
 127         for Index, Category in enumerate(OptionsInfo["Categories"]):
 128             MiscUtil.PrintInfo("Number of phi and psi angles in output file %s: %d" % (OptionsInfo["CategoriesOutfiles"][Category], OptionsInfo["CategoriesResCount"][Category]))
 129 
 130 def WritePhiPsiInfo(OutFH, MolName, ChainID, PhiPsiResiduesInfo, OutDelim, Precision):
 131     """Write out phi and psi information."""
 132 
 133     for ResNum in PhiPsiResiduesInfo["ResNums"]:
 134         ResName = PhiPsiResiduesInfo["ResName"][ResNum]
 135         Phi = "%.*f" % (Precision, PhiPsiResiduesInfo["Phi"][ResNum])
 136         Psi = "%.*f" % (Precision, PhiPsiResiduesInfo["Psi"][ResNum])
 137         Category = PhiPsiResiduesInfo["Category"][ResNum]
 138 
 139         LineWords = []
 140         if OptionsInfo["OutChainID"]:
 141             LineWords.append(ChainID)
 142         
 143         LineWords.extend([ResNum, ResName, Phi, Psi])
 144         
 145         if OptionsInfo["OutCategory"]:
 146             LineWords.append(Category)
 147             
 148         Line = OutDelim.join(LineWords)
 149         
 150         OutFH.write("%s\n" % Line)
 151 
 152 def WriteColumnLabels():
 153     """Write out column labels."""
 154     
 155     OutDelim = OptionsInfo["OutDelim"]
 156 
 157     ColLabels = []
 158     if OptionsInfo["OutChainID"]:
 159         ColLabels.append("ChainID")
 160     
 161     ColLabels.extend(["ResNum", "ResName", "Phi", "Psi"])
 162     
 163     if OptionsInfo["OutCategory"]:
 164         ColLabels.append("Category")
 165     
 166     Line = OutDelim.join(ColLabels)
 167 
 168     # Write column labels for combined file...
 169     OutFH = OptionsInfo["OutFH"]
 170     OutFH.write("%s\n" % Line)
 171 
 172     if not OptionsInfo["MultipleOutFiles"]:
 173         return
 174     
 175     # Write columns labels for category files...
 176     for Category in OptionsInfo["Categories"]:
 177         CategoryOutFH = OptionsInfo["CategoriesOutFHs"][Category]
 178         CategoryOutFH.write("%s\n" % Line)
 179 
 180 def SetupOutputFiles():
 181     """Open output files."""
 182 
 183     if OptionsInfo["MultipleOutFiles"]:
 184         MiscUtil.PrintInfo("\nGenerating output files: %s" % (", ".join(OptionsInfo["OutfilesList"])))
 185     else:
 186         MiscUtil.PrintInfo("\nGenerating output file %s..." % (OptionsInfo["Outfile"]))
 187         
 188     # Open combined output file...
 189     Outfile = OptionsInfo["Outfile"]
 190     OutFH = open(Outfile, "w")
 191     if OutFH is None:
 192         MiscUtil.PrintError("Couldn't open output file: %s.\n" % (Outfile))
 193     OptionsInfo["OutFH"] = OutFH
 194     OptionsInfo["OutfileResCount"] = 0
 195 
 196     if not OptionsInfo["MultipleOutFiles"]:
 197         return
 198     
 199     # Open output files for different categories...
 200     OptionsInfo["CategoriesOutFHs"] = {}
 201     OptionsInfo["CategoriesResCount"] = {}
 202     for Category in OptionsInfo["Categories"]:
 203         CategoryOutfile = OptionsInfo["CategoriesOutfiles"][Category]
 204         CategoryOutFH = open(CategoryOutfile, "w")
 205         if CategoryOutfile is None:
 206             MiscUtil.PrintError("Couldn't open output file: %s.\n" % (CategoryOutfile))
 207         
 208         OptionsInfo["CategoriesOutFHs"][Category] = CategoryOutFH
 209         OptionsInfo["CategoriesResCount"][Category] = 0
 210     
 211 def CloseOutputFiles():
 212     """Close output files."""
 213     
 214     OptionsInfo["OutFH"].close()
 215     
 216     if not OptionsInfo["MultipleOutFiles"]:
 217         return
 218     
 219     for Category in OptionsInfo["Categories"]:
 220         CategoryOutFH = OptionsInfo["CategoriesOutFHs"][Category]
 221         CategoryOutFH.close()
 222 
 223 def RetrieveInfileInfo():
 224     """Retrieve information for input file."""
 225     
 226     Infile = OptionsInfo["Infile"]
 227     InfileRoot = OptionsInfo["InfileRoot"]
 228     
 229     ChainsAndLigandsInfo = PyMOLUtil.GetChainsAndLigandsInfo(Infile, InfileRoot)
 230     OptionsInfo["ChainsAndLigandsInfo"] = ChainsAndLigandsInfo
 231 
 232 def ProcessChainIDs():
 233     """Process specified chain IDs for infile."""
 234     
 235     MiscUtil.PrintInfo("\nProcessing specified chain IDs for input file %s..." % OptionsInfo["Infile"])        
 236     ChainsAndLigandsInfo = OptionsInfo["ChainsAndLigandsInfo"]
 237     SpecifiedChainsAndLigandsInfo = PyMOLUtil.ProcessChainsAndLigandsOptionsInfo(ChainsAndLigandsInfo, "-c, --chainIDs", OptionsInfo["ChainIDs"], None, None)
 238     
 239     OptionsInfo["SpecifiedChainsAndLigandsInfo"] = SpecifiedChainsAndLigandsInfo
 240     
 241     MiscUtil.PrintInfo("Specified chain IDs: %s" % (", ".join(SpecifiedChainsAndLigandsInfo["ChainIDs"])))
 242 
 243 def SetupCategoryOutfiles():
 244     """Setup output file names for different categories of phi and psi angles."""
 245 
 246     # Initialize...
 247     OptionsInfo["OutfilesList"] = []
 248     OptionsInfo["OutfilesList"].append(OptionsInfo["Outfile"])
 249     
 250     OptionsInfo["Categories"] = ["General", "Glycine", "Proline", "PreProline"]
 251     OptionsInfo["CategoriesOutfiles"] = {}
 252     for Category in OptionsInfo["Categories"]:
 253         OptionsInfo["CategoriesOutfiles"][Category] = None
 254 
 255     if not OptionsInfo["MultipleOutFiles"]:
 256         return
 257 
 258     FileDir, FileName, FileExt = MiscUtil.ParseFileName(OptionsInfo["Outfile"])
 259     OutfileRoot = FileName
 260     OutfileExt = FileExt
 261     
 262     for Category in OptionsInfo["Categories"]:
 263         CategoryOutfile = "%s_%s.%s" % (OutfileRoot, Category, OutfileExt)
 264         if os.path.exists(CategoryOutfile):
 265             if not OptionsInfo["Overwrite"]:
 266                 MiscUtil.PrintError("The category output file, %s, already exist. Use option \"--ov\" or \"--overwrite\" and try again.\n" % (CategoryOutfile))
 267                 
 268         OptionsInfo["CategoriesOutfiles"][Category] = CategoryOutfile
 269         OptionsInfo["OutfilesList"].append(CategoryOutfile)
 270         
 271 def ProcessOptions():
 272     """Process and validate command line arguments and options."""
 273 
 274     MiscUtil.PrintInfo("Processing options...")
 275     
 276     # Validate options...
 277     ValidateOptions()
 278 
 279     OptionsInfo["Infile"] = Options["--infile"]
 280     FileDir, FileName, FileExt = MiscUtil.ParseFileName(OptionsInfo["Infile"])
 281     OptionsInfo["InfileRoot"] = FileName
 282     
 283     OptionsInfo["Outfile"] = Options["--outfile"]
 284     FileDir, FileName, FileExt = MiscUtil.ParseFileName(OptionsInfo["Outfile"])
 285     OptionsInfo["OutfileRoot"] = FileName
 286     
 287     OptionsInfo["Overwrite"] = Options["--overwrite"]
 288     
 289     OptionsInfo["OutDelim"] = " "
 290     if MiscUtil.CheckFileExt(OptionsInfo["Outfile"], "csv"):
 291         OptionsInfo["OutDelim"] = ","
 292     elif MiscUtil.CheckFileExt(OptionsInfo["Outfile"], "tsv txt"):
 293         OptionsInfo["OutDelim"] = "\t"
 294     else:
 295         MiscUtil.PrintError("The file name specified , %s, for option \"--outfile\" is not valid. Supported file formats: csv tsv txt\n" % (OptionsInfo["Outfile"]))
 296     
 297     OptionsInfo["OutMode"] = Options["--outMode"]
 298     OptionsInfo["MultipleOutFiles"] = True if re.match("^MultipleFiles$", OptionsInfo["OutMode"], re.I) else False
 299     
 300     OptionsInfo["OutChainID"] = True if re.match("^Yes$", Options["--outChainID"], re.I) else False
 301     OptionsInfo["OutCategory"] = True if re.match("^Yes$", Options["--outCategory"], re.I) else False
 302     
 303     OptionsInfo["Overwrite"] = Options["--overwrite"]
 304     OptionsInfo["Precision"] = int(Options["--precision"])
 305     
 306     RetrieveInfileInfo()
 307     
 308     OptionsInfo["ChainIDs"] = Options["--chainIDs"]
 309     ProcessChainIDs()
 310     
 311     SetupCategoryOutfiles()
 312 
 313 def RetrieveOptions(): 
 314     """Retrieve command line arguments and options."""
 315     
 316     # Get options...
 317     global Options
 318     Options = docopt(_docoptUsage_)
 319     
 320     # Set current working directory to the specified directory...
 321     WorkingDir = Options["--workingdir"]
 322     if WorkingDir:
 323         os.chdir(WorkingDir)
 324     
 325     # Handle examples option...
 326     if "--examples" in Options and Options["--examples"]:
 327         MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_))
 328         sys.exit(0)
 329 
 330 def ValidateOptions():
 331     """Validate option values."""
 332 
 333     MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"])
 334     MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "pdb cif")
 335 
 336     MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "csv tsv txt")
 337     MiscUtil.ValidateOptionsOutputFileOverwrite("-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"])
 338     
 339     MiscUtil.ValidateOptionTextValue("--outMode", Options["--outMode"], "SingleFile MultipleFiles")
 340     MiscUtil.ValidateOptionTextValue("--outChainID", Options["--outChainID"], "yes no")
 341     MiscUtil.ValidateOptionTextValue("--outCategory", Options["--outCategory"], "yes no")
 342     MiscUtil.ValidateOptionIntegerValue("-p, --precision", Options["--precision"], {">": 0})
 343     
 344 # Setup a usage string for docopt...
 345 _docoptUsage_ = """
 346 PyMOLCalculatePhiPsiAngles.py - Calculate phi and psi torsion angles
 347 
 348 Usage:
 349     PyMOLCalculatePhiPsiAngles.py [--chainIDs <First, All or ID1,ID2...>]
 350                                   [--outMode <SingleFile or MultipleFies>] [--outChainID <yes or no>]
 351                                   [--outCategory <yes or no>] [--overwrite] [--precision <number>]
 352                                   [-w <dir>] -i <infile> -o <outfile>
 353     PyMOLCalculatePhiPsiAngles.py -h | --help | -e | --examples
 354 
 355 Description:
 356     Calculate phi and psi torsion angels for amino acid residues present
 357     in macromolecules.
 358 
 359     The phi and psi angles are categorized into the following groups
 360     corresponding to four types of Ramachandran plots:
 361     
 362         General: All residues except glycine, proline, or pre-proline
 363         Glycine: Only glycine residues
 364         Proline: Only proline residues
 365         Pre-Proline: Only residues before proline not including glycine or
 366             proline
 367     
 368     The supported input  file format are: PDB (.pdb), mmCIF (.cif)
 369 
 370     The supported output file formats are:  CSV (.csv), TSV (.tsv, .txt)
 371 
 372 Options:
 373     -c, --chainIDs <First, All or ID1,ID2...>  [default: All]
 374         List of chain IDs to use for calculating phi and psi angles for residues
 375         in chains. Possible values: First, All, or a comma delimited list of chain
 376         IDs. The default is to use all chain IDs in input file.
 377     -e, --examples
 378         Print examples.
 379     -h, --help
 380         Print this help message.
 381     -i, --infile <infile>
 382         Input file name.
 383     -o, --outfile <outfile>
 384         Output file name for writing out calculated values. Supported text file
 385         extensions: csv, tsv or txt.
 386         
 387         In addition to the specified outfile containing phi and psi angles for all
 388         residues, a set of additional output files is generated for 'MultipleFiles'
 389         value of '--outMode' option. The names of these output files are
 390         automatically generated from the the name of the specified output
 391         file as shown below:
 392         
 393             General: <OutfileRoot>_General.<OutfileExt>
 394             Glycine: <OutfileRoot>_Glycine.<OutfileExt>
 395             Proline: <OutfileRoot>_Proline.<OutfileExt>
 396             Pre-Proline: <OutfileRoot>_PreProline.<OutfileExt>
 397         
 398     --outMode <SingleFile or MultipleFiles>  [default: SingleFile]
 399         A single output file containing phi and psi angles for all residues or
 400         multiple output files corresponding to different categories of angles.
 401         
 402         The phi and psi angles are categorized into the following groups
 403         corresponding to four types of Ramachandran plots:
 404         
 405             General: All residues except glycine, proline, or pre-proline
 406             Glycine: Only glycine residues
 407             Proline: Only proline residues
 408             Pre-Proline: Only residues before proline not including glycine or
 409                 proline
 410         
 411         The output files contain the following information:
 412         
 413             ChainID ResNum ResName Phi Psi Category
 414         
 415     --outChainID <yes or no>  [default: yes]
 416         Write chain IDs to output file.
 417     --outCategory <yes or no>  [default: yes]
 418         Write phi and psi category to output file.
 419     --overwrite
 420         Overwrite existing files.
 421     -p, --precision <number>  [default: 2]
 422         Floating point precision for writing the calculated phi and psi angles.
 423     -w, --workingdir <dir>
 424         Location of working directory which defaults to the current directory.
 425 
 426 Examples:
 427     To calculate phi and psi angles for all residues across all chains in input
 428     file and write out a single CSV file containing calculated values along with
 429     chain IDs, residue names and numbers, and category of angles corresponding
 430     to Ramachandran plots, type:
 431 
 432         % PyMOLCalculatePhiPsiAngles.py -i Sample3.pdb -o Sample3Out.csv
 433 
 434     To calculate phi and psi angles for all residues across all chains in input
 435     file and write out a multiple CSV files corresponding to categories of angles
 436     for Ramachandran plots along with other relevant information, type:
 437 
 438         % PyMOLCalculatePhiPsiAngles.py --outMode MultipleFiles -i Sample3.pdb
 439           -o Sample3Out.csv
 440 
 441     To calculate phi and psi angles for all residues in a specific chain in input
 442     file and write out a single TSV file containing calculated values along with
 443     other relevant information, type:
 444 
 445         % PyMOLCalculatePhiPsiAngles.py -c E  -i Sample3.pdb -o Sample3Out.csv
 446 
 447     To calculate phi and psi angles for all residues in a specific chain in input
 448     file and write out a multiple TSV files containing calculated values at a specific
 449     precision along with other relevant information, type:
 450 
 451         % PyMOLCalculatePhiPsiAngles.py --outMode MultipleFiles --chainIDs I
 452           -i Sample3.pdb -o Sample3Out.csv
 453 
 454 Author:
 455     Manish Sud(msud@san.rr.com)
 456 
 457 See also:
 458     DownloadPDBFiles.pl, PyMOLCalculateRMSD.py, PyMOLCalculateProperties.py,
 459     PyMOLGenerateRamachandranPlots.py
 460 
 461 Copyright:
 462     Copyright (C) 2024 Manish Sud. All rights reserved.
 463 
 464     The functionality available in this script is implemented using PyMOL, a
 465     molecular visualization system on an open source foundation originally
 466     developed by Warren DeLano.
 467 
 468     This file is part of MayaChemTools.
 469 
 470     MayaChemTools is free software; you can redistribute it and/or modify it under
 471     the terms of the GNU Lesser General Public License as published by the Free
 472     Software Foundation; either version 3 of the License, or (at your option) any
 473     later version.
 474 
 475 """
 476 
 477 if __name__ == "__main__":
 478     main()