MayaChemTools

   1 #!/bin/env python
   2 #
   3 # File: PyMOLAlignChains.py
   4 # Author: Manish Sud <msud@san.rr.com>
   5 #
   6 # Copyright (C) 2024 Manish Sud. All rights reserved.
   7 #
   8 # The functionality available in this script is implemented using PyMOL, a
   9 # molecular visualization system on an open source foundation originally
  10 # developed by Warren DeLano.
  11 #
  12 # This file is part of MayaChemTools.
  13 #
  14 # MayaChemTools is free software; you can redistribute it and/or modify it under
  15 # the terms of the GNU Lesser General Public License as published by the Free
  16 # Software Foundation; either version 3 of the License, or (at your option) any
  17 # later version.
  18 #
  19 # MayaChemTools is distributed in the hope that it will be useful, but without
  20 # any warranty; without even the implied warranty of merchantability or fitness
  21 # for a particular purpose.  See the GNU Lesser General Public License for more
  22 # details.
  23 #
  24 # You should have received a copy of the GNU Lesser General Public License
  25 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  26 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  27 # Boston, MA, 02111-1307, USA.
  28 #
  29 
  30 from __future__ import print_function
  31 
  32 # Add local python path to the global path and import standard library modules...
  33 import os
  34 import sys;  sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python"))
  35 import time
  36 import re
  37 
  38 # PyMOL imports...
  39 try:
  40     import pymol
  41     # Finish launching PyMOL in  a command line mode for batch processing (-c)
  42     # along with the following options:  disable loading of pymolrc and plugins (-k);
  43     # suppress start up messages (-q)
  44     pymol.finish_launching(['pymol', '-ckq'])
  45 except ImportError as ErrMsg:
  46     sys.stderr.write("\nFailed to import PyMOL module/package: %s\n" % ErrMsg)
  47     sys.stderr.write("Check/update your PyMOL environment and try again.\n\n")
  48     sys.exit(1)
  49 
  50 # MayaChemTools imports...
  51 try:
  52     from docopt import docopt
  53     import MiscUtil
  54     import PyMOLUtil
  55 except ImportError as ErrMsg:
  56     sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg)
  57     sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n")
  58     sys.exit(1)
  59 
  60 ScriptName = os.path.basename(sys.argv[0])
  61 Options = {}
  62 OptionsInfo = {}
  63 
  64 def main():
  65     """Start execution of the script."""
  66     
  67     MiscUtil.PrintInfo("\n%s (PyMOL v%s; MayaChemTools v%s; %s): Starting...\n" % (ScriptName, pymol.cmd.get_version()[0], MiscUtil.GetMayaChemToolsVersion(), time.asctime()))
  68     
  69     (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime()
  70     
  71     # Retrieve command line arguments and options...
  72     RetrieveOptions()
  73     
  74     # Process and validate command line arguments and options...
  75     ProcessOptions()
  76 
  77     # Perform actions required by the script...
  78     PerformChainAlignment()
  79     
  80     MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName)
  81     MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime))
  82 
  83 def PerformChainAlignment():
  84     """Align chains and write out new files."""
  85 
  86     MiscUtil.PrintInfo("\nGenerating output files...")
  87 
  88     # Load reffile for alignment..
  89     SetupAlignReference()
  90 
  91     # Perform alignment for each input file and write it out...
  92     for FileIndex in range(0, len(OptionsInfo["InfilesInfo"]["InfilesNames"])):
  93         SetupInputObject(FileIndex)
  94         AlignInputObject(FileIndex)
  95         WriteAlignedInputObject(FileIndex)
  96         DeleteInputObject(FileIndex)
  97 
  98     # Delete reference object...
  99     pymol.cmd.delete(OptionsInfo["RefFileInfo"]["PyMOLObjectName"])
 100     
 101 def SetupAlignReference():
 102     """Setup object for alignment reference."""
 103 
 104     RefFile = OptionsInfo["RefFileInfo"]["RefFileName"]
 105     RefName = OptionsInfo["RefFileInfo"]["PyMOLObjectName"]
 106 
 107     pymol.cmd.load(RefFile, RefName)
 108 
 109 def SetupInputObject(FileIndex):
 110     """Setup a PyMOL object for input file."""
 111 
 112     InputFile = OptionsInfo["InfilesInfo"]["InfilesNames"][FileIndex]
 113     InputName = OptionsInfo["InfilesInfo"]["PyMOLObjectNames"][FileIndex]
 114     
 115     pymol.cmd.load(InputFile, InputName)
 116 
 117 def AlignInputObject(FileIndex):
 118     """Align input object to reference object."""
 119     
 120     RefName = OptionsInfo["RefFileInfo"]["PyMOLObjectName"]
 121     FitName = OptionsInfo["InfilesInfo"]["PyMOLObjectNames"][FileIndex]
 122     
 123     MiscUtil.PrintInfo("\nAligning %s to %s..." % (FitName, RefName))
 124 
 125     if re.match("^FirstChain$", OptionsInfo["AlignMode"], re.I):
 126         RefFirstChainID = OptionsInfo["RefFileInfo"]["ChainIDs"][0]
 127         RefSelection = "(%s and chain %s)" % (RefName, RefFirstChainID)
 128         
 129         FitFirstChainID = RetrieveFirstChainID(FileIndex)
 130         FitSelection = "(%s and chain %s)" % (FitName, FitFirstChainID)
 131     else:
 132         RefSelection = RefName
 133         FitSelection = FitName
 134 
 135     if re.match("^align$", OptionsInfo["AlignMethod"], re.I):
 136         pymol.cmd.align(FitSelection, RefSelection)
 137     elif re.match("^cealign$", OptionsInfo["AlignMethod"], re.I):
 138         pymol.cmd.cealign(RefSelection, FitSelection)
 139     elif re.match("^super$", OptionsInfo["AlignMethod"], re.I):
 140         pymol.cmd.super(FitSelection, RefSelection)
 141     else:
 142         MiscUtil.PrintError("Invalid alignment method: %s" % OptionsInfo["AlignMethod"])
 143 
 144 def WriteAlignedInputObject(FileIndex):
 145     """Write out aligned input object."""
 146     
 147     Outfile = OptionsInfo["InfilesInfo"]["OutfilesNames"][FileIndex]
 148     InputName = OptionsInfo["InfilesInfo"]["PyMOLObjectNames"][FileIndex]
 149     
 150     MiscUtil.PrintInfo("Generating aligned output file %s..." % Outfile)
 151     
 152     pymol.cmd.save(Outfile, InputName)
 153     
 154     if not os.path.exists(Outfile):
 155         MiscUtil.PrintWarning("Failed to generate aligned output file, %s..." % (Outfile))
 156 
 157 def DeleteInputObject(FileIndex):
 158     """Delete aligned input object."""
 159     
 160     InputName = OptionsInfo["InfilesInfo"]["PyMOLObjectNames"][FileIndex]
 161     pymol.cmd.delete(InputName)
 162 
 163 def RetrieveInfilesInfo():
 164     """Retrieve information for input files."""
 165 
 166     InfilesInfo = {}
 167     
 168     InfilesInfo["InfilesNames"] = []
 169     InfilesInfo["InfilesRoots"] = []
 170     InfilesInfo["ChainIDs"] = []
 171     InfilesInfo["PyMOLObjectNames"] = []
 172     
 173     InfilesInfo["OutfilesNames"] = []
 174 
 175     OutSuffix = OptionsInfo["OutSuffix"]
 176     
 177     for Infile in OptionsInfo["InfilesNames"]:
 178         MiscUtil.PrintInfo("\nRetrieving chains information for input file %s..." % Infile)
 179         
 180         FileDir, FileName, FileExt = MiscUtil.ParseFileName(Infile)
 181         InfileRoot = FileName
 182         
 183         ChainIDs = RetrieveChainIDs(Infile, InfileRoot)
 184         if not len(ChainIDs):
 185             if re.match("^FirstChain$", OptionsInfo["AlignMode"], re.I):
 186                 MiscUtil.PrintError("The align mode, %s, can't be used for aligning chains: No non-empty chain IDs found in input file." % (OptionsInfo["AlignMode"]))
 187         
 188         InfilesInfo["InfilesNames"].append(Infile)
 189         InfilesInfo["InfilesRoots"].append(InfileRoot)
 190         InfilesInfo["ChainIDs"].append(ChainIDs)
 191         
 192         InfilesInfo["PyMOLObjectNames"].append(InfileRoot)
 193 
 194         # Setup outfile name...
 195         Outfile = "%s%s.pdb" % (InfileRoot, OutSuffix)
 196         InfilesInfo["OutfilesNames"].append(Outfile)
 197         if os.path.exists(Outfile):
 198             if not OptionsInfo["Overwrite"]:
 199                 MiscUtil.PrintError("The output file, %s, already exist. Use option \"--ov\" or \"--overwrite\" and try again.\n" % (Outfile))
 200     
 201     OptionsInfo["InfilesInfo"] = InfilesInfo
 202 
 203 def RetrieveRefFileInfo():
 204     """Retrieve information for ref file."""
 205 
 206     RefFileInfo = {}
 207 
 208     RefFile = OptionsInfo["RefFileName"]
 209     
 210     FileDir, FileName, FileExt = MiscUtil.ParseFileName(RefFile)
 211     RefFileRoot = FileName
 212     
 213     if re.match("^FirstInputFile$", OptionsInfo["AlignRefFile"], re.I):
 214         ChainIDs = OptionsInfo["InfilesInfo"]["ChainIDs"][0]
 215     else:
 216         MiscUtil.PrintInfo("\nRetrieving chains information for alignment reference file %s..." % RefFile)
 217         ChainIDs = RetrieveChainIDs(RefFile, RefFileRoot)
 218         if not len(ChainIDs):
 219             if re.match("^FirstChain$", OptionsInfo["AlignMode"], re.I):
 220                 MiscUtil.PrintError("The align mode, %s, can't be used for aligning chains: No non-empty chain IDs found in input file." % (OptionsInfo["AlignMode"]))
 221 
 222     RefFileInfo["RefFileName"] = RefFile
 223     RefFileInfo["RefFileRoot"] = RefFileRoot
 224     RefFileInfo["PyMOLObjectName"] = "AlignRef_%s" % RefFileRoot
 225     RefFileInfo["ChainIDs"] = ChainIDs
 226     
 227     OptionsInfo["RefFileInfo"] = RefFileInfo
 228 
 229 def RetrieveChainIDs(Infile, InfileRoot):
 230     """Retrieve chains IDs for an input file."""
 231 
 232     pymol.cmd.reinitialize()
 233     
 234     MolName = InfileRoot
 235     pymol.cmd.load(Infile, MolName)
 236 
 237     ChainIDs = PyMOLUtil.GetChains(MolName)
 238     pymol.cmd.delete(MolName)
 239 
 240     if ChainIDs is None:
 241         ChainIDs = []
 242 
 243     # Print out chain and ligand IDs...
 244     ChainInfo = ", ".join(ChainIDs) if len(ChainIDs) else "None"
 245     MiscUtil.PrintInfo("Chain IDs: %s" % ChainInfo)
 246                          
 247     return ChainIDs
 248 
 249 def RetrieveFirstChainID(FileIndex):
 250     """Get first chain ID."""
 251     
 252     ChainIDs = OptionsInfo["InfilesInfo"]["ChainIDs"][FileIndex]
 253     
 254     FirstChainID = None
 255     if len(ChainIDs):
 256         FirstChainID = ChainIDs[0]
 257     
 258     return FirstChainID
 259 
 260 def ProcessOptions():
 261     """Process and validate command line arguments and options."""
 262 
 263     MiscUtil.PrintInfo("Processing options...")
 264     
 265     # Validate options...
 266     ValidateOptions()
 267     
 268     OptionsInfo["AlignMethod"] = Options["--alignMethod"].lower()
 269     OptionsInfo["AlignMode"] = Options["--alignMode"]
 270     
 271     OptionsInfo["Infiles"] = Options["--infiles"]
 272     OptionsInfo["InfilesNames"] =  Options["--infileNames"]
 273 
 274     OptionsInfo["AlignRefFile"] = Options["--alignRefFile"]
 275     if re.match("^FirstInputFile$", Options["--alignRefFile"], re.I):
 276         OptionsInfo["RefFileName"] = OptionsInfo["InfilesNames"][0]
 277     else:
 278         OptionsInfo["RefFileName"] = Options["--alignRefFile"]
 279     
 280     OptionsInfo["OutSuffix"] = Options["--outSuffix"]
 281     
 282     OptionsInfo["Overwrite"] = Options["--overwrite"]
 283 
 284     RetrieveInfilesInfo()
 285     RetrieveRefFileInfo()
 286 
 287 def RetrieveOptions(): 
 288     """Retrieve command line arguments and options."""
 289     
 290     # Get options...
 291     global Options
 292     Options = docopt(_docoptUsage_)
 293 
 294     # Set current working directory to the specified directory...
 295     WorkingDir = Options["--workingdir"]
 296     if WorkingDir:
 297         os.chdir(WorkingDir)
 298     
 299     # Handle examples option...
 300     if "--examples" in Options and Options["--examples"]:
 301         MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_))
 302         sys.exit(0)
 303 
 304 def ValidateOptions():
 305     """Validate option values."""
 306     
 307     MiscUtil.ValidateOptionTextValue("--alignMethod", Options["--alignMethod"], "align cealign super")
 308     MiscUtil.ValidateOptionTextValue("--alignMode", Options["--alignMode"], "FirstChain Complex")
 309     
 310     # Expand infiles to handle presence of multiple input files...
 311     InfileNames = MiscUtil.ExpandFileNames(Options["--infiles"], ",")
 312     if len(InfileNames) < 2:
 313         MiscUtil.PrintError("Number of input files specified for \"-i, --infiles\" option, %d, must be greater than 2..." % (len(InfileNames)))
 314 
 315     # Validate file extensions...
 316     for Infile in InfileNames:
 317         MiscUtil.ValidateOptionFilePath("-i, --infiles", Infile)
 318         MiscUtil.ValidateOptionFileExt("-i, --infiles", Infile, "pdb cif")
 319     Options["--infileNames"] = InfileNames
 320 
 321     if not re.match("^FirstInputFile$", Options["--alignRefFile"], re.I):
 322         AlignRefFile = Options["--alignRefFile"]
 323         MiscUtil.ValidateOptionFilePath("--alignRefFile", AlignRefFile)
 324         MiscUtil.ValidateOptionFileExt("--alignRefFile", AlignRefFile, "pdb cif")
 325     
 326 # Setup a usage string for docopt...
 327 _docoptUsage_ = """
 328 PyMOLAlignChains.py - Align chains
 329 
 330 Usage:
 331     PyMOLAlignChains.py [--alignMethod <align, cealign, super>]
 332                         [--alignMode <FirstChain or Complex>] [--alignRefFile <filename>]
 333                         [--outSuffix <text>] [--overwrite] [-w <dir>] -i <infile1,infile2,infile3...>
 334     PyMOLAlignChains.py -h | --help | -e | --examples
 335 
 336 Description:
 337     Align chains in input files to a reference file and write out aligned files.
 338 
 339     The supported input and output file format are: PDB (.pdb), CIF(.cif)
 340 
 341     The names of the aligned output files are automatically generated from the
 342     names of input as shown below:
 343     
 344         <InfileRoot><OutSuffux>.pdb
 345         Default: <InfileRoot>_Aligned.pdb
 346     
 347 Options:
 348     -a, --alignMethod <align, cealign, super>  [default: super]
 349         Alignment methodology to use for aligning input files to a
 350         reference file.
 351     --alignMode <FirstChain or Complex>  [default: FirstChain]
 352         Portion of input and reference files to use for spatial alignment of
 353         input files against reference file.  Possible values: FirstChain or
 354         Complex.
 355         
 356         The FirstChain mode allows alignment of the first chain in each input
 357         file to the first chain in the reference file along with moving the rest
 358         of the complex to coordinate space of the reference file. The complete
 359         complex in each input file is aligned to the complete complex in reference
 360         file for the Complex mode.
 361     --alignRefFile <filename>  [default: FirstInputFile]
 362         Reference input file name. The default is to use the first input file
 363         name specified using '-i, --infiles' option.
 364     -e, --examples
 365         Print examples.
 366     -h, --help
 367         Print this help message.
 368     -i, --infiles <infile1,infile2,...>
 369         A comma delimited list of input files. The wildcards are also allowed
 370         in file names.
 371     --outSuffix <text>  [default: _Aligned]
 372         Suffix to append to input file root for generating name of output file.
 373     --overwrite
 374         Overwrite existing files.
 375     -w, --workingdir <dir>
 376         Location of working directory which defaults to the current directory.
 377 
 378 Examples:
 379     To align first chain in all input files to the first chain in first input file
 380     and write out aligned output files, type:
 381 
 382         % PyMOLAlignChains.py -i "Sample3.pdb,Sample4.pdb,Sample5.pdb"
 383 
 384     To align first chain in all input files to the first chain in specific reference
 385     file and write out aligned output files, type:
 386 
 387         % PyMOLAlignChains.py --alignRefFile Sample5.pdb
 388           -i "Sample3.pdb,Sample4.pdb,Sample5.pdb"
 389 
 390     To align first chain in all input files to the first chain in first input file
 391     using a specific alignment method and write out aligned output files
 392     with specific suffix in names, type:
 393 
 394         % PyMOLAlignChains.py --alignMethod cealign --outSuffix "_aligned"
 395           -i "Sample3.pdb,Sample4.pdb,Sample5.pdb"
 396 
 397     To align all chains in each input files to all chains in first input file and
 398     write out aligned output files, type:
 399 
 400         % PyMOLAlignChains.py --alignMode Complex
 401           -i "Sample3.pdb,Sample4.pdb,Sample5.pdb"
 402 
 403 Author:
 404     Manish Sud(msud@san.rr.com)
 405 
 406 See also:
 407     PyMOLCalculateRMSD.py, PyMOLSplitChainsAndLigands.py,
 408     PyMOLVisualizeMacromolecules.py
 409 
 410 Copyright:
 411     Copyright (C) 2024 Manish Sud. All rights reserved.
 412 
 413     The functionality available in this script is implemented using PyMOL, a
 414     molecular visualization system on an open source foundation originally
 415     developed by Warren DeLano.
 416 
 417     This file is part of MayaChemTools.
 418 
 419     MayaChemTools is free software; you can redistribute it and/or modify it under
 420     the terms of the GNU Lesser General Public License as published by the Free
 421     Software Foundation; either version 3 of the License, or (at your option) any
 422     later version.
 423 
 424 """
 425 
 426 if __name__ == "__main__":
 427     main()