MayaChemTools

   1 #!/bin/env python
   2 #
   3 # File: PyMOLAlignChains.py
   4 # Author: Manish Sud <msud@san.rr.com>
   5 #
   6 # Copyright (C) 2025 Manish Sud. All rights reserved.
   7 #
   8 # The functionality available in this script is implemented using PyMOL, a
   9 # molecular visualization system on an open source foundation originally
  10 # developed by Warren DeLano.
  11 #
  12 # This file is part of MayaChemTools.
  13 #
  14 # MayaChemTools is free software; you can redistribute it and/or modify it under
  15 # the terms of the GNU Lesser General Public License as published by the Free
  16 # Software Foundation; either version 3 of the License, or (at your option) any
  17 # later version.
  18 #
  19 # MayaChemTools is distributed in the hope that it will be useful, but without
  20 # any warranty; without even the implied warranty of merchantability or fitness
  21 # for a particular purpose.  See the GNU Lesser General Public License for more
  22 # details.
  23 #
  24 # You should have received a copy of the GNU Lesser General Public License
  25 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  26 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  27 # Boston, MA, 02111-1307, USA.
  28 #
  29 
  30 from __future__ import print_function
  31 
  32 import os
  33 import sys
  34 import time
  35 import re
  36 
  37 # PyMOL imports...
  38 try:
  39     import pymol
  40 
  41     # Finish launching PyMOL in  a command line mode for batch processing (-c)
  42     # along with the following options:  disable loading of pymolrc and plugins (-k);
  43     # suppress start up messages (-q)
  44     pymol.finish_launching(["pymol", "-ckq"])
  45 except ImportError as ErrMsg:
  46     sys.stderr.write("\nFailed to import PyMOL module/package: %s\n" % ErrMsg)
  47     sys.stderr.write("Check/update your PyMOL environment and try again.\n\n")
  48     sys.exit(1)
  49 
  50 # MayaChemTools imports...
  51 sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python"))
  52 try:
  53     from docopt import docopt
  54     import MiscUtil
  55     import PyMOLUtil
  56 except ImportError as ErrMsg:
  57     sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg)
  58     sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n")
  59     sys.exit(1)
  60 
# Base name of the script as invoked; used in the status messages printed by main().
ScriptName = os.path.basename(sys.argv[0])
# Raw command line options; rebound to the result of docopt() in RetrieveOptions().
Options = {}
# Processed option values; filled in by ProcessOptions() and the helpers it calls.
OptionsInfo = {}
  64 
  65 
  66 def main():
  67     """Start execution of the script."""
  68 
  69     MiscUtil.PrintInfo(
  70         "\n%s (PyMOL v%s; MayaChemTools v%s; %s): Starting...\n"
  71         % (ScriptName, pymol.cmd.get_version()[0], MiscUtil.GetMayaChemToolsVersion(), time.asctime())
  72     )
  73 
  74     (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime()
  75 
  76     # Retrieve command line arguments and options...
  77     RetrieveOptions()
  78 
  79     # Process and validate command line arguments and options...
  80     ProcessOptions()
  81 
  82     # Perform actions required by the script...
  83     PerformChainAlignment()
  84 
  85     MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName)
  86     MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime))
  87 
  88 
  89 def PerformChainAlignment():
  90     """Align chains and write out new files."""
  91 
  92     MiscUtil.PrintInfo("\nGenerating output files...")
  93 
  94     # Load reffile for alignment..
  95     SetupAlignReference()
  96 
  97     # Perform alignment for each input file and write it out...
  98     for FileIndex in range(0, len(OptionsInfo["InfilesInfo"]["InfilesNames"])):
  99         SetupInputObject(FileIndex)
 100         AlignInputObject(FileIndex)
 101         WriteAlignedInputObject(FileIndex)
 102         DeleteInputObject(FileIndex)
 103 
 104     # Delete reference object...
 105     pymol.cmd.delete(OptionsInfo["RefFileInfo"]["PyMOLObjectName"])
 106 
 107 
 108 def SetupAlignReference():
 109     """Setup object for alignment reference."""
 110 
 111     RefFile = OptionsInfo["RefFileInfo"]["RefFileName"]
 112     RefName = OptionsInfo["RefFileInfo"]["PyMOLObjectName"]
 113 
 114     pymol.cmd.load(RefFile, RefName)
 115 
 116 
 117 def SetupInputObject(FileIndex):
 118     """Setup a PyMOL object for input file."""
 119 
 120     InputFile = OptionsInfo["InfilesInfo"]["InfilesNames"][FileIndex]
 121     InputName = OptionsInfo["InfilesInfo"]["PyMOLObjectNames"][FileIndex]
 122 
 123     pymol.cmd.load(InputFile, InputName)
 124 
 125 
 126 def AlignInputObject(FileIndex):
 127     """Align input object to reference object."""
 128 
 129     RefName = OptionsInfo["RefFileInfo"]["PyMOLObjectName"]
 130     FitName = OptionsInfo["InfilesInfo"]["PyMOLObjectNames"][FileIndex]
 131 
 132     MiscUtil.PrintInfo("\nAligning %s to %s..." % (FitName, RefName))
 133 
 134     if re.match("^FirstChain$", OptionsInfo["AlignMode"], re.I):
 135         RefFirstChainID = OptionsInfo["RefFileInfo"]["ChainIDs"][0]
 136         RefSelection = "(%s and chain %s)" % (RefName, RefFirstChainID)
 137 
 138         FitFirstChainID = RetrieveFirstChainID(FileIndex)
 139         FitSelection = "(%s and chain %s)" % (FitName, FitFirstChainID)
 140     else:
 141         RefSelection = RefName
 142         FitSelection = FitName
 143 
 144     if re.match("^align$", OptionsInfo["AlignMethod"], re.I):
 145         pymol.cmd.align(FitSelection, RefSelection)
 146     elif re.match("^cealign$", OptionsInfo["AlignMethod"], re.I):
 147         pymol.cmd.cealign(RefSelection, FitSelection)
 148     elif re.match("^super$", OptionsInfo["AlignMethod"], re.I):
 149         pymol.cmd.super(FitSelection, RefSelection)
 150     else:
 151         MiscUtil.PrintError("Invalid alignment method: %s" % OptionsInfo["AlignMethod"])
 152 
 153 
 154 def WriteAlignedInputObject(FileIndex):
 155     """Write out aligned input object."""
 156 
 157     Outfile = OptionsInfo["InfilesInfo"]["OutfilesNames"][FileIndex]
 158     InputName = OptionsInfo["InfilesInfo"]["PyMOLObjectNames"][FileIndex]
 159 
 160     MiscUtil.PrintInfo("Generating aligned output file %s..." % Outfile)
 161 
 162     pymol.cmd.save(Outfile, InputName)
 163 
 164     if not os.path.exists(Outfile):
 165         MiscUtil.PrintWarning("Failed to generate aligned output file, %s..." % (Outfile))
 166 
 167 
 168 def DeleteInputObject(FileIndex):
 169     """Delete aligned input object."""
 170 
 171     InputName = OptionsInfo["InfilesInfo"]["PyMOLObjectNames"][FileIndex]
 172     pymol.cmd.delete(InputName)
 173 
 174 
 175 def RetrieveInfilesInfo():
 176     """Retrieve information for input files."""
 177 
 178     InfilesInfo = {}
 179 
 180     InfilesInfo["InfilesNames"] = []
 181     InfilesInfo["InfilesRoots"] = []
 182     InfilesInfo["ChainIDs"] = []
 183     InfilesInfo["PyMOLObjectNames"] = []
 184 
 185     InfilesInfo["OutfilesNames"] = []
 186 
 187     OutSuffix = OptionsInfo["OutSuffix"]
 188 
 189     for Infile in OptionsInfo["InfilesNames"]:
 190         MiscUtil.PrintInfo("\nRetrieving chains information for input file %s..." % Infile)
 191 
 192         FileDir, FileName, FileExt = MiscUtil.ParseFileName(Infile)
 193         InfileRoot = FileName
 194 
 195         ChainIDs = RetrieveChainIDs(Infile, InfileRoot)
 196         if not len(ChainIDs):
 197             if re.match("^FirstChain$", OptionsInfo["AlignMode"], re.I):
 198                 MiscUtil.PrintError(
 199                     "The align mode, %s, can't be used for aligning chains: No non-empty chain IDs found in input file."
 200                     % (OptionsInfo["AlignMode"])
 201                 )
 202 
 203         InfilesInfo["InfilesNames"].append(Infile)
 204         InfilesInfo["InfilesRoots"].append(InfileRoot)
 205         InfilesInfo["ChainIDs"].append(ChainIDs)
 206 
 207         InfilesInfo["PyMOLObjectNames"].append(InfileRoot)
 208 
 209         # Setup outfile name...
 210         Outfile = "%s%s.pdb" % (InfileRoot, OutSuffix)
 211         InfilesInfo["OutfilesNames"].append(Outfile)
 212         if os.path.exists(Outfile):
 213             if not OptionsInfo["Overwrite"]:
 214                 MiscUtil.PrintError(
 215                     'The output file, %s, already exist. Use option "--ov" or "--overwrite" and try again.\n'
 216                     % (Outfile)
 217                 )
 218 
 219     OptionsInfo["InfilesInfo"] = InfilesInfo
 220 
 221 
 222 def RetrieveRefFileInfo():
 223     """Retrieve information for ref file."""
 224 
 225     RefFileInfo = {}
 226 
 227     RefFile = OptionsInfo["RefFileName"]
 228 
 229     FileDir, FileName, FileExt = MiscUtil.ParseFileName(RefFile)
 230     RefFileRoot = FileName
 231 
 232     if re.match("^FirstInputFile$", OptionsInfo["AlignRefFile"], re.I):
 233         ChainIDs = OptionsInfo["InfilesInfo"]["ChainIDs"][0]
 234     else:
 235         MiscUtil.PrintInfo("\nRetrieving chains information for alignment reference file %s..." % RefFile)
 236         ChainIDs = RetrieveChainIDs(RefFile, RefFileRoot)
 237         if not len(ChainIDs):
 238             if re.match("^FirstChain$", OptionsInfo["AlignMode"], re.I):
 239                 MiscUtil.PrintError(
 240                     "The align mode, %s, can't be used for aligning chains: No non-empty chain IDs found in input file."
 241                     % (OptionsInfo["AlignMode"])
 242                 )
 243 
 244     RefFileInfo["RefFileName"] = RefFile
 245     RefFileInfo["RefFileRoot"] = RefFileRoot
 246     RefFileInfo["PyMOLObjectName"] = "AlignRef_%s" % RefFileRoot
 247     RefFileInfo["ChainIDs"] = ChainIDs
 248 
 249     OptionsInfo["RefFileInfo"] = RefFileInfo
 250 
 251 
 252 def RetrieveChainIDs(Infile, InfileRoot):
 253     """Retrieve chains IDs for an input file."""
 254 
 255     pymol.cmd.reinitialize()
 256 
 257     MolName = InfileRoot
 258     pymol.cmd.load(Infile, MolName)
 259 
 260     ChainIDs = PyMOLUtil.GetChains(MolName)
 261     pymol.cmd.delete(MolName)
 262 
 263     if ChainIDs is None:
 264         ChainIDs = []
 265 
 266     # Print out chain and ligand IDs...
 267     ChainInfo = ", ".join(ChainIDs) if len(ChainIDs) else "None"
 268     MiscUtil.PrintInfo("Chain IDs: %s" % ChainInfo)
 269 
 270     return ChainIDs
 271 
 272 
 273 def RetrieveFirstChainID(FileIndex):
 274     """Get first chain ID."""
 275 
 276     ChainIDs = OptionsInfo["InfilesInfo"]["ChainIDs"][FileIndex]
 277 
 278     FirstChainID = None
 279     if len(ChainIDs):
 280         FirstChainID = ChainIDs[0]
 281 
 282     return FirstChainID
 283 
 284 
 285 def ProcessOptions():
 286     """Process and validate command line arguments and options."""
 287 
 288     MiscUtil.PrintInfo("Processing options...")
 289 
 290     # Validate options...
 291     ValidateOptions()
 292 
 293     OptionsInfo["AlignMethod"] = Options["--alignMethod"].lower()
 294     OptionsInfo["AlignMode"] = Options["--alignMode"]
 295 
 296     OptionsInfo["Infiles"] = Options["--infiles"]
 297     OptionsInfo["InfilesNames"] = Options["--infileNames"]
 298 
 299     OptionsInfo["AlignRefFile"] = Options["--alignRefFile"]
 300     if re.match("^FirstInputFile$", Options["--alignRefFile"], re.I):
 301         OptionsInfo["RefFileName"] = OptionsInfo["InfilesNames"][0]
 302     else:
 303         OptionsInfo["RefFileName"] = Options["--alignRefFile"]
 304 
 305     OptionsInfo["OutSuffix"] = Options["--outSuffix"]
 306 
 307     OptionsInfo["Overwrite"] = Options["--overwrite"]
 308 
 309     RetrieveInfilesInfo()
 310     RetrieveRefFileInfo()
 311 
 312 
 313 def RetrieveOptions():
 314     """Retrieve command line arguments and options."""
 315 
 316     # Get options...
 317     global Options
 318     Options = docopt(_docoptUsage_)
 319 
 320     # Set current working directory to the specified directory...
 321     WorkingDir = Options["--workingdir"]
 322     if WorkingDir:
 323         os.chdir(WorkingDir)
 324 
 325     # Handle examples option...
 326     if "--examples" in Options and Options["--examples"]:
 327         MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_))
 328         sys.exit(0)
 329 
 330 
 331 def ValidateOptions():
 332     """Validate option values."""
 333 
 334     MiscUtil.ValidateOptionTextValue("--alignMethod", Options["--alignMethod"], "align cealign super")
 335     MiscUtil.ValidateOptionTextValue("--alignMode", Options["--alignMode"], "FirstChain Complex")
 336 
 337     # Expand infiles to handle presence of multiple input files...
 338     InfileNames = MiscUtil.ExpandFileNames(Options["--infiles"], ",")
 339     if len(InfileNames) < 2:
 340         MiscUtil.PrintError(
 341             'Number of input files specified for "-i, --infiles" option, %d, must be greater than 2...'
 342             % (len(InfileNames))
 343         )
 344 
 345     # Validate file extensions...
 346     for Infile in InfileNames:
 347         MiscUtil.ValidateOptionFilePath("-i, --infiles", Infile)
 348         MiscUtil.ValidateOptionFileExt("-i, --infiles", Infile, "pdb cif")
 349     Options["--infileNames"] = InfileNames
 350 
 351     if not re.match("^FirstInputFile$", Options["--alignRefFile"], re.I):
 352         AlignRefFile = Options["--alignRefFile"]
 353         MiscUtil.ValidateOptionFilePath("--alignRefFile", AlignRefFile)
 354         MiscUtil.ValidateOptionFileExt("--alignRefFile", AlignRefFile, "pdb cif")
 355 
 356 
 357 # Setup a usage string for docopt...
 358 _docoptUsage_ = """
 359 PyMOLAlignChains.py - Align chains
 360 
 361 Usage:
 362     PyMOLAlignChains.py [--alignMethod <align, cealign, super>]
 363                         [--alignMode <FirstChain or Complex>] [--alignRefFile <filename>]
 364                         [--outSuffix <text>] [--overwrite] [-w <dir>] -i <infile1,infile2,infile3...>
 365     PyMOLAlignChains.py -h | --help | -e | --examples
 366 
 367 Description:
 368     Align chains in input files to a reference file and write out aligned files.
 369 
  370     The supported input and output file formats are: PDB (.pdb), CIF (.cif)
 371 
 372     The names of the aligned output files are automatically generated from the
 373     names of input as shown below:
 374     
  375         <InfileRoot><OutSuffix>.pdb
 376         Default: <InfileRoot>_Aligned.pdb
 377     
 378 Options:
 379     -a, --alignMethod <align, cealign, super>  [default: super]
 380         Alignment methodology to use for aligning input files to a
 381         reference file.
 382     --alignMode <FirstChain or Complex>  [default: FirstChain]
 383         Portion of input and reference files to use for spatial alignment of
 384         input files against reference file.  Possible values: FirstChain or
 385         Complex.
 386         
 387         The FirstChain mode allows alignment of the first chain in each input
 388         file to the first chain in the reference file along with moving the rest
 389         of the complex to coordinate space of the reference file. The complete
 390         complex in each input file is aligned to the complete complex in reference
 391         file for the Complex mode.
 392     --alignRefFile <filename>  [default: FirstInputFile]
 393         Reference input file name. The default is to use the first input file
 394         name specified using '-i, --infiles' option.
 395     -e, --examples
 396         Print examples.
 397     -h, --help
 398         Print this help message.
 399     -i, --infiles <infile1,infile2,...>
 400         A comma delimited list of input files. The wildcards are also allowed
 401         in file names.
 402     --outSuffix <text>  [default: _Aligned]
 403         Suffix to append to input file root for generating name of output file.
 404     --overwrite
 405         Overwrite existing files.
 406     -w, --workingdir <dir>
 407         Location of working directory which defaults to the current directory.
 408 
 409 Examples:
 410     To align first chain in all input files to the first chain in first input file
 411     and write out aligned output files, type:
 412 
 413         % PyMOLAlignChains.py -i "Sample3.pdb,Sample4.pdb,Sample5.pdb"
 414 
 415     To align first chain in all input files to the first chain in specific reference
 416     file and write out aligned output files, type:
 417 
 418         % PyMOLAlignChains.py --alignRefFile Sample5.pdb
 419           -i "Sample3.pdb,Sample4.pdb,Sample5.pdb"
 420 
 421     To align first chain in all input files to the first chain in first input file
 422     using a specific alignment method and write out aligned output files
 423     with specific suffix in names, type:
 424 
 425         % PyMOLAlignChains.py --alignMethod cealign --outSuffix "_aligned"
 426           -i "Sample3.pdb,Sample4.pdb,Sample5.pdb"
 427 
  428     To align all chains in each input file to all chains in first input file and
 429     write out aligned output files, type:
 430 
 431         % PyMOLAlignChains.py --alignMode Complex
 432           -i "Sample3.pdb,Sample4.pdb,Sample5.pdb"
 433 
 434 Author:
 435     Manish Sud(msud@san.rr.com)
 436 
 437 See also:
 438     PyMOLCalculateRMSD.py, PyMOLSplitChainsAndLigands.py,
 439     PyMOLVisualizeMacromolecules.py
 440 
 441 Copyright:
 442     Copyright (C) 2025 Manish Sud. All rights reserved.
 443 
 444     The functionality available in this script is implemented using PyMOL, a
 445     molecular visualization system on an open source foundation originally
 446     developed by Warren DeLano.
 447 
 448     This file is part of MayaChemTools.
 449 
 450     MayaChemTools is free software; you can redistribute it and/or modify it under
 451     the terms of the GNU Lesser General Public License as published by the Free
 452     Software Foundation; either version 3 of the License, or (at your option) any
 453     later version.
 454 
 455 """
 456 
# Start the script only when this file is executed directly, not when it is imported.
if __name__ == "__main__":
    main()