1 #!/bin/env python 2 # 3 # File: PyMOLAlignChains.py 4 # Author: Manish Sud <msud@san.rr.com> 5 # 6 # Copyright (C) 2024 Manish Sud. All rights reserved. 7 # 8 # The functionality available in this script is implemented using PyMOL, a 9 # molecular visualization system on an open source foundation originally 10 # developed by Warren DeLano. 11 # 12 # This file is part of MayaChemTools. 13 # 14 # MayaChemTools is free software; you can redistribute it and/or modify it under 15 # the terms of the GNU Lesser General Public License as published by the Free 16 # Software Foundation; either version 3 of the License, or (at your option) any 17 # later version. 18 # 19 # MayaChemTools is distributed in the hope that it will be useful, but without 20 # any warranty; without even the implied warranty of merchantability of fitness 21 # for a particular purpose. See the GNU Lesser General Public License for more 22 # details. 23 # 24 # You should have received a copy of the GNU Lesser General Public License 25 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or 26 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, 27 # Boston, MA, 02111-1307, USA. 28 # 29 30 from __future__ import print_function 31 32 # Add local python path to the global path and import standard library modules... 33 import os 34 import sys; sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python")) 35 import time 36 import re 37 38 # PyMOL imports... 39 try: 40 import pymol 41 # Finish launching PyMOL in a command line mode for batch processing (-c) 42 # along with the following options: disable loading of pymolrc and plugins (-k); 43 # suppress start up messages (-q) 44 pymol.finish_launching(['pymol', '-ckq']) 45 except ImportError as ErrMsg: 46 sys.stderr.write("\nFailed to import PyMOL module/package: %s\n" % ErrMsg) 47 sys.stderr.write("Check/update your PyMOL environment and try again.\n\n") 48 sys.exit(1) 49 50 # MayaChemTools imports... 51 try: 52 from docopt import docopt 53 import MiscUtil 54 import PyMOLUtil 55 except ImportError as ErrMsg: 56 sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg) 57 sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n") 58 sys.exit(1) 59 60 ScriptName = os.path.basename(sys.argv[0]) 61 Options = {} 62 OptionsInfo = {} 63 64 def main(): 65 """Start execution of the script.""" 66 67 MiscUtil.PrintInfo("\n%s (PyMOL v%s; MayaChemTools v%s; %s): Starting...\n" % (ScriptName, pymol.cmd.get_version()[0], MiscUtil.GetMayaChemToolsVersion(), time.asctime())) 68 69 (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime() 70 71 # Retrieve command line arguments and options... 72 RetrieveOptions() 73 74 # Process and validate command line arguments and options... 75 ProcessOptions() 76 77 # Perform actions required by the script... 78 PerformChainAlignment() 79 80 MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName) 81 MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime)) 82 83 def PerformChainAlignment(): 84 """Align chains and write out new files.""" 85 86 MiscUtil.PrintInfo("\nGenerating output files...") 87 88 # Load reffile for alignment.. 89 SetupAlignReference() 90 91 # Perform alignment for each input file and write it out... 92 for FileIndex in range(0, len(OptionsInfo["InfilesInfo"]["InfilesNames"])): 93 SetupInputObject(FileIndex) 94 AlignInputObject(FileIndex) 95 WriteAlignedInputObject(FileIndex) 96 DeleteInputObject(FileIndex) 97 98 # Delete reference object... 99 pymol.cmd.delete(OptionsInfo["RefFileInfo"]["PyMOLObjectName"]) 100 101 def SetupAlignReference(): 102 """Setup object for alignment reference.""" 103 104 RefFile = OptionsInfo["RefFileInfo"]["RefFileName"] 105 RefName = OptionsInfo["RefFileInfo"]["PyMOLObjectName"] 106 107 pymol.cmd.load(RefFile, RefName) 108 109 def SetupInputObject(FileIndex): 110 """Setup a PyMOL object for input file.""" 111 112 InputFile = OptionsInfo["InfilesInfo"]["InfilesNames"][FileIndex] 113 InputName = OptionsInfo["InfilesInfo"]["PyMOLObjectNames"][FileIndex] 114 115 pymol.cmd.load(InputFile, InputName) 116 117 def AlignInputObject(FileIndex): 118 """Align input object to reference object.""" 119 120 RefName = OptionsInfo["RefFileInfo"]["PyMOLObjectName"] 121 FitName = OptionsInfo["InfilesInfo"]["PyMOLObjectNames"][FileIndex] 122 123 MiscUtil.PrintInfo("\nAligning %s to %s..." % (FitName, RefName)) 124 125 if re.match("^FirstChain$", OptionsInfo["AlignMode"], re.I): 126 RefFirstChainID = OptionsInfo["RefFileInfo"]["ChainIDs"][0] 127 RefSelection = "(%s and chain %s)" % (RefName, RefFirstChainID) 128 129 FitFirstChainID = RetrieveFirstChainID(FileIndex) 130 FitSelection = "(%s and chain %s)" % (FitName, FitFirstChainID) 131 else: 132 RefSelection = RefName 133 FitSelection = FitName 134 135 if re.match("^align$", OptionsInfo["AlignMethod"], re.I): 136 pymol.cmd.align(FitSelection, RefSelection) 137 elif re.match("^cealign$", OptionsInfo["AlignMethod"], re.I): 138 pymol.cmd.cealign(RefSelection, FitSelection) 139 elif re.match("^super$", OptionsInfo["AlignMethod"], re.I): 140 pymol.cmd.super(FitSelection, RefSelection) 141 else: 142 MiscUtil.PrintError("Invalid alignment method: %s" % OptionsInfo["AlignMethod"]) 143 144 def WriteAlignedInputObject(FileIndex): 145 """Write out aligned input object.""" 146 147 Outfile = OptionsInfo["InfilesInfo"]["OutfilesNames"][FileIndex] 148 InputName = OptionsInfo["InfilesInfo"]["PyMOLObjectNames"][FileIndex] 149 150 MiscUtil.PrintInfo("Generating aligned output file %s..." % Outfile) 151 152 pymol.cmd.save(Outfile, InputName) 153 154 if not os.path.exists(Outfile): 155 MiscUtil.PrintWarning("Failed to generate aligned output file, %s..." % (Outfile)) 156 157 def DeleteInputObject(FileIndex): 158 """Delete aligned input object.""" 159 160 InputName = OptionsInfo["InfilesInfo"]["PyMOLObjectNames"][FileIndex] 161 pymol.cmd.delete(InputName) 162 163 def RetrieveInfilesInfo(): 164 """Retrieve information for input files.""" 165 166 InfilesInfo = {} 167 168 InfilesInfo["InfilesNames"] = [] 169 InfilesInfo["InfilesRoots"] = [] 170 InfilesInfo["ChainIDs"] = [] 171 InfilesInfo["PyMOLObjectNames"] = [] 172 173 InfilesInfo["OutfilesNames"] = [] 174 175 OutSuffix = OptionsInfo["OutSuffix"] 176 177 for Infile in OptionsInfo["InfilesNames"]: 178 MiscUtil.PrintInfo("\nRetrieving chains information for input file %s..." % Infile) 179 180 FileDir, FileName, FileExt = MiscUtil.ParseFileName(Infile) 181 InfileRoot = FileName 182 183 ChainIDs = RetrieveChainIDs(Infile, InfileRoot) 184 if not len(ChainIDs): 185 if re.match("^FirstChain$", OptionsInfo["AlignMode"], re.I): 186 MiscUtil.PrintError("The align mode, %s, can't be used for aligning chains: No non-empty chain IDs found in input file." % (OptionsInfo["AlignMode"])) 187 188 InfilesInfo["InfilesNames"].append(Infile) 189 InfilesInfo["InfilesRoots"].append(InfileRoot) 190 InfilesInfo["ChainIDs"].append(ChainIDs) 191 192 InfilesInfo["PyMOLObjectNames"].append(InfileRoot) 193 194 # Setup outfile name... 195 Outfile = "%s%s.pdb" % (InfileRoot, OutSuffix) 196 InfilesInfo["OutfilesNames"].append(Outfile) 197 if os.path.exists(Outfile): 198 if not OptionsInfo["Overwrite"]: 199 MiscUtil.PrintError("The output file, %s, already exist. Use option \"--ov\" or \"--overwrite\" and try again.\n" % (Outfile)) 200 201 OptionsInfo["InfilesInfo"] = InfilesInfo 202 203 def RetrieveRefFileInfo(): 204 """Retrieve information for ref file.""" 205 206 RefFileInfo = {} 207 208 RefFile = OptionsInfo["RefFileName"] 209 210 FileDir, FileName, FileExt = MiscUtil.ParseFileName(RefFile) 211 RefFileRoot = FileName 212 213 if re.match("^FirstInputFile$", OptionsInfo["AlignRefFile"], re.I): 214 ChainIDs = OptionsInfo["InfilesInfo"]["ChainIDs"][0] 215 else: 216 MiscUtil.PrintInfo("\nRetrieving chains information for alignment reference file %s..." % RefFile) 217 ChainIDs = RetrieveChainIDs(RefFile, RefFileRoot) 218 if not len(ChainIDs): 219 if re.match("^FirstChain$", OptionsInfo["AlignMode"], re.I): 220 MiscUtil.PrintError("The align mode, %s, can't be used for aligning chains: No non-empty chain IDs found in input file." % (OptionsInfo["AlignMode"])) 221 222 RefFileInfo["RefFileName"] = RefFile 223 RefFileInfo["RefFileRoot"] = RefFileRoot 224 RefFileInfo["PyMOLObjectName"] = "AlignRef_%s" % RefFileRoot 225 RefFileInfo["ChainIDs"] = ChainIDs 226 227 OptionsInfo["RefFileInfo"] = RefFileInfo 228 229 def RetrieveChainIDs(Infile, InfileRoot): 230 """Retrieve chains IDs for an input file.""" 231 232 pymol.cmd.reinitialize() 233 234 MolName = InfileRoot 235 pymol.cmd.load(Infile, MolName) 236 237 ChainIDs = PyMOLUtil.GetChains(MolName) 238 pymol.cmd.delete(MolName) 239 240 if ChainIDs is None: 241 ChainIDs = [] 242 243 # Print out chain and ligand IDs... 244 ChainInfo = ", ".join(ChainIDs) if len(ChainIDs) else "None" 245 MiscUtil.PrintInfo("Chain IDs: %s" % ChainInfo) 246 247 return ChainIDs 248 249 def RetrieveFirstChainID(FileIndex): 250 """Get first chain ID.""" 251 252 ChainIDs = OptionsInfo["InfilesInfo"]["ChainIDs"][FileIndex] 253 254 FirstChainID = None 255 if len(ChainIDs): 256 FirstChainID = ChainIDs[0] 257 258 return FirstChainID 259 260 def ProcessOptions(): 261 """Process and validate command line arguments and options.""" 262 263 MiscUtil.PrintInfo("Processing options...") 264 265 # Validate options... 266 ValidateOptions() 267 268 OptionsInfo["AlignMethod"] = Options["--alignMethod"].lower() 269 OptionsInfo["AlignMode"] = Options["--alignMode"] 270 271 OptionsInfo["Infiles"] = Options["--infiles"] 272 OptionsInfo["InfilesNames"] = Options["--infileNames"] 273 274 OptionsInfo["AlignRefFile"] = Options["--alignRefFile"] 275 if re.match("^FirstInputFile$", Options["--alignRefFile"], re.I): 276 OptionsInfo["RefFileName"] = OptionsInfo["InfilesNames"][0] 277 else: 278 OptionsInfo["RefFileName"] = Options["--alignRefFile"] 279 280 OptionsInfo["OutSuffix"] = Options["--outSuffix"] 281 282 OptionsInfo["Overwrite"] = Options["--overwrite"] 283 284 RetrieveInfilesInfo() 285 RetrieveRefFileInfo() 286 287 def RetrieveOptions(): 288 """Retrieve command line arguments and options.""" 289 290 # Get options... 291 global Options 292 Options = docopt(_docoptUsage_) 293 294 # Set current working directory to the specified directory... 295 WorkingDir = Options["--workingdir"] 296 if WorkingDir: 297 os.chdir(WorkingDir) 298 299 # Handle examples option... 300 if "--examples" in Options and Options["--examples"]: 301 MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_)) 302 sys.exit(0) 303 304 def ValidateOptions(): 305 """Validate option values.""" 306 307 MiscUtil.ValidateOptionTextValue("--alignMethod", Options["--alignMethod"], "align cealign super") 308 MiscUtil.ValidateOptionTextValue("--alignMode", Options["--alignMode"], "FirstChain Complex") 309 310 # Expand infiles to handle presence of multiple input files... 311 InfileNames = MiscUtil.ExpandFileNames(Options["--infiles"], ",") 312 if len(InfileNames) < 2: 313 MiscUtil.PrintError("Number of input files specified for \"-i, --infiles\" option, %d, must be greater than 2..." % (len(InfileNames))) 314 315 # Validate file extensions... 316 for Infile in InfileNames: 317 MiscUtil.ValidateOptionFilePath("-i, --infiles", Infile) 318 MiscUtil.ValidateOptionFileExt("-i, --infiles", Infile, "pdb cif") 319 Options["--infileNames"] = InfileNames 320 321 if not re.match("^FirstInputFile$", Options["--alignRefFile"], re.I): 322 AlignRefFile = Options["--alignRefFile"] 323 MiscUtil.ValidateOptionFilePath("--alignRefFile", AlignRefFile) 324 MiscUtil.ValidateOptionFileExt("--alignRefFile", AlignRefFile, "pdb cif") 325 326 # Setup a usage string for docopt... 327 _docoptUsage_ = """ 328 PyMOLAlignChains.py - Align chains 329 330 Usage: 331 PyMOLAlignChains.py [--alignMethod <align, cealign, super>] 332 [--alignMode <FirstChain or Complex>] [--alignRefFile <filename>] 333 [--outSuffix <text>] [--overwrite] [-w <dir>] -i <infile1,infile2,infile3...> 334 PyMOLAlignChains.py -h | --help | -e | --examples 335 336 Description: 337 Align chains in input files to a reference file and write out aligned files. 338 339 The supported input and output file format are: PDB (.pdb), CIF(.cif) 340 341 The names of the aligned output files are automatically generated from the 342 names of input as shown below: 343 344 <InfileRoot><OutSuffux>.pdb 345 Default: <InfileRoot>_Aligned.pdb 346 347 Options: 348 -a, --alignMethod <align, cealign, super> [default: super] 349 Alignment methodology to use for aligning input files to a 350 reference file. 351 --alignMode <FirstChain or Complex> [default: FirstChain] 352 Portion of input and reference files to use for spatial alignment of 353 input files against reference file. Possible values: FirstChain or 354 Complex. 355 356 The FirstChain mode allows alignment of the first chain in each input 357 file to the first chain in the reference file along with moving the rest 358 of the complex to coordinate space of the reference file. The complete 359 complex in each input file is aligned to the complete complex in reference 360 file for the Complex mode. 361 --alignRefFile <filename> [default: FirstInputFile] 362 Reference input file name. The default is to use the first input file 363 name specified using '-i, --infiles' option. 364 -e, --examples 365 Print examples. 366 -h, --help 367 Print this help message. 368 -i, --infiles <infile1,infile2,...> 369 A comma delimited list of input files. The wildcards are also allowed 370 in file names. 371 --outSuffix <text> [default: _Aligned] 372 Suffix to append to input file root for generating name of output file. 373 --overwrite 374 Overwrite existing files. 375 -w, --workingdir <dir> 376 Location of working directory which defaults to the current directory. 377 378 Examples: 379 To align first chain in all input files to the first chain in first input file 380 and write out aligned output files, type: 381 382 % PyMOLAlignChains.py -i "Sample3.pdb,Sample4.pdb,Sample5.pdb" 383 384 To align first chain in all input files to the first chain in specific reference 385 file and write out aligned output files, type: 386 387 % PyMOLAlignChains.py --alignRefFile Sample5.pdb 388 -i "Sample3.pdb,Sample4.pdb,Sample5.pdb" 389 390 To align first chain in all input files to the first chain in first input file 391 using a specific alignment method and write out aligned output files 392 with specific suffix in names, type: 393 394 % PyMOLAlignChains.py --alignMethod cealign --outSuffix "_aligned" 395 -i "Sample3.pdb,Sample4.pdb,Sample5.pdb" 396 397 To align all chains in each input files to all chains in first input file and 398 write out aligned output files, type: 399 400 % PyMOLAlignChains.py --alignMode Complex 401 -i "Sample3.pdb,Sample4.pdb,Sample5.pdb" 402 403 Author: 404 Manish Sud(msud@san.rr.com) 405 406 See also: 407 PyMOLCalculateRMSD.py, PyMOLSplitChainsAndLigands.py, 408 PyMOLVisualizeMacromolecules.py 409 410 Copyright: 411 Copyright (C) 2024 Manish Sud. All rights reserved. 412 413 The functionality available in this script is implemented using PyMOL, a 414 molecular visualization system on an open source foundation originally 415 developed by Warren DeLano. 416 417 This file is part of MayaChemTools. 418 419 MayaChemTools is free software; you can redistribute it and/or modify it under 420 the terms of the GNU Lesser General Public License as published by the Free 421 Software Foundation; either version 3 of the License, or (at your option) any 422 later version. 423 424 """ 425 426 if __name__ == "__main__": 427 main()