#!/bin/env python
#
# File: PyMOLAlignChains.py
# Author: Manish Sud <msud@san.rr.com>
#
# Copyright (C) 2025 Manish Sud. All rights reserved.
#
# The functionality available in this script is implemented using PyMOL, a
# molecular visualization system on an open source foundation originally
# developed by Warren DeLano.
#
# This file is part of MayaChemTools.
#
# MayaChemTools is free software; you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation; either version 3 of the License, or (at your option) any
# later version.
#
# MayaChemTools is distributed in the hope that it will be useful, but without
# any warranty; without even the implied warranty of merchantability of fitness
# for a particular purpose. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
# write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
# Boston, MA, 02111-1307, USA.
#

from __future__ import print_function

import os
import sys
import time
import re

# PyMOL imports...
try:
    import pymol

    # Finish launching PyMOL in a command line mode for batch processing (-c)
    # along with the following options: disable loading of pymolrc and plugins (-k);
    # suppress start up messages (-q)
    pymol.finish_launching(["pymol", "-ckq"])
except ImportError as ErrMsg:
    sys.stderr.write("\nFailed to import PyMOL module/package: %s\n" % ErrMsg)
    sys.stderr.write("Check/update your PyMOL environment and try again.\n\n")
    sys.exit(1)

# MayaChemTools imports...
sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python"))
try:
    from docopt import docopt
    import MiscUtil
    import PyMOLUtil
except ImportError as ErrMsg:
    sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg)
    sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n")
    sys.exit(1)

ScriptName = os.path.basename(sys.argv[0])

# Raw docopt result (filled in by RetrieveOptions) and the processed option
# values derived from it (filled in by ProcessOptions).
Options = {}
OptionsInfo = {}


def main():
    """Start execution of the script.

    Prints a start-up banner, retrieves and processes the command line
    options, performs the alignment and reports the total elapsed time.
    """

    MiscUtil.PrintInfo(
        "\n%s (PyMOL v%s; MayaChemTools v%s; %s): Starting...\n"
        % (ScriptName, pymol.cmd.get_version()[0], MiscUtil.GetMayaChemToolsVersion(), time.asctime())
    )

    (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime()

    # Retrieve command line arguments and options...
    RetrieveOptions()

    # Process and validate command line arguments and options...
    ProcessOptions()

    # Perform actions required by the script...
    PerformChainAlignment()

    MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName)
    MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime))


def PerformChainAlignment():
    """Align chains and write out new files.

    Loads the reference file once, then, for each input file in turn: loads
    it, aligns it to the reference, saves the aligned object and deletes it
    from the PyMOL session. The reference object is deleted at the end.
    """

    MiscUtil.PrintInfo("\nGenerating output files...")

    # Load reffile for alignment..
    SetupAlignReference()

    # Perform alignment for each input file and write it out. The helper
    # functions look up their per-file data by index...
    for FileIndex in range(len(OptionsInfo["InfilesInfo"]["InfilesNames"])):
        SetupInputObject(FileIndex)
        AlignInputObject(FileIndex)
        WriteAlignedInputObject(FileIndex)
        DeleteInputObject(FileIndex)

    # Delete reference object...
    pymol.cmd.delete(OptionsInfo["RefFileInfo"]["PyMOLObjectName"])


def SetupAlignReference():
    """Setup object for alignment reference.

    Loads the reference file into PyMOL under the object name stored in
    OptionsInfo["RefFileInfo"].
    """

    RefFile = OptionsInfo["RefFileInfo"]["RefFileName"]
    RefName = OptionsInfo["RefFileInfo"]["PyMOLObjectName"]

    pymol.cmd.load(RefFile, RefName)


def SetupInputObject(FileIndex):
    """Setup a PyMOL object for input file.

    Arguments:
        FileIndex (int): Index of the input file in OptionsInfo["InfilesInfo"].
    """

    InputFile = OptionsInfo["InfilesInfo"]["InfilesNames"][FileIndex]
    InputName = OptionsInfo["InfilesInfo"]["PyMOLObjectNames"][FileIndex]

    pymol.cmd.load(InputFile, InputName)


def AlignInputObject(FileIndex):
    """Align input object to reference object.

    In the FirstChain align mode, the selections used for fitting are limited
    to the first chain of the reference and input objects; otherwise, the
    complete objects are used. The alignment method is picked based on
    OptionsInfo["AlignMethod"].

    Arguments:
        FileIndex (int): Index of the input file in OptionsInfo["InfilesInfo"].
    """

    RefName = OptionsInfo["RefFileInfo"]["PyMOLObjectName"]
    FitName = OptionsInfo["InfilesInfo"]["PyMOLObjectNames"][FileIndex]

    MiscUtil.PrintInfo("\nAligning %s to %s..." % (FitName, RefName))

    if re.match("^FirstChain$", OptionsInfo["AlignMode"], re.I):
        RefFirstChainID = OptionsInfo["RefFileInfo"]["ChainIDs"][0]
        RefSelection = "(%s and chain %s)" % (RefName, RefFirstChainID)

        FitFirstChainID = RetrieveFirstChainID(FileIndex)
        FitSelection = "(%s and chain %s)" % (FitName, FitFirstChainID)
    else:
        RefSelection = RefName
        FitSelection = FitName

    # The argument order differs between methods on purpose: align and super
    # take (mobile, target) while cealign takes (target, mobile).
    if re.match("^align$", OptionsInfo["AlignMethod"], re.I):
        pymol.cmd.align(FitSelection, RefSelection)
    elif re.match("^cealign$", OptionsInfo["AlignMethod"], re.I):
        pymol.cmd.cealign(RefSelection, FitSelection)
    elif re.match("^super$", OptionsInfo["AlignMethod"], re.I):
        pymol.cmd.super(FitSelection, RefSelection)
    else:
        MiscUtil.PrintError("Invalid alignment method: %s" % OptionsInfo["AlignMethod"])


def WriteAlignedInputObject(FileIndex):
    """Write out aligned input object.

    Saves the PyMOL object for the input file to its output file and issues
    a warning when the output file doesn't exist afterwards.

    Arguments:
        FileIndex (int): Index of the input file in OptionsInfo["InfilesInfo"].
    """

    Outfile = OptionsInfo["InfilesInfo"]["OutfilesNames"][FileIndex]
    InputName = OptionsInfo["InfilesInfo"]["PyMOLObjectNames"][FileIndex]

    MiscUtil.PrintInfo("Generating aligned output file %s..." % Outfile)

    pymol.cmd.save(Outfile, InputName)

    if not os.path.exists(Outfile):
        MiscUtil.PrintWarning("Failed to generate aligned output file, %s..." % (Outfile))


def DeleteInputObject(FileIndex):
    """Delete aligned input object.

    Arguments:
        FileIndex (int): Index of the input file in OptionsInfo["InfilesInfo"].
    """

    InputName = OptionsInfo["InfilesInfo"]["PyMOLObjectNames"][FileIndex]
    pymol.cmd.delete(InputName)


def RetrieveInfilesInfo():
    """Retrieve information for input files.

    For each input file, collects its name, root name, chain IDs, PyMOL
    object name and output file name into parallel lists and stores them in
    OptionsInfo["InfilesInfo"]. An error is reported when the FirstChain
    align mode is used for a file without chain IDs or when an output file
    already exists and overwriting hasn't been requested.
    """

    InfilesInfo = {}

    InfilesInfo["InfilesNames"] = []
    InfilesInfo["InfilesRoots"] = []
    InfilesInfo["ChainIDs"] = []
    InfilesInfo["PyMOLObjectNames"] = []

    InfilesInfo["OutfilesNames"] = []

    OutSuffix = OptionsInfo["OutSuffix"]

    for Infile in OptionsInfo["InfilesNames"]:
        MiscUtil.PrintInfo("\nRetrieving chains information for input file %s..." % Infile)

        FileDir, FileName, FileExt = MiscUtil.ParseFileName(Infile)
        InfileRoot = FileName

        ChainIDs = RetrieveChainIDs(Infile, InfileRoot)
        if not ChainIDs and re.match("^FirstChain$", OptionsInfo["AlignMode"], re.I):
            MiscUtil.PrintError(
                "The align mode, %s, can't be used for aligning chains: No non-empty chain IDs found in input file."
                % (OptionsInfo["AlignMode"])
            )

        InfilesInfo["InfilesNames"].append(Infile)
        InfilesInfo["InfilesRoots"].append(InfileRoot)
        InfilesInfo["ChainIDs"].append(ChainIDs)

        InfilesInfo["PyMOLObjectNames"].append(InfileRoot)

        # Setup outfile name...
        Outfile = "%s%s.pdb" % (InfileRoot, OutSuffix)
        InfilesInfo["OutfilesNames"].append(Outfile)
        if os.path.exists(Outfile) and not OptionsInfo["Overwrite"]:
            MiscUtil.PrintError(
                'The output file, %s, already exist. Use option "--ov" or "--overwrite" and try again.\n'
                % (Outfile)
            )

    OptionsInfo["InfilesInfo"] = InfilesInfo


def RetrieveRefFileInfo():
    """Retrieve information for ref file.

    Stores the reference file name, root name, PyMOL object name and chain
    IDs in OptionsInfo["RefFileInfo"]. The chain IDs already retrieved for
    the first input file are reused when it serves as the reference file.
    It must be called after RetrieveInfilesInfo.
    """

    RefFileInfo = {}

    RefFile = OptionsInfo["RefFileName"]

    FileDir, FileName, FileExt = MiscUtil.ParseFileName(RefFile)
    RefFileRoot = FileName

    if re.match("^FirstInputFile$", OptionsInfo["AlignRefFile"], re.I):
        ChainIDs = OptionsInfo["InfilesInfo"]["ChainIDs"][0]
    else:
        MiscUtil.PrintInfo("\nRetrieving chains information for alignment reference file %s..." % RefFile)
        ChainIDs = RetrieveChainIDs(RefFile, RefFileRoot)
        if not ChainIDs and re.match("^FirstChain$", OptionsInfo["AlignMode"], re.I):
            MiscUtil.PrintError(
                "The align mode, %s, can't be used for aligning chains: No non-empty chain IDs found in input file."
                % (OptionsInfo["AlignMode"])
            )

    RefFileInfo["RefFileName"] = RefFile
    RefFileInfo["RefFileRoot"] = RefFileRoot
    # The prefix keeps the reference object name distinct from the object
    # name of an input file with the same root...
    RefFileInfo["PyMOLObjectName"] = "AlignRef_%s" % RefFileRoot
    RefFileInfo["ChainIDs"] = ChainIDs

    OptionsInfo["RefFileInfo"] = RefFileInfo


def RetrieveChainIDs(Infile, InfileRoot):
    """Retrieve chains IDs for an input file.

    Reinitializes PyMOL, loads the file, retrieves its chain IDs and deletes
    the loaded object again.

    Arguments:
        Infile (str): Name of the file to load.
        InfileRoot (str): Root of the file name used as PyMOL object name.

    Returns:
        list: Chain IDs; an empty list when none are available.
    """

    pymol.cmd.reinitialize()

    MolName = InfileRoot
    pymol.cmd.load(Infile, MolName)

    ChainIDs = PyMOLUtil.GetChains(MolName)
    pymol.cmd.delete(MolName)

    if ChainIDs is None:
        ChainIDs = []

    # Print out chain IDs...
    ChainInfo = ", ".join(ChainIDs) if ChainIDs else "None"
    MiscUtil.PrintInfo("Chain IDs: %s" % ChainInfo)

    return ChainIDs


def RetrieveFirstChainID(FileIndex):
    """Get first chain ID.

    Arguments:
        FileIndex (int): Index of the input file in OptionsInfo["InfilesInfo"].

    Returns:
        str or None: First chain ID of the input file; None when the input
            file has no chain IDs.
    """

    ChainIDs = OptionsInfo["InfilesInfo"]["ChainIDs"][FileIndex]

    return ChainIDs[0] if ChainIDs else None


def ProcessOptions():
    """Process and validate command line arguments and options.

    Validates the values in Options, copies them into OptionsInfo and
    retrieves the information for input files and reference file.
    """

    MiscUtil.PrintInfo("Processing options...")

    # Validate options...
    ValidateOptions()

    OptionsInfo["AlignMethod"] = Options["--alignMethod"].lower()
    OptionsInfo["AlignMode"] = Options["--alignMode"]

    OptionsInfo["Infiles"] = Options["--infiles"]
    # Expanded list of input file names set up by ValidateOptions...
    OptionsInfo["InfilesNames"] = Options["--infileNames"]

    OptionsInfo["AlignRefFile"] = Options["--alignRefFile"]
    if re.match("^FirstInputFile$", Options["--alignRefFile"], re.I):
        OptionsInfo["RefFileName"] = OptionsInfo["InfilesNames"][0]
    else:
        OptionsInfo["RefFileName"] = Options["--alignRefFile"]

    OptionsInfo["OutSuffix"] = Options["--outSuffix"]

    OptionsInfo["Overwrite"] = Options["--overwrite"]

    # The reference file information may reuse the chain IDs of the first
    # input file; the input files information must be retrieved first...
    RetrieveInfilesInfo()
    RetrieveRefFileInfo()


def RetrieveOptions():
    """Retrieve command line arguments and options.

    Parses the command line using docopt, changes to the working directory
    when one is specified, and prints examples and exits for the examples
    option.
    """

    # Get options...
    global Options
    Options = docopt(_docoptUsage_)

    # Set current working directory to the specified directory...
    WorkingDir = Options["--workingdir"]
    if WorkingDir:
        os.chdir(WorkingDir)

    # Handle examples option...
    if "--examples" in Options and Options["--examples"]:
        MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_))
        sys.exit(0)


def ValidateOptions():
    """Validate option values.

    Checks the alignment method and mode, expands the input file names,
    requires at least two input files and validates paths and extensions of
    input files and any explicitly specified reference file. The expanded
    list of input file names is stored in Options["--infileNames"].
    """

    MiscUtil.ValidateOptionTextValue("--alignMethod", Options["--alignMethod"], "align cealign super")
    MiscUtil.ValidateOptionTextValue("--alignMode", Options["--alignMode"], "FirstChain Complex")

    # Expand infiles to handle presence of multiple input files...
    InfileNames = MiscUtil.ExpandFileNames(Options["--infiles"], ",")
    if len(InfileNames) < 2:
        # Two input files are acceptable; the message matches the check...
        MiscUtil.PrintError(
            'Number of input files specified for "-i, --infiles" option, %d, must be greater than or equal to 2...'
            % (len(InfileNames))
        )

    # Validate file extensions...
    for Infile in InfileNames:
        MiscUtil.ValidateOptionFilePath("-i, --infiles", Infile)
        MiscUtil.ValidateOptionFileExt("-i, --infiles", Infile, "pdb cif")
    Options["--infileNames"] = InfileNames

    if not re.match("^FirstInputFile$", Options["--alignRefFile"], re.I):
        AlignRefFile = Options["--alignRefFile"]
        MiscUtil.ValidateOptionFilePath("--alignRefFile", AlignRefFile)
        MiscUtil.ValidateOptionFileExt("--alignRefFile", AlignRefFile, "pdb cif")


# Setup a usage string for docopt...
#
# NOTE: docopt derives option names, arguments and defaults from the layout of
# this text. Keep at least two spaces between an option specification and its
# "[default: ...]" text. Some docopt releases end the "Options:" section at the
# first completely empty line; the separator line within the description of
# "--alignMode" is intentionally whitespace-only instead of being empty.
_docoptUsage_ = """
PyMOLAlignChains.py - Align chains

Usage:
    PyMOLAlignChains.py [--alignMethod <align, cealign, super>]
                        [--alignMode <FirstChain or Complex>] [--alignRefFile <filename>]
                        [--outSuffix <text>] [--overwrite] [-w <dir>] -i <infile1,infile2,infile3...>
    PyMOLAlignChains.py -h | --help | -e | --examples

Description:
    Align chains in input files to a reference file and write out aligned files.

    The supported input and output file format are: PDB (.pdb), CIF(.cif)

    The names of the aligned output files are automatically generated from the
    names of input as shown below:

        <InfileRoot><OutSuffix>.pdb
        Default: <InfileRoot>_Aligned.pdb

Options:
    -a, --alignMethod <align, cealign, super>  [default: super]
        Alignment methodology to use for aligning input files to a
        reference file.
    --alignMode <FirstChain or Complex>  [default: FirstChain]
        Portion of input and reference files to use for spatial alignment of
        input files against reference file. Possible values: FirstChain or
        Complex.
        
        The FirstChain mode allows alignment of the first chain in each input
        file to the first chain in the reference file along with moving the rest
        of the complex to coordinate space of the reference file. The complete
        complex in each input file is aligned to the complete complex in reference
        file for the Complex mode.
    --alignRefFile <filename>  [default: FirstInputFile]
        Reference input file name. The default is to use the first input file
        name specified using '-i, --infiles' option.
    -e, --examples
        Print examples.
    -h, --help
        Print this help message.
    -i, --infiles <infile1,infile2,...>
        A comma delimited list of input files. The wildcards are also allowed
        in file names.
    --outSuffix <text>  [default: _Aligned]
        Suffix to append to input file root for generating name of output file.
    --overwrite
        Overwrite existing files.
    -w, --workingdir <dir>
        Location of working directory which defaults to the current directory.

Examples:
    To align first chain in all input files to the first chain in first input file
    and write out aligned output files, type:

        % PyMOLAlignChains.py -i "Sample3.pdb,Sample4.pdb,Sample5.pdb"

    To align first chain in all input files to the first chain in specific reference
    file and write out aligned output files, type:

        % PyMOLAlignChains.py --alignRefFile Sample5.pdb
          -i "Sample3.pdb,Sample4.pdb,Sample5.pdb"

    To align first chain in all input files to the first chain in first input file
    using a specific alignment method and write out aligned output files
    with specific suffix in names, type:

        % PyMOLAlignChains.py --alignMethod cealign --outSuffix "_aligned"
          -i "Sample3.pdb,Sample4.pdb,Sample5.pdb"

    To align all chains in each input files to all chains in first input file and
    write out aligned output files, type:

        % PyMOLAlignChains.py --alignMode Complex
          -i "Sample3.pdb,Sample4.pdb,Sample5.pdb"

Author:
    Manish Sud(msud@san.rr.com)

See also:
    PyMOLCalculateRMSD.py, PyMOLSplitChainsAndLigands.py,
    PyMOLVisualizeMacromolecules.py

Copyright:
    Copyright (C) 2025 Manish Sud. All rights reserved.

    The functionality available in this script is implemented using PyMOL, a
    molecular visualization system on an open source foundation originally
    developed by Warren DeLano.

    This file is part of MayaChemTools.

    MayaChemTools is free software; you can redistribute it and/or modify it under
    the terms of the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your option) any
    later version.

"""

if __name__ == "__main__":
    main()