1 #!/bin/env python 2 # 3 # File: PyMOLCalculateProperties.py 4 # Author: Manish Sud <msud@san.rr.com> 5 # 6 # Copyright (C) 2024 Manish Sud. All rights reserved. 7 # 8 # The functionality available in this script is implemented using PyMOL, a 9 # molecular visualization system on an open source foundation originally 10 # developed by Warren DeLano. 11 # 12 # This file is part of MayaChemTools. 13 # 14 # MayaChemTools is free software; you can redistribute it and/or modify it under 15 # the terms of the GNU Lesser General Public License as published by the Free 16 # Software Foundation; either version 3 of the License, or (at your option) any 17 # later version. 18 # 19 # MayaChemTools is distributed in the hope that it will be useful, but without 20 # any warranty; without even the implied warranty of merchantability of fitness 21 # for a particular purpose. See the GNU Lesser General Public License for more 22 # details. 23 # 24 # You should have received a copy of the GNU Lesser General Public License 25 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or 26 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, 27 # Boston, MA, 02111-1307, USA. 28 # 29 30 from __future__ import print_function 31 32 # Add local python path to the global path and import standard library modules... 33 import os 34 import sys; sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python")) 35 import time 36 import re 37 38 # PyMOL imports... 39 try: 40 import pymol 41 # Finish launching PyMOL in a command line mode for batch processing (-c) 42 # along with the following options: disable loading of pymolrc and plugins (-k); 43 # suppress start up messages (-q) 44 pymol.finish_launching(['pymol', '-ckq']) 45 except ImportError as ErrMsg: 46 sys.stderr.write("\nFailed to import PyMOL module/package: %s\n" % ErrMsg) 47 sys.stderr.write("Check/update your PyMOL environment and try again.\n\n") 48 sys.exit(1) 49 50 # MayaChemTools imports... 51 try: 52 from docopt import docopt 53 import MiscUtil 54 import PyMOLUtil 55 except ImportError as ErrMsg: 56 sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg) 57 sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n") 58 sys.exit(1) 59 60 ScriptName = os.path.basename(sys.argv[0]) 61 Options = {} 62 OptionsInfo = {} 63 64 def main(): 65 """Start execution of the script.""" 66 67 MiscUtil.PrintInfo("\n%s (PyMOL v%s; MayaChemTools v%s; %s): Starting...\n" % (ScriptName, pymol.cmd.get_version()[0], MiscUtil.GetMayaChemToolsVersion(), time.asctime())) 68 69 (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime() 70 71 # Retrieve command line arguments and options... 72 RetrieveOptions() 73 74 # Process and validate command line arguments and options... 75 ProcessOptions() 76 77 # Perform actions required by the script... 78 CalculatePhysicochemicalProperties() 79 80 MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName) 81 MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime)) 82 83 def CalculatePhysicochemicalProperties(): 84 """Calculate physicochemical properties for macromolecules.""" 85 86 Outfile = OptionsInfo["Outfile"] 87 OutDelim = OptionsInfo["OutDelim"] 88 Precision = OptionsInfo["Precision"] 89 90 MiscUtil.PrintInfo("\nGenerating file %s...\n" % Outfile) 91 OutFH = open(Outfile, "w") 92 if OutFH is None: 93 MiscUtil.PrintError("Couldn't open output file: %s.\n" % (Outfile)) 94 95 WriteColumnLabels(OutFH, OutDelim) 96 97 InfilesInfo = OptionsInfo["InfilesInfo"] 98 for FileIndex in range(0, len(InfilesInfo["InfilesNames"])): 99 MiscUtil.PrintInfo("Calculating properties for input file %s..." % OptionsInfo["InfilesInfo"]["InfilesNames"][FileIndex]) 100 101 LoadInfile(FileIndex) 102 AddHydrogens(FileIndex) 103 104 CalculatedValues = CalculatePropertyValues(FileIndex) 105 WriteCalculatedValues(FileIndex, OutFH, OutDelim, CalculatedValues) 106 107 # Delete MolName object 108 DeleteInfileObject(FileIndex) 109 110 OutFH.close() 111 112 def CalculatePropertyValues(FileIndex): 113 """Calculate property values.""" 114 115 MolName = OptionsInfo["InfilesInfo"]["InfilesRoots"][FileIndex] 116 Precision = OptionsInfo["Precision"] 117 118 CalculatedValues = [] 119 for PropertyName in OptionsInfo["SpecifiedPropertyNames"]: 120 PropertyValue = GetFormattedPropertyValue(MolName, PropertyName) 121 CalculatedValues.append(PropertyValue) 122 123 return CalculatedValues 124 125 def GetFormattedPropertyValue(Selection, Name): 126 """Calculate and return a formatted property value.""" 127 128 Quiet = OptionsInfo["Quiet"] 129 Precision = OptionsInfo["Precision"] 130 131 Value = None 132 if re.match("^CenterOfMass$", Name, re.I): 133 Value = PyMOLUtil.CalculateCenterOfMass(Selection, Quiet) 134 elif re.match("^MolecularWeight$", Name, re.I): 135 Value = pymol.util.compute_mass(Selection, implicit = False, quiet = Quiet) 136 elif re.match("^MolecularSurfaceArea$", Name, re.I): 137 Value = pymol.util.get_area(Selection, -1, 0, quiet = Quiet) 138 elif re.match("^SumOfFormalCharges$", Name, re.I): 139 Value = pymol.util.sum_formal_charges(Selection, quiet = Quiet) 140 elif re.match("^SumOfPartialCharges$", Name, re.I): 141 Value = pymol.util.sum_partial_charges(Selection, quiet = Quiet) 142 elif re.match("^SolventAccessibleSurfaceArea$", Name, re.I): 143 Value = pymol.util.get_sasa(Selection, quiet = Quiet) 144 else: 145 MiscUtil.PrintError("The property name specified, %s, using \"-m, --mode\" option is not a valid name." % Name) 146 147 if Value is None: 148 FormattedValue = "NA" 149 else: 150 if type(Value) is list: 151 FormattedValues = [] 152 for ListElement in Value: 153 FormattedListElement = "%.*f" % (Precision, ListElement) 154 FormattedValues.append(FormattedListElement) 155 FormattedValue = " ".join(FormattedValues) 156 else: 157 FormattedValue = "%.*f" % (Precision, Value) 158 159 return FormattedValue 160 161 def WriteCalculatedValues(FileIndex, OutFH, OutDelim, CalculatedValues): 162 """Write out calculated values.""" 163 164 PDBID = OptionsInfo["InfilesInfo"]["InfilesRoots"][FileIndex] 165 LineWords = [PDBID] 166 167 LineWords.extend(CalculatedValues) 168 169 Line = OutDelim.join(LineWords) 170 OutFH.write("%s\n" % Line) 171 172 def WriteColumnLabels(OutFH, OutDelim): 173 """Write out column labels.""" 174 175 ColLabels = [] 176 177 ColLabels = ["PDBID"] 178 ColLabels.extend(OptionsInfo["SpecifiedPropertyNames"]) 179 180 Line = OutDelim.join(ColLabels) 181 OutFH.write("%s\n" % Line) 182 183 def LoadInfile(FileIndex): 184 """Load a file.""" 185 186 Infile = OptionsInfo["InfilesInfo"]["InfilesNames"][FileIndex] 187 MolName = OptionsInfo["InfilesInfo"]["InfilesRoots"][FileIndex] 188 189 ChainSelections = OptionsInfo["InfilesInfo"]["ChainSelections"][FileIndex] 190 NonChainSelections = OptionsInfo["NonChainSelections"] 191 192 if ChainSelections is None and NonChainSelections is None: 193 pymol.cmd.load(Infile, MolName) 194 return 195 196 TmpMolName = "Tmp%s" % MolName 197 pymol.cmd.load(Infile, TmpMolName) 198 199 MolSelections = [] 200 MolSelections.append(TmpMolName) 201 if ChainSelections is not None: 202 MolSelections.append(ChainSelections) 203 if NonChainSelections is not None: 204 MolSelections.append(NonChainSelections) 205 206 MolSelection = " and ".join(MolSelections) 207 MolSelection = "(%s)" % MolSelection 208 pymol.cmd.create(MolName, MolSelection) 209 210 pymol.cmd.delete(TmpMolName) 211 212 def DeleteInfileObject(FileIndex): 213 """Delete PyMOL object.""" 214 215 MolName = OptionsInfo["InfilesInfo"]["InfilesRoots"][FileIndex] 216 217 pymol.cmd.delete(MolName) 218 219 def AddHydrogens(FileIndex): 220 """Add hydrogens.""" 221 222 if not OptionsInfo["Addhydrogens"]: 223 return 224 225 MolName = OptionsInfo["InfilesInfo"]["InfilesRoots"][FileIndex] 226 227 pymol.cmd.h_add(MolName) 228 pymol.cmd.sort("%s extend 1" % MolName) 229 230 def ProcessSpecifiedPropertyNames(): 231 """Process specified property names.""" 232 233 PropertyNames = RetrievePropertyNames() 234 235 OptionsInfo["SpecifiedPropertyNames"] = [] 236 237 SpecifiedNames = re.sub(" ", "", OptionsInfo["Mode"]) 238 if not SpecifiedNames: 239 MiscUtil.PrintError("No valid property names specifed using \"-m, --mode\" option") 240 241 if re.match("^All$", SpecifiedNames, re.I): 242 OptionsInfo["SpecifiedPropertyNames"] = PropertyNames 243 return 244 245 # Validate propery names... 246 CanonicalPropertyNamesMap = {} 247 for Name in PropertyNames: 248 CanonicalPropertyNamesMap[Name.lower()] = Name 249 250 SpecifiedNamesWords = SpecifiedNames.split(",") 251 for Name in SpecifiedNamesWords: 252 CanonicalName = Name.lower() 253 if CanonicalName not in CanonicalPropertyNamesMap: 254 MiscUtil.PrintError("The property name specified, %s, using \"-m, --mode\" option is not a valid name." % Name) 255 256 PropertyName = CanonicalPropertyNamesMap[CanonicalName] 257 OptionsInfo["SpecifiedPropertyNames"].append(PropertyName) 258 259 def ProcessListPropertyNames(): 260 """List available property names.""" 261 262 PropertyNames = RetrievePropertyNames() 263 264 MiscUtil.PrintInfo("\nListing available property names...") 265 Delimiter = ", " 266 MiscUtil.PrintInfo("\n%s" % (Delimiter.join(PropertyNames))) 267 268 MiscUtil.PrintInfo("") 269 270 def RetrievePropertyNames(): 271 """Retrieve available property names.""" 272 273 PropertyNames = ["CenterOfMass", "MolecularWeight", "MolecularSurfaceArea", "SumOfFormalCharges", "SumOfPartialCharges", "SolventAccessibleSurfaceArea"] 274 275 return PropertyNames 276 277 def RetrieveInfilesInfo(): 278 """Retrieve information for input files.""" 279 280 InfilesInfo = {} 281 282 InfilesInfo["InfilesNames"] = [] 283 InfilesInfo["InfilesRoots"] = [] 284 InfilesInfo["ChainsAndLigandsInfo"] = [] 285 286 for Infile in OptionsInfo["InfilesNames"]: 287 FileDir, FileName, FileExt = MiscUtil.ParseFileName(Infile) 288 InfileRoot = FileName 289 ChainsAndLigandInfo = PyMOLUtil.GetChainsAndLigandsInfo(Infile, InfileRoot) 290 291 InfilesInfo["InfilesNames"].append(Infile) 292 InfilesInfo["InfilesRoots"].append(InfileRoot) 293 InfilesInfo["ChainsAndLigandsInfo"].append(ChainsAndLigandInfo) 294 295 OptionsInfo["InfilesInfo"] = InfilesInfo 296 297 def ProcessChainIDs(): 298 """Process specified chain IDs for infiles.""" 299 300 OptionsInfo["InfilesInfo"]["SpecifiedChainsAndLigandsInfo"] = [] 301 OptionsInfo["InfilesInfo"]["ChainSelections"] = [] 302 303 for FileIndex in range(0, len(OptionsInfo["InfilesInfo"]["InfilesNames"])): 304 MiscUtil.PrintInfo("\nProcessing specified chain IDs for input file %s..." % OptionsInfo["InfilesInfo"]["InfilesNames"][FileIndex]) 305 306 ChainsAndLigandsInfo = OptionsInfo["InfilesInfo"]["ChainsAndLigandsInfo"][FileIndex] 307 SpecifiedChainsAndLigandsInfo = PyMOLUtil.ProcessChainsAndLigandsOptionsInfo(ChainsAndLigandsInfo, "-c, --chainIDs", OptionsInfo["ChainIDs"], None, None) 308 309 # Setup chain selections... 310 ChainSelections = None 311 if not OptionsInfo["AllChains"]: 312 Chains = [] 313 for ChainID in SpecifiedChainsAndLigandsInfo["ChainIDs"]: 314 Chains.append("chain %s" % ChainID) 315 ChainSelections = " or ".join(Chains) 316 ChainSelections = "(%s)" % ChainSelections 317 318 OptionsInfo["InfilesInfo"]["SpecifiedChainsAndLigandsInfo"].append(SpecifiedChainsAndLigandsInfo) 319 OptionsInfo["InfilesInfo"]["ChainSelections"].append(ChainSelections) 320 321 MiscUtil.PrintInfo("Specified chain IDs: %s" % (", ".join(SpecifiedChainsAndLigandsInfo["ChainIDs"]))) 322 323 def ProcessKeepSelectionOptions(): 324 """Process keep selection options.""" 325 326 KeepSelections = [] 327 if not OptionsInfo["KeepSolvents"]: 328 KeepSelections.append("(not solvent)") 329 if not OptionsInfo["KeepInorganics"]: 330 KeepSelections.append("(not inorganic)") 331 if not OptionsInfo["KeepLigands"]: 332 KeepSelections.append("(not organic)") 333 334 NonChainSelections = None 335 if len(KeepSelections): 336 NonChainSelections = " and ".join(KeepSelections) 337 NonChainSelections = "(%s)" % NonChainSelections 338 339 OptionsInfo["NonChainSelections"] = NonChainSelections 340 341 def ProcessOptions(): 342 """Process and validate command line arguments and options.""" 343 344 MiscUtil.PrintInfo("Processing options...") 345 346 # Validate options... 347 ValidateOptions() 348 349 OptionsInfo["Addhydrogens"] = True if re.match("^Yes$", Options["--addHydrogens"], re.I) else False 350 351 OptionsInfo["Infiles"] = Options["--infiles"] 352 OptionsInfo["InfilesNames"] = Options["--infilesNames"] 353 354 OptionsInfo["Outfile"] = Options["--outfile"] 355 OptionsInfo["Overwrite"] = Options["--overwrite"] 356 357 OptionsInfo["OutDelim"] = " " 358 if MiscUtil.CheckFileExt(OptionsInfo["Outfile"], "csv"): 359 OptionsInfo["OutDelim"] = "," 360 elif MiscUtil.CheckFileExt(OptionsInfo["Outfile"], "tsv txt"): 361 OptionsInfo["OutDelim"] = "\t" 362 else: 363 MiscUtil.PrintError("The file name specified , %s, for option \"--outfile\" is not valid. Supported file formats: csv tsv txt\n" % (OptionsInfo["Outfile"])) 364 365 OptionsInfo["KeepInorganics"] = True if re.match("^Yes$", Options["--keepInorganics"], re.I) else False 366 OptionsInfo["KeepLigands"] = True if re.match("^Yes$", Options["--keepLigands"], re.I) else False 367 OptionsInfo["KeepSolvents"] = True if re.match("^Yes$", Options["--keepSolvents"], re.I) else False 368 ProcessKeepSelectionOptions() 369 370 OptionsInfo["Overwrite"] = Options["--overwrite"] 371 OptionsInfo["Quiet"] = 1 if re.match("^Yes$", Options["--quiet"], re.I) else 0 372 373 OptionsInfo["Precision"] = int(Options["--precision"]) 374 375 OptionsInfo["Mode"] = Options["--mode"] 376 ProcessSpecifiedPropertyNames() 377 378 RetrieveInfilesInfo() 379 OptionsInfo["ChainIDs"] = Options["--chainIDs"] 380 OptionsInfo["AllChains"] = True if re.match("^All$", Options["--chainIDs"], re.I) else False 381 ProcessChainIDs() 382 383 def RetrieveOptions(): 384 """Retrieve command line arguments and options.""" 385 386 # Get options... 387 global Options 388 Options = docopt(_docoptUsage_) 389 390 # Set current working directory to the specified directory... 391 WorkingDir = Options["--workingdir"] 392 if WorkingDir: 393 os.chdir(WorkingDir) 394 395 # Handle examples option... 396 if "--examples" in Options and Options["--examples"]: 397 MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_)) 398 sys.exit(0) 399 400 # Handle listing of property names... 401 if Options["--list"]: 402 ProcessListPropertyNames() 403 sys.exit(0) 404 405 def ValidateOptions(): 406 """Validate option values.""" 407 408 MiscUtil.ValidateOptionTextValue("-a, -addHydrogens", Options["--addHydrogens"], "yes no") 409 MiscUtil.ValidateOptionTextValue("--keepInorganics", Options["--keepInorganics"], "yes no") 410 MiscUtil.ValidateOptionTextValue("--keepLigands", Options["--keepLigands"], "yes no") 411 MiscUtil.ValidateOptionTextValue("--keepSolvents", Options["--keepSolvents"], "yes no") 412 413 # Expand infile names.. 414 InfilesNames = MiscUtil.ExpandFileNames(Options["--infiles"], ",") 415 416 # Validate file extensions... 417 for Infile in InfilesNames: 418 MiscUtil.ValidateOptionFilePath("-i, --infiles", Infile) 419 MiscUtil.ValidateOptionFileExt("-i, --infiles", Infile, "pdb cif") 420 Options["--infilesNames"] = InfilesNames 421 422 MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "csv tsv txt") 423 MiscUtil.ValidateOptionsOutputFileOverwrite("-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"]) 424 425 MiscUtil.ValidateOptionIntegerValue("-p, --precision", Options["--precision"], {">": 0}) 426 MiscUtil.ValidateOptionTextValue("--quiet", Options["--quiet"], "yes no") 427 428 # Setup a usage string for docopt... 429 _docoptUsage_ = """ 430 PyMOLCalculateProperties.py - Calculate physicochemical properties 431 432 Usage: 433 PyMOLCalculateProperties.py [--addHydrogens <yes or no>] 434 [--chainIDs <First, All or ID1,ID2...>] [--list] [--keepInorganics <yes or no>] 435 [--keepLigands <yes or no>] [--keepSolvents <yes or no>] 436 [--mode <All or Name1,Name2,Name3,...>] 437 [--overwrite] [--precision <number>] [--quiet <yes or no>] 438 [-w <dir>] -i <infile1,infile2,infile3...> -o <outfile> 439 PyMOLCalculateProperties.py -l | --list 440 PyMOLCalculateProperties.py -h | --help | -e | --examples 441 442 Description: 443 Calculate physicochemical properties for macromolecules. The properties may 444 be calculated for the complete complex or a specified list of chain IDs. Ligands, 445 inorganics, and solvents may be optionally excluded during the calculation 446 of properties. 447 448 The supported input file format are: PDB (.pdb), mmCIF (.cif) 449 450 The supported output file formats are: CSV (.csv), TSV (.tsv, .txt) 451 452 Options: 453 -a, --addHydrogens <yes or no> [default: yes] 454 Add hydrogens before calculating physiochemical properties. 455 -c, --chainIDs <First, All or ID1,ID2...> [default: All] 456 List of chain IDs to use for calculating physicochemical properties. Possible 457 values: First, All, or a comma delimited list of chain IDs. The default is to use 458 all chain IDs in input file. 459 -e, --examples 460 Print examples. 461 -h, --help 462 Print this help message. 463 -i, --infiles <infile1,infile2,infile3...> 464 A comma delimited list of input files. The wildcards are also allowed 465 in file names. 466 --keepInorganics <yes or no> [default: yes] 467 Keep inorganic molecules during calculation of physiochemical properties. 468 The inorganic molecules are identified using inorganic selection operator 469 available in PyMOL. 470 --keepLigands <yes or no> [default: yes] 471 Keep ligand molecules during calculation of physiochemical properties. 472 The ligand molecules are identified using organic selection operator 473 available in PyMOL. 474 --keepSolvents <yes or no> [default: yes] 475 Keep solvent molecules during calculation of physiochemical properties. 476 The solvent molecules are identified using solvent selection operator 477 available in PyMOL. 478 -l, --list 479 List available property names without performing any calculations. 480 -m, --mode <All or Name1,Name2,Name3,...> [default: All] 481 Comma delimited lists of physicochemical properties to calculate. Default: 482 'All'. The following properties may be calculated for macromolecules: 483 484 CenterOfMass,MolecularWeight,MolecularSurfaceArea 485 SumOfFormalCharges,SumOfPartialCharges,SolventAccessibleSurfaceArea 486 487 -o, --outfile <outfile> 488 Output file name for writing out calculated values. Supported text file extensions: 489 csv, tsv or txt. 490 --overwrite 491 Overwrite existing files. 492 -p, --precision <number> [default: 3] 493 Floating point precision for writing the calculated property values. 494 -q, --quiet <yes or no> [default: yes] 495 Do not print information during the calculation of properties. 496 -w, --workingdir <dir> 497 Location of working directory which defaults to the current directory. 498 499 Examples: 500 To calculate all available properties for all chains in input file along with all 501 ligands, inorganics and solvents after adding hydrogens and write out a CSV 502 file containing calculated values and PDB IDs, type: 503 504 % PyMOLCalculateProperties.py -i Sample3.pdb -o Sample3Out.csv 505 506 To calculate specified properties for all chains in input file along with all 507 ligands, inorganics and solvents after adding hydrogens and write out a CSV 508 file containing calculated values and PDB IDs, type: 509 510 % PyMOLCalculateProperties.py -m "MolecularWeight,CenterOfMass" 511 -i Sample3.pdb -o Sample3Out.csv 512 513 To calculate all available properties for chain E in input file without including 514 ligands, inorganics and solvents, and addition of hydrogens, and write out a 515 TSV file containing calculated values and PDB IDs, type: 516 517 % PyMOLCalculateProperties.py --addHydrogens no -c E --keepLigands 518 no --keepInorganics no --keepSolvents no -i Sample3.pdb -o 519 Sample3Out.tsv 520 521 To calculate all available properties for all chains in multiple files along with all 522 ligands, inorganics and solvents after adding hydrogens and write out a CSV 523 file containing calculated values and PDB IDs, type: 524 525 % PyMOLCalculateProperties.py -i "Sample3.pdb,Sample4.pdb,Sample5.pdb" 526 -o SampleOut.csv 527 528 Author: 529 Manish Sud(msud@san.rr.com) 530 531 See also: 532 PyMOLCalculateRMSD.py, PyMOLSplitChainsAndLigands.py, 533 PyMOLVisualizeMacromolecules.py 534 535 Copyright: 536 Copyright (C) 2024 Manish Sud. All rights reserved. 537 538 The functionality available in this script is implemented using PyMOL, a 539 molecular visualization system on an open source foundation originally 540 developed by Warren DeLano. 541 542 This file is part of MayaChemTools. 543 544 MayaChemTools is free software; you can redistribute it and/or modify it under 545 the terms of the GNU Lesser General Public License as published by the Free 546 Software Foundation; either version 3 of the License, or (at your option) any 547 later version. 548 549 """ 550 551 if __name__ == "__main__": 552 main()