1 #!/bin/env python 2 # 3 # File: PyMOLCalculateProperties.py 4 # Author: Manish Sud <msud@san.rr.com> 5 # 6 # Copyright (C) 2026 Manish Sud. All rights reserved. 7 # 8 # The functionality available in this script is implemented using PyMOL, a 9 # molecular visualization system on an open source foundation originally 10 # developed by Warren DeLano. 11 # 12 # This file is part of MayaChemTools. 13 # 14 # MayaChemTools is free software; you can redistribute it and/or modify it under 15 # the terms of the GNU Lesser General Public License as published by the Free 16 # Software Foundation; either version 3 of the License, or (at your option) any 17 # later version. 18 # 19 # MayaChemTools is distributed in the hope that it will be useful, but without 20 # any warranty; without even the implied warranty of merchantability of fitness 21 # for a particular purpose. See the GNU Lesser General Public License for more 22 # details. 23 # 24 # You should have received a copy of the GNU Lesser General Public License 25 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or 26 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, 27 # Boston, MA, 02111-1307, USA. 28 # 29 30 from __future__ import print_function 31 32 import os 33 import sys 34 import time 35 import re 36 37 # PyMOL imports... 38 try: 39 import pymol 40 41 # Finish launching PyMOL in a command line mode for batch processing (-c) 42 # along with the following options: disable loading of pymolrc and plugins (-k); 43 # suppress start up messages (-q) 44 pymol.finish_launching(["pymol", "-ckq"]) 45 except ImportError as ErrMsg: 46 sys.stderr.write("\nFailed to import PyMOL module/package: %s\n" % ErrMsg) 47 sys.stderr.write("Check/update your PyMOL environment and try again.\n\n") 48 sys.exit(1) 49 50 # MayaChemTools imports... 
# Make the MayaChemTools Python library, located relative to this script at
# ../lib/Python, importable before attempting the imports below.
sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python"))
try:
    from docopt import docopt
    import MiscUtil
    import PyMOLUtil
except ImportError as ErrMsg:
    sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg)
    sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n")
    sys.exit(1)

ScriptName = os.path.basename(sys.argv[0])

# Raw command line options as returned by docopt; filled in by RetrieveOptions().
Options = {}

# Processed and validated option values; filled in by ProcessOptions().
OptionsInfo = {}


def main():
    """Start execution of the script.

    Prints a start-up banner, retrieves and processes the command line
    options, calculates the requested properties, and reports the elapsed
    time.
    """

    MiscUtil.PrintInfo(
        "\n%s (PyMOL v%s; MayaChemTools v%s; %s): Starting...\n"
        % (ScriptName, pymol.cmd.get_version()[0], MiscUtil.GetMayaChemToolsVersion(), time.asctime())
    )

    (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime()

    # Retrieve command line arguments and options...
    RetrieveOptions()

    # Process and validate command line arguments and options...
    ProcessOptions()

    # Perform actions required by the script...
    CalculatePhysicochemicalProperties()

    MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName)
    MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime))


def CalculatePhysicochemicalProperties():
    """Calculate physicochemical properties for macromolecules.

    Writes a header line followed by one line per input file to
    OptionsInfo["Outfile"], using OptionsInfo["OutDelim"] as the delimiter.
    Each input file is loaded into PyMOL, optionally protonated, measured,
    and then deleted from the PyMOL session.
    """

    Outfile = OptionsInfo["Outfile"]
    OutDelim = OptionsInfo["OutDelim"]

    MiscUtil.PrintInfo("\nGenerating file %s...\n" % Outfile)

    # open() signals failure by raising rather than by returning None, so the
    # failure has to be caught to be reported through MiscUtil.PrintError.
    try:
        OutFH = open(Outfile, "w")
    except (IOError, OSError):
        MiscUtil.PrintError("Couldn't open output file: %s.\n" % (Outfile))
        # NOTE(review): PrintError presumably terminates the script; the
        # return guards against using an unbound file handle if it does not.
        return

    # The context manager closes the output file even when processing of an
    # input file raises.
    with OutFH:
        WriteColumnLabels(OutFH, OutDelim)

        for FileIndex, Infile in enumerate(OptionsInfo["InfilesInfo"]["InfilesNames"]):
            MiscUtil.PrintInfo("Calculating properties for input file %s..." % Infile)

            LoadInfile(FileIndex)
            AddHydrogens(FileIndex)

            CalculatedValues = CalculatePropertyValues(FileIndex)
            WriteCalculatedValues(FileIndex, OutFH, OutDelim, CalculatedValues)

            # Delete MolName object
            DeleteInfileObject(FileIndex)


def CalculatePropertyValues(FileIndex):
    """Calculate property values.

    Arguments:
        FileIndex (int): Index of the input file in OptionsInfo["InfilesInfo"].

    Returns:
        list: Formatted values, one for each name in
            OptionsInfo["SpecifiedPropertyNames"] and in the same order.
    """

    MolName = OptionsInfo["InfilesInfo"]["InfilesRoots"][FileIndex]

    return [
        GetFormattedPropertyValue(MolName, PropertyName) for PropertyName in OptionsInfo["SpecifiedPropertyNames"]
    ]


def GetFormattedPropertyValue(Selection, Name):
    """Calculate and return a formatted property value.

    Arguments:
        Selection (str): Name of the PyMOL object or selection handed to the
            calculation functions.
        Name (str): Property name; matched case insensitively.

    Returns:
        str: "NA" when no value is available; space delimited formatted
            numbers for a list or tuple value; otherwise a single number
            formatted using OptionsInfo["Precision"] decimal places.
    """

    Quiet = OptionsInfo["Quiet"]
    Precision = OptionsInfo["Precision"]

    Value = None
    if re.match("^CenterOfMass$", Name, re.I):
        Value = PyMOLUtil.CalculateCenterOfMass(Selection, Quiet)
    elif re.match("^MolecularWeight$", Name, re.I):
        Value = pymol.util.compute_mass(Selection, implicit=False, quiet=Quiet)
    elif re.match("^MolecularSurfaceArea$", Name, re.I):
        Value = pymol.util.get_area(Selection, -1, 0, quiet=Quiet)
    elif re.match("^SumOfFormalCharges$", Name, re.I):
        Value = pymol.util.sum_formal_charges(Selection, quiet=Quiet)
    elif re.match("^SumOfPartialCharges$", Name, re.I):
        Value = pymol.util.sum_partial_charges(Selection, quiet=Quiet)
    elif re.match("^SolventAccessibleSurfaceArea$", Name, re.I):
        Value = pymol.util.get_sasa(Selection, quiet=Quiet)
    else:
        MiscUtil.PrintError('The property name specified, %s, using "-m, --mode" option is not a valid name.' % Name)

    if Value is None:
        return "NA"

    # Multi-component values are written as a single space delimited field.
    # Tuples are accepted alongside lists so that either sequence type
    # returned by a calculation function is formatted element by element.
    if isinstance(Value, (list, tuple)):
        return " ".join("%.*f" % (Precision, ListElement) for ListElement in Value)

    return "%.*f" % (Precision, Value)


def WriteCalculatedValues(FileIndex, OutFH, OutDelim, CalculatedValues):
    """Write out calculated values.

    Arguments:
        FileIndex (int): Index of the input file in OptionsInfo["InfilesInfo"].
        OutFH (file): Output file handle open for writing.
        OutDelim (str): Delimiter placed between the fields.
        CalculatedValues (list): Formatted property values to write.

    The line starts with the root name of the input file, which serves as
    the PDB ID, followed by the calculated values.
    """

    PDBID = OptionsInfo["InfilesInfo"]["InfilesRoots"][FileIndex]
    LineWords = [PDBID]
    LineWords.extend(CalculatedValues)

    OutFH.write("%s\n" % OutDelim.join(LineWords))


def WriteColumnLabels(OutFH, OutDelim):
    """Write out column labels.

    Arguments:
        OutFH (file): Output file handle open for writing.
        OutDelim (str): Delimiter placed between the labels.

    The labels are "PDBID" followed by the specified property names.
    """

    ColLabels = ["PDBID"]
    ColLabels.extend(OptionsInfo["SpecifiedPropertyNames"])

    OutFH.write("%s\n" % OutDelim.join(ColLabels))


def LoadInfile(FileIndex):
    """Load a file.

    Arguments:
        FileIndex (int): Index of the input file in OptionsInfo["InfilesInfo"].

    The file is loaded into a PyMOL object named after its root name. When
    chain or keep selections are active, the file is first loaded into a
    temporary object, the selected atoms are copied into the final object,
    and the temporary object is deleted.
    """

    Infile = OptionsInfo["InfilesInfo"]["InfilesNames"][FileIndex]
    MolName = OptionsInfo["InfilesInfo"]["InfilesRoots"][FileIndex]

    ChainSelections = OptionsInfo["InfilesInfo"]["ChainSelections"][FileIndex]
    NonChainSelections = OptionsInfo["NonChainSelections"]

    # Nothing to filter out: load the complete file directly.
    if ChainSelections is None and NonChainSelections is None:
        pymol.cmd.load(Infile, MolName)
        return

    TmpMolName = "Tmp%s" % MolName
    pymol.cmd.load(Infile, TmpMolName)

    # Restrict the temporary object by every active selection.
    MolSelections = [TmpMolName]
    if ChainSelections is not None:
        MolSelections.append(ChainSelections)
    if NonChainSelections is not None:
        MolSelections.append(NonChainSelections)

    MolSelection = "(%s)" % " and ".join(MolSelections)
    pymol.cmd.create(MolName, MolSelection)

    pymol.cmd.delete(TmpMolName)


def DeleteInfileObject(FileIndex):
    """Delete PyMOL object.

    Arguments:
        FileIndex (int): Index of the input file in OptionsInfo["InfilesInfo"].
    """

    MolName = OptionsInfo["InfilesInfo"]["InfilesRoots"][FileIndex]

    pymol.cmd.delete(MolName)


def AddHydrogens(FileIndex):
    """Add hydrogens.

    Arguments:
        FileIndex (int): Index of the input file in OptionsInfo["InfilesInfo"].

    Does nothing unless OptionsInfo["Addhydrogens"] is set.
    """

    if not OptionsInfo["Addhydrogens"]:
        return

    MolName = OptionsInfo["InfilesInfo"]["InfilesRoots"][FileIndex]

    pymol.cmd.h_add(MolName)
    pymol.cmd.sort("%s extend 1" % MolName)


def ProcessSpecifiedPropertyNames():
    """Process specified property names.

    Reads OptionsInfo["Mode"] and stores the resulting list of property
    names in OptionsInfo["SpecifiedPropertyNames"]. The value "All" selects
    every available property. Otherwise, each comma delimited name is
    matched case insensitively and stored using its canonical spelling.
    """

    PropertyNames = RetrievePropertyNames()

    OptionsInfo["SpecifiedPropertyNames"] = []

    SpecifiedNames = re.sub(" ", "", OptionsInfo["Mode"])
    if not SpecifiedNames:
        MiscUtil.PrintError('No valid property names specifed using "-m, --mode" option')

    if re.match("^All$", SpecifiedNames, re.I):
        OptionsInfo["SpecifiedPropertyNames"] = PropertyNames
        return

    # Validate property names...
    CanonicalPropertyNamesMap = {}
    for Name in PropertyNames:
        CanonicalPropertyNamesMap[Name.lower()] = Name

    for Name in SpecifiedNames.split(","):
        CanonicalName = Name.lower()
        if CanonicalName not in CanonicalPropertyNamesMap:
            # NOTE(review): the lookup below relies on PrintError not
            # returning for an invalid name - confirm against MiscUtil.
            MiscUtil.PrintError(
                'The property name specified, %s, using "-m, --mode" option is not a valid name.' % Name
            )

        PropertyName = CanonicalPropertyNamesMap[CanonicalName]
        OptionsInfo["SpecifiedPropertyNames"].append(PropertyName)


def ProcessListPropertyNames():
    """List available property names."""

    PropertyNames = RetrievePropertyNames()

    MiscUtil.PrintInfo("\nListing available property names...")
    Delimiter = ", "
    MiscUtil.PrintInfo("\n%s" % (Delimiter.join(PropertyNames)))

    MiscUtil.PrintInfo("")


def RetrievePropertyNames():
    """Retrieve available property names.

    Returns:
        list: Canonical names of the properties this script can calculate.
    """

    PropertyNames = [
        "CenterOfMass",
        "MolecularWeight",
        "MolecularSurfaceArea",
        "SumOfFormalCharges",
        "SumOfPartialCharges",
        "SolventAccessibleSurfaceArea",
    ]

    return PropertyNames


def RetrieveInfilesInfo():
    """Retrieve information for input files.

    Stores, under OptionsInfo["InfilesInfo"], parallel lists of the input
    file names, their root names, and their chains and ligands information.
    """

    InfilesInfo = {}

    InfilesInfo["InfilesNames"] = []
    InfilesInfo["InfilesRoots"] = []
    InfilesInfo["ChainsAndLigandsInfo"] = []

    for Infile in OptionsInfo["InfilesNames"]:
        FileDir, FileName, FileExt = MiscUtil.ParseFileName(Infile)
        InfileRoot = FileName
        ChainsAndLigandInfo = PyMOLUtil.GetChainsAndLigandsInfo(Infile, InfileRoot)

        InfilesInfo["InfilesNames"].append(Infile)
        InfilesInfo["InfilesRoots"].append(InfileRoot)
        InfilesInfo["ChainsAndLigandsInfo"].append(ChainsAndLigandInfo)

    OptionsInfo["InfilesInfo"] = InfilesInfo


def ProcessChainIDs():
    """Process specified chain IDs for infiles.

    For each input file, stores the specified chains and ligands
    information along with a PyMOL chain selection string. The chain
    selection is None when all chains are requested.
    """

    InfilesInfo = OptionsInfo["InfilesInfo"]
    InfilesInfo["SpecifiedChainsAndLigandsInfo"] = []
    InfilesInfo["ChainSelections"] = []

    for FileIndex, Infile in enumerate(InfilesInfo["InfilesNames"]):
        MiscUtil.PrintInfo("\nProcessing specified chain IDs for input file %s..." % Infile)

        ChainsAndLigandsInfo = InfilesInfo["ChainsAndLigandsInfo"][FileIndex]
        SpecifiedChainsAndLigandsInfo = PyMOLUtil.ProcessChainsAndLigandsOptionsInfo(
            ChainsAndLigandsInfo, "-c, --chainIDs", OptionsInfo["ChainIDs"], None, None
        )

        # Setup chain selections...
        ChainSelections = None
        if not OptionsInfo["AllChains"]:
            Chains = ["chain %s" % ChainID for ChainID in SpecifiedChainsAndLigandsInfo["ChainIDs"]]
            ChainSelections = "(%s)" % " or ".join(Chains)

        InfilesInfo["SpecifiedChainsAndLigandsInfo"].append(SpecifiedChainsAndLigandsInfo)
        InfilesInfo["ChainSelections"].append(ChainSelections)

        MiscUtil.PrintInfo("Specified chain IDs: %s" % (", ".join(SpecifiedChainsAndLigandsInfo["ChainIDs"])))


def ProcessKeepSelectionOptions():
    """Process keep selection options.

    Builds a PyMOL selection string excluding solvents, inorganics, and
    ligands for each corresponding keep option that is turned off and
    stores it in OptionsInfo["NonChainSelections"]. The stored value is
    None when everything is kept.
    """

    KeepSelections = []
    if not OptionsInfo["KeepSolvents"]:
        KeepSelections.append("(not solvent)")
    if not OptionsInfo["KeepInorganics"]:
        KeepSelections.append("(not inorganic)")
    if not OptionsInfo["KeepLigands"]:
        KeepSelections.append("(not organic)")

    NonChainSelections = None
    if KeepSelections:
        NonChainSelections = "(%s)" % " and ".join(KeepSelections)

    OptionsInfo["NonChainSelections"] = NonChainSelections


def ProcessOptions():
    """Process and validate command line arguments and options.

    Validates the raw values in Options and stores their processed forms
    in OptionsInfo for use by the rest of the script.
    """

    MiscUtil.PrintInfo("Processing options...")

    # Validate options...
    ValidateOptions()

    OptionsInfo["Addhydrogens"] = bool(re.match("^Yes$", Options["--addHydrogens"], re.I))

    OptionsInfo["Infiles"] = Options["--infiles"]
    # Expanded input file names are stored in Options by ValidateOptions().
    OptionsInfo["InfilesNames"] = Options["--infilesNames"]

    OptionsInfo["Outfile"] = Options["--outfile"]
    OptionsInfo["Overwrite"] = Options["--overwrite"]

    # The delimiter is determined by the extension of the output file.
    OptionsInfo["OutDelim"] = " "
    if MiscUtil.CheckFileExt(OptionsInfo["Outfile"], "csv"):
        OptionsInfo["OutDelim"] = ","
    elif MiscUtil.CheckFileExt(OptionsInfo["Outfile"], "tsv txt"):
        OptionsInfo["OutDelim"] = "\t"
    else:
        MiscUtil.PrintError(
            'The file name specified , %s, for option "--outfile" is not valid. Supported file formats: csv tsv txt\n'
            % (OptionsInfo["Outfile"])
        )

    OptionsInfo["KeepInorganics"] = bool(re.match("^Yes$", Options["--keepInorganics"], re.I))
    OptionsInfo["KeepLigands"] = bool(re.match("^Yes$", Options["--keepLigands"], re.I))
    OptionsInfo["KeepSolvents"] = bool(re.match("^Yes$", Options["--keepSolvents"], re.I))
    ProcessKeepSelectionOptions()

    # Quiet is kept as an integer flag as it is passed to the PyMOL functions.
    OptionsInfo["Quiet"] = 1 if re.match("^Yes$", Options["--quiet"], re.I) else 0

    OptionsInfo["Precision"] = int(Options["--precision"])

    OptionsInfo["Mode"] = Options["--mode"]
    ProcessSpecifiedPropertyNames()

    RetrieveInfilesInfo()
    OptionsInfo["ChainIDs"] = Options["--chainIDs"]
    OptionsInfo["AllChains"] = bool(re.match("^All$", Options["--chainIDs"], re.I))
    ProcessChainIDs()


def RetrieveOptions():
    """Retrieve command line arguments and options.

    Parses the command line using docopt into the global Options, changes
    to the specified working directory, and handles the options which
    print information and exit: "--examples" and "--list".
    """

    # Get options...
    global Options
    Options = docopt(_docoptUsage_)

    # Set current working directory to the specified directory...
    WorkingDir = Options["--workingdir"]
    if WorkingDir:
        os.chdir(WorkingDir)

    # Handle examples option...
    if "--examples" in Options and Options["--examples"]:
        MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_))
        sys.exit(0)

    # Handle listing of property names...
    if Options["--list"]:
        ProcessListPropertyNames()
        sys.exit(0)


def ValidateOptions():
    """Validate option values.

    Also expands the input file names specified using "-i, --infiles" and
    stores the resulting list in Options["--infilesNames"].
    """

    MiscUtil.ValidateOptionTextValue("-a, --addHydrogens", Options["--addHydrogens"], "yes no")
    MiscUtil.ValidateOptionTextValue("--keepInorganics", Options["--keepInorganics"], "yes no")
    MiscUtil.ValidateOptionTextValue("--keepLigands", Options["--keepLigands"], "yes no")
    MiscUtil.ValidateOptionTextValue("--keepSolvents", Options["--keepSolvents"], "yes no")

    # Expand infile names..
    InfilesNames = MiscUtil.ExpandFileNames(Options["--infiles"], ",")

    # Validate file extensions...
    for Infile in InfilesNames:
        MiscUtil.ValidateOptionFilePath("-i, --infiles", Infile)
        MiscUtil.ValidateOptionFileExt("-i, --infiles", Infile, "pdb cif")
    Options["--infilesNames"] = InfilesNames

    MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "csv tsv txt")
    MiscUtil.ValidateOptionsOutputFileOverwrite(
        "-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"]
    )

    MiscUtil.ValidateOptionIntegerValue("-p, --precision", Options["--precision"], {">": 0})
    MiscUtil.ValidateOptionTextValue("--quiet", Options["--quiet"], "yes no")


# Setup a usage string for docopt...
# This text is passed to docopt() in RetrieveOptions() and is also the source
# of the examples printed for "-e, --examples", so its content is part of the
# behavior of the script and not just documentation.
_docoptUsage_ = """
PyMOLCalculateProperties.py - Calculate physicochemical properties

Usage:
    PyMOLCalculateProperties.py [--addHydrogens <yes or no>]
                                [--chainIDs <First, All or ID1,ID2...>] [--list] [--keepInorganics <yes or no>]
                                [--keepLigands <yes or no>] [--keepSolvents <yes or no>]
                                [--mode <All or Name1,Name2,Name3,...>]
                                [--overwrite] [--precision <number>] [--quiet <yes or no>]
                                [-w <dir>] -i <infile1,infile2,infile3...> -o <outfile>
    PyMOLCalculateProperties.py -l | --list
    PyMOLCalculateProperties.py -h | --help | -e | --examples

Description:
    Calculate physicochemical properties for macromolecules. The properties may
    be calculated for the complete complex or a specified list of chain IDs. Ligands,
    inorganics, and solvents may be optionally excluded during the calculation
    of properties.

    The supported input file format are: PDB (.pdb), mmCIF (.cif)

    The supported output file formats are: CSV (.csv), TSV (.tsv, .txt)

Options:
    -a, --addHydrogens <yes or no>  [default: yes]
        Add hydrogens before calculating physiochemical properties.
    -c, --chainIDs <First, All or ID1,ID2...>  [default: All]
        List of chain IDs to use for calculating physicochemical properties. Possible
        values: First, All, or a comma delimited list of chain IDs. The default is to use
        all chain IDs in input file.
    -e, --examples
        Print examples.
    -h, --help
        Print this help message.
    -i, --infiles <infile1,infile2,infile3...>
        A comma delimited list of input files. The wildcards are also allowed
        in file names.
    --keepInorganics <yes or no>  [default: yes]
        Keep inorganic molecules during calculation of physiochemical properties.
        The inorganic molecules are identified using inorganic selection operator
        available in PyMOL.
    --keepLigands <yes or no>  [default: yes]
        Keep ligand molecules during calculation of physiochemical properties.
        The ligand molecules are identified using organic selection operator
        available in PyMOL.
    --keepSolvents <yes or no>  [default: yes]
        Keep solvent molecules during calculation of physiochemical properties.
        The solvent molecules are identified using solvent selection operator
        available in PyMOL.
    -l, --list
        List available property names without performing any calculations.
    -m, --mode <All or Name1,Name2,Name3,...>  [default: All]
        Comma delimited lists of physicochemical properties to calculate. Default:
        'All'. The following properties may be calculated for macromolecules:

            CenterOfMass,MolecularWeight,MolecularSurfaceArea
            SumOfFormalCharges,SumOfPartialCharges,SolventAccessibleSurfaceArea

    -o, --outfile <outfile>
        Output file name for writing out calculated values. Supported text file extensions:
        csv, tsv or txt.
    --overwrite
        Overwrite existing files.
    -p, --precision <number>  [default: 3]
        Floating point precision for writing the calculated property values.
    -q, --quiet <yes or no>  [default: yes]
        Do not print information during the calculation of properties.
    -w, --workingdir <dir>
        Location of working directory which defaults to the current directory.

Examples:
    To calculate all available properties for all chains in input file along with all
    ligands, inorganics and solvents after adding hydrogens and write out a CSV
    file containing calculated values and PDB IDs, type:

        % PyMOLCalculateProperties.py -i Sample3.pdb -o Sample3Out.csv

    To calculate specified properties for all chains in input file along with all
    ligands, inorganics and solvents after adding hydrogens and write out a CSV
    file containing calculated values and PDB IDs, type:

        % PyMOLCalculateProperties.py -m "MolecularWeight,CenterOfMass"
          -i Sample3.pdb -o Sample3Out.csv

    To calculate all available properties for chain E in input file without including
    ligands, inorganics and solvents, and addition of hydrogens, and write out a
    TSV file containing calculated values and PDB IDs, type:

        % PyMOLCalculateProperties.py --addHydrogens no -c E --keepLigands
          no --keepInorganics no --keepSolvents no -i Sample3.pdb -o
          Sample3Out.tsv

    To calculate all available properties for all chains in multiple files along with all
    ligands, inorganics and solvents after adding hydrogens and write out a CSV
    file containing calculated values and PDB IDs, type:

        % PyMOLCalculateProperties.py -i "Sample3.pdb,Sample4.pdb,Sample5.pdb"
          -o SampleOut.csv

Author:
    Manish Sud(msud@san.rr.com)

See also:
    PyMOLCalculateRMSD.py, PyMOLSplitChainsAndLigands.py,
    PyMOLVisualizeMacromolecules.py

Copyright:
    Copyright (C) 2026 Manish Sud. All rights reserved.

    The functionality available in this script is implemented using PyMOL, a
    molecular visualization system on an open source foundation originally
    developed by Warren DeLano.

    This file is part of MayaChemTools.

    MayaChemTools is free software; you can redistribute it and/or modify it under
    the terms of the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your option) any
    later version.

"""

# Run the script only when executed directly, not when imported as a module.
if __name__ == "__main__":
    main()