MayaChemTools

   1 #!/bin/env python
   2 #
   3 # File: OpenMMPrepareMacromolecule.py
   4 # Author: Manish Sud <msud@san.rr.com>
   5 #
   6 # Copyright (C) 2025 Manish Sud. All rights reserved.
   7 #
   8 # The functionality available in this script is implemented using OpenMM, an
   9 # open source molecuar simulation package.
  10 #
  11 # This file is part of MayaChemTools.
  12 #
  13 # MayaChemTools is free software; you can redistribute it and/or modify it under
  14 # the terms of the GNU Lesser General Public License as published by the Free
  15 # Software Foundation; either version 3 of the License, or (at your option) any
  16 # later version.
  17 #
  18 # MayaChemTools is distributed in the hope that it will be useful, but without
  19 # any warranty; without even the implied warranty of merchantability of fitness
  20 # for a particular purpose.  See the GNU Lesser General Public License for more
  21 # details.
  22 #
  23 # You should have received a copy of the GNU Lesser General Public License
  24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  26 # Boston, MA, 02111-1307, USA.
  27 #
  28 
  29 from __future__ import print_function
  30 
  31 # Add local python path to the global path and import standard library modules...
  32 import os
  33 import sys;  sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python"))
  34 import time
  35 import re
  36 
  37 # OpenMM imports...
  38 try:
  39     import openmm as mm
  40     import pdbfixer
  41     from pdbfixer.pdbfixer import __version__ as pdbfixerversion
  42 except ImportError as ErrMsg:
  43     sys.stderr.write("\nFailed to import OpenMM module/package: %s\n" % ErrMsg)
  44     sys.stderr.write("Check/update your OpenMM environment and try again.\n\n")
  45     sys.exit(1)
  46 
  47 # MayaChemTools imports...
  48 try:
  49     from docopt import docopt
  50     import MiscUtil
  51 except ImportError as ErrMsg:
  52     sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg)
  53     sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n")
  54     sys.exit(1)
  55 
  56 ScriptName = os.path.basename(sys.argv[0])
  57 Options = {}
  58 OptionsInfo = {}
  59 
  60 def main():
  61     """Start execution of the script."""
  62     
  63     MiscUtil.PrintInfo("\n%s (OpenMM v%s; PDBFixerVersion v%s; MayaChemTools v%s; %s): Starting...\n" % (ScriptName, mm.Platform.getOpenMMVersion(), pdbfixerversion, MiscUtil.GetMayaChemToolsVersion(), time.asctime()))
  64     
  65     (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime()
  66     
  67     # Retrieve command line arguments and options...
  68     RetrieveOptions()
  69     
  70     # Process and validate command line arguments and options...
  71     ProcessOptions()
  72 
  73     # Perform actions required by the script...
  74     PrepareMacromolecule()
  75     
  76     MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName)
  77     MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime))
  78 
  79 def PrepareMacromolecule():
  80     """Prepare a macromolecule for simulation and write it out."""
  81 
  82     # Read macromolecule...
  83     PDBFixerObjectHandle = ReadMacromolecule()
  84 
  85     # Identify missing residues...
  86     IdentifyMissingResidues(PDBFixerObjectHandle)
  87 
  88     # Identify and replace non-standard residues...
  89     IdentifyAndReplaceNonStandardResidues(PDBFixerObjectHandle)
  90 
  91     # Identify and delete heterogen residues...
  92     IdentifyAndDeleteHeterogenResidues(PDBFixerObjectHandle)
  93 
  94     # Identify and add missing atoms...
  95     IdentifyAndAddMissingAtoms(PDBFixerObjectHandle)
  96 
  97     if OptionsInfo["WaterBox"]:
  98         AddWaterBox(PDBFixerObjectHandle)
  99     elif OptionsInfo["Membrane"]:
 100         AddLipidMembrane(PDBFixerObjectHandle)
 101     else:
 102         MiscUtil.PrintInfo("\nSkipping addtion of water box or membrane...")
 103     
 104     # Write macromolecule...
 105     WriteMacromolecule(PDBFixerObjectHandle)
 106 
 107 def ReadMacromolecule():
 108     """Read macromolecule."""
 109 
 110     Infile = OptionsInfo["Infile"]
 111     MiscUtil.PrintInfo("\nProcessing file %s..." % Infile)
 112     
 113     PDBFixerObjectHandle = pdbfixer.pdbfixer.PDBFixer(filename = Infile)
 114 
 115     return PDBFixerObjectHandle
 116 
 117 def WriteMacromolecule(PDBFixerObjectHandle):
 118     """Write macromolecule. """
 119     
 120     Outfile = OptionsInfo["Outfile"]
 121     MiscUtil.PrintInfo("\nGenerating output file %s..." % Outfile)
 122 
 123     OutFH = open(Outfile, "w")
 124     
 125     if PDBFixerObjectHandle.source is not None:
 126         OutFH.write("REMARK   1 PDBFIXER FROM: %s\n" % PDBFixerObjectHandle.source)
 127     
 128     mm.app.PDBFile.writeFile(PDBFixerObjectHandle.topology, PDBFixerObjectHandle.positions, OutFH, keepIds = True)
 129     
 130     OutFH.close()
 131 
 132 def IdentifyMissingResidues(PDBFixerObjectHandle):
 133     """Identify missing residues based on PDB REMARK records."""
 134     
 135     MiscUtil.PrintInfo("\nIdentifying missing residues...")
 136     
 137     PDBFixerObjectHandle.missingResidues = {}
 138     PDBFixerObjectHandle.findMissingResidues()
 139 
 140     ListMissingResidues(PDBFixerObjectHandle)
 141 
 142 def ListMissingResidues(PDBFixerObjectHandle):
 143     """List missing residues."""
 144     
 145     if len(PDBFixerObjectHandle.missingResidues) == 0:
 146         MiscUtil.PrintInfo("Number of missing residues: 0")
 147         return
 148 
 149     Chains = list(PDBFixerObjectHandle.topology.chains())
 150     
 151     MissingResiduesCount = 0
 152     MissingResiduesInfo = []
 153     for Index, Key in enumerate(sorted(PDBFixerObjectHandle.missingResidues)):
 154         ChainIndex = Key[0]
 155         # ResidueIndex corresponds to the residue after the insertion of missing residues...`
 156         ResidueIndex = Key[1]
 157         
 158         MissingResidues = PDBFixerObjectHandle.missingResidues[Key]
 159         MissingResiduesCount += len(MissingResidues)
 160         
 161         Chain = Chains[ChainIndex]
 162         ChainResidues = list(Chain.residues())
 163         
 164         if ResidueIndex < len(ChainResidues):
 165             ResidueNumOffset = int(ChainResidues[ResidueIndex].id) - len(MissingResidues) - 1
 166         else:
 167             ResidueNumOffset = int(ChainResidues[-1].id)
 168 
 169         StartResNum = ResidueNumOffset + 1
 170         EndResNum = ResidueNumOffset + len(MissingResidues)
 171         
 172         MissingResiduesInfo.append("Chain: %s; StartResNum-EndResNum: %s-%s; Count: %s; Residues: %s" % (Chain.id, StartResNum, EndResNum, len(MissingResidues), ", ".join(MissingResidues)))
 173 
 174     MiscUtil.PrintInfo("Total number of missing residues: %s" % (MissingResiduesCount))
 175     if OptionsInfo["ListDetails"]:
 176         MiscUtil.PrintInfo("%s" % ("\n".join(MissingResiduesInfo)))
 177 
 178 def IdentifyAndReplaceNonStandardResidues(PDBFixerObjectHandle):
 179     """Identify and replace non-standard residues."""
 180     
 181     MiscUtil.PrintInfo("\nIdentifying non-standard residues...")
 182     
 183     PDBFixerObjectHandle.findNonstandardResidues()
 184     NonStandardResidues = PDBFixerObjectHandle.nonstandardResidues
 185     NumNonStandardResiues = len(NonStandardResidues)
 186     
 187     MiscUtil.PrintInfo("Total number of non-standard residues: %s" % NumNonStandardResiues)
 188     
 189     if  NumNonStandardResiues == 0:
 190         return
 191     
 192     if OptionsInfo["ListDetails"]:
 193         ListNonStandardResidues(PDBFixerObjectHandle)
 194     
 195     if not OptionsInfo["ReplaceNonStandardResidues"]:
 196         MiscUtil.PrintInfo("Skipping replacement of non-standard residues...")
 197         return
 198     
 199     MiscUtil.PrintInfo("Replacing non-standard residues...")
 200     PDBFixerObjectHandle.replaceNonstandardResidues()
 201 
 202 def ListNonStandardResidues(PDBFixerObjectHandle):
 203     """List non-standard residues. """
 204 
 205     NonStandardResidues = PDBFixerObjectHandle.nonstandardResidues
 206     
 207     if len(NonStandardResidues) == 0:
 208         return
 209 
 210     NonStandardResiduesMappingInfo = []
 211     for Values in NonStandardResidues:
 212         NonStandardRes = Values[0]
 213         StandardResName  = Values[1]
 214         
 215         MappingInfo = "<%s %s %s>: %s" % (NonStandardRes.id, NonStandardRes.name, NonStandardRes.chain.id, StandardResName)
 216         NonStandardResiduesMappingInfo.append(MappingInfo)
 217 
 218     if len(NonStandardResiduesMappingInfo):
 219         MiscUtil.PrintInfo("Non-standard residues mapping: %s\n" % ",".join(NonStandardResiduesMappingInfo))
 220 
 221 def IdentifyAndDeleteHeterogenResidues(PDBFixerObjectHandle):
 222     """Identify and delete heterogen residues."""
 223     
 224     DeleteHeterogens = OptionsInfo["DeleteHeterogens"]
 225     if DeleteHeterogens is None:
 226         MiscUtil.PrintInfo("\nSkipping deletion of any heterogen residues...")
 227         return
 228 
 229     if re.match("^All$", DeleteHeterogens, re.I):
 230         MiscUtil.PrintInfo("\nDeleting all heterogen residues including water...")
 231         PDBFixerObjectHandle.removeHeterogens(keepWater = False)
 232     elif re.match("^AllExceptWater$", DeleteHeterogens, re.I):
 233         MiscUtil.PrintInfo("\nDeleting all heterogen residues except water...")
 234         PDBFixerObjectHandle.removeHeterogens(keepWater = True)
 235     elif re.match("^WaterOnly$", DeleteHeterogens, re.I):
 236         MiscUtil.PrintInfo("\nDeleting water only heterogen residues...")
 237         DeleteWater(PDBFixerObjectHandle)
 238     else:
 239         MiscUtil.PrintError("The value specified, %s, for option \"--deleteHeterogens\" is not valid. Supported values: All AllExceptWater WaterOnly None" % DeleteHeterogens)
 240         
 241 def DeleteWater(PDBFixerObjectHandle):
 242     """Delete water. """
 243 
 244     ModellerObjectHandle = mm.app.Modeller(PDBFixerObjectHandle.topology, PDBFixerObjectHandle.positions)
 245     ModellerObjectHandle.deleteWater()
 246     
 247     PDBFixerObjectHandle.topology = ModellerObjectHandle.topology
 248     PDBFixerObjectHandle.positions = ModellerObjectHandle.positions
 249 
 250 def IdentifyAndAddMissingAtoms(PDBFixerObjectHandle):
 251     """Identify and missing atoms along with already identified missing residues."""
 252     
 253     MiscUtil.PrintInfo("\nIdentifying missing atoms...")
 254     PDBFixerObjectHandle.findMissingAtoms()
 255 
 256     ListMissingAtoms(PDBFixerObjectHandle)
 257     
 258     if OptionsInfo["AddHeavyAtoms"] and OptionsInfo["AddResidues"]:
 259         Msg = "Adding missing heavy atoms along with missing residues..."
 260     elif OptionsInfo["AddHeavyAtoms"]:
 261         Msg = "Adding missing heavy atoms..."
 262     elif OptionsInfo["AddResidues"]:
 263         Msg = "Adding missing residues..."
 264     else:
 265         Msg = "Skipping addition of any heavy atoms along with any missing residues..."
 266         
 267     if not OptionsInfo["AddHeavyAtoms"]:
 268         PDBFixerObjectHandle.missingAtoms = {}
 269         PDBFixerObjectHandle.missingTerminals = {}
 270 
 271     MiscUtil.PrintInfo("\n%s" % Msg)
 272     if OptionsInfo["AddHeavyAtoms"] or OptionsInfo["AddResidues"]:
 273         PDBFixerObjectHandle.addMissingAtoms()
 274     
 275     if OptionsInfo["AddHydrogens"]:
 276         MiscUtil.PrintInfo("Adding missing hydrogens at pH %s..." % OptionsInfo["AddHydrogensAtpH"])
 277         PDBFixerObjectHandle.addMissingHydrogens(pH = OptionsInfo["AddHydrogensAtpH"], forcefield = None)
 278     else:
 279         MiscUtil.PrintInfo("Skipping addition of any missing hydrogens...")
 280         
 281 def ListMissingAtoms(PDBFixerObjectHandle):
 282     """List missing atoms."""
 283 
 284     if len(PDBFixerObjectHandle.missingAtoms) == 0 and len(PDBFixerObjectHandle.missingTerminals) == 0:
 285         MiscUtil.PrintInfo("Total number of missing atoms: 0")
 286         return
 287 
 288     ListMissingAtomsInfo(PDBFixerObjectHandle.missingAtoms, TerminalAtoms = False)
 289     ListMissingAtomsInfo(PDBFixerObjectHandle.missingTerminals, TerminalAtoms = True)
 290     
 291 def ListMissingAtomsInfo(MissingAtoms, TerminalAtoms):
 292     """Setup missing atoms information. """
 293     
 294     MissingAtomsInfo = []
 295     MissingAtomsCount = 0
 296     MissingAtomsResiduesCount = 0
 297     for Residue, Atoms in MissingAtoms.items():
 298         MissingAtomsResiduesCount += 1
 299         MissingAtomsCount  += len(Atoms)
 300         
 301         if TerminalAtoms:
 302             AtomsNames = [AtomName for AtomName in Atoms]
 303         else:
 304             AtomsNames = [Atom.name for Atom in Atoms]
 305         AtomsInfo = "<%s %s %s>: %s" % (Residue.id, Residue.name, Residue.chain.id, ", ".join(AtomsNames))
 306         MissingAtomsInfo.append(AtomsInfo)
 307 
 308     AtomsLabel = "terminal atoms" if TerminalAtoms else "atoms"
 309     if MissingAtomsCount == 0:
 310         MiscUtil.PrintInfo("Total number of missing %s: %s" % (MissingAtomsCount, AtomsLabel))
 311     else:
 312         ResiduesLabel = "residue" if MissingAtomsResiduesCount == 1 else "residues"
 313         MiscUtil.PrintInfo("Total number of missing %s across %s %s: %s" % (AtomsLabel, MissingAtomsResiduesCount, ResiduesLabel, MissingAtomsCount))
 314         if OptionsInfo["ListDetails"]:
 315             MiscUtil.PrintInfo("Missing %s across residues: %s" % (AtomsLabel, "; ".join(MissingAtomsInfo)))
 316 
 317 def AddWaterBox(PDBFixerObjectHandle):
 318     """Add water box. """
 319     
 320     MiscUtil.PrintInfo("\nAdding water box...")
 321     
 322     WaterBoxParams = OptionsInfo["WaterBoxParams"]
 323     
 324     Size, Padding, Shape = [None] * 3
 325     if WaterBoxParams["ModeSize"]:
 326         SizeList = WaterBoxParams["SizeList"]
 327         Size = mm.Vec3(SizeList[0], SizeList[1], SizeList[2]) * mm.unit.nanometer
 328     elif WaterBoxParams["ModePadding"]:
 329         Padding = WaterBoxParams["Padding"] * mm.unit.nanometer
 330         Shape = WaterBoxParams["Shape"]
 331     else:
 332         MiscUtil.PrintError("The parameter value, %s, specified for parameter name, mode, using \"--waterBoxParams\" option is not a valid value. Supported values: Size Padding \n" % (WaterBoxParams["Mode"]))
 333         
 334     IonicStrength = OptionsInfo["IonicStrength"] * mm.unit.molar
 335     PDBFixerObjectHandle.addSolvent(boxSize = Size, padding = Padding, boxShape = Shape, positiveIon = OptionsInfo["IonPositive"], negativeIon = OptionsInfo["IonNegative"], ionicStrength = IonicStrength)
 336     
 337 def AddLipidMembrane(PDBFixerObjectHandle):
 338     """Add lipid membrane along with water."""
 339     
 340     MiscUtil.PrintInfo("\nAdding membrane along with water...")
 341     
 342     MembraneParams = OptionsInfo["MembraneParams"]
 343 
 344     LipidType = MembraneParams["LipidType"]
 345     MembraneCenterZ = MembraneParams["MembraneCenterZ"] *  mm.unit.nanometer
 346     Padding = MembraneParams["Padding"] *  mm.unit.nanometer
 347     
 348     IonicStrength = OptionsInfo["IonicStrength"] * mm.unit.molar
 349 
 350     PDBFixerObjectHandle.addMembrane(lipidType = LipidType, membraneCenterZ = MembraneCenterZ, minimumPadding = Padding, positiveIon = OptionsInfo["IonPositive"], negativeIon = OptionsInfo["IonNegative"], ionicStrength = IonicStrength)
 351     
 352 def ProcessMembraneParamsOption():
 353     """Process option for membrane parameters."""
 354 
 355     ParamsOptionName = "--membraneParams"
 356     ParamsOptionValue = Options[ParamsOptionName]
 357     ParamsDefaultInfo = {"LipidType" : ["str", "POPC"], "MembraneCenterZ": ["float", 0.0], "Padding": ["float", 1.0]}
 358     MembraneParams = MiscUtil.ProcessOptionNameValuePairParameters(ParamsOptionName, ParamsOptionValue, ParamsDefaultInfo)
 359 
 360     for ParamName in ["LipidType", "MembraneCenterZ", "Padding"]:
 361         ParamValue = MembraneParams[ParamName]
 362         if ParamName == "LipidType":
 363             LipidTypes = ["POPC", "POPE", "DLPC", "DLPE", "DMPC", "DOPC",  "DPPC"]
 364             if ParamValue not in LipidTypes:
 365                 MiscUtil.PrintError("The parameter value, %s, specified for parameter name, %s, using \"%s\" option is not a valid value. Supported values: %s \n" % (ParamValue, ParamName, ParamsOptionName, LipidTypes))
 366         elif ParamName == "Padding":
 367             if  ParamValue <= 0:
 368                 MiscUtil.PrintError("The parameter value, %s, specified for parameter name, %s, using \"%s\" option is not a valid value. Supported values: > 0\n" % (ParamValue, ParamName, ParamsOptionName))
 369 
 370     OptionsInfo["MembraneParams"] = MembraneParams
 371 
 372 def ProcessWaterBoxParamsOption():
 373     """Process option for membrane parameters."""
 374 
 375     ParamsOptionName = "--waterBoxParams"
 376     ParamsOptionValue = Options[ParamsOptionName]
 377     ParamsDefaultInfo = {"Mode" : ["str", "Padding"], "Size": ["str", None], "Padding": ["float", 1.0], "Shape": ["str", "cube"]}
 378     WaterBoxParams = MiscUtil.ProcessOptionNameValuePairParameters(ParamsOptionName, ParamsOptionValue, ParamsDefaultInfo)
 379 
 380     ParamName = "Mode"
 381     ParamValue = WaterBoxParams[ParamName]
 382     ModeSize, ModePadding = [False] * 2
 383     if re.match("Size$", ParamValue, re.I):
 384         ParamValue = "Size"
 385         ModeSize = True
 386     elif re.match("Padding$", ParamValue, re.I):
 387         ParamValue = "Padding"
 388         ModePadding = True
 389     else:
 390         MiscUtil.PrintError("The parameter value, %s, specified for parameter name, %s, using \"%s\" option is not a valid value. Supported values: Size Padding \n" % (ParamValue, ParamName, ParamsOptionName))
 391     WaterBoxParams["Mode"] = ParamValue
 392     WaterBoxParams["ModeSize"] = ModeSize
 393     WaterBoxParams["ModePadding"] = ModePadding
 394 
 395     ParamName = "Size"
 396     ParamValue = WaterBoxParams[ParamName]
 397     ParamValueList = None
 398     if ModeSize:
 399         if ParamValue is None:
 400             MiscUtil.PrintError("The parameter value, %s, specified for parameter name, %s, using \"%s\" option is not a valid value. Supported values: x y z\n" % (ParamValue, ParamName, ParamsOptionName))
 401         Values = ParamValue.split()
 402         if len(Values) != 3:
 403             MiscUtil.PrintError("The parameter value, %s, specified for parameter name, %s, using \"%s\" option is not a valid value. It must contain 3 float values separated by spaces.\n" % (ParamValue, ParamName, ParamsOptionName))
 404         
 405         ParamValueList = []
 406         for Value in Values:
 407             if not MiscUtil.IsFloat(Value):
 408                 MiscUtil.PrintError("The parameter value, %s, specified for parameter name, %s, using \"%s\" option must be a float.\n" % (Value, ParamName, ParamsOptionName))
 409             Value = float(Value)
 410             if Value <= 0:
 411                 MiscUtil.PrintError("The parameter value, %s, specified for parameter name, %s, using \"%s\" option is not a valid value. Supported values: > 0\n" % (Value, ParamName, ParamsOptionName))
 412             ParamValueList.append(Value)
 413     
 414     WaterBoxParams["SizeList"] = ParamValueList
 415     
 416     ParamName = "Padding"
 417     ParamValue = WaterBoxParams[ParamName]
 418     if  ParamValue <= 0:
 419         MiscUtil.PrintError("The parameter value, %s, specified for parameter name, %s, using \"%s\" option is not a valid value. Supported values: > 0\n" % (ParamValue, ParamName, ParamsOptionName))
 420     
 421     ParamName = "Shape"
 422     ParamValue = WaterBoxParams[ParamName].lower()
 423     ShapeValues = ["cube", "dodecahedron", "octahedron"]
 424     if ParamValue not in ShapeValues:
 425         MiscUtil.PrintError("The parameter value, %s, specified for parameter name, %s, using \"%s\" option is not a valid value. Supported values: %s \n" % (ParamValue, ParamName, ParamsOptionName, ShapeValues))
 426     WaterBoxParams[ParamName] = ParamValue
 427     
 428     OptionsInfo["WaterBoxParams"] = WaterBoxParams
 429 
 430 def ProcessOptions():
 431     """Process and validate command line arguments and options."""
 432     
 433     MiscUtil.PrintInfo("Processing options...")
 434     
 435     # Validate options...
 436     ValidateOptions()
 437 
 438     OptionsInfo["Infile"] = Options["--infile"]
 439     FileDir, FileName, FileExt = MiscUtil.ParseFileName(OptionsInfo["Infile"])
 440     OptionsInfo["InfileRoot"] = FileName
 441 
 442     OptionsInfo["Outfile"] = Options["--outfile"]
 443 
 444     OptionsInfo["AddHeavyAtoms"] = True if re.match("^yes$", Options["--addHeavyAtoms"], re.I) else False
 445 
 446     OptionsInfo["AddHydrogens"] = True if re.match("^yes$", Options["--addHydrogens"], re.I) else False
 447     OptionsInfo["AddHydrogensAtpH"] = float(Options["--addHydrogensAtpH"])
 448     
 449     OptionsInfo["AddResidues"] = True if re.match("^yes$", Options["--addResidues"], re.I) else False
 450 
 451     DeleteHeterogens = Options["--deleteHeterogens"]
 452     if re.match("^All$", DeleteHeterogens, re.I):
 453         DeleteHeterogens = "All"
 454     elif re.match("^AllExceptWater$", DeleteHeterogens, re.I):
 455         DeleteHeterogens = "AllExceptWater"
 456     elif re.match("^WaterOnly$", DeleteHeterogens, re.I):
 457         DeleteHeterogens = "WaterOnly"
 458     elif re.match("^None$", DeleteHeterogens, re.I):
 459         DeleteHeterogens = None
 460     elif re.match("^auto$", DeleteHeterogens, re.I):
 461         DeleteHeterogens = None
 462         if re.match("^yes$", Options["--membrane"], re.I) or  re.match("^yes$", Options["--waterBox"], re.I):
 463             DeleteHeterogens = "WaterOnly"
 464     else:
 465         MiscUtil.PrintError("The value specified, %s, for option \"--deleteHeterogens\" is not valid. Supported values: All AllExceptWater WaterOnly None" % DeleteHeterogens)
 466     OptionsInfo["DeleteHeterogens"] = DeleteHeterogens
 467 
 468     OptionsInfo["IonPositive"] = Options["--ionPositive"]
 469     OptionsInfo["IonNegative"] = Options["--ionNegative"]
 470     OptionsInfo["IonicStrength"] = float(Options["--ionicStrength"])
 471     
 472     OptionsInfo["ListDetails"] = True if re.match("^yes$", Options["--listDetails"], re.I) else False
 473     
 474     OptionsInfo["Membrane"] = True if re.match("^yes$", Options["--membrane"], re.I) else False
 475     ProcessMembraneParamsOption()
 476     
 477     OptionsInfo["ReplaceNonStandardResidues"] = True if re.match("^yes$", Options["--replaceNonStandardResidues"], re.I) else False
 478     
 479     OptionsInfo["WaterBox"] = True if re.match("^yes$", Options["--waterBox"], re.I) else False
 480     ProcessWaterBoxParamsOption()
 481     
 482     OptionsInfo["Overwrite"] = Options["--overwrite"]
 483     
 484 def RetrieveOptions(): 
 485     """Retrieve command line arguments and options."""
 486     
 487     # Get options...
 488     global Options
 489     Options = docopt(_docoptUsage_)
 490 
 491     # Set current working directory to the specified directory...
 492     WorkingDir = Options["--workingdir"]
 493     if WorkingDir:
 494         os.chdir(WorkingDir)
 495     
 496     # Handle examples option...
 497     if "--examples" in Options and Options["--examples"]:
 498         MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_))
 499         sys.exit(0)
 500 
 501 def ValidateOptions():
 502     """Validate option values."""
 503 
 504     MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"])
 505     MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "pdb pdbx cif")
 506 
 507     MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "pdb")
 508     MiscUtil.ValidateOptionsOutputFileOverwrite("-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"])
 509     MiscUtil.ValidateOptionsDistinctFileNames("-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"])
 510 
 511     MiscUtil.ValidateOptionTextValue("--addHeavyAtoms", Options["--addHeavyAtoms"], "yes no")
 512     
 513     MiscUtil.ValidateOptionTextValue("--addHydrogens", Options["--addHydrogens"], "yes no")
 514     MiscUtil.ValidateOptionFloatValue("--addHydrogensAtpH", Options["--addHydrogensAtpH"], {">=": 0})
 515     
 516     MiscUtil.ValidateOptionTextValue("--addResidues", Options["--addResidues"], "yes no")
 517 
 518     MiscUtil.ValidateOptionTextValue("--deleteHeterogens", Options["--deleteHeterogens"], "All AllExceptWater WaterOnly None auto")
 519     DeleteHeterogens = Options["--deleteHeterogens"]
 520     if re.match("^(AllExceptWater|None)$", DeleteHeterogens, re.I):
 521         if re.match("^yes$", Options["--membrane"], re.I):
 522             MiscUtil.PrintError("The value specified, %s, for option \"--deleteHeterogens\" is not valid during \"yes\" value of option \"--membrane\". " % DeleteHeterogens)
 523         elif re.match("^yes$", Options["--waterBox"], re.I):
 524             MiscUtil.PrintError("The value specified, %s, for option \"--deleteHeterogens\" is not valid during \"yes\" value of option \"--waterBox\". " % DeleteHeterogens)
 525 
 526     ValidValues = "Li+ Na+ K+ Rb+ Cs+"
 527     EscapedValidValuesPattern = "Li\+|Na\+|K\+|Rb\+|Cs\+"
 528     Value = Options["--ionPositive"]
 529     if not re.match("^(%s)$" % EscapedValidValuesPattern, Value):
 530         MiscUtil.PrintError("The value specified, %s, for option \"-ionPositive\" is not valid: Supported value(s): %s" % (Value, ValidValues))
 531     
 532     ValidValues = "F- Cl- Br- I-"
 533     ValidValuesPattern = "F-|Cl-|Br-|I-"
 534     Value = Options["--ionNegative"]
 535     if not re.match("^(%s)$" % ValidValuesPattern, Value):
 536         MiscUtil.PrintError("The value specified, %s, for option \"-ionNegative\" is not valid: Supported value(s): %s" % (Value, ValidValues))
 537 
 538     MiscUtil.ValidateOptionFloatValue("--ionicStrength", Options["--ionicStrength"], {">=": 0})
 539     
 540     MiscUtil.ValidateOptionTextValue("--listDetails", Options["--listDetails"], "yes no")
 541     MiscUtil.ValidateOptionTextValue("--replaceNonStandardResidues", Options["--replaceNonStandardResidues"], "yes no")
 542     
 543     MiscUtil.ValidateOptionTextValue("--membrane", Options["--membrane"], "yes no")
 544     MiscUtil.ValidateOptionTextValue("--waterBox", Options["--waterBox"], "yes no")
 545     
 546     if re.match("^yes$", Options["--membrane"], re.I) and  re.match("^yes$", Options["--waterBox"], re.I):
 547         MiscUtil.PrintError("You've specified, \"Yes\" for both \"--membrane\" and \"--waterBox\" options. It's not allowed. You must choose between adding a water box or a membrane. The water is automatically added during the construction of the membrane.")
 548 
 549 # Setup a usage string for docopt...
 550 _docoptUsage_ = """
 551 OpenMMPrepareMacromolecule.py - Prepare a macromolecule for simulation.
 552 
 553 Usage:
 554     OpenMMPrepareMacromolecule.py [--addHeavyAtoms <yes or no>] [--addHydrogens <yes or no>] [--addHydrogensAtpH <number>]
 555                                   [--addResidues <yes or no>] [--deleteHeterogens <All, AllExceptWater, WaterOnly, None>] [--ionPositive <text>]
 556                                   [--ionNegative <text>] [--ionicStrength <number>] [--listDetails <yes or no> ] [--membrane <yes or no>]
 557                                   [--membraneParams <Name,Value,..>] [--overwrite] [--replaceNonStandardResidues <yes or no>] [--waterBox <yes or no>]
 558                                   [--waterBoxParams <Name,Value,..>] [-w <dir>] -i <infile> -o <outfile>
 559     OpenMMPrepareMacromolecule.py -h | --help | -e | --examples
 560 
 561 Description:
 562     Prepare a macromolecute in an input file for molecular simulation and
 563     and write it out to an output file. The macromolecule is prepared by
 564     automatically performing the following actions:
 565 
 566         . Identify and replace non-standard residues
 567         . Add missing residues
 568         . Add missing heavy atoms
 569         . Add missing hydrogens
 570 
 571     You may optionally remove heterogens, add a water box, and add a lipid
 572     membrane.
 573 
 574     The supported input file format are:  PDB (.pdb, .pdbx) and CIF (.cif)
 575 
 576     The supported output file formats are:  PDB (.pdb)
 577 
 578 Options:
 579     --addHeavyAtoms <yes or no>  [default: yes]
 580         Add missing non-hydrogen atoms based on the templates. The terminal atoms
 581         are also added.
 582     --addHydrogens <yes or no>  [default: yes]
 583         Add missing hydrogens at pH value specified using '-addHydrogensAtpH'
 584         option. The missing hydrogens are added based on the templates and pKa
 585         calculations are performed.
 586     --addHydrogensAtpH <number>  [default: 7.0]
 587         pH value to use for adding missing hydrogens.
 588     --addResidues <yes or no>  [default: yes]
 589         Add missing residues unidentified based on the PDB records.
 590     --deleteHeterogens <All, AllExceptWater, WaterOnly, None>  [default: auto]
 591         Delete heterogens corresponding to  non-standard names of amino acids, dna,
 592         and rna along with any ligand names. 'N' and 'UNK' also consider standard
 593         residues. Default value: WaterOnly during addition of WaterBox or Membrane;
 594         Otherwise, None.
 595         
 596         The 'AllExceptWater' or 'None' values are not allowed during the addition of
 597         a water box or membrane. The waters must be deleted as they are explicitly
 598         added during the construction of a water box and membrane.
 599     -e, --examples
 600         Print examples.
 601     -h, --help
 602         Print this help message.
 603     -i, --infile <infile>
 604         Input file name.
 605     --ionPositive <text>  [default: Na+]
 606         Type of positive ion to add during the addition of a water box or membrane.
 607         Possible values: Li+, Na+, K+, Rb+, or Cs+.
 608     --ionNegative <text>  [default: Cl-]
 609         Type of negative ion to add during the addition of a water box or membrane.
 610         Possible values: Cl-, Br-, F-, or I-.
 611     --ionicStrength <number>  [default: 0.0]
 612         Total concentration of both positive and negative ions to add excluding
 613         the ions added to neutralize the system during the addition of a water box
 614         or a membrane.
 615     -l, --listDetails <yes or no>  [default: no]
 616         List details about missing and non-standard residues along with residues
 617         containing missing atoms.
 618     --membrane <yes or no>  [default: no]
 619         Add lipid membrane along with a water box. The script replies on OpenMM
 620         method modeller.addMebrane() to perform the task. The membrane is added
 621         in the XY plane. The existing macromolecule must be oriented and positioned
 622         correctly.
 623         
 624         A word to the wise: You may want to start with a model from the Orientations
 625         of Proteins in Membranes (OPM) database at http://opm.phar.umich.edu.
 626         
 627         The size of the membrane and water box are determined by the value of
 628         'padding' parameter specified using '--membraneParams' option. All atoms
 629         in macromolecule are guaranteed to be at least this far from any edge of
 630         the periodic box.
 631     --membraneParams <Name,Value,..>  [default: auto]
 632         A comma delimited list of parameter name and value pairs for adding
 633         a lipid membrane and water.
 634         
 635         The supported parameter names along with their default values are
 636         are shown below:
 637             
 638             lipidType, POPC  [ Possible values: POPC, POPE, DLPC, DLPE,  DMPC, 
 639                 DOPC or DPPC ]
 640             membraneCenterZ, 0.0
 641             padding, 1.0
 642             
 643         A brief description of parameters is provided below:
 644             
 645             lipidType: Type of lipid to use for constructing the membrane.
 646             membraneCenterZ: Position along the Z axis of the center of
 647                 the membrane in nanomertes.
 648             padding: Minimum padding distance to use in nanomertes. It's used
 649                 to determine the size of the membrane and water box. All atoms
 650                 in macromolecule are guaranteed to be at least this far from
 651                 any edge of the periodic box.
 652             
 653     -o, --outfile <infile>
 654         Output file name.
 655     --overwrite
 656         Overwrite existing files.
 657     --replaceNonStandardResidues <yes or no>  [default: yes]
 658         Replace non-standard residue names by standard residue names based on the
 659         list of non-standard residues available in pdbfixer.
 660     --waterBox <yes or no>  [default: no]
 661         Add water box.
 662     --waterBoxParams <Name,Value,..>  [default: auto]
 663         A comma delimited list of parameter name and value pairs for adding
 664         a water box.
 665         
 666         The supported parameter names along with their default values are
 667         are shown below:
 668             
 669             mode, Padding  [ Possible values: Size or Padding ]
 670             size, None  [ Possible value: xsize ysize zsize ]
 671             padding, 1.0
 672             shape, cube  [ Possible values: cube, dodecahedron, or octahedron ]
 673             
 674         A brief description of parameters is provided below:
 675             
 676             mode: Specify the size of the waterbox explicitly or calculate it
 677                 automatically for a macromolecule along with adding padding
 678                 around macromolecule.
 679             size: A space delimited triplet of values corresponding to water
 680                 size in nanometers. It must be specified during 'Size' value of
 681                 'mode' parameter.
 682             padding: Padding around macromolecule in nanometers for filling
 683                 box with water. It must be specified during 'Padding' value of
 684                 'mode' parameter.
 685             
 686     -w, --workingdir <dir>
 687         Location of working directory which defaults to the current directory.
 688 
 689 Examples:
 690     To prepare a macromolecule in a PDB file by replacing non-standard residues,
 691     adding missing residues, adding missing heavy atoms and missing hydrogens,
 692     and generate a PDB file, type:
 693 
 694         % OpenMMPrepareMacromolecule.py -i sample11.pdb -o Sample11Out.pdb
 695 
 696     To run the first example for listing additional details about missing atoms and
 697     residues, and generate a PDB file, type:
 698 
 699         % OpenMMPrepareMacromolecule.py --listDetails -i Sample11.pdb
 700           -o Sample11Out.pdb
 701 
 702     To run the first example for deleting all heterogens including water along
 703     with performing all default actions, and generate a PDB file, type:
 704 
 705         % OpenMMPrepareMacromolecule.py --deleteHeterogens All -i Sample11.pdb
 706           -o Sample11Out.pdb
 707 
 708     To run the first example for deleting water only heterogens along with performing
 709     all default actions, and generate a PDB file, type:
 710 
 711         % OpenMMPrepareMacromolecule.py --deleteHeterogens WaterOnly
 712           -i Sample11.pdb -o Sample11Out.pdb --ov
 713 
 714     To run the first example for adding a water box by automatically calculating its
 715     size, along with performing all default actions, and generate a PDB file, type:
 716 
 717         % OpenMMPrepareMacromolecule.py --waterBox yes -i Sample11.pdb
 718           -o Sample11Out.pdb
 719 
 720     To run the previous example by explcitly specifying various water box parameters,
 721     and generate a PDB file, type:
 722 
 723         % OpenMMPrepareMacromolecule.py --waterBox yes
 724           --waterBoxParams "mode,Padding, size, none, padding, 1.0, shape, cube"
 725           -i Sample11.pdbi -o Sample11Out.pdb
 726 
 727     To run the first example for adding a water box of a specific size along with
 728     performing all default actions, and generate a PDB file, type:
 729 
 730         % OpenMMPrepareMacromolecule.py --waterBox yes
 731           --waterBoxParams "mode,Size, size, 7.635 7.077 7.447"
 732           -i Sample11.pdbi -o Sample11Out.pdb
 733 
 734     To add a lipid membrane around a correctly oriented and positioned macromolecule,
 735     along with performing all default actions,  and generate a PDB file, type:
 736 
 737         % OpenMMPrepareMacromolecule.py --membrane yes
 738           -i Sample11.pdbi -o Sample11Out.pdb
 739 
 740     To run the previous example by explcitly specifying various membrane parameters,
 741     and generate a PDB file, type:
 742 
 743         % OpenMMPrepareMacromolecule.py --membrane yes
 744           --membraneParams "lipidType, POPC, membraneCenterZ, 0.0, padding, 1.0"
 745           -i Sample11.pdb -o Sample11Out.pdb
 746 
 747 Author:
 748     Manish Sud(msud@san.rr.com)
 749 
 750 See also:
 751     InfoPDBFiles.pl, ExtractFromPDBFiles.pl, PyMOLExtractSelection.py,
 752     PyMOLInfoMacromolecules.py, PyMOLSplitChainsAndLigands.py
 753 
 754 Copyright:
 755     Copyright (C) 2025 Manish Sud. All rights reserved.
 756 
 757     The functionality available in this script is implemented using OpenMM, an
 758     open source molecuar simulation package.
 759 
 760     This file is part of MayaChemTools.
 761 
 762     MayaChemTools is free software; you can redistribute it and/or modify it under
 763     the terms of the GNU Lesser General Public License as published by the Free
 764     Software Foundation; either version 3 of the License, or (at your option) any
 765     later version.
 766 
 767 """
 768 
 769 if __name__ == "__main__":
 770     main()