1 #!/bin/env python 2 # 3 # File: Psi4PerformMinimization.py 4 # Author: Manish Sud <msud@san.rr.com> 5 # 6 # Contributor: Francois Berenger 7 # 8 # Copyright (C) 2024 Manish Sud. All rights reserved. 9 # 10 # The functionality available in this script is implemented using Psi4, an 11 # open source quantum chemistry software package, and RDKit, an open 12 # source toolkit for cheminformatics developed by Greg Landrum. 13 # 14 # This file is part of MayaChemTools. 15 # 16 # MayaChemTools is free software; you can redistribute it and/or modify it under 17 # the terms of the GNU Lesser General Public License as published by the Free 18 # Software Foundation; either version 3 of the License, or (at your option) any 19 # later version. 20 # 21 # MayaChemTools is distributed in the hope that it will be useful, but without 22 # any warranty; without even the implied warranty of merchantability of fitness 23 # for a particular purpose. See the GNU Lesser General Public License for more 24 # details. 25 # 26 # You should have received a copy of the GNU Lesser General Public License 27 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or 28 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, 29 # Boston, MA, 02111-1307, USA. 30 # 31 32 from __future__ import print_function 33 34 # Add local python path to the global path and import standard library modules... 35 import os 36 import sys; sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python")) 37 import time 38 import re 39 import shutil 40 import multiprocessing as mp 41 42 # Psi4 imports... 43 if (hasattr(shutil, 'which') and shutil.which("psi4") is None): 44 sys.stderr.write("\nWarning: Failed to find 'psi4' in your PATH indicating potential issues with your\n") 45 sys.stderr.write("Psi4 environment. The 'import psi4' directive in the global scope of the script\n") 46 sys.stderr.write("interferes with the multiprocessing functionality. It is imported later in the\n") 47 sys.stderr.write("local scope during the execution of the script and may fail. Check/update your\n") 48 sys.stderr.write("Psi4 environment and try again.\n\n") 49 50 # RDKit imports... 51 try: 52 from rdkit import rdBase 53 from rdkit import Chem 54 from rdkit.Chem import AllChem 55 except ImportError as ErrMsg: 56 sys.stderr.write("\nFailed to import RDKit module/package: %s\n" % ErrMsg) 57 sys.stderr.write("Check/update your RDKit environment and try again.\n\n") 58 sys.exit(1) 59 60 # MayaChemTools imports... 61 try: 62 from docopt import docopt 63 import MiscUtil 64 import Psi4Util 65 import RDKitUtil 66 except ImportError as ErrMsg: 67 sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg) 68 sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n") 69 sys.exit(1) 70 71 ScriptName = os.path.basename(sys.argv[0]) 72 Options = {} 73 OptionsInfo = {} 74 75 def main(): 76 """Start execution of the script.""" 77 78 MiscUtil.PrintInfo("\n%s (Psi4: Imported later; RDKit v%s; MayaChemTools v%s; %s): Starting...\n" % (ScriptName, rdBase.rdkitVersion, MiscUtil.GetMayaChemToolsVersion(), time.asctime())) 79 80 (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime() 81 82 # Retrieve command line arguments and options... 83 RetrieveOptions() 84 85 # Process and validate command line arguments and options... 86 ProcessOptions() 87 88 # Perform actions required by the script... 89 PerformMinimization() 90 91 MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName) 92 MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime)) 93 94 def PerformMinimization(): 95 """Perform minimization.""" 96 97 # Setup a molecule reader... 98 MiscUtil.PrintInfo("\nProcessing file %s..." % OptionsInfo["Infile"]) 99 Mols = RDKitUtil.ReadMolecules(OptionsInfo["Infile"], **OptionsInfo["InfileParams"]) 100 101 # Set up a molecule writer... 102 Writer = RDKitUtil.MoleculesWriter(OptionsInfo["Outfile"], **OptionsInfo["OutfileParams"]) 103 if Writer is None: 104 MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OptionsInfo["Outfile"]) 105 MiscUtil.PrintInfo("Generating file %s..." % OptionsInfo["Outfile"]) 106 107 MolCount, ValidMolCount, MinimizationFailedCount, WriteFailedCount = ProcessMolecules(Mols, Writer) 108 109 if Writer is not None: 110 Writer.close() 111 112 MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount) 113 MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount) 114 MiscUtil.PrintInfo("Number of molecules failed during conformation generation or minimization: %d" % MinimizationFailedCount) 115 MiscUtil.PrintInfo("Number of molecules failed during writing: %d" % WriteFailedCount) 116 MiscUtil.PrintInfo("Number of ignored molecules: %d" % (MolCount - ValidMolCount + MinimizationFailedCount + WriteFailedCount)) 117 118 def ProcessMolecules(Mols, Writer): 119 """Process and minimize molecules.""" 120 121 if OptionsInfo["MPMode"]: 122 return ProcessMoleculesUsingMultipleProcesses(Mols, Writer) 123 else: 124 return ProcessMoleculesUsingSingleProcess(Mols, Writer) 125 126 def ProcessMoleculesUsingSingleProcess(Mols, Writer): 127 """Process and minimize molecules using a single process.""" 128 129 # Intialize Psi4... 130 MiscUtil.PrintInfo("\nInitializing Psi4...") 131 Psi4Handle = Psi4Util.InitializePsi4(Psi4RunParams = OptionsInfo["Psi4RunParams"], Psi4OptionsParams = OptionsInfo["Psi4OptionsParams"], PrintVersion = True, PrintHeader = True) 132 if OptionsInfo["Psi4DDXSolvationMode"]: 133 Psi4Util.UpdatePsi4OptionsParameters(Psi4Handle, OptionsInfo["Psi4DDXSolvationOptions"]) 134 OptionsInfo["psi4"] = Psi4Handle 135 136 # Setup max iterations global variable... 137 Psi4Util.UpdatePsi4OptionsParameters(Psi4Handle, {'GEOM_MAXITER': OptionsInfo["MaxIters"]}) 138 139 # Setup conversion factor for energy units... 140 SetupEnergyConversionFactor(Psi4Handle) 141 142 if OptionsInfo["SkipConformerGeneration"]: 143 if OptionsInfo["Psi4ConstrainTorsionsMode"]: 144 MiscUtil.PrintInfo("\nPerforming minimization without generation of conformers and constrained torsions...") 145 else: 146 MiscUtil.PrintInfo("\nPerforming minimization without generation of conformers...") 147 else: 148 MiscUtil.PrintInfo("\nPerforming minimization with generation of conformers...") 149 150 (MolCount, ValidMolCount, MinimizationFailedCount, WriteFailedCount) = [0] * 4 151 for Mol in Mols: 152 MolCount += 1 153 154 if not CheckAndValidateMolecule(Mol, MolCount): 155 continue 156 157 # Setup 2D coordinates for SMILES input file... 158 if OptionsInfo["SMILESInfileStatus"]: 159 AllChem.Compute2DCoords(Mol) 160 161 ValidMolCount += 1 162 163 Mol, CalcStatus, ConfID, Energy, SolvationEnergy = MinimizeMoleculeOrConformers(Psi4Handle, Mol, MolCount) 164 165 if not CalcStatus: 166 if not OptionsInfo["QuietMode"]: 167 MolName = RDKitUtil.GetMolName(Mol, MolCount) 168 MiscUtil.PrintWarning("Failed to calculate energy for molecule %s" % MolName) 169 170 MinimizationFailedCount += 1 171 continue 172 173 WriteStatus = WriteMolecule(Writer, Mol, MolCount, ConfID, Energy, SolvationEnergy) 174 175 if not WriteStatus: 176 WriteFailedCount += 1 177 178 return (MolCount, ValidMolCount, MinimizationFailedCount, WriteFailedCount) 179 180 def ProcessMoleculesUsingMultipleProcesses(Mols, Writer): 181 """Process and minimize molecules using multiprocessing.""" 182 183 if OptionsInfo["MPLevelConformersMode"]: 184 return ProcessMoleculesUsingMultipleProcessesAtConformersLevel(Mols, Writer) 185 elif OptionsInfo["MPLevelMoleculesMode"]: 186 return ProcessMoleculesUsingMultipleProcessesAtMoleculesLevel(Mols, Writer) 187 else: 188 MiscUtil.PrintError("The value, %s, option \"--mpLevel\" is not supported." % (OptionsInfo["MPLevel"])) 189 190 def ProcessMoleculesUsingMultipleProcessesAtMoleculesLevel(Mols, Writer): 191 """Process and minimize molecules using multiprocessing at molecules level.""" 192 193 if OptionsInfo["SkipConformerGeneration"]: 194 if OptionsInfo["Psi4ConstrainTorsionsMode"]: 195 MiscUtil.PrintInfo("\nPerforming minimization without generation of conformers and constrained torsions using multiprocessing at molecules level...") 196 else: 197 MiscUtil.PrintInfo("\nPerforming minimization without generation of conformers using multiprocessing at molecules level...") 198 else: 199 MiscUtil.PrintInfo("\nPerforming minimization with generation of conformers using multiprocessing at molecules level...") 200 201 MPParams = OptionsInfo["MPParams"] 202 203 # Setup data for initializing a worker process... 204 InitializeWorkerProcessArgs = (MiscUtil.ObjectToBase64EncodedString(Options), MiscUtil.ObjectToBase64EncodedString(OptionsInfo)) 205 206 # Setup a encoded mols data iterable for a worker process... 207 WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(Mols) 208 209 # Setup process pool along with data initialization for each process... 210 if not OptionsInfo["QuietMode"]: 211 MiscUtil.PrintInfo("\nConfiguring multiprocessing using %s method..." % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()")) 212 MiscUtil.PrintInfo("NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n" % (MPParams["NumProcesses"], MPParams["InputDataMode"], ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"]))) 213 214 ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeWorkerProcess, InitializeWorkerProcessArgs) 215 216 # Start processing... 217 if re.match("^Lazy$", MPParams["InputDataMode"], re.I): 218 Results = ProcessPool.imap(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"]) 219 elif re.match("^InMemory$", MPParams["InputDataMode"], re.I): 220 Results = ProcessPool.map(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"]) 221 else: 222 MiscUtil.PrintError("The value, %s, specified for \"--inputDataMode\" is not supported." % (MPParams["InputDataMode"])) 223 224 # Print out Psi4 version in the main process... 225 MiscUtil.PrintInfo("\nInitializing Psi4...\n") 226 Psi4Handle = Psi4Util.InitializePsi4(PrintVersion = True, PrintHeader = False) 227 OptionsInfo["psi4"] = Psi4Handle 228 229 (MolCount, ValidMolCount, MinimizationFailedCount, WriteFailedCount) = [0] * 4 230 for Result in Results: 231 MolCount += 1 232 MolIndex, EncodedMol, CalcStatus, ConfID, Energy, SolvationEnergy = Result 233 234 if EncodedMol is None: 235 continue 236 ValidMolCount += 1 237 238 if not CalcStatus: 239 MinimizationFailedCount += 1 240 continue 241 242 Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol) 243 WriteStatus = WriteMolecule(Writer, Mol, MolCount, ConfID, Energy, SolvationEnergy) 244 245 if not WriteStatus: 246 WriteFailedCount += 1 247 248 return (MolCount, ValidMolCount, MinimizationFailedCount, WriteFailedCount) 249 250 def InitializeWorkerProcess(*EncodedArgs): 251 """Initialize data for a worker process.""" 252 253 global Options, OptionsInfo 254 255 if not OptionsInfo["QuietMode"]: 256 MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid()) 257 258 # Decode Options and OptionInfo... 259 Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0]) 260 OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1]) 261 262 # Psi4 is initialized in the worker process to avoid creation of redundant Psi4 263 # output files for each process... 264 OptionsInfo["Psi4Initialized"] = False 265 266 def WorkerProcess(EncodedMolInfo): 267 """Process data for a worker process.""" 268 269 if not OptionsInfo["Psi4Initialized"]: 270 InitializePsi4ForWorkerProcess() 271 272 MolIndex, EncodedMol = EncodedMolInfo 273 274 MolNum = MolIndex + 1 275 CalcStatus = False 276 ConfID = None 277 Energy = None 278 SolvationEnergy = None 279 280 if EncodedMol is None: 281 return [MolIndex, None, CalcStatus, ConfID, Energy, SolvationEnergy] 282 283 Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol) 284 285 if not CheckAndValidateMolecule(Mol, MolNum): 286 return [MolIndex, None, CalcStatus, ConfID, Energy, SolvationEnergy] 287 288 # Setup 2D coordinates for SMILES input file... 289 if OptionsInfo["SMILESInfileStatus"]: 290 AllChem.Compute2DCoords(Mol) 291 292 Mol, CalcStatus, ConfID, Energy, SolvationEnergy = MinimizeMoleculeOrConformers(OptionsInfo["psi4"], Mol, MolNum) 293 294 return [MolIndex, RDKitUtil.MolToBase64EncodedMolString(Mol, PropertyPickleFlags = Chem.PropertyPickleOptions.MolProps | Chem.PropertyPickleOptions.PrivateProps), CalcStatus, ConfID, Energy, SolvationEnergy] 295 296 def ProcessMoleculesUsingMultipleProcessesAtConformersLevel(Mols, Writer): 297 """Process and minimize molecules using multiprocessing at conformers level.""" 298 299 MiscUtil.PrintInfo("\nPerforming minimization with generation of conformers using multiprocessing at conformers level...") 300 301 (MolCount, ValidMolCount, MinimizationFailedCount, WriteFailedCount) = [0] * 4 302 for Mol in Mols: 303 MolCount += 1 304 305 if not CheckAndValidateMolecule(Mol, MolCount): 306 continue 307 308 # Setup 2D coordinates for SMILES input file... 309 if OptionsInfo["SMILESInfileStatus"]: 310 AllChem.Compute2DCoords(Mol) 311 312 ValidMolCount += 1 313 314 Mol, CalcStatus, ConfID, Energy, SolvationEnergy = ProcessConformersUsingMultipleProcesses(Mol, MolCount) 315 316 if not CalcStatus: 317 if not OptionsInfo["QuietMode"]: 318 MolName = RDKitUtil.GetMolName(Mol, MolCount) 319 MiscUtil.PrintWarning("Failed to calculate energy for molecule %s" % MolName) 320 321 MinimizationFailedCount += 1 322 continue 323 324 WriteStatus = WriteMolecule(Writer, Mol, MolCount, ConfID, Energy, SolvationEnergy) 325 326 if not WriteStatus: 327 WriteFailedCount += 1 328 329 return (MolCount, ValidMolCount, MinimizationFailedCount, WriteFailedCount) 330 331 def ProcessConformersUsingMultipleProcesses(Mol, MolNum): 332 """Generate conformers and minimize them using multiple processes. """ 333 334 # Add hydrogens... 335 Mol = Chem.AddHs(Mol) 336 337 # Setup conformers... 338 ConfIDs = EmbedMolecule(Mol, MolNum) 339 if not len(ConfIDs): 340 if not OptionsInfo["QuietMode"]: 341 MolName = RDKitUtil.GetMolName(Mol, MolNum) 342 MiscUtil.PrintWarning("Minimization couldn't be performed for molecule %s: Embedding failed...\n" % MolName) 343 return (Mol, False, None, None, None) 344 345 MPParams = OptionsInfo["MPParams"] 346 347 # Setup data for initializing a worker process... 348 InitializeWorkerProcessArgs = (MiscUtil.ObjectToBase64EncodedString(Options), MiscUtil.ObjectToBase64EncodedString(OptionsInfo)) 349 350 # Setup a encoded mols data iterable for a worker process... 351 MolIndex = MolNum - 1 352 WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStringWithConfIDs(Mol, MolIndex, ConfIDs) 353 354 # Setup process pool along with data initialization for each process... 355 if not OptionsInfo["QuietMode"]: 356 MiscUtil.PrintInfo("\nConfiguring multiprocessing using %s method..." % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()")) 357 MiscUtil.PrintInfo("NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n" % (MPParams["NumProcesses"], MPParams["InputDataMode"], ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"]))) 358 359 ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeConformerWorkerProcess, InitializeWorkerProcessArgs) 360 361 # Start processing... 362 if re.match("^Lazy$", MPParams["InputDataMode"], re.I): 363 Results = ProcessPool.imap(ConformerWorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"]) 364 elif re.match("^InMemory$", MPParams["InputDataMode"], re.I): 365 Results = ProcessPool.map(ConformerWorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"]) 366 else: 367 MiscUtil.PrintError("The value, %s, specified for \"--inputDataMode\" is not supported." % (MPParams["InputDataMode"])) 368 369 CalcEnergyMap = {} 370 CalcSolvationEnergyMap = {} 371 CalcFailedCount = 0 372 for Result in Results: 373 MolIndex, EncodedMol, CalcStatus, ConfID, Energy, SolvationEnergy = Result 374 375 if EncodedMol is None: 376 CalcFailedCount += 1 377 continue 378 379 if not CalcStatus: 380 CalcFailedCount += 1 381 continue 382 383 # Retrieve minimized atom positions... 384 MinimizedMol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol) 385 AtomPositions = RDKitUtil.GetAtomPositions(MinimizedMol, ConfID = ConfID) 386 387 # Update atom positions... 388 RDKitUtil.SetAtomPositions(Mol, AtomPositions, ConfID = ConfID) 389 390 CalcEnergyMap[ConfID] = Energy 391 CalcSolvationEnergyMap[ConfID] = SolvationEnergy 392 393 if CalcFailedCount: 394 return (Mol, False, None, None, None) 395 396 SortedConfIDs = sorted(ConfIDs, key = lambda ConfID: CalcEnergyMap[ConfID]) 397 MinEnergyConfID = SortedConfIDs[0] 398 MinEnergy = CalcEnergyMap[MinEnergyConfID] 399 SolvationEnergy = CalcSolvationEnergyMap[MinEnergyConfID] 400 401 return (Mol, True, MinEnergyConfID, MinEnergy, SolvationEnergy) 402 403 def InitializeConformerWorkerProcess(*EncodedArgs): 404 """Initialize data for a conformer worker process.""" 405 406 global Options, OptionsInfo 407 408 if not OptionsInfo["QuietMode"]: 409 MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid()) 410 411 # Decode Options and OptionInfo... 412 Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0]) 413 OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1]) 414 415 # Psi4 is initialized in the worker process to avoid creation of redundant Psi4 416 # output files for each process... 417 OptionsInfo["Psi4Initialized"] = False 418 419 def ConformerWorkerProcess(EncodedMolInfo): 420 """Process data for a conformer worker process.""" 421 422 if not OptionsInfo["Psi4Initialized"]: 423 InitializePsi4ForWorkerProcess() 424 425 MolIndex, EncodedMol, ConfID = EncodedMolInfo 426 427 MolNum = MolIndex + 1 428 429 CalcStatus = False 430 Energy = None 431 SolvationEnergy = None 432 433 if EncodedMol is None: 434 return [MolIndex, None, CalcStatus, ConfID, Energy, SolvationEnergy] 435 436 Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol) 437 MolName = RDKitUtil.GetMolName(Mol, MolNum) 438 439 if not OptionsInfo["QuietMode"]: 440 MiscUtil.PrintInfo("Processing molecule %s conformer ID %s..." % (MolName, ConfID)) 441 442 Status, ConvergeStatus = MinimizeMoleculeUsingForceField(Mol, MolNum, ConfID) 443 if not Status: 444 return [MolIndex, EncodedMol, CalcStatus, ConfID, Energy, SolvationEnergy] 445 446 if ConvergeStatus != 0: 447 if not OptionsInfo["QuietMode"]: 448 MolName = RDKitUtil.GetMolName(Mol, (MolIndex + 1)) 449 MiscUtil.PrintWarning("Minimization using forcefield failed to converge for molecule %s in %d steps. Try using higher value for \"maxIters\" in \"--confParams\" option...\n" % (MolName, OptionsInfo["ConfGenerationParams"]["MaxIters"])) 450 451 # Perform Psi4 minimization... 452 CalcStatus, Energy, SolvationEnergy = MinimizeMoleculeUsingPsi4(OptionsInfo["psi4"], Mol, MolNum, ConfID) 453 if not CalcStatus: 454 if not OptionsInfo["QuietMode"]: 455 MiscUtil.PrintWarning("Minimization couldn't be performed for molecule %s\n" % (MolName)) 456 return [MolIndex, EncodedMol, False, ConfID, Energy, SolvationEnergy] 457 458 return [MolIndex, RDKitUtil.MolToBase64EncodedMolString(Mol, PropertyPickleFlags = Chem.PropertyPickleOptions.MolProps | Chem.PropertyPickleOptions.PrivateProps), CalcStatus, ConfID, Energy, SolvationEnergy] 459 460 def InitializePsi4ForWorkerProcess(): 461 """Initialize Psi4 for a worker process.""" 462 463 if OptionsInfo["Psi4Initialized"]: 464 return 465 466 OptionsInfo["Psi4Initialized"] = True 467 468 if OptionsInfo["MPLevelConformersMode"] and re.match("auto", OptionsInfo["Psi4RunParams"]["OutputFileSpecified"], re.I): 469 # Run Psi4 in quiet mode during multiprocessing at Conformers level for 'auto' OutputFile... 470 OptionsInfo["Psi4RunParams"]["OutputFile"] = "quiet" 471 else: 472 # Update output file... 473 OptionsInfo["Psi4RunParams"]["OutputFile"] = Psi4Util.UpdatePsi4OutputFileUsingPID(OptionsInfo["Psi4RunParams"]["OutputFile"], os.getpid()) 474 475 # Intialize Psi4... 476 OptionsInfo["psi4"] = Psi4Util.InitializePsi4(Psi4RunParams = OptionsInfo["Psi4RunParams"], Psi4OptionsParams = OptionsInfo["Psi4OptionsParams"], PrintVersion = False, PrintHeader = True) 477 478 if OptionsInfo["Psi4DDXSolvationMode"]: 479 Psi4Util.UpdatePsi4OptionsParameters(OptionsInfo["psi4"], OptionsInfo["Psi4DDXSolvationOptions"]) 480 481 # Setup max iterations global variable... 482 Psi4Util.UpdatePsi4OptionsParameters(OptionsInfo["psi4"], {'GEOM_MAXITER': OptionsInfo["MaxIters"]}) 483 484 # Setup conversion factor for energy units... 485 SetupEnergyConversionFactor(OptionsInfo["psi4"]) 486 487 def MinimizeMoleculeOrConformers(Psi4Handle, Mol, MolNum): 488 """Minimize molecule or conformers.""" 489 490 ConfID = None 491 if OptionsInfo["SkipConformerGeneration"]: 492 Mol, CalcStatus, Energy, SolvationEnergy = MinimizeMolecule(Psi4Handle, Mol, MolNum) 493 else: 494 Mol, CalcStatus, ConfID, Energy, SolvationEnergy = GenerateAndMinimizeConformers(Psi4Handle, Mol, MolNum) 495 496 return (Mol, CalcStatus, ConfID, Energy, SolvationEnergy) 497 498 def GenerateAndMinimizeConformers(Psi4Handle, Mol, MolNum = None): 499 """Generate and minimize conformers for a molecule to get the lowest energy conformer.""" 500 501 # Add hydrogens.. 502 Mol = Chem.AddHs(Mol) 503 504 MolName = RDKitUtil.GetMolName(Mol, MolNum) 505 506 # Setup conformers... 507 ConfIDs = EmbedMolecule(Mol, MolNum) 508 if not len(ConfIDs): 509 if not OptionsInfo["QuietMode"]: 510 MiscUtil.PrintWarning("Minimization couldn't be performed for molecule %s: Embedding failed...\n" % MolName) 511 return (Mol, False, None, None, None) 512 513 # Minimize conformers... 514 CalcEnergyMap = {} 515 CalcSolvationEnergyMap = {} 516 for ConfID in ConfIDs: 517 if not OptionsInfo["QuietMode"]: 518 MiscUtil.PrintInfo("Processing molecule %s conformer ID %s..." % (MolName, ConfID)) 519 520 # Perform forcefield minimization... 521 Status, ConvergeStatus = MinimizeMoleculeUsingForceField(Mol, MolNum, ConfID) 522 if not Status: 523 return (Mol, False, None, None, None) 524 525 if ConvergeStatus != 0: 526 if not OptionsInfo["QuietMode"]: 527 MiscUtil.PrintWarning("Minimization using forcefield failed to converge for molecule %s in %d steps. Try using higher value for \"maxIters\" in \"--confParams\" option...\n" % (MolName, OptionsInfo["ConfGenerationParams"]["MaxIters"])) 528 529 # Perform Psi4 minimization... 530 CalcStatus, Energy, SolvationEnergy = MinimizeMoleculeUsingPsi4(Psi4Handle, Mol, MolNum, ConfID) 531 if not CalcStatus: 532 if not OptionsInfo["QuietMode"]: 533 MiscUtil.PrintWarning("Minimization couldn't be performed for molecule %s\n" % (MolName)) 534 return (Mol, False, None, None, None) 535 536 CalcEnergyMap[ConfID] = Energy 537 CalcSolvationEnergyMap[ConfID] = SolvationEnergy 538 539 # Select the lowest energy conformer.. 540 SortedConfIDs = sorted(ConfIDs, key = lambda ConfID: CalcEnergyMap[ConfID]) 541 MinEnergyConfID = SortedConfIDs[0] 542 Energy = CalcEnergyMap[MinEnergyConfID] 543 SolvationEnergy = CalcSolvationEnergyMap[MinEnergyConfID] 544 545 return (Mol, True, MinEnergyConfID, Energy, SolvationEnergy) 546 547 def MinimizeMolecule(Psi4Handle, Mol, MolNum): 548 """Minimize molecule.""" 549 550 # Skip conformation generation and forcefield minimization... 551 CalcStatus, Energy, SolvationEnergy = MinimizeMoleculeUsingPsi4(Psi4Handle, Mol, MolNum) 552 553 return (Mol, CalcStatus, Energy, SolvationEnergy) 554 555 def MinimizeMoleculeUsingPsi4(Psi4Handle, Mol, MolNum, ConfID = -1): 556 """Minimize molecule using Psi4.""" 557 558 # Setup a Psi4Mol... 559 Psi4Mol = SetupPsi4Mol(Psi4Handle, Mol, MolNum, ConfID) 560 if Psi4Mol is None: 561 return (False, None, None) 562 563 # Setup reference wave function... 564 Reference = SetupReferenceWavefunction(Mol) 565 Psi4Handle.set_options({'Reference': Reference}) 566 567 # Setup method name and basis set... 568 MethodName, BasisSet = SetupMethodNameAndBasisSet(Mol) 569 570 # Setup constrained torsions... 571 if OptionsInfo["Psi4ConstrainTorsionsMode"]: 572 SetupConstrainedTorsionsAroundRotatableBonds(Psi4Handle, Mol) 573 574 # Optimize geometry... 575 Status, Energy, WaveFunction = Psi4Util.PerformGeometryOptimization(Psi4Handle, Psi4Mol, MethodName, BasisSet, ReturnWaveFunction = True, Quiet = OptionsInfo["QuietMode"]) 576 577 if not Status: 578 PerformPsi4Cleanup(Psi4Handle) 579 return (False, None, None) 580 581 SolvationEnergy = None 582 if OptionsInfo["Psi4DDXSolvationMode"]: 583 SolvationEnergy = RetrievePsi4DDXSolvationEnergy(WaveFunction) 584 585 # Update atom positions... 586 AtomPositions = Psi4Util.GetAtomPositions(Psi4Handle, WaveFunction, InAngstroms = True) 587 RDKitUtil.SetAtomPositions(Mol, AtomPositions, ConfID = ConfID) 588 589 # Convert energy units... 590 if OptionsInfo["ApplyEnergyConversionFactor"]: 591 Energy = Energy * OptionsInfo["EnergyConversionFactor"] 592 if SolvationEnergy is not None: 593 SolvationEnergy = SolvationEnergy * OptionsInfo["EnergyConversionFactor"] 594 595 # Clean up 596 PerformPsi4Cleanup(Psi4Handle) 597 598 return (True, Energy, SolvationEnergy) 599 600 def RetrievePsi4DDXSolvationEnergy(WaveFunction): 601 """Retrieve DDX solvation energy.""" 602 603 SolvationEnergy = None 604 try: 605 SolvationEnergy = WaveFunction.variable('DD Solvation Energy') 606 except Exception as ErrMsg: 607 SolvationEnergy = None 608 if not OptionsInfo["QuietMode"]: 609 MiscUtil.PrintWarning("Failed to retrieve Psi4 DD Solvation Energy using wave function: %s" % ErrMsg) 610 611 return SolvationEnergy 612 613 def MinimizeMoleculeUsingForceField(Mol, MolNum, ConfID = -1): 614 """Minimize molecule using forcefield available in RDKit.""" 615 616 try: 617 if OptionsInfo["ConfGenerationParams"]["UseUFF"]: 618 ConvergeStatus = AllChem.UFFOptimizeMolecule(Mol, confId = ConfID, maxIters = OptionsInfo["ConfGenerationParams"]["MaxIters"]) 619 elif OptionsInfo["ConfGenerationParams"]["UseMMFF"]: 620 ConvergeStatus = AllChem.MMFFOptimizeMolecule(Mol, confId = ConfID, maxIters = OptionsInfo["ConfGenerationParams"]["MaxIters"], mmffVariant = OptionsInfo["ConfGenerationParams"]["MMFFVariant"]) 621 else: 622 MiscUtil.PrintError("Minimization couldn't be performed: Specified forcefield, %s, is not supported" % OptionsInfo["ConfGenerationParams"]["ForceField"]) 623 except (ValueError, RuntimeError, Chem.rdchem.KekulizeException) as ErrMsg: 624 if not OptionsInfo["QuietMode"]: 625 MolName = RDKitUtil.GetMolName(Mol, MolNum) 626 MiscUtil.PrintWarning("Minimization using forcefield couldn't be performed for molecule %s:\n%s\n" % (MolName, ErrMsg)) 627 return (False, None) 628 629 return (True, ConvergeStatus) 630 631 def EmbedMolecule(Mol, MolNum = None): 632 """Embed conformations.""" 633 634 ConfIDs = [] 635 636 MaxConfs = OptionsInfo["ConfGenerationParams"]["MaxConfs"] 637 RandomSeed = OptionsInfo["ConfGenerationParams"]["RandomSeed"] 638 EnforceChirality = OptionsInfo["ConfGenerationParams"]["EnforceChirality"] 639 UseExpTorsionAnglePrefs = OptionsInfo["ConfGenerationParams"]["UseExpTorsionAnglePrefs"] 640 ETVersion = OptionsInfo["ConfGenerationParams"]["ETVersion"] 641 UseBasicKnowledge = OptionsInfo["ConfGenerationParams"]["UseBasicKnowledge"] 642 EmbedRMSDCutoff = OptionsInfo["ConfGenerationParams"]["EmbedRMSDCutoff"] 643 644 try: 645 ConfIDs = AllChem.EmbedMultipleConfs(Mol, numConfs = MaxConfs, randomSeed = RandomSeed, pruneRmsThresh = EmbedRMSDCutoff, enforceChirality = EnforceChirality, useExpTorsionAnglePrefs = UseExpTorsionAnglePrefs, useBasicKnowledge = UseBasicKnowledge, ETversion = ETVersion) 646 except ValueError as ErrMsg: 647 if not OptionsInfo["QuietMode"]: 648 MolName = RDKitUtil.GetMolName(Mol, MolNum) 649 MiscUtil.PrintWarning("Embedding failed for molecule %s:\n%s\n" % (MolName, ErrMsg)) 650 ConfIDs = [] 651 652 if not OptionsInfo["QuietMode"]: 653 if EmbedRMSDCutoff > 0: 654 MiscUtil.PrintInfo("Generating initial conformation ensemble by distance geometry for %s - EmbedRMSDCutoff: %s; Size: %s; Size after RMSD filtering: %s" % (RDKitUtil.GetMolName(Mol, MolNum), EmbedRMSDCutoff, MaxConfs, len(ConfIDs))) 655 else: 656 MiscUtil.PrintInfo("Generating initial conformation ensemble by distance geometry for %s - EmbedRMSDCutoff: None; Size: %s" % (RDKitUtil.GetMolName(Mol, MolNum), len(ConfIDs))) 657 658 return ConfIDs 659 660 def SetupPsi4Mol(Psi4Handle, Mol, MolNum, ConfID = -1): 661 """Setup a Psi4 molecule object.""" 662 663 if OptionsInfo["RecenterAndReorient"]: 664 MolGeometry = RDKitUtil.GetPsi4XYZFormatString(Mol, ConfID = ConfID, NoCom = False, NoReorient = False) 665 else: 666 MolGeometry = RDKitUtil.GetPsi4XYZFormatString(Mol, ConfID = ConfID, NoCom = True, NoReorient = True) 667 668 try: 669 Psi4Mol = Psi4Handle.geometry(MolGeometry) 670 except Exception as ErrMsg: 671 Psi4Mol = None 672 if not OptionsInfo["QuietMode"]: 673 MiscUtil.PrintWarning("Failed to create Psi4 molecule from geometry string: %s\n" % ErrMsg) 674 MolName = RDKitUtil.GetMolName(Mol, MolNum) 675 MiscUtil.PrintWarning("Ignoring molecule: %s" % MolName) 676 677 if OptionsInfo["Symmetrize"]: 678 Psi4Mol.symmetrize(OptionsInfo["SymmetrizeTolerance"]) 679 680 return Psi4Mol 681 682 def PerformPsi4Cleanup(Psi4Handle): 683 """Perform clean up.""" 684 685 # Clean up after Psi4 run... 686 Psi4Handle.core.clean() 687 688 # Clean up any leftover scratch files... 689 if OptionsInfo["MPMode"]: 690 Psi4Util.RemoveScratchFiles(Psi4Handle, OptionsInfo["Psi4RunParams"]["OutputFile"]) 691 692 def CheckAndValidateMolecule(Mol, MolCount = None): 693 """Validate molecule for Psi4 calculations.""" 694 695 if Mol is None: 696 if not OptionsInfo["QuietMode"]: 697 MiscUtil.PrintInfo("\nProcessing molecule number %s..." % MolCount) 698 return False 699 700 MolName = RDKitUtil.GetMolName(Mol, MolCount) 701 if not OptionsInfo["QuietMode"]: 702 MiscUtil.PrintInfo("\nProcessing molecule %s..." % MolName) 703 704 if RDKitUtil.IsMolEmpty(Mol): 705 if not OptionsInfo["QuietMode"]: 706 MiscUtil.PrintWarning("Ignoring empty molecule: %s\n" % MolName) 707 return False 708 709 if not RDKitUtil.ValidateElementSymbols(RDKitUtil.GetAtomSymbols(Mol)): 710 if not OptionsInfo["QuietMode"]: 711 MiscUtil.PrintWarning("Ignoring molecule containing invalid element symbols: %s\n" % MolName) 712 return False 713 714 # Check for 3D flag... 715 if OptionsInfo["SkipConformerGeneration"]: 716 if not Mol.GetConformer().Is3D(): 717 if not OptionsInfo["QuietMode"]: 718 MiscUtil.PrintWarning("3D tag is not set for molecule: %s\n" % MolName) 719 720 # Check for missing hydrogens... 721 if OptionsInfo["SkipConformerGeneration"]: 722 # Otherwise, Hydrogens are always added... 723 if RDKitUtil.AreHydrogensMissingInMolecule(Mol): 724 if not OptionsInfo["QuietMode"]: 725 MiscUtil.PrintWarning("Missing hydrogens in molecule: %s\n" % MolName) 726 727 return True 728 729 def SetupMethodNameAndBasisSet(Mol): 730 """Setup method name and basis set.""" 731 732 MethodName = OptionsInfo["MethodName"] 733 if OptionsInfo["MethodNameAuto"]: 734 MethodName = "B3LYP" 735 736 BasisSet = OptionsInfo["BasisSet"] 737 if OptionsInfo["BasisSetAuto"]: 738 BasisSet = "6-31+G**" if RDKitUtil.IsAtomSymbolPresentInMol(Mol, "S") else "6-31G**" 739 740 return (MethodName, BasisSet) 741 742 def SetupReferenceWavefunction(Mol): 743 """Setup reference wavefunction.""" 744 745 Reference = OptionsInfo["Reference"] 746 if OptionsInfo["ReferenceAuto"]: 747 Reference = 'UHF' if (RDKitUtil.GetSpinMultiplicity(Mol) > 1) else 'RHF' 748 749 return Reference 750 751 def SetupConstrainedTorsionsAroundRotatableBonds(Psi4Handle, Mol): 752 """Setup constrained torsions around rotatable bonds. """ 753 754 if not OptionsInfo["Psi4ConstrainTorsionsMode"]: 755 return 756 757 # Get torsions around rotatable bonds... 758 TorsionAtomsIndicesList = RDKitUtil.GetTorsionsAroundRotatableBonds(Mol, GetConstrainTorsionsRotBondsPatternMol(), OptionsInfo["Psi4ConstrainTorsionsParams"]["IgnoreHydrogens"]) 759 760 # Freeze dihedrals corresponding to torsions around rotatable bonds... 761 FreezeDihedralsString = SetupFreezeDihedralsStringForPsi4(TorsionAtomsIndicesList) 762 if FreezeDihedralsString is None: 763 # Clear any frozen dihedral values 764 Psi4Handle.set_options({"OPTKING__frozen_dihedral": ""}) 765 else: 766 # Setup frozen dihedral values... 767 Psi4Handle.set_options({"OPTKING__frozen_dihedral": FreezeDihedralsString}) 768 769 def SetupFreezeDihedralsStringForPsi4(TorsionAtomsIndicesList): 770 """Setup freeze dihedrals string for Psi4.""" 771 772 FreezeDihedralsString = None 773 if TorsionAtomsIndicesList is None or len(TorsionAtomsIndicesList) == 0: 774 return FreezeDihedralsString 775 776 TorsionsList = [] 777 for TorsionAtomIndices in TorsionAtomsIndicesList: 778 AtomIndex1, AtomIndex2, AtomIndex3, AtomIndex4 = TorsionAtomIndices 779 780 # Atom indices start from 1 for Psi4 instead 0 for RDKit... 781 TorsionString = "%s %s %s %s" % (AtomIndex1 +1, AtomIndex2 + 1, AtomIndex3 + 1, AtomIndex4 + 1) 782 TorsionsList.append(TorsionString) 783 784 if len(TorsionsList): 785 FreezeDihedralsString = "%s" % "\n".join(TorsionsList) 786 787 return FreezeDihedralsString 788 789 def GetConstrainTorsionsRotBondsPatternMol(): 790 """Get constrain torsions rotatable bonds pattern molecule.""" 791 792 if "RotBondsSMARTSPatternMol" not in OptionsInfo["Psi4ConstrainTorsionsParams"]: 793 OptionsInfo["Psi4ConstrainTorsionsParams"]["RotBondsSMARTSPatternMol"] = Chem.MolFromSmarts(OptionsInfo["Psi4ConstrainTorsionsParams"]["RotBondsSMARTSPattern"]) 794 795 return OptionsInfo["Psi4ConstrainTorsionsParams"]["RotBondsSMARTSPatternMol"] 796 797 def SetupEnergyConversionFactor(Psi4Handle): 798 """Setup converstion factor for energt units. The Psi4 energy units are Hartrees.""" 799 800 EnergyUnits = OptionsInfo["EnergyUnits"] 801 802 ApplyConversionFactor = True 803 if re.match("^kcal\/mol$", EnergyUnits, re.I): 804 ConversionFactor = Psi4Handle.constants.hartree2kcalmol 805 elif re.match("^kJ\/mol$", EnergyUnits, re.I): 806 ConversionFactor = Psi4Handle.constants.hartree2kJmol 807 elif re.match("^eV$", EnergyUnits, re.I): 808 ConversionFactor = Psi4Handle.constants.hartree2ev 809 else: 810 ApplyConversionFactor = False 811 ConversionFactor = 1.0 812 813 OptionsInfo["ApplyEnergyConversionFactor"] = ApplyConversionFactor 814 OptionsInfo["EnergyConversionFactor"] = ConversionFactor 815 816 def WriteMolecule(Writer, Mol, MolNum = None, ConfID = None, Energy = None, SolvationEnergy = None): 817 """Write molecule.""" 818 819 if OptionsInfo["EnergyOut"] and Energy is not None: 820 Energy = "%.*f" % (OptionsInfo["Precision"], Energy) 821 Mol.SetProp(OptionsInfo["EnergyDataFieldLabel"], Energy) 822 823 if OptionsInfo["Psi4DDXSolvationMode"] and SolvationEnergy is not None: 824 SolvationEnergy = "%.*f" % (OptionsInfo["Precision"], SolvationEnergy) 825 Mol.SetProp(OptionsInfo["SolvationEnergyDataFieldLabel"], SolvationEnergy) 826 827 try: 828 if ConfID is None: 829 Writer.write(Mol) 830 else: 831 Writer.write(Mol, confId = ConfID) 832 except (ValueError, RuntimeError) as ErrMsg: 833 if not OptionsInfo["QuietMode"]: 834 MolName = RDKitUtil.GetMolName(Mol, MolNum) 835 MiscUtil.PrintWarning("Failed to write molecule %s:\n%s\n" % (MolName, ErrMsg)) 836 return False 837 838 return True 839 840 def CheckAndSetupDDX(): 841 """Load pyddx and return its handle.""" 842 843 try: 844 import pyddx 845 except ImportError as ErrMsg: 846 sys.stderr.write("\nFailed to import module/package pyddx for solvation: %s\n" % ErrMsg) 847 sys.stderr.write("Psi4 relies on pyddx module for DDX solvation calculation.\n") 848 sys.stderr.write("You may need to install it from pypi or conda-forge.\n") 849 sys.stderr.write("Check/update your environment and try again.\n\n") 850 sys.exit(1) 851 852 return pyddx 853 854 def ProcessDDXListSolventsOption(): 855 """List solvent information provided by tge DDX module for the calculation 856 of solvation energy.""" 857 858 PyDDXHandle = CheckAndSetupDDX() 859 860 MiscUtil.PrintInfo("Solvent name and dielectric constant information avaibale in the DDX module:\n") 861 MiscUtil.PrintInfo("Solvent\tEpsilon") 862 Count = 0 863 for Solvent in sorted(PyDDXHandle.data.solvent_epsilon.keys()): 864 Count += 1 865 Epsilon = PyDDXHandle.data.solvent_epsilon[Solvent] 866 MiscUtil.PrintInfo("%s\t%s" % (Solvent, Epsilon)) 867 868 MiscUtil.PrintInfo("\nTotal number of solvents: %s\n" % Count) 869 870 def ProcessOptions(): 871 """Process and validate command line arguments and options.""" 872 873 MiscUtil.PrintInfo("Processing options...") 874 875 # Validate options... 876 ValidateOptions() 877 878 OptionsInfo["Infile"] = Options["--infile"] 879 OptionsInfo["SMILESInfileStatus"] = True if MiscUtil.CheckFileExt(Options["--infile"], "smi csv tsv txt") else False 880 ParamsDefaultInfoOverride = {"RemoveHydrogens": False} 881 OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters("--infileParams", Options["--infileParams"], InfileName = Options["--infile"], ParamsDefaultInfo = ParamsDefaultInfoOverride) 882 883 OptionsInfo["Outfile"] = Options["--outfile"] 884 OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters("--outfileParams", Options["--outfileParams"]) 885 886 OptionsInfo["Overwrite"] = Options["--overwrite"] 887 888 # Method, basis set, and reference wavefunction... 889 OptionsInfo["BasisSet"] = Options["--basisSet"] 890 OptionsInfo["BasisSetAuto"] = True if re.match("^auto$", Options["--basisSet"], re.I) else False 891 892 OptionsInfo["MethodName"] = Options["--methodName"] 893 OptionsInfo["MethodNameAuto"] = True if re.match("^auto$", Options["--methodName"], re.I) else False 894 895 OptionsInfo["Reference"] = Options["--reference"] 896 OptionsInfo["ReferenceAuto"] = True if re.match("^auto$", Options["--reference"], re.I) else False 897 898 # Run and options parameters... 899 OptionsInfo["Psi4OptionsParams"] = Psi4Util.ProcessPsi4OptionsParameters("--psi4OptionsParams", Options["--psi4OptionsParams"]) 900 OptionsInfo["Psi4RunParams"] = Psi4Util.ProcessPsi4RunParameters("--psi4RunParams", Options["--psi4RunParams"], InfileName = OptionsInfo["Infile"]) 901 902 # Psi4 constrain torsions paramaters... 903 OptionsInfo["Psi4ConstrainTorsionsMode"] = True if re.match("^yes$", Options["--psi4ConstrainTorsions"], re.I) else False 904 ParamsDefaultInfoOverride = {"RotBondsSMARTSMode": "NonStrict"} 905 Psi4ConstrainTorsionsParams = Psi4Util.ProcessPsi4ConstrainTorrionsParameters("--psi4ConstrainTorsionsParams", Options["--psi4ConstrainTorsionsParams"], ParamsDefaultInfoOverride) 906 OptionsInfo["Psi4ConstrainTorsionsParams"] = Psi4ConstrainTorsionsParams 907 908 if OptionsInfo["Psi4ConstrainTorsionsMode"]: 909 RotBondsSMARTSPattern = OptionsInfo["Psi4ConstrainTorsionsParams"]["RotBondsSMARTSPattern"] 910 RotBondsPatternMol = Chem.MolFromSmarts(RotBondsSMARTSPattern) 911 if RotBondsPatternMol is None: 912 MiscUtil.PrintError("Failed to create rotatable bonds pattern molecule. The rotatable bonds SMARTS pattern, \"%s\", specified for parameter name \"--rotBondsSMARTSPattern\" using \"--psi4ConstrainTorsions\"option is not valid." % (RotBondsSMARTSPattern)) 913 914 # DDX solvation parameters... 915 OptionsInfo["Psi4DDXSolvationMode"] = True if re.match("^yes$", Options["--psi4DDXSolvation"], re.I) else False 916 Psi4DDXSolvationParams = Psi4Util.ProcessPsi4DDXSolvationParameters("--psi4DDXSolvationParams", Options["--psi4DDXSolvationParams"]) 917 Psi4DDXSolvationOptions = Psi4Util.SetupPsi4DDXSolvationOptions(OptionsInfo["Psi4DDXSolvationMode"], Psi4DDXSolvationParams) 918 OptionsInfo["Psi4DDXSolvationParams"] = Psi4DDXSolvationParams 919 OptionsInfo["Psi4DDXSolvationOptions"] = Psi4DDXSolvationOptions 920 921 if OptionsInfo["Psi4DDXSolvationMode"]: 922 CheckAndSetupDDX() 923 924 # Conformer generation paramaters... 925 OptionsInfo["ConfGeneration"] = Options["--confGeneration"] 926 OptionsInfo["SkipConformerGeneration"] = False if re.match("^yes$", Options["--confGeneration"], re.I) else True 927 928 if OptionsInfo["SkipConformerGeneration"]: 929 if OptionsInfo["SMILESInfileStatus"]: 930 MiscUtil.PrintError("The value, %s, specified for option \"-c --confGeneration\" is not allowed during, %s, value for option \"-i, --infile\" . The input file must be a 3D SD or MOL file. " % (Options["--confGeneration"], Options["--infile"])) 931 932 if not OptionsInfo["SkipConformerGeneration"]: 933 if OptionsInfo["Psi4ConstrainTorsionsMode"]: 934 MiscUtil.PrintError("The value, %s, specified for option \"-c --confGeneration\" is not allowed during, %s, value for option \"--psi4ConstrainTorsions\"." % (Options["--confGeneration"], Options["--psi4ConstrainTorsions"])) 935 936 ParamsDefaultInfoOverride = {"MaxConfs": 50, "MaxIters": 250} 937 OptionsInfo["ConfGenerationParams"] = MiscUtil.ProcessOptionConformerParameters("--confParams", Options["--confParams"], ParamsDefaultInfoOverride) 938 939 # Write energy parameters... 940 OptionsInfo["EnergyOut"] = True if re.match("^yes$", Options["--energyOut"], re.I) else False 941 OptionsInfo["EnergyUnits"] = Options["--energyUnits"] 942 943 EnergyDataFieldLabel = Options["--energyDataFieldLabel"] 944 if re.match("^auto$", EnergyDataFieldLabel, re.I): 945 EnergyDataFieldLabel = "Psi4_Energy (%s)" % Options["--energyUnits"] 946 OptionsInfo["EnergyDataFieldLabel"] = EnergyDataFieldLabel 947 948 SolvationEnergyDataFieldLabel = "Psi4_DDX_Solvation_Energy (%s)" % Options["--energyUnits"] 949 OptionsInfo["SolvationEnergyDataFieldLabel"] = SolvationEnergyDataFieldLabel 950 951 OptionsInfo["MaxIters"] = int(Options["--maxIters"]) 952 953 OptionsInfo["MPMode"] = True if re.match("^yes$", Options["--mp"], re.I) else False 954 OptionsInfo["MPParams"] = MiscUtil.ProcessOptionMultiprocessingParameters("--mpParams", Options["--mpParams"]) 955 956 # Multiprocessing level... 957 MPLevelMoleculesMode = False 958 MPLevelConformersMode = False 959 MPLevel = Options["--mpLevel"] 960 if re.match("^Molecules$", MPLevel, re.I): 961 MPLevelMoleculesMode = True 962 elif re.match("^Conformers$", MPLevel, re.I): 963 if OptionsInfo["SkipConformerGeneration"]: 964 MiscUtil.PrintError("The value, %s, specified for option \"--mpLevel\" is not allowed during, %s, value of option \"--confGeneration\" . " % (MPLevel, Options["--confGeneration"])) 965 MPLevelConformersMode = True 966 else: 967 MiscUtil.PrintError("The value, %s, specified for option \"--mpLevel\" is not valid. " % MPLevel) 968 OptionsInfo["MPLevel"] = MPLevel 969 OptionsInfo["MPLevelMoleculesMode"] = MPLevelMoleculesMode 970 OptionsInfo["MPLevelConformersMode"] = MPLevelConformersMode 971 972 OptionsInfo["Precision"] = int(Options["--precision"]) 973 OptionsInfo["QuietMode"] = True if re.match("^yes$", Options["--quiet"], re.I) else False 974 975 RecenterAndReorient = Options["--recenterAndReorient"] 976 if re.match("^auto$", RecenterAndReorient, re.I): 977 RecenterAndReorient = False if OptionsInfo["SkipConformerGeneration"] else True 978 else: 979 RecenterAndReorient = True if re.match("^yes$", RecenterAndReorient, re.I) else False 980 OptionsInfo["RecenterAndReorient"] = RecenterAndReorient 981 982 Symmetrize = Options["--symmetrize"] 983 if re.match("^auto$", Symmetrize, re.I): 984 Symmetrize = True if RecenterAndReorient else False 985 else: 986 Symmetrize = True if re.match("^yes$", Symmetrize, re.I) else False 987 OptionsInfo["Symmetrize"] = Symmetrize 988 989 OptionsInfo["SymmetrizeTolerance"] = float(Options["--symmetrizeTolerance"]) 990 991 def RetrieveOptions(): 992 """Retrieve command line arguments and options.""" 993 994 # Get options... 995 global Options 996 Options = docopt(_docoptUsage_) 997 998 # Set current working directory to the specified directory... 999 WorkingDir = Options["--workingdir"] 1000 if WorkingDir: 1001 os.chdir(WorkingDir) 1002 1003 # Handle examples option... 1004 if "--examples" in Options and Options["--examples"]: 1005 MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_)) 1006 sys.exit(0) 1007 1008 # Handle listing of solvent information... 1009 if Options and Options["--psi4DDXListSolvents"]: 1010 ProcessDDXListSolventsOption() 1011 sys.exit(0) 1012 1013 def ValidateOptions(): 1014 """Validate option values.""" 1015 1016 MiscUtil.ValidateOptionTextValue("-c, --confGeneration", Options["--confGeneration"], "yes no") 1017 1018 MiscUtil.ValidateOptionTextValue("--energyOut", Options["--energyOut"], "yes no") 1019 MiscUtil.ValidateOptionTextValue("--energyUnits", Options["--energyUnits"], "Hartrees kcal/mol kJ/mol eV") 1020 1021 MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"]) 1022 MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "sdf sd mol smi txt csv tsv") 1023 1024 MiscUtil.ValidateOptionIntegerValue("--maxIters", Options["--maxIters"], {">": 0}) 1025 1026 MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "sdf sd") 1027 MiscUtil.ValidateOptionsOutputFileOverwrite("-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"]) 1028 MiscUtil.ValidateOptionsDistinctFileNames("-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"]) 1029 1030 MiscUtil.ValidateOptionTextValue("--mp", Options["--mp"], "yes no") 1031 MiscUtil.ValidateOptionTextValue("--mpLevel", Options["--mpLevel"], "Molecules Conformers") 1032 1033 MiscUtil.ValidateOptionIntegerValue("-p, --precision", Options["--precision"], {">": 0}) 1034 1035 MiscUtil.ValidateOptionTextValue("--psi4ConstrainTorsions", Options["--psi4ConstrainTorsions"], "yes no") 1036 1037 MiscUtil.ValidateOptionTextValue("--psi4DDXSolvation", Options["--psi4DDXSolvation"], "yes no") 1038 1039 MiscUtil.ValidateOptionTextValue("-q, --quiet", Options["--quiet"], "yes no") 1040 1041 MiscUtil.ValidateOptionTextValue("--recenterAndReorient", Options["--recenterAndReorient"], "yes no auto") 1042 MiscUtil.ValidateOptionTextValue("--symmetrize", Options["--symmetrize"], "yes no auto") 1043 MiscUtil.ValidateOptionFloatValue("--symmetrizeTolerance", Options["--symmetrizeTolerance"], {">": 0}) 1044 1045 # Setup a usage string for docopt... 1046 _docoptUsage_ = """ 1047 Psi4PerformMinimization.py - Perform structure minimization 1048 1049 Usage: 1050 Psi4PerformMinimization.py [--basisSet <text>] [--confGeneration <yes or no> ] [--confParams <Name,Value,...>] 1051 [--energyOut <yes or no>] [--energyDataFieldLabel <text>] [--energyUnits <text>] 1052 [--infileParams <Name,Value,...>] [--maxIters <number>] [--methodName <text>] 1053 [--mp <yes or no>] [--mpLevel <Molecules or Conformers>] [--mpParams <Name, Value,...>] 1054 [ --outfileParams <Name,Value,...> ] [--overwrite] [--precision <number> ] 1055 [--psi4OptionsParams <Name,Value,...>] [--psi4RunParams <Name,Value,...>] 1056 [--psi4ConstrainTorsions <yes or no>] [--psi4ConstrainTorsionsParams <Name,Value,...>] 1057 [--psi4DDXSolvation <yes or no>] [--psi4DDXSolvationParams <Name,Value,...>] 1058 [--quiet <yes or no>] [--reference <text>] [--recenterAndReorient <yes or no>] 1059 [--symmetrize <yes or no>] [--symmetrizeTolerance <number>] [-w <dir>] -i <infile> -o <outfile> 1060 Psi4PerformMinimization.py --psi4DDXListSolvents 1061 Psi4PerformMinimization.py -h | --help | -e | --examples 1062 1063 Description: 1064 Generate 3D structures for molecules using a combination of distance geometry 1065 and forcefield minimization followed by geometry optimization using a quantum 1066 chemistry method. A set of initial 3D structures are generated for a molecule 1067 employing distance geometry. The 3D structures in the conformation ensemble 1068 are sequentially minimized using forcefield and a quantum chemistry method. 1069 The conformation with lowest energy is selected to represent the final structure. 1070 An option is available to skip the generation of the conformation ensemble along 1071 with forcefield minimization and simply perform minimization on the initial 3D 1072 structure using a quantum chemistry method. 1073 1074 A Psi4 XYZ format geometry string is automatically generated for each molecule 1075 in input file. It contains atom symbols and 3D coordinates for each atom in a 1076 molecule. In addition, the formal charge and spin multiplicity are present in the 1077 the geometry string. These values are either retrieved from molecule properties 1078 named 'FormalCharge' and 'SpinMultiplicty' or dynamically calculated for a 1079 molecule. 1080 1081 The supported input file formats are: Mol (.mol), SD (.sdf, .sd), SMILES (.smi, 1082 .csv, .tsv, .txt) 1083 1084 The supported output file formats are: SD (.sdf, .sd) 1085 1086 Options: 1087 -b, --basisSet <text> [default: auto] 1088 Basis set to use for energy minimization. Default: 6-31+G** for sulfur 1089 containing molecules; Otherwise, 6-31G** [ Ref 150 ]. The specified 1090 value must be a valid Psi4 basis set. No validation is performed. 1091 1092 The following list shows a representative sample of basis sets available 1093 in Psi4: 1094 1095 STO-3G, 6-31G, 6-31+G, 6-31++G, 6-31G*, 6-31+G*, 6-31++G*, 1096 6-31G**, 6-31+G**, 6-31++G**, 6-311G, 6-311+G, 6-311++G, 1097 6-311G*, 6-311+G*, 6-311++G*, 6-311G**, 6-311+G**, 6-311++G**, 1098 cc-pVDZ, cc-pCVDZ, aug-cc-pVDZ, cc-pVDZ-DK, cc-pCVDZ-DK, def2-SVP, 1099 def2-SVPD, def2-TZVP, def2-TZVPD, def2-TZVPP, def2-TZVPPD 1100 1101 -c, --confGeneration <yes or no> [default: yes] 1102 Generate an initial 3D conformation ensemble using distance geometry and 1103 forcefield minimization before final geometry optimization by a specified 1104 method name and basis set. Possible values: yes or no. 1105 1106 The 'No' value skips the generation of conformations employing distance 1107 geometry and forcefield minimization. The 3D structures in input file are 1108 minimized by a quantum method. It is not allowed for SMILES input file. 1109 --confParams <Name,Value,...> [default: auto] 1110 A comma delimited list of parameter name and value pairs for generating 1111 initial sets of 3D conformations for molecules. The 3D conformation ensemble 1112 is generated using distance geometry and forcefield functionality available 1113 in RDKit. The 3D structures in the conformation ensemble are subsequently 1114 minimized by a quantum chemistry method available in Psi4. 1115 1116 The supported parameter names along with their default values are shown 1117 below: 1118 1119 confMethod,ETKDGv2, 1120 forceField,MMFF, forceFieldMMFFVariant,MMFF94, 1121 enforceChirality,yes,embedRMSDCutoff,0.5,maxConfs,50, 1122 maxIters,250,randomSeed,auto 1123 1124 confMethod,ETKDGv2 [ Possible values: SDG, KDG, ETDG, 1125 ETKDG , or ETKDGv2] 1126 forceField, MMFF [ Possible values: UFF or MMFF ] 1127 forceFieldMMFFVariant,MMFF94 [ Possible values: MMFF94 or MMFF94s ] 1128 enforceChirality,yes [ Possible values: yes or no ] 1129 embedRMSDCutoff,0.5 [ Possible values: number or None] 1130 1131 confMethod: Conformation generation methodology for generating initial 3D 1132 coordinates. Possible values: Standard Distance Geometry (SDG), Experimental 1133 Torsion-angle preference with Distance Geometry (ETDG), basic Knowledge-terms 1134 with Distance Geometry (KDG) and Experimental Torsion-angle preference 1135 along with basic Knowledge-terms and Distance Geometry (ETKDG or 1136 ETKDGv2) [Ref 129, 167] . 1137 1138 forceField: Forcefield method to use for energy minimization. Possible 1139 values: Universal Force Field (UFF) [ Ref 81 ] or Merck Molecular Mechanics 1140 Force Field [ Ref 83-87 ] . 1141 1142 enforceChirality: Enforce chirality for defined chiral centers during 1143 forcefield minimization. 1144 1145 maxConfs: Maximum number of conformations to generate for each molecule 1146 during the generation of an initial 3D conformation ensemble using 1147 conformation generation methodology. The conformations are minimized 1148 using the specified forcefield and a quantum chemistry method. The lowest 1149 energy conformation is written to the output file. 1150 1151 embedRMSDCutoff: RMSD cutoff for retaining initial set conformers embedded 1152 using distance geometry and before forcefield minimization. All embedded 1153 conformers are kept for 'None' value. Otherwise, only those conformers which 1154 are different from each other by the specified RMSD cutoff, 0.5 by default, 1155 are kept. The first embedded conformer is always retained. 1156 1157 maxIters: Maximum number of iterations to perform for each conformation 1158 during forcefield minimization. 1159 1160 randomSeed: Seed for the random number generator for reproducing initial 1161 3D coordinates in a conformation ensemble. Default is to use a random seed. 1162 --energyOut <yes or no> [default: yes] 1163 Write out energy values. 1164 --energyDataFieldLabel <text> [default: auto] 1165 Energy data field label for writing energy values. Default: Psi4_Energy (<Units>). 1166 --energyUnits <text> [default: kcal/mol] 1167 Energy units. Possible values: Hartrees, kcal/mol, kJ/mol, or eV. 1168 -e, --examples 1169 Print examples. 1170 -h, --help 1171 Print this help message. 1172 -i, --infile <infile> 1173 Input file name. 1174 --infileParams <Name,Value,...> [default: auto] 1175 A comma delimited list of parameter name and value pairs for reading 1176 molecules from files. The supported parameter names for different file 1177 formats, along with their default values, are shown below: 1178 1179 SD, MOL: removeHydrogens,no,sanitize,yes,strictParsing,yes 1180 SMILES: smilesColumn,1,smilesNameColumn,2,smilesDelimiter,space, 1181 smilesTitleLine,auto,sanitize,yes 1182 1183 Possible values for smilesDelimiter: space, comma or tab. 1184 --maxIters <number> [default: 50] 1185 Maximum number of iterations to perform for each molecule or conformer 1186 during energy minimization by a quantum chemistry method. 1187 -m, --methodName <text> [default: auto] 1188 Method to use for energy minimization. Default: B3LYP [ Ref 150 ]. The 1189 specified value must be a valid Psi4 method name. No validation is 1190 performed. 1191 1192 The following list shows a representative sample of methods available 1193 in Psi4: 1194 1195 B1LYP, B2PLYP, B2PLYP-D3BJ, B2PLYP-D3MBJ, B3LYP, B3LYP-D3BJ, 1196 B3LYP-D3MBJ, CAM-B3LYP, CAM-B3LYP-D3BJ, HF, HF-D3BJ, HF3c, M05, 1197 M06, M06-2x, M06-HF, M06-L, MN12-L, MN15, MN15-D3BJ,PBE, PBE0, 1198 PBEH3c, PW6B95, PW6B95-D3BJ, WB97, WB97X, WB97X-D, WB97X-D3BJ 1199 1200 --mp <yes or no> [default: no] 1201 Use multiprocessing. 1202 1203 By default, input data is retrieved in a lazy manner via mp.Pool.imap() 1204 function employing lazy RDKit data iterable. This allows processing of 1205 arbitrary large data sets without any additional requirements memory. 1206 1207 All input data may be optionally loaded into memory by mp.Pool.map() 1208 before starting worker processes in a process pool by setting the value 1209 of 'inputDataMode' to 'InMemory' in '--mpParams' option. 1210 1211 A word to the wise: The default 'chunkSize' value of 1 during 'Lazy' input 1212 data mode may adversely impact the performance. The '--mpParams' section 1213 provides additional information to tune the value of 'chunkSize'. 1214 --mpLevel <Molecules or Conformers> [default: Molecules] 1215 Perform multiprocessing at molecules or conformers level. Possible values: 1216 Molecules or Conformers. The 'Molecules' value starts a process pool at the 1217 molecules level. All conformers of a molecule are processed in a single 1218 process. The 'Conformers' value, however, starts a process pool at the 1219 conformers level. Each conformer of a molecule is processed in an individual 1220 process in the process pool. The default Psi4 'OutputFile' is set to 'quiet' 1221 using '--psi4RunParams' for 'Conformers' level. Otherwise, it may generate 1222 a large number of Psi4 output files. 1223 --mpParams <Name,Value,...> [default: auto] 1224 A comma delimited list of parameter name and value pairs to configure 1225 multiprocessing. 1226 1227 The supported parameter names along with their default and possible 1228 values are shown below: 1229 1230 chunkSize, auto 1231 inputDataMode, Lazy [ Possible values: InMemory or Lazy ] 1232 numProcesses, auto [ Default: mp.cpu_count() ] 1233 1234 These parameters are used by the following functions to configure and 1235 control the behavior of multiprocessing: mp.Pool(), mp.Pool.map(), and 1236 mp.Pool.imap(). 1237 1238 The chunkSize determines chunks of input data passed to each worker 1239 process in a process pool by mp.Pool.map() and mp.Pool.imap() functions. 1240 The default value of chunkSize is dependent on the value of 'inputDataMode'. 1241 1242 The mp.Pool.map() function, invoked during 'InMemory' input data mode, 1243 automatically converts RDKit data iterable into a list, loads all data into 1244 memory, and calculates the default chunkSize using the following method 1245 as shown in its code: 1246 1247 chunkSize, extra = divmod(len(dataIterable), len(numProcesses) * 4) 1248 if extra: chunkSize += 1 1249 1250 For example, the default chunkSize will be 7 for a pool of 4 worker processes 1251 and 100 data items. 1252 1253 The mp.Pool.imap() function, invoked during 'Lazy' input data mode, employs 1254 'lazy' RDKit data iterable to retrieve data as needed, without loading all the 1255 data into memory. Consequently, the size of input data is not known a priori. 1256 It's not possible to estimate an optimal value for the chunkSize. The default 1257 chunkSize is set to 1. 1258 1259 The default value for the chunkSize during 'Lazy' data mode may adversely 1260 impact the performance due to the overhead associated with exchanging 1261 small chunks of data. It is generally a good idea to explicitly set chunkSize to 1262 a larger value during 'Lazy' input data mode, based on the size of your input 1263 data and number of processes in the process pool. 1264 1265 The mp.Pool.map() function waits for all worker processes to process all 1266 the data and return the results. The mp.Pool.imap() function, however, 1267 returns the the results obtained from worker processes as soon as the 1268 results become available for specified chunks of data. 1269 1270 The order of data in the results returned by both mp.Pool.map() and 1271 mp.Pool.imap() functions always corresponds to the input data. 1272 -o, --outfile <outfile> 1273 Output file name. 1274 --outfileParams <Name,Value,...> [default: auto] 1275 A comma delimited list of parameter name and value pairs for writing 1276 molecules to files. The supported parameter names for different file 1277 formats, along with their default values, are shown below: 1278 1279 SD: kekulize,yes,forceV3000,no 1280 1281 --overwrite 1282 Overwrite existing files. 1283 --precision <number> [default: 6] 1284 Floating point precision for writing energy values. 1285 --psi4OptionsParams <Name,Value,...> [default: none] 1286 A comma delimited list of Psi4 option name and value pairs for setting 1287 global and module options. The names are 'option_name' for global options 1288 and 'module_name__option_name' for options local to a module. The 1289 specified option names must be valid Psi4 names. No validation is 1290 performed. 1291 1292 The specified option name and value pairs are processed and passed to 1293 psi4.set_options() as a dictionary. The supported value types are float, 1294 integer, boolean, or string. The float value string is converted into a float. 1295 The valid values for a boolean string are yes, no, true, false, on, or off. 1296 --psi4RunParams <Name,Value,...> [default: auto] 1297 A comma delimited list of parameter name and value pairs for configuring 1298 Psi4 jobs. 1299 1300 The supported parameter names along with their default and possible 1301 values are shown below: 1302 1303 MemoryInGB, 1 1304 NumThreads, 1 1305 OutputFile, auto [ Possible values: stdout, quiet, or FileName ] 1306 ScratchDir, auto [ Possivle values: DirName] 1307 RemoveOutputFile, yes [ Possible values: yes, no, true, or false] 1308 1309 These parameters control the runtime behavior of Psi4. 1310 1311 The default file name for 'OutputFile' is <InFileRoot>_Psi4.out. The PID 1312 is appended to output file name during multiprocessing as shown: 1313 <InFileRoot>_Psi4_<PIDNum>.out. The 'stdout' value for 'OutputType' 1314 sends Psi4 output to stdout. The 'quiet' or 'devnull' value suppresses 1315 all Psi4 output. The 'OutputFile' is set to 'quiet' for 'auto' value during 1316 'Conformers' of '--mpLevel' option. 1317 1318 The default 'Yes' value of 'RemoveOutputFile' option forces the removal 1319 of any existing Psi4 before creating new files to append output from 1320 multiple Psi4 runs. 1321 1322 The option 'ScratchDir' is a directory path to the location of scratch 1323 files. The default value corresponds to Psi4 default. It may be used to 1324 override the deafult path. 1325 --psi4ConstrainTorsions <yes or no> [default: no] 1326 Constrain torsions around rotatable bonds during Psi4 minimization. 1327 --psi4ConstrainTorsionsParams <Name,Value,...> [default: auto] 1328 A space delimited list of parameter name and value pairs for constraining 1329 torsions around rotatable bonds during Psi4 minimization. The supported 1330 parameter names, along with their default values, are shown below: 1331 1332 ignoreHydrogens, yes, [ Possible values: yes or no ] 1333 rotBondsSMARTSMode, NonStrict, [ Possible values: NonStrict, 1334 SemiStrict, Strict, Specify ] 1335 rotBondsSMARTSPattern, auto [ Possible values: Valid SMARTS ] 1336 1337 ignoreHydrogens: Ignore torsions around rotatable bonds containing 1338 hydrogens during Psi4 minimization. 1339 1340 rotBondsSMARTSMode: SMARTS pattern to use for identifying rotatable bonds 1341 for constraining torsions during Psi4 minimization. Possible values: NonStrict, 1342 SemiStrict, Strict or Specify. The following SMARTS patterns are used to identify 1343 rotatable bonds for different modes: 1344 1345 NonStrict: [!$(*#*)&!D1]-&!@[!$(*#*)&!D1] 1346 1347 SemiStrict: 1348 [!$(*#*)&!D1&!$(C(F)(F)F)&!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br) 1349 &!$(C([CH3])([CH3])[CH3])]-!@[!$(*#*)&!D1&!$(C(F)(F)F) 1350 &!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)&!$(C([CH3])([CH3])[CH3])] 1351 1352 Strict: 1353 [!$(*#*)&!D1&!$(C(F)(F)F)&!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br) 1354 &!$(C([CH3])([CH3])[CH3])&!$([CD3](=[N,O,S])-!@[#7,O,S!D1]) 1355 &!$([#7,O,S!D1]-!@[CD3]=[N,O,S])&!$([CD3](=[N+])-!@[#7!D1]) 1356 &!$([#7!D1]-!@[CD3]=[N+])]-!@[!$(*#*)&!D1&!$(C(F)(F)F) 1357 &!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)&!$(C([CH3])([CH3])[CH3])] 1358 1359 The 'NonStrict' and 'Strict' SMARTS patterns are available in RDKit. The 1360 'NonStrict' SMARTS pattern corresponds to original Daylight SMARTS 1361 specification for rotatable bonds. The 'SemiStrict' SMARTS pattern is 1362 derived from 'Strict' SMARTS patterns for its usage in this script. 1363 1364 You may use any arbitrary SMARTS pattern to identify rotatable bonds by 1365 choosing 'Specify' value for 'rotBondsSMARTSMode' parameter and providing 1366 its value via '--rotBondsSMARTSPattern'. 1367 1368 rotBondsSMARTSPattern: SMARTS pattern for identifying rotatable bonds for 1369 constraining torsions during Psi4 minimization. This option is only valid for 1370 'Specify' value of 'rotBondsSMARTSMode' parameter. 1371 --psi4DDXSolvation <yes or no> [default: no] 1372 Perform minimization in solution using domain-decomposition-based 1373 continuum solvation models [ Ref 160-161 ]. The script relies on Psi4 1374 interface to the DDX module to perform these calculations. The DDX library 1375 provides a linear-scaling implementation of standard continuum solvation 1376 models using a domain- decomposition ansatz. Two solvation models are 1377 supported: COnductor-like Screening MOdel (COSMO) [ Ref 162-163] and 1378 Polarizable Continuum Model (PCM) [ Ref 164-165 ]. 1379 1380 The solvation energy is included in the value of the total energy written to 1381 the output SD file. In addition, the value of solvation energy is written to the 1382 output file under its own data field label. 1383 1384 Psi4 relies on Python module PYDDX to calculate solvation energy. It must be 1385 present in your environment. 1386 --psi4DDXSolvationParams <Name,Value,...> [default: auto] 1387 A space delimited list of parameter name and value pairs for calculating 1388 solvation energy using the DDX module. The supported parameter names, 1389 along with their default values, are shown below: 1390 1391 solvationModel PCM [ Possible values: COSMO or PCM] 1392 solvent water [ Possible values: A valid solvent name] 1393 solventEpsilon None 1394 radiiSet UFF [ Possible values: Bondi or UFF] 1395 radiiScaling auto [ Default: 1.2 for Bondi; 1.1 for UFF] 1396 1397 solvationModel: Solvation model for calculating solvation energy. 1398 The corresponding Psi4 option is DDX_MODEL. 1399 1400 solvent: Solvent to use. The corresponding Ps4 option is 1401 DDX_SOLVENT 1402 1403 solventEpsilon: Dielectric constant of the solvent. The 1404 corresponding Psi4 option is DDX_SOLVENT_EPSILON. 1405 1406 radiiSet: Radius set for cavity spheres. The corresponding Psi4 1407 option is DDX_RADII_SET. 1408 1409 radiiScaling: Scaling factor for cavity spheres. The default value 1410 depends on radiiSet: 1.2 for Bondi; 1.1 for UFF. The corresponding 1411 Psi4 option is DDX_RADII_SCALING. 1412 1413 These parameter names are automatically mapped to appropriate DDX keywords. 1414 1415 You may specify the solvent either by directly providing a dielectric constant using 1416 'solventEpsilon' parameter or a solvent name corresponding to 'solvent' parameter. 1417 The solvent name must be a valid named supported by the DDX module. For example: 1418 water, ethanol, dimethylsulfoxide, cis-1,2-dimethylcyclohexane , etc. The 'solvent' 1419 parameter is ignored for non-zero value of 'solvent' option. 1420 1421 The DDX module contains a variety of addtional options to confgure the calculation 1422 of solvation energy. For example: DDX_MAXITER, DDX_RADII_SCALING, etc. You may 1423 use '--psi4OptionsParams' to modify values for additional DDX options. 1424 --psi4DDXListSolvents 1425 List solvent names, along with dielectric values, supported by the DDX module 1426 for the calculation of solvent energy without performing any calculation. 1427 -q, --quiet <yes or no> [default: no] 1428 Use quiet mode. The warning and information messages will not be printed. 1429 -r, --reference <text> [default: auto] 1430 Reference wave function to use for energy calculation. Default: RHF or UHF. 1431 The default values are Restricted Hartree-Fock (RHF) for closed-shell molecules 1432 with all electrons paired and Unrestricted Hartree-Fock (UHF) for open-shell 1433 molecules with unpaired electrons. 1434 1435 The specified value must be a valid Psi4 reference wave function. No validation 1436 is performed. For example: ROHF, CUHF, RKS, etc. 1437 1438 The spin multiplicity determines the default value of reference wave function 1439 for input molecules. It is calculated from number of free radical electrons using 1440 Hund's rule of maximum multiplicity defined as 2S + 1 where S is the total 1441 electron spin. The total spin is 1/2 the number of free radical electrons in a 1442 molecule. The value of 'SpinMultiplicity' molecule property takes precedence 1443 over the calculated value of spin multiplicity. 1444 --recenterAndReorient <yes or no> [default: auto] 1445 Recenter and reorient a molecule during creation of a Psi4 molecule from 1446 a geometry string. Default: 'No' during 'No' value of '--confGeneration'; 1447 Otherwise, 'Yes'. 1448 1449 The 'No' values allows the minimization of a molecule in its initial 3D 1450 coordinate space in input file or conformer ensemble. 1451 --symmetrize <yes or no> [default: auto] 1452 Symmetrize molecules before energy minimization. Default: 'Yes' during 1453 'Yes' value of '--recenterAndReorient'; Otherwise, 'No'. The psi4 function, 1454 psi4mol.symmetrize( SymmetrizeTolerance), is called to symmetrize 1455 the molecule before calling psi4.optimize(). 1456 1457 The 'No' value of '--symmetrize' during 'Yes' value of '--recenterAndReorient' 1458 may cause psi4.optimize() to fail with a 'Point group changed...' error 1459 message. 1460 --symmetrizeTolerance <number> [default: 0.01] 1461 Symmetry tolerance for '--symmetrize'. 1462 -w, --workingdir <dir> 1463 Location of working directory which defaults to the current directory. 1464 1465 Examples: 1466 To generate an initial conformer ensemble of up to 50 conformations using a 1467 combination of ETKDGv2 distance geometry methodology, applying RMSD cutoff 1468 of 0.5 and MMFF forcefield minimization, followed by energy minimization 1469 using B3LYP/6-31G** and B3LYP/6-31+G** for non-sulfur and sulfur containing 1470 molecules in a SMILES file and write out a SD file containing minimum energy 1471 structure corresponding to each molecule, type: 1472 1473 % Psi4PerformMinimization.py -i Psi4Sample.smi -o Psi4SampleOut.sdf 1474 1475 To run the first example in a quiet mode and write out a SD file, type: 1476 1477 % Psi4PerformMinimization.py -q yes -i Psi4Sample.smi -o 1478 Psi4SampleOut.sdf 1479 1480 To run the first example along with performing energy mimimization in solution 1481 using default DDX solvation parameters for water and write out a SD file, type: 1482 1483 % Psi4PerformMinimization.py -i Psi4Sample.smi -o Psi4SampleOut.sdf 1484 --psi4DDXSolvation yes 1485 1486 To run the first example in multiprocessing mode at molecules level on all 1487 available CPUs without loading all data into memory and write out a SD file, 1488 type: 1489 1490 % Psi4PerformMinimization.py --mp yes -i Psi4Sample.smi -o 1491 Psi4SampleOut.sdf 1492 1493 To run the first example in multiprocessing mode at conformers level on all 1494 available CPUs without loading all data into memory and write out a SD file, 1495 type: 1496 1497 % Psi4PerformMinimization.py --mp yes --mpLevel Conformers 1498 -i Psi4Sample.smi -o Psi4SampleOut.sdf 1499 1500 To run the first example in multiprocessing mode at molecules level on all 1501 available CPUs by loading all data into memory and write out a SD file, type: 1502 1503 % Psi4PerformMinimization.py --mp yes --mpParams "inputDataMode, 1504 InMemory" -i Psi4Sample.smi -o Psi4SampleOut.sdf 1505 1506 To run the first example in multiprocessing mode at molecules level on specific 1507 number of CPUs and chunk size without loading all data into memory and write 1508 out a SD file, type: 1509 1510 % Psi4PerformMinimization.py --mp yes --mpParams "inputDataMode,Lazy, 1511 numProcesses,4,chunkSize,8" -i Psi4Sample.smi -o Psi4SampleOut.sdf 1512 1513 To generate an initial conformer ensemble of up to 20 conformations using a 1514 combination of ETKDGv2 distance geometry methodology and MMFF94s forcefield 1515 minimization followed by energy minimization for a maxium of 20 iterations 1516 using B3LYP/6-31+G** molecules in a SMILES file and write out a SD file 1517 containing minimum energy structure along with energy in specific units, 1518 type: 1519 1520 % Psi4PerformMinimization.py --confGeneration yes --confParams 1521 "confMethod,ETKDGv2,forceField,MMFF, forceFieldMMFFVariant,MMFF94s, 1522 maxConfs,20,embedRMSDCutoff,0.5" --energyUnits "kJ/mol" -m B3LYP 1523 -b "6-31+G**" --maxIters 20 -i Psi4Sample.smi -o Psi4SampleOut.sdf 1524 1525 To minimize molecules in a 3D files using B3LYP/6-31G** and B3LYP/6-31+G** 1526 for non-sulfur and sulfur containing molecules for a maximum of 25 iterations 1527 without generating any conformations and write out a SD file containing 1528 minimum energy structures corresponding to each molecule, type: 1529 1530 % Psi4PerformMinimization.py --confGeneration no --maxIters 25 1531 -i Psi4Sample3D.sdf -o Psi4Sample3DOut.sdf 1532 1533 To run the first example for molecules in a CSV SMILES file, SMILES strings 1534 in column 1, name column 2, and write out a SD file, type: 1535 1536 % Psi4PerformMinimization.py --infileParams "smilesDelimiter,comma, 1537 smilesTitleLine,yes,smilesColumn,1,smilesNameColumn,2" 1538 -i Psi4Sample.csv -o Psi4SampleOut.sdf 1539 1540 Author: 1541 1542 Manish Sud(msud@san.rr.com) 1543 1544 Contributor: 1545 Francois Berenger 1546 1547 See also: 1548 Psi4CalculateEnergy.py, Psi4CalculatePartialCharges.py, Psi4GenerateConformers.py 1549 1550 Copyright: 1551 Copyright (C) 2024 Manish Sud. All rights reserved. 1552 1553 The functionality available in this script is implemented using Psi4, an 1554 open source quantum chemistry software package, and RDKit, an open 1555 source toolkit for cheminformatics developed by Greg Landrum. 1556 1557 This file is part of MayaChemTools. 1558 1559 MayaChemTools is free software; you can redistribute it and/or modify it under 1560 the terms of the GNU Lesser General Public License as published by the Free 1561 Software Foundation; either version 3 of the License, or (at your option) any 1562 later version. 1563 1564 """ 1565 1566 if __name__ == "__main__": 1567 main()