1 #!/bin/env python 2 # 3 # File: Psi4PerformTorsionScan.py 4 # Author: Manish Sud <msud@san.rr.com> 5 # 6 # Copyright (C) 2024 Manish Sud. All rights reserved. 7 # 8 # The functionality available in this script is implemented using Psi4, an 9 # open source quantum chemistry software package, and RDKit, an open 10 # source toolkit for cheminformatics developed by Greg Landrum. 11 # 12 # This file is part of MayaChemTools. 13 # 14 # MayaChemTools is free software; you can redistribute it and/or modify it under 15 # the terms of the GNU Lesser General Public License as published by the Free 16 # Software Foundation; either version 3 of the License, or (at your option) any 17 # later version. 18 # 19 # MayaChemTools is distributed in the hope that it will be useful, but without 20 # any warranty; without even the implied warranty of merchantability of fitness 21 # for a particular purpose. See the GNU Lesser General Public License for more 22 # details. 23 # 24 # You should have received a copy of the GNU Lesser General Public License 25 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or 26 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, 27 # Boston, MA, 02111-1307, USA. 28 # 29 # 30 31 from __future__ import print_function 32 33 # Add local python path to the global path and import standard library modules... 34 import os 35 import sys; sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python")) 36 import time 37 import re 38 import glob 39 import shutil 40 import multiprocessing as mp 41 42 import matplotlib.pyplot as plt 43 import seaborn as sns 44 45 # Psi4 imports... 46 if (hasattr(shutil, 'which') and shutil.which("psi4") is None): 47 sys.stderr.write("\nWarning: Failed to find 'psi4' in your PATH indicating potential issues with your\n") 48 sys.stderr.write("Psi4 environment. The 'import psi4' directive in the global scope of the script\n") 49 sys.stderr.write("interferes with the multiprocessing functionality. It is imported later in the\n") 50 sys.stderr.write("local scope during the execution of the script and may fail. Check/update your\n") 51 sys.stderr.write("Psi4 environment and try again.\n\n") 52 53 # RDKit imports... 54 try: 55 from rdkit import rdBase 56 from rdkit import Chem 57 from rdkit.Chem import AllChem 58 from rdkit.Chem import rdMolAlign 59 from rdkit.Chem import rdMolTransforms 60 except ImportError as ErrMsg: 61 sys.stderr.write("\nFailed to import RDKit module/package: %s\n" % ErrMsg) 62 sys.stderr.write("Check/update your RDKit environment and try again.\n\n") 63 sys.exit(1) 64 65 # MayaChemTools imports... 66 try: 67 from docopt import docopt 68 import MiscUtil 69 import Psi4Util 70 import RDKitUtil 71 except ImportError as ErrMsg: 72 sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg) 73 sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n") 74 sys.exit(1) 75 76 ScriptName = os.path.basename(sys.argv[0]) 77 Options = {} 78 OptionsInfo = {} 79 80 def main(): 81 """Start execution of the script.""" 82 83 MiscUtil.PrintInfo("\n%s (Psi4: Imported later; RDKit v%s; MayaChemTools v%s; %s): Starting...\n" % (ScriptName, rdBase.rdkitVersion, MiscUtil.GetMayaChemToolsVersion(), time.asctime())) 84 85 (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime() 86 87 # Retrieve command line arguments and options... 88 RetrieveOptions() 89 90 # Process and validate command line arguments and options... 91 ProcessOptions() 92 93 # Perform actions required by the script... 94 PerformTorsionScan() 95 96 MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName) 97 MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime)) 98 99 def PerformTorsionScan(): 100 """Perform torsion scan.""" 101 102 # Setup a molecule reader for input file... 103 MiscUtil.PrintInfo("\nProcessing file %s..." % OptionsInfo["Infile"]) 104 OptionsInfo["InfileParams"]["AllowEmptyMols"] = True 105 Mols = RDKitUtil.ReadMolecules(OptionsInfo["Infile"], **OptionsInfo["InfileParams"]) 106 107 PlotExt = OptionsInfo["OutPlotParams"]["OutExt"] 108 FileDir, FileName, FileExt = MiscUtil.ParseFileName(Options["--outfile"]) 109 MiscUtil.PrintInfo("Generating output files %s_*.sdf, %s_*Torsion*Match*.sdf, %s_*Torsion*Match*Energies.csv, %s_*Torsion*Match*Plot.%s..." % (FileName, FileName, FileName, FileName, PlotExt)) 110 111 MolCount, ValidMolCount, MinimizationFailedCount, TorsionsMissingCount, TorsionsScanFailedCount = ProcessMolecules(Mols) 112 113 MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount) 114 MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount) 115 MiscUtil.PrintInfo("Number of molecules failed during initial minimization: %d" % MinimizationFailedCount) 116 MiscUtil.PrintInfo("Number of molecules without any matched torsions: %d" % TorsionsMissingCount) 117 MiscUtil.PrintInfo("Number of molecules failed during torsion scan: %d" % TorsionsScanFailedCount) 118 MiscUtil.PrintInfo("Number of ignored molecules: %d" % (MolCount - ValidMolCount + TorsionsMissingCount + MinimizationFailedCount + TorsionsScanFailedCount)) 119 120 def ProcessMolecules(Mols): 121 """Process molecules to perform torsion scan.""" 122 123 if OptionsInfo["MPMode"]: 124 return ProcessMoleculesUsingMultipleProcesses(Mols) 125 else: 126 return ProcessMoleculesUsingSingleProcess(Mols) 127 128 def ProcessMoleculesUsingSingleProcess(Mols): 129 """Process molecules to perform torsion scan using a single process.""" 130 131 # Intialize Psi4... 132 MiscUtil.PrintInfo("\nInitializing Psi4...") 133 Psi4Handle = Psi4Util.InitializePsi4(Psi4RunParams = OptionsInfo["Psi4RunParams"], Psi4OptionsParams = OptionsInfo["Psi4OptionsParams"], PrintVersion = True, PrintHeader = True) 134 OptionsInfo["psi4"] = Psi4Handle 135 136 # Setup max iterations global variable... 137 Psi4Util.UpdatePsi4OptionsParameters(Psi4Handle, {'GEOM_MAXITER': OptionsInfo["MaxIters"]}) 138 139 # Setup conversion factor for energy units... 140 SetupEnergyConversionFactor(Psi4Handle) 141 142 MolInfoText = "first molecule" 143 if not OptionsInfo["FirstMolMode"]: 144 MolInfoText = "all molecules" 145 146 if OptionsInfo["TorsionMinimize"]: 147 MiscUtil.PrintInfo("\nPerforming torsion scan on %s by generating conformation ensembles for specific torsion angles and constrained energy minimization of the ensembles..." % (MolInfoText)) 148 else: 149 MiscUtil.PrintInfo("\nPerforming torsion scan on %s by skipping generation of conformation ensembles for specific torsion angles and constrained energy minimization of the ensembles..." % (MolInfoText)) 150 151 SetupTorsionsPatternsInfo() 152 153 (MolCount, ValidMolCount, TorsionsMissingCount, MinimizationFailedCount, TorsionsScanFailedCount) = [0] * 5 154 155 for Mol in Mols: 156 MolCount += 1 157 158 if OptionsInfo["FirstMolMode"] and MolCount > 1: 159 MolCount -= 1 160 break 161 162 if not CheckAndValidateMolecule(Mol, MolCount): 163 continue 164 165 # Setup 2D coordinates for SMILES input file... 166 if OptionsInfo["SMILESInfileStatus"]: 167 AllChem.Compute2DCoords(Mol) 168 169 ValidMolCount += 1 170 171 Mol, MinimizationCalcStatus, TorsionsMatchStatus, TorsionsScanCalcStatus = PerformMinimizationAndTorsionScan(Mol, MolCount) 172 173 if not MinimizationCalcStatus: 174 MinimizationFailedCount += 1 175 continue 176 177 if not TorsionsMatchStatus: 178 TorsionsMissingCount += 1 179 continue 180 181 if not TorsionsScanCalcStatus: 182 TorsionsScanFailedCount += 1 183 continue 184 185 return (MolCount, ValidMolCount, MinimizationFailedCount, TorsionsMissingCount, TorsionsScanFailedCount) 186 187 def ProcessMoleculesUsingMultipleProcesses(Mols): 188 """Process and minimize molecules using multiprocessing.""" 189 190 if OptionsInfo["MPLevelTorsionAnglesMode"]: 191 return ProcessMoleculesUsingMultipleProcessesAtTorsionAnglesLevel(Mols) 192 elif OptionsInfo["MPLevelMoleculesMode"]: 193 return ProcessMoleculesUsingMultipleProcessesAtMoleculesLevel(Mols) 194 else: 195 MiscUtil.PrintError("The value, %s, option \"--mpLevel\" is not supported." % (OptionsInfo["MPLevel"])) 196 197 def ProcessMoleculesUsingMultipleProcessesAtMoleculesLevel(Mols): 198 """Process molecules to perform torsion scan using multiprocessing at molecules level.""" 199 200 MolInfoText = "first molecule" 201 if not OptionsInfo["FirstMolMode"]: 202 MolInfoText = "all molecules" 203 204 if OptionsInfo["TorsionMinimize"]: 205 MiscUtil.PrintInfo("\nPerforming torsion scan on %s using multiprocessing at molecules level by generating conformation ensembles for specific torsion angles and constrained energy minimization of the ensembles..." % (MolInfoText)) 206 else: 207 MiscUtil.PrintInfo("\nPerforming torsion scan %s using multiprocessing at molecules level by skipping generation of conformation ensembles for specific torsion angles and constrained energy minimization of the ensembles..." % (MolInfoText)) 208 209 MPParams = OptionsInfo["MPParams"] 210 211 # Setup data for initializing a worker process... 212 InitializeWorkerProcessArgs = (MiscUtil.ObjectToBase64EncodedString(Options), MiscUtil.ObjectToBase64EncodedString(OptionsInfo)) 213 214 if OptionsInfo["FirstMolMode"]: 215 Mol = Mols[0] 216 Mols = [Mol] 217 218 # Setup a encoded mols data iterable for a worker process... 219 WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(Mols) 220 221 # Setup process pool along with data initialization for each process... 222 MiscUtil.PrintInfo("\nConfiguring multiprocessing using %s method..." % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()")) 223 MiscUtil.PrintInfo("NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n" % (MPParams["NumProcesses"], MPParams["InputDataMode"], ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"]))) 224 225 ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeWorkerProcess, InitializeWorkerProcessArgs) 226 227 # Start processing... 228 if re.match("^Lazy$", MPParams["InputDataMode"], re.I): 229 Results = ProcessPool.imap(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"]) 230 elif re.match("^InMemory$", MPParams["InputDataMode"], re.I): 231 Results = ProcessPool.map(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"]) 232 else: 233 MiscUtil.PrintError("The value, %s, specified for \"--inputDataMode\" is not supported." % (MPParams["InputDataMode"])) 234 235 (MolCount, ValidMolCount, TorsionsMissingCount, MinimizationFailedCount, TorsionsScanFailedCount) = [0] * 5 236 237 for Result in Results: 238 MolCount += 1 239 240 MolIndex, EncodedMol, MinimizationCalcStatus, TorsionsMatchStatus, TorsionsScanCalcStatus = Result 241 242 if EncodedMol is None: 243 continue 244 ValidMolCount += 1 245 246 if not MinimizationCalcStatus: 247 MinimizationFailedCount += 1 248 continue 249 250 if not TorsionsMatchStatus: 251 TorsionsMissingCount += 1 252 continue 253 254 if not TorsionsScanCalcStatus: 255 TorsionsScanFailedCount += 1 256 continue 257 258 Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol) 259 260 return (MolCount, ValidMolCount, MinimizationFailedCount, TorsionsMissingCount, TorsionsScanFailedCount) 261 262 def InitializeWorkerProcess(*EncodedArgs): 263 """Initialize data for a worker process.""" 264 265 global Options, OptionsInfo 266 267 if not OptionsInfo["QuietMode"]: 268 MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid()) 269 270 # Decode Options and OptionInfo... 271 Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0]) 272 OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1]) 273 274 # Initialize torsion patterns info... 275 SetupTorsionsPatternsInfo() 276 277 # Psi4 is initialized in the worker process to avoid creation of redundant Psi4 278 # output files for each process... 279 OptionsInfo["Psi4Initialized"] = False 280 281 def WorkerProcess(EncodedMolInfo): 282 """Process data for a worker process.""" 283 284 if not OptionsInfo["Psi4Initialized"]: 285 InitializePsi4ForWorkerProcess() 286 287 MolIndex, EncodedMol = EncodedMolInfo 288 289 MolNum = MolIndex + 1 290 (MinimizationCalcStatus, TorsionsMatchStatus, TorsionsScanCalcStatus) = [False] * 3 291 292 if EncodedMol is None: 293 return [MolIndex, None, MinimizationCalcStatus, TorsionsMatchStatus, TorsionsScanCalcStatus] 294 295 Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol) 296 if not CheckAndValidateMolecule(Mol, MolNum): 297 return [MolIndex, None, MinimizationCalcStatus, TorsionsMatchStatus, TorsionsScanCalcStatus] 298 299 # Setup 2D coordinates for SMILES input file... 300 if OptionsInfo["SMILESInfileStatus"]: 301 AllChem.Compute2DCoords(Mol) 302 303 Mol, MinimizationCalcStatus, TorsionsMatchStatus, TorsionsScanCalcStatus = PerformMinimizationAndTorsionScan(Mol, MolNum) 304 305 return [MolIndex, RDKitUtil.MolToBase64EncodedMolString(Mol, PropertyPickleFlags = Chem.PropertyPickleOptions.MolProps | Chem.PropertyPickleOptions.PrivateProps), MinimizationCalcStatus, TorsionsMatchStatus, TorsionsScanCalcStatus] 306 307 def ProcessMoleculesUsingMultipleProcessesAtTorsionAnglesLevel(Mols): 308 """Process molecules to perform torsion scan using multiprocessing at torsion angles level.""" 309 310 MolInfoText = "first molecule" 311 if not OptionsInfo["FirstMolMode"]: 312 MolInfoText = "all molecules" 313 314 if OptionsInfo["TorsionMinimize"]: 315 MiscUtil.PrintInfo("\nPerforming torsion scan on %s using multiprocessing at torsion angles level by generating conformation ensembles for specific torsion angles and constrained energy minimization of the ensembles..." % (MolInfoText)) 316 else: 317 MiscUtil.PrintInfo("\nPerforming torsion scan %s using multiprocessing at torsion angles level by skipping generation of conformation ensembles for specific torsion angles and constrained energy minimization of the ensembles..." % (MolInfoText)) 318 319 SetupTorsionsPatternsInfo() 320 321 (MolCount, ValidMolCount, TorsionsMissingCount, MinimizationFailedCount, TorsionsScanFailedCount) = [0] * 5 322 323 for Mol in Mols: 324 MolCount += 1 325 326 if OptionsInfo["FirstMolMode"] and MolCount > 1: 327 MolCount -= 1 328 break 329 330 if not CheckAndValidateMolecule(Mol, MolCount): 331 continue 332 333 # Setup 2D coordinates for SMILES input file... 334 if OptionsInfo["SMILESInfileStatus"]: 335 AllChem.Compute2DCoords(Mol) 336 337 ValidMolCount += 1 338 339 Mol, MinimizationCalcStatus, TorsionsMatchStatus, TorsionsScanCalcStatus = PerformMinimizationAndTorsionScan(Mol, MolCount, UseMultiProcessingAtTorsionAnglesLevel = True) 340 341 if not MinimizationCalcStatus: 342 MinimizationFailedCount += 1 343 continue 344 345 if not TorsionsMatchStatus: 346 TorsionsMissingCount += 1 347 continue 348 349 if not TorsionsScanCalcStatus: 350 TorsionsScanFailedCount += 1 351 continue 352 353 return (MolCount, ValidMolCount, MinimizationFailedCount, TorsionsMissingCount, TorsionsScanFailedCount) 354 355 def ScanSingleTorsionInMolUsingMultipleProcessesAtTorsionAnglesLevel(Mol, TorsionID, TorsionPattern, TorsionPatternMol, TorsionMatches, MolNum): 356 """Perform torsion scan for a molecule using multiple processses at torsion angles 357 level along with constrained energy minimization. 358 """ 359 360 if OptionsInfo["MPLevelMoleculesMode"]: 361 MiscUtil.PrintError("Single torison scanning for a molecule is not allowed in multiprocessing mode at molecules level.\n") 362 363 Mols, Angles = SetupMolsForSingleTorsionScanInMol(Mol, TorsionMatches, MolNum) 364 365 MPParams = OptionsInfo["MPParams"] 366 367 # Setup data for initializing a worker process... 368 369 # Track and avoid encoding TorsionsPatternsInfo as it contains RDKit molecule object... 370 TorsionsPatternsInfo = OptionsInfo["TorsionsPatternsInfo"] 371 OptionsInfo["TorsionsPatternsInfo"] = None 372 373 InitializeWorkerProcessArgs = (MiscUtil.ObjectToBase64EncodedString(Options), MiscUtil.ObjectToBase64EncodedString(OptionsInfo)) 374 375 # Restore TorsionsPatternsInfo... 376 OptionsInfo["TorsionsPatternsInfo"] = TorsionsPatternsInfo 377 378 # Setup a encoded mols data iterable for a worker process... 379 WorkerProcessDataIterable = GenerateBase64EncodedMolStringsWithTorsionScanInfo(Mol, (MolNum -1), Mols, Angles, TorsionID, TorsionPattern, TorsionPatternMol, TorsionMatches) 380 381 # Setup process pool along with data initialization for each process... 382 MiscUtil.PrintInfo("\nConfiguring multiprocessing using %s method..." % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()")) 383 MiscUtil.PrintInfo("NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n" % (MPParams["NumProcesses"], MPParams["InputDataMode"], ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"]))) 384 385 ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeTorsionAngleWorkerProcess, InitializeWorkerProcessArgs) 386 387 # Start processing... 388 if re.match("^Lazy$", MPParams["InputDataMode"], re.I): 389 Results = ProcessPool.imap(TorsionAngleWorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"]) 390 elif re.match("^InMemory$", MPParams["InputDataMode"], re.I): 391 Results = ProcessPool.map(TorsionAngleWorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"]) 392 else: 393 MiscUtil.PrintError("The value, %s, specified for \"--inputDataMode\" is not supported." % (MPParams["InputDataMode"])) 394 395 TorsionMols = [] 396 TorsionEnergies = [] 397 TorsionAngles = [] 398 399 for Result in Results: 400 EncodedTorsionMol, CalcStatus, Angle, Energy = Result 401 402 if not CalcStatus: 403 return (Mol, False, None, None, None) 404 405 if EncodedTorsionMol is None: 406 return (Mol, False, None, None, None) 407 TorsionMol = RDKitUtil.MolFromBase64EncodedMolString(EncodedTorsionMol) 408 409 TorsionMols.append(TorsionMol) 410 TorsionEnergies.append(Energy) 411 TorsionAngles.append(Angle) 412 413 return (Mol, True, TorsionMols, TorsionEnergies, TorsionAngles) 414 415 def InitializeTorsionAngleWorkerProcess(*EncodedArgs): 416 """Initialize data for a worker process.""" 417 418 global Options, OptionsInfo 419 420 if not OptionsInfo["QuietMode"]: 421 MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid()) 422 423 # Decode Options and OptionInfo... 424 Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0]) 425 OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1]) 426 427 # Initialize torsion patterns info... 428 SetupTorsionsPatternsInfo() 429 430 # Psi4 is initialized in the worker process to avoid creation of redundant Psi4 431 # output files for each process... 432 OptionsInfo["Psi4Initialized"] = False 433 434 def TorsionAngleWorkerProcess(EncodedMolInfo): 435 """Process data for a worker process.""" 436 437 if not OptionsInfo["Psi4Initialized"]: 438 InitializePsi4ForWorkerProcess() 439 440 MolIndex, EncodedMol, EncodedTorsionMol, TorsionAngle, TorsionID, TorsionPattern, EncodedTorsionPatternMol, TorsionMatches = EncodedMolInfo 441 442 if EncodedMol is None or EncodedTorsionMol is None or EncodedTorsionPatternMol is None: 443 return (None, False, None, None) 444 445 Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol) 446 TorsionMol = RDKitUtil.MolFromBase64EncodedMolString(EncodedTorsionMol) 447 TorsionPatternMol = RDKitUtil.MolFromBase64EncodedMolString(EncodedTorsionPatternMol) 448 449 TorsionMol, CalcStatus, Energy = MinimizeCalculateEnergyForTorsionMol(Mol, TorsionMol, TorsionAngle, TorsionID, TorsionPattern, TorsionPatternMol, TorsionMatches, (MolIndex + 1)) 450 451 return (RDKitUtil.MolToBase64EncodedMolString(TorsionMol, PropertyPickleFlags = Chem.PropertyPickleOptions.MolProps | Chem.PropertyPickleOptions.PrivateProps), CalcStatus, TorsionAngle, Energy) 452 453 def GenerateBase64EncodedMolStringsWithTorsionScanInfo(Mol, MolIndex, TorsionMols, TorsionAngles, TorsionID, TorsionPattern, TorsionPatternMol, TorsionMatches, PropertyPickleFlags = Chem.PropertyPickleOptions.AllProps): 454 """Set up an iterator for generating base64 encoded molecule string for 455 a torsion in a molecule along with appropriate trosion scan information. 456 """ 457 458 for Index, TorsionMol in enumerate(TorsionMols): 459 yield [MolIndex, None, None, TorsionAngles[Index], TorsionID, TorsionPattern, None, TorsionMatches] if (Mol is None or TorsionMol is None) else [MolIndex, RDKitUtil.MolToBase64EncodedMolString(Mol, PropertyPickleFlags), RDKitUtil.MolToBase64EncodedMolString(TorsionMol, PropertyPickleFlags), TorsionAngles[Index], TorsionID, TorsionPattern, RDKitUtil.MolToBase64EncodedMolString(TorsionPatternMol, PropertyPickleFlags), TorsionMatches] 460 461 def InitializePsi4ForWorkerProcess(): 462 """Initialize Psi4 for a worker process.""" 463 464 if OptionsInfo["Psi4Initialized"]: 465 return 466 467 OptionsInfo["Psi4Initialized"] = True 468 469 if OptionsInfo["MPLevelTorsionAnglesMode"] and re.match("auto", OptionsInfo["Psi4RunParams"]["OutputFileSpecified"], re.I): 470 # Run Psi4 in quiet mode during multiprocessing at Torsions level for 'auto' OutputFile... 471 OptionsInfo["Psi4RunParams"]["OutputFile"] = "quiet" 472 else: 473 # Update output file... 474 OptionsInfo["Psi4RunParams"]["OutputFile"] = Psi4Util.UpdatePsi4OutputFileUsingPID(OptionsInfo["Psi4RunParams"]["OutputFile"], os.getpid()) 475 476 # Intialize Psi4... 477 OptionsInfo["psi4"] = Psi4Util.InitializePsi4(Psi4RunParams = OptionsInfo["Psi4RunParams"], Psi4OptionsParams = OptionsInfo["Psi4OptionsParams"], PrintVersion = False, PrintHeader = True) 478 479 # Setup max iterations global variable... 480 Psi4Util.UpdatePsi4OptionsParameters(OptionsInfo["psi4"], {'GEOM_MAXITER': OptionsInfo["MaxIters"]}) 481 482 # Setup conversion factor for energy units... 483 SetupEnergyConversionFactor(OptionsInfo["psi4"]) 484 485 def PerformMinimizationAndTorsionScan(Mol, MolNum, UseMultiProcessingAtTorsionAnglesLevel = False): 486 """Perform minimization and torsions scan.""" 487 488 if not OptionsInfo["Infile3D"]: 489 # Add hydrogens... 490 Mol = Chem.AddHs(Mol, addCoords = True) 491 492 Mol, MinimizationCalcStatus = MinimizeMolecule(Mol, MolNum) 493 if not MinimizationCalcStatus: 494 return (Mol, False, False, False) 495 496 TorsionsMolInfo = SetupTorsionsMolInfo(Mol, MolNum) 497 if TorsionsMolInfo["NumOfMatches"] == 0: 498 return (Mol, True, False, False) 499 500 Mol, ScanCalcStatus = ScanAllTorsionsInMol(Mol, TorsionsMolInfo, MolNum, UseMultiProcessingAtTorsionAnglesLevel) 501 if not ScanCalcStatus: 502 return (Mol, True, True, False) 503 504 return (Mol, True, True, True) 505 506 def ScanAllTorsionsInMol(Mol, TorsionsMolInfo, MolNum, UseMultiProcessingAtTorsionAnglesLevel = False): 507 """Perform scans on all torsions in a molecule.""" 508 509 if TorsionsMolInfo["NumOfMatches"] == 0: 510 return Mol, True 511 512 MolName = RDKitUtil.GetMolName(Mol, MolNum) 513 514 FirstTorsionMode = OptionsInfo["FirstTorsionMode"] 515 TorsionsPatternsInfo = OptionsInfo["TorsionsPatternsInfo"] 516 517 TorsionPatternCount, TorsionScanCount, TorsionMatchCount = [0] * 3 518 TorsionMaxMatches = OptionsInfo["TorsionMaxMatches"] 519 520 for TorsionID in TorsionsPatternsInfo["IDs"]: 521 TorsionPatternCount += 1 522 TorsionPattern = TorsionsPatternsInfo["Pattern"][TorsionID] 523 TorsionPatternMol = TorsionsPatternsInfo["Mol"][TorsionID] 524 525 TorsionsMatches = TorsionsMolInfo["Matches"][TorsionID] 526 527 if TorsionsMatches is None: 528 continue 529 530 if FirstTorsionMode and TorsionPatternCount > 1: 531 if not OptionsInfo["QuietMode"]: 532 MiscUtil.PrintWarning("Already scaned first torsion pattern, \"%s\" for molecule %s during \"%s\" value of \"--modeTorsions\" option . Abandoning torsion scan...\n" % (TorsionPattern, MolName, OptionsInfo["ModeTorsions"])) 533 break 534 535 for Index, TorsionMatches in enumerate(TorsionsMatches): 536 TorsionMatchNum = Index + 1 537 TorsionMatchCount += 1 538 539 if TorsionMatchCount > TorsionMaxMatches: 540 if not OptionsInfo["QuietMode"]: 541 MiscUtil.PrintWarning("Already scaned a maximum of %s torsion matches for molecule %s specified by \"--torsionMaxMatches\" option. Abandoning torsion scan...\n" % (TorsionMaxMatches, MolName)) 542 break 543 544 TmpMol, TorsionScanStatus, TorsionMols, TorsionEnergies, TorsionAngles = ScanSingleTorsionInMol(Mol, TorsionID, TorsionPattern, TorsionPatternMol, TorsionMatches, TorsionMatchNum, MolNum, UseMultiProcessingAtTorsionAnglesLevel) 545 if not TorsionScanStatus: 546 continue 547 548 TorsionScanCount += 1 549 GenerateOutputFiles(Mol, MolNum, TorsionID, TorsionMatchNum, TorsionMols, TorsionEnergies, TorsionAngles) 550 551 if TorsionMatchCount > TorsionMaxMatches: 552 break 553 554 if TorsionScanCount: 555 GenerateStartingTorsionScanStructureOutfile(Mol, MolNum) 556 557 Status = True if TorsionScanCount else False 558 559 return (Mol, Status) 560 561 def ScanSingleTorsionInMol(Mol, TorsionID, TorsionPattern, TorsionPatternMol, TorsionMatches, TorsionMatchNum, MolNum, UseMultiProcessingAtTorsionAnglesLevel): 562 """Perform torsion scan for a molecule along with constrained energy minimization.""" 563 564 if not OptionsInfo["QuietMode"]: 565 MiscUtil.PrintInfo("\nProcessing torsion pattern, %s, match number, %s, in molecule %s..." % (TorsionPattern, TorsionMatchNum, RDKitUtil.GetMolName(Mol, MolNum))) 566 567 if OptionsInfo["TorsionMinimize"]: 568 MiscUtil.PrintInfo("Generating initial ensemble of constrained conformations by distance geometry and forcefield followed by Psi4 constraned minimization to select the lowest energy structure at specific torsion angles for molecule %s..." % (RDKitUtil.GetMolName(Mol, MolNum))) 569 else: 570 MiscUtil.PrintInfo("Calculating single point energy using Psi4 for molecule, %s, at specific torsion angles..." % (RDKitUtil.GetMolName(Mol, MolNum))) 571 572 if UseMultiProcessingAtTorsionAnglesLevel: 573 return ScanSingleTorsionInMolUsingMultipleProcessesAtTorsionAnglesLevel(Mol, TorsionID, TorsionPattern, TorsionPatternMol, TorsionMatches, MolNum) 574 else: 575 return ScanSingleTorsionInMolUsingSingleProcess(Mol, TorsionID, TorsionPattern, TorsionPatternMol, TorsionMatches, MolNum) 576 577 def ScanSingleTorsionInMolUsingSingleProcess(Mol, TorsionID, TorsionPattern, TorsionPatternMol, TorsionMatches, MolNum): 578 """Perform torsion scan for a molecule using single processs along with constrained 579 energy minimization.""" 580 581 TorsionMols = [] 582 TorsionEnergies = [] 583 TorsionAngles = [] 584 585 Mols, Angles = SetupMolsForSingleTorsionScanInMol(Mol, TorsionMatches, MolNum) 586 587 for Index, Angle in enumerate(Angles): 588 TorsionMol = Mols[Index] 589 TorsionMol, CalcStatus, Energy = MinimizeCalculateEnergyForTorsionMol(Mol, TorsionMol, Angle, TorsionID, TorsionPattern, TorsionPatternMol, TorsionMatches, MolNum) 590 591 if not CalcStatus: 592 return (Mol, False, None, None, None) 593 594 TorsionMols.append(TorsionMol) 595 TorsionEnergies.append(Energy) 596 TorsionAngles.append(Angle) 597 598 return (Mol, True, TorsionMols, TorsionEnergies, TorsionAngles) 599 600 def SetupMolsForSingleTorsionScanInMol(Mol, TorsionMatches, MolNum = None): 601 """Setup molecules corresponding to all torsion angles in a molecule.""" 602 603 AtomIndex1, AtomIndex2, AtomIndex3, AtomIndex4 = TorsionMatches 604 605 TorsionMols = [] 606 TorsionAngles = OptionsInfo["TorsionAngles"] 607 608 for Angle in TorsionAngles: 609 TorsionMol = Chem.Mol(Mol) 610 TorsionMolConf = TorsionMol.GetConformer(0) 611 612 rdMolTransforms.SetDihedralDeg(TorsionMolConf, AtomIndex1, AtomIndex2, AtomIndex3, AtomIndex4, Angle) 613 TorsionMols.append(TorsionMol) 614 615 return (TorsionMols, TorsionAngles) 616 617 def MinimizeCalculateEnergyForTorsionMol(Mol, TorsionMol, TorsionAngle, TorsionID, TorsionPattern, TorsionPatternMol, TorsionMatches, MolNum): 618 """"Calculate energy of a torsion molecule by performing an optional constrained 619 energy minimzation. 620 """ 621 622 if OptionsInfo["TorsionMinimize"]: 623 if not OptionsInfo["QuietMode"]: 624 MolName = RDKitUtil.GetMolName(Mol, MolNum) 625 MiscUtil.PrintInfo("\nProcessing torsion angle %s for molecule %s..." % (TorsionAngle, MolName)) 626 627 # Perform constrained minimization... 628 TorsionMatchesMol = RDKitUtil.MolFromSubstructureMatch(TorsionMol, TorsionPatternMol, TorsionMatches) 629 TorsionMol, CalcStatus, Energy = ConstrainAndMinimizeMolecule(TorsionMol, TorsionAngle, TorsionMatchesMol, TorsionMatches, MolNum) 630 631 if not CalcStatus: 632 if not OptionsInfo["QuietMode"]: 633 MolName = RDKitUtil.GetMolName(Mol, MolNum) 634 MiscUtil.PrintWarning("Failed to perform constrained minimization for molecule %s with torsion angle set to %s during torsion scan for torsion pattern %s. Abandoning torsion scan..." % (MolName, TorsionAngle, TorsionPattern)) 635 return (TorsionMol, False, None) 636 else: 637 # Setup a Psi4 molecule... 638 Psi4Mol = SetupPsi4Mol(OptionsInfo["psi4"], TorsionMol, MolNum) 639 if Psi4Mol is None: 640 return (TorsionMol, False, None) 641 642 # Calculate single point Psi4 energy... 643 CalcStatus, Energy = CalculateEnergyUsingPsi4(OptionsInfo["psi4"], Psi4Mol, TorsionMol, MolNum) 644 if not CalcStatus: 645 if not OptionsInfo["QuietMode"]: 646 MolName = RDKitUtil.GetMolName(Mol, MolNum) 647 MiscUtil.PrintWarning("Failed to calculate Psi4 energy for molecule %s with torsion angle set to %s during torsion scan for torsion pattern %s. Abandoning torsion scan..." % (MolName, TorsionAngle, TorsionPattern)) 648 return (TorsionMol, False, None) 649 650 return (TorsionMol, CalcStatus, Energy) 651 652 def SetupTorsionsMolInfo(Mol, MolNum = None): 653 """Setup torsions info for a molecule.""" 654 655 TorsionsPatternsInfo = OptionsInfo["TorsionsPatternsInfo"] 656 657 # Initialize... 658 TorsionsMolInfo = {} 659 TorsionsMolInfo["IDs"] = [] 660 TorsionsMolInfo["NumOfMatches"] = 0 661 TorsionsMolInfo["Matches"] = {} 662 for TorsionID in TorsionsPatternsInfo["IDs"]: 663 TorsionsMolInfo["IDs"].append(TorsionID) 664 TorsionsMolInfo["Matches"][TorsionID] = None 665 666 MolName = RDKitUtil.GetMolName(Mol, MolNum) 667 UseChirality = OptionsInfo["UseChirality"] 668 669 for TorsionID in TorsionsPatternsInfo["IDs"]: 670 # Match torsions.. 671 TorsionPattern = TorsionsPatternsInfo["Pattern"][TorsionID] 672 TorsionPatternMol = TorsionsPatternsInfo["Mol"][TorsionID] 673 TorsionsMatches = RDKitUtil.FilterSubstructureMatchesByAtomMapNumbers(Mol, TorsionPatternMol, Mol.GetSubstructMatches(TorsionPatternMol, useChirality = UseChirality)) 674 675 # Validate tosion matches... 676 ValidTorsionsMatches = [] 677 for Index, TorsionMatch in enumerate(TorsionsMatches): 678 if len(TorsionMatch) != 4: 679 if not OptionsInfo["QuietMode"]: 680 MiscUtil.PrintWarning("Ignoring invalid torsion match to atom indices, %s, for torsion pattern, %s, in molecule %s: It must match exactly 4 atoms." % (TorsionMatch, TorsionPattern, MolName)) 681 continue 682 683 if not RDKitUtil.AreAtomIndicesSequentiallyConnected(Mol, TorsionMatch): 684 if not OptionsInfo["QuietMode"]: 685 MiscUtil.PrintInfo("") 686 MiscUtil.PrintWarning("Invalid torsion match to atom indices, %s, for torsion pattern, %s, in molecule %s: Matched atom indices must be sequentially connected." % (TorsionMatch, TorsionPattern, MolName)) 687 MiscUtil.PrintWarning("Reordering matched atom indices in a sequentially connected manner...") 688 689 Status, ReorderdTorsionMatch = RDKitUtil.ReorderAtomIndicesInSequentiallyConnectedManner(Mol, TorsionMatch) 690 if Status: 691 TorsionMatch = ReorderdTorsionMatch 692 if not OptionsInfo["QuietMode"]: 693 MiscUtil.PrintWarning("Successfully reordered torsion match to atom indices, %s, for torsion pattern, %s, in molecule %s: Matched atom indices are now sequentially connected." % (TorsionMatch, TorsionPattern, MolName)) 694 else: 695 if not OptionsInfo["QuietMode"]: 696 MiscUtil.PrintWarning("Ignoring torsion match. Failed to reorder torsion match to atom indices, %s, for torsion pattern, %s, in molecule %s: Matched atom indices are not sequentially connected." % (TorsionMatch, TorsionPattern, MolName)) 697 continue 698 699 Bond = Mol.GetBondBetweenAtoms(TorsionMatch[1], TorsionMatch[2]) 700 if Bond.IsInRing(): 701 if not OptionsInfo["QuietMode"]: 702 MiscUtil.PrintWarning("Ignoring invalid torsion match to atom indices, %s, for torsion pattern, %s, in molecule %s: Matched atom indices, %s and %s, are not allowed to be in a ring." % (TorsionMatch, TorsionPattern, MolName, TorsionMatch[1], TorsionMatch[2])) 703 continue 704 705 # Filter matched torsions... 706 if OptionsInfo["FilterTorsionsByAtomIndicesMode"]: 707 InvalidAtomIndices = [] 708 for AtomIndex in TorsionMatch: 709 if AtomIndex not in OptionsInfo["TorsionsFilterByAtomIndicesList"]: 710 InvalidAtomIndices.append(AtomIndex) 711 if len(InvalidAtomIndices): 712 if not OptionsInfo["QuietMode"]: 713 MiscUtil.PrintWarning("Ignoring invalid torsion match to atom indices, %s, for torsion pattern, %s, in molecule %s: Matched atom indices, %s, must be present in the list, %s, specified using option \"--torsionsFilterbyAtomIndices\"." % (TorsionMatch, TorsionPattern, MolName, InvalidAtomIndices, OptionsInfo["TorsionsFilterByAtomIndicesList"])) 714 continue 715 716 ValidTorsionsMatches.append(TorsionMatch) 717 718 # Track valid matches... 719 if len(ValidTorsionsMatches): 720 TorsionsMolInfo["NumOfMatches"] += len(ValidTorsionsMatches) 721 TorsionsMolInfo["Matches"][TorsionID] = ValidTorsionsMatches 722 723 if TorsionsMolInfo["NumOfMatches"] == 0: 724 if not OptionsInfo["QuietMode"]: 725 MiscUtil.PrintWarning("Failed to match any torsions in molecule %s" % (MolName)) 726 727 return TorsionsMolInfo 728 729 def SetupTorsionsPatternsInfo(): 730 """Setup torsions patterns info.""" 731 732 TorsionsPatternsInfo = {} 733 TorsionsPatternsInfo["IDs"] = [] 734 TorsionsPatternsInfo["Pattern"] = {} 735 TorsionsPatternsInfo["Mol"] = {} 736 737 TorsionID = 0 738 for TorsionPattern in OptionsInfo["TorsionPatternsList"]: 739 TorsionID += 1 740 741 TorsionMol = Chem.MolFromSmarts(TorsionPattern) 742 if TorsionMol is None: 743 MiscUtil.PrintError("Failed to create torsion pattern molecule. The torsion SMILES/SMARTS pattern, \"%s\", specified using \"-t, --torsions\" option is not valid." % (TorsionPattern)) 744 745 TorsionsPatternsInfo["IDs"].append(TorsionID) 746 TorsionsPatternsInfo["Pattern"][TorsionID] = TorsionPattern 747 TorsionsPatternsInfo["Mol"][TorsionID] = TorsionMol 748 749 OptionsInfo["TorsionsPatternsInfo"] = TorsionsPatternsInfo 750 751 def MinimizeMolecule(Mol, MolNum = None): 752 """Minimize molecule.""" 753 754 return GenerateAndMinimizeConformersUsingForceField(Mol, MolNum) 755 756 def GenerateAndMinimizeConformersUsingForceField(Mol, MolNum = None): 757 """Generate and minimize conformers for a molecule to get the lowest energy conformer 758 as the minimized structure.""" 759 760 MolName = RDKitUtil.GetMolName(Mol, MolNum) 761 762 # Setup conformers... 763 ConfIDs = EmbedMolecule(Mol, MolNum) 764 if not len(ConfIDs): 765 if not OptionsInfo["QuietMode"]: 766 MiscUtil.PrintWarning("Minimization couldn't be performed for molecule %s: Embedding failed...\n" % MolName) 767 return (Mol, False) 768 769 if not OptionsInfo["QuietMode"]: 770 MiscUtil.PrintInfo("Performing initial minimization of molecule, %s, using forcefield by generating a conformation ensemble and selecting the lowest energy conformer - EmbedRMSDCutoff: %s; Size: %s; Size after RMSD filtering: %s" % (MolName, OptionsInfo["ConfGenerationParams"]["EmbedRMSDCutoff"], OptionsInfo["ConfGenerationParams"]["MaxConfs"], len(ConfIDs))) 771 772 # Minimize conformers... 773 CalcEnergyMap = {} 774 for ConfID in ConfIDs: 775 # Perform forcefield minimization... 776 Status, ConvergeStatus = MinimizeMoleculeUsingForceField(Mol, MolNum, ConfID) 777 if not Status: 778 return (Mol, False) 779 780 EnergyStatus, Energy = CalculateEnergyUsingForceField(Mol, ConfID) 781 if not EnergyStatus: 782 if not OptionsInfo["QuietMode"]: 783 MolName = RDKitUtil.GetMolName(Mol, MolNum) 784 MiscUtil.PrintWarning("Failed to retrieve calculated energy for conformation number %d of molecule %s. Try again after removing any salts or cleaing up the molecule...\n" % (ConfID, MolName)) 785 return (Mol, False) 786 787 if ConvergeStatus != 0: 788 if not OptionsInfo["QuietMode"]: 789 MiscUtil.PrintWarning("Minimization using forcefield failed to converge for molecule %s in %d steps. Try using higher value for \"maxIters\" in \"--confParams\" option...\n" % (MolName, OptionsInfo["ConfGenerationParams"]["MaxIters"])) 790 791 CalcEnergyMap[ConfID] = Energy 792 793 SortedConfIDs = sorted(ConfIDs, key = lambda ConfID: CalcEnergyMap[ConfID]) 794 MinEnergyConfID = SortedConfIDs[0] 795 796 for ConfID in [Conf.GetId() for Conf in Mol.GetConformers()]: 797 if ConfID == MinEnergyConfID: 798 continue 799 Mol.RemoveConformer(ConfID) 800 801 # Set ConfID to 0 for MinEnergyConf... 802 Mol.GetConformer(MinEnergyConfID).SetId(0) 803 804 return (Mol, True) 805 806 def ConstrainAndMinimizeMolecule(Mol, TorsionAngle, RefMolCore, RefMolMatches, MolNum = None): 807 """Constrain and minimize molecule.""" 808 809 # TorsionMol, CalcStatus, Energy 810 MolName = RDKitUtil.GetMolName(Mol, MolNum) 811 812 # Setup constrained conformers... 813 MolConfs, MolConfsStatus = ConstrainEmbedAndMinimizeMoleculeUsingRDKit(Mol, RefMolCore, RefMolMatches, MolNum) 814 if not MolConfsStatus: 815 if not OptionsInfo["QuietMode"]: 816 MiscUtil.PrintWarning("Conformation generation couldn't be performed for molecule %s: Constrained embedding failed...\n" % MolName) 817 return (Mol, False, None) 818 819 # Minimize conformers... 820 ConfNums = [] 821 CalcEnergyMap = {} 822 MolConfsMap = {} 823 824 for ConfNum, MolConf in enumerate(MolConfs): 825 if not OptionsInfo["QuietMode"]: 826 MiscUtil.PrintInfo("\nPerforming constrained minimization using Psi4 for molecule, %s, conformer number, %s, at torsion angle %s..." % (MolName, ConfNum, TorsionAngle)) 827 828 CalcStatus, Energy = ConstrainAndMinimizeMoleculeUsingPsi4(OptionsInfo["psi4"], MolConf, RefMolCore, RefMolMatches, MolNum) 829 if not CalcStatus: 830 if not OptionsInfo["QuietMode"]: 831 MiscUtil.PrintWarning("Minimization couldn't be performed for molecule %s\n" % (MolName)) 832 return (Mol, False, None) 833 834 ConfNums.append(ConfNum) 835 CalcEnergyMap[ConfNum] = Energy 836 MolConfsMap[ConfNum] = MolConf 837 838 SortedConfNums = sorted(ConfNums, key = lambda ConfNum: CalcEnergyMap[ConfNum]) 839 MinEnergyConfNum = SortedConfNums[0] 840 841 MinEnergy = CalcEnergyMap[MinEnergyConfNum] 842 MinEnergyMolConf = MolConfsMap[MinEnergyConfNum] 843 844 MinEnergyMolConf.ClearProp('EmbedRMS') 845 846 return (MinEnergyMolConf, True, MinEnergy) 847 848 def ConstrainAndMinimizeMoleculeUsingPsi4(Psi4Handle, Mol, RefMolCore, RefMolMatches, MolNum, ConfID = -1): 849 """Minimize molecule using Psi4.""" 850 851 # Setup a list for constrained atoms... 852 ConstrainedAtomIndices = SetupConstrainedAtomIndicesForPsi4(Mol, RefMolCore, RefMolMatches) 853 if len(ConstrainedAtomIndices) == 0: 854 return (False, None) 855 856 # Setup a Psi4Mol... 857 Psi4Mol = SetupPsi4Mol(Psi4Handle, Mol, MolNum, ConfID) 858 if Psi4Mol is None: 859 return (False, None) 860 861 # Setup reference wave function... 862 Reference = SetupReferenceWavefunction(Mol) 863 Psi4Handle.set_options({'Reference': Reference}) 864 865 # Setup method name and basis set... 866 MethodName, BasisSet = SetupMethodNameAndBasisSet(Mol) 867 868 # Setup freeze list for constrained torsion... 869 FreezeDihedralList = [("%s" % AtomIdex) for AtomIdex in ConstrainedAtomIndices] 870 FreezeDihedralString = "%s" % " ".join(FreezeDihedralList) 871 Psi4Handle.set_options({"OPTKING__frozen_dihedral": FreezeDihedralString}) 872 873 # Optimize geometry... 874 Status, Energy, WaveFunction = Psi4Util.PerformGeometryOptimization(Psi4Handle, Psi4Mol, MethodName, BasisSet, ReturnWaveFunction = True, Quiet = OptionsInfo["QuietMode"]) 875 876 if not Status: 877 PerformPsi4Cleanup(Psi4Handle) 878 return (False, None) 879 880 # Update atom positions... 881 AtomPositions = Psi4Util.GetAtomPositions(Psi4Handle, WaveFunction, InAngstroms = True) 882 RDKitUtil.SetAtomPositions(Mol, AtomPositions, ConfID = ConfID) 883 884 # Convert energy units... 885 if OptionsInfo["ApplyEnergyConversionFactor"]: 886 Energy = Energy * OptionsInfo["EnergyConversionFactor"] 887 888 # Clean up 889 PerformPsi4Cleanup(Psi4Handle) 890 891 return (True, Energy) 892 893 def ConstrainEmbedAndMinimizeMoleculeUsingRDKit(Mol, RefMolCore, RefMolMatches, MolNum = None): 894 """Constrain, embed, and minimize molecule.""" 895 896 # Setup forcefield function to use for constrained minimization... 897 ForceFieldFunction = None 898 ForceFieldName = None 899 if OptionsInfo["ConfGenerationParams"]["UseUFF"]: 900 ForceFieldFunction = lambda mol, confId = -1 : AllChem.UFFGetMoleculeForceField(mol, confId = confId) 901 ForeceFieldName = "UFF" 902 else: 903 ForceFieldFunction = lambda mol, confId = -1 : AllChem.MMFFGetMoleculeForceField(mol, AllChem.MMFFGetMoleculeProperties(mol, mmffVariant = OptionsInfo["ConfGenerationParams"]["MMFFVariant"]) , confId = confId) 904 ForeceFieldName = "MMFF" 905 906 if ForceFieldFunction is None: 907 if not OptionsInfo["QuietMode"]: 908 MiscUtil.PrintWarning("Failed to setup forcefield %s for molecule: %s\n" % (ForceFieldName, RDKitUtil.GetMolName(Mol, MolNum))) 909 return (None, False) 910 911 MaxConfs = OptionsInfo["ConfGenerationParams"]["MaxConfsTorsions"] 912 EnforceChirality = OptionsInfo["ConfGenerationParams"]["EnforceChirality"] 913 UseExpTorsionAnglePrefs = OptionsInfo["ConfGenerationParams"]["UseExpTorsionAnglePrefs"] 914 ETVersion = OptionsInfo["ConfGenerationParams"]["ETVersion"] 915 UseBasicKnowledge = OptionsInfo["ConfGenerationParams"]["UseBasicKnowledge"] 916 UseTethers = OptionsInfo["ConfGenerationParams"]["UseTethers"] 917 918 MolConfs = [] 919 ConfIDs = [ConfID for ConfID in range(0, MaxConfs)] 920 921 for ConfID in ConfIDs: 922 try: 923 MolConf = Chem.Mol(Mol) 924 RDKitUtil.ConstrainAndEmbed(MolConf, RefMolCore, coreMatchesMol = RefMolMatches, useTethers = UseTethers, coreConfId = -1, randomseed = ConfID, getForceField = ForceFieldFunction, enforceChirality = EnforceChirality, useExpTorsionAnglePrefs = UseExpTorsionAnglePrefs, useBasicKnowledge = UseBasicKnowledge, ETversion = ETVersion) 925 except (ValueError, RuntimeError, Chem.rdchem.KekulizeException) as ErrMsg: 926 if not OptionsInfo["QuietMode"]: 927 MolName = RDKitUtil.GetMolName(Mol, MolNum) 928 MiscUtil.PrintWarning("Constrained embedding couldn't be performed for molecule %s:\n%s\n" % (RDKitUtil.GetMolName(Mol, MolNum), ErrMsg)) 929 return (None, False) 930 931 MolConfs.append(MolConf) 932 933 return FilterConstrainedConformationsByRMSD(Mol, MolConfs, MolNum) 934 935 def FilterConstrainedConformationsByRMSD(Mol, MolConfs, MolNum = None): 936 """Filter contarained conformations by RMSD.""" 937 938 EmbedRMSDCutoff = OptionsInfo["ConfGenerationParams"]["EmbedRMSDCutoff"] 939 if EmbedRMSDCutoff is None or EmbedRMSDCutoff <= 0: 940 if not OptionsInfo["QuietMode"]: 941 MiscUtil.PrintInfo("\nGenerating initial ensemble of constrained conformations by distance geometry and forcefield for %s - EmbedRMSDCutoff: None; Size: %s" % (RDKitUtil.GetMolName(Mol, MolNum), len(MolConfs))) 942 return (MolConfs, True) 943 944 FirstMolConf = True 945 SelectedMolConfs = [] 946 for MolConfIndex, MolConf in enumerate(MolConfs): 947 if FirstMolConf: 948 FirstMolConf = False 949 SelectedMolConfs.append(MolConf) 950 continue 951 952 # Compare RMSD against all selected conformers... 953 ProbeMolConf = Chem.RemoveHs(Chem.Mol(MolConf)) 954 IgnoreConf = False 955 for SelectedMolConfIndex, SelectedMolConf in enumerate(SelectedMolConfs): 956 RefMolConf = Chem.RemoveHs(Chem.Mol(SelectedMolConf)) 957 CalcRMSD = rdMolAlign.AlignMol(ProbeMolConf, RefMolConf) 958 959 if CalcRMSD < EmbedRMSDCutoff: 960 IgnoreConf = True 961 break 962 963 if IgnoreConf: 964 continue 965 966 SelectedMolConfs.append(MolConf) 967 968 if not OptionsInfo["QuietMode"]: 969 MiscUtil.PrintInfo("\nGenerating initial ensemble of constrained conformations by distance geometry and forcefield for %s - EmbedRMSDCutoff: %s; Size: %s; Size after RMSD filtering: %s" % (RDKitUtil.GetMolName(Mol, MolNum), EmbedRMSDCutoff, len(MolConfs), len(SelectedMolConfs))) 970 971 return (SelectedMolConfs, True) 972 973 def EmbedMolecule(Mol, MolNum = None): 974 """Embed conformations.""" 975 976 ConfIDs = [] 977 978 MaxConfs = OptionsInfo["ConfGenerationParams"]["MaxConfs"] 979 RandomSeed = OptionsInfo["ConfGenerationParams"]["RandomSeed"] 980 EnforceChirality = OptionsInfo["ConfGenerationParams"]["EnforceChirality"] 981 UseExpTorsionAnglePrefs = OptionsInfo["ConfGenerationParams"]["UseExpTorsionAnglePrefs"] 982 ETVersion = OptionsInfo["ConfGenerationParams"]["ETVersion"] 983 UseBasicKnowledge = OptionsInfo["ConfGenerationParams"]["UseBasicKnowledge"] 984 EmbedRMSDCutoff = OptionsInfo["ConfGenerationParams"]["EmbedRMSDCutoff"] 985 986 try: 987 ConfIDs = AllChem.EmbedMultipleConfs(Mol, numConfs = MaxConfs, randomSeed = RandomSeed, pruneRmsThresh = EmbedRMSDCutoff, enforceChirality = EnforceChirality, useExpTorsionAnglePrefs = UseExpTorsionAnglePrefs, useBasicKnowledge = UseBasicKnowledge, ETversion = ETVersion) 988 except ValueError as ErrMsg: 989 if not OptionsInfo["QuietMode"]: 990 MolName = RDKitUtil.GetMolName(Mol, MolNum) 991 MiscUtil.PrintWarning("Embedding failed for molecule %s:\n%s\n" % (MolName, ErrMsg)) 992 ConfIDs = [] 993 994 return ConfIDs 995 996 def MinimizeMoleculeUsingForceField(Mol, MolNum, ConfID = -1): 997 """Minimize molecule using forcefield available in RDKit.""" 998 999 try: 1000 if OptionsInfo["ConfGenerationParams"]["UseUFF"]: 1001 ConvergeStatus = AllChem.UFFOptimizeMolecule(Mol, confId = ConfID, maxIters = OptionsInfo["ConfGenerationParams"]["MaxIters"]) 1002 elif OptionsInfo["ConfGenerationParams"]["UseMMFF"]: 1003 ConvergeStatus = AllChem.MMFFOptimizeMolecule(Mol, confId = ConfID, maxIters = OptionsInfo["ConfGenerationParams"]["MaxIters"], mmffVariant = OptionsInfo["ConfGenerationParams"]["MMFFVariant"]) 1004 else: 1005 MiscUtil.PrintError("Minimization couldn't be performed: Specified forcefield, %s, is not supported" % OptionsInfo["ConfGenerationParams"]["ForceField"]) 1006 except (ValueError, RuntimeError, Chem.rdchem.KekulizeException) as ErrMsg: 1007 if not OptionsInfo["QuietMode"]: 1008 MolName = RDKitUtil.GetMolName(Mol, MolNum) 1009 MiscUtil.PrintWarning("Minimization using forcefield couldn't be performed for molecule %s:\n%s\n" % (MolName, ErrMsg)) 1010 return (False, None) 1011 1012 return (True, ConvergeStatus) 1013 1014 def CalculateEnergyUsingForceField(Mol, ConfID = None): 1015 """Calculate energy.""" 1016 1017 Status = True 1018 Energy = None 1019 1020 if ConfID is None: 1021 ConfID = -1 1022 1023 if OptionsInfo["ConfGenerationParams"]["UseUFF"]: 1024 UFFMoleculeForcefield = AllChem.UFFGetMoleculeForceField(Mol, confId = ConfID) 1025 if UFFMoleculeForcefield is None: 1026 Status = False 1027 else: 1028 Energy = UFFMoleculeForcefield.CalcEnergy() 1029 elif OptionsInfo["ConfGenerationParams"]["UseMMFF"]: 1030 MMFFMoleculeProperties = AllChem.MMFFGetMoleculeProperties(Mol, mmffVariant = OptionsInfo["ConfGenerationParams"]["MMFFVariant"]) 1031 MMFFMoleculeForcefield = AllChem.MMFFGetMoleculeForceField(Mol, MMFFMoleculeProperties, confId = ConfID) 1032 if MMFFMoleculeForcefield is None: 1033 Status = False 1034 else: 1035 Energy = MMFFMoleculeForcefield.CalcEnergy() 1036 else: 1037 MiscUtil.PrintError("Couldn't retrieve conformer energy: Specified forcefield, %s, is not supported" % OptionsInfo["ConfGenerationParams"]["ForceField"]) 1038 1039 return (Status, Energy) 1040 1041 def CalculateEnergyUsingPsi4(Psi4Handle, Psi4Mol, Mol, MolNum = None): 1042 """Calculate single point energy using Psi4.""" 1043 1044 Status = False 1045 Energy = None 1046 1047 # Setup reference wave function... 1048 Reference = SetupReferenceWavefunction(Mol) 1049 Psi4Handle.set_options({'Reference': Reference}) 1050 1051 # Setup method name and basis set... 1052 MethodName, BasisSet = SetupMethodNameAndBasisSet(Mol) 1053 1054 Status, Energy = Psi4Util.CalculateSinglePointEnergy(Psi4Handle, Psi4Mol, MethodName, BasisSet, Quiet = OptionsInfo["QuietMode"]) 1055 1056 # Convert energy units... 1057 if Status: 1058 if OptionsInfo["ApplyEnergyConversionFactor"]: 1059 Energy = Energy * OptionsInfo["EnergyConversionFactor"] 1060 1061 # Clean up 1062 PerformPsi4Cleanup(Psi4Handle) 1063 1064 return (Status, Energy) 1065 1066 def SetupConstrainedAtomIndicesForPsi4(Mol, RefMolCore, RefMolMatches): 1067 """Setup a list of atom indices to be constrained during Psi4 minimizaiton.""" 1068 1069 AtomIndices = [] 1070 1071 if RefMolMatches is None: 1072 return AtomIndices 1073 else: 1074 ConstrainAtomIndices = RefMolMatches 1075 1076 # Atom indices start from 1 for Psi4 instead 0 for RDKit... 1077 AtomIndices = [ AtomIndex + 1 for AtomIndex in ConstrainAtomIndices] 1078 1079 return AtomIndices 1080 1081 def SetupPsi4Mol(Psi4Handle, Mol, MolNum, ConfID = -1): 1082 """Setup a Psi4 molecule object.""" 1083 1084 # Turn off recentering and reorientation to perform optimization in the 1085 # constrained coordinate space... 1086 MolGeometry = RDKitUtil.GetPsi4XYZFormatString(Mol, ConfID = ConfID, NoCom = True, NoReorient = True) 1087 1088 try: 1089 Psi4Mol = Psi4Handle.geometry(MolGeometry) 1090 except Exception as ErrMsg: 1091 Psi4Mol = None 1092 if not OptionsInfo["QuietMode"]: 1093 MiscUtil.PrintWarning("Failed to create Psi4 molecule from geometry string: %s\n" % ErrMsg) 1094 MolName = RDKitUtil.GetMolName(Mol, MolNum) 1095 MiscUtil.PrintWarning("Ignoring molecule: %s" % MolName) 1096 1097 return Psi4Mol 1098 1099 def PerformPsi4Cleanup(Psi4Handle): 1100 """Perform clean up.""" 1101 1102 # Clean up after Psi4 run... 1103 Psi4Handle.core.clean() 1104 1105 # Clean up any leftover scratch files... 1106 if OptionsInfo["MPMode"]: 1107 Psi4Util.RemoveScratchFiles(Psi4Handle, OptionsInfo["Psi4RunParams"]["OutputFile"]) 1108 1109 def CheckAndValidateMolecule(Mol, MolCount = None): 1110 """Validate molecule for Psi4 calculations.""" 1111 1112 if Mol is None: 1113 if not OptionsInfo["QuietMode"]: 1114 MiscUtil.PrintInfo("\nProcessing molecule number %s..." % MolCount) 1115 return False 1116 1117 MolName = RDKitUtil.GetMolName(Mol, MolCount) 1118 if not OptionsInfo["QuietMode"]: 1119 MiscUtil.PrintInfo("\nProcessing molecule %s..." % MolName) 1120 1121 if RDKitUtil.IsMolEmpty(Mol): 1122 if not OptionsInfo["QuietMode"]: 1123 MiscUtil.PrintWarning("Ignoring empty molecule: %s\n" % MolName) 1124 return False 1125 1126 if not RDKitUtil.ValidateElementSymbols(RDKitUtil.GetAtomSymbols(Mol)): 1127 if not OptionsInfo["QuietMode"]: 1128 MiscUtil.PrintWarning("Ignoring molecule containing invalid element symbols: %s\n" % MolName) 1129 return False 1130 1131 if OptionsInfo["Infile3D"]: 1132 if not Mol.GetConformer().Is3D(): 1133 if not OptionsInfo["QuietMode"]: 1134 MiscUtil.PrintWarning("3D tag is not set for molecule: %s\n" % MolName) 1135 1136 if OptionsInfo["Infile3D"]: 1137 # Otherwise, Hydrogens are always added... 1138 if RDKitUtil.AreHydrogensMissingInMolecule(Mol): 1139 if not OptionsInfo["QuietMode"]: 1140 MiscUtil.PrintWarning("Missing hydrogens in molecule: %s\n" % MolName) 1141 1142 return True 1143 1144 def SetupMethodNameAndBasisSet(Mol): 1145 """Setup method name and basis set.""" 1146 1147 MethodName = OptionsInfo["MethodName"] 1148 if OptionsInfo["MethodNameAuto"]: 1149 MethodName = "B3LYP" 1150 1151 BasisSet = OptionsInfo["BasisSet"] 1152 if OptionsInfo["BasisSetAuto"]: 1153 BasisSet = "6-31+G**" if RDKitUtil.IsAtomSymbolPresentInMol(Mol, "S") else "6-31G**" 1154 1155 return (MethodName, BasisSet) 1156 1157 def SetupReferenceWavefunction(Mol): 1158 """Setup reference wavefunction.""" 1159 1160 Reference = OptionsInfo["Reference"] 1161 if OptionsInfo["ReferenceAuto"]: 1162 Reference = 'UHF' if (RDKitUtil.GetSpinMultiplicity(Mol) > 1) else 'RHF' 1163 1164 return Reference 1165 1166 def SetupEnergyConversionFactor(Psi4Handle): 1167 """Setup converstion factor for energt units. The Psi4 energy units are Hartrees.""" 1168 1169 EnergyUnits = OptionsInfo["EnergyUnits"] 1170 1171 ApplyConversionFactor = True 1172 if re.match("^kcal\/mol$", EnergyUnits, re.I): 1173 ConversionFactor = Psi4Handle.constants.hartree2kcalmol 1174 elif re.match("^kJ\/mol$", EnergyUnits, re.I): 1175 ConversionFactor = Psi4Handle.constants.hartree2kJmol 1176 elif re.match("^eV$", EnergyUnits, re.I): 1177 ConversionFactor = Psi4Handle.constants.hartree2ev 1178 else: 1179 ApplyConversionFactor = False 1180 ConversionFactor = 1.0 1181 1182 OptionsInfo["ApplyEnergyConversionFactor"] = ApplyConversionFactor 1183 OptionsInfo["EnergyConversionFactor"] = ConversionFactor 1184 1185 def GenerateStartingTorsionScanStructureOutfile(Mol, MolNum): 1186 """Write out the structure of molecule used for starting tosion scan.""" 1187 1188 FileDir, FileName, FileExt = MiscUtil.ParseFileName(Options["--outfile"]) 1189 MolName = GetOutputFileMolName(Mol, MolNum) 1190 1191 Outfile = "%s_%s.%s" % (FileName, MolName, FileExt) 1192 1193 # Set up a molecule writer... 1194 Writer = RDKitUtil.MoleculesWriter(Outfile, **OptionsInfo["OutfileParams"]) 1195 if Writer is None: 1196 MiscUtil.PrintWarning("Failed to setup a writer for output fie %s " % Outfile) 1197 return 1198 1199 Writer.write(Mol) 1200 1201 if Writer is not None: 1202 Writer.close() 1203 1204 def GenerateOutputFiles(Mol, MolNum, TorsionID, TorsionMatchNum, TorsionMols, TorsionEnergies, TorsionAngles): 1205 """Generate output files.""" 1206 1207 StructureOutfile, EnergyTextOutfile, PlotOutfile = SetupOutputFileNames(Mol, MolNum, TorsionID, TorsionMatchNum) 1208 1209 GenerateScannedTorsionsStructureOutfile(StructureOutfile, Mol, MolNum, TorsionID, TorsionMatchNum, TorsionMols, TorsionEnergies, TorsionAngles) 1210 GenerateEnergyTextOutfile(EnergyTextOutfile, Mol, MolNum, TorsionID, TorsionMatchNum, TorsionMols, TorsionEnergies, TorsionAngles) 1211 GeneratePlotOutfile(PlotOutfile, Mol, MolNum, TorsionID, TorsionMatchNum, TorsionMols, TorsionEnergies, TorsionAngles) 1212 1213 def GenerateScannedTorsionsStructureOutfile(Outfile, Mol, MolNum, TorsionID, TorsionMatchNum, TorsionMols, TorsionEnergies, TorsionAngles): 1214 """Write out structures generated after torsion scan along with associated data.""" 1215 1216 # Set up a molecule writer... 1217 Writer = RDKitUtil.MoleculesWriter(Outfile, **OptionsInfo["OutfileParams"]) 1218 if Writer is None: 1219 MiscUtil.PrintWarning("Failed to setup a writer for output fie %s " % Outfile) 1220 return 1221 1222 MolName = RDKitUtil.GetMolName(Mol, MolNum) 1223 1224 RelativeTorsionEnergies = SetupRelativeEnergies(TorsionEnergies) 1225 for Index, TorsionMol in enumerate(TorsionMols): 1226 TorsionAngle = "%s" % TorsionAngles[Index] 1227 TorsionMol.SetProp("Torsion_Angle", TorsionAngle) 1228 1229 TorsionEnergy = "%.*f" % (OptionsInfo["Precision"], TorsionEnergies[Index]) 1230 TorsionMol.SetProp(OptionsInfo["EnergyDataFieldLabel"], TorsionEnergy) 1231 1232 RelativeTorsionEnergy = "%.*f" % (OptionsInfo["Precision"], RelativeTorsionEnergies[Index]) 1233 TorsionMol.SetProp(OptionsInfo["EnergyRelativeDataFieldLabel"], RelativeTorsionEnergy) 1234 1235 TorsionMolName = "%s_Deg%s" % (MolName, TorsionAngle) 1236 TorsionMol.SetProp("_Name", TorsionMolName) 1237 1238 Writer.write(TorsionMol) 1239 1240 if Writer is not None: 1241 Writer.close() 1242 1243 def GenerateEnergyTextOutfile(Outfile, Mol, MolNum, TorsionID, TorsionMatchNum, TorsionMols, TorsionEnergies, TorsionAngles): 1244 """Write out torsion angles and energies.""" 1245 1246 # Setup a writer... 1247 Writer = open(Outfile, "w") 1248 if Writer is None: 1249 MiscUtil.PrintError("Failed to setup a writer for output fie %s " % Outfile) 1250 1251 # Write headers... 1252 Writer.write("TorsionAngle,%s,%s\n" % (OptionsInfo["EnergyDataFieldLabel"], OptionsInfo["EnergyRelativeDataFieldLabel"])) 1253 1254 RelativeTorsionEnergies = SetupRelativeEnergies(TorsionEnergies) 1255 for Index, TorsionAngle in enumerate(TorsionAngles): 1256 TorsionEnergy = "%.*f" % (OptionsInfo["Precision"], TorsionEnergies[Index]) 1257 RelativeTorsionEnergy = "%.*f" % (OptionsInfo["Precision"], RelativeTorsionEnergies[Index]) 1258 Writer.write("%d,%s,%s\n" % (TorsionAngle, TorsionEnergy, RelativeTorsionEnergy)) 1259 1260 if Writer is not None: 1261 Writer.close() 1262 1263 def GeneratePlotOutfile(Outfile, Mol, MolNum, TorsionID, TorsionMatchNum, TorsionMols, TorsionEnergies, TorsionAngles): 1264 """Generate a plot corresponding to torsion angles and energies.""" 1265 1266 OutPlotParams = OptionsInfo["OutPlotParams"] 1267 1268 # Initialize seaborn and matplotlib paramaters... 1269 if not OptionsInfo["OutPlotInitialized"]: 1270 OptionsInfo["OutPlotInitialized"] = True 1271 RCParams = {"figure.figsize":(OutPlotParams["Width"], OutPlotParams["Height"]), 1272 "axes.titleweight": OutPlotParams["TitleWeight"], 1273 "axes.labelweight": OutPlotParams["LabelWeight"]} 1274 sns.set(context = OutPlotParams["Context"], style = OutPlotParams["Style"], palette = OutPlotParams["Palette"], font = OutPlotParams["Font"], font_scale = OutPlotParams["FontScale"], rc = RCParams) 1275 1276 # Create a new figure... 1277 plt.figure() 1278 1279 if OptionsInfo["OutPlotRelativeEnergy"]: 1280 TorsionEnergies = SetupRelativeEnergies(TorsionEnergies) 1281 1282 # Draw plot... 1283 PlotType = OutPlotParams["Type"] 1284 if re.match("linepoint", PlotType, re.I): 1285 Axis = sns.lineplot(x = TorsionAngles, y = TorsionEnergies, marker = "o", legend = False) 1286 elif re.match("scatter", PlotType, re.I): 1287 Axis = sns.scatterplot(x = TorsionAngles, y = TorsionEnergies, legend = False) 1288 elif re.match("line", PlotType, re.I): 1289 Axis = sns.lineplot(x = TorsionAngles, y = TorsionEnergies, legend = False) 1290 else: 1291 MiscUtil.PrintError("The value, %s, specified for \"type\" using option \"--outPlotParams\" is not supported. Valid plot types: linepoint, scatter or line" % (PlotType)) 1292 1293 # Setup title and labels... 1294 Title = OutPlotParams["Title"] 1295 if OptionsInfo["OutPlotTitleTorsionSpec"]: 1296 TorsionPattern = OptionsInfo["TorsionsPatternsInfo"]["Pattern"][TorsionID] 1297 Title = "%s: %s" % (OutPlotParams["Title"], TorsionPattern) 1298 1299 # Set labels and title... 1300 Axis.set(xlabel = OutPlotParams["XLabel"], ylabel = OutPlotParams["YLabel"], title = Title) 1301 1302 # Save figure... 1303 plt.savefig(Outfile) 1304 1305 # Close the plot... 1306 plt.close() 1307 1308 def SetupRelativeEnergies(Energies): 1309 """Set up a list of relative energies.""" 1310 1311 SortedEnergies = sorted(Energies) 1312 MinEnergy = SortedEnergies[0] 1313 RelativeEnergies = [(Energy - MinEnergy) for Energy in Energies] 1314 1315 return RelativeEnergies 1316 1317 def SetupOutputFileNames(Mol, MolNum, TorsionID, TorsionMatchNum): 1318 """Setup names of output files.""" 1319 1320 FileDir, FileName, FileExt = MiscUtil.ParseFileName(Options["--outfile"]) 1321 MolName = GetOutputFileMolName(Mol, MolNum) 1322 1323 OutfileRoot = "%s_%s_Torsion%s_Match%s" % (FileName, MolName, TorsionID, TorsionMatchNum) 1324 1325 StructureOutfile = "%s.%s" % (OutfileRoot, FileExt) 1326 EnergyTextOutfile = "%s_Energies.csv" % (OutfileRoot) 1327 PlotExt = OptionsInfo["OutPlotParams"]["OutExt"] 1328 PlotOutfile = "%s_Plot.%s" % (OutfileRoot, PlotExt) 1329 1330 return (StructureOutfile, EnergyTextOutfile, PlotOutfile) 1331 1332 def GetOutputFileMolName(Mol, MolNum): 1333 """Get output file prefix.""" 1334 1335 MolName = "Mol%s" % MolNum 1336 if OptionsInfo["OutfileMolName"]: 1337 MolName = re.sub("[^a-zA-Z0-9]", "_", RDKitUtil.GetMolName(Mol, MolNum), re.I) 1338 1339 return MolName 1340 1341 def ProcessTorsionRangeOptions(): 1342 """Process tosion range options. """ 1343 1344 TosionRangeMode = Options["--torsionRangeMode"] 1345 OptionsInfo["TosionRangeMode"] = TosionRangeMode 1346 1347 if re.match("^Range$", TosionRangeMode, re.I): 1348 ProcessTorsionRangeValues() 1349 elif re.match("^Angles$", TosionRangeMode, re.I): 1350 ProcessTorsionAnglesValues() 1351 else: 1352 MiscUtil.PrintError("The value, %s, option \"--torsionRangeMode\" is not supported." % TosionRangeMode) 1353 1354 1355 def ProcessTorsionRangeValues(): 1356 """Process tosion range values. """ 1357 1358 TorsionRange = Options["--torsionRange"] 1359 if re.match("^auto$", TorsionRange, re.I): 1360 TorsionRange = "0,360,5" 1361 TorsionRangeWords = TorsionRange.split(",") 1362 1363 TorsionStart = int(TorsionRangeWords[0]) 1364 TorsionStop = int(TorsionRangeWords[1]) 1365 TorsionStep = int(TorsionRangeWords[2]) 1366 1367 if TorsionStart >= TorsionStop: 1368 MiscUtil.PrintError("The start value, %d, specified for option \"--torsionRange\" in string \"%s\" must be less than stop value, %s." % (TorsionStart, Options["--torsionRange"], TorsionStop)) 1369 if TorsionStep == 0: 1370 MiscUtil.PrintError("The step value, %d, specified for option \"--torsonRange\" in string \"%s\" must be > 0." % (TorsionStep, Options["--torsionRange"])) 1371 if TorsionStep >= (TorsionStop - TorsionStart): 1372 MiscUtil.PrintError("The step value, %d, specified for option \"--torsonRange\" in string \"%s\" must be less than, %s." % (TorsionStep, Options["--torsionRange"], (TorsionStop - TorsionStart))) 1373 1374 if TorsionStart < 0: 1375 if TorsionStart < -180: 1376 MiscUtil.PrintError("The start value, %d, specified for option \"--torsionRange\" in string \"%s\" must be >= -180 to use scan range from -180 to 180." % (TorsionStart, Options["--torsionRange"])) 1377 if TorsionStop > 180: 1378 MiscUtil.PrintError("The stop value, %d, specified for option \"--torsionRange\" in string \"%s\" must be <= 180 to use scan range from -180 to 180." % (TorsionStop, Options["--torsionRange"])) 1379 else: 1380 if TorsionStop > 360: 1381 MiscUtil.PrintError("The stop value, %d, specified for option \"--torsionRange\" in string \"%s\" must be <= 360 to use scan range from 0 to 360." % (TorsionStop, Options["--torsionRange"])) 1382 1383 TorsionAngles = [Angle for Angle in range(TorsionStart, TorsionStop, TorsionStep)] 1384 TorsionAngles.append(TorsionStop) 1385 1386 OptionsInfo["TorsionRange"] = TorsionRange 1387 OptionsInfo["TorsionStart"] = TorsionStart 1388 OptionsInfo["TorsionStop"] = TorsionStop 1389 OptionsInfo["TorsionStep"] = TorsionStep 1390 1391 OptionsInfo["TorsionAngles"] = TorsionAngles 1392 1393 def ProcessTorsionAnglesValues(): 1394 """Process tosion angle values. """ 1395 1396 TorsionRange = Options["--torsionRange"] 1397 if re.match("^auto$", TorsionRange, re.I): 1398 MiscUtil.PrintError("The value specified, %s, for option \"--torsionRange\" is not valid." % (TorsionRange)) 1399 1400 TorsionAngles = [] 1401 1402 for TorsionAngle in TorsionRange.split(","): 1403 TorsionAngle = int(TorsionAngle) 1404 1405 if TorsionAngle < -180: 1406 MiscUtil.PrintError("The stop value, %d, specified for option \"--torsionRange\" in string \"%s\" must be >= -180." % (TorsionAngle, TorsionRange)) 1407 1408 if TorsionAngle > 360: 1409 MiscUtil.PrintError("The stop value, %d, specified for option \"--torsionRange\" in string \"%s\" must be <= 360." % (TorsionAngle, TorsionRange)) 1410 1411 TorsionAngles.append(TorsionAngle) 1412 1413 OptionsInfo["TorsionRange"] = TorsionRange 1414 OptionsInfo["TorsionStart"] = None 1415 OptionsInfo["TorsionStop"] = None 1416 OptionsInfo["TorsionStep"] = None 1417 1418 OptionsInfo["TorsionAngles"] = sorted(TorsionAngles) 1419 1420 def ProcessOptions(): 1421 """Process and validate command line arguments and options.""" 1422 1423 MiscUtil.PrintInfo("Processing options...") 1424 1425 # Validate options... 1426 ValidateOptions() 1427 1428 OptionsInfo["ModeMols"] = Options["--modeMols"] 1429 OptionsInfo["FirstMolMode"] = True if re.match("^First$", Options["--modeMols"], re.I) else False 1430 1431 OptionsInfo["ModeTorsions"] = Options["--modeTorsions"] 1432 OptionsInfo["FirstTorsionMode"] = True if re.match("^First$", Options["--modeTorsions"], re.I) else False 1433 1434 OptionsInfo["Infile"] = Options["--infile"] 1435 OptionsInfo["SMILESInfileStatus"] = True if MiscUtil.CheckFileExt(Options["--infile"], "smi csv tsv txt") else False 1436 ParamsDefaultInfoOverride = {"RemoveHydrogens": False} 1437 OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters("--infileParams", Options["--infileParams"], InfileName = Options["--infile"], ParamsDefaultInfo = ParamsDefaultInfoOverride) 1438 OptionsInfo["Infile3D"] = True if re.match("^yes$", Options["--infile3D"], re.I) else False 1439 1440 OptionsInfo["Outfile"] = Options["--outfile"] 1441 OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters("--outfileParams", Options["--outfileParams"]) 1442 1443 # Method, basis set, and reference wavefunction... 1444 OptionsInfo["BasisSet"] = Options["--basisSet"] 1445 OptionsInfo["BasisSetAuto"] = True if re.match("^auto$", Options["--basisSet"], re.I) else False 1446 1447 OptionsInfo["MethodName"] = Options["--methodName"] 1448 OptionsInfo["MethodNameAuto"] = True if re.match("^auto$", Options["--methodName"], re.I) else False 1449 1450 OptionsInfo["Reference"] = Options["--reference"] 1451 OptionsInfo["ReferenceAuto"] = True if re.match("^auto$", Options["--reference"], re.I) else False 1452 1453 # Run and options parameters... 1454 OptionsInfo["Psi4OptionsParams"] = Psi4Util.ProcessPsi4OptionsParameters("--psi4OptionsParams", Options["--psi4OptionsParams"]) 1455 OptionsInfo["Psi4RunParams"] = Psi4Util.ProcessPsi4RunParameters("--psi4RunParams", Options["--psi4RunParams"], InfileName = OptionsInfo["Infile"]) 1456 1457 # Conformer generation paramaters... 1458 ParamsDefaultInfoOverride = {"MaxConfs": 250, "MaxConfsTorsions": 50} 1459 OptionsInfo["ConfGenerationParams"] = MiscUtil.ProcessOptionConformerParameters("--confParams", Options["--confParams"], ParamsDefaultInfoOverride) 1460 1461 # Energy units and label... 1462 OptionsInfo["EnergyUnits"] = Options["--energyUnits"] 1463 1464 EnergyDataFieldLabel = Options["--energyDataFieldLabel"] 1465 if re.match("^auto$", EnergyDataFieldLabel, re.I): 1466 EnergyDataFieldLabel = "Psi4_Energy (%s)" % Options["--energyUnits"] 1467 OptionsInfo["EnergyDataFieldLabel"] = EnergyDataFieldLabel 1468 1469 EnergyRelativeDataFieldLabel = Options["--energyRelativeDataFieldLabel"] 1470 if re.match("^auto$", EnergyRelativeDataFieldLabel, re.I): 1471 EnergyRelativeDataFieldLabel = "Psi4_Relative_Energy (%s)" % Options["--energyUnits"] 1472 OptionsInfo["EnergyRelativeDataFieldLabel"] = EnergyRelativeDataFieldLabel 1473 1474 # Plot parameters... 1475 OptionsInfo["OutfileMolName"] = True if re.match("^yes$", Options["--outfileMolName"], re.I) else False 1476 OptionsInfo["OutPlotRelativeEnergy"] = True if re.match("^yes$", Options["--outPlotRelativeEnergy"], re.I) else False 1477 OptionsInfo["OutPlotTitleTorsionSpec"] = True if re.match("^yes$", Options["--outPlotTitleTorsionSpec"], re.I) else False 1478 1479 # The default width and height, 10.0 and 7.5, map to aspect raito of 16/9 (1.778)... 1480 if OptionsInfo["OutPlotRelativeEnergy"]: 1481 EnergyLabel = "Relative Energy (%s)" % OptionsInfo["EnergyUnits"] 1482 else: 1483 EnergyLabel = "Energy (%s)" % OptionsInfo["EnergyUnits"] 1484 1485 DefaultValues = {'Type': 'linepoint', 'Width': 10.0, 'Height': 5.6, 'Title': 'Psi4 Torsion Scan', 'XLabel': 'Torsion Angle (degrees)', 'YLabel': EnergyLabel} 1486 OptionsInfo["OutPlotParams"] = MiscUtil.ProcessOptionSeabornPlotParameters("--outPlotParams", Options["--outPlotParams"], DefaultValues) 1487 if not re.match("^(linepoint|scatter|Line)$", OptionsInfo["OutPlotParams"]["Type"], re.I): 1488 MiscUtil.PrintError("The value, %s, specified for \"type\" using option \"--outPlotParams\" is not supported. Valid plot types: linepoint, scatter or line" % (OptionsInfo["OutPlotParams"]["Type"])) 1489 1490 OptionsInfo["OutPlotInitialized"] = False 1491 1492 OptionsInfo["Overwrite"] = Options["--overwrite"] 1493 1494 OptionsInfo["MaxIters"] = int(Options["--maxIters"]) 1495 1496 OptionsInfo["MPMode"] = True if re.match("^yes$", Options["--mp"], re.I) else False 1497 OptionsInfo["MPParams"] = MiscUtil.ProcessOptionMultiprocessingParameters("--mpParams", Options["--mpParams"]) 1498 1499 # Multiprocessing level... 1500 MPLevelMoleculesMode = False 1501 MPLevelTorsionAnglesMode = False 1502 MPLevel = Options["--mpLevel"] 1503 if re.match("^Molecules$", MPLevel, re.I): 1504 MPLevelMoleculesMode = True 1505 elif re.match("^TorsionAngles$", MPLevel, re.I): 1506 MPLevelTorsionAnglesMode = True 1507 else: 1508 MiscUtil.PrintError("The value, %s, specified for option \"--mpLevel\" is not valid. " % MPLevel) 1509 OptionsInfo["MPLevel"] = MPLevel 1510 OptionsInfo["MPLevelMoleculesMode"] = MPLevelMoleculesMode 1511 OptionsInfo["MPLevelTorsionAnglesMode"] = MPLevelTorsionAnglesMode 1512 1513 OptionsInfo["Precision"] = int(Options["--precision"]) 1514 OptionsInfo["QuietMode"] = True if re.match("^yes$", Options["--quiet"], re.I) else False 1515 1516 # Procsss and validate specified SMILES/SMARTS torsion patterns... 1517 TorsionPatterns = Options["--torsions"] 1518 TorsionPatternsList = [] 1519 for TorsionPattern in TorsionPatterns.split(","): 1520 TorsionPattern = TorsionPattern.strip() 1521 if not len(TorsionPattern): 1522 MiscUtil.PrintError("Empty value specified for SMILES/SMARTS pattern in \"-t, --torsions\" option: %s" % TorsionPatterns) 1523 1524 TorsionMol = Chem.MolFromSmarts(TorsionPattern) 1525 if TorsionMol is None: 1526 MiscUtil.PrintError("Failed to create torsion pattern molecule. The torsion SMILES/SMARTS pattern, \"%s\", specified using \"-t, --torsions\" option, \"%s\", is not valid." % (TorsionPattern, TorsionPatterns)) 1527 TorsionPatternsList.append(TorsionPattern) 1528 1529 OptionsInfo["TorsionPatterns"] = TorsionPatterns 1530 OptionsInfo["TorsionPatternsList"] = TorsionPatternsList 1531 1532 # Process and validate any specified torsion atom indices for filtering torsion matches... 1533 TorsionsFilterByAtomIndices = Options["--torsionsFilterbyAtomIndices"] 1534 TorsionsFilterByAtomIndicesList = [] 1535 if not re.match("^None$", TorsionsFilterByAtomIndices, re.I): 1536 for AtomIndex in TorsionsFilterByAtomIndices.split(","): 1537 AtomIndex = AtomIndex.strip() 1538 if not MiscUtil.IsInteger(AtomIndex): 1539 MiscUtil.PrintError("The value specified, %s, for option \"--torsionsFilterbyAtomIndices\" must be an integer." % AtomIndex) 1540 AtomIndex = int(AtomIndex) 1541 if AtomIndex < 0: 1542 MiscUtil.PrintError("The value specified, %s, for option \"--torsionsFilterbyAtomIndices\" must be >= 0." % AtomIdex) 1543 TorsionsFilterByAtomIndicesList.append(AtomIndex) 1544 1545 if len(TorsionsFilterByAtomIndicesList) < 4: 1546 MiscUtil.PrintError("The number of values, %s, specified, %s, for option \"--torsionsFilterbyAtomIndices\" must be >=4." % (len(TorsionsFilterByAtomIndicesList), TorsionsFilterByAtomIndices)) 1547 1548 OptionsInfo["TorsionsFilterByAtomIndices"] = TorsionsFilterByAtomIndices 1549 OptionsInfo["TorsionsFilterByAtomIndicesList"] = TorsionsFilterByAtomIndicesList 1550 OptionsInfo["FilterTorsionsByAtomIndicesMode"] = True if len(TorsionsFilterByAtomIndicesList) > 0 else False 1551 1552 OptionsInfo["TorsionMaxMatches"] = int(Options["--torsionMaxMatches"]) 1553 OptionsInfo["TorsionMinimize"] = True if re.match("^yes$", Options["--torsionMinimize"], re.I) else False 1554 1555 ProcessTorsionRangeOptions() 1556 1557 TorsionRange = Options["--torsionRange"] 1558 TorsionRangeWords = TorsionRange.split(",") 1559 1560 OptionsInfo["UseChirality"] = True if re.match("^yes$", Options["--useChirality"], re.I) else False 1561 1562 def RetrieveOptions(): 1563 """Retrieve command line arguments and options.""" 1564 1565 # Get options... 1566 global Options 1567 Options = docopt(_docoptUsage_) 1568 1569 # Set current working directory to the specified directory... 1570 WorkingDir = Options["--workingdir"] 1571 if WorkingDir: 1572 os.chdir(WorkingDir) 1573 1574 # Handle examples option... 1575 if "--examples" in Options and Options["--examples"]: 1576 MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_)) 1577 sys.exit(0) 1578 1579 def ValidateOptions(): 1580 """Validate option values.""" 1581 1582 MiscUtil.ValidateOptionTextValue("--energyUnits", Options["--energyUnits"], "Hartrees kcal/mol kJ/mol eV") 1583 1584 MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"]) 1585 MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "sdf sd mol smi txt csv tsv") 1586 MiscUtil.ValidateOptionTextValue("--infile3D", Options["--infile3D"], "yes no") 1587 1588 MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "sdf sd") 1589 MiscUtil.ValidateOptionsOutputFileOverwrite("-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"]) 1590 MiscUtil.ValidateOptionsDistinctFileNames("-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"]) 1591 1592 if not Options["--overwrite"]: 1593 FileDir, FileName, FileExt = MiscUtil.ParseFileName(Options["--outfile"]) 1594 FileNames = glob.glob("%s_*" % FileName) 1595 if len(FileNames): 1596 MiscUtil.PrintError("The outfile names, %s_*, generated from file specified, %s, for option \"-o, --outfile\" already exist. Use option \"--overwrite\" or \"--ov\" and try again.\n" % (FileName, Options["--outfile"])) 1597 1598 MiscUtil.ValidateOptionTextValue("--outPlotRelativeEnergy", Options["--outPlotRelativeEnergy"], "yes no") 1599 MiscUtil.ValidateOptionTextValue("--outPlotTitleTorsionSpec", Options["--outPlotTitleTorsionSpec"], "yes no") 1600 1601 MiscUtil.ValidateOptionTextValue("--outfileMolName ", Options["--outfileMolName"], "yes no") 1602 1603 MiscUtil.ValidateOptionTextValue("--modeMols", Options["--modeMols"], "First All") 1604 MiscUtil.ValidateOptionTextValue("--modeTorsions", Options["--modeTorsions"], "First All") 1605 1606 MiscUtil.ValidateOptionIntegerValue("--maxIters", Options["--maxIters"], {">": 0}) 1607 1608 MiscUtil.ValidateOptionTextValue("--mp", Options["--mp"], "yes no") 1609 MiscUtil.ValidateOptionTextValue("--mpLevel", Options["--mpLevel"], "Molecules TorsionAngles") 1610 1611 MiscUtil.ValidateOptionIntegerValue("-p, --precision", Options["--precision"], {">": 0}) 1612 MiscUtil.ValidateOptionTextValue("-q, --quiet", Options["--quiet"], "yes no") 1613 1614 MiscUtil.ValidateOptionIntegerValue("--torsionMaxMatches", Options["--torsionMaxMatches"], {">": 0}) 1615 MiscUtil.ValidateOptionTextValue("--torsionMinimize", Options["--torsionMinimize"], "yes no") 1616 1617 MiscUtil.ValidateOptionTextValue("--torsionRangeMode", Options["--torsionRangeMode"], "Range or Angles") 1618 TorsionRange = Options["--torsionRange"] 1619 if re.match("^Range$", Options["--torsionRangeMode"], re.I): 1620 if not re.match("^auto$", TorsionRange, re.I): 1621 MiscUtil.ValidateOptionNumberValues("--torsionRange", TorsionRange, 3, ",", "integer", {}) 1622 else: 1623 if re.match("^auto$", TorsionRange, re.I): 1624 MiscUtil.PrintError("The value, %s, specified for option \"-torsionRange\" is not valid for \"%s\" value of \"--torsionRangeMode\" option. You must specify a torsion angle or a comma delimited list of torsion angles." % (TorsionRange, Options["--torsionRangeMode"])) 1625 TorsionAngles = [] 1626 for TorsionAngle in TorsionRange.split(","): 1627 TorsionAngle = TorsionAngle.strip() 1628 if not MiscUtil.IsInteger(TorsionAngle): 1629 MiscUtil.PrintError("The value specified, %s, for option \"--torsionRange\" in string \"%s\" must be an integer." % (TorsionAngle, TorsionRange)) 1630 if TorsionAngle in TorsionAngles: 1631 MiscUtil.PrintError("The value specified, %s, for option \"--torsionRange\" in string \"%s\" is a duplicate value." % (TorsionAngle, TorsionRange)) 1632 TorsionAngles.append(TorsionAngle) 1633 1634 MiscUtil.ValidateOptionTextValue("--useChirality", Options["--useChirality"], "yes no") 1635 1636 # Setup a usage string for docopt... 1637 _docoptUsage_ = """ 1638 Psi4PerformTorsionScan.py - Perform torsion scan 1639 1640 Usage: 1641 Psi4PerformTorsionScan.py [--basisSet <text>] [--confParams <Name,Value,...>] [--energyDataFieldLabel <text>] 1642 [--energyRelativeDataFieldLabel <text>] [--energyUnits <text>] [--infile3D <yes or no>] 1643 [--infileParams <Name,Value,...>] [--maxIters <number>] [--methodName <text>] 1644 [--modeMols <First or All>] [--modeTorsions <First or All>] [--mp <yes or no>] 1645 [--mpLevel <Molecules or TorsionAngles>] [--mpParams <Name,Value,...>] 1646 [--outfileMolName <yes or no>] [--outfileParams <Name,Value,...>] [--outPlotParams <Name,Value,...>] 1647 [--outPlotRelativeEnergy <yes or no>] [--outPlotTitleTorsionSpec <yes or no>] [--overwrite] 1648 [--precision <number>] [--psi4OptionsParams <Name,Value,...>] [--psi4RunParams <Name,Value,...>] 1649 [--quiet <yes or no>] [--reference <text>] [--torsionsFilterbyAtomIndices <Index1, Index2, ...>] 1650 [--torsionMaxMatches <number>] [--torsionMinimize <yes or no>] [--torsionRangeMode <Range or Angles>] 1651 [--torsionRange <Start,Stop,Step or Angle1,Angle2,...>] [--useChirality <yes or no>] 1652 [-w <dir>] -t <torsions> -i <infile> -o <outfile> 1653 Psi4PerformTorsionScan.py -h | --help | -e | --examples 1654 1655 Description: 1656 Perform torsion scan for molecules around torsion angles specified using 1657 SMILES/SMARTS patterns. A molecule is optionally minimized before performing 1658 a torsion scan using a forcefield. A set of initial 3D structures are generated for 1659 a molecule by scanning the torsion angle across the specified range and updating 1660 the 3D coordinates of the molecule. A conformation ensemble is optionally generated 1661 for each 3D structure representing a specific torsion angle using a combination of 1662 distance geometry and forcefield followed by constrained geometry optimization 1663 using a quantum chemistry method. The conformation with the lowest energy is 1664 selected to represent the torsion angle. An option is available to skip the generation 1665 of the conformation ensemble and simply calculate the energy for the initial 3D 1666 structure for a specific torsion torsion angle using a quantum chemistry method. 1667 1668 The torsions are specified using SMILES or SMARTS patterns. A substructure match 1669 is performed to select torsion atoms in a molecule. The SMILES pattern match must 1670 correspond to four torsion atoms. The SMARTS patterns containing atom map numbers 1671 may match more than four atoms. The atom map numbers, however, must match 1672 exactly four torsion atoms. For example: [s:1][c:2]([aX2,cH1])!@[CX3:3](O)=[O:4] for 1673 thiophene esters and carboxylates as specified in Torsion Library (TorLib) [Ref 146]. 1674 1675 A Psi4 XYZ format geometry string is automatically generated for each molecule 1676 in input file. It contains atom symbols and 3D coordinates for each atom in a 1677 molecule. In addition, the formal charge and spin multiplicity are present in the 1678 the geometry string. These values are either retrieved from molecule properties 1679 named 'FormalCharge' and 'SpinMultiplicty' or dynamically calculated for a 1680 molecule. 1681 1682 A set of four output files is generated for each torsion match in each 1683 molecule. The names of the output files are generated using the root of 1684 the specified output file. They may either contain sequential molecule 1685 numbers or molecule names as shown below: 1686 1687 <OutfileRoot>_Mol<Num>.sdf 1688 <OutfileRoot>_Mol<Num>_Torsion<Num>_Match<Num>.sdf 1689 <OutfileRoot>_Mol<Num>_Torsion<Num>_Match<Num>_Energies.csv 1690 <OutfileRoot>_Mol<Num>_Torsion<Num>_Match<Num>_Plot.<ImgExt> 1691 1692 or 1693 1694 <OutfileRoot>_<MolName>.sdf 1695 <OutfileRoot>_<MolName>_Torsion<Num>_Match<Num>.sdf 1696 <OutfileRoot>_<MolName>_Torsion<Num>_Match<Num>_Energies.csv 1697 <OutfileRoot>_<MolName>_Torsion<Num>_Match<Num>_Plot.<ImgExt> 1698 1699 The supported input file formats are: Mol (.mol), SD (.sdf, .sd), SMILES (.smi, 1700 .csv, .tsv, .txt) 1701 1702 The supported output file formats are: SD (.sdf, .sd) 1703 1704 Options: 1705 -b, --basisSet <text> [default: auto] 1706 Basis set to use for energy calculation or constrained energy minimization. 1707 Default: 6-31+G** for sulfur containing molecules; Otherwise, 6-31G** [ Ref 150 ]. 1708 The specified value must be a valid Psi4 basis set. No validation is performed. 1709 1710 The following list shows a representative sample of basis sets available 1711 in Psi4: 1712 1713 STO-3G, 6-31G, 6-31+G, 6-31++G, 6-31G*, 6-31+G*, 6-31++G*, 1714 6-31G**, 6-31+G**, 6-31++G**, 6-311G, 6-311+G, 6-311++G, 1715 6-311G*, 6-311+G*, 6-311++G*, 6-311G**, 6-311+G**, 6-311++G**, 1716 cc-pVDZ, cc-pCVDZ, aug-cc-pVDZ, cc-pVDZ-DK, cc-pCVDZ-DK, def2-SVP, 1717 def2-SVPD, def2-TZVP, def2-TZVPD, def2-TZVPP, def2-TZVPPD 1718 1719 --confParams <Name,Value,...> [default: auto] 1720 A comma delimited list of parameter name and value pairs for generating 1721 initial 3D coordinates for molecules in input file at specific torsion angles. A 1722 conformation ensemble is optionally generated for each 3D structure 1723 representing a specific torsion angle using a combination of distance geometry 1724 and forcefield followed by constrained geometry optimization using a quantum 1725 chemistry method. The conformation with the lowest energy is selected to 1726 represent the torsion angle. 1727 1728 The supported parameter names along with their default values are shown 1729 below: 1730 1731 confMethod,ETKDGv2, 1732 forceField,MMFF, forceFieldMMFFVariant,MMFF94, 1733 enforceChirality,yes,embedRMSDCutoff,0.5,maxConfs,250, 1734 maxConfsTorsions,50,useTethers,yes 1735 1736 confMethod,ETKDGv2 [ Possible values: SDG, KDG, ETDG, 1737 ETKDG , or ETKDGv2] 1738 forceField, MMFF [ Possible values: UFF or MMFF ] 1739 forceFieldMMFFVariant,MMFF94 [ Possible values: MMFF94 or MMFF94s ] 1740 enforceChirality,yes [ Possible values: yes or no ] 1741 useTethers,yes [ Possible values: yes or no ] 1742 1743 confMethod: Conformation generation methodology for generating initial 3D 1744 coordinates. Possible values: Standard Distance Geometry (SDG), Experimental 1745 Torsion-angle preference with Distance Geometry (ETDG), basic Knowledge-terms 1746 with Distance Geometry (KDG) and Experimental Torsion-angle preference 1747 along with basic Knowledge-terms and Distance Geometry (ETKDG or 1748 ETKDGv2) [Ref 129, 167] . 1749 1750 forceField: Forcefield method to use for energy minimization. Possible values: 1751 Universal Force Field (UFF) [ Ref 81 ] or Merck Molecular Mechanics Force 1752 Field [ Ref 83-87 ] . 1753 1754 enforceChirality: Enforce chirality for defined chiral centers during 1755 forcefield minimization. 1756 1757 maxConfs: Maximum number of conformations to generate for each molecule 1758 during the generation of an initial 3D conformation ensemble using a conformation 1759 generation methodology. The conformations are minimized using the specified 1760 forcefield. The lowest energy structure is selected for performing the torsion scan. 1761 1762 maxConfsTorsion: Maximum number of 3D conformations to generate for 1763 conformation ensemble representing a specific torsion. The conformations are 1764 constrained at specific torsions angles and minimized using the specified forcefield 1765 and a quantum chemistry method. The lowest energy conformation is selected to 1766 calculate final torsion energy and written to the output file. 1767 1768 embedRMSDCutoff: RMSD cutoff for retaining initial set of conformers embedded 1769 using distance geometry and forcefield minimization. All embedded conformers 1770 are kept for 'None' value. Otherwise, only those conformers which are different 1771 from each other by the specified RMSD cutoff, 0.5 by default, are kept. The first 1772 embedded conformer is always retained. 1773 1774 useTethers: Use tethers to optimize the final embedded conformation by 1775 applying a series of extra forces to align matching atoms to the positions of 1776 the core atoms. Otherwise, use simple distance constraints during the 1777 optimization. 1778 --energyDataFieldLabel <text> [default: auto] 1779 Energy data field label for writing energy values. Default: Psi4_Energy (<Units>). 1780 --energyRelativeDataFieldLabel <text> [default: auto] 1781 Relative energy data field label for writing energy values. Default: 1782 Psi4_Relative_Energy (<Units>). 1783 --energyUnits <text> [default: kcal/mol] 1784 Energy units. Possible values: Hartrees, kcal/mol, kJ/mol, or eV. 1785 -e, --examples 1786 Print examples. 1787 -h, --help 1788 Print this help message. 1789 -i, --infile <infile> 1790 Input file name. 1791 --infile3D <yes or no> [default: no] 1792 Skip generation and minimization of initial 3D structures for molecules in 1793 input file containing 3D coordinates. 1794 --infileParams <Name,Value,...> [default: auto] 1795 A comma delimited list of parameter name and value pairs for reading 1796 molecules from files. The supported parameter names for different file 1797 formats, along with their default values, are shown below: 1798 1799 SD, MOL: removeHydrogens,no,sanitize,yes,strictParsing,yes 1800 1801 SMILES: smilesColumn,1,smilesNameColumn,2,smilesDelimiter,space, 1802 smilesTitleLine,auto,sanitize,yes 1803 1804 Possible values for smilesDelimiter: space, comma or tab. 1805 --maxIters <number> [default: 50] 1806 Maximum number of iterations to perform for each molecule or conformer 1807 during constrained energy minimization by a quantum chemistry method. 1808 -m, --methodName <text> [default: auto] 1809 Method to use for energy calculation or constrained energy minimization. 1810 Default: B3LYP [ Ref 150 ]. The specified value must be a valid Psi4 method 1811 name. No validation is performed. 1812 1813 The following list shows a representative sample of methods available 1814 in Psi4: 1815 1816 B1LYP, B2PLYP, B2PLYP-D3BJ, B2PLYP-D3MBJ, B3LYP, B3LYP-D3BJ, 1817 B3LYP-D3MBJ, CAM-B3LYP, CAM-B3LYP-D3BJ, HF, HF-D3BJ, HF3c, M05, 1818 M06, M06-2x, M06-HF, M06-L, MN12-L, MN15, MN15-D3BJ,PBE, PBE0, 1819 PBEH3c, PW6B95, PW6B95-D3BJ, WB97, WB97X, WB97X-D, WB97X-D3BJ 1820 1821 --modeMols <First or All> [default: First] 1822 Perform torsion scan for the first molecule or all molecules in input 1823 file. 1824 --modeTorsions <First or All> [default: First] 1825 Perform torsion scan for the first or all specified torsion pattern in 1826 molecules up to a maximum number of matches for each torsion 1827 specification as indicated by '--torsionMaxMatches' option. 1828 --mp <yes or no> [default: no] 1829 Use multiprocessing. 1830 1831 By default, input data is retrieved in a lazy manner via mp.Pool.imap() 1832 function employing lazy RDKit data iterable. This allows processing of 1833 arbitrary large data sets without any additional requirements memory. 1834 1835 All input data may be optionally loaded into memory by mp.Pool.map() 1836 before starting worker processes in a process pool by setting the value 1837 of 'inputDataMode' to 'InMemory' in '--mpParams' option. 1838 1839 A word to the wise: The default 'chunkSize' value of 1 during 'Lazy' input 1840 data mode may adversely impact the performance. The '--mpParams' section 1841 provides additional information to tune the value of 'chunkSize'. 1842 --mpLevel <Molecules or TorsionAngles> [default: Molecules] 1843 Perform multiprocessing at molecules or torsion angles level. Possible values: 1844 Molecules or TorsionAngles. The 'Molecules' value starts a process pool at the 1845 molecules level. All torsion angles of a molecule are processed in a single 1846 process. The 'TorsionAngles' value, however, starts a process pool at the 1847 torsion angles level. Each torsion angle in a torsion match for a molecule is 1848 processed in an individual process in the process pool. 1849 --mpParams <Name,Value,...> [default: auto] 1850 A comma delimited list of parameter name and value pairs to configure 1851 multiprocessing. 1852 1853 The supported parameter names along with their default and possible 1854 values are shown below: 1855 1856 chunkSize, auto 1857 inputDataMode, Lazy [ Possible values: InMemory or Lazy ] 1858 numProcesses, auto [ Default: mp.cpu_count() ] 1859 1860 These parameters are used by the following functions to configure and 1861 control the behavior of multiprocessing: mp.Pool(), mp.Pool.map(), and 1862 mp.Pool.imap(). 1863 1864 The chunkSize determines chunks of input data passed to each worker 1865 process in a process pool by mp.Pool.map() and mp.Pool.imap() functions. 1866 The default value of chunkSize is dependent on the value of 'inputDataMode'. 1867 1868 The mp.Pool.map() function, invoked during 'InMemory' input data mode, 1869 automatically converts RDKit data iterable into a list, loads all data into 1870 memory, and calculates the default chunkSize using the following method 1871 as shown in its code: 1872 1873 chunkSize, extra = divmod(len(dataIterable), len(numProcesses) * 4) 1874 if extra: chunkSize += 1 1875 1876 For example, the default chunkSize will be 7 for a pool of 4 worker processes 1877 and 100 data items. 1878 1879 The mp.Pool.imap() function, invoked during 'Lazy' input data mode, employs 1880 'lazy' RDKit data iterable to retrieve data as needed, without loading all the 1881 data into memory. Consequently, the size of input data is not known a priori. 1882 It's not possible to estimate an optimal value for the chunkSize. The default 1883 chunkSize is set to 1. 1884 1885 The default value for the chunkSize during 'Lazy' data mode may adversely 1886 impact the performance due to the overhead associated with exchanging 1887 small chunks of data. It is generally a good idea to explicitly set chunkSize to 1888 a larger value during 'Lazy' input data mode, based on the size of your input 1889 data and number of processes in the process pool. 1890 1891 The mp.Pool.map() function waits for all worker processes to process all 1892 the data and return the results. The mp.Pool.imap() function, however, 1893 returns the the results obtained from worker processes as soon as the 1894 results become available for specified chunks of data. 1895 1896 The order of data in the results returned by both mp.Pool.map() and 1897 mp.Pool.imap() functions always corresponds to the input data. 1898 -o, --outfile <outfile> 1899 Output file name. The output file root is used for generating the names 1900 of the output files corresponding to structures, energies, and plots during 1901 the torsion scan. 1902 --outfileMolName <yes or no> [default: no] 1903 Append molecule name to output file root during the generation of the names 1904 for output files. The default is to use <MolNum>. The non alphabetical 1905 characters in molecule names are replaced by underscores. 1906 --outfileParams <Name,Value,...> [default: auto] 1907 A comma delimited list of parameter name and value pairs for writing 1908 molecules to files. The supported parameter names for different file 1909 formats, along with their default values, are shown below: 1910 1911 SD: kekulize,yes,forceV3000,no 1912 1913 --outPlotParams <Name,Value,...> [default: auto] 1914 A comma delimited list of parameter name and value pairs for generating 1915 plots using Seaborn module. The supported parameter names along with their 1916 default values are shown below: 1917 1918 type,linepoint,outExt,svg,width,10,height,5.6, 1919 title,auto,xlabel,auto,ylabel,auto,titleWeight,bold,labelWeight,bold 1920 style,darkgrid,palette,deep,font,sans-serif,fontScale,1, 1921 context,notebook 1922 1923 Possible values: 1924 1925 type: linepoint, scatter, or line. Both points and lines are drawn 1926 for linepoint plot type. 1927 outExt: Any valid format supported by Python module Matplotlib. 1928 For example: PDF (.pdf), PNG (.png), PS (.ps), SVG (.svg) 1929 titleWeight, labelWeight: Font weight for title and axes labels. 1930 Any valid value. 1931 style: darkgrid, whitegrid, dark, white, ticks 1932 palette: deep, muted, pastel, dark, bright, colorblind 1933 font: Any valid font name 1934 context: paper, notebook, talk, poster, or any valid name 1935 1936 --outPlotRelativeEnergy <yes or no> [default: yes] 1937 Plot relative energies in the torsion plot. The minimum energy value is 1938 subtracted from energy values to calculate relative energies. 1939 --outPlotTitleTorsionSpec <yes or no> [default: yes] 1940 Append torsion specification to the title of the torsion plot. 1941 --overwrite 1942 Overwrite existing files. 1943 --precision <number> [default: 6] 1944 Floating point precision for writing energy values. 1945 --psi4OptionsParams <Name,Value,...> [default: none] 1946 A comma delimited list of Psi4 option name and value pairs for setting 1947 global and module options. The names are 'option_name' for global options 1948 and 'module_name__option_name' for options local to a module. The 1949 specified option names must be valid Psi4 names. No validation is 1950 performed. 1951 1952 The specified option name and value pairs are processed and passed to 1953 psi4.set_options() as a dictionary. The supported value types are float, 1954 integer, boolean, or string. The float value string is converted into a float. 1955 The valid values for a boolean string are yes, no, true, false, on, or off. 1956 --psi4RunParams <Name,Value,...> [default: auto] 1957 A comma delimited list of parameter name and value pairs for configuring 1958 Psi4 jobs. 1959 1960 The supported parameter names along with their default and possible 1961 values are shown below: 1962 1963 MemoryInGB, 1 1964 NumThreads, 1 1965 OutputFile, auto [ Possible values: stdout, quiet, or FileName ] 1966 ScratchDir, auto [ Possivle values: DirName] 1967 RemoveOutputFile, yes [ Possible values: yes, no, true, or false] 1968 1969 These parameters control the runtime behavior of Psi4. 1970 1971 The default file name for 'OutputFile' is <InFileRoot>_Psi4.out. The PID 1972 is appended to output file name during multiprocessing as shown: 1973 <InFileRoot>_Psi4_<PIDNum>.out. The 'stdout' value for 'OutputType' 1974 sends Psi4 output to stdout. The 'quiet' or 'devnull' value suppresses 1975 all Psi4 output. The 'OutputFile' is set to 'quiet' for 'auto' value during 1976 'Conformers' of '--mpLevel' option. 1977 1978 The default 'Yes' value of 'RemoveOutputFile' option forces the removal 1979 of any existing Psi4 before creating new files to append output from 1980 multiple Psi4 runs. 1981 1982 The option 'ScratchDir' is a directory path to the location of scratch 1983 files. The default value corresponds to Psi4 default. It may be used to 1984 override the deafult path. 1985 -q, --quiet <yes or no> [default: no] 1986 Use quiet mode. The warning and information messages will not be printed. 1987 --reference <text> [default: auto] 1988 Reference wave function to use for energy calculation or constrained energy 1989 minimization. Default: RHF or UHF. The default values are Restricted Hartree-Fock 1990 (RHF) for closed-shell molecules with all electrons paired and Unrestricted 1991 Hartree-Fock (UHF) for open-shell molecules with unpaired electrons. 1992 1993 The specified value must be a valid Psi4 reference wave function. No validation 1994 is performed. For example: ROHF, CUHF, RKS, etc. 1995 1996 The spin multiplicity determines the default value of reference wave function 1997 for input molecules. It is calculated from number of free radical electrons using 1998 Hund's rule of maximum multiplicity defined as 2S + 1 where S is the total 1999 electron spin. The total spin is 1/2 the number of free radical electrons in a 2000 molecule. The value of 'SpinMultiplicity' molecule property takes precedence 2001 over the calculated value of spin multiplicity. 2002 -t, --torsions <SMILES/SMARTS,...,...> 2003 SMILES/SMARTS patterns corresponding to torsion specifications. It's a 2004 comma delimited list of valid SMILES/SMART patterns. 2005 2006 A substructure match is performed to select torsion atoms in a molecule. 2007 The SMILES pattern match must correspond to four torsion atoms. The 2008 SMARTS patterns containing atom map numbers may match more than four 2009 atoms. The atom map numbers, however, must match exactly four torsion 2010 atoms. For example: [s:1][c:2]([aX2,cH1])!@[CX3:3](O)=[O:4] for thiophene 2011 esters and carboxylates as specified in Torsion Library (TorLib) [Ref 146]. 2012 --torsionsFilterbyAtomIndices <Index1, Index2, ...> [default: none] 2013 Comma delimited list of atom indices for filtering torsion matches 2014 corresponding to torsion specifications "-t, --torsions". The atom indices 2015 must be valid. No explicit validation is performed. The list must contain at 2016 least 4 atom indices. 2017 2018 The torsion atom indices, matched by "-t, --torsions" specifications, must be 2019 present in the list. Otherwise, the torsion matches are ignored. 2020 --torsionMaxMatches <number> [default: 5] 2021 Maximum number of torsions to match for each torsion specification in a 2022 molecule. 2023 --torsionMinimize <yes or no> [default: no] 2024 Perform constrained energy minimization on a conformation ensemble 2025 for a specific torsion angle and select the lowest energy conformation 2026 representing the torsion angle. A conformation ensemble is generated for 2027 each 3D structure representing a specific torsion angle using a combination 2028 of distance geometry and forcefield followed by constrained geometry 2029 optimization using a quantum chemistry method. 2030 --torsionRangeMode <Range or Angles> [default: Range] 2031 Perform torsion scan using torsion angles corresponding to a torsion range 2032 or an explicit list of torsion angles. Possible values: Range or Angles. You 2033 may use '--torsionRange' option to specify values for torsion angle or 2034 torsion angles. 2035 --torsionRange <Start,Stop,Step or Angle1,Angle2...> [default: auto] 2036 Start, stop, and step size angles or a comma delimited list of angles in 2037 degrees for a torsion scan. 2038 2039 This value is '--torsionRangeMode' specific. It must be a triplet corresponding 2040 to 'start,Stop,Step' for 'Range' value of '--torsionRange' option. Otherwise, it 2041 is comma delimited list of one or more torsion angles for 'Angles' value of 2042 '--torsionRange' option. 2043 2044 The default values, based on '--torsionRangeMode' option, are shown below: 2045 2046 TorsionRangeMode Default value 2047 Range 0,360,5 2048 Angles None 2049 2050 You must explicitly provide a list of torsion angle(s) for 'Angles' of 2051 '--torsionRangeMode' option. 2052 --useChirality <yes or no> [default: no] 2053 Use chirrality during substructure matches for identification of torsions. 2054 -w, --workingdir <dir> 2055 Location of working directory which defaults to the current directory. 2056 2057 Examples: 2058 To perform a torsion scan on the first molecule in a SMILES file using a minimum 2059 energy structure of the molecule selected from an initial ensemble of conformations 2060 generated using distance geometry and forcefield, skip generation of conformation 2061 ensembles for specific torsion angles and constrained energy minimization of the 2062 ensemble, calculating single point at a specific torsion angle energy using B3LYP/6-31G** 2063 and B3LYP/6-31+G** for non-sulfur and sulfur containing molecules, generate output files 2064 corresponding to structure, energy and torsion plot, type: 2065 2066 % Psi4PerformTorsionScan.py -t "CCCC" -i Psi4SampleTorsionScan.smi 2067 -o SampleOut.sdf 2068 2069 To run the previous example for performing a torsion scan using a specific list 2070 of torsion angles, type: 2071 2072 % Psi4PerformTorsionScan.py -t "CCCC" -i Psi4SampleTorsionScan.smi 2073 -o SampleOut.sdf --torsionRangeMode Angles 2074 --torsionRange "0,180,360" 2075 2076 To run the previous example on the first molecule in a SD file containing 3D 2077 coordinates and skip the generations of initial 3D structure, type: 2078 2079 % Psi4PerformTorsionScan.py -t "CCCC" --infile3D yes 2080 -i Psi4SampleTorsionScan3D.sdf -o SampleOut.sdf 2081 2082 To run the first example on all molecules in a SD file, type: 2083 2084 % Psi4PerformTorsionScan.py -t "CCCC" --modeMols All 2085 -i Psi4SampleTorsionScan.sdf -o SampleOut.sdf 2086 2087 To run the first example on all molecules in a SD file containing 3D 2088 coordinates and skip the generation of initial 3D structures, type: 2089 2090 % Psi4PerformTorsionScan.py -t "CCCC" --infile3D yes 2091 --modeMols All -i Psi4SampleTorsionScan3D.sdf -o SampleOut.sdf 2092 2093 To perform a torsion scan on the first molecule in a SMILES file using a minimum 2094 energy structure of the molecule selected from an initial ensemble of conformations 2095 generated using distance geometry and forcefield, generate up to 50 conformations 2096 for specific torsion angles using ETKDGv2 methodology followed by initial MMFF 2097 forcefield minimization and final energy minimization using B3LYP/6-31G** and 2098 B3LYP/6-31+G** for non-sulfur and sulfur containing molecules, generate output files 2099 corresponding to minimum energy structure, energy and torsion plot, type: 2100 2101 % Psi4PerformTorsionScan.py -t "CCCC" --torsionMinimize Yes 2102 -i Psi4SampleTorsionScan.smi -o SampleOut.sdf 2103 2104 To run the previous example on all molecules in a SD file, type: 2105 2106 % Psi4PerformTorsionScan.py -t "CCCC" --modeMols All 2107 --torsionMinimize Yes -i Psi4SampleTorsionScan.sdf -o SampleOut.sdf 2108 2109 To run the previous example on all molecules in a SD file containing 3D 2110 coordinates and skip the generation of initial 3D structures, type: 2111 2112 % Psi4PerformTorsionScan.py -t "CCCC" --modeMols All 2113 --infile3D yes --modeMols All --torsionMinimize Yes 2114 -i Psi4SampleTorsionScan.sdf -o SampleOut.sdf 2115 2116 To run the previous example in multiprocessing mode at molecules level 2117 on all available CPUs without loading all data into memory and write out 2118 a SD file, type: 2119 2120 % Psi4PerformTorsionScan.py -t "CCCC" -i Psi4SampleTorsionScan.smi 2121 -o SampleOut.sdf --modeMols All --torsionMinimize Yes --mp yes 2122 2123 To run the previous example in multiprocessing mode at torsion angles level 2124 on all available CPUs without loading all data into memory and write out 2125 a SD file, type: 2126 2127 % Psi4PerformTorsionScan.py -t "CCCC" -i Psi4SampleTorsionScan.smi 2128 -o SampleOut.sdf --modeMols All --torsionMinimize Yes --mp yes 2129 --mpLevel TorsionAngles 2130 2131 To run the previous example in multiprocessing mode on all available CPUs 2132 by loading all data into memory and write out a SD file, type: 2133 2134 % Psi4PerformTorsionScan.py -t "CCCC" -i Psi4SampleTorsionScan.smi 2135 -o SampleOut.sdf --modeMols All --torsionMinimize Yes --mp yes 2136 --mpParams "inputDataMode,InMemory" 2137 2138 To run the previous example in multiprocessing mode on specific number of 2139 CPUs and chunk size without loading all data into memory and write out a SD file, 2140 type: 2141 2142 % Psi4PerformTorsionScan.py -t "CCCC" -i Psi4SampleTorsionScan.smi 2143 -o SampleOut.sdf --modeMols All --torsionMinimize Yes --mp yes 2144 --mpParams "inputDataMode,Lazy,numProcesses,4,chunkSize,8" 2145 2146 Author: 2147 Manish Sud(msud@san.rr.com) 2148 2149 See also: 2150 Psi4CalculateEnergy.py, Psi4GenerateConformers.py, 2151 Psi4GenerateConstrainedConformers.py, Psi4PerformConstrainedMinimization.py 2152 2153 Copyright: 2154 Copyright (C) 2024 Manish Sud. All rights reserved. 2155 2156 The functionality available in this script is implemented using RDKit, an 2157 open source toolkit for cheminformatics developed by Greg Landrum. 2158 2159 This file is part of MayaChemTools. 2160 2161 MayaChemTools is free software; you can redistribute it and/or modify it under 2162 the terms of the GNU Lesser General Public License as published by the Free 2163 Software Foundation; either version 3 of the License, or (at your option) any 2164 later version. 2165 2166 """ 2167 2168 if __name__ == "__main__": 2169 main()