1 #!/bin/env python 2 # 3 # File: Psi4GenerateConstrainedConformers.py 4 # Author: Manish Sud <msud@san.rr.com> 5 # 6 # Copyright (C) 2024 Manish Sud. All rights reserved. 7 # 8 # The functionality available in this script is implemented using Psi4, an 9 # open source quantum chemistry software package, and RDKit, an open 10 # source toolkit for cheminformatics developed by Greg Landrum. 11 # 12 # This file is part of MayaChemTools. 13 # 14 # MayaChemTools is free software; you can redistribute it and/or modify it under 15 # the terms of the GNU Lesser General Public License as published by the Free 16 # Software Foundation; either version 3 of the License, or (at your option) any 17 # later version. 18 # 19 # MayaChemTools is distributed in the hope that it will be useful, but without 20 # any warranty; without even the implied warranty of merchantability of fitness 21 # for a particular purpose. See the GNU Lesser General Public License for more 22 # details. 23 # 24 # You should have received a copy of the GNU Lesser General Public License 25 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or 26 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, 27 # Boston, MA, 02111-1307, USA. 28 # 29 30 from __future__ import print_function 31 32 # Add local python path to the global path and import standard library modules... 33 import os 34 import sys; sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python")) 35 import time 36 import re 37 import shutil 38 import multiprocessing as mp 39 40 # Psi4 imports... 41 if (hasattr(shutil, 'which') and shutil.which("psi4") is None): 42 sys.stderr.write("\nWarning: Failed to find 'psi4' in your PATH indicating potential issues with your\n") 43 sys.stderr.write("Psi4 environment. The 'import psi4' directive in the global scope of the script\n") 44 sys.stderr.write("interferes with the multiprocessing functionality. It is imported later in the\n") 45 sys.stderr.write("local scope during the execution of the script and may fail. Check/update your\n") 46 sys.stderr.write("Psi4 environment and try again.\n\n") 47 48 # RDKit imports... 49 try: 50 from rdkit import rdBase 51 from rdkit import Chem 52 from rdkit.Chem import AllChem, rdMolAlign 53 from rdkit.Chem import rdFMCS 54 except ImportError as ErrMsg: 55 sys.stderr.write("\nFailed to import RDKit module/package: %s\n" % ErrMsg) 56 sys.stderr.write("Check/update your RDKit environment and try again.\n\n") 57 sys.exit(1) 58 59 # MayaChemTools imports... 60 try: 61 from docopt import docopt 62 import MiscUtil 63 import Psi4Util 64 import RDKitUtil 65 except ImportError as ErrMsg: 66 sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg) 67 sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n") 68 sys.exit(1) 69 70 ScriptName = os.path.basename(sys.argv[0]) 71 Options = {} 72 OptionsInfo = {} 73 74 def main(): 75 """Start execution of the script.""" 76 77 MiscUtil.PrintInfo("\n%s (Psi4: Imported later; RDKit v%s; MayaChemTools v%s; %s): Starting...\n" % (ScriptName, rdBase.rdkitVersion, MiscUtil.GetMayaChemToolsVersion(), time.asctime())) 78 79 (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime() 80 81 # Retrieve command line arguments and options... 82 RetrieveOptions() 83 84 # Process and validate command line arguments and options... 85 ProcessOptions() 86 87 # Perform actions required by the script... 88 GenerateConstrainedConformers() 89 90 MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName) 91 MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime)) 92 93 def GenerateConstrainedConformers(): 94 """Generate constrained conformers.""" 95 96 # Read and validate reference molecule... 97 RefMol = RetrieveReferenceMolecule() 98 99 # Setup a molecule reader for input file... 100 MiscUtil.PrintInfo("\nProcessing file %s..." % OptionsInfo["Infile"]) 101 Mols = RDKitUtil.ReadMolecules(OptionsInfo["Infile"], **OptionsInfo["InfileParams"]) 102 103 # Set up a molecule writer... 104 Writer = RDKitUtil.MoleculesWriter(OptionsInfo["Outfile"], **OptionsInfo["OutfileParams"]) 105 if Writer is None: 106 MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OptionsInfo["Outfile"]) 107 MiscUtil.PrintInfo("Generating file %s..." % OptionsInfo["Outfile"]) 108 109 MolCount, ValidMolCount, CoreScaffoldMissingCount, ConfGenFailedCount = ProcessMolecules(RefMol, Mols, Writer) 110 111 if Writer is not None: 112 Writer.close() 113 114 MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount) 115 MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount) 116 MiscUtil.PrintInfo("Number of molecules with missing core scaffold: %d" % CoreScaffoldMissingCount) 117 MiscUtil.PrintInfo("Number of molecules failed during conformation generation or minimization: %d" % ConfGenFailedCount) 118 MiscUtil.PrintInfo("Number of ignored molecules: %d" % (MolCount - ValidMolCount + CoreScaffoldMissingCount + ConfGenFailedCount)) 119 120 def ProcessMolecules(RefMol, Mols, Writer): 121 """Process molecules to generate constrained conformers.""" 122 123 if OptionsInfo["MPMode"]: 124 return ProcessMoleculesUsingMultipleProcesses(RefMol, Mols, Writer) 125 else: 126 return ProcessMoleculesUsingSingleProcess(RefMol, Mols, Writer) 127 128 def ProcessMoleculesUsingSingleProcess(RefMol, Mols, Writer): 129 """Process molecules to generate constrained conformers using a single process.""" 130 131 # Intialize Psi4... 132 MiscUtil.PrintInfo("\nInitializing Psi4...") 133 Psi4Handle = Psi4Util.InitializePsi4(Psi4RunParams = OptionsInfo["Psi4RunParams"], Psi4OptionsParams = OptionsInfo["Psi4OptionsParams"], PrintVersion = True, PrintHeader = True) 134 OptionsInfo["psi4"] = Psi4Handle 135 136 # Setup max iterations global variable... 137 Psi4Util.UpdatePsi4OptionsParameters(Psi4Handle, {'GEOM_MAXITER': OptionsInfo["MaxIters"]}) 138 139 # Setup conversion factor for energy units... 140 SetupEnergyConversionFactor(Psi4Handle) 141 142 (MolCount, ValidMolCount, CoreScaffoldMissingCount, ConfGenFailedCount) = [0] * 4 143 144 MiscUtil.PrintInfo("\nGenerating constrained conformers and performing energy minimization...") 145 146 for Mol in Mols: 147 MolCount += 1 148 149 if not CheckAndValidateMolecule(Mol, MolCount): 150 continue 151 152 # Setup 2D coordinates for SMILES input file... 153 if OptionsInfo["SMILESInfileStatus"]: 154 AllChem.Compute2DCoords(Mol) 155 156 ValidMolCount += 1 157 158 # Setup a reference molecule core containing common scaffold atoms... 159 RefMolCore = SetupCoreScaffold(RefMol, Mol, MolCount) 160 if RefMolCore is None: 161 CoreScaffoldMissingCount += 1 162 continue 163 164 ConfMols, CalcStatus, ConfIDs, ConfEnergyValues, ConfScaffoldEmbedRMSDValues = GenerateMolConformers(Psi4Handle, Mol, RefMolCore, MolCount) 165 166 if not CalcStatus: 167 ConfGenFailedCount += 1 168 continue 169 170 WriteMolConformers(Writer, Mol, MolCount, ConfMols, ConfIDs, ConfEnergyValues, ConfScaffoldEmbedRMSDValues) 171 172 return (MolCount, ValidMolCount, CoreScaffoldMissingCount, ConfGenFailedCount) 173 174 def ProcessMoleculesUsingMultipleProcesses(RefMol, Mols, Writer): 175 """Process molecules to generate constrained conformers using multiprocessing.""" 176 177 if OptionsInfo["MPLevelConformersMode"]: 178 return ProcessMoleculesUsingMultipleProcessesAtConformersLevel(RefMol, Mols, Writer) 179 elif OptionsInfo["MPLevelMoleculesMode"]: 180 return ProcessMoleculesUsingMultipleProcessesAtMoleculesLevel(RefMol, Mols, Writer) 181 else: 182 MiscUtil.PrintError("The value, %s, option \"--mpLevel\" is not supported." % (OptionsInfo["MPLevel"])) 183 184 def ProcessMoleculesUsingMultipleProcessesAtMoleculesLevel(RefMol, Mols, Writer): 185 """Process molecules to generate constrained conformers using multiprocessing at molecules level.""" 186 187 MiscUtil.PrintInfo("\nGenerating constrained conformers and performing energy minimization using multiprocessing at molecules level...") 188 189 MPParams = OptionsInfo["MPParams"] 190 191 # Setup data for initializing a worker process... 192 OptionsInfo["EncodedRefMol"] = RDKitUtil.MolToBase64EncodedMolString(RefMol) 193 InitializeWorkerProcessArgs = (MiscUtil.ObjectToBase64EncodedString(Options), MiscUtil.ObjectToBase64EncodedString(OptionsInfo)) 194 195 # Setup a encoded mols data iterable for a worker process... 196 WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(Mols) 197 198 # Setup process pool along with data initialization for each process... 199 if not OptionsInfo["QuietMode"]: 200 MiscUtil.PrintInfo("\nConfiguring multiprocessing using %s method..." % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()")) 201 MiscUtil.PrintInfo("NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n" % (MPParams["NumProcesses"], MPParams["InputDataMode"], ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"]))) 202 203 ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeWorkerProcess, InitializeWorkerProcessArgs) 204 205 # Start processing... 206 if re.match("^Lazy$", MPParams["InputDataMode"], re.I): 207 Results = ProcessPool.imap(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"]) 208 elif re.match("^InMemory$", MPParams["InputDataMode"], re.I): 209 Results = ProcessPool.map(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"]) 210 else: 211 MiscUtil.PrintError("The value, %s, specified for \"--inputDataMode\" is not supported." % (MPParams["InputDataMode"])) 212 213 # Print out Psi4 version in the main process... 214 MiscUtil.PrintInfo("\nInitializing Psi4...\n") 215 Psi4Handle = Psi4Util.InitializePsi4(PrintVersion = True, PrintHeader = False) 216 OptionsInfo["psi4"] = Psi4Handle 217 218 (MolCount, ValidMolCount, CoreScaffoldMissingCount, ConfGenFailedCount) = [0] * 4 219 220 for Result in Results: 221 MolCount += 1 222 MolIndex, EncodedMol, EncodedConfMols, CoreScaffoldMissingStatus, CalcStatus, ConfIDs, ConfEnergyValues, ConfScaffoldEmbedRMSDValues = Result 223 224 if EncodedMol is None: 225 continue 226 ValidMolCount += 1 227 228 if CoreScaffoldMissingStatus: 229 CoreScaffoldMissingCount += 1 230 continue 231 232 if not CalcStatus: 233 ConfGenFailedCount += 1 234 continue 235 236 Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol) 237 ConfMols = [RDKitUtil.MolFromBase64EncodedMolString(EncodedConfMol) for EncodedConfMol in EncodedConfMols] 238 239 WriteMolConformers(Writer, Mol, MolCount, ConfMols, ConfIDs, ConfEnergyValues, ConfScaffoldEmbedRMSDValues) 240 241 return (MolCount, ValidMolCount, CoreScaffoldMissingCount, ConfGenFailedCount) 242 243 def InitializeWorkerProcess(*EncodedArgs): 244 """Initialize data for a worker process.""" 245 246 global Options, OptionsInfo 247 248 if not OptionsInfo["QuietMode"]: 249 MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid()) 250 251 # Decode Options and OptionInfo... 252 Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0]) 253 OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1]) 254 255 # Decode RefMol... 256 OptionsInfo["RefMol"] = RDKitUtil.MolFromBase64EncodedMolString(OptionsInfo["EncodedRefMol"]) 257 258 # Psi4 is initialized in the worker process to avoid creation of redundant Psi4 259 # output files for each process... 260 OptionsInfo["Psi4Initialized"] = False 261 262 def WorkerProcess(EncodedMolInfo): 263 """Process data for a worker process.""" 264 265 if not OptionsInfo["Psi4Initialized"]: 266 InitializePsi4ForWorkerProcess() 267 268 MolIndex, EncodedMol = EncodedMolInfo 269 270 MolNum = MolIndex + 1 271 ConfMols = None 272 CoreScaffoldMissingStatus = False 273 CalcStatus = False 274 ConfIDs = None 275 ConfEnergyValues = None 276 ConfScaffoldEmbedRMSDValues = None 277 278 if EncodedMol is None: 279 return [MolIndex, None, ConfMols, CoreScaffoldMissingStatus, CalcStatus, ConfIDs, ConfEnergyValues, ConfScaffoldEmbedRMSDValues] 280 281 Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol) 282 if not CheckAndValidateMolecule(Mol, MolNum): 283 return [MolIndex, None, ConfMols, CoreScaffoldMissingStatus, CalcStatus, ConfIDs, ConfEnergyValues, ConfScaffoldEmbedRMSDValues] 284 285 # Setup 2D coordinates for SMILES input file... 286 if OptionsInfo["SMILESInfileStatus"]: 287 AllChem.Compute2DCoords(Mol) 288 289 # Setup a reference molecule core containing common scaffold atoms... 290 RefMolCore = SetupCoreScaffold(OptionsInfo["RefMol"], Mol, MolNum) 291 if RefMolCore is None: 292 CoreScaffoldMissingStatus = True 293 return [MolIndex, EncodedMol, ConfMols, CoreScaffoldMissingStatus, CalcStatus, ConfIDs, ConfEnergyValues, ConfScaffoldEmbedRMSDValues] 294 295 # Generate conformers... 296 ConfMols, CalcStatus, ConfIDs, ConfEnergyValues, ConfScaffoldEmbedRMSDValues = GenerateMolConformers(OptionsInfo["psi4"], Mol, RefMolCore, MolNum) 297 298 EncodedConfMols = None 299 if ConfMols is not None: 300 EncodedConfMols = [RDKitUtil.MolToBase64EncodedMolString(ConfMol, PropertyPickleFlags = Chem.PropertyPickleOptions.MolProps | Chem.PropertyPickleOptions.PrivateProps) for ConfMol in ConfMols] 301 302 return [MolIndex, EncodedMol, EncodedConfMols, CoreScaffoldMissingStatus, CalcStatus, ConfIDs, ConfEnergyValues, ConfScaffoldEmbedRMSDValues] 303 304 def ProcessMoleculesUsingMultipleProcessesAtConformersLevel(RefMol, Mols, Writer): 305 """Process and minimize molecules using multiprocessing at conformers level.""" 306 307 MiscUtil.PrintInfo("\nPerforming constrained minimization with generation of conformers using multiprocessing at conformers level...") 308 309 (MolCount, ValidMolCount, CoreScaffoldMissingCount, ConfGenFailedCount) = [0] * 4 310 311 for Mol in Mols: 312 MolCount += 1 313 314 if not CheckAndValidateMolecule(Mol, MolCount): 315 continue 316 317 # Setup 2D coordinates for SMILES input file... 318 if OptionsInfo["SMILESInfileStatus"]: 319 AllChem.Compute2DCoords(Mol) 320 321 ValidMolCount += 1 322 323 # Setup a reference molecule core containing common scaffold atoms... 324 RefMolCore = SetupCoreScaffold(RefMol, Mol, MolCount) 325 if RefMolCore is None: 326 CoreScaffoldMissingCount += 1 327 continue 328 329 ConfMols, CalcStatus, ConfIDs, ConfEnergyValues, ConfScaffoldEmbedRMSDValues = ProcessConformersUsingMultipleProcesses(Mol, RefMolCore, MolCount) 330 331 if not CalcStatus: 332 ConfGenFailedCount += 1 333 continue 334 335 WriteMolConformers(Writer, Mol, MolCount, ConfMols, ConfIDs, ConfEnergyValues, ConfScaffoldEmbedRMSDValues) 336 337 return (MolCount, ValidMolCount, CoreScaffoldMissingCount, ConfGenFailedCount) 338 339 def ProcessConformersUsingMultipleProcesses(Mol, RefMolCore, MolNum): 340 """Generate constrained conformers and minimize them using multiple processes.""" 341 342 # Add hydrogens... 343 Mol = Chem.AddHs(Mol, addCoords = True) 344 345 MolName = RDKitUtil.GetMolName(Mol, MolNum) 346 347 # Setup constrained conformers... 348 MolConfs, MolConfsStatus, MolConfIDs = ConstrainEmbedAndMinimizeMoleculeUsingRDKit(Mol, RefMolCore, MolNum) 349 if not MolConfsStatus: 350 if not OptionsInfo["QuietMode"]: 351 MiscUtil.PrintWarning("Conformation generation couldn't be performed for molecule %s: Constrained embedding failed...\n" % MolName) 352 return (None, False, None, None, None) 353 354 MPParams = OptionsInfo["MPParams"] 355 356 # Setup data for initializing a worker process... 357 OptionsInfo["EncodedRefMolCore"] = RDKitUtil.MolToBase64EncodedMolString(RefMolCore) 358 InitializeWorkerProcessArgs = (MiscUtil.ObjectToBase64EncodedString(Options), MiscUtil.ObjectToBase64EncodedString(OptionsInfo)) 359 360 # Setup a encoded mols data iterable for a worker process... 361 WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStringsWithIDs(MolConfs, MolConfIDs) 362 363 # Setup process pool along with data initialization for each process... 364 if not OptionsInfo["QuietMode"]: 365 MiscUtil.PrintInfo("\nConfiguring multiprocessing using %s method..." % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()")) 366 MiscUtil.PrintInfo("NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n" % (MPParams["NumProcesses"], MPParams["InputDataMode"], ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"]))) 367 368 ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeConformerWorkerProcess, InitializeWorkerProcessArgs) 369 370 # Start processing... 371 if re.match("^Lazy$", MPParams["InputDataMode"], re.I): 372 Results = ProcessPool.imap(ConformerWorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"]) 373 elif re.match("^InMemory$", MPParams["InputDataMode"], re.I): 374 Results = ProcessPool.map(ConformerWorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"]) 375 else: 376 MiscUtil.PrintError("The value, %s, specified for \"--inputDataMode\" is not supported." % (MPParams["InputDataMode"])) 377 378 MolConfIDs = [] 379 MolConfs = [] 380 CalcEnergyMap = {} 381 ScaffoldEmbedRMSDMap = {} 382 CalcFailedCount = 0 383 384 for Result in Results: 385 ConfID, EncodedMolConf, CalcStatus, Energy, ScaffoldEmbedRMSD = Result 386 387 if EncodedMolConf is None: 388 CalcFailedCount += 1 389 continue 390 391 if not CalcStatus: 392 CalcFailedCount += 1 393 continue 394 395 MolConf = RDKitUtil.MolFromBase64EncodedMolString(EncodedMolConf) 396 397 MolConfIDs.append(ConfID) 398 MolConfs.append(MolConf) 399 CalcEnergyMap[ConfID] = Energy 400 ScaffoldEmbedRMSDMap[ConfID] = ScaffoldEmbedRMSD 401 402 if CalcFailedCount: 403 return (None, False, None, None, None) 404 405 # Filter conformers... 406 SelectedMolConfs, SelectedMolConfIDs, SelectedConfEnergies, SelectedConfScaffoldEmbedRMSDValues = FilterMolConformers(Mol, MolNum, MolConfs, MolConfIDs, CalcEnergyMap, ScaffoldEmbedRMSDMap) 407 408 return (SelectedMolConfs, True, SelectedMolConfIDs, SelectedConfEnergies, SelectedConfScaffoldEmbedRMSDValues) 409 410 def InitializeConformerWorkerProcess(*EncodedArgs): 411 """Initialize data for a conformer worker process.""" 412 413 global Options, OptionsInfo 414 415 if not OptionsInfo["QuietMode"]: 416 MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid()) 417 418 # Decode Options and OptionInfo... 419 Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0]) 420 OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1]) 421 422 # Decode RefMol... 423 OptionsInfo["RefMolCore"] = RDKitUtil.MolFromBase64EncodedMolString(OptionsInfo["EncodedRefMolCore"]) 424 425 # Psi4 is initialized in the worker process to avoid creation of redundant Psi4 426 # output files for each process... 427 OptionsInfo["Psi4Initialized"] = False 428 429 def ConformerWorkerProcess(EncodedMolInfo): 430 """Process data for a conformer worker process.""" 431 432 if not OptionsInfo["Psi4Initialized"]: 433 InitializePsi4ForWorkerProcess() 434 435 MolConfID, EncodedMolConf = EncodedMolInfo 436 437 MolConfNum = MolConfID 438 CalcStatus = False 439 Energy = None 440 ScaffoldEmbedRMSD = None 441 442 if EncodedMolConf is None: 443 return [MolConfNum, None, CalcStatus, Energy, ScaffoldEmbedRMSD] 444 445 MolConf = RDKitUtil.MolFromBase64EncodedMolString(EncodedMolConf) 446 MolConfName = RDKitUtil.GetMolName(MolConf, MolConfNum) 447 448 if not OptionsInfo["QuietMode"]: 449 MiscUtil.PrintInfo("Processing molecule %s conformer number %s..." % (MolConfName, MolConfNum)) 450 451 # Perform Psi4 constrained minimization... 452 CalcStatus, Energy = ConstrainAndMinimizeMoleculeUsingPsi4(OptionsInfo["psi4"], MolConf, OptionsInfo["RefMolCore"], MolConfNum) 453 if not CalcStatus: 454 if not OptionsInfo["QuietMode"]: 455 MiscUtil.PrintWarning("Minimization couldn't be performed for molecule %s\n" % (MolName)) 456 return [MolConfNum, None, CalcStatus, Energy, ScaffoldEmbedRMSD] 457 458 if OptionsInfo["ScaffoldRMSDOut"]: 459 ScaffoldEmbedRMSD = "%.4f" % float(MolConf.GetProp('EmbedRMS')) 460 MolConf.ClearProp('EmbedRMS') 461 462 return [MolConfNum, RDKitUtil.MolToBase64EncodedMolString(MolConf, PropertyPickleFlags = Chem.PropertyPickleOptions.MolProps | Chem.PropertyPickleOptions.PrivateProps), CalcStatus, Energy, ScaffoldEmbedRMSD] 463 464 def InitializePsi4ForWorkerProcess(): 465 """Initialize Psi4 for a worker process.""" 466 467 if OptionsInfo["Psi4Initialized"]: 468 return 469 470 OptionsInfo["Psi4Initialized"] = True 471 472 if OptionsInfo["MPLevelConformersMode"] and re.match("auto", OptionsInfo["Psi4RunParams"]["OutputFileSpecified"], re.I): 473 # Run Psi4 in quiet mode during multiprocessing at Conformers level for 'auto' OutputFile... 474 OptionsInfo["Psi4RunParams"]["OutputFile"] = "quiet" 475 else: 476 # Update output file... 477 OptionsInfo["Psi4RunParams"]["OutputFile"] = Psi4Util.UpdatePsi4OutputFileUsingPID(OptionsInfo["Psi4RunParams"]["OutputFile"], os.getpid()) 478 479 # Intialize Psi4... 480 OptionsInfo["psi4"] = Psi4Util.InitializePsi4(Psi4RunParams = OptionsInfo["Psi4RunParams"], Psi4OptionsParams = OptionsInfo["Psi4OptionsParams"], PrintVersion = False, PrintHeader = True) 481 482 # Setup max iterations global variable... 483 Psi4Util.UpdatePsi4OptionsParameters(OptionsInfo["psi4"], {'GEOM_MAXITER': OptionsInfo["MaxIters"]}) 484 485 # Setup conversion factor for energy units... 486 SetupEnergyConversionFactor(OptionsInfo["psi4"]) 487 488 def RetrieveReferenceMolecule(): 489 """Retrieve and validate reference molecule.""" 490 491 RefFile = OptionsInfo["RefFile"] 492 493 MiscUtil.PrintInfo("\nProcessing file %s..." % (RefFile)) 494 OptionsInfo["InfileParams"]["AllowEmptyMols"] = False 495 ValidRefMols, RefMolCount, ValidRefMolCount = RDKitUtil.ReadAndValidateMolecules(RefFile, **OptionsInfo["InfileParams"]) 496 497 if ValidRefMolCount == 0: 498 MiscUtil.PrintError("The reference file, %s, contains no valid molecules." % RefFile) 499 elif ValidRefMolCount > 1: 500 MiscUtil.PrintWarning("The reference file, %s, contains, %d, valid molecules. Using first molecule as the reference molecule..." % (RefFile, ValidRefMolCount)) 501 502 RefMol = ValidRefMols[0] 503 504 if OptionsInfo["UseScaffoldSMARTS"]: 505 ScaffoldPatternMol = Chem.MolFromSmarts(OptionsInfo["ScaffoldSMARTS"]) 506 if ScaffoldPatternMol is None: 507 MiscUtil.PrintError("Failed to create scaffold pattern molecule. The scaffold SMARTS pattern, %s, specified using \"-s, --scaffold\" option is not valid." % (OptionsInfo["ScaffoldSMARTS"])) 508 509 if not RefMol.HasSubstructMatch(ScaffoldPatternMol): 510 MiscUtil.PrintError("The scaffold SMARTS pattern, %s, specified using \"-s, --scaffold\" option, is missing in the first valid reference molecule." % (OptionsInfo["ScaffoldSMARTS"])) 511 512 return RefMol 513 514 def SetupCoreScaffold(RefMol, Mol, MolCount): 515 """Setup a reference molecule core containing common scaffold atoms between 516 a pair of molecules.""" 517 518 if OptionsInfo["UseScaffoldMCS"]: 519 return SetupCoreScaffoldByMCS(RefMol, Mol, MolCount) 520 elif OptionsInfo["UseScaffoldSMARTS"]: 521 return SetupCoreScaffoldBySMARTS(RefMol, Mol, MolCount) 522 else: 523 MiscUtil.PrintError("The value, %s, specified for \"-s, --scaffold\" option is not supported." % (OptionsInfo["Scaffold"])) 524 525 def SetupCoreScaffoldByMCS(RefMol, Mol, MolCount): 526 """Setup a reference molecule core containing common scaffold atoms between 527 a pair of molecules using MCS.""" 528 529 MCSParams = OptionsInfo["MCSParams"] 530 Mols = [RefMol, Mol] 531 532 MCSResultObject = rdFMCS.FindMCS(Mols, maximizeBonds = MCSParams["MaximizeBonds"], threshold = MCSParams["Threshold"], timeout = MCSParams["TimeOut"], verbose = MCSParams["Verbose"], matchValences = MCSParams["MatchValences"], ringMatchesRingOnly = MCSParams["RingMatchesRingOnly"], completeRingsOnly = MCSParams["CompleteRingsOnly"], matchChiralTag = MCSParams["MatchChiralTag"], atomCompare = MCSParams["AtomCompare"], bondCompare = MCSParams["BondCompare"], seedSmarts = MCSParams["SeedSMARTS"]) 533 534 if MCSResultObject.canceled: 535 if not OptionsInfo["QuietMode"]: 536 MiscUtil.PrintWarning("MCS failed to identify a common core scaffold between reference moecule and input molecule %s. Specify a different set of parameters using \"-m, --mcsParams\" option and try again." % (RDKitUtil.GetMolName(Mol, MolCount))) 537 return None 538 539 CoreNumAtoms = MCSResultObject.numAtoms 540 CoreNumBonds = MCSResultObject.numBonds 541 542 SMARTSCore = MCSResultObject.smartsString 543 544 if not len(SMARTSCore): 545 if not OptionsInfo["QuietMode"]: 546 MiscUtil.PrintWarning("MCS failed to identify a common core scaffold between reference moecule and input molecule %s. Specify a different set of parameters using \"-m, --mcsParams\" option and try again." % (RDKitUtil.GetMolName(Mol, MolCount))) 547 return None 548 549 if CoreNumAtoms < MCSParams["MinNumAtoms"]: 550 if not OptionsInfo["QuietMode"]: 551 MiscUtil.PrintWarning("Number of atoms, %d, in core scaffold identified by MCS is less than, %d, as specified by \"minNumAtoms\" parameter in \"-m, --mcsParams\" option." % (CoreNumAtoms, MCSParams["MinNumAtoms"])) 552 return None 553 554 if CoreNumBonds < MCSParams["MinNumBonds"]: 555 if not OptionsInfo["QuietMode"]: 556 MiscUtil.PrintWarning("Number of bonds, %d, in core scaffold identified by MCS is less than, %d, as specified by \"minNumBonds\" parameter in \"-m, --mcsParams\" option." % (CoreNumBonds, MCSParams["MinNumBonds"])) 557 return None 558 559 return GenerateCoreMol(RefMol, SMARTSCore) 560 561 def SetupCoreScaffoldBySMARTS(RefMol, Mol, MolCount): 562 """Setup a reference molecule core containing common scaffold atoms between 563 a pair of molecules using specified SMARTS.""" 564 565 if OptionsInfo["ScaffoldPatternMol"] is None: 566 OptionsInfo["ScaffoldPatternMol"] = Chem.MolFromSmarts(OptionsInfo["ScaffoldSMARTS"]) 567 568 if not Mol.HasSubstructMatch(OptionsInfo["ScaffoldPatternMol"]): 569 if not OptionsInfo["QuietMode"]: 570 MiscUtil.PrintWarning("The scaffold SMARTS pattern, %s, specified using \"-s, --scaffold\" option is missing in input molecule, %s." % (OptionsInfo["ScaffoldSMARTS"], RDKitUtil.GetMolName(Mol, MolCount))) 571 return None 572 573 return GenerateCoreMol(RefMol, OptionsInfo["ScaffoldSMARTS"]) 574 575 def GenerateCoreMol(RefMol, SMARTSCore): 576 """Generate core molecule for embedding.""" 577 578 # Create a molecule corresponding to core atoms... 579 SMARTSCoreMol = Chem.MolFromSmarts(SMARTSCore) 580 581 # Setup a ref molecule containing core atoms with dummy atoms as 582 # attachment points for atoms around the core atoms... 583 Core = AllChem.ReplaceSidechains(Chem.RemoveHs(RefMol), SMARTSCoreMol) 584 585 # Delete any substructures containing dummy atoms.. 586 RefMolCore = AllChem.DeleteSubstructs(Core, Chem.MolFromSmiles('*')) 587 RefMolCore.UpdatePropertyCache() 588 589 return RefMolCore 590 591 def GenerateMolConformers(Psi4Handle, Mol, RefMolCore, MolNum = None): 592 """Generate constrained conformers.""" 593 594 # Add hydrogens... 595 Mol = Chem.AddHs(Mol, addCoords = True) 596 597 MolName = RDKitUtil.GetMolName(Mol, MolNum) 598 599 # Setup constrained conformers... 600 MolConfs, MolConfsStatus, MolConfIDs = ConstrainEmbedAndMinimizeMoleculeUsingRDKit(Mol, RefMolCore, MolNum) 601 if not MolConfsStatus: 602 if not OptionsInfo["QuietMode"]: 603 MiscUtil.PrintWarning("Conformation generation couldn't be performed for molecule %s: Constrained embedding failed...\n" % MolName) 604 return (None, False, None, None, None) 605 606 CalcEnergyMap = {} 607 ScaffoldEmbedRMSDMap = {} 608 609 for MolConfIndex, MolConf in enumerate(MolConfs): 610 MolConfID = MolConfIDs[MolConfIndex] 611 612 if not OptionsInfo["QuietMode"]: 613 MiscUtil.PrintInfo("\nProcessing molecule %s conformer number %s..." % (MolName, MolConfID)) 614 615 # Perform Psi4 minimization... 616 CalcStatus, Energy = ConstrainAndMinimizeMoleculeUsingPsi4(Psi4Handle, MolConf, RefMolCore, MolNum) 617 if not CalcStatus: 618 if not OptionsInfo["QuietMode"]: 619 MiscUtil.PrintWarning("Minimization couldn't be performed for molecule %s\n" % (MolName)) 620 return (None, False, None, None, None) 621 622 CalcEnergyMap[MolConfID] = Energy 623 624 if OptionsInfo["ScaffoldRMSDOut"]: 625 ScaffoldEmbedRMSDMap[MolConfID] = "%.4f" % float(MolConf.GetProp('EmbedRMS')) 626 MolConf.ClearProp('EmbedRMS') 627 628 # Filter conformers... 629 SelectedMolConfs, SelectedMolConfIDs, SelectedConfEnergies, SelectedConfScaffoldEmbedRMSDValues = FilterMolConformers(Mol, MolNum, MolConfs, MolConfIDs, CalcEnergyMap, ScaffoldEmbedRMSDMap) 630 631 return (SelectedMolConfs, True, SelectedMolConfIDs, SelectedConfEnergies, SelectedConfScaffoldEmbedRMSDValues) 632 633 def FilterMolConformers(Mol, MolNum, MolConfs, ConfIDs, CalcEnergyMap, ScaffoldEmbedRMSDMap): 634 """Filter conformers for a molecule.""" 635 636 # Sort conformers by energy... 637 SortedConfIDs = sorted(ConfIDs, key = lambda ConfID: CalcEnergyMap[ConfID]) 638 639 # Setup a map from ConfID to MolConf... 640 MolConfsMap = {} 641 for MolConfIndex, ConfID in enumerate(ConfIDs): 642 MolConfsMap[ConfID] = MolConfs[MolConfIndex] 643 644 MinEnergyConfID = SortedConfIDs[0] 645 MinConfEnergy = CalcEnergyMap[MinEnergyConfID] 646 MinEnergyMolConf = MolConfsMap[MinEnergyConfID] 647 648 EnergyWindow = OptionsInfo["EnergyWindow"] 649 650 EnergyRMSDCutoff = OptionsInfo["EnergyRMSDCutoff"] 651 ApplyEnergyRMSDCutoff = True if EnergyRMSDCutoff > 0 else False 652 653 EnergyRMSDCutoffLowest = OptionsInfo["EnergyRMSDCutoffModeLowest"] 654 EnergyRMSDCalcModeBest = OptionsInfo["EnergyRMSDCalcModeBest"] 655 656 SelectedConfIDs = [] 657 658 ConfCount = 0 659 IgnoredByEnergyConfCount = 0 660 IgnoredByRMSDConfCount = 0 661 662 FirstConf = True 663 664 for ConfID in SortedConfIDs: 665 if FirstConf: 666 FirstConf = False 667 ConfCount += 1 668 SelectedConfIDs.append(ConfID) 669 continue 670 671 ConfEnergyDiff = abs(CalcEnergyMap[ConfID] - MinConfEnergy) 672 if ConfEnergyDiff > EnergyWindow: 673 IgnoredByEnergyConfCount += 1 674 continue 675 676 if ApplyEnergyRMSDCutoff: 677 IgnoreConf = False 678 ProbeMolConf = Chem.Mol(MolConfsMap[ConfID]) 679 680 if EnergyRMSDCutoffLowest: 681 # Compare RMSD with the lowest energy conformation... 682 RefMolConf = Chem.Mol(MinEnergyMolConf) 683 if EnergyRMSDCalcModeBest: 684 CalcRMSD = AllChem.GetBestRMS(ProbeMolConf, RefMolConf) 685 else: 686 CalcRMSD = rdMolAlign.AlignMol(ProbeMolConf, RefMolConf) 687 if CalcRMSD < EnergyRMSDCutoff: 688 IgnoreConf = True 689 else: 690 for SelectedConfID in SelectedConfIDs: 691 RefMolConf = Chem.Mol(MolConfsMap[SelectedConfID]) 692 if EnergyRMSDCalcModeBest: 693 CalcRMSD = AllChem.GetBestRMS(ProbeMolConf, RefMolConf) 694 else: 695 CalcRMSD = rdMolAlign.AlignMol(ProbeMolConf, RefMolConf) 696 if CalcRMSD < EnergyRMSDCutoff: 697 IgnoreConf = True 698 break 699 if IgnoreConf: 700 IgnoredByRMSDConfCount += 1 701 continue 702 703 ConfCount += 1 704 SelectedConfIDs.append(ConfID) 705 706 if not OptionsInfo["QuietMode"]: 707 MiscUtil.PrintInfo("\nTotal Number of conformations generated for %s: %d" % (RDKitUtil.GetMolName(Mol, MolNum), ConfCount)) 708 MiscUtil.PrintInfo("Number of conformations ignored due to energy window cutoff: %d" % (IgnoredByEnergyConfCount)) 709 if ApplyEnergyRMSDCutoff: 710 MiscUtil.PrintInfo("Number of conformations ignored due to energy RMSD cutoff: %d" % (IgnoredByRMSDConfCount)) 711 712 # Setup selected conformer molecules... 713 SelectedConfMols = [MolConfsMap[ConfID] for ConfID in SelectedConfIDs] 714 715 # Setup energies... 716 SelectedConfEnergies = None 717 if OptionsInfo["EnergyOut"]: 718 SelectedConfEnergies = [] 719 for ConfID in SelectedConfIDs: 720 Energy = "%.*f" % (OptionsInfo["Precision"], CalcEnergyMap[ConfID]) 721 SelectedConfEnergies.append(Energy) 722 723 # Setup scaffold RMSD values... 724 SelectedConfScaffoldEmbedRMSDValues = None 725 if OptionsInfo["ScaffoldRMSDOut"]: 726 SelectedConfScaffoldEmbedRMSDValues = [] 727 for ConfID in SelectedConfIDs: 728 MolConf = MolConfsMap[ConfID] 729 SelectedConfScaffoldEmbedRMSDValues.append(ScaffoldEmbedRMSDMap[ConfID]) 730 731 return (SelectedConfMols, SelectedConfIDs, SelectedConfEnergies, SelectedConfScaffoldEmbedRMSDValues) 732 733 def ConstrainAndMinimizeMoleculeUsingPsi4(Psi4Handle, Mol, RefMolCore, MolNum, ConfID = -1): 734 """Constrain and Minimize molecule using Psi4.""" 735 736 # Setup a list for constrained atoms... 737 ConstrainedAtomIndices = SetupConstrainedAtomIndicesForPsi4(Mol, RefMolCore) 738 if len(ConstrainedAtomIndices) == 0: 739 return (False, None) 740 741 # Setup a Psi4Mol... 742 Psi4Mol = SetupPsi4Mol(Psi4Handle, Mol, MolNum, ConfID) 743 if Psi4Mol is None: 744 return (False, None) 745 746 # Setup reference wave function... 747 Reference = SetupReferenceWavefunction(Mol) 748 Psi4Handle.set_options({'Reference': Reference}) 749 750 # Setup method name and basis set... 751 MethodName, BasisSet = SetupMethodNameAndBasisSet(Mol) 752 753 # Setup freeze list for constrained atoms... 754 FreezeXYZList = [("%s xyz" % AtomIdex) for AtomIdex in ConstrainedAtomIndices] 755 FreezeXYZString = " %s " % " ".join(FreezeXYZList) 756 Psi4Handle.set_options({"OPTKING__frozen_cartesian": FreezeXYZString}) 757 758 # Optimize geometry... 759 Status, Energy, WaveFunction = Psi4Util.PerformGeometryOptimization(Psi4Handle, Psi4Mol, MethodName, BasisSet, ReturnWaveFunction = True, Quiet = OptionsInfo["QuietMode"]) 760 761 if not Status: 762 PerformPsi4Cleanup(Psi4Handle) 763 return (False, None) 764 765 # Update atom positions... 766 AtomPositions = Psi4Util.GetAtomPositions(Psi4Handle, WaveFunction, InAngstroms = True) 767 RDKitUtil.SetAtomPositions(Mol, AtomPositions, ConfID = ConfID) 768 769 # Convert energy units... 770 if OptionsInfo["ApplyEnergyConversionFactor"]: 771 Energy = Energy * OptionsInfo["EnergyConversionFactor"] 772 773 # Clean up 774 PerformPsi4Cleanup(Psi4Handle) 775 776 return (True, Energy) 777 778 def ConstrainEmbedAndMinimizeMoleculeUsingRDKit(Mol, RefMolCore, MolNum = None): 779 """Constrain, embed, and minimize molecule.""" 780 781 # Setup forcefield function to use for constrained minimization... 782 ForceFieldFunction = None 783 ForceFieldName = None 784 if OptionsInfo["ConfGenerationParams"]["UseUFF"]: 785 ForceFieldFunction = lambda mol, confId = -1 : AllChem.UFFGetMoleculeForceField(mol, confId = confId) 786 ForeceFieldName = "UFF" 787 else: 788 ForceFieldFunction = lambda mol, confId = -1 : AllChem.MMFFGetMoleculeForceField(mol, AllChem.MMFFGetMoleculeProperties(mol, mmffVariant = OptionsInfo["ConfGenerationParams"]["MMFFVariant"]) , confId = confId) 789 ForeceFieldName = "MMFF" 790 791 if ForceFieldFunction is None: 792 if not OptionsInfo["QuietMode"]: 793 MiscUtil.PrintWarning("Failed to setup forcefield %s for molecule: %s\n" % (ForceFieldName, RDKitUtil.GetMolName(Mol, MolNum))) 794 return (None, False, None) 795 796 MaxConfs = OptionsInfo["ConfGenerationParams"]["MaxConfs"] 797 EnforceChirality = OptionsInfo["ConfGenerationParams"]["EnforceChirality"] 798 UseExpTorsionAnglePrefs = OptionsInfo["ConfGenerationParams"]["UseExpTorsionAnglePrefs"] 799 ETVersion = OptionsInfo["ConfGenerationParams"]["ETVersion"] 800 UseBasicKnowledge = OptionsInfo["ConfGenerationParams"]["UseBasicKnowledge"] 801 UseTethers = OptionsInfo["ConfGenerationParams"]["UseTethers"] 802 803 MolConfs = [] 804 ConfIDs = [ConfID for ConfID in range(0, MaxConfs)] 805 806 for ConfID in ConfIDs: 807 try: 808 MolConf = Chem.Mol(Mol) 809 AllChem.ConstrainedEmbed(MolConf, RefMolCore, useTethers = UseTethers, coreConfId = -1, randomseed = ConfID, getForceField = ForceFieldFunction, enforceChirality = EnforceChirality, useExpTorsionAnglePrefs = UseExpTorsionAnglePrefs, useBasicKnowledge = UseBasicKnowledge, ETversion = ETVersion) 810 except (ValueError, RuntimeError, Chem.rdchem.KekulizeException) as ErrMsg: 811 if not OptionsInfo["QuietMode"]: 812 MolName = RDKitUtil.GetMolName(Mol, MolNum) 813 MiscUtil.PrintWarning("Constrained embedding couldn't be performed for molecule %s:\n%s\n" % (RDKitUtil.GetMolName(Mol, MolNum), ErrMsg)) 814 return (None, False, None) 815 816 MolConfs.append(MolConf) 817 818 return FilterConstrainedConformationsByRMSD(Mol, MolConfs, ConfIDs, MolNum) 819 820 def FilterConstrainedConformationsByRMSD(Mol, MolConfs, MolConfIDs, MolNum = None): 821 """Filter contarained conformations by RMSD.""" 822 823 EmbedRMSDCutoff = OptionsInfo["ConfGenerationParams"]["EmbedRMSDCutoff"] 824 if EmbedRMSDCutoff is None or EmbedRMSDCutoff <= 0: 825 if not OptionsInfo["QuietMode"]: 826 MiscUtil.PrintInfo("\nGenerating initial ensemble of constrained conformations by distance geometry and forcefield for %s - EmbedRMSDCutoff: None; Size: %s" % (RDKitUtil.GetMolName(Mol, MolNum), len(MolConfs))) 827 return (MolConfs, True, MolConfIDs) 828 829 FirstMolConf = True 830 SelectedMolConfs = [] 831 SelectedMolConfIDs = [] 832 for MolConfIndex, MolConf in enumerate(MolConfs): 833 if FirstMolConf: 834 FirstMolConf = False 835 SelectedMolConfs.append(MolConf) 836 SelectedMolConfIDs.append(MolConfIDs[MolConfIndex]) 837 continue 838 839 # Compare RMSD against all selected conformers... 840 IgnoreConf = False 841 ProbeMolConf = Chem.RemoveHs(Chem.Mol(MolConf)) 842 for SelectedMolConfIndex, SelectedMolConf in enumerate(SelectedMolConfs): 843 RefMolConf = Chem.RemoveHs(Chem.Mol(SelectedMolConf)) 844 CalcRMSD = rdMolAlign.AlignMol(ProbeMolConf, RefMolConf) 845 846 if CalcRMSD < EmbedRMSDCutoff: 847 IgnoreConf = True 848 break 849 850 if IgnoreConf: 851 continue 852 853 SelectedMolConfs.append(MolConf) 854 SelectedMolConfIDs.append(MolConfIDs[MolConfIndex]) 855 856 if not OptionsInfo["QuietMode"]: 857 MiscUtil.PrintInfo("\nGenerating initial ensemble of constrained conformations by distance geometry and forcefield for %s - EmbedRMSDCutoff: %s; Size: %s; Size after RMSD filtering: %s" % (RDKitUtil.GetMolName(Mol, MolNum), EmbedRMSDCutoff, len(MolConfs), len(SelectedMolConfs))) 858 859 return (SelectedMolConfs, True, SelectedMolConfIDs) 860 861 def SetupConstrainedAtomIndicesForPsi4(Mol, RefMolCore, ConstrainHydrogens = False): 862 """Setup a list of atom indices to be constrained during Psi4 minimizaiton.""" 863 864 AtomIndices = [] 865 866 # Collect matched heavy atoms along with attached hydrogens... 867 for AtomIndex in Mol.GetSubstructMatch(RefMolCore): 868 Atom = Mol.GetAtomWithIdx(AtomIndex) 869 if Atom.GetAtomicNum() == 1: 870 continue 871 872 AtomIndices.append(AtomIndex) 873 for AtomNbr in Atom.GetNeighbors(): 874 if AtomNbr.GetAtomicNum() == 1: 875 if ConstrainHydrogens: 876 AtomNbrIndex = AtomNbr.GetIdx() 877 AtomIndices.append(AtomNbrIndex) 878 879 AtomIndices = sorted(AtomIndices) 880 881 # Atom indices start from 1 for Psi4 instead 0 for RDKit... 882 AtomIndices = [ AtomIndex + 1 for AtomIndex in AtomIndices] 883 884 return AtomIndices 885 886 def SetupPsi4Mol(Psi4Handle, Mol, MolNum, ConfID = -1): 887 """Setup a Psi4 molecule object.""" 888 889 # Turn off recentering and reorientation to perform optimization in the 890 # constrained coordinate space... 891 MolGeometry = RDKitUtil.GetPsi4XYZFormatString(Mol, ConfID = ConfID, NoCom = True, NoReorient = True) 892 893 try: 894 Psi4Mol = Psi4Handle.geometry(MolGeometry) 895 except Exception as ErrMsg: 896 Psi4Mol = None 897 if not OptionsInfo["QuietMode"]: 898 MiscUtil.PrintWarning("Failed to create Psi4 molecule from geometry string: %s\n" % ErrMsg) 899 MolName = RDKitUtil.GetMolName(Mol, MolNum) 900 MiscUtil.PrintWarning("Ignoring molecule: %s" % MolName) 901 902 return Psi4Mol 903 904 def PerformPsi4Cleanup(Psi4Handle): 905 """Perform clean up.""" 906 907 # Clean up after Psi4 run... 908 Psi4Handle.core.clean() 909 910 # Clean up any leftover scratch files... 911 if OptionsInfo["MPMode"]: 912 Psi4Util.RemoveScratchFiles(Psi4Handle, OptionsInfo["Psi4RunParams"]["OutputFile"]) 913 914 def CheckAndValidateMolecule(Mol, MolCount = None): 915 """Validate molecule for Psi4 calculations.""" 916 917 if Mol is None: 918 if not OptionsInfo["QuietMode"]: 919 MiscUtil.PrintInfo("\nProcessing molecule number %s..." % MolCount) 920 return False 921 922 MolName = RDKitUtil.GetMolName(Mol, MolCount) 923 if not OptionsInfo["QuietMode"]: 924 MiscUtil.PrintInfo("\nProcessing molecule %s..." % MolName) 925 926 if RDKitUtil.IsMolEmpty(Mol): 927 if not OptionsInfo["QuietMode"]: 928 MiscUtil.PrintWarning("Ignoring empty molecule: %s\n" % MolName) 929 return False 930 931 if not RDKitUtil.ValidateElementSymbols(RDKitUtil.GetAtomSymbols(Mol)): 932 if not OptionsInfo["QuietMode"]: 933 MiscUtil.PrintWarning("Ignoring molecule containing invalid element symbols: %s\n" % MolName) 934 return False 935 936 return True 937 938 def SetupMethodNameAndBasisSet(Mol): 939 """Setup method name and basis set.""" 940 941 MethodName = OptionsInfo["MethodName"] 942 if OptionsInfo["MethodNameAuto"]: 943 MethodName = "B3LYP" 944 945 BasisSet = OptionsInfo["BasisSet"] 946 if OptionsInfo["BasisSetAuto"]: 947 BasisSet = "6-31+G**" if RDKitUtil.IsAtomSymbolPresentInMol(Mol, "S") else "6-31G**" 948 949 return (MethodName, BasisSet) 950 951 def SetupReferenceWavefunction(Mol): 952 """Setup reference wavefunction.""" 953 954 Reference = OptionsInfo["Reference"] 955 if OptionsInfo["ReferenceAuto"]: 956 Reference = 'UHF' if (RDKitUtil.GetSpinMultiplicity(Mol) > 1) else 'RHF' 957 958 return Reference 959 960 def SetupEnergyConversionFactor(Psi4Handle): 961 """Setup converstion factor for energt units. The Psi4 energy units are Hartrees.""" 962 963 EnergyUnits = OptionsInfo["EnergyUnits"] 964 965 ApplyConversionFactor = True 966 if re.match("^kcal\/mol$", EnergyUnits, re.I): 967 ConversionFactor = Psi4Handle.constants.hartree2kcalmol 968 elif re.match("^kJ\/mol$", EnergyUnits, re.I): 969 ConversionFactor = Psi4Handle.constants.hartree2kJmol 970 elif re.match("^eV$", EnergyUnits, re.I): 971 ConversionFactor = Psi4Handle.constants.hartree2ev 972 else: 973 ApplyConversionFactor = False 974 ConversionFactor = 1.0 975 976 OptionsInfo["ApplyEnergyConversionFactor"] = ApplyConversionFactor 977 OptionsInfo["EnergyConversionFactor"] = ConversionFactor 978 979 def WriteMolConformers(Writer, Mol, MolNum, ConfMols, ConfIDs, ConfEnergyValues = None, ConfScaffoldEmbedRMSDValues = None): 980 """Write molecule coformers.""" 981 982 if ConfMols is None: 983 return 984 985 for Index, ConfMol in enumerate(ConfMols): 986 ConfMolName = RDKitUtil.GetMolName(Mol, MolNum) 987 SetConfMolName(ConfMol, ConfMolName, ConfIDs[Index]) 988 989 if ConfScaffoldEmbedRMSDValues is not None: 990 ConfMol.SetProp("CoreScaffoldEmbedRMSD", ConfScaffoldEmbedRMSDValues[Index]) 991 992 if ConfEnergyValues is not None: 993 ConfMol.SetProp(OptionsInfo["EnergyDataFieldLabel"], ConfEnergyValues[Index]) 994 995 Writer.write(ConfMol) 996 997 def SetConfMolName(Mol, MolName, ConfCount): 998 """Set conf mol name.""" 999 1000 ConfName = "%s_Conf%d" % (MolName, ConfCount) 1001 Mol.SetProp("_Name", ConfName) 1002 1003 def ProcessMCSParameters(): 1004 """Set up and process MCS parameters.""" 1005 1006 SetupMCSParameters() 1007 ProcessSpecifiedMCSParameters() 1008 1009 def SetupMCSParameters(): 1010 """Set up default MCS parameters.""" 1011 1012 OptionsInfo["MCSParams"] = {"MaximizeBonds": True, "Threshold": 0.9, "TimeOut": 3600, "Verbose": False, "MatchValences": True, "MatchChiralTag": False, "RingMatchesRingOnly": True, "CompleteRingsOnly": True, "AtomCompare": rdFMCS.AtomCompare.CompareElements, "BondCompare": rdFMCS.BondCompare.CompareOrder, "SeedSMARTS": "", "MinNumAtoms": 1, "MinNumBonds": 0} 1013 1014 def ProcessSpecifiedMCSParameters(): 1015 """Process specified MCS parameters.""" 1016 1017 if re.match("^auto$", OptionsInfo["SpecifiedMCSParams"], re.I): 1018 # Nothing to process... 1019 return 1020 1021 # Parse specified parameters... 1022 MCSParams = re.sub(" ", "", OptionsInfo["SpecifiedMCSParams"]) 1023 if not MCSParams: 1024 MiscUtil.PrintError("No valid parameter name and value pairs specified using \"-m, --mcsParams\" option.") 1025 1026 MCSParamsWords = MCSParams.split(",") 1027 if len(MCSParamsWords) % 2: 1028 MiscUtil.PrintError("The number of comma delimited paramater names and values, %d, specified using \"-m, --mcsParams\" option must be an even number." % (len(MCSParamsWords))) 1029 1030 # Setup canonical parameter names... 1031 ValidParamNames = [] 1032 CanonicalParamNamesMap = {} 1033 for ParamName in sorted(OptionsInfo["MCSParams"]): 1034 ValidParamNames.append(ParamName) 1035 CanonicalParamNamesMap[ParamName.lower()] = ParamName 1036 1037 # Validate and set paramater names and value... 1038 for Index in range(0, len(MCSParamsWords), 2): 1039 Name = MCSParamsWords[Index] 1040 Value = MCSParamsWords[Index + 1] 1041 1042 CanonicalName = Name.lower() 1043 if not CanonicalName in CanonicalParamNamesMap: 1044 MiscUtil.PrintError("The parameter name, %s, specified using \"-m, --mcsParams\" option is not a valid name. Supported parameter names: %s" % (Name, " ".join(ValidParamNames))) 1045 1046 ParamName = CanonicalParamNamesMap[CanonicalName] 1047 if re.match("^Threshold$", ParamName, re.I): 1048 Value = float(Value) 1049 if Value <= 0.0 or Value > 1.0 : 1050 MiscUtil.PrintError("The parameter value, %s, specified using \"-m, --mcsParams\" option for parameter, %s, is not a valid value. Supported values: > 0 and <= 1.0" % (Value, Name)) 1051 ParamValue = Value 1052 elif re.match("^Timeout$", ParamName, re.I): 1053 Value = int(Value) 1054 if Value <= 0: 1055 MiscUtil.PrintError("The parameter value, %s, specified using \"-m, --mcsParams\" option for parameter, %s, is not a valid value. Supported values: > 0" % (Value, Name)) 1056 ParamValue = Value 1057 elif re.match("^MinNumAtoms$", ParamName, re.I): 1058 Value = int(Value) 1059 if Value < 1: 1060 MiscUtil.PrintError("The parameter value, %s, specified using \"-m, --mcsParams\" option for parameter, %s, is not a valid value. Supported values: >= 1" % (Value, Name)) 1061 ParamValue = Value 1062 elif re.match("^MinNumBonds$", ParamName, re.I): 1063 Value = int(Value) 1064 if Value < 0: 1065 MiscUtil.PrintError("The parameter value, %s, specified using \"-m, --mcsParams\" option for parameter, %s, is not a valid value. Supported values: >=0 " % (Value, Name)) 1066 ParamValue = Value 1067 elif re.match("^AtomCompare$", ParamName, re.I): 1068 if re.match("^CompareAny$", Value, re.I): 1069 ParamValue = rdFMCS.AtomCompare.CompareAny 1070 elif re.match("^CompareElements$", Value, re.I): 1071 ParamValue = Chem.rdFMCS.AtomCompare.CompareElements 1072 elif re.match("^CompareIsotopes$", Value, re.I): 1073 ParamValue = Chem.rdFMCS.AtomCompare.CompareIsotopes 1074 else: 1075 MiscUtil.PrintError("The parameter value, %s, specified using \"-m, --mcsParams\" option for parameter, %s, is not a valid value. Supported values: CompareAny CompareElements CompareIsotopes" % (Value, Name)) 1076 elif re.match("^BondCompare$", ParamName, re.I): 1077 if re.match("^CompareAny$", Value, re.I): 1078 ParamValue = Chem.rdFMCS.BondCompare.CompareAny 1079 elif re.match("^CompareOrder$", Value, re.I): 1080 ParamValue = rdFMCS.BondCompare.CompareOrder 1081 elif re.match("^CompareOrderExact$", Value, re.I): 1082 ParamValue = rdFMCS.BondCompare.CompareOrderExact 1083 else: 1084 MiscUtil.PrintError("The parameter value, %s, specified using \"-m, --mcsParams\" option for parameter, %s, is not a valid value. Supported values: CompareAny CompareOrder CompareOrderExact" % (Value, Name)) 1085 elif re.match("^SeedSMARTS$", ParamName, re.I): 1086 if not len(Value): 1087 MiscUtil.PrintError("The parameter value specified using \"-m, --mcsParams\" option for parameter, %s, is empty. " % (Name)) 1088 ParamValue = Value 1089 else: 1090 if not re.match("^(Yes|No|True|False)$", Value, re.I): 1091 MiscUtil.PrintError("The parameter value, %s, specified using \"-m, --mcsParams\" option for parameter, %s, is not a valid value. Supported values: Yes No True False" % (Value, Name)) 1092 ParamValue = False 1093 if re.match("^(Yes|True)$", Value, re.I): 1094 ParamValue = True 1095 1096 # Set value... 1097 OptionsInfo["MCSParams"][ParamName] = ParamValue 1098 1099 def ProcessOptions(): 1100 """Process and validate command line arguments and options.""" 1101 1102 MiscUtil.PrintInfo("Processing options...") 1103 1104 # Validate options... 1105 ValidateOptions() 1106 1107 OptionsInfo["Infile"] = Options["--infile"] 1108 OptionsInfo["SMILESInfileStatus"] = True if MiscUtil.CheckFileExt(Options["--infile"], "smi csv tsv txt") else False 1109 ParamsDefaultInfoOverride = {"RemoveHydrogens": False} 1110 OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters("--infileParams", Options["--infileParams"], InfileName = Options["--infile"], ParamsDefaultInfo = ParamsDefaultInfoOverride) 1111 1112 OptionsInfo["RefFile"] = Options["--reffile"] 1113 1114 OptionsInfo["Outfile"] = Options["--outfile"] 1115 OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters("--outfileParams", Options["--outfileParams"]) 1116 1117 OptionsInfo["Overwrite"] = Options["--overwrite"] 1118 1119 OptionsInfo["Scaffold"] = Options["--scaffold"] 1120 if re.match("^auto$", Options["--scaffold"], re.I): 1121 UseScaffoldMCS = True 1122 UseScaffoldSMARTS = False 1123 ScaffoldSMARTS = None 1124 else: 1125 UseScaffoldMCS = False 1126 UseScaffoldSMARTS = True 1127 ScaffoldSMARTS = OptionsInfo["Scaffold"] 1128 1129 OptionsInfo["UseScaffoldMCS"] = UseScaffoldMCS 1130 OptionsInfo["UseScaffoldSMARTS"] = UseScaffoldSMARTS 1131 OptionsInfo["ScaffoldSMARTS"] = ScaffoldSMARTS 1132 OptionsInfo["ScaffoldPatternMol"] = None 1133 1134 OptionsInfo["SpecifiedMCSParams"] = Options["--mcsParams"] 1135 ProcessMCSParameters() 1136 1137 OptionsInfo["ScaffoldRMSDOut"] = True if re.match("^yes$", Options["--scaffoldRMSDOut"], re.I) else False 1138 1139 # Method, basis set, and reference wavefunction... 1140 OptionsInfo["BasisSet"] = Options["--basisSet"] 1141 OptionsInfo["BasisSetAuto"] = True if re.match("^auto$", Options["--basisSet"], re.I) else False 1142 1143 OptionsInfo["MethodName"] = Options["--methodName"] 1144 OptionsInfo["MethodNameAuto"] = True if re.match("^auto$", Options["--methodName"], re.I) else False 1145 1146 OptionsInfo["Reference"] = Options["--reference"] 1147 OptionsInfo["ReferenceAuto"] = True if re.match("^auto$", Options["--reference"], re.I) else False 1148 1149 # Run and options parameters... 1150 OptionsInfo["Psi4OptionsParams"] = Psi4Util.ProcessPsi4OptionsParameters("--psi4OptionsParams", Options["--psi4OptionsParams"]) 1151 OptionsInfo["Psi4RunParams"] = Psi4Util.ProcessPsi4RunParameters("--psi4RunParams", Options["--psi4RunParams"], InfileName = OptionsInfo["Infile"]) 1152 1153 # Conformer generation paramaters... 1154 ParamsDefaultInfoOverride = {"MaxConfs": 50} 1155 OptionsInfo["ConfGenerationParams"] = MiscUtil.ProcessOptionConformerParameters("--confParams", Options["--confParams"], ParamsDefaultInfoOverride) 1156 1157 # Write energy parameters... 1158 OptionsInfo["EnergyOut"] = True if re.match("^yes$", Options["--energyOut"], re.I) else False 1159 OptionsInfo["EnergyUnits"] = Options["--energyUnits"] 1160 1161 EnergyDataFieldLabel = Options["--energyDataFieldLabel"] 1162 if re.match("^auto$", EnergyDataFieldLabel, re.I): 1163 EnergyDataFieldLabel = "Psi4_Energy (%s)" % Options["--energyUnits"] 1164 OptionsInfo["EnergyDataFieldLabel"] = EnergyDataFieldLabel 1165 1166 OptionsInfo["EnergyRMSDCalcMode"] = Options["--energyRMSDCalcMode"] 1167 OptionsInfo["EnergyRMSDCalcModeBest"] = True if re.match("^BestRMSD$", Options["--energyRMSDCalcMode"], re.I) else False 1168 1169 OptionsInfo["EnergyRMSDCutoff"] = float(Options["--energyRMSDCutoff"]) 1170 1171 OptionsInfo["EnergyRMSDCutoffMode"] = Options["--energyRMSDCutoffMode"] 1172 OptionsInfo["EnergyRMSDCutoffModeLowest"] = True if re.match("^Lowest$", Options["--energyRMSDCutoffMode"], re.I) else False 1173 1174 # Process energy window... 1175 EnergyWindow = Options["--energyWindow"] 1176 if re.match("^auto$", EnergyWindow, re.I): 1177 # Set default energy window based on units... 1178 EnergyUnits = Options["--energyUnits"] 1179 if re.match("^kcal\/mol$", EnergyUnits, re.I): 1180 EnergyWindow = 20 1181 elif re.match("^kJ\/mol$", EnergyUnits, re.I): 1182 EnergyWindow = 83.68 1183 elif re.match("^eV$", EnergyUnits, re.I): 1184 EnergyWindow = 0.8673 1185 elif re.match("^Hartrees$", EnergyUnits, re.I): 1186 EnergyWindow = 0.03188 1187 else: 1188 MiscUtil.PrintError("Failed to set default value for \"--energyWindow\". The value, %s, specified for option \"--energyUnits\" is not valid. " % EnergyUnits) 1189 else: 1190 EnergyWindow = float(EnergyWindow) 1191 OptionsInfo["EnergyWindow"] = EnergyWindow 1192 1193 OptionsInfo["MaxIters"] = int(Options["--maxIters"]) 1194 1195 OptionsInfo["MPMode"] = True if re.match("^yes$", Options["--mp"], re.I) else False 1196 OptionsInfo["MPParams"] = MiscUtil.ProcessOptionMultiprocessingParameters("--mpParams", Options["--mpParams"]) 1197 1198 # Multiprocessing level... 1199 MPLevelMoleculesMode = False 1200 MPLevelConformersMode = False 1201 MPLevel = Options["--mpLevel"] 1202 if re.match("^Molecules$", MPLevel, re.I): 1203 MPLevelMoleculesMode = True 1204 elif re.match("^Conformers$", MPLevel, re.I): 1205 MPLevelConformersMode = True 1206 else: 1207 MiscUtil.PrintError("The value, %s, specified for option \"--mpLevel\" is not valid. " % MPLevel) 1208 OptionsInfo["MPLevel"] = MPLevel 1209 OptionsInfo["MPLevelMoleculesMode"] = MPLevelMoleculesMode 1210 OptionsInfo["MPLevelConformersMode"] = MPLevelConformersMode 1211 1212 OptionsInfo["Precision"] = int(Options["--precision"]) 1213 OptionsInfo["QuietMode"] = True if re.match("^yes$", Options["--quiet"], re.I) else False 1214 1215 def RetrieveOptions(): 1216 """Retrieve command line arguments and options.""" 1217 1218 # Get options... 1219 global Options 1220 Options = docopt(_docoptUsage_) 1221 1222 # Set current working directory to the specified directory... 1223 WorkingDir = Options["--workingdir"] 1224 if WorkingDir: 1225 os.chdir(WorkingDir) 1226 1227 # Handle examples option... 1228 if "--examples" in Options and Options["--examples"]: 1229 MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_)) 1230 sys.exit(0) 1231 1232 def ValidateOptions(): 1233 """Validate option values.""" 1234 1235 MiscUtil.ValidateOptionTextValue("--energyOut", Options["--energyOut"], "yes no") 1236 MiscUtil.ValidateOptionTextValue("--energyUnits", Options["--energyUnits"], "Hartrees kcal/mol kJ/mol eV") 1237 1238 MiscUtil.ValidateOptionTextValue(" --energyRMSDCalcMode", Options["--energyRMSDCalcMode"], "RMSD BestRMSD") 1239 1240 MiscUtil.ValidateOptionFloatValue("--energyRMSDCutoff", Options["--energyRMSDCutoff"], {">=": 0}) 1241 MiscUtil.ValidateOptionTextValue(" --energyRMSDCutoffMode", Options["--energyRMSDCutoffMode"], "All Lowest") 1242 1243 if not re.match("^auto$", Options["--energyWindow"], re.I): 1244 MiscUtil.ValidateOptionFloatValue("--energyWindow", Options["--energyWindow"], {">": 0}) 1245 1246 MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"]) 1247 MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "sdf sd mol smi txt csv tsv") 1248 1249 MiscUtil.ValidateOptionFilePath("-r, --reffile", Options["--reffile"]) 1250 MiscUtil.ValidateOptionFileExt("-r, --reffile", Options["--reffile"], "sdf sd mol") 1251 1252 MiscUtil.ValidateOptionIntegerValue("--maxIters", Options["--maxIters"], {">": 0}) 1253 1254 MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "sdf sd") 1255 MiscUtil.ValidateOptionsOutputFileOverwrite("-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"]) 1256 MiscUtil.ValidateOptionsDistinctFileNames("-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"]) 1257 1258 MiscUtil.ValidateOptionTextValue("--mp", Options["--mp"], "yes no") 1259 MiscUtil.ValidateOptionTextValue("--mpLevel", Options["--mpLevel"], "Molecules Conformers") 1260 1261 MiscUtil.ValidateOptionIntegerValue("-p, --precision", Options["--precision"], {">": 0}) 1262 MiscUtil.ValidateOptionTextValue("-q, --quiet", Options["--quiet"], "yes no") 1263 1264 MiscUtil.ValidateOptionTextValue("--scaffoldRMSDOut", Options["--scaffoldRMSDOut"], "yes no") 1265 1266 # Setup a usage string for docopt... 1267 _docoptUsage_ = """ 1268 Psi4GenerateConstrainedConformers.py - Generate constrained molecular conformations 1269 1270 Usage: 1271 Psi4GenerateConstrainedConformers.py [--basisSet <text>] [--confParams <Name,Value,...>] [--energyOut <yes or no>] 1272 [--energyDataFieldLabel <text>] [--energyUnits <text>] [--energyRMSDCalcMode <RMSD or BestRMSD>] 1273 [--energyRMSDCutoff <number>] [--energyRMSDCutoffMode <All or Lowest>] [--energyWindow <number>] 1274 [--infileParams <Name,Value,...>] [--maxIters <number>] [--methodName <text>] [--mcsParams <Name,Value,...>] 1275 [--mp <yes or no>] [--mpLevel <Molecules or Conformers>] [--mpParams <Name, Value,...>] 1276 [ --outfileParams <Name,Value,...> ] [--overwrite] [--precision <number> ] 1277 [--psi4OptionsParams <Name,Value,...>] [--psi4RunParams <Name,Value,...>] 1278 [--quiet <yes or no>] [--reference <text>] [--scaffold <auto or SMARTS>] 1279 [--scaffoldRMSDOut <yes or no>] [-w <dir>] -i <infile> -r <reffile> -o <outfile> 1280 Psi4GenerateConstrainedConformers.py -h | --help | -e | --examples 1281 1282 Description: 1283 Generate molecular conformations by performing a constrained energy 1284 minimization against a reference molecule. The molecular conformations 1285 are generated using a combination of distance geometry and forcefield 1286 minimization followed by geometry optimization using a quantum chemistry 1287 method. 1288 1289 An initial set of 3D conformers are generated for input molecules using 1290 distance geometry. A common core scaffold, corresponding to a Maximum 1291 Common Substructure (MCS) or an explicit SMARTS pattern, is identified 1292 between a pair of input and reference molecules. The core scaffold atoms in 1293 input molecules are aligned against the same atoms in the reference molecule. 1294 The energy of aligned structures are sequentially minimized using the forcefield 1295 and a quantum chemistry method to generate the final 3D structures. 1296 1297 A Psi4 XYZ format geometry string is automatically generated for each molecule 1298 in input file. It contains atom symbols and 3D coordinates for each atom in a 1299 molecule. In addition, the formal charge and spin multiplicity are present in the 1300 the geometry string. These values are either retrieved from molecule properties 1301 named 'FormalCharge' and 'SpinMultiplicty' or dynamically calculated for a 1302 molecule. 1303 1304 The supported input file formats are: Mol (.mol), SD (.sdf, .sd), SMILES (.smi, 1305 .csv, .tsv, .txt) 1306 1307 The supported output file formats are: SD (.sdf, .sd) 1308 1309 Options: 1310 -b, --basisSet <text> [default: auto] 1311 Basis set to use for constrained energy minimization. Default: 6-31+G** 1312 for sulfur containing molecules; Otherwise, 6-31G** [ Ref 150 ]. The specified 1313 value must be a valid Psi4 basis set. No validation is performed. 1314 1315 The following list shows a representative sample of basis sets available 1316 in Psi4: 1317 1318 STO-3G, 6-31G, 6-31+G, 6-31++G, 6-31G*, 6-31+G*, 6-31++G*, 1319 6-31G**, 6-31+G**, 6-31++G**, 6-311G, 6-311+G, 6-311++G, 1320 6-311G*, 6-311+G*, 6-311++G*, 6-311G**, 6-311+G**, 6-311++G**, 1321 cc-pVDZ, cc-pCVDZ, aug-cc-pVDZ, cc-pVDZ-DK, cc-pCVDZ-DK, def2-SVP, 1322 def2-SVPD, def2-TZVP, def2-TZVPD, def2-TZVPP, def2-TZVPPD 1323 1324 --confParams <Name,Value,...> [default: auto] 1325 A comma delimited list of parameter name and value pairs for generating 1326 initial 3D coordinates for molecules in input file. A common core scaffold is 1327 identified between a pair of input and reference molecules. The atoms in 1328 common core scaffold of input molecules are aligned against the reference 1329 molecule followed by constrained energy minimization using forcefield 1330 available in RDKit. The 3D structures are subsequently constrained and 1331 minimized by a quantum chemistry method available in Psi4. 1332 1333 The supported parameter names along with their default values are shown 1334 below: 1335 1336 confMethod,ETKDGv2, 1337 forceField,MMFF, forceFieldMMFFVariant,MMFF94, 1338 enforceChirality,yes,embedRMSDCutoff,0.5,maxConfs,50, 1339 useTethers,yes 1340 1341 confMethod,ETKDGv2 [ Possible values: SDG, KDG, ETDG, 1342 ETKDG , or ETKDGv2] 1343 forceField, MMFF [ Possible values: UFF or MMFF ] 1344 forceFieldMMFFVariant,MMFF94 [ Possible values: MMFF94 or MMFF94s ] 1345 enforceChirality,yes [ Possible values: yes or no ] 1346 useTethers,yes [ Possible values: yes or no ] 1347 1348 confMethod: Conformation generation methodology for generating initial 3D 1349 coordinates. Possible values: Standard Distance Geometry (SDG), Experimental 1350 Torsion-angle preference with Distance Geometry (ETDG), basic Knowledge-terms 1351 with Distance Geometry (KDG) and Experimental Torsion-angle preference 1352 along with basic Knowledge-terms and Distance Geometry (ETKDG or 1353 ETKDGv2) [Ref 129, 167] . 1354 1355 forceField: Forcefield method to use for constrained energy minimization. 1356 Possible values: Universal Force Field (UFF) [ Ref 81 ] or Merck Molecular 1357 Mechanics Force Field [ Ref 83-87 ] . 1358 1359 enforceChirality: Enforce chirality for defined chiral centers during 1360 forcefield minimization. 1361 1362 maxConfs: Maximum number of conformations to generate for each molecule 1363 during the generation of an initial 3D conformation ensemble using conformation 1364 generation methodology. The conformations are constrained and minimized using 1365 the specified forcefield and a quantum chemistry method. The lowest energy 1366 conformation is written to the output file. 1367 1368 embedRMSDCutoff: RMSD cutoff for retaining initial set of conformers embedded 1369 using distance geometry and forcefield minimization. All embedded conformers 1370 are kept for 'None' value. Otherwise, only those conformers which are different 1371 from each other by the specified RMSD cutoff, 0.5 by default, are kept. The first 1372 embedded conformer is always retained. 1373 1374 useTethers: Use tethers to optimize the final embedded conformation by 1375 applying a series of extra forces to align matching atoms to the positions of 1376 the core atoms. Otherwise, use simple distance constraints during the 1377 optimization. 1378 --energyOut <yes or no> [default: yes] 1379 Write out energy values. 1380 --energyDataFieldLabel <text> [default: auto] 1381 Energy data field label for writing energy values. Default: Psi4_Energy (<Units>). 1382 --energyUnits <text> [default: kcal/mol] 1383 Energy units. Possible values: Hartrees, kcal/mol, kJ/mol, or eV. 1384 --energyRMSDCalcMode <RMSD or BestRMSD> [default: RMSD] 1385 Methodology for calculating RMSD values during the application of RMSD 1386 cutoff for retaining conformations after the final energy minimization. Possible 1387 values: RMSD or BestRMSD. This option is ignore during 'None' value of 1388 '--energyRMSDCutoff' option. 1389 1390 During BestRMSMode mode, the RDKit 'function AllChem.GetBestRMS' is used to 1391 align and calculate RMSD. This function calculates optimal RMSD for aligning two 1392 molecules, taking symmetry into account. Otherwise, the RMSD value is calculated 1393 using 'AllChem.GetConformerRMS' without changing the atom order. A word to the 1394 wise from RDKit documentation: The AllChem.GetBestRMS function will attempt to 1395 align all permutations of matching atom orders in both molecules, for some molecules 1396 it will lead to 'combinatorial explosion'. 1397 --energyRMSDCutoff <number> [default: 0.5] 1398 RMSD cutoff for retaining conformations after the final energy minimization. 1399 By default, only those conformations which are different from other low 1400 energy conformation by the specified RMSD cutoff and are with in the 1401 specified energy window are kept. The lowest energy conformation is always 1402 retained. A value of zero keeps all minimized conformations with in the 1403 specified energy window from the lowest energy. 1404 --energyRMSDCutoffMode <All or Lowest> [default: All] 1405 RMSD cutoff mode for retaining conformations after the final energy 1406 minimization. Possible values: All or Lowest. The RMSD values are compared 1407 against all the selected conformations or the lowest energy conformation during 1408 'All' and 'Lowest' value of '--energyRMSDCutoffMode'. This option is ignored 1409 during zero value of '--energyRMSDCutoff'. 1410 1411 By default, only those conformations which all different from all selected 1412 low energy conformations by the specified RMSD cutoff and are with in the 1413 specified energy window are kept. 1414 --energyWindow <number> [default: auto] 1415 Psi4 Energy window for selecting conformers after the final energy minimization. 1416 The default value is dependent on '--energyUnits': 20 kcal/mol, 83.68 kJ/mol, 1417 0.8673 ev, or 0.03188 Hartrees. The specified value must be in '--energyUnits'. 1418 -e, --examples 1419 Print examples. 1420 -h, --help 1421 Print this help message. 1422 -i, --infile <infile> 1423 Input file name. 1424 --infileParams <Name,Value,...> [default: auto] 1425 A comma delimited list of parameter name and value pairs for reading 1426 molecules from files. The supported parameter names for different file 1427 formats, along with their default values, are shown below: 1428 1429 SD, MOL: removeHydrogens,no,sanitize,yes,strictParsing,yes 1430 1431 SMILES: smilesColumn,1,smilesNameColumn,2,smilesDelimiter,space, 1432 smilesTitleLine,auto,sanitize,yes 1433 1434 Possible values for smilesDelimiter: space, comma or tab. 1435 --maxIters <number> [default: 50] 1436 Maximum number of iterations to perform for each molecule or conformer 1437 during energy minimization by a quantum chemistry method. 1438 -m, --methodName <text> [default: auto] 1439 Method to use for constrained energy minimization. Default: B3LYP [ Ref 150 ]. 1440 The specified value must be a valid Psi4 method name. No validation is 1441 performed. 1442 1443 The following list shows a representative sample of methods available 1444 in Psi4: 1445 1446 B1LYP, B2PLYP, B2PLYP-D3BJ, B2PLYP-D3MBJ, B3LYP, B3LYP-D3BJ, 1447 B3LYP-D3MBJ, CAM-B3LYP, CAM-B3LYP-D3BJ, HF, HF-D3BJ, HF3c, M05, 1448 M06, M06-2x, M06-HF, M06-L, MN12-L, MN15, MN15-D3BJ,PBE, PBE0, 1449 PBEH3c, PW6B95, PW6B95-D3BJ, WB97, WB97X, WB97X-D, WB97X-D3BJ 1450 1451 --mcsParams <Name,Value,...> [default: auto] 1452 Parameter values to use for identifying a maximum common substructure 1453 (MCS) in between a pair of reference and input molecules.In general, it is a 1454 comma delimited list of parameter name and value pairs. The supported 1455 parameter names along with their default values are shown below: 1456 1457 atomCompare,CompareElements,bondCompare,CompareOrder, 1458 maximizeBonds,yes,matchValences,yes,matchChiralTag,no, 1459 minNumAtoms,1,minNumBonds,0,ringMatchesRingOnly,yes, 1460 completeRingsOnly,yes,threshold,1.0,timeOut,3600,seedSMARTS,none 1461 1462 Possible values for atomCompare: CompareAny, CompareElements, 1463 CompareIsotopes. Possible values for bondCompare: CompareAny, 1464 CompareOrder, CompareOrderExact. 1465 1466 A brief description of MCS parameters taken from RDKit documentation is 1467 as follows: 1468 1469 atomCompare - Controls match between two atoms 1470 bondCompare - Controls match between two bonds 1471 maximizeBonds - Maximize number of bonds instead of atoms 1472 matchValences - Include atom valences in the MCS match 1473 matchChiralTag - Include atom chirality in the MCS match 1474 minNumAtoms - Minimum number of atoms in the MCS match 1475 minNumBonds - Minimum number of bonds in the MCS match 1476 ringMatchesRingOnly - Ring bonds only match other ring bonds 1477 completeRingsOnly - Partial rings not allowed during the match 1478 threshold - Fraction of the dataset that must contain the MCS 1479 seedSMARTS - SMARTS string as the seed of the MCS 1480 timeout - Timeout for the MCS calculation in seconds 1481 1482 --mp <yes or no> [default: no] 1483 Use multiprocessing. 1484 1485 By default, input data is retrieved in a lazy manner via mp.Pool.imap() 1486 function employing lazy RDKit data iterable. This allows processing of 1487 arbitrary large data sets without any additional requirements memory. 1488 1489 All input data may be optionally loaded into memory by mp.Pool.map() 1490 before starting worker processes in a process pool by setting the value 1491 of 'inputDataMode' to 'InMemory' in '--mpParams' option. 1492 1493 A word to the wise: The default 'chunkSize' value of 1 during 'Lazy' input 1494 data mode may adversely impact the performance. The '--mpParams' section 1495 provides additional information to tune the value of 'chunkSize'. 1496 --mpLevel <Molecules or Conformers> [default: Molecules] 1497 Perform multiprocessing at molecules or conformers level. Possible values: 1498 Molecules or Conformers. The 'Molecules' value starts a process pool at the 1499 molecules level. All conformers of a molecule are processed in a single 1500 process. The 'Conformers' value, however, starts a process pool at the 1501 conformers level. Each conformer of a molecule is processed in an individual 1502 process in the process pool. The default Psi4 'OutputFile' is set to 'quiet' 1503 using '--psi4RunParams' for 'Conformers' level. Otherwise, it may generate 1504 a large number of Psi4 output files. 1505 --mpParams <Name,Value,...> [default: auto] 1506 A comma delimited list of parameter name and value pairs to configure 1507 multiprocessing. 1508 1509 The supported parameter names along with their default and possible 1510 values are shown below: 1511 1512 chunkSize, auto 1513 inputDataMode, Lazy [ Possible values: InMemory or Lazy ] 1514 numProcesses, auto [ Default: mp.cpu_count() ] 1515 1516 These parameters are used by the following functions to configure and 1517 control the behavior of multiprocessing: mp.Pool(), mp.Pool.map(), and 1518 mp.Pool.imap(). 1519 1520 The chunkSize determines chunks of input data passed to each worker 1521 process in a process pool by mp.Pool.map() and mp.Pool.imap() functions. 1522 The default value of chunkSize is dependent on the value of 'inputDataMode'. 1523 1524 The mp.Pool.map() function, invoked during 'InMemory' input data mode, 1525 automatically converts RDKit data iterable into a list, loads all data into 1526 memory, and calculates the default chunkSize using the following method 1527 as shown in its code: 1528 1529 chunkSize, extra = divmod(len(dataIterable), len(numProcesses) * 4) 1530 if extra: chunkSize += 1 1531 1532 For example, the default chunkSize will be 7 for a pool of 4 worker processes 1533 and 100 data items. 1534 1535 The mp.Pool.imap() function, invoked during 'Lazy' input data mode, employs 1536 'lazy' RDKit data iterable to retrieve data as needed, without loading all the 1537 data into memory. Consequently, the size of input data is not known a priori. 1538 It's not possible to estimate an optimal value for the chunkSize. The default 1539 chunkSize is set to 1. 1540 1541 The default value for the chunkSize during 'Lazy' data mode may adversely 1542 impact the performance due to the overhead associated with exchanging 1543 small chunks of data. It is generally a good idea to explicitly set chunkSize to 1544 a larger value during 'Lazy' input data mode, based on the size of your input 1545 data and number of processes in the process pool. 1546 1547 The mp.Pool.map() function waits for all worker processes to process all 1548 the data and return the results. The mp.Pool.imap() function, however, 1549 returns the the results obtained from worker processes as soon as the 1550 results become available for specified chunks of data. 1551 1552 The order of data in the results returned by both mp.Pool.map() and 1553 mp.Pool.imap() functions always corresponds to the input data. 1554 -o, --outfile <outfile> 1555 Output file name. 1556 --outfileParams <Name,Value,...> [default: auto] 1557 A comma delimited list of parameter name and value pairs for writing 1558 molecules to files. The supported parameter names for different file 1559 formats, along with their default values, are shown below: 1560 1561 SD: kekulize,yes,forceV3000,no 1562 1563 --overwrite 1564 Overwrite existing files. 1565 --precision <number> [default: 6] 1566 Floating point precision for writing energy values. 1567 --psi4OptionsParams <Name,Value,...> [default: none] 1568 A comma delimited list of Psi4 option name and value pairs for setting 1569 global and module options. The names are 'option_name' for global options 1570 and 'module_name__option_name' for options local to a module. The 1571 specified option names must be valid Psi4 names. No validation is 1572 performed. 1573 1574 The specified option name and value pairs are processed and passed to 1575 psi4.set_options() as a dictionary. The supported value types are float, 1576 integer, boolean, or string. The float value string is converted into a float. 1577 The valid values for a boolean string are yes, no, true, false, on, or off. 1578 --psi4RunParams <Name,Value,...> [default: auto] 1579 A comma delimited list of parameter name and value pairs for configuring 1580 Psi4 jobs. 1581 1582 The supported parameter names along with their default and possible 1583 values are shown below: 1584 1585 MemoryInGB, 1 1586 NumThreads, 1 1587 OutputFile, auto [ Possible values: stdout, quiet, or FileName ] 1588 ScratchDir, auto [ Possivle values: DirName] 1589 RemoveOutputFile, yes [ Possible values: yes, no, true, or false] 1590 1591 These parameters control the runtime behavior of Psi4. 1592 1593 The default file name for 'OutputFile' is <InFileRoot>_Psi4.out. The PID 1594 is appended to output file name during multiprocessing as shown: 1595 <InFileRoot>_Psi4_<PIDNum>.out. The 'stdout' value for 'OutputType' 1596 sends Psi4 output to stdout. The 'quiet' or 'devnull' value suppresses 1597 all Psi4 output. The 'OutputFile' is set to 'quiet' for 'auto' value during 1598 'Conformers' of '--mpLevel' option. 1599 1600 The default 'Yes' value of 'RemoveOutputFile' option forces the removal 1601 of any existing Psi4 before creating new files to append output from 1602 multiple Psi4 runs. 1603 1604 The option 'ScratchDir' is a directory path to the location of scratch 1605 files. The default value corresponds to Psi4 default. It may be used to 1606 override the deafult path. 1607 -q, --quiet <yes or no> [default: no] 1608 Use quiet mode. The warning and information messages will not be printed. 1609 -r, --reffile <reffile> 1610 Reference input file name containing a 3D reference molecule. A common 1611 core scaffold must be present in a pair of an input and reference molecules. 1612 Otherwise, no constrained minimization is performed on the input molecule. 1613 --reference <text> [default: auto] 1614 Reference wave function to use for energy calculation. Default: RHF or UHF. 1615 The default values are Restricted Hartree-Fock (RHF) for closed-shell molecules 1616 with all electrons paired and Unrestricted Hartree-Fock (UHF) for open-shell 1617 molecules with unpaired electrons. 1618 1619 The specified value must be a valid Psi4 reference wave function. No validation 1620 is performed. For example: ROHF, CUHF, RKS, etc. 1621 1622 The spin multiplicity determines the default value of reference wave function 1623 for input molecules. It is calculated from number of free radical electrons using 1624 Hund's rule of maximum multiplicity defined as 2S + 1 where S is the total 1625 electron spin. The total spin is 1/2 the number of free radical electrons in a 1626 molecule. The value of 'SpinMultiplicity' molecule property takes precedence 1627 over the calculated value of spin multiplicity. 1628 -s, --scaffold <auto or SMARTS> [default: auto] 1629 Common core scaffold between a pair of input and reference molecules used for 1630 constrained minimization of molecules in input file. Possible values: Auto or a 1631 valid SMARTS pattern. The common core scaffold is automatically detected 1632 corresponding to the Maximum Common Substructure (MCS) between a pair of 1633 reference and input molecules. A valid SMARTS pattern may be optionally specified 1634 for the common core scaffold. 1635 --scaffoldRMSDOut <yes or no> [default: No] 1636 Write out RMSD value for common core alignment between a pair of input and 1637 reference molecules. 1638 -w, --workingdir <dir> 1639 Location of working directory which defaults to the current directory. 1640 1641 Examples: 1642 To generate conformers by performing constrained energy minimization for molecules 1643 in a SMILES file against a reference 3D molecule in a SD file using a common core 1644 scaffold between pairs of input and reference molecules identified using MCS, 1645 generating up to 50 conformations using ETKDGv2 methodology followed by initial 1646 MMFF forcefield minimization and final energy minimization using B3LYP/6-31G** 1647 and B3LYP/6-31+G** for non-sulfur and sulfur containing molecules, applying 1648 energy RMSD cutoff of 0.5 and energy window value value of 20 kcal/mol, and 1649 write out a SD file: 1650 1651 % Psi4GenerateConstrainedConformers.py -i Psi4SampleAlkanes.smi 1652 -r Psi4SampleEthane3D.sdf -o Psi4SampleAlkanesOut.sdf 1653 1654 To run the first example in a quiet mode and write out a SD file, type: 1655 1656 % Psi4GenerateConstrainedConformers.py -q yes -i Psi4SampleAlkanes.smi 1657 -r Psi4SampleEthane3D.sdf -o Psi4SampleAlkanesOut.sdf 1658 1659 To rerun the first example in multiprocessing mode on all available CPUs 1660 without loading all data into memory and write out a SD file, type: 1661 1662 % Psi4GenerateConstrainedConformers.py --mp yes 1663 -i Psi4SampleAlkanes.smi -r Psi4SampleEthane3D.sdf 1664 -o Psi4SampleAlkanesOut.sdf 1665 1666 To run the first example in multiprocessing mode on all available CPUs 1667 by loading all data into memory and write out a SD file, type: 1668 1669 % Psi4GenerateConstrainedConformers.py --mp yes --mpParams 1670 "inputDataMode,InMemory"-i Psi4SampleAlkanes.smi 1671 -r Psi4SampleEthane3D.sdf -o Psi4SampleAlkanesOut.sdf 1672 1673 To rerun the first example in multiprocessing mode on specific number of 1674 CPUs and chunk size without loading all data into memory and write out a SD file, 1675 type: 1676 1677 % Psi4GenerateConstrainedConformers.py --mp yes --mpParams 1678 "inputDataMode,Lazy,numProcesses,4,chunkSize,8" 1679 -i Psi4SampleAlkanes.smi -r Psi4SampleEthane3D.sdf 1680 -o Psi4SampleAlkanesOut.sdf 1681 1682 To rerun the first example using an explicit SMARTS string for a common core 1683 scaffold and write out a SD file, type: 1684 1685 % Psi4GenerateConstrainedConformers.py --scaffold "CC" 1686 -i Psi4SampleAlkanes.smi -r Psi4SampleEthane3D.sdf 1687 -o Psi4SampleAlkanesOut.sdf 1688 1689 To run the first example using a specific set of parameters for generating 1690 an initial set of conformers followed by energy minimization using forcefield 1691 and a quantum chemistry method and write out a SD file type: 1692 1693 % Psi4GenerateConstrainedConformers.py --confParams "confMethod, 1694 ETKDGv2, forceField,MMFF, forceFieldMMFFVariant,MMFF94s, maxConfs,20, 1695 embedRMSDCutoff,0.5" --energyUnits "kJ/mol" -m B3LYP 1696 -b "6-31+G**" --maxIters 20 --energyRMSDCutoff 0.5 1697 --energyRMSDCutoffMode All -i Psi4SampleAlkanes.sdf 1698 -r Psi4SampleEthane3D.sdf -o Psi4SampleAlkanesOut.sdf 1699 1700 To rerun the first example using molecules in a CSV SMILES file, SMILES 1701 strings in column 1, name in column2, and write out a SD file, type: 1702 1703 % Psi4GenerateConstrainedConformers.py --infileParams 1704 "smilesDelimiter,comma,smilesTitleLine,yes,smilesColumn,1, 1705 smilesNameColumn,2" -i Psi4SampleAlkanes.csv 1706 -r Psi4SampleEthane3D.sdf -o Psi4SampleAlkanesOut.sdf 1707 1708 Author: 1709 1710 Manish Sud(msud@san.rr.com) 1711 1712 See also: 1713 Psi4CalculateEnergy.py, Psi4CalculatePartialCharges.py, Psi4GenerateConformers.py, 1714 Psi4PerformConstrainedMinimization.py, Psi4PerformMinimization.py 1715 1716 Copyright: 1717 Copyright (C) 2024 Manish Sud. All rights reserved. 1718 1719 The functionality available in this script is implemented using Psi4, an 1720 open source quantum chemistry software package, and RDKit, an open 1721 source toolkit for cheminformatics developed by Greg Landrum. 1722 1723 This file is part of MayaChemTools. 1724 1725 MayaChemTools is free software; you can redistribute it and/or modify it under 1726 the terms of the GNU Lesser General Public License as published by the Free 1727 Software Foundation; either version 3 of the License, or (at your option) any 1728 later version. 1729 1730 """ 1731 1732 if __name__ == "__main__": 1733 main()