1 #!/bin/env python 2 # 3 # File: Psi4PerformConstrainedMinimization.py 4 # Author: Manish Sud <msud@san.rr.com> 5 # 6 # Copyright (C) 2025 Manish Sud. All rights reserved. 7 # 8 # The functionality available in this script is implemented using Psi4, an 9 # open source quantum chemistry software package, and RDKit, an open 10 # source toolkit for cheminformatics developed by Greg Landrum. 11 # 12 # This file is part of MayaChemTools. 13 # 14 # MayaChemTools is free software; you can redistribute it and/or modify it under 15 # the terms of the GNU Lesser General Public License as published by the Free 16 # Software Foundation; either version 3 of the License, or (at your option) any 17 # later version. 18 # 19 # MayaChemTools is distributed in the hope that it will be useful, but without 20 # any warranty; without even the implied warranty of merchantability of fitness 21 # for a particular purpose. See the GNU Lesser General Public License for more 22 # details. 23 # 24 # You should have received a copy of the GNU Lesser General Public License 25 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or 26 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, 27 # Boston, MA, 02111-1307, USA. 28 # 29 30 from __future__ import print_function 31 32 # Add local python path to the global path and import standard library modules... 33 import os 34 import sys; sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python")) 35 import time 36 import re 37 import shutil 38 import multiprocessing as mp 39 40 # Psi4 imports... 41 if (hasattr(shutil, 'which') and shutil.which("psi4") is None): 42 sys.stderr.write("\nWarning: Failed to find 'psi4' in your PATH indicating potential issues with your\n") 43 sys.stderr.write("Psi4 environment. The 'import psi4' directive in the global scope of the script\n") 44 sys.stderr.write("interferes with the multiprocessing functionality. It is imported later in the\n") 45 sys.stderr.write("local scope during the execution of the script and may fail. Check/update your\n") 46 sys.stderr.write("Psi4 environment and try again.\n\n") 47 48 # RDKit imports... 49 try: 50 from rdkit import rdBase 51 from rdkit import Chem 52 from rdkit.Chem import AllChem, rdMolAlign 53 from rdkit.Chem import rdFMCS 54 except ImportError as ErrMsg: 55 sys.stderr.write("\nFailed to import RDKit module/package: %s\n" % ErrMsg) 56 sys.stderr.write("Check/update your RDKit environment and try again.\n\n") 57 sys.exit(1) 58 59 # MayaChemTools imports... 60 try: 61 from docopt import docopt 62 import MiscUtil 63 import Psi4Util 64 import RDKitUtil 65 except ImportError as ErrMsg: 66 sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg) 67 sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n") 68 sys.exit(1) 69 70 ScriptName = os.path.basename(sys.argv[0]) 71 Options = {} 72 OptionsInfo = {} 73 74 def main(): 75 """Start execution of the script.""" 76 77 MiscUtil.PrintInfo("\n%s (Psi4: Imported later; RDKit v%s; MayaChemTools v%s; %s): Starting...\n" % (ScriptName, rdBase.rdkitVersion, MiscUtil.GetMayaChemToolsVersion(), time.asctime())) 78 79 (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime() 80 81 # Retrieve command line arguments and options... 82 RetrieveOptions() 83 84 # Process and validate command line arguments and options... 85 ProcessOptions() 86 87 # Perform actions required by the script... 88 PerformConstrainedMinimization() 89 90 MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName) 91 MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime)) 92 93 def PerformConstrainedMinimization(): 94 """Perform constrained minimization.""" 95 96 # Read and validate reference molecule... 97 RefMol = RetrieveReferenceMolecule() 98 99 # Setup a molecule reader for input file... 100 MiscUtil.PrintInfo("\nProcessing file %s..." % OptionsInfo["Infile"]) 101 Mols = RDKitUtil.ReadMolecules(OptionsInfo["Infile"], **OptionsInfo["InfileParams"]) 102 103 # Set up a molecule writer... 104 Writer = RDKitUtil.MoleculesWriter(OptionsInfo["Outfile"], **OptionsInfo["OutfileParams"]) 105 if Writer is None: 106 MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OptionsInfo["Outfile"]) 107 MiscUtil.PrintInfo("Generating file %s..." % OptionsInfo["Outfile"]) 108 109 MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount, WriteFailedCount = ProcessMolecules(RefMol, Mols, Writer) 110 111 if Writer is not None: 112 Writer.close() 113 114 MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount) 115 MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount) 116 MiscUtil.PrintInfo("Number of molecules with missing core scaffold: %d" % CoreScaffoldMissingCount) 117 MiscUtil.PrintInfo("Number of molecules failed during conformation generation or minimization: %d" % MinimizationFailedCount) 118 MiscUtil.PrintInfo("Number of molecules failed during writing: %d" % WriteFailedCount) 119 MiscUtil.PrintInfo("Number of ignored molecules: %d" % (MolCount - ValidMolCount + CoreScaffoldMissingCount + MinimizationFailedCount + WriteFailedCount)) 120 121 def ProcessMolecules(RefMol, Mols, Writer): 122 """Process and minimize molecules.""" 123 124 if OptionsInfo["MPMode"]: 125 return ProcessMoleculesUsingMultipleProcesses(RefMol, Mols, Writer) 126 else: 127 return ProcessMoleculesUsingSingleProcess(RefMol, Mols, Writer) 128 129 def ProcessMoleculesUsingSingleProcess(RefMol, Mols, Writer): 130 """Process and minimize molecules using a single process.""" 131 132 # Intialize Psi4... 133 MiscUtil.PrintInfo("\nInitializing Psi4...") 134 Psi4Handle = Psi4Util.InitializePsi4(Psi4RunParams = OptionsInfo["Psi4RunParams"], Psi4OptionsParams = OptionsInfo["Psi4OptionsParams"], PrintVersion = True, PrintHeader = True) 135 OptionsInfo["psi4"] = Psi4Handle 136 137 # Setup max iterations global variable... 138 Psi4Util.UpdatePsi4OptionsParameters(Psi4Handle, {'GEOM_MAXITER': OptionsInfo["MaxIters"]}) 139 140 # Setup conversion factor for energy units... 141 SetupEnergyConversionFactor(Psi4Handle) 142 143 (MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount, WriteFailedCount) = [0] * 5 144 145 for Mol in Mols: 146 MolCount += 1 147 148 if not CheckAndValidateMolecule(Mol, MolCount): 149 continue 150 151 # Setup 2D coordinates for SMILES input file... 152 if OptionsInfo["SMILESInfileStatus"]: 153 AllChem.Compute2DCoords(Mol) 154 155 ValidMolCount += 1 156 157 # Setup a reference molecule core containing common scaffold atoms... 158 RefMolCore = SetupCoreScaffold(RefMol, Mol, MolCount) 159 if RefMolCore is None: 160 CoreScaffoldMissingCount += 1 161 continue 162 163 Mol, CalcStatus, Energy, ScaffoldEmbedRMSD = ConstrainAndMinimizeMolecule(Psi4Handle, Mol, RefMolCore, MolCount) 164 165 if not CalcStatus: 166 MinimizationFailedCount += 1 167 continue 168 169 WriteStatus = WriteMolecule(Writer, Mol, MolCount, Energy, ScaffoldEmbedRMSD) 170 171 if not WriteStatus: 172 WriteFailedCount += 1 173 174 return (MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount, WriteFailedCount) 175 176 def ProcessMoleculesUsingMultipleProcesses(RefMol, Mols, Writer): 177 """Process and minimize molecules using multiprocessing.""" 178 179 if OptionsInfo["MPLevelConformersMode"]: 180 return ProcessMoleculesUsingMultipleProcessesAtConformersLevel(RefMol, Mols, Writer) 181 elif OptionsInfo["MPLevelMoleculesMode"]: 182 return ProcessMoleculesUsingMultipleProcessesAtMoleculesLevel(RefMol, Mols, Writer) 183 else: 184 MiscUtil.PrintError("The value, %s, option \"--mpLevel\" is not supported." % (OptionsInfo["MPLevel"])) 185 186 def ProcessMoleculesUsingMultipleProcessesAtMoleculesLevel(RefMol, Mols, Writer): 187 """Process and minimize molecules using multiprocessing at molecules level.""" 188 189 MiscUtil.PrintInfo("\nPerforming constrained minimization with generation of conformers using multiprocessing at molecules level...") 190 191 MPParams = OptionsInfo["MPParams"] 192 193 # Setup data for initializing a worker process... 194 OptionsInfo["EncodedRefMol"] = RDKitUtil.MolToBase64EncodedMolString(RefMol) 195 InitializeWorkerProcessArgs = (MiscUtil.ObjectToBase64EncodedString(Options), MiscUtil.ObjectToBase64EncodedString(OptionsInfo)) 196 197 # Setup a encoded mols data iterable for a worker process... 198 WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(Mols) 199 200 # Setup process pool along with data initialization for each process... 201 if not OptionsInfo["QuietMode"]: 202 MiscUtil.PrintInfo("\nConfiguring multiprocessing using %s method..." % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()")) 203 MiscUtil.PrintInfo("NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n" % (MPParams["NumProcesses"], MPParams["InputDataMode"], ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"]))) 204 205 ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeWorkerProcess, InitializeWorkerProcessArgs) 206 207 # Start processing... 208 if re.match("^Lazy$", MPParams["InputDataMode"], re.I): 209 Results = ProcessPool.imap(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"]) 210 elif re.match("^InMemory$", MPParams["InputDataMode"], re.I): 211 Results = ProcessPool.map(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"]) 212 else: 213 MiscUtil.PrintError("The value, %s, specified for \"--inputDataMode\" is not supported." % (MPParams["InputDataMode"])) 214 215 # Print out Psi4 version in the main process... 216 MiscUtil.PrintInfo("\nInitializing Psi4...\n") 217 Psi4Handle = Psi4Util.InitializePsi4(PrintVersion = True, PrintHeader = False) 218 OptionsInfo["psi4"] = Psi4Handle 219 220 (MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount, WriteFailedCount) = [0] * 5 221 222 for Result in Results: 223 MolCount += 1 224 MolIndex, EncodedMol, CoreScaffoldMissingStatus, CalcStatus, Energy, ScaffoldEmbedRMSD = Result 225 226 if EncodedMol is None: 227 continue 228 ValidMolCount += 1 229 230 if CoreScaffoldMissingStatus: 231 CoreScaffoldMissingStatus += 1 232 continue 233 234 if not CalcStatus: 235 MinimizationFailedCount += 1 236 continue 237 238 Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol) 239 WriteStatus = WriteMolecule(Writer, Mol, MolCount, Energy, ScaffoldEmbedRMSD) 240 241 if not WriteStatus: 242 WriteFailedCount += 1 243 244 return (MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount, WriteFailedCount) 245 246 def InitializeWorkerProcess(*EncodedArgs): 247 """Initialize data for a worker process.""" 248 249 global Options, OptionsInfo 250 251 if not OptionsInfo["QuietMode"]: 252 MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid()) 253 254 # Decode Options and OptionInfo... 255 Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0]) 256 OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1]) 257 258 # Decode RefMol... 259 OptionsInfo["RefMol"] = RDKitUtil.MolFromBase64EncodedMolString(OptionsInfo["EncodedRefMol"]) 260 261 # Psi4 is initialized in the worker process to avoid creation of redundant Psi4 262 # output files for each process... 263 OptionsInfo["Psi4Initialized"] = False 264 265 def WorkerProcess(EncodedMolInfo): 266 """Process data for a worker process.""" 267 268 if not OptionsInfo["Psi4Initialized"]: 269 InitializePsi4ForWorkerProcess() 270 271 MolIndex, EncodedMol = EncodedMolInfo 272 273 MolNum = MolIndex + 1 274 CoreScaffoldMissingStatus = False 275 CalcStatus = False 276 Energy = None 277 ScaffoldEmbedRMSD = None 278 279 if EncodedMol is None: 280 return [MolIndex, None, CoreScaffoldMissingStatus, CalcStatus, Energy, ScaffoldEmbedRMSD] 281 282 Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol) 283 if not CheckAndValidateMolecule(Mol, MolNum): 284 return [MolIndex, None, CoreScaffoldMissingStatus, CalcStatus, Energy, ScaffoldEmbedRMSD] 285 286 # Setup 2D coordinates for SMILES input file... 287 if OptionsInfo["SMILESInfileStatus"]: 288 AllChem.Compute2DCoords(Mol) 289 290 # Setup a reference molecule core containing common scaffold atoms... 291 RefMolCore = SetupCoreScaffold(OptionsInfo["RefMol"], Mol, MolNum) 292 if RefMolCore is None: 293 CoreScaffoldMissingStatus = True 294 return [MolIndex, EncodedMol, CalcStatus, CoreScaffoldMissingStatus, Energy, ScaffoldEmbedRMSD] 295 296 Mol, CalcStatus, Energy, ScaffoldEmbedRMSD = ConstrainAndMinimizeMolecule(OptionsInfo["psi4"], Mol, RefMolCore, MolNum) 297 298 return [MolIndex, RDKitUtil.MolToBase64EncodedMolString(Mol, PropertyPickleFlags = Chem.PropertyPickleOptions.MolProps | Chem.PropertyPickleOptions.PrivateProps), CoreScaffoldMissingStatus, CalcStatus, Energy, ScaffoldEmbedRMSD] 299 300 def ProcessMoleculesUsingMultipleProcessesAtConformersLevel(RefMol, Mols, Writer): 301 """Process and minimize molecules using multiprocessing at conformers level.""" 302 303 MiscUtil.PrintInfo("\nPerforming constrained minimization with generation of conformers using multiprocessing at conformers level...") 304 305 (MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount, WriteFailedCount) = [0] * 5 306 307 for Mol in Mols: 308 MolCount += 1 309 310 if not CheckAndValidateMolecule(Mol, MolCount): 311 continue 312 313 # Setup 2D coordinates for SMILES input file... 314 if OptionsInfo["SMILESInfileStatus"]: 315 AllChem.Compute2DCoords(Mol) 316 317 ValidMolCount += 1 318 319 # Setup a reference molecule core containing common scaffold atoms... 320 RefMolCore = SetupCoreScaffold(RefMol, Mol, MolCount) 321 if RefMolCore is None: 322 CoreScaffoldMissingCount += 1 323 continue 324 325 Mol, CalcStatus, Energy, ScaffoldEmbedRMSD = ProcessConformersUsingMultipleProcesses(Mol, RefMolCore, MolCount) 326 327 if not CalcStatus: 328 MinimizationFailedCount += 1 329 continue 330 331 WriteStatus = WriteMolecule(Writer, Mol, MolCount, Energy, ScaffoldEmbedRMSD) 332 333 if not WriteStatus: 334 WriteFailedCount += 1 335 336 return (MolCount, ValidMolCount, CoreScaffoldMissingCount, MinimizationFailedCount, WriteFailedCount) 337 338 def ProcessConformersUsingMultipleProcesses(Mol, RefMolCore, MolNum): 339 """Generate conformers and minimize them using multiple processes. """ 340 341 # Add hydrogens... 342 Mol = Chem.AddHs(Mol, addCoords = True) 343 344 MolName = RDKitUtil.GetMolName(Mol, MolNum) 345 346 # Setup constrained conformers... 347 MolConfs, MolConfsStatus = ConstrainEmbedAndMinimizeMoleculeUsingRDKit(Mol, RefMolCore, MolNum) 348 if not MolConfsStatus: 349 if not OptionsInfo["QuietMode"]: 350 MiscUtil.PrintWarning("Conformation generation couldn't be performed for molecule %s: Constrained embedding failed...\n" % MolName) 351 return (Mol, False, None, None) 352 353 MPParams = OptionsInfo["MPParams"] 354 355 # Setup data for initializing a worker process... 356 OptionsInfo["EncodedRefMolCore"] = RDKitUtil.MolToBase64EncodedMolString(RefMolCore) 357 InitializeWorkerProcessArgs = (MiscUtil.ObjectToBase64EncodedString(Options), MiscUtil.ObjectToBase64EncodedString(OptionsInfo)) 358 359 # Setup a encoded mols data iterable for a worker process... 360 WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(MolConfs) 361 362 # Setup process pool along with data initialization for each process... 363 if not OptionsInfo["QuietMode"]: 364 MiscUtil.PrintInfo("\nConfiguring multiprocessing using %s method..." % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()")) 365 MiscUtil.PrintInfo("NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n" % (MPParams["NumProcesses"], MPParams["InputDataMode"], ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"]))) 366 367 ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeConformerWorkerProcess, InitializeWorkerProcessArgs) 368 369 # Start processing... 370 if re.match("^Lazy$", MPParams["InputDataMode"], re.I): 371 Results = ProcessPool.imap(ConformerWorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"]) 372 elif re.match("^InMemory$", MPParams["InputDataMode"], re.I): 373 Results = ProcessPool.map(ConformerWorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"]) 374 else: 375 MiscUtil.PrintError("The value, %s, specified for \"--inputDataMode\" is not supported." % (MPParams["InputDataMode"])) 376 377 ConfNums = [] 378 MolConfsMap = {} 379 CalcEnergyMap = {} 380 CalcFailedCount = 0 381 382 for Result in Results: 383 ConfNum, EncodedMolConf, CalcStatus, Energy = Result 384 385 if EncodedMolConf is None: 386 CalcFailedCount += 1 387 continue 388 389 if not CalcStatus: 390 CalcFailedCount += 1 391 continue 392 393 MolConf = RDKitUtil.MolFromBase64EncodedMolString(EncodedMolConf) 394 395 ConfNums.append(ConfNum) 396 CalcEnergyMap[ConfNum] = Energy 397 MolConfsMap[ConfNum] = MolConf 398 399 if CalcFailedCount: 400 return (Mol, False, None, None) 401 402 SortedConfNums = sorted(ConfNums, key = lambda ConfNum: CalcEnergyMap[ConfNum]) 403 MinEnergyConfNum = SortedConfNums[0] 404 405 MinEnergy = "%.*f" % (OptionsInfo["Precision"], CalcEnergyMap[MinEnergyConfNum]) if OptionsInfo["EnergyOut"] else None 406 MinEnergyMolConf = MolConfsMap[MinEnergyConfNum] 407 408 ScaffoldEmbedRMSD = "%.4f" % float(MinEnergyMolConf.GetProp('EmbedRMS')) if OptionsInfo["ScaffoldRMSDOut"] else None 409 MinEnergyMolConf.ClearProp('EmbedRMS') 410 411 return (MinEnergyMolConf, True, MinEnergy, ScaffoldEmbedRMSD) 412 413 def InitializeConformerWorkerProcess(*EncodedArgs): 414 """Initialize data for a conformer worker process.""" 415 416 global Options, OptionsInfo 417 418 if not OptionsInfo["QuietMode"]: 419 MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid()) 420 421 # Decode Options and OptionInfo... 422 Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0]) 423 OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1]) 424 425 # Decode RefMol... 426 OptionsInfo["RefMolCore"] = RDKitUtil.MolFromBase64EncodedMolString(OptionsInfo["EncodedRefMolCore"]) 427 428 # Psi4 is initialized in the worker process to avoid creation of redundant Psi4 429 # output files for each process... 430 OptionsInfo["Psi4Initialized"] = False 431 432 def ConformerWorkerProcess(EncodedMolInfo): 433 """Process data for a conformer worker process.""" 434 435 if not OptionsInfo["Psi4Initialized"]: 436 InitializePsi4ForWorkerProcess() 437 438 MolConfIndex, EncodedMolConf = EncodedMolInfo 439 440 MolConfNum = MolConfIndex 441 CalcStatus = False 442 Energy = None 443 444 if EncodedMolConf is None: 445 return [MolConfIndex, None, CalcStatus, Energy] 446 447 MolConf = RDKitUtil.MolFromBase64EncodedMolString(EncodedMolConf) 448 MolConfName = RDKitUtil.GetMolName(MolConf, MolConfNum) 449 450 if not OptionsInfo["QuietMode"]: 451 MiscUtil.PrintInfo("Processing molecule %s conformer number %s..." % (MolConfName, MolConfNum)) 452 453 # Perform Psi4 constrained minimization... 454 CalcStatus, Energy = ConstrainAndMinimizeMoleculeUsingPsi4(OptionsInfo["psi4"], MolConf, OptionsInfo["RefMolCore"], MolConfNum) 455 if not CalcStatus: 456 if not OptionsInfo["QuietMode"]: 457 MiscUtil.PrintWarning("Minimization couldn't be performed for molecule %s\n" % (MolName)) 458 return [MolConfIndex, EncodedMolConf, CalcStatus, Energy] 459 460 return [MolConfIndex, RDKitUtil.MolToBase64EncodedMolString(MolConf, PropertyPickleFlags = Chem.PropertyPickleOptions.MolProps | Chem.PropertyPickleOptions.PrivateProps), CalcStatus, Energy] 461 462 def InitializePsi4ForWorkerProcess(): 463 """Initialize Psi4 for a worker process.""" 464 465 if OptionsInfo["Psi4Initialized"]: 466 return 467 468 OptionsInfo["Psi4Initialized"] = True 469 470 if OptionsInfo["MPLevelConformersMode"] and re.match("auto", OptionsInfo["Psi4RunParams"]["OutputFileSpecified"], re.I): 471 # Run Psi4 in quiet mode during multiprocessing at Conformers level for 'auto' OutputFile... 472 OptionsInfo["Psi4RunParams"]["OutputFile"] = "quiet" 473 else: 474 # Update output file... 475 OptionsInfo["Psi4RunParams"]["OutputFile"] = Psi4Util.UpdatePsi4OutputFileUsingPID(OptionsInfo["Psi4RunParams"]["OutputFile"], os.getpid()) 476 477 # Intialize Psi4... 478 OptionsInfo["psi4"] = Psi4Util.InitializePsi4(Psi4RunParams = OptionsInfo["Psi4RunParams"], Psi4OptionsParams = OptionsInfo["Psi4OptionsParams"], PrintVersion = False, PrintHeader = True) 479 480 # Setup max iterations global variable... 481 Psi4Util.UpdatePsi4OptionsParameters(OptionsInfo["psi4"], {'GEOM_MAXITER': OptionsInfo["MaxIters"]}) 482 483 # Setup conversion factor for energy units... 484 SetupEnergyConversionFactor(OptionsInfo["psi4"]) 485 486 def RetrieveReferenceMolecule(): 487 """Retrieve and validate reference molecule.""" 488 489 RefFile = OptionsInfo["RefFile"] 490 491 MiscUtil.PrintInfo("\nProcessing file %s..." % (RefFile)) 492 OptionsInfo["InfileParams"]["AllowEmptyMols"] = False 493 ValidRefMols, RefMolCount, ValidRefMolCount = RDKitUtil.ReadAndValidateMolecules(RefFile, **OptionsInfo["InfileParams"]) 494 495 if ValidRefMolCount == 0: 496 MiscUtil.PrintError("The reference file, %s, contains no valid molecules." % RefFile) 497 elif ValidRefMolCount > 1: 498 MiscUtil.PrintWarning("The reference file, %s, contains, %d, valid molecules. Using first molecule as the reference molecule..." % (RefFile, ValidRefMolCount)) 499 500 RefMol = ValidRefMols[0] 501 502 if OptionsInfo["UseScaffoldSMARTS"]: 503 ScaffoldPatternMol = Chem.MolFromSmarts(OptionsInfo["ScaffoldSMARTS"]) 504 if ScaffoldPatternMol is None: 505 MiscUtil.PrintError("Failed to create scaffold pattern molecule. The scaffold SMARTS pattern, %s, specified using \"-s, --scaffold\" option is not valid." % (OptionsInfo["ScaffoldSMARTS"])) 506 507 if not RefMol.HasSubstructMatch(ScaffoldPatternMol): 508 MiscUtil.PrintError("The scaffold SMARTS pattern, %s, specified using \"-s, --scaffold\" option, is missing in the first valid reference molecule." % (OptionsInfo["ScaffoldSMARTS"])) 509 510 return RefMol 511 512 def SetupCoreScaffold(RefMol, Mol, MolCount): 513 """Setup a reference molecule core containing common scaffold atoms between 514 a pair of molecules.""" 515 516 if OptionsInfo["UseScaffoldMCS"]: 517 return SetupCoreScaffoldByMCS(RefMol, Mol, MolCount) 518 elif OptionsInfo["UseScaffoldSMARTS"]: 519 return SetupCoreScaffoldBySMARTS(RefMol, Mol, MolCount) 520 else: 521 MiscUtil.PrintError("The value, %s, specified for \"-s, --scaffold\" option is not supported." % (OptionsInfo["Scaffold"])) 522 523 def SetupCoreScaffoldByMCS(RefMol, Mol, MolCount): 524 """Setup a reference molecule core containing common scaffold atoms between 525 a pair of molecules using MCS.""" 526 527 MCSParams = OptionsInfo["MCSParams"] 528 Mols = [RefMol, Mol] 529 530 MCSResultObject = rdFMCS.FindMCS(Mols, maximizeBonds = MCSParams["MaximizeBonds"], threshold = MCSParams["Threshold"], timeout = MCSParams["TimeOut"], verbose = MCSParams["Verbose"], matchValences = MCSParams["MatchValences"], ringMatchesRingOnly = MCSParams["RingMatchesRingOnly"], completeRingsOnly = MCSParams["CompleteRingsOnly"], matchChiralTag = MCSParams["MatchChiralTag"], atomCompare = MCSParams["AtomCompare"], bondCompare = MCSParams["BondCompare"], seedSmarts = MCSParams["SeedSMARTS"]) 531 532 if MCSResultObject.canceled: 533 if not OptionsInfo["QuietMode"]: 534 MiscUtil.PrintWarning("MCS failed to identify a common core scaffold between reference moecule and input molecule %s. Specify a different set of parameters using \"-m, --mcsParams\" option and try again." % (RDKitUtil.GetMolName(Mol, MolCount))) 535 return None 536 537 CoreNumAtoms = MCSResultObject.numAtoms 538 CoreNumBonds = MCSResultObject.numBonds 539 540 SMARTSCore = MCSResultObject.smartsString 541 542 if not len(SMARTSCore): 543 if not OptionsInfo["QuietMode"]: 544 MiscUtil.PrintWarning("MCS failed to identify a common core scaffold between reference moecule and input molecule %s. Specify a different set of parameters using \"-m, --mcsParams\" option and try again." % (RDKitUtil.GetMolName(Mol, MolCount))) 545 return None 546 547 if CoreNumAtoms < MCSParams["MinNumAtoms"]: 548 if not OptionsInfo["QuietMode"]: 549 MiscUtil.PrintWarning("Number of atoms, %d, in core scaffold identified by MCS is less than, %d, as specified by \"minNumAtoms\" parameter in \"-m, --mcsParams\" option." % (CoreNumAtoms, MCSParams["MinNumAtoms"])) 550 return None 551 552 if CoreNumBonds < MCSParams["MinNumBonds"]: 553 if not OptionsInfo["QuietMode"]: 554 MiscUtil.PrintWarning("Number of bonds, %d, in core scaffold identified by MCS is less than, %d, as specified by \"minNumBonds\" parameter in \"-m, --mcsParams\" option." % (CoreNumBonds, MCSParams["MinNumBonds"])) 555 return None 556 557 return GenerateCoreMol(RefMol, SMARTSCore) 558 559 def SetupCoreScaffoldBySMARTS(RefMol, Mol, MolCount): 560 """Setup a reference molecule core containing common scaffold atoms between 561 a pair of molecules using specified SMARTS.""" 562 563 if OptionsInfo["ScaffoldPatternMol"] is None: 564 OptionsInfo["ScaffoldPatternMol"] = Chem.MolFromSmarts(OptionsInfo["ScaffoldSMARTS"]) 565 566 if not Mol.HasSubstructMatch(OptionsInfo["ScaffoldPatternMol"]): 567 if not OptionsInfo["QuietMode"]: 568 MiscUtil.PrintWarning("The scaffold SMARTS pattern, %s, specified using \"-s, --scaffold\" option is missing in input molecule, %s." % (OptionsInfo["ScaffoldSMARTS"], RDKitUtil.GetMolName(Mol, MolCount))) 569 return None 570 571 return GenerateCoreMol(RefMol, OptionsInfo["ScaffoldSMARTS"]) 572 573 def GenerateCoreMol(RefMol, SMARTSCore): 574 """Generate core molecule for embedding. """ 575 576 # Create a molecule corresponding to core atoms... 577 SMARTSCoreMol = Chem.MolFromSmarts(SMARTSCore) 578 579 # Setup a ref molecule containing core atoms with dummy atoms as 580 # attachment points for atoms around the core atoms... 581 Core = AllChem.ReplaceSidechains(Chem.RemoveHs(RefMol), SMARTSCoreMol) 582 583 # Delete any substructures containing dummy atoms.. 584 RefMolCore = AllChem.DeleteSubstructs(Core, Chem.MolFromSmiles('*')) 585 RefMolCore.UpdatePropertyCache() 586 587 return RefMolCore 588 589 def ConstrainAndMinimizeMolecule(Psi4Handle, Mol, RefMolCore, MolNum = None): 590 """Constrain and minimize molecule.""" 591 592 # Add hydrogens... 593 Mol = Chem.AddHs(Mol, addCoords = True) 594 595 MolName = RDKitUtil.GetMolName(Mol, MolNum) 596 597 # Setup constrained conformers... 598 MolConfs, MolConfsStatus = ConstrainEmbedAndMinimizeMoleculeUsingRDKit(Mol, RefMolCore, MolNum) 599 if not MolConfsStatus: 600 if not OptionsInfo["QuietMode"]: 601 MiscUtil.PrintWarning("Conformation generation couldn't be performed for molecule %s: Constrained embedding failed...\n" % MolName) 602 return (Mol, False, None, None) 603 604 # Minimize conformers... 605 ConfNums = [] 606 CalcEnergyMap = {} 607 MolConfsMap = {} 608 609 for ConfNum, MolConf in enumerate(MolConfs): 610 if not OptionsInfo["QuietMode"]: 611 MiscUtil.PrintInfo("\nProcessing molecule %s conformer number %s..." % (MolName, ConfNum)) 612 613 CalcStatus, Energy = ConstrainAndMinimizeMoleculeUsingPsi4(Psi4Handle, MolConf, RefMolCore, MolNum) 614 if not CalcStatus: 615 if not OptionsInfo["QuietMode"]: 616 MiscUtil.PrintWarning("Minimization couldn't be performed for molecule %s\n" % (MolName)) 617 return (Mol, False, None, None) 618 619 ConfNums.append(ConfNum) 620 CalcEnergyMap[ConfNum] = Energy 621 MolConfsMap[ConfNum] = MolConf 622 623 SortedConfNums = sorted(ConfNums, key = lambda ConfNum: CalcEnergyMap[ConfNum]) 624 MinEnergyConfNum = SortedConfNums[0] 625 626 MinEnergy = "%.*f" % (OptionsInfo["Precision"], CalcEnergyMap[MinEnergyConfNum]) if OptionsInfo["EnergyOut"] else None 627 MinEnergyMolConf = MolConfsMap[MinEnergyConfNum] 628 629 ScaffoldEmbedRMSD = "%.4f" % float(MinEnergyMolConf.GetProp('EmbedRMS')) if OptionsInfo["ScaffoldRMSDOut"] else None 630 MinEnergyMolConf.ClearProp('EmbedRMS') 631 632 return (MinEnergyMolConf, True, MinEnergy, ScaffoldEmbedRMSD) 633 634 def ConstrainAndMinimizeMoleculeUsingPsi4(Psi4Handle, Mol, RefMolCore, MolNum, ConfID = -1): 635 """Minimize molecule using Psi4.""" 636 637 # Setup a list for constrained atoms... 638 ConstrainedAtomIndices = SetupConstrainedAtomIndicesForPsi4(Mol, RefMolCore) 639 if len(ConstrainedAtomIndices) == 0: 640 return (False, None) 641 642 # Setup a Psi4Mol... 643 Psi4Mol = SetupPsi4Mol(Psi4Handle, Mol, MolNum, ConfID) 644 if Psi4Mol is None: 645 return (False, None) 646 647 # Setup reference wave function... 648 Reference = SetupReferenceWavefunction(Mol) 649 Psi4Handle.set_options({'Reference': Reference}) 650 651 # Setup method name and basis set... 652 MethodName, BasisSet = SetupMethodNameAndBasisSet(Mol) 653 654 # Setup freeze list for constrained atoms... 655 FreezeXYZList = [("%s xyz" % AtomIdex) for AtomIdex in ConstrainedAtomIndices] 656 FreezeXYZString = " %s " % " ".join(FreezeXYZList) 657 Psi4Handle.set_options({"OPTKING__frozen_cartesian": FreezeXYZString}) 658 659 # Optimize geometry... 660 Status, Energy, WaveFunction = Psi4Util.PerformGeometryOptimization(Psi4Handle, Psi4Mol, MethodName, BasisSet, ReturnWaveFunction = True, Quiet = OptionsInfo["QuietMode"]) 661 662 if not Status: 663 PerformPsi4Cleanup(Psi4Handle) 664 return (False, None) 665 666 # Update atom positions... 667 AtomPositions = Psi4Util.GetAtomPositions(Psi4Handle, WaveFunction, InAngstroms = True) 668 RDKitUtil.SetAtomPositions(Mol, AtomPositions, ConfID = ConfID) 669 670 # Convert energy units... 671 if OptionsInfo["ApplyEnergyConversionFactor"]: 672 Energy = Energy * OptionsInfo["EnergyConversionFactor"] 673 674 # Clean up 675 PerformPsi4Cleanup(Psi4Handle) 676 677 return (True, Energy) 678 679 def ConstrainEmbedAndMinimizeMoleculeUsingRDKit(Mol, RefMolCore, MolNum = None): 680 """Constrain, embed, and minimize molecule.""" 681 682 # Setup forcefield function to use for constrained minimization... 683 ForceFieldFunction = None 684 ForceFieldName = None 685 if OptionsInfo["ConfGenerationParams"]["UseUFF"]: 686 ForceFieldFunction = lambda mol, confId = -1 : AllChem.UFFGetMoleculeForceField(mol, confId = confId) 687 ForeceFieldName = "UFF" 688 else: 689 ForceFieldFunction = lambda mol, confId = -1 : AllChem.MMFFGetMoleculeForceField(mol, AllChem.MMFFGetMoleculeProperties(mol, mmffVariant = OptionsInfo["ConfGenerationParams"]["MMFFVariant"]) , confId = confId) 690 ForeceFieldName = "MMFF" 691 692 if ForceFieldFunction is None: 693 if not OptionsInfo["QuietMode"]: 694 MiscUtil.PrintWarning("Failed to setup forcefield %s for molecule: %s\n" % (ForceFieldName, RDKitUtil.GetMolName(Mol, MolNum))) 695 return (None, False) 696 697 MaxConfs = OptionsInfo["ConfGenerationParams"]["MaxConfs"] 698 EnforceChirality = OptionsInfo["ConfGenerationParams"]["EnforceChirality"] 699 UseExpTorsionAnglePrefs = OptionsInfo["ConfGenerationParams"]["UseExpTorsionAnglePrefs"] 700 ETVersion = OptionsInfo["ConfGenerationParams"]["ETVersion"] 701 UseBasicKnowledge = OptionsInfo["ConfGenerationParams"]["UseBasicKnowledge"] 702 UseTethers = OptionsInfo["ConfGenerationParams"]["UseTethers"] 703 704 MolConfs = [] 705 ConfIDs = [ConfID for ConfID in range(0, MaxConfs)] 706 707 for ConfID in ConfIDs: 708 try: 709 MolConf = Chem.Mol(Mol) 710 AllChem.ConstrainedEmbed(MolConf, RefMolCore, useTethers = UseTethers, coreConfId = -1, randomseed = ConfID, getForceField = ForceFieldFunction, enforceChirality = EnforceChirality, useExpTorsionAnglePrefs = UseExpTorsionAnglePrefs, useBasicKnowledge = UseBasicKnowledge, ETversion = ETVersion) 711 except (ValueError, RuntimeError, Chem.rdchem.KekulizeException) as ErrMsg: 712 if not OptionsInfo["QuietMode"]: 713 MolName = RDKitUtil.GetMolName(Mol, MolNum) 714 MiscUtil.PrintWarning("Constrained embedding couldn't be performed for molecule %s:\n%s\n" % (RDKitUtil.GetMolName(Mol, MolNum), ErrMsg)) 715 return (None, False) 716 717 MolConfs.append(MolConf) 718 719 return FilterConstrainedConformationsByRMSD(Mol, MolConfs, MolNum) 720 721 def FilterConstrainedConformationsByRMSD(Mol, MolConfs, MolNum = None): 722 """Filter contarained conformations by RMSD.""" 723 724 EmbedRMSDCutoff = OptionsInfo["ConfGenerationParams"]["EmbedRMSDCutoff"] 725 if EmbedRMSDCutoff is None or EmbedRMSDCutoff <= 0: 726 if not OptionsInfo["QuietMode"]: 727 MiscUtil.PrintInfo("\nGenerating initial ensemble of constrained conformations by distance geometry and forcefield for %s - EmbedRMSDCutoff: None; Size: %s" % (RDKitUtil.GetMolName(Mol, MolNum), len(MolConfs))) 728 return (MolConfs, True) 729 730 FirstMolConf = True 731 SelectedMolConfs = [] 732 for MolConfIndex, MolConf in enumerate(MolConfs): 733 if FirstMolConf: 734 FirstMolConf = False 735 SelectedMolConfs.append(MolConf) 736 continue 737 738 # Compare RMSD against all selected conformers... 739 ProbeMolConf = Chem.RemoveHs(Chem.Mol(MolConf)) 740 IgnoreConf = False 741 for SelectedMolConfIndex, SelectedMolConf in enumerate(SelectedMolConfs): 742 RefMolConf = Chem.RemoveHs(Chem.Mol(SelectedMolConf)) 743 CalcRMSD = rdMolAlign.AlignMol(ProbeMolConf, RefMolConf) 744 745 if CalcRMSD < EmbedRMSDCutoff: 746 IgnoreConf = True 747 break 748 749 if IgnoreConf: 750 continue 751 752 SelectedMolConfs.append(MolConf) 753 754 if not OptionsInfo["QuietMode"]: 755 MiscUtil.PrintInfo("\nGenerating initial ensemble of constrained conformations by distance geometry and forcefield for %s - EmbedRMSDCutoff: %s; Size: %s; Size after RMSD filtering: %s" % (RDKitUtil.GetMolName(Mol, MolNum), EmbedRMSDCutoff, len(MolConfs), len(SelectedMolConfs))) 756 757 return (SelectedMolConfs, True) 758 759 def SetupConstrainedAtomIndicesForPsi4(Mol, RefMolCore, ConstrainHydrogens = False): 760 """Setup a list of atom indices to be constrained during Psi4 minimizaiton.""" 761 762 AtomIndices = [] 763 764 # Collect matched heavy atoms along with attached hydrogens... 765 for AtomIndex in Mol.GetSubstructMatch(RefMolCore): 766 Atom = Mol.GetAtomWithIdx(AtomIndex) 767 if Atom.GetAtomicNum() == 1: 768 continue 769 770 AtomIndices.append(AtomIndex) 771 for AtomNbr in Atom.GetNeighbors(): 772 if AtomNbr.GetAtomicNum() == 1: 773 if ConstrainHydrogens: 774 AtomNbrIndex = AtomNbr.GetIdx() 775 AtomIndices.append(AtomNbrIndex) 776 777 AtomIndices = sorted(AtomIndices) 778 779 # Atom indices start from 1 for Psi4 instead 0 for RDKit... 780 AtomIndices = [ AtomIndex + 1 for AtomIndex in AtomIndices] 781 782 return AtomIndices 783 784 def SetupPsi4Mol(Psi4Handle, Mol, MolNum, ConfID = -1): 785 """Setup a Psi4 molecule object.""" 786 787 # Turn off recentering and reorientation to perform optimization in the 788 # constrained coordinate space... 789 MolGeometry = RDKitUtil.GetPsi4XYZFormatString(Mol, ConfID = ConfID, NoCom = True, NoReorient = True) 790 791 try: 792 Psi4Mol = Psi4Handle.geometry(MolGeometry) 793 except Exception as ErrMsg: 794 Psi4Mol = None 795 if not OptionsInfo["QuietMode"]: 796 MiscUtil.PrintWarning("Failed to create Psi4 molecule from geometry string: %s\n" % ErrMsg) 797 MolName = RDKitUtil.GetMolName(Mol, MolNum) 798 MiscUtil.PrintWarning("Ignoring molecule: %s" % MolName) 799 800 return Psi4Mol 801 802 def PerformPsi4Cleanup(Psi4Handle): 803 """Perform clean up.""" 804 805 # Clean up after Psi4 run... 806 Psi4Handle.core.clean() 807 808 # Clean up any leftover scratch files... 809 if OptionsInfo["MPMode"]: 810 Psi4Util.RemoveScratchFiles(Psi4Handle, OptionsInfo["Psi4RunParams"]["OutputFile"]) 811 812 def CheckAndValidateMolecule(Mol, MolCount = None): 813 """Validate molecule for Psi4 calculations.""" 814 815 if Mol is None: 816 if not OptionsInfo["QuietMode"]: 817 MiscUtil.PrintInfo("\nProcessing molecule number %s..." % MolCount) 818 return False 819 820 MolName = RDKitUtil.GetMolName(Mol, MolCount) 821 if not OptionsInfo["QuietMode"]: 822 MiscUtil.PrintInfo("\nProcessing molecule %s..." % MolName) 823 824 if RDKitUtil.IsMolEmpty(Mol): 825 if not OptionsInfo["QuietMode"]: 826 MiscUtil.PrintWarning("Ignoring empty molecule: %s\n" % MolName) 827 return False 828 829 if not RDKitUtil.ValidateElementSymbols(RDKitUtil.GetAtomSymbols(Mol)): 830 if not OptionsInfo["QuietMode"]: 831 MiscUtil.PrintWarning("Ignoring molecule containing invalid element symbols: %s\n" % MolName) 832 return False 833 834 return True 835 836 def SetupMethodNameAndBasisSet(Mol): 837 """Setup method name and basis set.""" 838 839 MethodName = OptionsInfo["MethodName"] 840 if OptionsInfo["MethodNameAuto"]: 841 MethodName = "B3LYP" 842 843 BasisSet = OptionsInfo["BasisSet"] 844 if OptionsInfo["BasisSetAuto"]: 845 BasisSet = "6-31+G**" if RDKitUtil.IsAtomSymbolPresentInMol(Mol, "S") else "6-31G**" 846 847 return (MethodName, BasisSet) 848 849 def SetupReferenceWavefunction(Mol): 850 """Setup reference wavefunction.""" 851 852 Reference = OptionsInfo["Reference"] 853 if OptionsInfo["ReferenceAuto"]: 854 Reference = 'UHF' if (RDKitUtil.GetSpinMultiplicity(Mol) > 1) else 'RHF' 855 856 return Reference 857 858 def SetupEnergyConversionFactor(Psi4Handle): 859 """Setup converstion factor for energt units. The Psi4 energy units are Hartrees.""" 860 861 EnergyUnits = OptionsInfo["EnergyUnits"] 862 863 ApplyConversionFactor = True 864 if re.match("^kcal\/mol$", EnergyUnits, re.I): 865 ConversionFactor = Psi4Handle.constants.hartree2kcalmol 866 elif re.match("^kJ\/mol$", EnergyUnits, re.I): 867 ConversionFactor = Psi4Handle.constants.hartree2kJmol 868 elif re.match("^eV$", EnergyUnits, re.I): 869 ConversionFactor = Psi4Handle.constants.hartree2ev 870 else: 871 ApplyConversionFactor = False 872 ConversionFactor = 1.0 873 874 OptionsInfo["ApplyEnergyConversionFactor"] = ApplyConversionFactor 875 OptionsInfo["EnergyConversionFactor"] = ConversionFactor 876 877 def WriteMolecule(Writer, Mol, MolNum = None, Energy = None, ScaffoldEmbedRMSD = None, ConfID = None,): 878 """Write molecule.""" 879 880 if ScaffoldEmbedRMSD is not None: 881 Mol.SetProp("CoreScaffoldEmbedRMSD", ScaffoldEmbedRMSD) 882 883 if OptionsInfo["EnergyOut"] and Energy is not None: 884 Mol.SetProp(OptionsInfo["EnergyDataFieldLabel"], Energy) 885 886 try: 887 if ConfID is None: 888 Writer.write(Mol) 889 else: 890 Writer.write(Mol, confId = ConfID) 891 except (ValueError, RuntimeError) as ErrMsg: 892 if not OptionsInfo["QuietMode"]: 893 MolName = RDKitUtil.GetMolName(Mol, MolNum) 894 MiscUtil.PrintWarning("Failed to write molecule %s:\n%s\n" % (MolName, ErrMsg)) 895 return False 896 897 return True 898 899 def ProcessMCSParameters(): 900 """Set up and process MCS parameters.""" 901 902 SetupMCSParameters() 903 ProcessSpecifiedMCSParameters() 904 905 def SetupMCSParameters(): 906 """Set up default MCS parameters.""" 907 908 OptionsInfo["MCSParams"] = {"MaximizeBonds": True, "Threshold": 0.9, "TimeOut": 3600, "Verbose": False, "MatchValences": True, "MatchChiralTag": False, "RingMatchesRingOnly": True, "CompleteRingsOnly": True, "AtomCompare": rdFMCS.AtomCompare.CompareElements, "BondCompare": rdFMCS.BondCompare.CompareOrder, "SeedSMARTS": "", "MinNumAtoms": 1, "MinNumBonds": 0} 909 910 def ProcessSpecifiedMCSParameters(): 911 """Process specified MCS parameters.""" 912 913 if re.match("^auto$", OptionsInfo["SpecifiedMCSParams"], re.I): 914 # Nothing to process... 915 return 916 917 # Parse specified parameters... 918 MCSParams = re.sub(" ", "", OptionsInfo["SpecifiedMCSParams"]) 919 if not MCSParams: 920 MiscUtil.PrintError("No valid parameter name and value pairs specified using \"-m, --mcsParams\" option.") 921 922 MCSParamsWords = MCSParams.split(",") 923 if len(MCSParamsWords) % 2: 924 MiscUtil.PrintError("The number of comma delimited paramater names and values, %d, specified using \"-m, --mcsParams\" option must be an even number." % (len(MCSParamsWords))) 925 926 # Setup canonical parameter names... 927 ValidParamNames = [] 928 CanonicalParamNamesMap = {} 929 for ParamName in sorted(OptionsInfo["MCSParams"]): 930 ValidParamNames.append(ParamName) 931 CanonicalParamNamesMap[ParamName.lower()] = ParamName 932 933 # Validate and set paramater names and value... 934 for Index in range(0, len(MCSParamsWords), 2): 935 Name = MCSParamsWords[Index] 936 Value = MCSParamsWords[Index + 1] 937 938 CanonicalName = Name.lower() 939 if not CanonicalName in CanonicalParamNamesMap: 940 MiscUtil.PrintError("The parameter name, %s, specified using \"-m, --mcsParams\" option is not a valid name. Supported parameter names: %s" % (Name, " ".join(ValidParamNames))) 941 942 ParamName = CanonicalParamNamesMap[CanonicalName] 943 if re.match("^Threshold$", ParamName, re.I): 944 Value = float(Value) 945 if Value <= 0.0 or Value > 1.0 : 946 MiscUtil.PrintError("The parameter value, %s, specified using \"-m, --mcsParams\" option for parameter, %s, is not a valid value. Supported values: > 0 and <= 1.0" % (Value, Name)) 947 ParamValue = Value 948 elif re.match("^Timeout$", ParamName, re.I): 949 Value = int(Value) 950 if Value <= 0: 951 MiscUtil.PrintError("The parameter value, %s, specified using \"-m, --mcsParams\" option for parameter, %s, is not a valid value. Supported values: > 0" % (Value, Name)) 952 ParamValue = Value 953 elif re.match("^MinNumAtoms$", ParamName, re.I): 954 Value = int(Value) 955 if Value < 1: 956 MiscUtil.PrintError("The parameter value, %s, specified using \"-m, --mcsParams\" option for parameter, %s, is not a valid value. Supported values: >= 1" % (Value, Name)) 957 ParamValue = Value 958 elif re.match("^MinNumBonds$", ParamName, re.I): 959 Value = int(Value) 960 if Value < 0: 961 MiscUtil.PrintError("The parameter value, %s, specified using \"-m, --mcsParams\" option for parameter, %s, is not a valid value. Supported values: >=0 " % (Value, Name)) 962 ParamValue = Value 963 elif re.match("^AtomCompare$", ParamName, re.I): 964 if re.match("^CompareAny$", Value, re.I): 965 ParamValue = rdFMCS.AtomCompare.CompareAny 966 elif re.match("^CompareElements$", Value, re.I): 967 ParamValue = Chem.rdFMCS.AtomCompare.CompareElements 968 elif re.match("^CompareIsotopes$", Value, re.I): 969 ParamValue = Chem.rdFMCS.AtomCompare.CompareIsotopes 970 else: 971 MiscUtil.PrintError("The parameter value, %s, specified using \"-m, --mcsParams\" option for parameter, %s, is not a valid value. Supported values: CompareAny CompareElements CompareIsotopes" % (Value, Name)) 972 elif re.match("^BondCompare$", ParamName, re.I): 973 if re.match("^CompareAny$", Value, re.I): 974 ParamValue = Chem.rdFMCS.BondCompare.CompareAny 975 elif re.match("^CompareOrder$", Value, re.I): 976 ParamValue = rdFMCS.BondCompare.CompareOrder 977 elif re.match("^CompareOrderExact$", Value, re.I): 978 ParamValue = rdFMCS.BondCompare.CompareOrderExact 979 else: 980 MiscUtil.PrintError("The parameter value, %s, specified using \"-m, --mcsParams\" option for parameter, %s, is not a valid value. Supported values: CompareAny CompareOrder CompareOrderExact" % (Value, Name)) 981 elif re.match("^SeedSMARTS$", ParamName, re.I): 982 if not len(Value): 983 MiscUtil.PrintError("The parameter value specified using \"-m, --mcsParams\" option for parameter, %s, is empty. " % (Name)) 984 ParamValue = Value 985 else: 986 if not re.match("^(Yes|No|True|False)$", Value, re.I): 987 MiscUtil.PrintError("The parameter value, %s, specified using \"-m, --mcsParams\" option for parameter, %s, is not a valid value. Supported values: Yes No True False" % (Value, Name)) 988 ParamValue = False 989 if re.match("^(Yes|True)$", Value, re.I): 990 ParamValue = True 991 992 # Set value... 993 OptionsInfo["MCSParams"][ParamName] = ParamValue 994 995 def ProcessOptions(): 996 """Process and validate command line arguments and options.""" 997 998 MiscUtil.PrintInfo("Processing options...") 999 1000 # Validate options... 1001 ValidateOptions() 1002 1003 OptionsInfo["Infile"] = Options["--infile"] 1004 OptionsInfo["SMILESInfileStatus"] = True if MiscUtil.CheckFileExt(Options["--infile"], "smi csv tsv txt") else False 1005 ParamsDefaultInfoOverride = {"RemoveHydrogens": False} 1006 OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters("--infileParams", Options["--infileParams"], InfileName = Options["--infile"], ParamsDefaultInfo = ParamsDefaultInfoOverride) 1007 1008 OptionsInfo["RefFile"] = Options["--reffile"] 1009 1010 OptionsInfo["Outfile"] = Options["--outfile"] 1011 OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters("--outfileParams", Options["--outfileParams"]) 1012 1013 OptionsInfo["Overwrite"] = Options["--overwrite"] 1014 1015 OptionsInfo["Scaffold"] = Options["--scaffold"] 1016 if re.match("^auto$", Options["--scaffold"], re.I): 1017 UseScaffoldMCS = True 1018 UseScaffoldSMARTS = False 1019 ScaffoldSMARTS = None 1020 else: 1021 UseScaffoldMCS = False 1022 UseScaffoldSMARTS = True 1023 ScaffoldSMARTS = OptionsInfo["Scaffold"] 1024 1025 OptionsInfo["UseScaffoldMCS"] = UseScaffoldMCS 1026 OptionsInfo["UseScaffoldSMARTS"] = UseScaffoldSMARTS 1027 OptionsInfo["ScaffoldSMARTS"] = ScaffoldSMARTS 1028 OptionsInfo["ScaffoldPatternMol"] = None 1029 1030 OptionsInfo["SpecifiedMCSParams"] = Options["--mcsParams"] 1031 ProcessMCSParameters() 1032 1033 OptionsInfo["ScaffoldRMSDOut"] = True if re.match("^yes$", Options["--scaffoldRMSDOut"], re.I) else False 1034 1035 # Method, basis set, and reference wavefunction... 1036 OptionsInfo["BasisSet"] = Options["--basisSet"] 1037 OptionsInfo["BasisSetAuto"] = True if re.match("^auto$", Options["--basisSet"], re.I) else False 1038 1039 OptionsInfo["MethodName"] = Options["--methodName"] 1040 OptionsInfo["MethodNameAuto"] = True if re.match("^auto$", Options["--methodName"], re.I) else False 1041 1042 OptionsInfo["Reference"] = Options["--reference"] 1043 OptionsInfo["ReferenceAuto"] = True if re.match("^auto$", Options["--reference"], re.I) else False 1044 1045 # Run and options parameters... 1046 OptionsInfo["Psi4OptionsParams"] = Psi4Util.ProcessPsi4OptionsParameters("--psi4OptionsParams", Options["--psi4OptionsParams"]) 1047 OptionsInfo["Psi4RunParams"] = Psi4Util.ProcessPsi4RunParameters("--psi4RunParams", Options["--psi4RunParams"], InfileName = OptionsInfo["Infile"]) 1048 1049 # Conformer generation paramaters... 1050 ParamsDefaultInfoOverride = {"MaxConfs": 50} 1051 OptionsInfo["ConfGenerationParams"] = MiscUtil.ProcessOptionConformerParameters("--confParams", Options["--confParams"], ParamsDefaultInfoOverride) 1052 1053 # Write energy parameters... 1054 OptionsInfo["EnergyOut"] = True if re.match("^yes$", Options["--energyOut"], re.I) else False 1055 OptionsInfo["EnergyUnits"] = Options["--energyUnits"] 1056 1057 EnergyDataFieldLabel = Options["--energyDataFieldLabel"] 1058 if re.match("^auto$", EnergyDataFieldLabel, re.I): 1059 EnergyDataFieldLabel = "Psi4_Energy (%s)" % Options["--energyUnits"] 1060 OptionsInfo["EnergyDataFieldLabel"] = EnergyDataFieldLabel 1061 1062 OptionsInfo["MaxIters"] = int(Options["--maxIters"]) 1063 1064 OptionsInfo["MPMode"] = True if re.match("^yes$", Options["--mp"], re.I) else False 1065 OptionsInfo["MPParams"] = MiscUtil.ProcessOptionMultiprocessingParameters("--mpParams", Options["--mpParams"]) 1066 1067 # Multiprocessing level... 1068 MPLevelMoleculesMode = False 1069 MPLevelConformersMode = False 1070 MPLevel = Options["--mpLevel"] 1071 if re.match("^Molecules$", MPLevel, re.I): 1072 MPLevelMoleculesMode = True 1073 elif re.match("^Conformers$", MPLevel, re.I): 1074 MPLevelConformersMode = True 1075 else: 1076 MiscUtil.PrintError("The value, %s, specified for option \"--mpLevel\" is not valid. " % MPLevel) 1077 OptionsInfo["MPLevel"] = MPLevel 1078 OptionsInfo["MPLevelMoleculesMode"] = MPLevelMoleculesMode 1079 OptionsInfo["MPLevelConformersMode"] = MPLevelConformersMode 1080 1081 OptionsInfo["Precision"] = int(Options["--precision"]) 1082 OptionsInfo["QuietMode"] = True if re.match("^yes$", Options["--quiet"], re.I) else False 1083 1084 def RetrieveOptions(): 1085 """Retrieve command line arguments and options.""" 1086 1087 # Get options... 1088 global Options 1089 Options = docopt(_docoptUsage_) 1090 1091 # Set current working directory to the specified directory... 1092 WorkingDir = Options["--workingdir"] 1093 if WorkingDir: 1094 os.chdir(WorkingDir) 1095 1096 # Handle examples option... 1097 if "--examples" in Options and Options["--examples"]: 1098 MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_)) 1099 sys.exit(0) 1100 1101 def ValidateOptions(): 1102 """Validate option values.""" 1103 1104 MiscUtil.ValidateOptionTextValue("--energyOut", Options["--energyOut"], "yes no") 1105 MiscUtil.ValidateOptionTextValue("--energyUnits", Options["--energyUnits"], "Hartrees kcal/mol kJ/mol eV") 1106 1107 MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"]) 1108 MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "sdf sd mol smi txt csv tsv") 1109 1110 MiscUtil.ValidateOptionFilePath("-r, --reffile", Options["--reffile"]) 1111 MiscUtil.ValidateOptionFileExt("-r, --reffile", Options["--reffile"], "sdf sd mol") 1112 1113 MiscUtil.ValidateOptionIntegerValue("--maxIters", Options["--maxIters"], {">": 0}) 1114 1115 MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "sdf sd") 1116 MiscUtil.ValidateOptionsOutputFileOverwrite("-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"]) 1117 MiscUtil.ValidateOptionsDistinctFileNames("-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"]) 1118 1119 MiscUtil.ValidateOptionTextValue("--mp", Options["--mp"], "yes no") 1120 MiscUtil.ValidateOptionTextValue("--mpLevel", Options["--mpLevel"], "Molecules Conformers") 1121 1122 MiscUtil.ValidateOptionIntegerValue("-p, --precision", Options["--precision"], {">": 0}) 1123 MiscUtil.ValidateOptionTextValue("-q, --quiet", Options["--quiet"], "yes no") 1124 1125 MiscUtil.ValidateOptionTextValue("--scaffoldRMSDOut", Options["--scaffoldRMSDOut"], "yes no") 1126 1127 # Setup a usage string for docopt... 1128 _docoptUsage_ = """ 1129 Psi4PerformConstrainedMinimization.py - Perform constrained minimization 1130 1131 Usage: 1132 Psi4PerformConstrainedMinimization.py [--basisSet <text>] [--confParams <Name,Value,...>] [--energyOut <yes or no>] 1133 [--energyDataFieldLabel <text>] [--energyUnits <text>] [--infileParams <Name,Value,...>] 1134 [--maxIters <number>] [--methodName <text>] [--mcsParams <Name,Value,...>] 1135 [--mp <yes or no>] [--mpLevel <Molecules or Conformers>] [--mpParams <Name, Value,...>] 1136 [ --outfileParams <Name,Value,...> ] [--overwrite] [--precision <number> ] 1137 [--psi4OptionsParams <Name,Value,...>] [--psi4RunParams <Name,Value,...>] 1138 [--quiet <yes or no>] [--reference <text>] [--scaffold <auto or SMARTS>] 1139 [--scaffoldRMSDOut <yes or no>] [-w <dir>] -i <infile> -r <reffile> -o <outfile> 1140 Psi4PerformConstrainedMinimization.py -h | --help | -e | --examples 1141 1142 Description: 1143 Generate 3D structures for molecules by performing a constrained energy 1144 minimization against a reference molecule. The 3D structures for molecules 1145 are generated using a combination of distance geometry and forcefield 1146 minimization followed by geometry optimization using a quantum chemistry 1147 method. 1148 1149 An initial set of 3D conformers are generated for input molecules using 1150 distance geometry. A common core scaffold, corresponding to a Maximum 1151 Common Substructure (MCS) or an explicit SMARTS pattern, is identified 1152 between a pair of input and reference molecules. The core scaffold atoms in 1153 input molecules are aligned against the same atoms in the reference molecule. 1154 The energy of aligned structures are sequentially minimized using the forcefield 1155 and a quantum chemistry method. The conformation with the lowest energy is 1156 selected to represent the final structure. 1157 1158 A Psi4 XYZ format geometry string is automatically generated for each molecule 1159 in input file. It contains atom symbols and 3D coordinates for each atom in a 1160 molecule. In addition, the formal charge and spin multiplicity are present in the 1161 the geometry string. These values are either retrieved from molecule properties 1162 named 'FormalCharge' and 'SpinMultiplicty' or dynamically calculated for a 1163 molecule. 1164 1165 The supported input file formats are: Mol (.mol), SD (.sdf, .sd), SMILES (.smi, 1166 .csv, .tsv, .txt) 1167 1168 The supported output file formats are: SD (.sdf, .sd) 1169 1170 Options: 1171 -b, --basisSet <text> [default: auto] 1172 Basis set to use for constrained energy minimization. Default: 6-31+G** 1173 for sulfur containing molecules; Otherwise, 6-31G** [ Ref 150 ]. The specified 1174 value must be a valid Psi4 basis set. No validation is performed. 1175 1176 The following list shows a representative sample of basis sets available 1177 in Psi4: 1178 1179 STO-3G, 6-31G, 6-31+G, 6-31++G, 6-31G*, 6-31+G*, 6-31++G*, 1180 6-31G**, 6-31+G**, 6-31++G**, 6-311G, 6-311+G, 6-311++G, 1181 6-311G*, 6-311+G*, 6-311++G*, 6-311G**, 6-311+G**, 6-311++G**, 1182 cc-pVDZ, cc-pCVDZ, aug-cc-pVDZ, cc-pVDZ-DK, cc-pCVDZ-DK, def2-SVP, 1183 def2-SVPD, def2-TZVP, def2-TZVPD, def2-TZVPP, def2-TZVPPD 1184 1185 --confParams <Name,Value,...> [default: auto] 1186 A comma delimited list of parameter name and value pairs for generating 1187 initial 3D coordinates for molecules in input file. A common core scaffold is 1188 identified between a pair of input and reference molecules. The atoms in 1189 common core scaffold of input molecules are aligned against the reference 1190 molecule followed by constrained energy minimization using forcefield 1191 available in RDKit. The 3D structures are subsequently constrained and 1192 minimized by a quantum chemistry method available in Psi4. 1193 1194 The supported parameter names along with their default values are shown 1195 below: 1196 1197 confMethod,ETKDGv2, 1198 forceField,MMFF, forceFieldMMFFVariant,MMFF94, 1199 enforceChirality,yes,embedRMSDCutoff,0.5,maxConfs,50, 1200 useTethers,yes 1201 1202 confMethod,ETKDGv2 [ Possible values: SDG, KDG, ETDG, 1203 ETKDG , or ETKDGv2] 1204 forceField, MMFF [ Possible values: UFF or MMFF ] 1205 forceFieldMMFFVariant,MMFF94 [ Possible values: MMFF94 or MMFF94s ] 1206 enforceChirality,yes [ Possible values: yes or no ] 1207 useTethers,yes [ Possible values: yes or no ] 1208 1209 confMethod: Conformation generation methodology for generating initial 3D 1210 coordinates. Possible values: Standard Distance Geometry (SDG), Experimental 1211 Torsion-angle preference with Distance Geometry (ETDG), basic Knowledge-terms 1212 with Distance Geometry (KDG) and Experimental Torsion-angle preference 1213 along with basic Knowledge-terms with Distance Geometry (ETKDG or 1214 ETKDGv2) [Ref 129, 167] . 1215 1216 forceField: Forcefield method to use for constrained energy minimization. 1217 Possible values: Universal Force Field (UFF) [ Ref 81 ] or Merck Molecular 1218 Mechanics Force Field [ Ref 83-87 ] . 1219 1220 enforceChirality: Enforce chirality for defined chiral centers during 1221 forcefield minimization. 1222 1223 maxConfs: Maximum number of conformations to generate for each molecule 1224 during the generation of an initial 3D conformation ensemble using conformation 1225 generation methodology. The conformations are constrained and minimized using 1226 the specified forcefield and a quantum chemistry method. The lowest energy 1227 conformation is written to the output file. 1228 1229 embedRMSDCutoff: RMSD cutoff for retaining initial set of conformers embedded 1230 using distance geometry and forcefield minimization. All embedded conformers 1231 are kept for 'None' value. Otherwise, only those conformers which are different 1232 from each other by the specified RMSD cutoff, 0.5 by default, are kept. The first 1233 embedded conformer is always retained. 1234 1235 useTethers: Use tethers to optimize the final embedded conformation by 1236 applying a series of extra forces to align matching atoms to the positions of 1237 the core atoms. Otherwise, use simple distance constraints during the 1238 optimization. 1239 --energyOut <yes or no> [default: yes] 1240 Write out energy values. 1241 --energyDataFieldLabel <text> [default: auto] 1242 Energy data field label for writing energy values. Default: Psi4_Energy (<Units>). 1243 --energyUnits <text> [default: kcal/mol] 1244 Energy units. Possible values: Hartrees, kcal/mol, kJ/mol, or eV. 1245 -e, --examples 1246 Print examples. 1247 -h, --help 1248 Print this help message. 1249 -i, --infile <infile> 1250 Input file name. 1251 --infileParams <Name,Value,...> [default: auto] 1252 A comma delimited list of parameter name and value pairs for reading 1253 molecules from files. The supported parameter names for different file 1254 formats, along with their default values, are shown below: 1255 1256 SD, MOL: removeHydrogens,no,sanitize,yes,strictParsing,yes 1257 1258 SMILES: smilesColumn,1,smilesNameColumn,2,smilesDelimiter,space, 1259 smilesTitleLine,auto,sanitize,yes 1260 1261 Possible values for smilesDelimiter: space, comma or tab. 1262 --maxIters <number> [default: 50] 1263 Maximum number of iterations to perform for each molecule or conformer 1264 during energy minimization by a quantum chemistry method. 1265 -m, --methodName <text> [default: auto] 1266 Method to use for constrained energy minimization. Default: B3LYP [ Ref 150 ]. 1267 The specified value must be a valid Psi4 method name. No validation is 1268 performed. 1269 1270 The following list shows a representative sample of methods available 1271 in Psi4: 1272 1273 B1LYP, B2PLYP, B2PLYP-D3BJ, B2PLYP-D3MBJ, B3LYP, B3LYP-D3BJ, 1274 B3LYP-D3MBJ, CAM-B3LYP, CAM-B3LYP-D3BJ, HF, HF-D3BJ, HF3c, M05, 1275 M06, M06-2x, M06-HF, M06-L, MN12-L, MN15, MN15-D3BJ,PBE, PBE0, 1276 PBEH3c, PW6B95, PW6B95-D3BJ, WB97, WB97X, WB97X-D, WB97X-D3BJ 1277 1278 --mcsParams <Name,Value,...> [default: auto] 1279 Parameter values to use for identifying a maximum common substructure 1280 (MCS) in between a pair of reference and input molecules.In general, it is a 1281 comma delimited list of parameter name and value pairs. The supported 1282 parameter names along with their default values are shown below: 1283 1284 atomCompare,CompareElements,bondCompare,CompareOrder, 1285 maximizeBonds,yes,matchValences,yes,matchChiralTag,no, 1286 minNumAtoms,1,minNumBonds,0,ringMatchesRingOnly,yes, 1287 completeRingsOnly,yes,threshold,1.0,timeOut,3600,seedSMARTS,none 1288 1289 Possible values for atomCompare: CompareAny, CompareElements, 1290 CompareIsotopes. Possible values for bondCompare: CompareAny, 1291 CompareOrder, CompareOrderExact. 1292 1293 A brief description of MCS parameters taken from RDKit documentation is 1294 as follows: 1295 1296 atomCompare - Controls match between two atoms 1297 bondCompare - Controls match between two bonds 1298 maximizeBonds - Maximize number of bonds instead of atoms 1299 matchValences - Include atom valences in the MCS match 1300 matchChiralTag - Include atom chirality in the MCS match 1301 minNumAtoms - Minimum number of atoms in the MCS match 1302 minNumBonds - Minimum number of bonds in the MCS match 1303 ringMatchesRingOnly - Ring bonds only match other ring bonds 1304 completeRingsOnly - Partial rings not allowed during the match 1305 threshold - Fraction of the dataset that must contain the MCS 1306 seedSMARTS - SMARTS string as the seed of the MCS 1307 timeout - Timeout for the MCS calculation in seconds 1308 1309 --mp <yes or no> [default: no] 1310 Use multiprocessing. 1311 1312 By default, input data is retrieved in a lazy manner via mp.Pool.imap() 1313 function employing lazy RDKit data iterable. This allows processing of 1314 arbitrary large data sets without any additional requirements memory. 1315 1316 All input data may be optionally loaded into memory by mp.Pool.map() 1317 before starting worker processes in a process pool by setting the value 1318 of 'inputDataMode' to 'InMemory' in '--mpParams' option. 1319 1320 A word to the wise: The default 'chunkSize' value of 1 during 'Lazy' input 1321 data mode may adversely impact the performance. The '--mpParams' section 1322 provides additional information to tune the value of 'chunkSize'. 1323 --mpLevel <Molecules or Conformers> [default: Molecules] 1324 Perform multiprocessing at molecules or conformers level. Possible values: 1325 Molecules or Conformers. The 'Molecules' value starts a process pool at the 1326 molecules level. All conformers of a molecule are processed in a single 1327 process. The 'Conformers' value, however, starts a process pool at the 1328 conformers level. Each conformer of a molecule is processed in an individual 1329 process in the process pool. The default Psi4 'OutputFile' is set to 'quiet' 1330 using '--psi4RunParams' for 'Conformers' level. Otherwise, it may generate 1331 a large number of Psi4 output files. 1332 --mpParams <Name,Value,...> [default: auto] 1333 A comma delimited list of parameter name and value pairs to configure 1334 multiprocessing. 1335 1336 The supported parameter names along with their default and possible 1337 values are shown below: 1338 1339 chunkSize, auto 1340 inputDataMode, Lazy [ Possible values: InMemory or Lazy ] 1341 numProcesses, auto [ Default: mp.cpu_count() ] 1342 1343 These parameters are used by the following functions to configure and 1344 control the behavior of multiprocessing: mp.Pool(), mp.Pool.map(), and 1345 mp.Pool.imap(). 1346 1347 The chunkSize determines chunks of input data passed to each worker 1348 process in a process pool by mp.Pool.map() and mp.Pool.imap() functions. 1349 The default value of chunkSize is dependent on the value of 'inputDataMode'. 1350 1351 The mp.Pool.map() function, invoked during 'InMemory' input data mode, 1352 automatically converts RDKit data iterable into a list, loads all data into 1353 memory, and calculates the default chunkSize using the following method 1354 as shown in its code: 1355 1356 chunkSize, extra = divmod(len(dataIterable), len(numProcesses) * 4) 1357 if extra: chunkSize += 1 1358 1359 For example, the default chunkSize will be 7 for a pool of 4 worker processes 1360 and 100 data items. 1361 1362 The mp.Pool.imap() function, invoked during 'Lazy' input data mode, employs 1363 'lazy' RDKit data iterable to retrieve data as needed, without loading all the 1364 data into memory. Consequently, the size of input data is not known a priori. 1365 It's not possible to estimate an optimal value for the chunkSize. The default 1366 chunkSize is set to 1. 1367 1368 The default value for the chunkSize during 'Lazy' data mode may adversely 1369 impact the performance due to the overhead associated with exchanging 1370 small chunks of data. It is generally a good idea to explicitly set chunkSize to 1371 a larger value during 'Lazy' input data mode, based on the size of your input 1372 data and number of processes in the process pool. 1373 1374 The mp.Pool.map() function waits for all worker processes to process all 1375 the data and return the results. The mp.Pool.imap() function, however, 1376 returns the the results obtained from worker processes as soon as the 1377 results become available for specified chunks of data. 1378 1379 The order of data in the results returned by both mp.Pool.map() and 1380 mp.Pool.imap() functions always corresponds to the input data. 1381 -o, --outfile <outfile> 1382 Output file name. 1383 --outfileParams <Name,Value,...> [default: auto] 1384 A comma delimited list of parameter name and value pairs for writing 1385 molecules to files. The supported parameter names for different file 1386 formats, along with their default values, are shown below: 1387 1388 SD: kekulize,yes,forceV3000,no 1389 1390 --overwrite 1391 Overwrite existing files. 1392 --precision <number> [default: 6] 1393 Floating point precision for writing energy values. 1394 --psi4OptionsParams <Name,Value,...> [default: none] 1395 A comma delimited list of Psi4 option name and value pairs for setting 1396 global and module options. The names are 'option_name' for global options 1397 and 'module_name__option_name' for options local to a module. The 1398 specified option names must be valid Psi4 names. No validation is 1399 performed. 1400 1401 The specified option name and value pairs are processed and passed to 1402 psi4.set_options() as a dictionary. The supported value types are float, 1403 integer, boolean, or string. The float value string is converted into a float. 1404 The valid values for a boolean string are yes, no, true, false, on, or off. 1405 --psi4RunParams <Name,Value,...> [default: auto] 1406 A comma delimited list of parameter name and value pairs for configuring 1407 Psi4 jobs. 1408 1409 The supported parameter names along with their default and possible 1410 values are shown below: 1411 1412 MemoryInGB, 1 1413 NumThreads, 1 1414 OutputFile, auto [ Possible values: stdout, quiet, or FileName ] 1415 ScratchDir, auto [ Possivle values: DirName] 1416 RemoveOutputFile, yes [ Possible values: yes, no, true, or false] 1417 1418 These parameters control the runtime behavior of Psi4. 1419 1420 The default file name for 'OutputFile' is <InFileRoot>_Psi4.out. The PID 1421 is appended to output file name during multiprocessing as shown: 1422 <InFileRoot>_Psi4_<PIDNum>.out. The 'stdout' value for 'OutputType' 1423 sends Psi4 output to stdout. The 'quiet' or 'devnull' value suppresses 1424 all Psi4 output. The 'OutputFile' is set to 'quiet' for 'auto' value during 1425 'Conformers' of '--mpLevel' option. 1426 1427 The default 'Yes' value of 'RemoveOutputFile' option forces the removal 1428 of any existing Psi4 before creating new files to append output from 1429 multiple Psi4 runs. 1430 1431 The option 'ScratchDir' is a directory path to the location of scratch 1432 files. The default value corresponds to Psi4 default. It may be used to 1433 override the deafult path. 1434 -q, --quiet <yes or no> [default: no] 1435 Use quiet mode. The warning and information messages will not be printed. 1436 -r, --reffile <reffile> 1437 Reference input file name containing a 3D reference molecule. A common 1438 core scaffold must be present in a pair of an input and reference molecules. 1439 Otherwise, no constrained minimization is performed on the input molecule. 1440 --reference <text> [default: auto] 1441 Reference wave function to use for energy calculation. Default: RHF or UHF. 1442 The default values are Restricted Hartree-Fock (RHF) for closed-shell molecules 1443 with all electrons paired and Unrestricted Hartree-Fock (UHF) for open-shell 1444 molecules with unpaired electrons. 1445 1446 The specified value must be a valid Psi4 reference wave function. No validation 1447 is performed. For example: ROHF, CUHF, RKS, etc. 1448 1449 The spin multiplicity determines the default value of reference wave function 1450 for input molecules. It is calculated from number of free radical electrons using 1451 Hund's rule of maximum multiplicity defined as 2S + 1 where S is the total 1452 electron spin. The total spin is 1/2 the number of free radical electrons in a 1453 molecule. The value of 'SpinMultiplicity' molecule property takes precedence 1454 over the calculated value of spin multiplicity. 1455 -s, --scaffold <auto or SMARTS> [default: auto] 1456 Common core scaffold between a pair of input and reference molecules used for 1457 constrained minimization of molecules in input file. Possible values: Auto or a 1458 valid SMARTS pattern. The common core scaffold is automatically detected 1459 corresponding to the Maximum Common Substructure (MCS) between a pair of 1460 reference and input molecules. A valid SMARTS pattern may be optionally specified 1461 for the common core scaffold. 1462 --scaffoldRMSDOut <yes or no> [default: No] 1463 Write out RMSD value for common core alignment between a pair of input and 1464 reference molecules. 1465 -w, --workingdir <dir> 1466 Location of working directory which defaults to the current directory. 1467 1468 Examples: 1469 To perform constrained energy minimization for molecules in a SMILES file against 1470 a reference 3D molecule in a SD file using a common core scaffold between pairs of 1471 input and reference molecules identified using MCS, generating up to 50 conformations 1472 using ETKDGv2 methodology followed by initial MMFF forcefield minimization and final 1473 energy minimization using B3LYP/6-31G** and B3LYP/6-31+G** for non-sulfur and 1474 sulfur containing molecules, and write out a SD file containing minimum energy 1475 structure corresponding to each constrained molecule, type: 1476 1477 % Psi4PerformConstrainedMinimization.py -i Psi4SampleAlkanes.smi 1478 -r Psi4SampleEthane3D.sdf -o Psi4SampleAlkanesOut.sdf 1479 1480 To run the first example in a quiet mode and write out a SD file, type: 1481 1482 % Psi4PerformConstrainedMinimization.py -q yes 1483 -i Psi4SampleAlkanes.smi -r Psi4SampleEthane3D.sdf 1484 -o Psi4SampleAlkanesOut.sdf 1485 1486 To run the first example in multiprocessing mode at molecules level on all 1487 available CPUs without loading all data into memory and write out a SD file, 1488 type: 1489 1490 % Psi4PerformConstrainedMinimization.py --mp yes 1491 -i Psi4SampleAlkanes.smi -r Psi4SampleEthane3D.sdf 1492 -o Psi4SampleAlkanesOut.sdf 1493 1494 To run the first example in multiprocessing mode at conformers level on all 1495 available CPUs without loading all data into memory and write out a SD file, 1496 type: 1497 1498 % Psi4PerformConstrainedMinimization.py --mp yes --mpLevel Conformers 1499 -i Psi4SampleAlkanes.smi -r Psi4SampleEthane3D.sdf 1500 -o Psi4SampleAlkanesOut.sdf 1501 1502 To run the first example in multiprocessing mode at molecules level on all 1503 available CPUs by loading all data into memory and write out a SD file, type: 1504 1505 % Psi4PerformConstrainedMinimization.py --mp yes --mpParams 1506 "inputDataMode,Lazy,numProcesses,4,chunkSize,8" 1507 -i Psi4SampleAlkanes.smi -r Psi4SampleEthane3D.sdf 1508 -o Psi4SampleAlkanesOut.sdf 1509 1510 To rerun the first example using an explicit SMARTS string for a common core 1511 scaffold and write out a SD file, type: 1512 1513 % Psi4PerformConstrainedMinimization.py --scaffold "CC" 1514 -i Psi4SampleAlkanes.smi -r Psi4SampleEthane3D.sdf 1515 -o Psi4SampleAlkanesOut.sdf 1516 1517 To run the first example using a specific set of parameters for generating 1518 an initial set of conformers followed by energy minimization using forcefield 1519 and a quantum chemistry method and write out a SD file type: 1520 1521 % Psi4PerformConstrainedMinimization.py --confParams " 1522 confMethod,ETKDGv2,forceField,MMFF, forceFieldMMFFVariant,MMFF94s, 1523 maxConfs,20,embedRMSDCutoff,0.5" --energyUnits "kJ/mol" -m B3LYP 1524 -b "6-31+G**" --maxIters 20 -i Psi4SampleAlkanes.smi -r Psi4SampleEthane3D.sdf 1525 -o Psi4SampleAlkanesOut.sdf 1526 1527 To run the first example for molecules in a CSV SMILES file, SMILES strings 1528 in column 1, name column 2, and write out a SD file, type: 1529 1530 % Psi4PerformConstrainedMinimization.py --infileParams 1531 "smilesDelimiter,comma,smilesTitleLine,yes,smilesColumn,1, 1532 smilesNameColumn,2" -i Psi4SampleAlkanes.csv 1533 -r Psi4SampleEthane3D.sdf -o Psi4SampleAlkanesOut.sdf 1534 1535 Author: 1536 1537 Manish Sud(msud@san.rr.com) 1538 1539 See also: 1540 Psi4CalculateEnergy.py, Psi4CalculatePartialCharges.py, Psi4GenerateConformers.py, 1541 Psi4GenerateConstrainedConformers.py, Psi4PerformMinimization.py 1542 1543 Copyright: 1544 Copyright (C) 2025 Manish Sud. All rights reserved. 1545 1546 The functionality available in this script is implemented using Psi4, an 1547 open source quantum chemistry software package, and RDKit, an open 1548 source toolkit for cheminformatics developed by Greg Landrum. 1549 1550 This file is part of MayaChemTools. 1551 1552 MayaChemTools is free software; you can redistribute it and/or modify it under 1553 the terms of the GNU Lesser General Public License as published by the Free 1554 Software Foundation; either version 3 of the License, or (at your option) any 1555 later version. 1556 1557 """ 1558 1559 if __name__ == "__main__": 1560 main()