1 #!/bin/env python 2 # 3 # File: RDKitFilterTorsionStrainEnergyAlerts.py 4 # Author: Manish Sud <msud@san.rr.com> 5 # 6 # Collaborator: Pat Walters 7 # 8 # Copyright (C) 2024 Manish Sud. All rights reserved. 9 # 10 # This script uses the torsion strain energy library developed by Gu, S.; 11 # Smith, M. S.; Yang, Y.; Irwin, J. J.; Shoichet, B. K. [ Ref 153 ]. 12 # 13 # The torsion strain enegy library is based on the Torsion Library jointly 14 # developed by the University of Hamburg, Center for Bioinformatics, 15 # Hamburg, Germany and F. Hoffmann-La-Roche Ltd., Basel, Switzerland. 16 # 17 # The functionality available in this script is implemented using RDKit, an 18 # open source toolkit for cheminformatics developed by Greg Landrum. 19 # 20 # This file is part of MayaChemTools. 21 # 22 # MayaChemTools is free software; you can redistribute it and/or modify it under 23 # the terms of the GNU Lesser General Public License as published by the Free 24 # Software Foundation; either version 3 of the License, or (at your option) any 25 # later version. 26 # 27 # MayaChemTools is distributed in the hope that it will be useful, but without 28 # any warranty; without even the implied warranty of merchantability of fitness 29 # for a particular purpose. See the GNU Lesser General Public License for more 30 # details. 31 # 32 # You should have received a copy of the GNU Lesser General Public License 33 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or 34 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, 35 # Boston, MA, 02111-1307, USA. 36 # 37 38 from __future__ import print_function 39 40 # Add local python path to the global path and import standard library modules... 41 import os 42 import sys; sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python")) 43 import time 44 import re 45 import glob 46 import multiprocessing as mp 47 import math 48 49 # RDKit imports... 50 try: 51 from rdkit import rdBase 52 from rdkit import Chem 53 from rdkit.Chem import rdMolTransforms 54 except ImportError as ErrMsg: 55 sys.stderr.write("\nFailed to import RDKit module/package: %s\n" % ErrMsg) 56 sys.stderr.write("Check/update your RDKit environment and try again.\n\n") 57 sys.exit(1) 58 59 # MayaChemTools imports... 60 try: 61 from docopt import docopt 62 import MiscUtil 63 import RDKitUtil 64 from TorsionAlerts.TorsionStrainEnergyAlerts import TorsionStrainEnergyAlerts 65 except ImportError as ErrMsg: 66 sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg) 67 sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n") 68 sys.exit(1) 69 70 ScriptName = os.path.basename(sys.argv[0]) 71 Options = {} 72 OptionsInfo = {} 73 74 def main(): 75 """Start execution of the script.""" 76 77 MiscUtil.PrintInfo("\n%s (RDKit v%s; MayaChemTools v%s; %s): Starting...\n" % (ScriptName, rdBase.rdkitVersion, MiscUtil.GetMayaChemToolsVersion(), time.asctime())) 78 79 (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime() 80 81 # Retrieve command line arguments and options... 82 RetrieveOptions() 83 84 if Options["--list"]: 85 # Handle listing of torsion library information... 86 ProcessListTorsionLibraryOption() 87 else: 88 # Process and validate command line arguments and options... 89 ProcessOptions() 90 91 # Perform actions required by the script... 92 PerformFiltering() 93 94 MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName) 95 MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime)) 96 97 def PerformFiltering(): 98 """Filter molecules using SMARTS torsion rules in the torsion strain energy 99 library file.""" 100 101 # Setup a molecule reader... 102 MiscUtil.PrintInfo("\nProcessing file %s..." % OptionsInfo["Infile"]) 103 Mols = RDKitUtil.ReadMolecules(OptionsInfo["Infile"], **OptionsInfo["InfileParams"]) 104 105 MolCount, ValidMolCount, RemainingMolCount, WriteFailedCount = ProcessMolecules(Mols) 106 107 MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount) 108 MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount) 109 MiscUtil.PrintInfo("Number of molecules failed during writing: %d" % WriteFailedCount) 110 MiscUtil.PrintInfo("Number of ignored molecules: %d" % (MolCount - ValidMolCount + WriteFailedCount)) 111 112 MiscUtil.PrintInfo("\nNumber of remaining molecules: %d" % RemainingMolCount) 113 MiscUtil.PrintInfo("Number of filtered molecules: %d" % (ValidMolCount - RemainingMolCount)) 114 115 def ProcessMolecules(Mols): 116 """Process and filter molecules.""" 117 118 if OptionsInfo["MPMode"]: 119 return ProcessMoleculesUsingMultipleProcesses(Mols) 120 else: 121 return ProcessMoleculesUsingSingleProcess(Mols) 122 123 def ProcessMoleculesUsingSingleProcess(Mols): 124 """Process and filter molecules using a single process.""" 125 126 # Instantiate torsion strain energy alerts class... 127 TorsionStrainEnergyAlertsHandle = InstantiateTorsionStrainEnergyAlertsClass() 128 129 MiscUtil.PrintInfo("\nFiltering molecules...") 130 131 OutfileFilteredMode = OptionsInfo["OutfileFilteredMode"] 132 133 # Set up writers... 134 OutfilesWriters = SetupOutfilesWriters() 135 136 WriterRemaining = OutfilesWriters["WriterRemaining"] 137 WriterFiltered = OutfilesWriters["WriterFiltered"] 138 WriterAlertSummary = OutfilesWriters["WriterAlertSummary"] 139 140 # Initialize alerts summary info... 141 TorsionAlertsSummaryInfo = InitializeTorsionAlertsSummaryInfo() 142 143 (MolCount, ValidMolCount, RemainingMolCount, WriteFailedCount, FilteredMolWriteCount) = [0] * 5 144 for Mol in Mols: 145 MolCount += 1 146 147 if Mol is None: 148 continue 149 150 if RDKitUtil.IsMolEmpty(Mol): 151 MiscUtil.PrintWarning("Ignoring empty molecule: %s" % RDKitUtil.GetMolName(Mol, MolCount)) 152 continue 153 154 # Check for 3D flag... 155 if not Mol.GetConformer().Is3D(): 156 MiscUtil.PrintWarning("3D tag is not set. Ignoring molecule: %s\n" % RDKitUtil.GetMolName(Mol, MolCount)) 157 continue 158 159 ValidMolCount += 1 160 161 # Identify torsion library alerts for rotatable bonds.. 162 RotBondsAlertsStatus, RotBondsAlertsInfo = TorsionStrainEnergyAlertsHandle.IdentifyTorsionLibraryAlertsForRotatableBonds(Mol) 163 164 TrackTorsionAlertsSummaryInfo(TorsionAlertsSummaryInfo, RotBondsAlertsInfo) 165 166 # Write out filtered and remaining molecules... 167 WriteStatus = True 168 if RotBondsAlertsStatus: 169 if OutfileFilteredMode: 170 WriteStatus = WriteMolecule(WriterFiltered, Mol, RotBondsAlertsInfo) 171 if WriteStatus: 172 FilteredMolWriteCount += 1 173 else: 174 RemainingMolCount += 1 175 WriteStatus = WriteMolecule(WriterRemaining, Mol, RotBondsAlertsInfo) 176 177 if not WriteStatus: 178 WriteFailedCount += 1 179 180 WriteTorsionAlertsSummaryInfo(WriterAlertSummary, TorsionAlertsSummaryInfo) 181 CloseOutfilesWriters(OutfilesWriters) 182 183 if FilteredMolWriteCount: 184 WriteTorsionAlertsFilteredByRulesInfo(TorsionAlertsSummaryInfo) 185 186 return (MolCount, ValidMolCount, RemainingMolCount, WriteFailedCount) 187 188 def ProcessMoleculesUsingMultipleProcesses(Mols): 189 """Process and filter molecules using multiprocessing.""" 190 191 MiscUtil.PrintInfo("\nFiltering molecules using multiprocessing...") 192 193 MPParams = OptionsInfo["MPParams"] 194 OutfileFilteredMode = OptionsInfo["OutfileFilteredMode"] 195 196 # Instantiate torsion strain energy alerts class to list torsion library information... 197 TorsionStrainEnergyAlertsHandle = InstantiateTorsionStrainEnergyAlertsClass() 198 199 # Set up writers... 200 OutfilesWriters = SetupOutfilesWriters() 201 202 WriterRemaining = OutfilesWriters["WriterRemaining"] 203 WriterFiltered = OutfilesWriters["WriterFiltered"] 204 WriterAlertSummary = OutfilesWriters["WriterAlertSummary"] 205 206 # Initialize alerts summary info... 207 TorsionAlertsSummaryInfo = InitializeTorsionAlertsSummaryInfo() 208 209 # Setup data for initializing a worker process... 210 MiscUtil.PrintInfo("\nEncoding options info and rotatable bond pattern molecule...") 211 InitializeWorkerProcessArgs = (MiscUtil.ObjectToBase64EncodedString(Options), MiscUtil.ObjectToBase64EncodedString(OptionsInfo)) 212 213 # Setup a encoded mols data iterable for a worker process... 214 WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(Mols) 215 216 # Setup process pool along with data initialization for each process... 217 MiscUtil.PrintInfo("\nConfiguring multiprocessing using %s method..." % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()")) 218 MiscUtil.PrintInfo("NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n" % (MPParams["NumProcesses"], MPParams["InputDataMode"], ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"]))) 219 220 ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeWorkerProcess, InitializeWorkerProcessArgs) 221 222 # Start processing... 223 if re.match("^Lazy$", MPParams["InputDataMode"], re.I): 224 Results = ProcessPool.imap(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"]) 225 elif re.match("^InMemory$", MPParams["InputDataMode"], re.I): 226 Results = ProcessPool.map(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"]) 227 else: 228 MiscUtil.PrintError("The value, %s, specified for \"--inputDataMode\" is not supported." % (MPParams["InputDataMode"])) 229 230 (MolCount, ValidMolCount, RemainingMolCount, WriteFailedCount, FilteredMolWriteCount) = [0] * 5 231 for Result in Results: 232 MolCount += 1 233 MolIndex, EncodedMol, RotBondsAlertsStatus, RotBondsAlertsInfo = Result 234 235 if EncodedMol is None: 236 continue 237 ValidMolCount += 1 238 239 Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol) 240 241 TrackTorsionAlertsSummaryInfo(TorsionAlertsSummaryInfo, RotBondsAlertsInfo) 242 243 # Write out filtered and remaining molecules... 244 WriteStatus = True 245 if RotBondsAlertsStatus: 246 if OutfileFilteredMode: 247 WriteStatus = WriteMolecule(WriterFiltered, Mol, RotBondsAlertsInfo) 248 if WriteStatus: 249 FilteredMolWriteCount += 1 250 else: 251 RemainingMolCount += 1 252 WriteStatus = WriteMolecule(WriterRemaining, Mol, RotBondsAlertsInfo) 253 254 if not WriteStatus: 255 WriteFailedCount += 1 256 257 WriteTorsionAlertsSummaryInfo(WriterAlertSummary, TorsionAlertsSummaryInfo) 258 CloseOutfilesWriters(OutfilesWriters) 259 260 if FilteredMolWriteCount: 261 WriteTorsionAlertsFilteredByRulesInfo(TorsionAlertsSummaryInfo) 262 263 return (MolCount, ValidMolCount, RemainingMolCount, WriteFailedCount) 264 265 def InitializeWorkerProcess(*EncodedArgs): 266 """Initialize data for a worker process.""" 267 268 global Options, OptionsInfo 269 270 MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid()) 271 272 # Decode Options and OptionInfo... 273 Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0]) 274 OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1]) 275 276 # Instantiate torsion strain energy alerts class... 277 OptionsInfo["TorsionStrainEnergyAlertsHandle"] = InstantiateTorsionStrainEnergyAlertsClass(Quiet = True) 278 279 def WorkerProcess(EncodedMolInfo): 280 """Process data for a worker process.""" 281 282 MolIndex, EncodedMol = EncodedMolInfo 283 284 if EncodedMol is None: 285 return [MolIndex, None, False, None] 286 287 Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol) 288 if RDKitUtil.IsMolEmpty(Mol): 289 MolName = RDKitUtil.GetMolName(Mol, (MolIndex + 1)) 290 MiscUtil.PrintWarning("Ignoring empty molecule: %s" % MolName) 291 return [MolIndex, None, False, None] 292 293 # Check for 3D flag... 294 if not Mol.GetConformer().Is3D(): 295 MolName = RDKitUtil.GetMolName(Mol, (MolIndex + 1)) 296 MiscUtil.PrintWarning("3D tag is not set. Ignoring molecule: %s\n" % MolName) 297 return [MolIndex, None, False, None] 298 299 # Identify torsion library alerts for rotatable bonds.. 300 TorsionStrainEnergyAlertsHandle = OptionsInfo["TorsionStrainEnergyAlertsHandle"] 301 RotBondsAlertsStatus, RotBondsAlertsInfo = TorsionStrainEnergyAlertsHandle.IdentifyTorsionLibraryAlertsForRotatableBonds(Mol) 302 303 return [MolIndex, EncodedMol, RotBondsAlertsStatus, RotBondsAlertsInfo] 304 305 def InitializeTorsionAlertsSummaryInfo(): 306 """Initialize torsion alerts summary.""" 307 308 if OptionsInfo["CountMode"]: 309 return None 310 311 if not OptionsInfo["TrackAlertsSummaryInfo"]: 312 return None 313 314 TorsionAlertsSummaryInfo = {} 315 TorsionAlertsSummaryInfo["RuleIDs"] = [] 316 317 for DataLabel in ["SMARTSToRuleIDs", "RuleSMARTS", "HierarchyClassName", "HierarchySubClassName", "EnergyMethod", "MaxSingleEnergyAlertTypes", "MaxSingleEnergyAlertTypesMolCount"]: 318 TorsionAlertsSummaryInfo[DataLabel] = {} 319 320 return TorsionAlertsSummaryInfo 321 322 def TrackTorsionAlertsSummaryInfo(TorsionAlertsSummaryInfo, RotBondsAlertsInfo): 323 """Track torsion alerts summary information for matched torsion rules in a 324 molecule.""" 325 326 if OptionsInfo["CountMode"]: 327 return 328 329 if not OptionsInfo["TrackAlertsSummaryInfo"]: 330 return 331 332 if RotBondsAlertsInfo is None: 333 return 334 335 MolAlertsInfo = {} 336 MolAlertsInfo["RuleIDs"] = [] 337 MolAlertsInfo["MaxSingleEnergyAlertTypes"] = {} 338 339 for ID in RotBondsAlertsInfo["IDs"]: 340 if not RotBondsAlertsInfo["MatchStatus"][ID]: 341 continue 342 343 if SkipRotatableBondAlertInfo(ID, RotBondsAlertsInfo): 344 continue 345 346 MaxSingleEnergyAlertType = SetupMaxSingleEnergyAlertStatusValue(RotBondsAlertsInfo["MaxSingleEnergyAlertStatus"][ID]) 347 348 TorsionRuleNodeID = RotBondsAlertsInfo["TorsionRuleNodeID"][ID] 349 TorsionRuleSMARTS = RotBondsAlertsInfo["TorsionRuleSMARTS"][ID] 350 351 # Track data for torsion alert summary information across molecules... 352 if TorsionRuleNodeID not in TorsionAlertsSummaryInfo["RuleSMARTS"]: 353 TorsionAlertsSummaryInfo["RuleIDs"].append(TorsionRuleNodeID) 354 TorsionAlertsSummaryInfo["SMARTSToRuleIDs"][TorsionRuleSMARTS] = TorsionRuleNodeID 355 356 TorsionAlertsSummaryInfo["RuleSMARTS"][TorsionRuleNodeID] = TorsionRuleSMARTS 357 TorsionAlertsSummaryInfo["HierarchyClassName"][TorsionRuleNodeID] = RotBondsAlertsInfo["HierarchyClassName"][ID] 358 TorsionAlertsSummaryInfo["HierarchySubClassName"][TorsionRuleNodeID] = RotBondsAlertsInfo["HierarchySubClassName"][ID] 359 360 TorsionAlertsSummaryInfo["EnergyMethod"][TorsionRuleNodeID] = RotBondsAlertsInfo["EnergyMethod"][ID] 361 362 # Initialize number of alert types across all molecules... 363 TorsionAlertsSummaryInfo["MaxSingleEnergyAlertTypes"][TorsionRuleNodeID] = {} 364 365 # Initialize number of molecules flagged by each alert type... 366 TorsionAlertsSummaryInfo["MaxSingleEnergyAlertTypesMolCount"][TorsionRuleNodeID] = {} 367 368 if MaxSingleEnergyAlertType not in TorsionAlertsSummaryInfo["MaxSingleEnergyAlertTypes"][TorsionRuleNodeID]: 369 TorsionAlertsSummaryInfo["MaxSingleEnergyAlertTypes"][TorsionRuleNodeID][MaxSingleEnergyAlertType] = 0 370 TorsionAlertsSummaryInfo["MaxSingleEnergyAlertTypesMolCount"][TorsionRuleNodeID][MaxSingleEnergyAlertType] = 0 371 372 TorsionAlertsSummaryInfo["MaxSingleEnergyAlertTypes"][TorsionRuleNodeID][MaxSingleEnergyAlertType] += 1 373 374 # Track data for torsion alert information in a molecule... 375 if TorsionRuleNodeID not in MolAlertsInfo["MaxSingleEnergyAlertTypes"]: 376 MolAlertsInfo["RuleIDs"].append(TorsionRuleNodeID) 377 MolAlertsInfo["MaxSingleEnergyAlertTypes"][TorsionRuleNodeID] = {} 378 379 if MaxSingleEnergyAlertType not in MolAlertsInfo["MaxSingleEnergyAlertTypes"][TorsionRuleNodeID]: 380 MolAlertsInfo["MaxSingleEnergyAlertTypes"][TorsionRuleNodeID][MaxSingleEnergyAlertType] = 0 381 MolAlertsInfo["MaxSingleEnergyAlertTypes"][TorsionRuleNodeID][MaxSingleEnergyAlertType] += 1 382 383 # Track number of molecules flagged by a specific torsion alert... 384 for TorsionRuleNodeID in MolAlertsInfo["RuleIDs"]: 385 for MaxSingleEnergyAlertType in MolAlertsInfo["MaxSingleEnergyAlertTypes"][TorsionRuleNodeID]: 386 if MolAlertsInfo["MaxSingleEnergyAlertTypes"][TorsionRuleNodeID][MaxSingleEnergyAlertType]: 387 TorsionAlertsSummaryInfo["MaxSingleEnergyAlertTypesMolCount"][TorsionRuleNodeID][MaxSingleEnergyAlertType] += 1 388 389 def WriteTorsionAlertsSummaryInfo(Writer, TorsionAlertsSummaryInfo): 390 """Write out torsion alerts summary informatio to a CSV file.""" 391 392 if OptionsInfo["CountMode"]: 393 return 394 395 if not OptionsInfo["OutfileSummaryMode"]: 396 return 397 398 if len(TorsionAlertsSummaryInfo["RuleIDs"]) == 0: 399 return 400 401 # Write headers... 402 QuoteValues = True 403 Values = ["TorsionRule", "HierarchyClass", "HierarchySubClass", "EnergyMethod", "MaxSingleEnergyTorsionAlertTypes", "MaxSingleEnergyTorsionAlertCount", "MaxSingleEnergyTorsionAlertMolCount"] 404 Writer.write("%s\n" % MiscUtil.JoinWords(Values, ",", QuoteValues)) 405 406 SortedRuleIDs = GetSortedTorsionAlertsSummaryInfoRuleIDs(TorsionAlertsSummaryInfo) 407 408 # Write alerts information... 409 for ID in SortedRuleIDs: 410 # Remove any double quotes in SMARTS... 411 RuleSMARTS = TorsionAlertsSummaryInfo["RuleSMARTS"][ID] 412 RuleSMARTS = re.sub("\"", "", RuleSMARTS, re.I) 413 414 HierarchyClassName = TorsionAlertsSummaryInfo["HierarchyClassName"][ID] 415 HierarchySubClassName = TorsionAlertsSummaryInfo["HierarchySubClassName"][ID] 416 417 EnergyMethod = TorsionAlertsSummaryInfo["EnergyMethod"][ID] 418 419 MaxSingleEnergyAlertTypes = [] 420 MaxSingleEnergyAlertTypesCount = [] 421 MaxSingleEnergyAlertTypesMolCount = [] 422 for MaxSingleEnergyAlertType in sorted(TorsionAlertsSummaryInfo["MaxSingleEnergyAlertTypes"][ID]): 423 MaxSingleEnergyAlertTypes.append(MaxSingleEnergyAlertType) 424 MaxSingleEnergyAlertTypesCount.append("%s" % TorsionAlertsSummaryInfo["MaxSingleEnergyAlertTypes"][ID][MaxSingleEnergyAlertType]) 425 MaxSingleEnergyAlertTypesMolCount.append("%s" % TorsionAlertsSummaryInfo["MaxSingleEnergyAlertTypesMolCount"][ID][MaxSingleEnergyAlertType]) 426 427 Values = [RuleSMARTS, HierarchyClassName, HierarchySubClassName, EnergyMethod, "%s" % MiscUtil.JoinWords(MaxSingleEnergyAlertTypes, ","), "%s" % (MiscUtil.JoinWords(MaxSingleEnergyAlertTypesCount, ",")), "%s" % (MiscUtil.JoinWords(MaxSingleEnergyAlertTypesMolCount, ","))] 428 Writer.write("%s\n" % MiscUtil.JoinWords(Values, ",", QuoteValues)) 429 430 def GetSortedTorsionAlertsSummaryInfoRuleIDs(TorsionAlertsSummaryInfo): 431 """Sort torsion rule IDs by alert types molecule count in descending order.""" 432 433 SortedRuleIDs = [] 434 435 RuleIDs = TorsionAlertsSummaryInfo["RuleIDs"] 436 if len(RuleIDs) == 0: 437 return SortedRuleIDs 438 439 # Setup a map from AlertTypesMolCount to IDs for sorting alerts... 440 RuleIDs = TorsionAlertsSummaryInfo["RuleIDs"] 441 MolCountMap = {} 442 for ID in RuleIDs: 443 MolCount = 0 444 for AlertType in sorted(TorsionAlertsSummaryInfo["MaxSingleEnergyAlertTypes"][ID]): 445 MolCount += TorsionAlertsSummaryInfo["MaxSingleEnergyAlertTypesMolCount"][ID][AlertType] 446 MolCountMap[ID] = MolCount 447 448 SortedRuleIDs = sorted(RuleIDs, key = lambda ID: MolCountMap[ID], reverse = True) 449 450 return SortedRuleIDs 451 452 def WriteTorsionAlertsFilteredByRulesInfo(TorsionAlertsSummaryInfo): 453 """Write out torsion alerts SD files for individual torsion rules.""" 454 455 if OptionsInfo["CountMode"]: 456 return 457 458 if not OptionsInfo["OutfilesFilteredByRulesMode"]: 459 return 460 461 if len(TorsionAlertsSummaryInfo["RuleIDs"]) == 0: 462 return 463 464 # Setup a molecule reader for filtered molecules... 465 FilteredMols = RDKitUtil.ReadMolecules(OptionsInfo["OutfileFiltered"], **OptionsInfo["InfileParams"]) 466 467 # Get torsion rule IDs for writing out filtered SD files for individual torsion alert rules... 468 TorsionRuleIDs = GetTorsionAlertsFilteredByRuleFilesRuleIDs(TorsionAlertsSummaryInfo) 469 470 # Setup writers... 471 ByRuleOutfilesWriters = SetupByRuleOutfilesWriters(TorsionRuleIDs) 472 473 for Mol in FilteredMols: 474 # Retrieve torsion alerts info... 475 TorsionAlertsInfo = RetrieveTorsionAlertsInfo(Mol, TorsionAlertsSummaryInfo) 476 if TorsionAlertsInfo is None: 477 continue 478 479 for TorsionRuleID in TorsionRuleIDs: 480 if TorsionRuleID not in TorsionAlertsInfo["RuleSMARTS"]: 481 continue 482 483 WriteMoleculeFilteredByRuleID(ByRuleOutfilesWriters[TorsionRuleID], Mol, TorsionRuleID, TorsionAlertsSummaryInfo, TorsionAlertsInfo) 484 485 CloseByRuleOutfilesWriters(ByRuleOutfilesWriters) 486 487 def GetTorsionAlertsFilteredByRuleFilesRuleIDs(TorsionAlertsSummaryInfo): 488 """Get torsion rule IDs for writing out individual SD files filtered by torsion alert rules.""" 489 490 # Get torsion rule IDs triggering torsion alerts sorted in the order from the most to 491 # the least number of unique molecules... 492 RuleIDs = GetSortedTorsionAlertsSummaryInfoRuleIDs(TorsionAlertsSummaryInfo) 493 494 # Select torsion rule IDs for writing out SD files... 495 if not OptionsInfo["OutfilesFilteredByRulesAllMode"]: 496 MaxRuleIDs = OptionsInfo["OutfilesFilteredByRulesMaxCount"] 497 if MaxRuleIDs < len(RuleIDs): 498 RuleIDs = RuleIDs[0:MaxRuleIDs] 499 500 return RuleIDs 501 502 def RetrieveTorsionAlertsInfo(Mol, TorsionAlertsSummaryInfo): 503 """Parse torsion alerts data field value to retrieve alerts information for rotatable bonds.""" 504 505 TorsionAlertsLabel = OptionsInfo["SDFieldIDsToLabels"]["TorsionAlertsLabel"] 506 TorsionAlerts = Mol.GetProp(TorsionAlertsLabel) if Mol.HasProp(TorsionAlertsLabel) else None 507 508 if TorsionAlerts is None or len(TorsionAlerts) == 0: 509 return None 510 511 # Initialize for tracking by rule IDs... 512 TorsionAlertsInfo = {} 513 TorsionAlertsInfo["RuleIDs"] = [] 514 515 TorsionAlertsInfo["RuleSMARTS"] = {} 516 TorsionAlertsInfo["HierarchyClassName"] = {} 517 TorsionAlertsInfo["HierarchySubClassName"] = {} 518 TorsionAlertsInfo["EnergyMethod"] = {} 519 520 TorsionAlertsInfo["AtomIndices"] = {} 521 TorsionAlertsInfo["TorsionAtomIndices"] = {} 522 TorsionAlertsInfo["TorsionAngle"] = {} 523 524 TorsionAlertsInfo["Energy"] = {} 525 TorsionAlertsInfo["EnergyLowerBound"] = {} 526 TorsionAlertsInfo["EnergyUpperBound"] = {} 527 528 TorsionAlertsInfo["AngleNotObserved"] = {} 529 TorsionAlertsInfo["MaxSingleEnergyAlertType"] = {} 530 531 TorsionAlertsInfo["AnglesNotObservedCount"] = {} 532 TorsionAlertsInfo["MaxSingleEnergyAlertsCount"] = {} 533 534 ValuesDelimiter = OptionsInfo["IntraSetValuesDelim"] 535 TorsionAlertsSetSize = 12 536 537 TorsionAlertsWords = TorsionAlerts.split() 538 if len(TorsionAlertsWords) % TorsionAlertsSetSize: 539 MiscUtil.PrintError("The number of space delimited values, %s, for TorsionAlerts data field in filtered SD file must be a multiple of %s." % (len(TorsionAlertsWords), TorsionAlertsSetSize)) 540 541 ID = 0 542 for Index in range(0, len(TorsionAlertsWords), TorsionAlertsSetSize): 543 ID += 1 544 545 RotBondIndices, TorsionIndices, TorsionAngle, Energy, EnergyLowerBound, EnergyUpperBound, HierarchyClass, HierarchySubClass, TorsionRule, EnergyMethod, AngleNotObserved, MaxSingleEnergyAlertType = TorsionAlertsWords[Index: Index + TorsionAlertsSetSize] 546 RotBondIndices = RotBondIndices.split(ValuesDelimiter) 547 TorsionIndices = TorsionIndices.split(ValuesDelimiter) 548 549 if TorsionRule not in TorsionAlertsSummaryInfo["SMARTSToRuleIDs"]: 550 MiscUtil.PrintWarning("The SMARTS pattern, %s, for TorsionAlerts data field in filtered SD file doesn't map to any torsion rule..." % TorsionRule) 551 continue 552 TorsionRuleNodeID = TorsionAlertsSummaryInfo["SMARTSToRuleIDs"][TorsionRule] 553 554 # Track data for torsion alerts in a molecule... 555 if TorsionRuleNodeID not in TorsionAlertsInfo["RuleSMARTS"]: 556 TorsionAlertsInfo["RuleIDs"].append(TorsionRuleNodeID) 557 558 TorsionAlertsInfo["RuleSMARTS"][TorsionRuleNodeID] = TorsionRule 559 TorsionAlertsInfo["HierarchyClassName"][TorsionRuleNodeID] = HierarchyClass 560 TorsionAlertsInfo["HierarchySubClassName"][TorsionRuleNodeID] = HierarchySubClass 561 TorsionAlertsInfo["EnergyMethod"][TorsionRuleNodeID] = EnergyMethod 562 563 TorsionAlertsInfo["AtomIndices"][TorsionRuleNodeID] = [] 564 TorsionAlertsInfo["TorsionAtomIndices"][TorsionRuleNodeID] = [] 565 TorsionAlertsInfo["TorsionAngle"][TorsionRuleNodeID] = [] 566 567 TorsionAlertsInfo["Energy"][TorsionRuleNodeID] = [] 568 TorsionAlertsInfo["EnergyLowerBound"][TorsionRuleNodeID] = [] 569 TorsionAlertsInfo["EnergyUpperBound"][TorsionRuleNodeID] = [] 570 TorsionAlertsInfo["AngleNotObserved"][TorsionRuleNodeID] = [] 571 TorsionAlertsInfo["MaxSingleEnergyAlertType"][TorsionRuleNodeID] = [] 572 573 TorsionAlertsInfo["AnglesNotObservedCount"][TorsionRuleNodeID] = 0 574 TorsionAlertsInfo["MaxSingleEnergyAlertsCount"][TorsionRuleNodeID] = 0 575 576 # Track multiple values for a rule ID... 577 TorsionAlertsInfo["AtomIndices"][TorsionRuleNodeID].append(RotBondIndices) 578 TorsionAlertsInfo["TorsionAtomIndices"][TorsionRuleNodeID].append(TorsionIndices) 579 TorsionAlertsInfo["TorsionAngle"][TorsionRuleNodeID].append(TorsionAngle) 580 581 TorsionAlertsInfo["Energy"][TorsionRuleNodeID].append(Energy) 582 TorsionAlertsInfo["EnergyLowerBound"][TorsionRuleNodeID].append(EnergyLowerBound) 583 TorsionAlertsInfo["EnergyUpperBound"][TorsionRuleNodeID].append(EnergyUpperBound) 584 TorsionAlertsInfo["AngleNotObserved"][TorsionRuleNodeID].append(AngleNotObserved) 585 586 TorsionAlertsInfo["MaxSingleEnergyAlertType"][TorsionRuleNodeID].append(MaxSingleEnergyAlertType) 587 588 # Count angles not observer for a rule ID... 589 if AngleNotObserved == 'Yes': 590 TorsionAlertsInfo["AnglesNotObservedCount"][TorsionRuleNodeID] += 1 591 592 # Count max single energy alert for a rule ID... 593 if MaxSingleEnergyAlertType == 'Yes': 594 TorsionAlertsInfo["MaxSingleEnergyAlertsCount"][TorsionRuleNodeID] += 1 595 596 return TorsionAlertsInfo 597 598 def WriteMolecule(Writer, Mol, RotBondsAlertsInfo): 599 """Write out molecule.""" 600 601 if OptionsInfo["CountMode"]: 602 return True 603 604 SetupMolPropertiesForAlertsInformation(Mol, RotBondsAlertsInfo) 605 606 try: 607 Writer.write(Mol) 608 except Exception as ErrMsg: 609 MiscUtil.PrintWarning("Failed to write molecule %s:\n%s\n" % (RDKitUtil.GetMolName(Mol), ErrMsg)) 610 return False 611 612 return True 613 614 def SetupMolPropertiesForAlertsInformation(Mol, RotBondsAlertsInfo): 615 """Setup molecule properties containing alerts information for rotatable bonds.""" 616 617 if not OptionsInfo["OutfileAlerts"]: 618 return 619 620 SDFieldIDsToLabels = OptionsInfo["SDFieldIDsToLabels"] 621 Precision = OptionsInfo["Precision"] 622 623 # Setup rotatable bonds count... 624 RotBondsCount = 0 625 if RotBondsAlertsInfo is not None: 626 RotBondsCount = len(RotBondsAlertsInfo["IDs"]) 627 Mol.SetProp(SDFieldIDsToLabels["RotBondsCountLabel"], "%s" % RotBondsCount) 628 629 if RotBondsAlertsInfo is not None: 630 # Setup total energy along with lower and upper bounds... 631 Mol.SetProp(SDFieldIDsToLabels["TotalEnergyLabel"], "%s" % SetupEnergyValueForSDField(RotBondsAlertsInfo["TotalEnergy"], Precision)) 632 Mol.SetProp(SDFieldIDsToLabels["TotalEnergyLowerBoundCILabel"], "%s" % SetupEnergyValueForSDField(RotBondsAlertsInfo["TotalEnergyLowerBound"], Precision)) 633 Mol.SetProp(SDFieldIDsToLabels["TotalEnergyUpperBoundCILabel"], "%s" % SetupEnergyValueForSDField(RotBondsAlertsInfo["TotalEnergyUpperBound"], Precision)) 634 635 # Setup max single energy and alert count... 636 if OptionsInfo["MaxSingleEnergyMode"] or OptionsInfo["TotalOrMaxSingleEnergyMode"]: 637 Mol.SetProp(SDFieldIDsToLabels["MaxSingleEnergyLabel"], "%s" % SetupEnergyValueForSDField(RotBondsAlertsInfo["MaxSingleEnergy"], Precision)) 638 Mol.SetProp(SDFieldIDsToLabels["MaxSingleEnergyAlertsCountLabel"], "%s" % ("NA" if RotBondsAlertsInfo["MaxSingleEnergyAlertsCount"] is None else RotBondsAlertsInfo["MaxSingleEnergyAlertsCount"])) 639 640 Mol.SetProp(SDFieldIDsToLabels["AnglesNotObservedCountLabel"], "%s" % ("NA" if RotBondsAlertsInfo["AnglesNotObservedCount"] is None else RotBondsAlertsInfo["AnglesNotObservedCount"])) 641 642 643 # Setup alert information for rotatable bonds... 644 AlertsInfoValues = [] 645 646 # Delimiter for multiple values corresponding to specific set of information for 647 # a rotatable bond. For example: TorsionAtomIndices 648 ValuesDelim = OptionsInfo["IntraSetValuesDelim"] 649 650 # Delimiter for various values for a rotatable bond... 651 RotBondValuesDelim = OptionsInfo["InterSetValuesDelim"] 652 653 # Delimiter for values corresponding to multiple rotatable bonds... 654 AlertsInfoValuesDelim = OptionsInfo["InterSetValuesDelim"] 655 656 if RotBondsAlertsInfo is not None: 657 for ID in RotBondsAlertsInfo["IDs"]: 658 if not RotBondsAlertsInfo["MatchStatus"][ID]: 659 continue 660 661 if SkipRotatableBondAlertInfo(ID, RotBondsAlertsInfo): 662 continue 663 664 RotBondValues = [] 665 666 # Bond atom indices... 667 Values = ["%s" % Value for Value in RotBondsAlertsInfo["AtomIndices"][ID]] 668 RotBondValues.append(ValuesDelim.join(Values)) 669 670 # Torsion atom indices... 671 TorsionAtomIndices = SetupTorsionAtomIndicesValues(RotBondsAlertsInfo["TorsionAtomIndices"][ID], ValuesDelim) 672 RotBondValues.append(TorsionAtomIndices) 673 674 # Torsion angle... 675 RotBondValues.append("%.2f" % RotBondsAlertsInfo["TorsionAngle"][ID]) 676 677 # Energy along with its lower and upper bound confidence interval... 678 RotBondValues.append(SetupEnergyValueForSDField(RotBondsAlertsInfo["Energy"][ID], Precision)) 679 RotBondValues.append(SetupEnergyValueForSDField(RotBondsAlertsInfo["EnergyLowerBound"][ID], Precision)) 680 RotBondValues.append(SetupEnergyValueForSDField(RotBondsAlertsInfo["EnergyUpperBound"][ID], Precision)) 681 682 # Hierarchy class and subclass names... 683 RotBondValues.append("%s" % RotBondsAlertsInfo["HierarchyClassName"][ID]) 684 RotBondValues.append("%s" % RotBondsAlertsInfo["HierarchySubClassName"][ID]) 685 686 # Torsion rule SMARTS... 687 RotBondValues.append("%s" % RotBondsAlertsInfo["TorsionRuleSMARTS"][ID]) 688 689 # Energy method... 690 RotBondValues.append("%s" % RotBondsAlertsInfo["EnergyMethod"][ID]) 691 692 # Angle not observed... 693 RotBondValues.append("%s" % SetupAngleNotObservedValue(RotBondsAlertsInfo["AngleNotObserved"][ID])) 694 695 # Max single energy alert status... 696 RotBondValues.append("%s" % SetupMaxSingleEnergyAlertStatusValue(RotBondsAlertsInfo["MaxSingleEnergyAlertStatus"][ID])) 697 698 # Track joined values for a rotatable bond... 699 AlertsInfoValues.append("%s" % RotBondValuesDelim.join(RotBondValues)) 700 701 if len(AlertsInfoValues): 702 Mol.SetProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionAlertsLabel"], "%s" % ("%s" % AlertsInfoValuesDelim.join(AlertsInfoValues))) 703 704 def WriteMoleculeFilteredByRuleID(Writer, Mol, TorsionRuleID, TorsionAlertsSummaryInfo, TorsionAlertsInfo): 705 """Write out molecule.""" 706 707 if OptionsInfo["CountMode"]: 708 return 709 710 SetupMolPropertiesForFilteredByRuleIDAlertsInformation(Mol, TorsionRuleID, TorsionAlertsSummaryInfo, TorsionAlertsInfo) 711 712 Writer.write(Mol) 713 714 def SetupMolPropertiesForFilteredByRuleIDAlertsInformation(Mol, TorsionRuleID, TorsionAlertsSummaryInfo, TorsionAlertsInfo): 715 """Setup molecule properties containing alerts information for torsion alerts 716 fileted by Rule IDs.""" 717 718 # Delete torsion alerts information for rotatable bonds... 719 if Mol.HasProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionAlertsLabel"]): 720 Mol.ClearProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionAlertsLabel"]) 721 722 # Delimiter for values... 723 IntraSetValuesDelim = OptionsInfo["IntraSetValuesDelim"] 724 InterSetValuesDelim = OptionsInfo["InterSetValuesDelim"] 725 726 # Setup alert rule information... 727 AlertRuleInfoValues = [] 728 729 AlertRuleInfoValues.append("%s" % TorsionAlertsInfo["HierarchyClassName"][TorsionRuleID]) 730 AlertRuleInfoValues.append("%s" % TorsionAlertsInfo["HierarchySubClassName"][TorsionRuleID]) 731 732 AlertRuleInfoValues.append("%s" % TorsionAlertsInfo["RuleSMARTS"][TorsionRuleID]) 733 AlertRuleInfoValues.append("%s" % TorsionAlertsInfo["EnergyMethod"][TorsionRuleID]) 734 735 Mol.SetProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionRuleLabel"], "%s" % ("%s" % InterSetValuesDelim.join(AlertRuleInfoValues))) 736 737 # Setup max single energy alert count for torsion rule... 738 Mol.SetProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionRuleMaxSingleEnergyAlertsCountLabel"], "%s" % TorsionAlertsInfo["MaxSingleEnergyAlertsCount"][TorsionRuleID]) 739 740 # Setup angle not observed count for torsion rule... 741 Mol.SetProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionRuleAnglesNotObservedCountLabel"], "%s" % TorsionAlertsInfo["AnglesNotObservedCount"][TorsionRuleID]) 742 743 # Setup torsion rule alerts... 744 # "TorsionRuleAlertsLabel": "TorsionRuleAlerts (RotBondIndices TorsionIndices TorsionAngle Energy EnergyLowerBoundCI EnergyUpperBoundCI EnergyMethod AngleNotObserved MaxSingleEnergyAlert) 745 AlertsInfoValues = [] 746 for Index in range(0, len(TorsionAlertsInfo["AtomIndices"][TorsionRuleID])): 747 RotBondInfoValues = [] 748 749 # Bond atom indices... 750 Values = ["%s" % Value for Value in TorsionAlertsInfo["AtomIndices"][TorsionRuleID][Index]] 751 RotBondInfoValues.append(IntraSetValuesDelim.join(Values)) 752 753 # Torsion atom indices retrieved from the filtered SD file and stored as strings... 754 Values = ["%s" % Value for Value in TorsionAlertsInfo["TorsionAtomIndices"][TorsionRuleID][Index]] 755 RotBondInfoValues.append(IntraSetValuesDelim.join(Values)) 756 757 # Torsion angle... 758 RotBondInfoValues.append(TorsionAlertsInfo["TorsionAngle"][TorsionRuleID][Index]) 759 760 # Energy and its bounds... 761 RotBondInfoValues.append(TorsionAlertsInfo["Energy"][TorsionRuleID][Index]) 762 RotBondInfoValues.append(TorsionAlertsInfo["EnergyLowerBound"][TorsionRuleID][Index]) 763 RotBondInfoValues.append(TorsionAlertsInfo["EnergyUpperBound"][TorsionRuleID][Index]) 764 765 # Angle not observed...... 766 RotBondInfoValues.append(TorsionAlertsInfo["AngleNotObserved"][TorsionRuleID][Index]) 767 768 # Max single energy alert type... 769 RotBondInfoValues.append(TorsionAlertsInfo["MaxSingleEnergyAlertType"][TorsionRuleID][Index]) 770 771 # Track alerts informaiton... 772 AlertsInfoValues.append("%s" % InterSetValuesDelim.join(RotBondInfoValues)) 773 774 Mol.SetProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionRuleAlertsLabel"], "%s" % (InterSetValuesDelim.join(AlertsInfoValues))) 775 776 def SkipRotatableBondAlertInfo(ID, RotBondsAlertsInfo): 777 """Skip rotatble bond alert info for a specific bond during writing to output files.""" 778 779 if not OptionsInfo["OutfileAlertsOnly"]: 780 return False 781 782 if RotBondsAlertsInfo["RotBondsAlertsStatus"] is None: 783 return True 784 785 Status = False 786 if OptionsInfo["TotalEnergyMode"]: 787 if not RotBondsAlertsInfo["RotBondsAlertsStatus"]: 788 Status = True 789 elif OptionsInfo["MaxSingleEnergyMode"]: 790 if RotBondsAlertsInfo["MaxSingleEnergyAlertStatus"][ID] is None or not RotBondsAlertsInfo["MaxSingleEnergyAlertStatus"][ID]: 791 Status = True 792 elif OptionsInfo["TotalOrMaxSingleEnergyMode"]: 793 if not RotBondsAlertsInfo["RotBondsAlertsStatus"]: 794 Status = True 795 796 return Status 797 798 def SetupEnergyValueForSDField(Value, Precision): 799 """Setup energy value for SD field.""" 800 801 if Value is None or math.isnan(Value) or math.isinf(Value): 802 return "NA" 803 804 return "%.*f" % (Precision, Value) 805 806 def SetupAngleNotObservedValue(Value): 807 """Setup angle not observed value.""" 808 809 if Value is None: 810 return "NA" 811 812 return "Yes" if Value else "No" 813 814 def SetupMaxSingleEnergyAlertStatusValue(Value): 815 """Setup max single energy alert status value.""" 816 817 if Value is None: 818 return "NA" 819 820 return "Yes" if Value else "No" 821 822 def SetupTorsionAtomIndicesValues(TorsionAtomIndicesList, ValuesDelim): 823 """Setup torsion atom indices value for output files.""" 824 825 Values = ["%s" % Value for Value in TorsionAtomIndicesList] 826 827 return ValuesDelim.join(Values) 828 829 def SetupOutfilesWriters(): 830 """Setup molecule and summary writers.""" 831 832 OutfilesWriters = {"WriterRemaining": None, "WriterFiltered": None, "WriterAlertSummary": None} 833 834 # Writers for SD files... 835 WriterRemaining, WriterFiltered = SetupMoleculeWriters() 836 OutfilesWriters["WriterRemaining"] = WriterRemaining 837 OutfilesWriters["WriterFiltered"] = WriterFiltered 838 839 # Writer for alert summary CSV file... 840 WriterAlertSummary = SetupAlertSummaryWriter() 841 OutfilesWriters["WriterAlertSummary"] = WriterAlertSummary 842 843 return OutfilesWriters 844 845 def SetupMoleculeWriters(): 846 """Setup molecule writers.""" 847 848 Writer = None 849 WriterFiltered = None 850 851 if OptionsInfo["CountMode"]: 852 return (Writer, WriterFiltered) 853 854 Writer = RDKitUtil.MoleculesWriter(OptionsInfo["Outfile"], **OptionsInfo["OutfileParams"]) 855 if Writer is None: 856 MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OptionsInfo["Outfile"]) 857 MiscUtil.PrintInfo("\nGenerating file %s..." % OptionsInfo["Outfile"]) 858 859 if OptionsInfo["OutfileFilteredMode"]: 860 WriterFiltered = RDKitUtil.MoleculesWriter(OptionsInfo["OutfileFiltered"], **OptionsInfo["OutfileParams"]) 861 if WriterFiltered is None: 862 MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OptionsInfo["OutfileFiltered"]) 863 MiscUtil.PrintInfo("Generating file %s..." % OptionsInfo["OutfileFiltered"]) 864 865 return (Writer, WriterFiltered) 866 867 def SetupAlertSummaryWriter(): 868 """Setup a alert summary writer.""" 869 870 Writer = None 871 872 if OptionsInfo["CountMode"]: 873 return Writer 874 875 if not OptionsInfo["OutfileSummaryMode"]: 876 return Writer 877 878 Outfile = OptionsInfo["OutfileSummary"] 879 Writer = open(Outfile, "w") 880 if Writer is None: 881 MiscUtil.PrintError("Failed to setup a writer for output fie %s " % Outfile) 882 883 MiscUtil.PrintInfo("Generating file %s..." % Outfile) 884 885 return Writer 886 887 def CloseOutfilesWriters(OutfilesWriters): 888 """Close outfile writers.""" 889 890 for WriterType, Writer in OutfilesWriters.items(): 891 if Writer is not None: 892 Writer.close() 893 894 def SetupByRuleOutfilesWriters(RuleIDs): 895 """Setup by rule outfiles writers.""" 896 897 # Initialize... 898 OutfilesWriters = {} 899 for RuleID in RuleIDs: 900 OutfilesWriters[RuleID] = None 901 902 if OptionsInfo["CountMode"]: 903 return OutfilesWriters 904 905 if not OptionsInfo["OutfilesFilteredByRulesMode"]: 906 return OutfilesWriters 907 908 FileDir, FileName, FileExt = MiscUtil.ParseFileName(Options["--outfile"]) 909 OutfilesRoot = "%s_Filtered_TopRule" % FileName 910 OutfilesExt = "sdf" 911 912 MsgTxt = "all" if OptionsInfo["OutfilesFilteredByRulesAllMode"] else "top %s" % OptionsInfo["OutfilesFilteredByRulesMaxCount"] 913 MiscUtil.PrintInfo("\nGenerating output files %s*.%s for %s torsion rules triggering alerts..." % (OutfilesRoot, OutfilesExt, MsgTxt)) 914 915 # Delete any existing output files... 916 Outfiles = glob.glob("%s*.%s" % (OutfilesRoot, OutfilesExt)) 917 if len(Outfiles): 918 MiscUtil.PrintInfo("Deleting existing output files %s*.%s..." % (OutfilesRoot, OutfilesExt)) 919 for Outfile in Outfiles: 920 try: 921 os.remove(Outfile) 922 except Exception as ErrMsg: 923 MiscUtil.PrintWarning("Failed to delete file: %s" % ErrMsg) 924 925 RuleIndex = 0 926 for RuleID in RuleIDs: 927 RuleIndex += 1 928 Outfile = "%s%s.%s" % (OutfilesRoot, RuleIndex, OutfilesExt) 929 Writer = RDKitUtil.MoleculesWriter(Outfile, **OptionsInfo["OutfileParams"]) 930 if Writer is None: 931 MiscUtil.PrintError("Failed to setup a writer for output fie %s " % Outfile) 932 933 OutfilesWriters[RuleID] = Writer 934 935 return OutfilesWriters 936 937 def CloseByRuleOutfilesWriters(OutfilesWriters): 938 """Close by rule outfile writers.""" 939 940 for RuleID, Writer in OutfilesWriters.items(): 941 if Writer is not None: 942 Writer.close() 943 944 def InstantiateTorsionStrainEnergyAlertsClass(Quiet = False): 945 """Initialize torsion strain energy alerts class.""" 946 947 try: 948 TorsionStrainEnergyAlertsHandle = TorsionStrainEnergyAlerts(AlertsMode = OptionsInfo["AlertsMode"], TotalEnergyCutoff = OptionsInfo["TotalEnergyCutoff"], MaxSingleEnergyCutoff = OptionsInfo["MaxSingleEnergyCutoff"], RotBondsSMARTSMode = OptionsInfo["RotBondsSMARTSMode"], RotBondsSMARTSPattern = OptionsInfo["RotBondsSMARTSPattern"], TorsionLibraryFilePath = OptionsInfo["TorsionEnergyLibraryFile"], AlertTorsionsNotObserved = OptionsInfo["FilterTorsionsNotObserved"]) 949 except Exception as ErrMsg: 950 MiscUtil.PrintError("Failed to instantiate TorsionStrainEnergyAlerts:\n%s\n" % (ErrMsg)) 951 952 if not Quiet: 953 MiscUtil.PrintInfo("\nRetrieving data from torsion strain energy library file %s..." % TorsionStrainEnergyAlertsHandle.GetTorsionLibraryFilePath()) 954 TorsionStrainEnergyAlertsHandle.ListTorsionLibraryInfo() 955 956 return TorsionStrainEnergyAlertsHandle 957 958 def ProcessRotatableBondsSMARTSMode(): 959 """"Process SMARTS pattern for rotatable bonds.""" 960 961 RotBondsMode = OptionsInfo["RotBondsSMARTSMode"] 962 963 RotBondsSMARTSPattern = None 964 RotBondsSMARTSPatternSpecified = OptionsInfo["RotBondsSMARTSPatternSpecified"] 965 966 if re.match("^(NonStrict|SemiStrict|Strict)$", RotBondsMode, re.I): 967 RotBondsSMARTSPattern = None 968 elif re.match("Specify", RotBondsMode, re.I): 969 RotBondsSMARTSPatternSpecified = RotBondsSMARTSPatternSpecified.strip() 970 if not len(RotBondsSMARTSPatternSpecified): 971 MiscUtil.PrintError("Empty value specified for SMILES/SMARTS pattern in \"--rotBondsSMARTSPattern\" option, %s." % RotBondsMode) 972 973 RotBondsPatternMol = Chem.MolFromSmarts(RotBondsSMARTSPatternSpecified) 974 if RotBondsPatternMol is None: 975 MiscUtil.PrintError("Failed to create rotatable bonds pattern molecule. The rotatable bonds SMARTS pattern, \"%s\", specified using \"--rotBondsSMARTSPattern\" option is not valid." % (RotBondsSMARTSPatternSpecified)) 976 else: 977 MiscUtil.PrintError("The value, %s, specified for option \"-r, --rotBondsSMARTSMode\" is not valid. " % RotBondsMode) 978 979 OptionsInfo["RotBondsSMARTSPattern"] = RotBondsSMARTSPattern 980 981 def ProcessSDFieldLabelsOption(): 982 """Process SD data field label option.""" 983 984 ParamsOptionName = "--outfileSDFieldLabels" 985 ParamsOptionValue = Options["--outfileSDFieldLabels"] 986 987 ParamsIDsToLabels = {"RotBondsCountLabel": "RotBondsCount", "TotalEnergyLabel": "TotalEnergy", "TotalEnergyLowerBoundCILabel": "TotalEnergyLowerBoundCI", "TotalEnergyUpperBoundCILabel": "TotalEnergyUpperBoundCI", "MaxSingleEnergyLabel": "MaxSingleEnergy", "MaxSingleEnergyAlertsCountLabel": "MaxSingleEnergyAlertsCount", "AnglesNotObservedCountLabel": "AnglesNotObservedCount", "TorsionAlertsLabel": "TorsionAlerts(RotBondIndices TorsionIndices TorsionAngle Energy EnergyLowerBoundCI EnergyUpperBoundCI HierarchyClass HierarchySubClass TorsionRule EnergyMethod AngleNotObserved MaxSingleEnergyAlert)", "TorsionRuleLabel": "TorsionRule (HierarchyClass HierarchySubClass TorsionRule EnergyMethod)", "TorsionRuleMaxSingleEnergyAlertsCountLabel": "TorsionRuleMaxSingleEnergyAlertsCount", "TorsionRuleAnglesNotObservedCountLabel": "TorsionRuleAnglesNotObservedCount", "TorsionRuleAlertsLabel": "TorsionRuleAlerts (RotBondIndices TorsionIndices TorsionAngle Energy EnergyLowerBoundCI EnergyUpperBoundCI AngleNotObserved MaxSingleEnergyAlert)"} 988 989 if re.match("^auto$", ParamsOptionValue, re.I): 990 OptionsInfo["SDFieldIDsToLabels"] = ParamsIDsToLabels 991 return 992 993 # Setup a canonical paramater names... 994 ValidParamNames = [] 995 CanonicalParamNamesMap = {} 996 for ParamName in sorted(ParamsIDsToLabels): 997 ValidParamNames.append(ParamName) 998 CanonicalParamNamesMap[ParamName.lower()] = ParamName 999 1000 ParamsOptionValue = ParamsOptionValue.strip() 1001 if not ParamsOptionValue: 1002 PrintError("No valid parameter name and value pairs specified using \"%s\" option" % ParamsOptionName) 1003 1004 ParamsOptionValueWords = ParamsOptionValue.split(",") 1005 if len(ParamsOptionValueWords) % 2: 1006 MiscUtil.PrintError("The number of comma delimited paramater names and values, %d, specified using \"%s\" option must be an even number." % (len(ParamsOptionValueWords), ParamsOptionName)) 1007 1008 # Validate paramater name and value pairs... 1009 for Index in range(0, len(ParamsOptionValueWords), 2): 1010 Name = ParamsOptionValueWords[Index].strip() 1011 Value = ParamsOptionValueWords[Index + 1].strip() 1012 1013 CanonicalName = Name.lower() 1014 if not CanonicalName in CanonicalParamNamesMap: 1015 MiscUtil.PrintError("The parameter name, %s, specified using \"%s\" is not a valid name. Supported parameter names: %s" % (Name, ParamsOptionName, " ".join(ValidParamNames))) 1016 1017 ParamName = CanonicalParamNamesMap[CanonicalName] 1018 ParamValue = Value 1019 1020 # Set value... 1021 ParamsIDsToLabels[ParamName] = ParamValue 1022 1023 OptionsInfo["SDFieldIDsToLabels"] = ParamsIDsToLabels 1024 1025 def ProcessOptions(): 1026 """Process and validate command line arguments and options.""" 1027 1028 MiscUtil.PrintInfo("Processing options...") 1029 1030 # Validate options... 1031 ValidateOptions() 1032 1033 TotalEnergyMode, MaxSingleEnergyMode, TotalOrMaxSingleEnergyMode = [False] * 3 1034 AlertsMode = Options["--alertsMode"] 1035 if re.match("^TotalEnergy$", AlertsMode, re.I): 1036 TotalEnergyMode = True 1037 elif re.match("^MaxSingleEnergy$", AlertsMode, re.I): 1038 MaxSingleEnergyMode = True 1039 elif re.match("^TotalOrMaxSingleEnergy$", AlertsMode, re.I): 1040 TotalOrMaxSingleEnergyMode = True 1041 OptionsInfo["AlertsMode"] = AlertsMode 1042 OptionsInfo["TotalEnergyMode"] = TotalEnergyMode 1043 OptionsInfo["MaxSingleEnergyMode"] = MaxSingleEnergyMode 1044 OptionsInfo["TotalOrMaxSingleEnergyMode"] = TotalOrMaxSingleEnergyMode 1045 1046 OptionsInfo["FilterTorsionsNotObserved"] = True if re.match("^yes$", Options["--filterTorsionsNotObserved"], re.I) else False 1047 1048 OptionsInfo["MaxSingleEnergyCutoff"] = float(Options["--alertsMaxSingleEnergyCutoff"]) 1049 OptionsInfo["TotalEnergyCutoff"] = float(Options["--alertsTotalEnergyCutoff"]) 1050 1051 OptionsInfo["Infile"] = Options["--infile"] 1052 ParamsDefaultInfoOverride = {"RemoveHydrogens": False} 1053 OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters("--infileParams", Options["--infileParams"], InfileName = Options["--infile"], ParamsDefaultInfo = ParamsDefaultInfoOverride) 1054 1055 OptionsInfo["Outfile"] = Options["--outfile"] 1056 OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters("--outfileParams", Options["--outfileParams"], Options["--infile"], Options["--outfile"]) 1057 1058 FileDir, FileName, FileExt = MiscUtil.ParseFileName(Options["--outfile"]) 1059 OutfileFiltered = "%s_Filtered.%s" % (FileName, FileExt) 1060 OptionsInfo["OutfileFiltered"] = OutfileFiltered 1061 OptionsInfo["OutfileFilteredMode"] = True if re.match("^yes$", Options["--outfileFiltered"], re.I) else False 1062 1063 OptionsInfo["OutfileSummary"] = "%s_AlertsSummary.csv" % (FileName) 1064 1065 OutfileSummaryMode = Options["--outfileSummary"] 1066 if re.match("^auto$", OutfileSummaryMode, re.I): 1067 OutfileSummaryMode = 'yes' if re.match("^MaxSingleEnergy$", Options["--alertsMode"], re.I) else 'no' 1068 OptionsInfo["OutfileSummaryMode"] = True if re.match("^yes$", OutfileSummaryMode, re.I) else False 1069 1070 if re.match("^yes$", Options["--outfileSummary"], re.I): 1071 if not re.match("^MaxSingleEnergy$", Options["--alertsMode"], re.I): 1072 MiscUtil.PrintError("The value \"%s\" specified for \"--outfileSummary\" option is not valid. The specified value is only allowed during \"MaxSingleEnergy\" value of \"-a, --alertsMode\" option." % (Options["--outfileSummary"])) 1073 1074 OutfilesFilteredByRulesMode = Options["--outfilesFilteredByRules"] 1075 if re.match("^auto$", OutfilesFilteredByRulesMode, re.I): 1076 OutfilesFilteredByRulesMode = 'yes' if re.match("^MaxSingleEnergy$", Options["--alertsMode"], re.I) else 'no' 1077 OptionsInfo["OutfilesFilteredByRulesMode"] = True if re.match("^yes$", OutfilesFilteredByRulesMode, re.I) else False 1078 1079 if re.match("^yes$", Options["--outfilesFilteredByRules"], re.I): 1080 if not re.match("^MaxSingleEnergy$", Options["--alertsMode"], re.I): 1081 MiscUtil.PrintError("The value \"%s\" specified for \"--outfilesFilteredByRules\" option is not valid. The specified value is only allowed during \"MaxSingleEnergy\" value of \"-a, --alertsMode\" option." % (Options["--outfileSummary"])) 1082 1083 OptionsInfo["TrackAlertsSummaryInfo"] = True if (OptionsInfo["OutfileSummaryMode"] or OptionsInfo["OutfilesFilteredByRulesMode"]) else False 1084 1085 OutfilesFilteredByRulesMaxCount = Options["--outfilesFilteredByRulesMaxCount"] 1086 if not re.match("^All$", OutfilesFilteredByRulesMaxCount, re.I): 1087 OutfilesFilteredByRulesMaxCount = int(OutfilesFilteredByRulesMaxCount) 1088 OptionsInfo["OutfilesFilteredByRulesMaxCount"] = OutfilesFilteredByRulesMaxCount 1089 OptionsInfo["OutfilesFilteredByRulesAllMode"] = True if re.match("^All$", Options["--outfilesFilteredByRulesMaxCount"], re.I) else False 1090 1091 OptionsInfo["OutfileAlerts"] = True if re.match("^yes$", Options["--outfileAlerts"], re.I) else False 1092 1093 if re.match("^yes$", Options["--outfilesFilteredByRules"], re.I): 1094 if not re.match("^yes$", Options["--outfileAlerts"], re.I): 1095 MiscUtil.PrintError("The value \"%s\" specified for \"--outfilesFilteredByRules\" option is not valid. The specified value is only allowed during \"yes\" value of \"--outfileAlerts\" option." % (Options["--outfilesFilteredByRules"])) 1096 1097 OptionsInfo["OutfileAlertsMode"] = Options["--outfileAlertsMode"] 1098 OptionsInfo["OutfileAlertsOnly"] = True if re.match("^AlertsOnly$", Options["--outfileAlertsMode"], re.I) else False 1099 1100 ProcessSDFieldLabelsOption() 1101 1102 OptionsInfo["Overwrite"] = Options["--overwrite"] 1103 OptionsInfo["CountMode"] = True if re.match("^count$", Options["--mode"], re.I) else False 1104 1105 OptionsInfo["Precision"] = int(Options["--precision"]) 1106 1107 OptionsInfo["MPMode"] = True if re.match("^yes$", Options["--mp"], re.I) else False 1108 OptionsInfo["MPParams"] = MiscUtil.ProcessOptionMultiprocessingParameters("--mpParams", Options["--mpParams"]) 1109 1110 OptionsInfo["RotBondsSMARTSMode"] = Options["--rotBondsSMARTSMode"] 1111 OptionsInfo["RotBondsSMARTSPatternSpecified"] = Options["--rotBondsSMARTSPattern"] 1112 ProcessRotatableBondsSMARTSMode() 1113 1114 OptionsInfo["TorsionEnergyLibraryFile"] = Options["--torsionEnergyLibraryFile"] 1115 1116 # Setup delimiter for writing out torsion alert information to output files... 1117 OptionsInfo["IntraSetValuesDelim"] = "," 1118 OptionsInfo["InterSetValuesDelim"] = " " 1119 1120 def RetrieveOptions(): 1121 """Retrieve command line arguments and options.""" 1122 1123 # Get options... 1124 global Options 1125 Options = docopt(_docoptUsage_) 1126 1127 # Set current working directory to the specified directory... 1128 WorkingDir = Options["--workingdir"] 1129 if WorkingDir: 1130 os.chdir(WorkingDir) 1131 1132 # Handle examples option... 1133 if "--examples" in Options and Options["--examples"]: 1134 MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_)) 1135 sys.exit(0) 1136 1137 def ProcessListTorsionLibraryOption(): 1138 """Process list torsion library information.""" 1139 1140 # Validate and process dataFile option for listing torsion library information... 1141 OptionsInfo["TorsionEnergyLibraryFile"] = Options["--torsionEnergyLibraryFile"] 1142 if not re.match("^auto$", Options["--torsionEnergyLibraryFile"], re.I): 1143 MiscUtil.ValidateOptionFilePath("-t, --torsionEnergyLibraryFile", Options["--torsionEnergyLibraryFile"]) 1144 1145 # Instantiate TorsionStrainEnergyAlerts using defaults... 1146 TorsionStrainEnergyAlertsHandle = TorsionStrainEnergyAlerts(TorsionLibraryFilePath = OptionsInfo["TorsionEnergyLibraryFile"]) 1147 MiscUtil.PrintInfo("\nRetrieving data from torsion strain energy library file %s..." % TorsionStrainEnergyAlertsHandle.GetTorsionLibraryFilePath()) 1148 TorsionStrainEnergyAlertsHandle.ListTorsionLibraryInfo() 1149 1150 def ValidateOptions(): 1151 """Validate option values.""" 1152 1153 MiscUtil.ValidateOptionTextValue("-a, --alertsMode", Options["--alertsMode"], "TotalEnergy MaxSingleEnergy TotalOrMaxSingleEnergy") 1154 1155 MiscUtil.ValidateOptionFloatValue("--alertsMaxSingleEnergyCutoff", Options["--alertsMaxSingleEnergyCutoff"], {">": 0.0}) 1156 MiscUtil.ValidateOptionFloatValue("--alertsTotalEnergyCutoff", Options["--alertsTotalEnergyCutoff"], {">": 0.0}) 1157 1158 MiscUtil.ValidateOptionTextValue("--filterTorsionsNotObserved", Options["--filterTorsionsNotObserved"], "yes no") 1159 1160 MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"]) 1161 MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "sdf sd mol") 1162 1163 MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "sdf sd") 1164 if re.match("^filter$", Options["--mode"], re.I): 1165 MiscUtil.ValidateOptionsOutputFileOverwrite("-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"]) 1166 MiscUtil.ValidateOptionsDistinctFileNames("-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"]) 1167 1168 MiscUtil.ValidateOptionTextValue("--outfileFiltered", Options["--outfileFiltered"], "yes no") 1169 1170 MiscUtil.ValidateOptionTextValue("--outfilesFilteredByRules", Options["--outfilesFilteredByRules"], "yes no auto") 1171 if not re.match("^All$", Options["--outfilesFilteredByRulesMaxCount"], re.I): 1172 MiscUtil.ValidateOptionIntegerValue("--outfilesFilteredByRulesMaxCount", Options["--outfilesFilteredByRulesMaxCount"], {">": 0}) 1173 1174 MiscUtil.ValidateOptionTextValue("--outfileSummary", Options["--outfileSummary"], "yes no auto") 1175 MiscUtil.ValidateOptionTextValue("--outfileAlerts", Options["--outfileAlerts"], "yes no") 1176 MiscUtil.ValidateOptionTextValue("--outfileAlertsMode", Options["--outfileAlertsMode"], "All AlertsOnly") 1177 1178 MiscUtil.ValidateOptionTextValue("-m, --mode", Options["--mode"], "filter count") 1179 if re.match("^filter$", Options["--mode"], re.I): 1180 if not Options["--outfile"]: 1181 MiscUtil.PrintError("The outfile must be specified using \"-o, --outfile\" during \"filter\" value of \"-m, --mode\" option") 1182 1183 MiscUtil.ValidateOptionTextValue("--mp", Options["--mp"], "yes no") 1184 1185 MiscUtil.ValidateOptionIntegerValue("-p, --precision", Options["--precision"], {">": 0}) 1186 1187 MiscUtil.ValidateOptionTextValue("-r, --rotBondsSMARTSMode", Options["--rotBondsSMARTSMode"], "NonStrict SemiStrict Strict Specify") 1188 if re.match("^Specify$", Options["--rotBondsSMARTSMode"], re.I): 1189 if not Options["--rotBondsSMARTSPattern"]: 1190 MiscUtil.PrintError("The SMARTS pattern must be specified using \"--rotBondsSMARTSPattern\" during \"Specify\" value of \"-r, --rotBondsSMARTS\" option") 1191 else: 1192 if Options["--rotBondsSMARTSPattern"]: 1193 MiscUtil.PrintError("The SMARTS pattern must not be specified using \"--rotBondsSMARTSPattern\" during \"%s\" value of \"-r, --rotBondsSMARTS\" option" % (Options["--rotBondsSMARTSMode"])) 1194 1195 if not re.match("^auto$", Options["--torsionEnergyLibraryFile"], re.I): 1196 MiscUtil.ValidateOptionFilePath("-t, --torsionEnergyLibraryFile", Options["--torsionEnergyLibraryFile"]) 1197 1198 # Setup a usage string for docopt... 1199 _docoptUsage_ = """ 1200 RDKitFilterTorsionStrainEnergyAlerts.py - Filter torsion strain energy library alerts 1201 1202 Usage: 1203 RDKitFilterTorsionStrainEnergyAlerts.py [--alertsMode <TotalEnergy, MaxSingleEnergy, or TotalOrMaxSingleEnergy>] 1204 [--alertsMaxSingleEnergyCutoff <Number>] [--alertsTotalEnergyCutoff <Number>] 1205 [--filterTorsionsNotObserved <yes or no>] [--infileParams <Name,Value,...>] [--mode <filter or count>] 1206 [--mp <yes or no>] [--mpParams <Name,Value,...>] [--outfileAlerts <yes or no>] 1207 [--outfileAlertsMode <All or AlertsOnly>] [--outfileFiltered <yes or no>] 1208 [--outfilesFilteredByRules <yes or no>] [--outfilesFilteredByRulesMaxCount <All or number>] 1209 [--outfileSummary <yes or no>] [--outfileSDFieldLabels <Type,Label,...>] [--outfileParams <Name,Value,...>] 1210 [--overwrite] [--precision <number>] [ --rotBondsSMARTSMode <NonStrict, SemiStrict,...>] 1211 [--rotBondsSMARTSPattern <SMARTS>] [--torsionEnergyLibraryFile <FileName or auto>] 1212 [-w <dir>] -i <infile> -o <outfile> 1213 RDKitFilterTorsionStrainEnergyAlerts.py [--torsionEnergyLibraryFile <FileName or auto>] -l | --list 1214 RDKitFilterTorsionStrainEnergyAlerts.py -h | --help | -e | --examples 1215 1216 Description: 1217 Filter strained molecules from an input file for torsion strain energy library 1218 [ Ref 153 ] alerts by matching rotatable bonds against SMARTS patterns specified 1219 for torsion rules in a torsion energy library file and write out appropriate 1220 molecules to output files. The molecules must have 3D coordinates in input file. 1221 The default torsion strain energy library file, TorsionStrainEnergyLibrary.xml, 1222 is available under MAYACHEMTOOLS/lib/python/TorsionAlerts directory. 1223 1224 The data in torsion strain energy library file is organized in a hierarchical 1225 manner. It consists of one generic class and six specific classes at the highest 1226 level. Each class contains multiple subclasses corresponding to named functional 1227 groups or substructure patterns. The subclasses consist of torsion rules sorted 1228 from specific to generic torsion patterns. The torsion rule, in turn, contains a 1229 list of peak values for torsion angles and two tolerance values. A pair of tolerance 1230 values define torsion bins around a torsion peak value. 1231 1232 A strain energy calculation method, 'exact' or 'approximate' [ Ref 153 ], is 1233 associated with each torsion rule for calculating torsion strain energy. The 'exact' 1234 stain energy calculation relies on the energy bins available under the energy histogram 1235 consisting of 36 bins covering angles from -180 to 180. The width of each bin is 10 1236 degree. The energy bins are are defined at the right end points. The first and the 1237 last energy bins correspond to -170 and 180 respectively. The torsion angle is mapped 1238 to a energy bin. An angle offset is calculated for the torsion angle from the the right 1239 end point angle of the bin. The strain energy is estimated for the angle offset based 1240 on the energy difference between the current and previous bins. The torsion strain 1241 energy, in terms of torsion energy units (TEUs), corresponds to the sum of bin strain 1242 energy and the angle offset strain energy. 1243 1244 Energy = BinEnergyDiff/10.0 * BinAngleOffset + BinEnergy[BinNum] 1245 1246 Where: 1247 1248 BinEnergyDiff = BinEnergy[BinNum] - BinEnergy[PreviousBinNum] 1249 BinAngleOffset = TorsionAngle - BinAngleRightSide 1250 1251 The 'approximate' strain energy calculation relies on the angle difference between a 1252 torsion angle and the torsion peaks observed for the torsion rules in the torsion 1253 energy library. The torsion angle is matched to a torsion peak based on the value of 1254 torsion angle difference. It must be less than or equal to the value for the second 1255 tolerance 'tolerance2'. Otherwise, the torsion angle is not observed in the torsion 1256 energy library and a value of 'NA' is assigned for torsion energy along with the lower 1257 and upper bounds on energy at 95% confidence interval. The 'approximate' torsion 1258 energy (TEUs) for observed torsion angle is calculated using the following formula: 1259 1260 Energy = beta_1 * (AngleDiff ** 2) + beta_2 * (AngleDiff ** 4) 1261 1262 The coefficients 'beta_1' and 'beta_2' are available for the observed angles in 1263 the torsion strain energy library. The 'AngleDiff' is the difference between the 1264 torsion angle and the matched torsion peak. 1265 1266 For example: 1267 1268 <library> 1269 <hierarchyClass id1="G" id2="G" name="GG"> 1270 ... 1271 </hierarchyClass> 1272 <hierarchyClass id1="C" id2="O" name="CO"> 1273 <hierarchySubClass name="Ester bond I" smarts="O=[C:2][O:3]"> 1274 <torsionRule method="exact" smarts= 1275 "[O:1]=[C:2]!@[O:3]~[CH0:4]"> 1276 <angleList> 1277 <angle score="56.52" tolerance1="20.00" 1278 tolerance2="25.00" value="0.0"/> 1279 </angleList> 1280 <histogram> 1281 <bin count="1"/> 1282 ... 1283 </histogram> 1284 <histogram_shifted> 1285 <bin count="0"/> 1286 ... 1287 </histogram_shifted> 1288 <histogram_converted> 1289 <bin energy="4.67... lower="2.14..." upper="Inf"/> 1290 ... 1291 <bin energy="1.86..." lower="1.58..." upper="2.40..."/> 1292 ... 1293 </histogram_converted> 1294 </torsionRule> 1295 <torsionRule method="approximate" smarts= 1296 "[cH0:1][c:2]([cH0])!@[O:3][p:4]"> 1297 <angleList> 1298 <angle beta_1="0.002..." beta_2="-7.843...e-07" 1299 score="27.14" theta_0="-90.0" tolerance1="30.00" 1300 tolerance2="45.00" value="-90.0"/> 1301 ... 1302 </angleList> 1303 <histogram> 1304 <bin count="0"/> 1305 ... 1306 </histogram> 1307 <histogram_shifted> 1308 <bin count="0"/> 1309 ... 1310 </histogram_shifted> 1311 </torsionRule> 1312 ... 1313 ... 1314 </hierarchyClass> 1315 <hierarchyClass id1="N" id2="C" name="NC"> 1316 ... 1317 </hierarchyClass> 1318 <hierarchyClass id1="S" id2="N" name="SN"> 1319 ... 1320 </hierarchyClass> 1321 <hierarchyClass id1="C" id2="S" name="CS"> 1322 ... 1323 </hierarchyClass> 1324 <hierarchyClass id1="C" id2="C" name="CC"> 1325 ... 1326 </hierarchyClass> 1327 <hierarchyClass id1="S" id2="S" name="SS"> 1328 ... 1329 </hierarchyClass> 1330 </library> 1331 1332 The rotatable bonds in a 3D molecule are identified using a default SMARTS pattern. 1333 A custom SMARTS pattern may be optionally specified to detect rotatable bonds. 1334 Each rotatable bond is matched to a torsion rule in the torsion strain energy library. 1335 The strain energy is calculated for each rotatable bond using the calculation 1336 method, 'exact' or 'approximate', associated with the matched torsion rule. 1337 1338 The total strain energy (TEUs) of a molecule corresponds to the sum of 'exact' and 1339 'approximate' strain energies calculated for all matched rotatable bonds in the 1340 molecule. The total strain energy is set to 'NA' for molecules containing a 'approximate' 1341 energy estimate for a torsion angle not observed in the torsion energy library. In 1342 addition, the lower and upper bounds on energy at 95% confidence interval are 1343 set to 'NA'. 1344 1345 The following output files are generated after the filtering: 1346 1347 <OutfileRoot>.sdf 1348 <OutfileRoot>_Filtered.sdf 1349 <OutfileRoot>_AlertsSummary.csv 1350 <OutfileRoot>_Filtered_TopRule*.sdf 1351 1352 The last two set of outfile files, <OutfileRoot>_AlertsSummary.csv and 1353 <OutfileRoot>_<OutfileRoot>_AlertsSummary.csv, are only generated during filtering 1354 by 'MaxSingleEnergy'. 1355 1356 The supported input file formats are: Mol (.mol), SD (.sdf, .sd) 1357 1358 The supported output file formats are: SD (.sdf, .sd) 1359 1360 Options: 1361 -a, --alertsMode <TotalEnergy,...> [default: TotalEnergy] 1362 Torsion strain energy library alert types to use for filtering molecules 1363 containing rotatable bonds based on the calculated values for the total 1364 torsion strain energy of a molecule and the maximum single strain 1365 energy of a rotatable bond in a molecule. 1366 1367 Possible values: TotalEnergy, MaxSingleEnergy, or TotalOrMaxSingleEnergy 1368 1369 The strain energy cutoff values in terms of torsion energy units (TEUs) are 1370 used to filter molecules as shown below: 1371 1372 AlertsMode AlertsEnergyCutoffs (TEUs) 1373 1374 TotalEnergy >= TotalEnergyCutoff 1375 1376 MaxSingleEnergy >= MaxSingleEnergyCutoff 1377 1378 TotalOrMaxSingleEnergy >= TotalEnergyCutoff 1379 or >= MaxSingleEnergyCutoff 1380 1381 --alertsMaxSingleEnergyCutoff <Number> [default: 1.8] 1382 Maximum single strain energy (TEUs) cutoff [ Ref 153 ] for filtering molecules 1383 based on the maximum value of a single strain energy of a rotatable bond 1384 in a molecule. This option is used during 'MaxSingleEnergy' or 1385 'TotalOrMaxSingleEnergy' values of '-a, --alertsMode' option. 1386 1387 The maximum single strain energy must be greater than or equal to the 1388 specified cutoff value for filtering molecules. 1389 --alertsTotalEnergyCutoff <Number> [default: 6.0] 1390 Total strain strain energy (TEUs) cutoff [ Ref 153 ] for filtering molecules 1391 based on total strain energy for all rotatable bonds in a molecule. This 1392 option is used during 'TotalEnergy' or 'TotalOrMaxSingleEnergy' 1393 values of '-a, --alertsMode' option. 1394 1395 The total strain energy must be greater than or equal to the specified 1396 cutoff value for filtering molecules. 1397 --filterTorsionsNotObserved <yes or no> [default: no] 1398 Filter molecules containing torsion angles not observed in torsion strain 1399 energy library. It's not possible to calculate torsion strain energies for 1400 these torsions during 'approximate' match to a specified torsion in the 1401 library. 1402 1403 The 'approximate' strain energy calculation relies on the angle difference 1404 between a torsion angle and the torsion peaks observed for the torsion 1405 rules in the torsion energy library. The torsion angle is matched to a 1406 torsion peak based on the value of torsion angle difference. It must be 1407 less than or equal to the value for the second tolerance 'tolerance2'. 1408 Otherwise, the torsion angle is not observed in the torsion energy library 1409 and a value of 'NA' is assigned for torsion energy along with the lower and 1410 upper bounds on energy at 95% confidence interval. 1411 -e, --examples 1412 Print examples. 1413 -h, --help 1414 Print this help message. 1415 -i, --infile <infile> 1416 Input file name. 1417 --infileParams <Name,Value,...> [default: auto] 1418 A comma delimited list of parameter name and value pairs for reading 1419 molecules from files. The supported parameter names for different file 1420 formats, along with their default values, are shown below: 1421 1422 SD, MOL: removeHydrogens,no,sanitize,yes,strictParsing,yes 1423 1424 -l, --list 1425 List torsion library information without performing any filtering. 1426 -m, --mode <filter or count> [default: filter] 1427 Specify whether to filter molecules for torsion strain energy library [ Ref 153 ] 1428 alerts by matching rotatable bonds against SMARTS patterns specified for 1429 torsion rules to calculate torsion strain energies and write out the rest 1430 of the molecules to an outfile or simply count the number of matched 1431 molecules marked for filtering. 1432 --mp <yes or no> [default: no] 1433 Use multiprocessing. 1434 1435 By default, input data is retrieved in a lazy manner via mp.Pool.imap() 1436 function employing lazy RDKit data iterable. This allows processing of 1437 arbitrary large data sets without any additional requirements memory. 1438 1439 All input data may be optionally loaded into memory by mp.Pool.map() 1440 before starting worker processes in a process pool by setting the value 1441 of 'inputDataMode' to 'InMemory' in '--mpParams' option. 1442 1443 A word to the wise: The default 'chunkSize' value of 1 during 'Lazy' input 1444 data mode may adversely impact the performance. The '--mpParams' section 1445 provides additional information to tune the value of 'chunkSize'. 1446 --mpParams <Name,Value,...> [default: auto] 1447 A comma delimited list of parameter name and value pairs to configure 1448 multiprocessing. 1449 1450 The supported parameter names along with their default and possible 1451 values are shown below: 1452 1453 chunkSize, auto 1454 inputDataMode, Lazy [ Possible values: InMemory or Lazy ] 1455 numProcesses, auto [ Default: mp.cpu_count() ] 1456 1457 These parameters are used by the following functions to configure and 1458 control the behavior of multiprocessing: mp.Pool(), mp.Pool.map(), and 1459 mp.Pool.imap(). 1460 1461 The chunkSize determines chunks of input data passed to each worker 1462 process in a process pool by mp.Pool.map() and mp.Pool.imap() functions. 1463 The default value of chunkSize is dependent on the value of 'inputDataMode'. 1464 1465 The mp.Pool.map() function, invoked during 'InMemory' input data mode, 1466 automatically converts RDKit data iterable into a list, loads all data into 1467 memory, and calculates the default chunkSize using the following method 1468 as shown in its code: 1469 1470 chunkSize, extra = divmod(len(dataIterable), len(numProcesses) * 4) 1471 if extra: chunkSize += 1 1472 1473 For example, the default chunkSize will be 7 for a pool of 4 worker processes 1474 and 100 data items. 1475 1476 The mp.Pool.imap() function, invoked during 'Lazy' input data mode, employs 1477 'lazy' RDKit data iterable to retrieve data as needed, without loading all the 1478 data into memory. Consequently, the size of input data is not known a priori. 1479 It's not possible to estimate an optimal value for the chunkSize. The default 1480 chunkSize is set to 1. 1481 1482 The default value for the chunkSize during 'Lazy' data mode may adversely 1483 impact the performance due to the overhead associated with exchanging 1484 small chunks of data. It is generally a good idea to explicitly set chunkSize to 1485 a larger value during 'Lazy' input data mode, based on the size of your input 1486 data and number of processes in the process pool. 1487 1488 The mp.Pool.map() function waits for all worker processes to process all 1489 the data and return the results. The mp.Pool.imap() function, however, 1490 returns the the results obtained from worker processes as soon as the 1491 results become available for specified chunks of data. 1492 1493 The order of data in the results returned by both mp.Pool.map() and 1494 mp.Pool.imap() functions always corresponds to the input data. 1495 -o, --outfile <outfile> 1496 Output file name. 1497 --outfileAlerts <yes or no> [default: yes] 1498 Write out alerts information to SD output files. 1499 --outfileAlertsMode <All or AlertsOnly> [default: AlertsOnly] 1500 Write alerts information to SD output files for all alerts or only for alerts 1501 specified by '--AlertsMode' option. Possible values: All or AlertsOnly 1502 This option is only valid for 'Yes' value of '--outfileAlerts' option. 1503 1504 The following alerts information is added to SD output files using 1505 'TorsionAlerts' data field: 1506 1507 RotBondIndices TorsionIndices TorsionAngle 1508 Energy EnergyLowerBoundCI EnergyUpperBoundCI 1509 HierarchyClass HierarchySubClass TorsionRule 1510 EnergyMethod AngleNotObserved MaxSingleEnergyAlert 1511 1512 The following data filelds are added to SD output files based on the value of 1513 '--AlertsMode' option: 1514 1515 TotalEnergy 1516 TotalEnergyLowerBoundCI 1517 TotalEnergyUpperBoundCI 1518 1519 MaxSingleEnergy 1520 MaxSingleEnergyAlertsCount 1521 1522 AnglesNotObservedCount 1523 1524 The 'RotBondsCount' is always added to SD output files containing both 1525 remaining and filtered molecules. 1526 1527 Format: 1528 1529 > <RotBondsCount> 1530 Number 1531 1532 > <TotalEnergy> 1533 Number 1534 1535 > <TotalEnergyLowerBoundCI> 1536 Number 1537 1538 > <TotalEnergyUpperBoundCI> 1539 Number 1540 1541 > <MaxSingleEnergy> 1542 Number 1543 1544 > <MaxSingleEnergyAlertsCount> 1545 Number 1546 1547 > <AnglesNotObservedCount> 1548 Number 1549 1550 > <TorsionAlerts (RotBondIndices TorsionIndices TorsionAngle 1551 Energy EnergyLowerBoundCI EnergyUpperBoundCI 1552 HierarchyClass HierarchySubClass TorsionRule 1553 EnergyMethod AngleNotObserved MaxSingleEnergyAlert)> 1554 AtomIndex2,AtomIndex3 AtomIndex1,AtomIndex2,AtomIndex3,AtomIndex4 1555 Angle Energy EnergyLowerBoundCI EnergyUpperBoundCI 1556 ClassName SubClassName SMARTS EnergyMethod Yes|No|NA Yes|No|NA 1557 ... ... ... 1558 ... ... ... 1559 1560 A set of 12 values is written out as value of 'TorsionAlerts' data field for 1561 each torsion in a molecule. The space character is used as a delimiter 1562 to separate values with in a set and across set. The comma character 1563 is used to delimit multiple values for each value in a set. 1564 1565 The 'RotBondIndices' and 'TorsionIndices' contain 2 and 4 comma delimited 1566 values representing atom indices for a rotatable bond and the matched 1567 torsion. 1568 1569 The 'Energy' value is the estimated strain energy for the matched torsion. 1570 The 'EnergyLowerBoundCI' and 'EnergyUpperBoundCI' represent lower and 1571 bound energy estimates at 95% confidence interval. The 'EnergyMethod', 1572 exact or approximate, corresponds to the method employed to estimate 1573 torsion strain energy. 1574 1575 The 'AngleNotObserved' is only valid for 'approximate' value of 'EnergyMethod'. 1576 It has three possible values: Yes, No, or NA. The 'Yes' value indicates that 1577 the 'TorsionAngle' is outside the 'tolerance2' of all peaks for the matched 1578 torsion rule in the torsion library. 1579 1580 The 'MaxSingleEnergyAlert' is valid for the following values of '-a, --alertsMode' 1581 option: 'MaxSingleEnergy' or 'TotalOrMaxSingleEnergy'. It has three possible 1582 values: Yes, No, or NA. It's set to 'NA' for 'Yes' or 'NA' values of 1583 'AngleNotObserved'. The 'Yes' value indicates that the estimated torsion 1584 energy is greater than the specified value for '--alertsMaxSingleEnergyCutoff' 1585 option. 1586 1587 For example: 1588 1589 > <RotBondsCount> (1) 1590 14 1591 1592 > <TotalEnergy> (1) 1593 6.8065 1594 1595 > <TotalEnergyLowerBoundCI> (1) 1596 5.9340 1597 1598 > <TotalEnergyUpperBoundCI> (1) 1599 NA 1600 1601 > <MaxSingleEnergy> (1) 1602 1.7108 1603 1604 > <MaxSingleEnergyAlertsCount> (1) 1605 0 1606 1607 > <AnglesNotObservedCount> (1) 1608 0 1609 1610 > <TorsionAlerts(RotBondIndices TorsionIndices TorsionAngle Energy 1611 EnergyLowerBoundCI EnergyUpperBoundCI HierarchyClass 1612 HierarchySubClass TorsionRule EnergyMethod AngleNotObserved 1613 MaxSingleEnergyAlert)> (1) 1614 2,1 48,2,1,0 61.90 0.0159 -0.0320 0.0674 CO Ether [O:1][CX4:2]! 1615 @[O:3][CX4:4] Exact NA No 2,3 1,2,3,4 109.12 1.5640 1.1175 NA CC 1616 None/[CX4][CX3] [O:1][CX4:2]!@[CX3:3]=[O:4] Exact NA No 1617 ... ... ... 1618 1619 --outfileFiltered <yes or no> [default: yes] 1620 Write out a file containing filtered molecules. Its name is automatically 1621 generated from the specified output file. Default: <OutfileRoot>_ 1622 Filtered.<OutfileExt>. 1623 --outfilesFilteredByRules <yes or no> [default: auto] 1624 Write out SD files containing filtered molecules for individual torsion 1625 rules triggering alerts in molecules. The name of SD files are automatically 1626 generated from the specified output file. Default file names: <OutfileRoot>_ 1627 Filtered_TopRule*.sdf. 1628 1629 Default value: 'yes' for 'MaxSingleEnergy' of '-a, --alertsMode' option'; 1630 otherwise, 'no'. 1631 1632 The output files are only generated for 'MaxSingleEnergy' of 1633 '-a, --alertsMode' option. 1634 1635 The following alerts information is added to SD output files: 1636 1637 > <RotBondsCount> 1638 Number 1639 1640 > <TotalEnergy> 1641 Number 1642 1643 > <TotalEnergyLowerBoundCI> 1644 Number 1645 1646 > <TotalEnergyUpperBoundCI> 1647 Number 1648 1649 > <MaxSingleEnergy> 1650 Number 1651 1652 > <MaxSingleEnergyAlertsCount> 1653 Number 1654 1655 > <AnglesNotObservedCount> 1656 Number 1657 1658 > <TorsionRule (HierarchyClass HierarchySubClass TorsionRule 1659 EnergyMethod)> 1660 ClassName SubClassName EnergyMethod SMARTS 1661 ... ... ... 1662 1663 > <TorsionRuleMaxSingleEnergyAlertsCount> 1664 Number 1665 1666 > <TorsionRuleAnglesNotObservedCount> 1667 Number 1668 1669 > <TorsionRuleAlerts (RotBondIndices TorsionIndices TorsionAngle 1670 Energy EnergyLowerBoundCI EnergyUpperBoundCI 1671 AngleNotObserved MaxSingleEnergyAlert)> 1672 AtomIndex2,AtomIndex3 AtomIndex1,AtomIndex2,AtomIndex3,AtomIndex4 1673 Angle Energy EnergyLowerBoundCI EnergyUpperBoundCI EnergyMethod 1674 Yes|No|NA Yes|No|NA 1675 ... ... ... 1676 1677 For example: 1678 1679 > <RotBondsCount> (1) 1680 8 1681 1682 > <TotalEnergy> (1) 1683 6.1889 1684 1685 > <TotalEnergyLowerBoundCI> (1) 1686 5.1940 1687 1688 > <TotalEnergyUpperBoundCI> (1) 1689 NA 1690 1691 > <MaxSingleEnergy> (1) 1692 1.9576 1693 1694 > <MaxSingleEnergyAlertsCount> (1) 1695 1 1696 1697 > <AnglesNotObservedCount> (1) 1698 0 1699 1700 > <TorsionRule (HierarchyClass HierarchySubClass TorsionRule 1701 EnergyMethod)> (1) 1702 CC None/[CX4:2][CX4:3] [!#1:1][CX4:2]!@[CX4:3][!#1:4] Exact 1703 1704 > <TorsionRuleMaxSingleEnergyAlertsCount> (1) 1705 0 1706 1707 > <TorsionRuleAnglesNotObservedCount> (1) 1708 0 1709 1710 > <TorsionRuleAlerts (RotBondIndices TorsionIndices TorsionAngle 1711 Energy EnergyLowerBoundCI EnergyUpperBoundCI AngleNotObserved 1712 MaxSingleEnergyAlert)> (1) 1713 1,3 0,1,3,4 72.63 0.8946 0.8756 0.9145 NA No 1714 1715 --outfilesFilteredByRulesMaxCount <All or number> [default: 10] 1716 Write out SD files containing filtered molecules for specified number of 1717 top N torsion rules triggering alerts for the largest number of molecules 1718 or for all torsion rules triggering alerts across all molecules. 1719 1720 These output files are only generated for 'MaxSingleEnergy' value of 1721 '-a, --alertsMode' option. 1722 --outfileSummary <yes or no> [default: auto] 1723 Write out a CVS text file containing summary of torsions rules responsible 1724 for triggering torsion alerts. Its name is automatically generated from the 1725 specified output file. Default: <OutfileRoot>_AlertsSummary.csv. 1726 1727 Default value: 'yes' for 'MaxSingleEnergy' of '-a, --alertsMode' option'; 1728 otherwise, 'no'. 1729 1730 The summary output file is only generated for 'MaxSingleEnergy' of 1731 '-a, --alertsMode' option. 1732 1733 The following alerts information is written to summary text file: 1734 1735 TorsionRule, HierarchyClass, HierarchySubClass, EnergyMethod, 1736 MaxSingleEnergyTorsionAlertTypes, MaxSingleEnergyTorsionAlertCount, 1737 MaxSingleEnergyTorsionAlertMolCount 1738 1739 The double quotes characters are removed from SMART patterns before 1740 before writing them to a CSV file. In addition, the torsion rules are sorted by 1741 TorsionAlertMolCount. 1742 --outfileSDFieldLabels <Type,Label,...> [default: auto] 1743 A comma delimited list of SD data field type and label value pairs for writing 1744 torsion alerts information along with molecules to SD files. 1745 1746 The supported SD data field label type along with their default values are 1747 shown below: 1748 1749 For all SD files: 1750 1751 RotBondsCountLabel, RotBondsCount, 1752 1753 TotalEnergyLabel, TotalEnergy, 1754 TotalEnergyLowerBoundCILabel, TotalEnergyLowerBoundCI, 1755 TotalEnergyUpperBoundCILabel, TotalEnergyUpperBoundCI, 1756 1757 MaxSingleEnergyLabel, MaxSingleEnergy, 1758 MaxSingleEnergyAlertsCountLabel, 1759 MaxSingleEnergyAlertsCount 1760 1761 AnglesNotObservedCountLabel, 1762 AnglesNotObservedCount 1763 1764 TorsionAlertsLabel, TorsionAlerts(RotBondIndices TorsionIndices 1765 TorsionAngle Energy EnergyLowerBoundCI EnergyUpperBoundCI 1766 HierarchyClass HierarchySubClass TorsionRule 1767 EnergyMethod AngleNotObserved) 1768 1769 For individual SD files filtered by torsion rules: 1770 1771 TorsionRuleLabel, TorsionRule (HierarchyClass HierarchySubClass 1772 EnergyMethod TorsionRule) 1773 TorsionRuleMaxSingleEnergyAlertsCountLabel, 1774 TorsionRuleMaxSingleEnergyAlertsCount, 1775 TorsionRuleAnglesNotObservedCountLabel, 1776 TorsionRuleAnglesNotObservedCount, 1777 TorsionRuleAlertsLabel, TorsionRuleAlerts (RotBondIndices 1778 TorsionIndices TorsionAngle Energy EnergyLowerBoundCI 1779 EnergyUpperBoundCI EnergyMethod AngleObserved) 1780 1781 --outfileParams <Name,Value,...> [default: auto] 1782 A comma delimited list of parameter name and value pairs for writing 1783 molecules to files. The supported parameter names for different file 1784 formats, along with their default values, are shown below: 1785 1786 SD: kekulize,yes,forceV3000,no 1787 1788 --overwrite 1789 Overwrite existing files. 1790 --precision <number> [default: 4] 1791 Floating point precision for writing torsion strain energy values. 1792 -r, --rotBondsSMARTSMode <NonStrict, SemiStrict,...> [default: SemiStrict] 1793 SMARTS pattern to use for identifying rotatable bonds in a molecule 1794 for matching against torsion rules in the torsion library. Possible values: 1795 NonStrict, SemiStrict, Strict or Specify. The rotatable bond SMARTS matches 1796 are filtered to ensure that each atom in the rotatable bond is attached to 1797 at least two heavy atoms. 1798 1799 The following SMARTS patterns are used to identify rotatable bonds for 1800 different modes: 1801 1802 NonStrict: [!$(*#*)&!D1]-&!@[!$(*#*)&!D1] 1803 1804 SemiStrict: 1805 [!$(*#*)&!D1&!$(C(F)(F)F)&!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br) 1806 &!$(C([CH3])([CH3])[CH3])]-!@[!$(*#*)&!D1&!$(C(F)(F)F) 1807 &!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)&!$(C([CH3])([CH3])[CH3])] 1808 1809 Strict: 1810 [!$(*#*)&!D1&!$(C(F)(F)F)&!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br) 1811 &!$(C([CH3])([CH3])[CH3])&!$([CD3](=[N,O,S])-!@[#7,O,S!D1]) 1812 &!$([#7,O,S!D1]-!@[CD3]=[N,O,S])&!$([CD3](=[N+])-!@[#7!D1]) 1813 &!$([#7!D1]-!@[CD3]=[N+])]-!@[!$(*#*)&!D1&!$(C(F)(F)F) 1814 &!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)&!$(C([CH3])([CH3])[CH3])] 1815 1816 The 'NonStrict' and 'Strict' SMARTS patterns are available in RDKit. The 1817 'NonStrict' SMARTS pattern corresponds to original Daylight SMARTS 1818 specification for rotatable bonds. The 'SemiStrict' SMARTS pattern is 1819 derived from 'Strict' SMARTS pattern for its usage in this script. 1820 1821 You may use any arbitrary SMARTS pattern to identify rotatable bonds by 1822 choosing 'Specify' value for '-r, --rotBondsSMARTSMode' option and providing its 1823 value via '--rotBondsSMARTSPattern' option. 1824 --rotBondsSMARTSPattern <SMARTS> 1825 SMARTS pattern for identifying rotatable bonds. This option is only valid 1826 for 'Specify' value of '-r, --rotBondsSMARTSMode' option. 1827 -t, --torsionEnergyLibraryFile <FileName or auto> [default: auto] 1828 Specify a XML file name containing data for torsion starin energy library 1829 hierarchy or use default file, TorsionEnergyLibrary.xml, available in 1830 MAYACHEMTOOLS/lib/Python/TorsionAlerts directory. 1831 1832 The format of data in local XML file must match format of the data in Torsion 1833 Library [ Ref 153 ] file available in MAYACHEMTOOLS directory. 1834 -w, --workingdir <dir> 1835 Location of working directory which defaults to the current directory. 1836 1837 Examples: 1838 To filter molecules containing rotatable bonds with total strain energy value 1839 of >= 6.0 (TEUs) based on torsion rules in the torsion energy library and write 1840 write out SD files containing remaining and filtered molecules, type: 1841 1842 % RDKitFilterTorsionStrainEnergyAlerts.py -i Sample3D.sdf 1843 -o Sample3DOut.sdf 1844 1845 To filter molecules containing any rotatable bonds with strain energy value of 1846 >= 1.8 (TEUs) based on torsion rules in the torsion energy library and write out 1847 SD files containing remaining and filtered molecules, and individual SD files for 1848 torsion rules triggering alerts along with appropriate torsion information for 1849 red alerts, type: 1850 1851 % RDKitFilterTorsionStrainEnergyAlerts.py -a MaxSingleEnergy 1852 -i Sample3D.sdf -o Sample3DOut.sdf 1853 1854 To filter molecules containing rotatable bonds with total strain energy value 1855 of >= 6.0 (TEUs) or any single strain energy value of >= 1.8 (TEUs) and write out 1856 SD files containing remaining and filtered molecules, type: 1857 1858 % RDKitFilterTorsionStrainEnergyAlerts.py -a TotalOrMaxSingleEnergy 1859 -i Sample3D.sdf -o Sample3DOut.sdf 1860 1861 To filter molecules containing rotatable bonds with specific cutoff values for 1862 total or single torsion strain energy and write out SD files containing 1863 remaining and filtered molecules, type: 1864 1865 % RDKitFilterTorsionStrainEnergyAlerts.py -a TotalOrMaxSingleEnergy 1866 -i Sample3D.sdf -o Sample3DOut.sdf --alertsTotalEnergyCutoff 6.0 1867 --alertsMaxSingleEnergyCutoff 1.8 1868 1869 To run the first example for filtering molecules and writing out torsion 1870 information for all alert types to SD files, type: 1871 1872 % RDKitFilterTorsionStrainEnergyAlerts.py -i Sample3D.sdf 1873 -o Sample3DOut.sdf --outfileAlertsMode All 1874 1875 To run the first example for filtering molecules in multiprocessing mode on 1876 all available CPUs without loading all data into memory and write out SD files, 1877 type: 1878 1879 % RDKitFilterTorsionStrainEnergyAlerts.py --mp yes -i Sample3D.sdf 1880 -o Sample3DOut.sdf 1881 1882 To run the first example for filtering molecules in multiprocessing mode on 1883 all available CPUs by loading all data into memory and write out a SD files, 1884 type: 1885 1886 % RDKitFilterTorsionStrainEnergyAlerts.py --mp yes --mpParams 1887 "inputDataMode, InMemory" -i Sample3D.sdf -o Sample3DOut.sdf 1888 1889 To run the first example for filtering molecules in multiprocessing mode on 1890 specific number of CPUs and chunksize without loading all data into memory 1891 and write out SD files, type: 1892 1893 % RDKitFilterTorsionStrainEnergyAlerts.py --mp yes --mpParams 1894 "inputDataMode,lazy,numProcesses,4,chunkSize,8" -i Sample3D.sdf 1895 -o Sample3DOut.sdf 1896 1897 To list information about default torsion library file without performing any 1898 filtering, type: 1899 1900 % RDKitFilterTorsionStrainEnergyAlerts.py -l 1901 1902 To list information about a local torsion library XML file without performing 1903 any, filtering, type: 1904 1905 % RDKitFilterTorsionStrainEnergyAlerts.py --torsionEnergyLibraryFile 1906 TorsionStrainEnergyLibrary.xml -l 1907 1908 Author: 1909 Manish Sud (msud@san.rr.com) 1910 1911 Collaborator: 1912 Pat Walters 1913 1914 See also: 1915 RDKitFilterChEMBLAlerts.py, RDKitFilterPAINS.py, RDKitFilterTorsionLibraryAlerts.py, 1916 RDKitConvertFileFormat.py, RDKitSearchSMARTS.py 1917 1918 Copyright: 1919 Copyright (C) 2024 Manish Sud. All rights reserved. 1920 1921 This script uses the torsion strain energy library developed by Gu, S.; 1922 Smith, M. S.; Yang, Y.; Irwin, J. J.; Shoichet, B. K. [ Ref 153 ]. 1923 1924 The torsion strain enegy library is based on the Torsion Library jointly 1925 developed by the University of Hamburg, Center for Bioinformatics, 1926 Hamburg, Germany and F. Hoffmann-La-Roche Ltd., Basel, Switzerland. 1927 1928 The functionality available in this script is implemented using RDKit, an 1929 open source toolkit for cheminformatics developed by Greg Landrum. 1930 1931 This file is part of MayaChemTools. 1932 1933 MayaChemTools is free software; you can redistribute it and/or modify it under 1934 the terms of the GNU Lesser General Public License as published by the Free 1935 Software Foundation; either version 3 of the License, or (at your option) any 1936 later version. 1937 1938 """ 1939 1940 if __name__ == "__main__": 1941 main()