1 #!/bin/env python
2 #
3 # File: RDKitFilterTorsionStrainEnergyAlerts.py
4 # Author: Manish Sud <msud@san.rr.com>
5 #
6 # Collaborator: Pat Walters
7 #
8 # Copyright (C) 2025 Manish Sud. All rights reserved.
9 #
10 # This script uses the torsion strain energy library developed by Gu, S.;
11 # Smith, M. S.; Yang, Y.; Irwin, J. J.; Shoichet, B. K. [ Ref 153 ].
12 #
13 # The torsion strain enegy library is based on the Torsion Library jointly
14 # developed by the University of Hamburg, Center for Bioinformatics,
15 # Hamburg, Germany and F. Hoffmann-La-Roche Ltd., Basel, Switzerland.
16 #
17 # The functionality available in this script is implemented using RDKit, an
18 # open source toolkit for cheminformatics developed by Greg Landrum.
19 #
20 # This file is part of MayaChemTools.
21 #
22 # MayaChemTools is free software; you can redistribute it and/or modify it under
23 # the terms of the GNU Lesser General Public License as published by the Free
24 # Software Foundation; either version 3 of the License, or (at your option) any
25 # later version.
26 #
27 # MayaChemTools is distributed in the hope that it will be useful, but without
28 # any warranty; without even the implied warranty of merchantability of fitness
29 # for a particular purpose. See the GNU Lesser General Public License for more
30 # details.
31 #
32 # You should have received a copy of the GNU Lesser General Public License
33 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
34 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
35 # Boston, MA, 02111-1307, USA.
36 #
37
38 from __future__ import print_function
39
40 # Add local python path to the global path and import standard library modules...
41 import os
42 import sys; sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python"))
43 import time
44 import re
45 import glob
46 import multiprocessing as mp
47 import math
48
49 # RDKit imports...
50 try:
51 from rdkit import rdBase
52 from rdkit import Chem
53 from rdkit.Chem import rdMolTransforms
54 except ImportError as ErrMsg:
55 sys.stderr.write("\nFailed to import RDKit module/package: %s\n" % ErrMsg)
56 sys.stderr.write("Check/update your RDKit environment and try again.\n\n")
57 sys.exit(1)
58
59 # MayaChemTools imports...
60 try:
61 from docopt import docopt
62 import MiscUtil
63 import RDKitUtil
64 from TorsionAlerts.TorsionStrainEnergyAlerts import TorsionStrainEnergyAlerts
65 except ImportError as ErrMsg:
66 sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg)
67 sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n")
68 sys.exit(1)
69
70 ScriptName = os.path.basename(sys.argv[0])
71 Options = {}
72 OptionsInfo = {}
73
74 def main():
75 """Start execution of the script."""
76
77 MiscUtil.PrintInfo("\n%s (RDKit v%s; MayaChemTools v%s; %s): Starting...\n" % (ScriptName, rdBase.rdkitVersion, MiscUtil.GetMayaChemToolsVersion(), time.asctime()))
78
79 (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime()
80
81 # Retrieve command line arguments and options...
82 RetrieveOptions()
83
84 if Options["--list"]:
85 # Handle listing of torsion library information...
86 ProcessListTorsionLibraryOption()
87 else:
88 # Process and validate command line arguments and options...
89 ProcessOptions()
90
91 # Perform actions required by the script...
92 PerformFiltering()
93
94 MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName)
95 MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime))
96
97 def PerformFiltering():
98 """Filter molecules using SMARTS torsion rules in the torsion strain energy
99 library file."""
100
101 # Setup a molecule reader...
102 MiscUtil.PrintInfo("\nProcessing file %s..." % OptionsInfo["Infile"])
103 Mols = RDKitUtil.ReadMolecules(OptionsInfo["Infile"], **OptionsInfo["InfileParams"])
104
105 MolCount, ValidMolCount, RemainingMolCount, WriteFailedCount = ProcessMolecules(Mols)
106
107 MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount)
108 MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount)
109 MiscUtil.PrintInfo("Number of molecules failed during writing: %d" % WriteFailedCount)
110 MiscUtil.PrintInfo("Number of ignored molecules: %d" % (MolCount - ValidMolCount + WriteFailedCount))
111
112 MiscUtil.PrintInfo("\nNumber of remaining molecules: %d" % RemainingMolCount)
113 MiscUtil.PrintInfo("Number of filtered molecules: %d" % (ValidMolCount - RemainingMolCount))
114
115 def ProcessMolecules(Mols):
116 """Process and filter molecules."""
117
118 if OptionsInfo["MPMode"]:
119 return ProcessMoleculesUsingMultipleProcesses(Mols)
120 else:
121 return ProcessMoleculesUsingSingleProcess(Mols)
122
123 def ProcessMoleculesUsingSingleProcess(Mols):
124 """Process and filter molecules using a single process."""
125
126 # Instantiate torsion strain energy alerts class...
127 TorsionStrainEnergyAlertsHandle = InstantiateTorsionStrainEnergyAlertsClass()
128
129 MiscUtil.PrintInfo("\nFiltering molecules...")
130
131 OutfileFilteredMode = OptionsInfo["OutfileFilteredMode"]
132
133 # Set up writers...
134 OutfilesWriters = SetupOutfilesWriters()
135
136 WriterRemaining = OutfilesWriters["WriterRemaining"]
137 WriterFiltered = OutfilesWriters["WriterFiltered"]
138 WriterAlertSummary = OutfilesWriters["WriterAlertSummary"]
139
140 # Initialize alerts summary info...
141 TorsionAlertsSummaryInfo = InitializeTorsionAlertsSummaryInfo()
142
143 (MolCount, ValidMolCount, RemainingMolCount, WriteFailedCount, FilteredMolWriteCount) = [0] * 5
144 for Mol in Mols:
145 MolCount += 1
146
147 if Mol is None:
148 continue
149
150 if RDKitUtil.IsMolEmpty(Mol):
151 MiscUtil.PrintWarning("Ignoring empty molecule: %s" % RDKitUtil.GetMolName(Mol, MolCount))
152 continue
153
154 # Check for 3D flag...
155 if not Mol.GetConformer().Is3D():
156 MiscUtil.PrintWarning("3D tag is not set. Ignoring molecule: %s\n" % RDKitUtil.GetMolName(Mol, MolCount))
157 continue
158
159 ValidMolCount += 1
160
161 # Identify torsion library alerts for rotatable bonds..
162 RotBondsAlertsStatus, RotBondsAlertsInfo = TorsionStrainEnergyAlertsHandle.IdentifyTorsionLibraryAlertsForRotatableBonds(Mol)
163
164 TrackTorsionAlertsSummaryInfo(TorsionAlertsSummaryInfo, RotBondsAlertsInfo)
165
166 # Write out filtered and remaining molecules...
167 WriteStatus = True
168 if RotBondsAlertsStatus:
169 if OutfileFilteredMode:
170 WriteStatus = WriteMolecule(WriterFiltered, Mol, RotBondsAlertsInfo)
171 if WriteStatus:
172 FilteredMolWriteCount += 1
173 else:
174 RemainingMolCount += 1
175 WriteStatus = WriteMolecule(WriterRemaining, Mol, RotBondsAlertsInfo)
176
177 if not WriteStatus:
178 WriteFailedCount += 1
179
180 WriteTorsionAlertsSummaryInfo(WriterAlertSummary, TorsionAlertsSummaryInfo)
181 CloseOutfilesWriters(OutfilesWriters)
182
183 if FilteredMolWriteCount:
184 WriteTorsionAlertsFilteredByRulesInfo(TorsionAlertsSummaryInfo)
185
186 return (MolCount, ValidMolCount, RemainingMolCount, WriteFailedCount)
187
188 def ProcessMoleculesUsingMultipleProcesses(Mols):
189 """Process and filter molecules using multiprocessing."""
190
191 MiscUtil.PrintInfo("\nFiltering molecules using multiprocessing...")
192
193 MPParams = OptionsInfo["MPParams"]
194 OutfileFilteredMode = OptionsInfo["OutfileFilteredMode"]
195
196 # Instantiate torsion strain energy alerts class to list torsion library information...
197 TorsionStrainEnergyAlertsHandle = InstantiateTorsionStrainEnergyAlertsClass()
198
199 # Set up writers...
200 OutfilesWriters = SetupOutfilesWriters()
201
202 WriterRemaining = OutfilesWriters["WriterRemaining"]
203 WriterFiltered = OutfilesWriters["WriterFiltered"]
204 WriterAlertSummary = OutfilesWriters["WriterAlertSummary"]
205
206 # Initialize alerts summary info...
207 TorsionAlertsSummaryInfo = InitializeTorsionAlertsSummaryInfo()
208
209 # Setup data for initializing a worker process...
210 MiscUtil.PrintInfo("\nEncoding options info and rotatable bond pattern molecule...")
211 InitializeWorkerProcessArgs = (MiscUtil.ObjectToBase64EncodedString(Options), MiscUtil.ObjectToBase64EncodedString(OptionsInfo))
212
213 # Setup a encoded mols data iterable for a worker process...
214 WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(Mols)
215
216 # Setup process pool along with data initialization for each process...
217 MiscUtil.PrintInfo("\nConfiguring multiprocessing using %s method..." % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()"))
218 MiscUtil.PrintInfo("NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n" % (MPParams["NumProcesses"], MPParams["InputDataMode"], ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"])))
219
220 ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeWorkerProcess, InitializeWorkerProcessArgs)
221
222 # Start processing...
223 if re.match("^Lazy$", MPParams["InputDataMode"], re.I):
224 Results = ProcessPool.imap(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
225 elif re.match("^InMemory$", MPParams["InputDataMode"], re.I):
226 Results = ProcessPool.map(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
227 else:
228 MiscUtil.PrintError("The value, %s, specified for \"--inputDataMode\" is not supported." % (MPParams["InputDataMode"]))
229
230 (MolCount, ValidMolCount, RemainingMolCount, WriteFailedCount, FilteredMolWriteCount) = [0] * 5
231 for Result in Results:
232 MolCount += 1
233 MolIndex, EncodedMol, RotBondsAlertsStatus, RotBondsAlertsInfo = Result
234
235 if EncodedMol is None:
236 continue
237 ValidMolCount += 1
238
239 Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
240
241 TrackTorsionAlertsSummaryInfo(TorsionAlertsSummaryInfo, RotBondsAlertsInfo)
242
243 # Write out filtered and remaining molecules...
244 WriteStatus = True
245 if RotBondsAlertsStatus:
246 if OutfileFilteredMode:
247 WriteStatus = WriteMolecule(WriterFiltered, Mol, RotBondsAlertsInfo)
248 if WriteStatus:
249 FilteredMolWriteCount += 1
250 else:
251 RemainingMolCount += 1
252 WriteStatus = WriteMolecule(WriterRemaining, Mol, RotBondsAlertsInfo)
253
254 if not WriteStatus:
255 WriteFailedCount += 1
256
257 WriteTorsionAlertsSummaryInfo(WriterAlertSummary, TorsionAlertsSummaryInfo)
258 CloseOutfilesWriters(OutfilesWriters)
259
260 if FilteredMolWriteCount:
261 WriteTorsionAlertsFilteredByRulesInfo(TorsionAlertsSummaryInfo)
262
263 return (MolCount, ValidMolCount, RemainingMolCount, WriteFailedCount)
264
265 def InitializeWorkerProcess(*EncodedArgs):
266 """Initialize data for a worker process."""
267
268 global Options, OptionsInfo
269
270 MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid())
271
272 # Decode Options and OptionInfo...
273 Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0])
274 OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1])
275
276 # Instantiate torsion strain energy alerts class...
277 OptionsInfo["TorsionStrainEnergyAlertsHandle"] = InstantiateTorsionStrainEnergyAlertsClass(Quiet = True)
278
279 def WorkerProcess(EncodedMolInfo):
280 """Process data for a worker process."""
281
282 MolIndex, EncodedMol = EncodedMolInfo
283
284 if EncodedMol is None:
285 return [MolIndex, None, False, None]
286
287 Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
288 if RDKitUtil.IsMolEmpty(Mol):
289 MolName = RDKitUtil.GetMolName(Mol, (MolIndex + 1))
290 MiscUtil.PrintWarning("Ignoring empty molecule: %s" % MolName)
291 return [MolIndex, None, False, None]
292
293 # Check for 3D flag...
294 if not Mol.GetConformer().Is3D():
295 MolName = RDKitUtil.GetMolName(Mol, (MolIndex + 1))
296 MiscUtil.PrintWarning("3D tag is not set. Ignoring molecule: %s\n" % MolName)
297 return [MolIndex, None, False, None]
298
299 # Identify torsion library alerts for rotatable bonds..
300 TorsionStrainEnergyAlertsHandle = OptionsInfo["TorsionStrainEnergyAlertsHandle"]
301 RotBondsAlertsStatus, RotBondsAlertsInfo = TorsionStrainEnergyAlertsHandle.IdentifyTorsionLibraryAlertsForRotatableBonds(Mol)
302
303 return [MolIndex, EncodedMol, RotBondsAlertsStatus, RotBondsAlertsInfo]
304
305 def InitializeTorsionAlertsSummaryInfo():
306 """Initialize torsion alerts summary."""
307
308 if OptionsInfo["CountMode"]:
309 return None
310
311 if not OptionsInfo["TrackAlertsSummaryInfo"]:
312 return None
313
314 TorsionAlertsSummaryInfo = {}
315 TorsionAlertsSummaryInfo["RuleIDs"] = []
316
317 for DataLabel in ["SMARTSToRuleIDs", "RuleSMARTS", "HierarchyClassName", "HierarchySubClassName", "EnergyMethod", "MaxSingleEnergyAlertTypes", "MaxSingleEnergyAlertTypesMolCount"]:
318 TorsionAlertsSummaryInfo[DataLabel] = {}
319
320 return TorsionAlertsSummaryInfo
321
322 def TrackTorsionAlertsSummaryInfo(TorsionAlertsSummaryInfo, RotBondsAlertsInfo):
323 """Track torsion alerts summary information for matched torsion rules in a
324 molecule."""
325
326 if OptionsInfo["CountMode"]:
327 return
328
329 if not OptionsInfo["TrackAlertsSummaryInfo"]:
330 return
331
332 if RotBondsAlertsInfo is None:
333 return
334
335 MolAlertsInfo = {}
336 MolAlertsInfo["RuleIDs"] = []
337 MolAlertsInfo["MaxSingleEnergyAlertTypes"] = {}
338
339 for ID in RotBondsAlertsInfo["IDs"]:
340 if not RotBondsAlertsInfo["MatchStatus"][ID]:
341 continue
342
343 if SkipRotatableBondAlertInfo(ID, RotBondsAlertsInfo):
344 continue
345
346 MaxSingleEnergyAlertType = SetupMaxSingleEnergyAlertStatusValue(RotBondsAlertsInfo["MaxSingleEnergyAlertStatus"][ID])
347
348 TorsionRuleNodeID = RotBondsAlertsInfo["TorsionRuleNodeID"][ID]
349 TorsionRuleSMARTS = RotBondsAlertsInfo["TorsionRuleSMARTS"][ID]
350
351 # Track data for torsion alert summary information across molecules...
352 if TorsionRuleNodeID not in TorsionAlertsSummaryInfo["RuleSMARTS"]:
353 TorsionAlertsSummaryInfo["RuleIDs"].append(TorsionRuleNodeID)
354 TorsionAlertsSummaryInfo["SMARTSToRuleIDs"][TorsionRuleSMARTS] = TorsionRuleNodeID
355
356 TorsionAlertsSummaryInfo["RuleSMARTS"][TorsionRuleNodeID] = TorsionRuleSMARTS
357 TorsionAlertsSummaryInfo["HierarchyClassName"][TorsionRuleNodeID] = RotBondsAlertsInfo["HierarchyClassName"][ID]
358 TorsionAlertsSummaryInfo["HierarchySubClassName"][TorsionRuleNodeID] = RotBondsAlertsInfo["HierarchySubClassName"][ID]
359
360 TorsionAlertsSummaryInfo["EnergyMethod"][TorsionRuleNodeID] = RotBondsAlertsInfo["EnergyMethod"][ID]
361
362 # Initialize number of alert types across all molecules...
363 TorsionAlertsSummaryInfo["MaxSingleEnergyAlertTypes"][TorsionRuleNodeID] = {}
364
365 # Initialize number of molecules flagged by each alert type...
366 TorsionAlertsSummaryInfo["MaxSingleEnergyAlertTypesMolCount"][TorsionRuleNodeID] = {}
367
368 if MaxSingleEnergyAlertType not in TorsionAlertsSummaryInfo["MaxSingleEnergyAlertTypes"][TorsionRuleNodeID]:
369 TorsionAlertsSummaryInfo["MaxSingleEnergyAlertTypes"][TorsionRuleNodeID][MaxSingleEnergyAlertType] = 0
370 TorsionAlertsSummaryInfo["MaxSingleEnergyAlertTypesMolCount"][TorsionRuleNodeID][MaxSingleEnergyAlertType] = 0
371
372 TorsionAlertsSummaryInfo["MaxSingleEnergyAlertTypes"][TorsionRuleNodeID][MaxSingleEnergyAlertType] += 1
373
374 # Track data for torsion alert information in a molecule...
375 if TorsionRuleNodeID not in MolAlertsInfo["MaxSingleEnergyAlertTypes"]:
376 MolAlertsInfo["RuleIDs"].append(TorsionRuleNodeID)
377 MolAlertsInfo["MaxSingleEnergyAlertTypes"][TorsionRuleNodeID] = {}
378
379 if MaxSingleEnergyAlertType not in MolAlertsInfo["MaxSingleEnergyAlertTypes"][TorsionRuleNodeID]:
380 MolAlertsInfo["MaxSingleEnergyAlertTypes"][TorsionRuleNodeID][MaxSingleEnergyAlertType] = 0
381 MolAlertsInfo["MaxSingleEnergyAlertTypes"][TorsionRuleNodeID][MaxSingleEnergyAlertType] += 1
382
383 # Track number of molecules flagged by a specific torsion alert...
384 for TorsionRuleNodeID in MolAlertsInfo["RuleIDs"]:
385 for MaxSingleEnergyAlertType in MolAlertsInfo["MaxSingleEnergyAlertTypes"][TorsionRuleNodeID]:
386 if MolAlertsInfo["MaxSingleEnergyAlertTypes"][TorsionRuleNodeID][MaxSingleEnergyAlertType]:
387 TorsionAlertsSummaryInfo["MaxSingleEnergyAlertTypesMolCount"][TorsionRuleNodeID][MaxSingleEnergyAlertType] += 1
388
389 def WriteTorsionAlertsSummaryInfo(Writer, TorsionAlertsSummaryInfo):
390 """Write out torsion alerts summary informatio to a CSV file."""
391
392 if OptionsInfo["CountMode"]:
393 return
394
395 if not OptionsInfo["OutfileSummaryMode"]:
396 return
397
398 if len(TorsionAlertsSummaryInfo["RuleIDs"]) == 0:
399 return
400
401 # Write headers...
402 QuoteValues = True
403 Values = ["TorsionRule", "HierarchyClass", "HierarchySubClass", "EnergyMethod", "MaxSingleEnergyTorsionAlertTypes", "MaxSingleEnergyTorsionAlertCount", "MaxSingleEnergyTorsionAlertMolCount"]
404 Writer.write("%s\n" % MiscUtil.JoinWords(Values, ",", QuoteValues))
405
406 SortedRuleIDs = GetSortedTorsionAlertsSummaryInfoRuleIDs(TorsionAlertsSummaryInfo)
407
408 # Write alerts information...
409 for ID in SortedRuleIDs:
410 # Remove any double quotes in SMARTS...
411 RuleSMARTS = TorsionAlertsSummaryInfo["RuleSMARTS"][ID]
412 RuleSMARTS = re.sub("\"", "", RuleSMARTS, re.I)
413
414 HierarchyClassName = TorsionAlertsSummaryInfo["HierarchyClassName"][ID]
415 HierarchySubClassName = TorsionAlertsSummaryInfo["HierarchySubClassName"][ID]
416
417 EnergyMethod = TorsionAlertsSummaryInfo["EnergyMethod"][ID]
418
419 MaxSingleEnergyAlertTypes = []
420 MaxSingleEnergyAlertTypesCount = []
421 MaxSingleEnergyAlertTypesMolCount = []
422 for MaxSingleEnergyAlertType in sorted(TorsionAlertsSummaryInfo["MaxSingleEnergyAlertTypes"][ID]):
423 MaxSingleEnergyAlertTypes.append(MaxSingleEnergyAlertType)
424 MaxSingleEnergyAlertTypesCount.append("%s" % TorsionAlertsSummaryInfo["MaxSingleEnergyAlertTypes"][ID][MaxSingleEnergyAlertType])
425 MaxSingleEnergyAlertTypesMolCount.append("%s" % TorsionAlertsSummaryInfo["MaxSingleEnergyAlertTypesMolCount"][ID][MaxSingleEnergyAlertType])
426
427 Values = [RuleSMARTS, HierarchyClassName, HierarchySubClassName, EnergyMethod, "%s" % MiscUtil.JoinWords(MaxSingleEnergyAlertTypes, ","), "%s" % (MiscUtil.JoinWords(MaxSingleEnergyAlertTypesCount, ",")), "%s" % (MiscUtil.JoinWords(MaxSingleEnergyAlertTypesMolCount, ","))]
428 Writer.write("%s\n" % MiscUtil.JoinWords(Values, ",", QuoteValues))
429
430 def GetSortedTorsionAlertsSummaryInfoRuleIDs(TorsionAlertsSummaryInfo):
431 """Sort torsion rule IDs by alert types molecule count in descending order."""
432
433 SortedRuleIDs = []
434
435 RuleIDs = TorsionAlertsSummaryInfo["RuleIDs"]
436 if len(RuleIDs) == 0:
437 return SortedRuleIDs
438
439 # Setup a map from AlertTypesMolCount to IDs for sorting alerts...
440 RuleIDs = TorsionAlertsSummaryInfo["RuleIDs"]
441 MolCountMap = {}
442 for ID in RuleIDs:
443 MolCount = 0
444 for AlertType in sorted(TorsionAlertsSummaryInfo["MaxSingleEnergyAlertTypes"][ID]):
445 MolCount += TorsionAlertsSummaryInfo["MaxSingleEnergyAlertTypesMolCount"][ID][AlertType]
446 MolCountMap[ID] = MolCount
447
448 SortedRuleIDs = sorted(RuleIDs, key = lambda ID: MolCountMap[ID], reverse = True)
449
450 return SortedRuleIDs
451
452 def WriteTorsionAlertsFilteredByRulesInfo(TorsionAlertsSummaryInfo):
453 """Write out torsion alerts SD files for individual torsion rules."""
454
455 if OptionsInfo["CountMode"]:
456 return
457
458 if not OptionsInfo["OutfilesFilteredByRulesMode"]:
459 return
460
461 if len(TorsionAlertsSummaryInfo["RuleIDs"]) == 0:
462 return
463
464 # Setup a molecule reader for filtered molecules...
465 FilteredMols = RDKitUtil.ReadMolecules(OptionsInfo["OutfileFiltered"], **OptionsInfo["InfileParams"])
466
467 # Get torsion rule IDs for writing out filtered SD files for individual torsion alert rules...
468 TorsionRuleIDs = GetTorsionAlertsFilteredByRuleFilesRuleIDs(TorsionAlertsSummaryInfo)
469
470 # Setup writers...
471 ByRuleOutfilesWriters = SetupByRuleOutfilesWriters(TorsionRuleIDs)
472
473 for Mol in FilteredMols:
474 # Retrieve torsion alerts info...
475 TorsionAlertsInfo = RetrieveTorsionAlertsInfo(Mol, TorsionAlertsSummaryInfo)
476 if TorsionAlertsInfo is None:
477 continue
478
479 for TorsionRuleID in TorsionRuleIDs:
480 if TorsionRuleID not in TorsionAlertsInfo["RuleSMARTS"]:
481 continue
482
483 WriteMoleculeFilteredByRuleID(ByRuleOutfilesWriters[TorsionRuleID], Mol, TorsionRuleID, TorsionAlertsSummaryInfo, TorsionAlertsInfo)
484
485 CloseByRuleOutfilesWriters(ByRuleOutfilesWriters)
486
487 def GetTorsionAlertsFilteredByRuleFilesRuleIDs(TorsionAlertsSummaryInfo):
488 """Get torsion rule IDs for writing out individual SD files filtered by torsion alert rules."""
489
490 # Get torsion rule IDs triggering torsion alerts sorted in the order from the most to
491 # the least number of unique molecules...
492 RuleIDs = GetSortedTorsionAlertsSummaryInfoRuleIDs(TorsionAlertsSummaryInfo)
493
494 # Select torsion rule IDs for writing out SD files...
495 if not OptionsInfo["OutfilesFilteredByRulesAllMode"]:
496 MaxRuleIDs = OptionsInfo["OutfilesFilteredByRulesMaxCount"]
497 if MaxRuleIDs < len(RuleIDs):
498 RuleIDs = RuleIDs[0:MaxRuleIDs]
499
500 return RuleIDs
501
502 def RetrieveTorsionAlertsInfo(Mol, TorsionAlertsSummaryInfo):
503 """Parse torsion alerts data field value to retrieve alerts information for rotatable bonds."""
504
505 TorsionAlertsLabel = OptionsInfo["SDFieldIDsToLabels"]["TorsionAlertsLabel"]
506 TorsionAlerts = Mol.GetProp(TorsionAlertsLabel) if Mol.HasProp(TorsionAlertsLabel) else None
507
508 if TorsionAlerts is None or len(TorsionAlerts) == 0:
509 return None
510
511 # Initialize for tracking by rule IDs...
512 TorsionAlertsInfo = {}
513 TorsionAlertsInfo["RuleIDs"] = []
514
515 TorsionAlertsInfo["RuleSMARTS"] = {}
516 TorsionAlertsInfo["HierarchyClassName"] = {}
517 TorsionAlertsInfo["HierarchySubClassName"] = {}
518 TorsionAlertsInfo["EnergyMethod"] = {}
519
520 TorsionAlertsInfo["AtomIndices"] = {}
521 TorsionAlertsInfo["TorsionAtomIndices"] = {}
522 TorsionAlertsInfo["TorsionAngle"] = {}
523
524 TorsionAlertsInfo["Energy"] = {}
525 TorsionAlertsInfo["EnergyLowerBound"] = {}
526 TorsionAlertsInfo["EnergyUpperBound"] = {}
527
528 TorsionAlertsInfo["AngleNotObserved"] = {}
529 TorsionAlertsInfo["MaxSingleEnergyAlertType"] = {}
530
531 TorsionAlertsInfo["AnglesNotObservedCount"] = {}
532 TorsionAlertsInfo["MaxSingleEnergyAlertsCount"] = {}
533
534 ValuesDelimiter = OptionsInfo["IntraSetValuesDelim"]
535 TorsionAlertsSetSize = 12
536
537 TorsionAlertsWords = TorsionAlerts.split()
538 if len(TorsionAlertsWords) % TorsionAlertsSetSize:
539 MiscUtil.PrintError("The number of space delimited values, %s, for TorsionAlerts data field in filtered SD file must be a multiple of %s." % (len(TorsionAlertsWords), TorsionAlertsSetSize))
540
541 ID = 0
542 for Index in range(0, len(TorsionAlertsWords), TorsionAlertsSetSize):
543 ID += 1
544
545 RotBondIndices, TorsionIndices, TorsionAngle, Energy, EnergyLowerBound, EnergyUpperBound, HierarchyClass, HierarchySubClass, TorsionRule, EnergyMethod, AngleNotObserved, MaxSingleEnergyAlertType = TorsionAlertsWords[Index: Index + TorsionAlertsSetSize]
546 RotBondIndices = RotBondIndices.split(ValuesDelimiter)
547 TorsionIndices = TorsionIndices.split(ValuesDelimiter)
548
549 if TorsionRule not in TorsionAlertsSummaryInfo["SMARTSToRuleIDs"]:
550 MiscUtil.PrintWarning("The SMARTS pattern, %s, for TorsionAlerts data field in filtered SD file doesn't map to any torsion rule..." % TorsionRule)
551 continue
552 TorsionRuleNodeID = TorsionAlertsSummaryInfo["SMARTSToRuleIDs"][TorsionRule]
553
554 # Track data for torsion alerts in a molecule...
555 if TorsionRuleNodeID not in TorsionAlertsInfo["RuleSMARTS"]:
556 TorsionAlertsInfo["RuleIDs"].append(TorsionRuleNodeID)
557
558 TorsionAlertsInfo["RuleSMARTS"][TorsionRuleNodeID] = TorsionRule
559 TorsionAlertsInfo["HierarchyClassName"][TorsionRuleNodeID] = HierarchyClass
560 TorsionAlertsInfo["HierarchySubClassName"][TorsionRuleNodeID] = HierarchySubClass
561 TorsionAlertsInfo["EnergyMethod"][TorsionRuleNodeID] = EnergyMethod
562
563 TorsionAlertsInfo["AtomIndices"][TorsionRuleNodeID] = []
564 TorsionAlertsInfo["TorsionAtomIndices"][TorsionRuleNodeID] = []
565 TorsionAlertsInfo["TorsionAngle"][TorsionRuleNodeID] = []
566
567 TorsionAlertsInfo["Energy"][TorsionRuleNodeID] = []
568 TorsionAlertsInfo["EnergyLowerBound"][TorsionRuleNodeID] = []
569 TorsionAlertsInfo["EnergyUpperBound"][TorsionRuleNodeID] = []
570 TorsionAlertsInfo["AngleNotObserved"][TorsionRuleNodeID] = []
571 TorsionAlertsInfo["MaxSingleEnergyAlertType"][TorsionRuleNodeID] = []
572
573 TorsionAlertsInfo["AnglesNotObservedCount"][TorsionRuleNodeID] = 0
574 TorsionAlertsInfo["MaxSingleEnergyAlertsCount"][TorsionRuleNodeID] = 0
575
576 # Track multiple values for a rule ID...
577 TorsionAlertsInfo["AtomIndices"][TorsionRuleNodeID].append(RotBondIndices)
578 TorsionAlertsInfo["TorsionAtomIndices"][TorsionRuleNodeID].append(TorsionIndices)
579 TorsionAlertsInfo["TorsionAngle"][TorsionRuleNodeID].append(TorsionAngle)
580
581 TorsionAlertsInfo["Energy"][TorsionRuleNodeID].append(Energy)
582 TorsionAlertsInfo["EnergyLowerBound"][TorsionRuleNodeID].append(EnergyLowerBound)
583 TorsionAlertsInfo["EnergyUpperBound"][TorsionRuleNodeID].append(EnergyUpperBound)
584 TorsionAlertsInfo["AngleNotObserved"][TorsionRuleNodeID].append(AngleNotObserved)
585
586 TorsionAlertsInfo["MaxSingleEnergyAlertType"][TorsionRuleNodeID].append(MaxSingleEnergyAlertType)
587
588 # Count angles not observer for a rule ID...
589 if AngleNotObserved == 'Yes':
590 TorsionAlertsInfo["AnglesNotObservedCount"][TorsionRuleNodeID] += 1
591
592 # Count max single energy alert for a rule ID...
593 if MaxSingleEnergyAlertType == 'Yes':
594 TorsionAlertsInfo["MaxSingleEnergyAlertsCount"][TorsionRuleNodeID] += 1
595
596 return TorsionAlertsInfo
597
598 def WriteMolecule(Writer, Mol, RotBondsAlertsInfo):
599 """Write out molecule."""
600
601 if OptionsInfo["CountMode"]:
602 return True
603
604 SetupMolPropertiesForAlertsInformation(Mol, RotBondsAlertsInfo)
605
606 try:
607 Writer.write(Mol)
608 except Exception as ErrMsg:
609 MiscUtil.PrintWarning("Failed to write molecule %s:\n%s\n" % (RDKitUtil.GetMolName(Mol), ErrMsg))
610 return False
611
612 return True
613
614 def SetupMolPropertiesForAlertsInformation(Mol, RotBondsAlertsInfo):
615 """Setup molecule properties containing alerts information for rotatable bonds."""
616
617 if not OptionsInfo["OutfileAlerts"]:
618 return
619
620 SDFieldIDsToLabels = OptionsInfo["SDFieldIDsToLabels"]
621 Precision = OptionsInfo["Precision"]
622
623 # Setup rotatable bonds count...
624 RotBondsCount = 0
625 if RotBondsAlertsInfo is not None:
626 RotBondsCount = len(RotBondsAlertsInfo["IDs"])
627 Mol.SetProp(SDFieldIDsToLabels["RotBondsCountLabel"], "%s" % RotBondsCount)
628
629 if RotBondsAlertsInfo is not None:
630 # Setup total energy along with lower and upper bounds...
631 Mol.SetProp(SDFieldIDsToLabels["TotalEnergyLabel"], "%s" % SetupEnergyValueForSDField(RotBondsAlertsInfo["TotalEnergy"], Precision))
632 Mol.SetProp(SDFieldIDsToLabels["TotalEnergyLowerBoundCILabel"], "%s" % SetupEnergyValueForSDField(RotBondsAlertsInfo["TotalEnergyLowerBound"], Precision))
633 Mol.SetProp(SDFieldIDsToLabels["TotalEnergyUpperBoundCILabel"], "%s" % SetupEnergyValueForSDField(RotBondsAlertsInfo["TotalEnergyUpperBound"], Precision))
634
635 # Setup max single energy and alert count...
636 if OptionsInfo["MaxSingleEnergyMode"] or OptionsInfo["TotalOrMaxSingleEnergyMode"]:
637 Mol.SetProp(SDFieldIDsToLabels["MaxSingleEnergyLabel"], "%s" % SetupEnergyValueForSDField(RotBondsAlertsInfo["MaxSingleEnergy"], Precision))
638 Mol.SetProp(SDFieldIDsToLabels["MaxSingleEnergyAlertsCountLabel"], "%s" % ("NA" if RotBondsAlertsInfo["MaxSingleEnergyAlertsCount"] is None else RotBondsAlertsInfo["MaxSingleEnergyAlertsCount"]))
639
640 Mol.SetProp(SDFieldIDsToLabels["AnglesNotObservedCountLabel"], "%s" % ("NA" if RotBondsAlertsInfo["AnglesNotObservedCount"] is None else RotBondsAlertsInfo["AnglesNotObservedCount"]))
641
642
643 # Setup alert information for rotatable bonds...
644 AlertsInfoValues = []
645
646 # Delimiter for multiple values corresponding to specific set of information for
647 # a rotatable bond. For example: TorsionAtomIndices
648 ValuesDelim = OptionsInfo["IntraSetValuesDelim"]
649
650 # Delimiter for various values for a rotatable bond...
651 RotBondValuesDelim = OptionsInfo["InterSetValuesDelim"]
652
653 # Delimiter for values corresponding to multiple rotatable bonds...
654 AlertsInfoValuesDelim = OptionsInfo["InterSetValuesDelim"]
655
656 if RotBondsAlertsInfo is not None:
657 for ID in RotBondsAlertsInfo["IDs"]:
658 if not RotBondsAlertsInfo["MatchStatus"][ID]:
659 continue
660
661 if SkipRotatableBondAlertInfo(ID, RotBondsAlertsInfo):
662 continue
663
664 RotBondValues = []
665
666 # Bond atom indices...
667 Values = ["%s" % Value for Value in RotBondsAlertsInfo["AtomIndices"][ID]]
668 RotBondValues.append(ValuesDelim.join(Values))
669
670 # Torsion atom indices...
671 TorsionAtomIndices = SetupTorsionAtomIndicesValues(RotBondsAlertsInfo["TorsionAtomIndices"][ID], ValuesDelim)
672 RotBondValues.append(TorsionAtomIndices)
673
674 # Torsion angle...
675 RotBondValues.append("%.2f" % RotBondsAlertsInfo["TorsionAngle"][ID])
676
677 # Energy along with its lower and upper bound confidence interval...
678 RotBondValues.append(SetupEnergyValueForSDField(RotBondsAlertsInfo["Energy"][ID], Precision))
679 RotBondValues.append(SetupEnergyValueForSDField(RotBondsAlertsInfo["EnergyLowerBound"][ID], Precision))
680 RotBondValues.append(SetupEnergyValueForSDField(RotBondsAlertsInfo["EnergyUpperBound"][ID], Precision))
681
682 # Hierarchy class and subclass names...
683 RotBondValues.append("%s" % RotBondsAlertsInfo["HierarchyClassName"][ID])
684 RotBondValues.append("%s" % RotBondsAlertsInfo["HierarchySubClassName"][ID])
685
686 # Torsion rule SMARTS...
687 RotBondValues.append("%s" % RotBondsAlertsInfo["TorsionRuleSMARTS"][ID])
688
689 # Energy method...
690 RotBondValues.append("%s" % RotBondsAlertsInfo["EnergyMethod"][ID])
691
692 # Angle not observed...
693 RotBondValues.append("%s" % SetupAngleNotObservedValue(RotBondsAlertsInfo["AngleNotObserved"][ID]))
694
695 # Max single energy alert status...
696 RotBondValues.append("%s" % SetupMaxSingleEnergyAlertStatusValue(RotBondsAlertsInfo["MaxSingleEnergyAlertStatus"][ID]))
697
698 # Track joined values for a rotatable bond...
699 AlertsInfoValues.append("%s" % RotBondValuesDelim.join(RotBondValues))
700
701 if len(AlertsInfoValues):
702 Mol.SetProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionAlertsLabel"], "%s" % ("%s" % AlertsInfoValuesDelim.join(AlertsInfoValues)))
703
704 def WriteMoleculeFilteredByRuleID(Writer, Mol, TorsionRuleID, TorsionAlertsSummaryInfo, TorsionAlertsInfo):
705 """Write out molecule."""
706
707 if OptionsInfo["CountMode"]:
708 return
709
710 SetupMolPropertiesForFilteredByRuleIDAlertsInformation(Mol, TorsionRuleID, TorsionAlertsSummaryInfo, TorsionAlertsInfo)
711
712 Writer.write(Mol)
713
714 def SetupMolPropertiesForFilteredByRuleIDAlertsInformation(Mol, TorsionRuleID, TorsionAlertsSummaryInfo, TorsionAlertsInfo):
715 """Setup molecule properties containing alerts information for torsion alerts
716 fileted by Rule IDs."""
717
718 # Delete torsion alerts information for rotatable bonds...
719 if Mol.HasProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionAlertsLabel"]):
720 Mol.ClearProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionAlertsLabel"])
721
722 # Delimiter for values...
723 IntraSetValuesDelim = OptionsInfo["IntraSetValuesDelim"]
724 InterSetValuesDelim = OptionsInfo["InterSetValuesDelim"]
725
726 # Setup alert rule information...
727 AlertRuleInfoValues = []
728
729 AlertRuleInfoValues.append("%s" % TorsionAlertsInfo["HierarchyClassName"][TorsionRuleID])
730 AlertRuleInfoValues.append("%s" % TorsionAlertsInfo["HierarchySubClassName"][TorsionRuleID])
731
732 AlertRuleInfoValues.append("%s" % TorsionAlertsInfo["RuleSMARTS"][TorsionRuleID])
733 AlertRuleInfoValues.append("%s" % TorsionAlertsInfo["EnergyMethod"][TorsionRuleID])
734
735 Mol.SetProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionRuleLabel"], "%s" % ("%s" % InterSetValuesDelim.join(AlertRuleInfoValues)))
736
737 # Setup max single energy alert count for torsion rule...
738 Mol.SetProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionRuleMaxSingleEnergyAlertsCountLabel"], "%s" % TorsionAlertsInfo["MaxSingleEnergyAlertsCount"][TorsionRuleID])
739
740 # Setup angle not observed count for torsion rule...
741 Mol.SetProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionRuleAnglesNotObservedCountLabel"], "%s" % TorsionAlertsInfo["AnglesNotObservedCount"][TorsionRuleID])
742
743 # Setup torsion rule alerts...
744 # "TorsionRuleAlertsLabel": "TorsionRuleAlerts (RotBondIndices TorsionIndices TorsionAngle Energy EnergyLowerBoundCI EnergyUpperBoundCI EnergyMethod AngleNotObserved MaxSingleEnergyAlert)
745 AlertsInfoValues = []
746 for Index in range(0, len(TorsionAlertsInfo["AtomIndices"][TorsionRuleID])):
747 RotBondInfoValues = []
748
749 # Bond atom indices...
750 Values = ["%s" % Value for Value in TorsionAlertsInfo["AtomIndices"][TorsionRuleID][Index]]
751 RotBondInfoValues.append(IntraSetValuesDelim.join(Values))
752
753 # Torsion atom indices retrieved from the filtered SD file and stored as strings...
754 Values = ["%s" % Value for Value in TorsionAlertsInfo["TorsionAtomIndices"][TorsionRuleID][Index]]
755 RotBondInfoValues.append(IntraSetValuesDelim.join(Values))
756
757 # Torsion angle...
758 RotBondInfoValues.append(TorsionAlertsInfo["TorsionAngle"][TorsionRuleID][Index])
759
760 # Energy and its bounds...
761 RotBondInfoValues.append(TorsionAlertsInfo["Energy"][TorsionRuleID][Index])
762 RotBondInfoValues.append(TorsionAlertsInfo["EnergyLowerBound"][TorsionRuleID][Index])
763 RotBondInfoValues.append(TorsionAlertsInfo["EnergyUpperBound"][TorsionRuleID][Index])
764
765 # Angle not observed......
766 RotBondInfoValues.append(TorsionAlertsInfo["AngleNotObserved"][TorsionRuleID][Index])
767
768 # Max single energy alert type...
769 RotBondInfoValues.append(TorsionAlertsInfo["MaxSingleEnergyAlertType"][TorsionRuleID][Index])
770
771 # Track alerts informaiton...
772 AlertsInfoValues.append("%s" % InterSetValuesDelim.join(RotBondInfoValues))
773
774 Mol.SetProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionRuleAlertsLabel"], "%s" % (InterSetValuesDelim.join(AlertsInfoValues)))
775
776 def SkipRotatableBondAlertInfo(ID, RotBondsAlertsInfo):
777 """Skip rotatble bond alert info for a specific bond during writing to output files."""
778
779 if not OptionsInfo["OutfileAlertsOnly"]:
780 return False
781
782 if RotBondsAlertsInfo["RotBondsAlertsStatus"] is None:
783 return True
784
785 Status = False
786 if OptionsInfo["TotalEnergyMode"]:
787 if not RotBondsAlertsInfo["RotBondsAlertsStatus"]:
788 Status = True
789 elif OptionsInfo["MaxSingleEnergyMode"]:
790 if RotBondsAlertsInfo["MaxSingleEnergyAlertStatus"][ID] is None or not RotBondsAlertsInfo["MaxSingleEnergyAlertStatus"][ID]:
791 Status = True
792 elif OptionsInfo["TotalOrMaxSingleEnergyMode"]:
793 if not RotBondsAlertsInfo["RotBondsAlertsStatus"]:
794 Status = True
795
796 return Status
797
798 def SetupEnergyValueForSDField(Value, Precision):
799 """Setup energy value for SD field."""
800
801 if Value is None or math.isnan(Value) or math.isinf(Value):
802 return "NA"
803
804 return "%.*f" % (Precision, Value)
805
806 def SetupAngleNotObservedValue(Value):
807 """Setup angle not observed value."""
808
809 if Value is None:
810 return "NA"
811
812 return "Yes" if Value else "No"
813
814 def SetupMaxSingleEnergyAlertStatusValue(Value):
815 """Setup max single energy alert status value."""
816
817 if Value is None:
818 return "NA"
819
820 return "Yes" if Value else "No"
821
822 def SetupTorsionAtomIndicesValues(TorsionAtomIndicesList, ValuesDelim):
823 """Setup torsion atom indices value for output files."""
824
825 Values = ["%s" % Value for Value in TorsionAtomIndicesList]
826
827 return ValuesDelim.join(Values)
828
829 def SetupOutfilesWriters():
830 """Setup molecule and summary writers."""
831
832 OutfilesWriters = {"WriterRemaining": None, "WriterFiltered": None, "WriterAlertSummary": None}
833
834 # Writers for SD files...
835 WriterRemaining, WriterFiltered = SetupMoleculeWriters()
836 OutfilesWriters["WriterRemaining"] = WriterRemaining
837 OutfilesWriters["WriterFiltered"] = WriterFiltered
838
839 # Writer for alert summary CSV file...
840 WriterAlertSummary = SetupAlertSummaryWriter()
841 OutfilesWriters["WriterAlertSummary"] = WriterAlertSummary
842
843 return OutfilesWriters
844
845 def SetupMoleculeWriters():
846 """Setup molecule writers."""
847
848 Writer = None
849 WriterFiltered = None
850
851 if OptionsInfo["CountMode"]:
852 return (Writer, WriterFiltered)
853
854 Writer = RDKitUtil.MoleculesWriter(OptionsInfo["Outfile"], **OptionsInfo["OutfileParams"])
855 if Writer is None:
856 MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OptionsInfo["Outfile"])
857 MiscUtil.PrintInfo("\nGenerating file %s..." % OptionsInfo["Outfile"])
858
859 if OptionsInfo["OutfileFilteredMode"]:
860 WriterFiltered = RDKitUtil.MoleculesWriter(OptionsInfo["OutfileFiltered"], **OptionsInfo["OutfileParams"])
861 if WriterFiltered is None:
862 MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OptionsInfo["OutfileFiltered"])
863 MiscUtil.PrintInfo("Generating file %s..." % OptionsInfo["OutfileFiltered"])
864
865 return (Writer, WriterFiltered)
866
867 def SetupAlertSummaryWriter():
868 """Setup a alert summary writer."""
869
870 Writer = None
871
872 if OptionsInfo["CountMode"]:
873 return Writer
874
875 if not OptionsInfo["OutfileSummaryMode"]:
876 return Writer
877
878 Outfile = OptionsInfo["OutfileSummary"]
879 Writer = open(Outfile, "w")
880 if Writer is None:
881 MiscUtil.PrintError("Failed to setup a writer for output fie %s " % Outfile)
882
883 MiscUtil.PrintInfo("Generating file %s..." % Outfile)
884
885 return Writer
886
887 def CloseOutfilesWriters(OutfilesWriters):
888 """Close outfile writers."""
889
890 for WriterType, Writer in OutfilesWriters.items():
891 if Writer is not None:
892 Writer.close()
893
894 def SetupByRuleOutfilesWriters(RuleIDs):
895 """Setup by rule outfiles writers."""
896
897 # Initialize...
898 OutfilesWriters = {}
899 for RuleID in RuleIDs:
900 OutfilesWriters[RuleID] = None
901
902 if OptionsInfo["CountMode"]:
903 return OutfilesWriters
904
905 if not OptionsInfo["OutfilesFilteredByRulesMode"]:
906 return OutfilesWriters
907
908 FileDir, FileName, FileExt = MiscUtil.ParseFileName(Options["--outfile"])
909 OutfilesRoot = "%s_Filtered_TopRule" % FileName
910 OutfilesExt = "sdf"
911
912 MsgTxt = "all" if OptionsInfo["OutfilesFilteredByRulesAllMode"] else "top %s" % OptionsInfo["OutfilesFilteredByRulesMaxCount"]
913 MiscUtil.PrintInfo("\nGenerating output files %s*.%s for %s torsion rules triggering alerts..." % (OutfilesRoot, OutfilesExt, MsgTxt))
914
915 # Delete any existing output files...
916 Outfiles = glob.glob("%s*.%s" % (OutfilesRoot, OutfilesExt))
917 if len(Outfiles):
918 MiscUtil.PrintInfo("Deleting existing output files %s*.%s..." % (OutfilesRoot, OutfilesExt))
919 for Outfile in Outfiles:
920 try:
921 os.remove(Outfile)
922 except Exception as ErrMsg:
923 MiscUtil.PrintWarning("Failed to delete file: %s" % ErrMsg)
924
925 RuleIndex = 0
926 for RuleID in RuleIDs:
927 RuleIndex += 1
928 Outfile = "%s%s.%s" % (OutfilesRoot, RuleIndex, OutfilesExt)
929 Writer = RDKitUtil.MoleculesWriter(Outfile, **OptionsInfo["OutfileParams"])
930 if Writer is None:
931 MiscUtil.PrintError("Failed to setup a writer for output fie %s " % Outfile)
932
933 OutfilesWriters[RuleID] = Writer
934
935 return OutfilesWriters
936
937 def CloseByRuleOutfilesWriters(OutfilesWriters):
938 """Close by rule outfile writers."""
939
940 for RuleID, Writer in OutfilesWriters.items():
941 if Writer is not None:
942 Writer.close()
943
944 def InstantiateTorsionStrainEnergyAlertsClass(Quiet = False):
945 """Initialize torsion strain energy alerts class."""
946
947 try:
948 TorsionStrainEnergyAlertsHandle = TorsionStrainEnergyAlerts(AlertsMode = OptionsInfo["AlertsMode"], TotalEnergyCutoff = OptionsInfo["TotalEnergyCutoff"], MaxSingleEnergyCutoff = OptionsInfo["MaxSingleEnergyCutoff"], RotBondsSMARTSMode = OptionsInfo["RotBondsSMARTSMode"], RotBondsSMARTSPattern = OptionsInfo["RotBondsSMARTSPattern"], TorsionLibraryFilePath = OptionsInfo["TorsionEnergyLibraryFile"], AlertTorsionsNotObserved = OptionsInfo["FilterTorsionsNotObserved"])
949 except Exception as ErrMsg:
950 MiscUtil.PrintError("Failed to instantiate TorsionStrainEnergyAlerts:\n%s\n" % (ErrMsg))
951
952 if not Quiet:
953 MiscUtil.PrintInfo("\nRetrieving data from torsion strain energy library file %s..." % TorsionStrainEnergyAlertsHandle.GetTorsionLibraryFilePath())
954 TorsionStrainEnergyAlertsHandle.ListTorsionLibraryInfo()
955
956 return TorsionStrainEnergyAlertsHandle
957
958 def ProcessRotatableBondsSMARTSMode():
959 """"Process SMARTS pattern for rotatable bonds."""
960
961 RotBondsMode = OptionsInfo["RotBondsSMARTSMode"]
962
963 RotBondsSMARTSPattern = None
964 RotBondsSMARTSPatternSpecified = OptionsInfo["RotBondsSMARTSPatternSpecified"]
965
966 if re.match("^(NonStrict|SemiStrict|Strict)$", RotBondsMode, re.I):
967 RotBondsSMARTSPattern = None
968 elif re.match("Specify", RotBondsMode, re.I):
969 RotBondsSMARTSPatternSpecified = RotBondsSMARTSPatternSpecified.strip()
970 if not len(RotBondsSMARTSPatternSpecified):
971 MiscUtil.PrintError("Empty value specified for SMILES/SMARTS pattern in \"--rotBondsSMARTSPattern\" option, %s." % RotBondsMode)
972
973 RotBondsPatternMol = Chem.MolFromSmarts(RotBondsSMARTSPatternSpecified)
974 if RotBondsPatternMol is None:
975 MiscUtil.PrintError("Failed to create rotatable bonds pattern molecule. The rotatable bonds SMARTS pattern, \"%s\", specified using \"--rotBondsSMARTSPattern\" option is not valid." % (RotBondsSMARTSPatternSpecified))
976 else:
977 MiscUtil.PrintError("The value, %s, specified for option \"-r, --rotBondsSMARTSMode\" is not valid. " % RotBondsMode)
978
979 OptionsInfo["RotBondsSMARTSPattern"] = RotBondsSMARTSPattern
980
981 def ProcessSDFieldLabelsOption():
982 """Process SD data field label option."""
983
984 ParamsOptionName = "--outfileSDFieldLabels"
985 ParamsOptionValue = Options["--outfileSDFieldLabels"]
986
987 ParamsIDsToLabels = {"RotBondsCountLabel": "RotBondsCount", "TotalEnergyLabel": "TotalEnergy", "TotalEnergyLowerBoundCILabel": "TotalEnergyLowerBoundCI", "TotalEnergyUpperBoundCILabel": "TotalEnergyUpperBoundCI", "MaxSingleEnergyLabel": "MaxSingleEnergy", "MaxSingleEnergyAlertsCountLabel": "MaxSingleEnergyAlertsCount", "AnglesNotObservedCountLabel": "AnglesNotObservedCount", "TorsionAlertsLabel": "TorsionAlerts(RotBondIndices TorsionIndices TorsionAngle Energy EnergyLowerBoundCI EnergyUpperBoundCI HierarchyClass HierarchySubClass TorsionRule EnergyMethod AngleNotObserved MaxSingleEnergyAlert)", "TorsionRuleLabel": "TorsionRule (HierarchyClass HierarchySubClass TorsionRule EnergyMethod)", "TorsionRuleMaxSingleEnergyAlertsCountLabel": "TorsionRuleMaxSingleEnergyAlertsCount", "TorsionRuleAnglesNotObservedCountLabel": "TorsionRuleAnglesNotObservedCount", "TorsionRuleAlertsLabel": "TorsionRuleAlerts (RotBondIndices TorsionIndices TorsionAngle Energy EnergyLowerBoundCI EnergyUpperBoundCI AngleNotObserved MaxSingleEnergyAlert)"}
988
989 if re.match("^auto$", ParamsOptionValue, re.I):
990 OptionsInfo["SDFieldIDsToLabels"] = ParamsIDsToLabels
991 return
992
993 # Setup a canonical paramater names...
994 ValidParamNames = []
995 CanonicalParamNamesMap = {}
996 for ParamName in sorted(ParamsIDsToLabels):
997 ValidParamNames.append(ParamName)
998 CanonicalParamNamesMap[ParamName.lower()] = ParamName
999
1000 ParamsOptionValue = ParamsOptionValue.strip()
1001 if not ParamsOptionValue:
1002 PrintError("No valid parameter name and value pairs specified using \"%s\" option" % ParamsOptionName)
1003
1004 ParamsOptionValueWords = ParamsOptionValue.split(",")
1005 if len(ParamsOptionValueWords) % 2:
1006 MiscUtil.PrintError("The number of comma delimited paramater names and values, %d, specified using \"%s\" option must be an even number." % (len(ParamsOptionValueWords), ParamsOptionName))
1007
1008 # Validate paramater name and value pairs...
1009 for Index in range(0, len(ParamsOptionValueWords), 2):
1010 Name = ParamsOptionValueWords[Index].strip()
1011 Value = ParamsOptionValueWords[Index + 1].strip()
1012
1013 CanonicalName = Name.lower()
1014 if not CanonicalName in CanonicalParamNamesMap:
1015 MiscUtil.PrintError("The parameter name, %s, specified using \"%s\" is not a valid name. Supported parameter names: %s" % (Name, ParamsOptionName, " ".join(ValidParamNames)))
1016
1017 ParamName = CanonicalParamNamesMap[CanonicalName]
1018 ParamValue = Value
1019
1020 # Set value...
1021 ParamsIDsToLabels[ParamName] = ParamValue
1022
1023 OptionsInfo["SDFieldIDsToLabels"] = ParamsIDsToLabels
1024
1025 def ProcessOptions():
1026 """Process and validate command line arguments and options."""
1027
1028 MiscUtil.PrintInfo("Processing options...")
1029
1030 # Validate options...
1031 ValidateOptions()
1032
1033 TotalEnergyMode, MaxSingleEnergyMode, TotalOrMaxSingleEnergyMode = [False] * 3
1034 AlertsMode = Options["--alertsMode"]
1035 if re.match("^TotalEnergy$", AlertsMode, re.I):
1036 TotalEnergyMode = True
1037 elif re.match("^MaxSingleEnergy$", AlertsMode, re.I):
1038 MaxSingleEnergyMode = True
1039 elif re.match("^TotalOrMaxSingleEnergy$", AlertsMode, re.I):
1040 TotalOrMaxSingleEnergyMode = True
1041 OptionsInfo["AlertsMode"] = AlertsMode
1042 OptionsInfo["TotalEnergyMode"] = TotalEnergyMode
1043 OptionsInfo["MaxSingleEnergyMode"] = MaxSingleEnergyMode
1044 OptionsInfo["TotalOrMaxSingleEnergyMode"] = TotalOrMaxSingleEnergyMode
1045
1046 OptionsInfo["FilterTorsionsNotObserved"] = True if re.match("^yes$", Options["--filterTorsionsNotObserved"], re.I) else False
1047
1048 OptionsInfo["MaxSingleEnergyCutoff"] = float(Options["--alertsMaxSingleEnergyCutoff"])
1049 OptionsInfo["TotalEnergyCutoff"] = float(Options["--alertsTotalEnergyCutoff"])
1050
1051 OptionsInfo["Infile"] = Options["--infile"]
1052 ParamsDefaultInfoOverride = {"RemoveHydrogens": False}
1053 OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters("--infileParams", Options["--infileParams"], InfileName = Options["--infile"], ParamsDefaultInfo = ParamsDefaultInfoOverride)
1054
1055 OptionsInfo["Outfile"] = Options["--outfile"]
1056 OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters("--outfileParams", Options["--outfileParams"], Options["--infile"], Options["--outfile"])
1057
1058 FileDir, FileName, FileExt = MiscUtil.ParseFileName(Options["--outfile"])
1059 OutfileFiltered = "%s_Filtered.%s" % (FileName, FileExt)
1060 OptionsInfo["OutfileFiltered"] = OutfileFiltered
1061 OptionsInfo["OutfileFilteredMode"] = True if re.match("^yes$", Options["--outfileFiltered"], re.I) else False
1062
1063 OptionsInfo["OutfileSummary"] = "%s_AlertsSummary.csv" % (FileName)
1064
1065 OutfileSummaryMode = Options["--outfileSummary"]
1066 if re.match("^auto$", OutfileSummaryMode, re.I):
1067 OutfileSummaryMode = 'yes' if re.match("^MaxSingleEnergy$", Options["--alertsMode"], re.I) else 'no'
1068 OptionsInfo["OutfileSummaryMode"] = True if re.match("^yes$", OutfileSummaryMode, re.I) else False
1069
1070 if re.match("^yes$", Options["--outfileSummary"], re.I):
1071 if not re.match("^MaxSingleEnergy$", Options["--alertsMode"], re.I):
1072 MiscUtil.PrintError("The value \"%s\" specified for \"--outfileSummary\" option is not valid. The specified value is only allowed during \"MaxSingleEnergy\" value of \"-a, --alertsMode\" option." % (Options["--outfileSummary"]))
1073
1074 OutfilesFilteredByRulesMode = Options["--outfilesFilteredByRules"]
1075 if re.match("^auto$", OutfilesFilteredByRulesMode, re.I):
1076 OutfilesFilteredByRulesMode = 'yes' if re.match("^MaxSingleEnergy$", Options["--alertsMode"], re.I) else 'no'
1077 OptionsInfo["OutfilesFilteredByRulesMode"] = True if re.match("^yes$", OutfilesFilteredByRulesMode, re.I) else False
1078
1079 if re.match("^yes$", Options["--outfilesFilteredByRules"], re.I):
1080 if not re.match("^MaxSingleEnergy$", Options["--alertsMode"], re.I):
1081 MiscUtil.PrintError("The value \"%s\" specified for \"--outfilesFilteredByRules\" option is not valid. The specified value is only allowed during \"MaxSingleEnergy\" value of \"-a, --alertsMode\" option." % (Options["--outfileSummary"]))
1082
1083 OptionsInfo["TrackAlertsSummaryInfo"] = True if (OptionsInfo["OutfileSummaryMode"] or OptionsInfo["OutfilesFilteredByRulesMode"]) else False
1084
1085 OutfilesFilteredByRulesMaxCount = Options["--outfilesFilteredByRulesMaxCount"]
1086 if not re.match("^All$", OutfilesFilteredByRulesMaxCount, re.I):
1087 OutfilesFilteredByRulesMaxCount = int(OutfilesFilteredByRulesMaxCount)
1088 OptionsInfo["OutfilesFilteredByRulesMaxCount"] = OutfilesFilteredByRulesMaxCount
1089 OptionsInfo["OutfilesFilteredByRulesAllMode"] = True if re.match("^All$", Options["--outfilesFilteredByRulesMaxCount"], re.I) else False
1090
1091 OptionsInfo["OutfileAlerts"] = True if re.match("^yes$", Options["--outfileAlerts"], re.I) else False
1092
1093 if re.match("^yes$", Options["--outfilesFilteredByRules"], re.I):
1094 if not re.match("^yes$", Options["--outfileAlerts"], re.I):
1095 MiscUtil.PrintError("The value \"%s\" specified for \"--outfilesFilteredByRules\" option is not valid. The specified value is only allowed during \"yes\" value of \"--outfileAlerts\" option." % (Options["--outfilesFilteredByRules"]))
1096
1097 OptionsInfo["OutfileAlertsMode"] = Options["--outfileAlertsMode"]
1098 OptionsInfo["OutfileAlertsOnly"] = True if re.match("^AlertsOnly$", Options["--outfileAlertsMode"], re.I) else False
1099
1100 ProcessSDFieldLabelsOption()
1101
1102 OptionsInfo["Overwrite"] = Options["--overwrite"]
1103 OptionsInfo["CountMode"] = True if re.match("^count$", Options["--mode"], re.I) else False
1104
1105 OptionsInfo["Precision"] = int(Options["--precision"])
1106
1107 OptionsInfo["MPMode"] = True if re.match("^yes$", Options["--mp"], re.I) else False
1108 OptionsInfo["MPParams"] = MiscUtil.ProcessOptionMultiprocessingParameters("--mpParams", Options["--mpParams"])
1109
1110 OptionsInfo["RotBondsSMARTSMode"] = Options["--rotBondsSMARTSMode"]
1111 OptionsInfo["RotBondsSMARTSPatternSpecified"] = Options["--rotBondsSMARTSPattern"]
1112 ProcessRotatableBondsSMARTSMode()
1113
1114 OptionsInfo["TorsionEnergyLibraryFile"] = Options["--torsionEnergyLibraryFile"]
1115
1116 # Setup delimiter for writing out torsion alert information to output files...
1117 OptionsInfo["IntraSetValuesDelim"] = ","
1118 OptionsInfo["InterSetValuesDelim"] = " "
1119
1120 def RetrieveOptions():
1121 """Retrieve command line arguments and options."""
1122
1123 # Get options...
1124 global Options
1125 Options = docopt(_docoptUsage_)
1126
1127 # Set current working directory to the specified directory...
1128 WorkingDir = Options["--workingdir"]
1129 if WorkingDir:
1130 os.chdir(WorkingDir)
1131
1132 # Handle examples option...
1133 if "--examples" in Options and Options["--examples"]:
1134 MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_))
1135 sys.exit(0)
1136
1137 def ProcessListTorsionLibraryOption():
1138 """Process list torsion library information."""
1139
1140 # Validate and process dataFile option for listing torsion library information...
1141 OptionsInfo["TorsionEnergyLibraryFile"] = Options["--torsionEnergyLibraryFile"]
1142 if not re.match("^auto$", Options["--torsionEnergyLibraryFile"], re.I):
1143 MiscUtil.ValidateOptionFilePath("-t, --torsionEnergyLibraryFile", Options["--torsionEnergyLibraryFile"])
1144
1145 # Instantiate TorsionStrainEnergyAlerts using defaults...
1146 TorsionStrainEnergyAlertsHandle = TorsionStrainEnergyAlerts(TorsionLibraryFilePath = OptionsInfo["TorsionEnergyLibraryFile"])
1147 MiscUtil.PrintInfo("\nRetrieving data from torsion strain energy library file %s..." % TorsionStrainEnergyAlertsHandle.GetTorsionLibraryFilePath())
1148 TorsionStrainEnergyAlertsHandle.ListTorsionLibraryInfo()
1149
1150 def ValidateOptions():
1151 """Validate option values."""
1152
1153 MiscUtil.ValidateOptionTextValue("-a, --alertsMode", Options["--alertsMode"], "TotalEnergy MaxSingleEnergy TotalOrMaxSingleEnergy")
1154
1155 MiscUtil.ValidateOptionFloatValue("--alertsMaxSingleEnergyCutoff", Options["--alertsMaxSingleEnergyCutoff"], {">": 0.0})
1156 MiscUtil.ValidateOptionFloatValue("--alertsTotalEnergyCutoff", Options["--alertsTotalEnergyCutoff"], {">": 0.0})
1157
1158 MiscUtil.ValidateOptionTextValue("--filterTorsionsNotObserved", Options["--filterTorsionsNotObserved"], "yes no")
1159
1160 MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"])
1161 MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "sdf sd mol")
1162
1163 MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "sdf sd")
1164 if re.match("^filter$", Options["--mode"], re.I):
1165 MiscUtil.ValidateOptionsOutputFileOverwrite("-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"])
1166 MiscUtil.ValidateOptionsDistinctFileNames("-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"])
1167
1168 MiscUtil.ValidateOptionTextValue("--outfileFiltered", Options["--outfileFiltered"], "yes no")
1169
1170 MiscUtil.ValidateOptionTextValue("--outfilesFilteredByRules", Options["--outfilesFilteredByRules"], "yes no auto")
1171 if not re.match("^All$", Options["--outfilesFilteredByRulesMaxCount"], re.I):
1172 MiscUtil.ValidateOptionIntegerValue("--outfilesFilteredByRulesMaxCount", Options["--outfilesFilteredByRulesMaxCount"], {">": 0})
1173
1174 MiscUtil.ValidateOptionTextValue("--outfileSummary", Options["--outfileSummary"], "yes no auto")
1175 MiscUtil.ValidateOptionTextValue("--outfileAlerts", Options["--outfileAlerts"], "yes no")
1176 MiscUtil.ValidateOptionTextValue("--outfileAlertsMode", Options["--outfileAlertsMode"], "All AlertsOnly")
1177
1178 MiscUtil.ValidateOptionTextValue("-m, --mode", Options["--mode"], "filter count")
1179 if re.match("^filter$", Options["--mode"], re.I):
1180 if not Options["--outfile"]:
1181 MiscUtil.PrintError("The outfile must be specified using \"-o, --outfile\" during \"filter\" value of \"-m, --mode\" option")
1182
1183 MiscUtil.ValidateOptionTextValue("--mp", Options["--mp"], "yes no")
1184
1185 MiscUtil.ValidateOptionIntegerValue("-p, --precision", Options["--precision"], {">": 0})
1186
1187 MiscUtil.ValidateOptionTextValue("-r, --rotBondsSMARTSMode", Options["--rotBondsSMARTSMode"], "NonStrict SemiStrict Strict Specify")
1188 if re.match("^Specify$", Options["--rotBondsSMARTSMode"], re.I):
1189 if not Options["--rotBondsSMARTSPattern"]:
1190 MiscUtil.PrintError("The SMARTS pattern must be specified using \"--rotBondsSMARTSPattern\" during \"Specify\" value of \"-r, --rotBondsSMARTS\" option")
1191 else:
1192 if Options["--rotBondsSMARTSPattern"]:
1193 MiscUtil.PrintError("The SMARTS pattern must not be specified using \"--rotBondsSMARTSPattern\" during \"%s\" value of \"-r, --rotBondsSMARTS\" option" % (Options["--rotBondsSMARTSMode"]))
1194
1195 if not re.match("^auto$", Options["--torsionEnergyLibraryFile"], re.I):
1196 MiscUtil.ValidateOptionFilePath("-t, --torsionEnergyLibraryFile", Options["--torsionEnergyLibraryFile"])
1197
1198 # Setup a usage string for docopt...
1199 _docoptUsage_ = """
1200 RDKitFilterTorsionStrainEnergyAlerts.py - Filter torsion strain energy library alerts
1201
1202 Usage:
1203 RDKitFilterTorsionStrainEnergyAlerts.py [--alertsMode <TotalEnergy, MaxSingleEnergy, or TotalOrMaxSingleEnergy>]
1204 [--alertsMaxSingleEnergyCutoff <Number>] [--alertsTotalEnergyCutoff <Number>]
1205 [--filterTorsionsNotObserved <yes or no>] [--infileParams <Name,Value,...>] [--mode <filter or count>]
1206 [--mp <yes or no>] [--mpParams <Name,Value,...>] [--outfileAlerts <yes or no>]
1207 [--outfileAlertsMode <All or AlertsOnly>] [--outfileFiltered <yes or no>]
1208 [--outfilesFilteredByRules <yes or no>] [--outfilesFilteredByRulesMaxCount <All or number>]
1209 [--outfileSummary <yes or no>] [--outfileSDFieldLabels <Type,Label,...>] [--outfileParams <Name,Value,...>]
1210 [--overwrite] [--precision <number>] [ --rotBondsSMARTSMode <NonStrict, SemiStrict,...>]
1211 [--rotBondsSMARTSPattern <SMARTS>] [--torsionEnergyLibraryFile <FileName or auto>]
1212 [-w <dir>] -i <infile> -o <outfile>
1213 RDKitFilterTorsionStrainEnergyAlerts.py [--torsionEnergyLibraryFile <FileName or auto>] -l | --list
1214 RDKitFilterTorsionStrainEnergyAlerts.py -h | --help | -e | --examples
1215
1216 Description:
1217 Filter strained molecules from an input file for torsion strain energy library
1218 [ Ref 153 ] alerts by matching rotatable bonds against SMARTS patterns specified
1219 for torsion rules in a torsion energy library file and write out appropriate
1220 molecules to output files. The molecules must have 3D coordinates in input file.
1221 The default torsion strain energy library file, TorsionStrainEnergyLibrary.xml,
1222 is available under MAYACHEMTOOLS/lib/python/TorsionAlerts directory.
1223
1224 The data in torsion strain energy library file is organized in a hierarchical
1225 manner. It consists of one generic class and six specific classes at the highest
1226 level. Each class contains multiple subclasses corresponding to named functional
1227 groups or substructure patterns. The subclasses consist of torsion rules sorted
1228 from specific to generic torsion patterns. The torsion rule, in turn, contains a
1229 list of peak values for torsion angles and two tolerance values. A pair of tolerance
1230 values define torsion bins around a torsion peak value.
1231
1232 A strain energy calculation method, 'exact' or 'approximate' [ Ref 153 ], is
1233 associated with each torsion rule for calculating torsion strain energy. The 'exact'
1234 stain energy calculation relies on the energy bins available under the energy histogram
1235 consisting of 36 bins covering angles from -180 to 180. The width of each bin is 10
1236 degree. The energy bins are are defined at the right end points. The first and the
1237 last energy bins correspond to -170 and 180 respectively. The torsion angle is mapped
1238 to a energy bin. An angle offset is calculated for the torsion angle from the the right
1239 end point angle of the bin. The strain energy is estimated for the angle offset based
1240 on the energy difference between the current and previous bins. The torsion strain
1241 energy, in terms of torsion energy units (TEUs), corresponds to the sum of bin strain
1242 energy and the angle offset strain energy.
1243
1244 Energy = BinEnergyDiff/10.0 * BinAngleOffset + BinEnergy[BinNum]
1245
1246 Where:
1247
1248 BinEnergyDiff = BinEnergy[BinNum] - BinEnergy[PreviousBinNum]
1249 BinAngleOffset = TorsionAngle - BinAngleRightSide
1250
1251 The 'approximate' strain energy calculation relies on the angle difference between a
1252 torsion angle and the torsion peaks observed for the torsion rules in the torsion
1253 energy library. The torsion angle is matched to a torsion peak based on the value of
1254 torsion angle difference. It must be less than or equal to the value for the second
1255 tolerance 'tolerance2'. Otherwise, the torsion angle is not observed in the torsion
1256 energy library and a value of 'NA' is assigned for torsion energy along with the lower
1257 and upper bounds on energy at 95% confidence interval. The 'approximate' torsion
1258 energy (TEUs) for observed torsion angle is calculated using the following formula:
1259
1260 Energy = beta_1 * (AngleDiff ** 2) + beta_2 * (AngleDiff ** 4)
1261
1262 The coefficients 'beta_1' and 'beta_2' are available for the observed angles in
1263 the torsion strain energy library. The 'AngleDiff' is the difference between the
1264 torsion angle and the matched torsion peak.
1265
1266 For example:
1267
1268 <library>
1269 <hierarchyClass id1="G" id2="G" name="GG">
1270 ...
1271 </hierarchyClass>
1272 <hierarchyClass id1="C" id2="O" name="CO">
1273 <hierarchySubClass name="Ester bond I" smarts="O=[C:2][O:3]">
1274 <torsionRule method="exact" smarts=
1275 "[O:1]=[C:2]!@[O:3]~[CH0:4]">
1276 <angleList>
1277 <angle score="56.52" tolerance1="20.00"
1278 tolerance2="25.00" value="0.0"/>
1279 </angleList>
1280 <histogram>
1281 <bin count="1"/>
1282 ...
1283 </histogram>
1284 <histogram_shifted>
1285 <bin count="0"/>
1286 ...
1287 </histogram_shifted>
1288 <histogram_converted>
1289 <bin energy="4.67... lower="2.14..." upper="Inf"/>
1290 ...
1291 <bin energy="1.86..." lower="1.58..." upper="2.40..."/>
1292 ...
1293 </histogram_converted>
1294 </torsionRule>
1295 <torsionRule method="approximate" smarts=
1296 "[cH0:1][c:2]([cH0])!@[O:3][p:4]">
1297 <angleList>
1298 <angle beta_1="0.002..." beta_2="-7.843...e-07"
1299 score="27.14" theta_0="-90.0" tolerance1="30.00"
1300 tolerance2="45.00" value="-90.0"/>
1301 ...
1302 </angleList>
1303 <histogram>
1304 <bin count="0"/>
1305 ...
1306 </histogram>
1307 <histogram_shifted>
1308 <bin count="0"/>
1309 ...
1310 </histogram_shifted>
1311 </torsionRule>
1312 ...
1313 ...
1314 </hierarchyClass>
1315 <hierarchyClass id1="N" id2="C" name="NC">
1316 ...
1317 </hierarchyClass>
1318 <hierarchyClass id1="S" id2="N" name="SN">
1319 ...
1320 </hierarchyClass>
1321 <hierarchyClass id1="C" id2="S" name="CS">
1322 ...
1323 </hierarchyClass>
1324 <hierarchyClass id1="C" id2="C" name="CC">
1325 ...
1326 </hierarchyClass>
1327 <hierarchyClass id1="S" id2="S" name="SS">
1328 ...
1329 </hierarchyClass>
1330 </library>
1331
1332 The rotatable bonds in a 3D molecule are identified using a default SMARTS pattern.
1333 A custom SMARTS pattern may be optionally specified to detect rotatable bonds.
1334 Each rotatable bond is matched to a torsion rule in the torsion strain energy library.
1335 The strain energy is calculated for each rotatable bond using the calculation
1336 method, 'exact' or 'approximate', associated with the matched torsion rule.
1337
1338 The total strain energy (TEUs) of a molecule corresponds to the sum of 'exact' and
1339 'approximate' strain energies calculated for all matched rotatable bonds in the
1340 molecule. The total strain energy is set to 'NA' for molecules containing a 'approximate'
1341 energy estimate for a torsion angle not observed in the torsion energy library. In
1342 addition, the lower and upper bounds on energy at 95% confidence interval are
1343 set to 'NA'.
1344
1345 The following output files are generated after the filtering:
1346
1347 <OutfileRoot>.sdf
1348 <OutfileRoot>_Filtered.sdf
1349 <OutfileRoot>_AlertsSummary.csv
1350 <OutfileRoot>_Filtered_TopRule*.sdf
1351
1352 The last two set of outfile files, <OutfileRoot>_AlertsSummary.csv and
1353 <OutfileRoot>_<OutfileRoot>_AlertsSummary.csv, are only generated during filtering
1354 by 'MaxSingleEnergy'.
1355
1356 The supported input file formats are: Mol (.mol), SD (.sdf, .sd)
1357
1358 The supported output file formats are: SD (.sdf, .sd)
1359
1360 Options:
1361 -a, --alertsMode <TotalEnergy,...> [default: TotalEnergy]
1362 Torsion strain energy library alert types to use for filtering molecules
1363 containing rotatable bonds based on the calculated values for the total
1364 torsion strain energy of a molecule and the maximum single strain
1365 energy of a rotatable bond in a molecule.
1366
1367 Possible values: TotalEnergy, MaxSingleEnergy, or TotalOrMaxSingleEnergy
1368
1369 The strain energy cutoff values in terms of torsion energy units (TEUs) are
1370 used to filter molecules as shown below:
1371
1372 AlertsMode AlertsEnergyCutoffs (TEUs)
1373
1374 TotalEnergy >= TotalEnergyCutoff
1375
1376 MaxSingleEnergy >= MaxSingleEnergyCutoff
1377
1378 TotalOrMaxSingleEnergy >= TotalEnergyCutoff
1379 or >= MaxSingleEnergyCutoff
1380
1381 --alertsMaxSingleEnergyCutoff <Number> [default: 1.8]
1382 Maximum single strain energy (TEUs) cutoff [ Ref 153 ] for filtering molecules
1383 based on the maximum value of a single strain energy of a rotatable bond
1384 in a molecule. This option is used during 'MaxSingleEnergy' or
1385 'TotalOrMaxSingleEnergy' values of '-a, --alertsMode' option.
1386
1387 The maximum single strain energy must be greater than or equal to the
1388 specified cutoff value for filtering molecules.
1389 --alertsTotalEnergyCutoff <Number> [default: 6.0]
1390 Total strain strain energy (TEUs) cutoff [ Ref 153 ] for filtering molecules
1391 based on total strain energy for all rotatable bonds in a molecule. This
1392 option is used during 'TotalEnergy' or 'TotalOrMaxSingleEnergy'
1393 values of '-a, --alertsMode' option.
1394
1395 The total strain energy must be greater than or equal to the specified
1396 cutoff value for filtering molecules.
1397 --filterTorsionsNotObserved <yes or no> [default: no]
1398 Filter molecules containing torsion angles not observed in torsion strain
1399 energy library. It's not possible to calculate torsion strain energies for
1400 these torsions during 'approximate' match to a specified torsion in the
1401 library.
1402
1403 The 'approximate' strain energy calculation relies on the angle difference
1404 between a torsion angle and the torsion peaks observed for the torsion
1405 rules in the torsion energy library. The torsion angle is matched to a
1406 torsion peak based on the value of torsion angle difference. It must be
1407 less than or equal to the value for the second tolerance 'tolerance2'.
1408 Otherwise, the torsion angle is not observed in the torsion energy library
1409 and a value of 'NA' is assigned for torsion energy along with the lower and
1410 upper bounds on energy at 95% confidence interval.
1411 -e, --examples
1412 Print examples.
1413 -h, --help
1414 Print this help message.
1415 -i, --infile <infile>
1416 Input file name.
1417 --infileParams <Name,Value,...> [default: auto]
1418 A comma delimited list of parameter name and value pairs for reading
1419 molecules from files. The supported parameter names for different file
1420 formats, along with their default values, are shown below:
1421
1422 SD, MOL: removeHydrogens,no,sanitize,yes,strictParsing,yes
1423
1424 -l, --list
1425 List torsion library information without performing any filtering.
1426 -m, --mode <filter or count> [default: filter]
1427 Specify whether to filter molecules for torsion strain energy library [ Ref 153 ]
1428 alerts by matching rotatable bonds against SMARTS patterns specified for
1429 torsion rules to calculate torsion strain energies and write out the rest
1430 of the molecules to an outfile or simply count the number of matched
1431 molecules marked for filtering.
1432 --mp <yes or no> [default: no]
1433 Use multiprocessing.
1434
1435 By default, input data is retrieved in a lazy manner via mp.Pool.imap()
1436 function employing lazy RDKit data iterable. This allows processing of
1437 arbitrary large data sets without any additional requirements memory.
1438
1439 All input data may be optionally loaded into memory by mp.Pool.map()
1440 before starting worker processes in a process pool by setting the value
1441 of 'inputDataMode' to 'InMemory' in '--mpParams' option.
1442
1443 A word to the wise: The default 'chunkSize' value of 1 during 'Lazy' input
1444 data mode may adversely impact the performance. The '--mpParams' section
1445 provides additional information to tune the value of 'chunkSize'.
1446 --mpParams <Name,Value,...> [default: auto]
1447 A comma delimited list of parameter name and value pairs to configure
1448 multiprocessing.
1449
1450 The supported parameter names along with their default and possible
1451 values are shown below:
1452
1453 chunkSize, auto
1454 inputDataMode, Lazy [ Possible values: InMemory or Lazy ]
1455 numProcesses, auto [ Default: mp.cpu_count() ]
1456
1457 These parameters are used by the following functions to configure and
1458 control the behavior of multiprocessing: mp.Pool(), mp.Pool.map(), and
1459 mp.Pool.imap().
1460
1461 The chunkSize determines chunks of input data passed to each worker
1462 process in a process pool by mp.Pool.map() and mp.Pool.imap() functions.
1463 The default value of chunkSize is dependent on the value of 'inputDataMode'.
1464
1465 The mp.Pool.map() function, invoked during 'InMemory' input data mode,
1466 automatically converts RDKit data iterable into a list, loads all data into
1467 memory, and calculates the default chunkSize using the following method
1468 as shown in its code:
1469
1470 chunkSize, extra = divmod(len(dataIterable), len(numProcesses) * 4)
1471 if extra: chunkSize += 1
1472
1473 For example, the default chunkSize will be 7 for a pool of 4 worker processes
1474 and 100 data items.
1475
1476 The mp.Pool.imap() function, invoked during 'Lazy' input data mode, employs
1477 'lazy' RDKit data iterable to retrieve data as needed, without loading all the
1478 data into memory. Consequently, the size of input data is not known a priori.
1479 It's not possible to estimate an optimal value for the chunkSize. The default
1480 chunkSize is set to 1.
1481
1482 The default value for the chunkSize during 'Lazy' data mode may adversely
1483 impact the performance due to the overhead associated with exchanging
1484 small chunks of data. It is generally a good idea to explicitly set chunkSize to
1485 a larger value during 'Lazy' input data mode, based on the size of your input
1486 data and number of processes in the process pool.
1487
1488 The mp.Pool.map() function waits for all worker processes to process all
1489 the data and return the results. The mp.Pool.imap() function, however,
1490 returns the the results obtained from worker processes as soon as the
1491 results become available for specified chunks of data.
1492
1493 The order of data in the results returned by both mp.Pool.map() and
1494 mp.Pool.imap() functions always corresponds to the input data.
1495 -o, --outfile <outfile>
1496 Output file name.
1497 --outfileAlerts <yes or no> [default: yes]
1498 Write out alerts information to SD output files.
1499 --outfileAlertsMode <All or AlertsOnly> [default: AlertsOnly]
1500 Write alerts information to SD output files for all alerts or only for alerts
1501 specified by '--AlertsMode' option. Possible values: All or AlertsOnly
1502 This option is only valid for 'Yes' value of '--outfileAlerts' option.
1503
1504 The following alerts information is added to SD output files using
1505 'TorsionAlerts' data field:
1506
1507 RotBondIndices TorsionIndices TorsionAngle
1508 Energy EnergyLowerBoundCI EnergyUpperBoundCI
1509 HierarchyClass HierarchySubClass TorsionRule
1510 EnergyMethod AngleNotObserved MaxSingleEnergyAlert
1511
1512 The following data filelds are added to SD output files based on the value of
1513 '--AlertsMode' option:
1514
1515 TotalEnergy
1516 TotalEnergyLowerBoundCI
1517 TotalEnergyUpperBoundCI
1518
1519 MaxSingleEnergy
1520 MaxSingleEnergyAlertsCount
1521
1522 AnglesNotObservedCount
1523
1524 The 'RotBondsCount' is always added to SD output files containing both
1525 remaining and filtered molecules.
1526
1527 Format:
1528
1529 > <RotBondsCount>
1530 Number
1531
1532 > <TotalEnergy>
1533 Number
1534
1535 > <TotalEnergyLowerBoundCI>
1536 Number
1537
1538 > <TotalEnergyUpperBoundCI>
1539 Number
1540
1541 > <MaxSingleEnergy>
1542 Number
1543
1544 > <MaxSingleEnergyAlertsCount>
1545 Number
1546
1547 > <AnglesNotObservedCount>
1548 Number
1549
1550 > <TorsionAlerts (RotBondIndices TorsionIndices TorsionAngle
1551 Energy EnergyLowerBoundCI EnergyUpperBoundCI
1552 HierarchyClass HierarchySubClass TorsionRule
1553 EnergyMethod AngleNotObserved MaxSingleEnergyAlert)>
1554 AtomIndex2,AtomIndex3 AtomIndex1,AtomIndex2,AtomIndex3,AtomIndex4
1555 Angle Energy EnergyLowerBoundCI EnergyUpperBoundCI
1556 ClassName SubClassName SMARTS EnergyMethod Yes|No|NA Yes|No|NA
1557 ... ... ...
1558 ... ... ...
1559
1560 A set of 12 values is written out as value of 'TorsionAlerts' data field for
1561 each torsion in a molecule. The space character is used as a delimiter
1562 to separate values with in a set and across set. The comma character
1563 is used to delimit multiple values for each value in a set.
1564
1565 The 'RotBondIndices' and 'TorsionIndices' contain 2 and 4 comma delimited
1566 values representing atom indices for a rotatable bond and the matched
1567 torsion.
1568
1569 The 'Energy' value is the estimated strain energy for the matched torsion.
1570 The 'EnergyLowerBoundCI' and 'EnergyUpperBoundCI' represent lower and
1571 bound energy estimates at 95% confidence interval. The 'EnergyMethod',
1572 exact or approximate, corresponds to the method employed to estimate
1573 torsion strain energy.
1574
1575 The 'AngleNotObserved' is only valid for 'approximate' value of 'EnergyMethod'.
1576 It has three possible values: Yes, No, or NA. The 'Yes' value indicates that
1577 the 'TorsionAngle' is outside the 'tolerance2' of all peaks for the matched
1578 torsion rule in the torsion library.
1579
1580 The 'MaxSingleEnergyAlert' is valid for the following values of '-a, --alertsMode'
1581 option: 'MaxSingleEnergy' or 'TotalOrMaxSingleEnergy'. It has three possible
1582 values: Yes, No, or NA. It's set to 'NA' for 'Yes' or 'NA' values of
1583 'AngleNotObserved'. The 'Yes' value indicates that the estimated torsion
1584 energy is greater than the specified value for '--alertsMaxSingleEnergyCutoff'
1585 option.
1586
1587 For example:
1588
1589 > <RotBondsCount> (1)
1590 14
1591
1592 > <TotalEnergy> (1)
1593 6.8065
1594
1595 > <TotalEnergyLowerBoundCI> (1)
1596 5.9340
1597
1598 > <TotalEnergyUpperBoundCI> (1)
1599 NA
1600
1601 > <MaxSingleEnergy> (1)
1602 1.7108
1603
1604 > <MaxSingleEnergyAlertsCount> (1)
1605 0
1606
1607 > <AnglesNotObservedCount> (1)
1608 0
1609
1610 > <TorsionAlerts(RotBondIndices TorsionIndices TorsionAngle Energy
1611 EnergyLowerBoundCI EnergyUpperBoundCI HierarchyClass
1612 HierarchySubClass TorsionRule EnergyMethod AngleNotObserved
1613 MaxSingleEnergyAlert)> (1)
1614 2,1 48,2,1,0 61.90 0.0159 -0.0320 0.0674 CO Ether [O:1][CX4:2]!
1615 @[O:3][CX4:4] Exact NA No 2,3 1,2,3,4 109.12 1.5640 1.1175 NA CC
1616 None/[CX4][CX3] [O:1][CX4:2]!@[CX3:3]=[O:4] Exact NA No
1617 ... ... ...
1618
1619 --outfileFiltered <yes or no> [default: yes]
1620 Write out a file containing filtered molecules. Its name is automatically
1621 generated from the specified output file. Default: <OutfileRoot>_
1622 Filtered.<OutfileExt>.
1623 --outfilesFilteredByRules <yes or no> [default: auto]
1624 Write out SD files containing filtered molecules for individual torsion
1625 rules triggering alerts in molecules. The name of SD files are automatically
1626 generated from the specified output file. Default file names: <OutfileRoot>_
1627 Filtered_TopRule*.sdf.
1628
1629 Default value: 'yes' for 'MaxSingleEnergy' of '-a, --alertsMode' option';
1630 otherwise, 'no'.
1631
1632 The output files are only generated for 'MaxSingleEnergy' of
1633 '-a, --alertsMode' option.
1634
1635 The following alerts information is added to SD output files:
1636
1637 > <RotBondsCount>
1638 Number
1639
1640 > <TotalEnergy>
1641 Number
1642
1643 > <TotalEnergyLowerBoundCI>
1644 Number
1645
1646 > <TotalEnergyUpperBoundCI>
1647 Number
1648
1649 > <MaxSingleEnergy>
1650 Number
1651
1652 > <MaxSingleEnergyAlertsCount>
1653 Number
1654
1655 > <AnglesNotObservedCount>
1656 Number
1657
1658 > <TorsionRule (HierarchyClass HierarchySubClass TorsionRule
1659 EnergyMethod)>
1660 ClassName SubClassName EnergyMethod SMARTS
1661 ... ... ...
1662
1663 > <TorsionRuleMaxSingleEnergyAlertsCount>
1664 Number
1665
1666 > <TorsionRuleAnglesNotObservedCount>
1667 Number
1668
1669 > <TorsionRuleAlerts (RotBondIndices TorsionIndices TorsionAngle
1670 Energy EnergyLowerBoundCI EnergyUpperBoundCI
1671 AngleNotObserved MaxSingleEnergyAlert)>
1672 AtomIndex2,AtomIndex3 AtomIndex1,AtomIndex2,AtomIndex3,AtomIndex4
1673 Angle Energy EnergyLowerBoundCI EnergyUpperBoundCI EnergyMethod
1674 Yes|No|NA Yes|No|NA
1675 ... ... ...
1676
1677 For example:
1678
1679 > <RotBondsCount> (1)
1680 8
1681
1682 > <TotalEnergy> (1)
1683 6.1889
1684
1685 > <TotalEnergyLowerBoundCI> (1)
1686 5.1940
1687
1688 > <TotalEnergyUpperBoundCI> (1)
1689 NA
1690
1691 > <MaxSingleEnergy> (1)
1692 1.9576
1693
1694 > <MaxSingleEnergyAlertsCount> (1)
1695 1
1696
1697 > <AnglesNotObservedCount> (1)
1698 0
1699
1700 > <TorsionRule (HierarchyClass HierarchySubClass TorsionRule
1701 EnergyMethod)> (1)
1702 CC None/[CX4:2][CX4:3] [!#1:1][CX4:2]!@[CX4:3][!#1:4] Exact
1703
1704 > <TorsionRuleMaxSingleEnergyAlertsCount> (1)
1705 0
1706
1707 > <TorsionRuleAnglesNotObservedCount> (1)
1708 0
1709
1710 > <TorsionRuleAlerts (RotBondIndices TorsionIndices TorsionAngle
1711 Energy EnergyLowerBoundCI EnergyUpperBoundCI AngleNotObserved
1712 MaxSingleEnergyAlert)> (1)
1713 1,3 0,1,3,4 72.63 0.8946 0.8756 0.9145 NA No
1714
1715 --outfilesFilteredByRulesMaxCount <All or number> [default: 10]
1716 Write out SD files containing filtered molecules for specified number of
1717 top N torsion rules triggering alerts for the largest number of molecules
1718 or for all torsion rules triggering alerts across all molecules.
1719
1720 These output files are only generated for 'MaxSingleEnergy' value of
1721 '-a, --alertsMode' option.
1722 --outfileSummary <yes or no> [default: auto]
1723 Write out a CVS text file containing summary of torsions rules responsible
1724 for triggering torsion alerts. Its name is automatically generated from the
1725 specified output file. Default: <OutfileRoot>_AlertsSummary.csv.
1726
1727 Default value: 'yes' for 'MaxSingleEnergy' of '-a, --alertsMode' option';
1728 otherwise, 'no'.
1729
1730 The summary output file is only generated for 'MaxSingleEnergy' of
1731 '-a, --alertsMode' option.
1732
1733 The following alerts information is written to summary text file:
1734
1735 TorsionRule, HierarchyClass, HierarchySubClass, EnergyMethod,
1736 MaxSingleEnergyTorsionAlertTypes, MaxSingleEnergyTorsionAlertCount,
1737 MaxSingleEnergyTorsionAlertMolCount
1738
1739 The double quotes characters are removed from SMART patterns before
1740 before writing them to a CSV file. In addition, the torsion rules are sorted by
1741 TorsionAlertMolCount.
1742 --outfileSDFieldLabels <Type,Label,...> [default: auto]
1743 A comma delimited list of SD data field type and label value pairs for writing
1744 torsion alerts information along with molecules to SD files.
1745
1746 The supported SD data field label type along with their default values are
1747 shown below:
1748
1749 For all SD files:
1750
1751 RotBondsCountLabel, RotBondsCount,
1752
1753 TotalEnergyLabel, TotalEnergy,
1754 TotalEnergyLowerBoundCILabel, TotalEnergyLowerBoundCI,
1755 TotalEnergyUpperBoundCILabel, TotalEnergyUpperBoundCI,
1756
1757 MaxSingleEnergyLabel, MaxSingleEnergy,
1758 MaxSingleEnergyAlertsCountLabel,
1759 MaxSingleEnergyAlertsCount
1760
1761 AnglesNotObservedCountLabel,
1762 AnglesNotObservedCount
1763
1764 TorsionAlertsLabel, TorsionAlerts(RotBondIndices TorsionIndices
1765 TorsionAngle Energy EnergyLowerBoundCI EnergyUpperBoundCI
1766 HierarchyClass HierarchySubClass TorsionRule
1767 EnergyMethod AngleNotObserved)
1768
1769 For individual SD files filtered by torsion rules:
1770
1771 TorsionRuleLabel, TorsionRule (HierarchyClass HierarchySubClass
1772 EnergyMethod TorsionRule)
1773 TorsionRuleMaxSingleEnergyAlertsCountLabel,
1774 TorsionRuleMaxSingleEnergyAlertsCount,
1775 TorsionRuleAnglesNotObservedCountLabel,
1776 TorsionRuleAnglesNotObservedCount,
1777 TorsionRuleAlertsLabel, TorsionRuleAlerts (RotBondIndices
1778 TorsionIndices TorsionAngle Energy EnergyLowerBoundCI
1779 EnergyUpperBoundCI EnergyMethod AngleObserved)
1780
1781 --outfileParams <Name,Value,...> [default: auto]
1782 A comma delimited list of parameter name and value pairs for writing
1783 molecules to files. The supported parameter names for different file
1784 formats, along with their default values, are shown below:
1785
1786 SD: kekulize,yes,forceV3000,no
1787
1788 --overwrite
1789 Overwrite existing files.
1790 --precision <number> [default: 4]
1791 Floating point precision for writing torsion strain energy values.
1792 -r, --rotBondsSMARTSMode <NonStrict, SemiStrict,...> [default: SemiStrict]
1793 SMARTS pattern to use for identifying rotatable bonds in a molecule
1794 for matching against torsion rules in the torsion library. Possible values:
1795 NonStrict, SemiStrict, Strict or Specify. The rotatable bond SMARTS matches
1796 are filtered to ensure that each atom in the rotatable bond is attached to
1797 at least two heavy atoms.
1798
1799 The following SMARTS patterns are used to identify rotatable bonds for
1800 different modes:
1801
1802 NonStrict: [!$(*#*)&!D1]-&!@[!$(*#*)&!D1]
1803
1804 SemiStrict:
1805 [!$(*#*)&!D1&!$(C(F)(F)F)&!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)
1806 &!$(C([CH3])([CH3])[CH3])]-!@[!$(*#*)&!D1&!$(C(F)(F)F)
1807 &!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)&!$(C([CH3])([CH3])[CH3])]
1808
1809 Strict:
1810 [!$(*#*)&!D1&!$(C(F)(F)F)&!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)
1811 &!$(C([CH3])([CH3])[CH3])&!$([CD3](=[N,O,S])-!@[#7,O,S!D1])
1812 &!$([#7,O,S!D1]-!@[CD3]=[N,O,S])&!$([CD3](=[N+])-!@[#7!D1])
1813 &!$([#7!D1]-!@[CD3]=[N+])]-!@[!$(*#*)&!D1&!$(C(F)(F)F)
1814 &!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)&!$(C([CH3])([CH3])[CH3])]
1815
1816 The 'NonStrict' and 'Strict' SMARTS patterns are available in RDKit. The
1817 'NonStrict' SMARTS pattern corresponds to original Daylight SMARTS
1818 specification for rotatable bonds. The 'SemiStrict' SMARTS pattern is
1819 derived from 'Strict' SMARTS pattern for its usage in this script.
1820
1821 You may use any arbitrary SMARTS pattern to identify rotatable bonds by
1822 choosing 'Specify' value for '-r, --rotBondsSMARTSMode' option and providing its
1823 value via '--rotBondsSMARTSPattern' option.
1824 --rotBondsSMARTSPattern <SMARTS>
1825 SMARTS pattern for identifying rotatable bonds. This option is only valid
1826 for 'Specify' value of '-r, --rotBondsSMARTSMode' option.
1827 -t, --torsionEnergyLibraryFile <FileName or auto> [default: auto]
1828 Specify a XML file name containing data for torsion starin energy library
1829 hierarchy or use default file, TorsionEnergyLibrary.xml, available in
1830 MAYACHEMTOOLS/lib/Python/TorsionAlerts directory.
1831
1832 The format of data in local XML file must match format of the data in Torsion
1833 Library [ Ref 153 ] file available in MAYACHEMTOOLS directory.
1834 -w, --workingdir <dir>
1835 Location of working directory which defaults to the current directory.
1836
1837 Examples:
1838 To filter molecules containing rotatable bonds with total strain energy value
1839 of >= 6.0 (TEUs) based on torsion rules in the torsion energy library and write
1840 write out SD files containing remaining and filtered molecules, type:
1841
1842 % RDKitFilterTorsionStrainEnergyAlerts.py -i Sample3D.sdf
1843 -o Sample3DOut.sdf
1844
1845 To filter molecules containing any rotatable bonds with strain energy value of
1846 >= 1.8 (TEUs) based on torsion rules in the torsion energy library and write out
1847 SD files containing remaining and filtered molecules, and individual SD files for
1848 torsion rules triggering alerts along with appropriate torsion information for
1849 red alerts, type:
1850
1851 % RDKitFilterTorsionStrainEnergyAlerts.py -a MaxSingleEnergy
1852 -i Sample3D.sdf -o Sample3DOut.sdf
1853
1854 To filter molecules containing rotatable bonds with total strain energy value
1855 of >= 6.0 (TEUs) or any single strain energy value of >= 1.8 (TEUs) and write out
1856 SD files containing remaining and filtered molecules, type:
1857
1858 % RDKitFilterTorsionStrainEnergyAlerts.py -a TotalOrMaxSingleEnergy
1859 -i Sample3D.sdf -o Sample3DOut.sdf
1860
1861 To filter molecules containing rotatable bonds with specific cutoff values for
1862 total or single torsion strain energy and write out SD files containing
1863 remaining and filtered molecules, type:
1864
1865 % RDKitFilterTorsionStrainEnergyAlerts.py -a TotalOrMaxSingleEnergy
1866 -i Sample3D.sdf -o Sample3DOut.sdf --alertsTotalEnergyCutoff 6.0
1867 --alertsMaxSingleEnergyCutoff 1.8
1868
1869 To run the first example for filtering molecules and writing out torsion
1870 information for all alert types to SD files, type:
1871
1872 % RDKitFilterTorsionStrainEnergyAlerts.py -i Sample3D.sdf
1873 -o Sample3DOut.sdf --outfileAlertsMode All
1874
1875 To run the first example for filtering molecules in multiprocessing mode on
1876 all available CPUs without loading all data into memory and write out SD files,
1877 type:
1878
1879 % RDKitFilterTorsionStrainEnergyAlerts.py --mp yes -i Sample3D.sdf
1880 -o Sample3DOut.sdf
1881
1882 To run the first example for filtering molecules in multiprocessing mode on
1883 all available CPUs by loading all data into memory and write out a SD files,
1884 type:
1885
1886 % RDKitFilterTorsionStrainEnergyAlerts.py --mp yes --mpParams
1887 "inputDataMode, InMemory" -i Sample3D.sdf -o Sample3DOut.sdf
1888
1889 To run the first example for filtering molecules in multiprocessing mode on
1890 specific number of CPUs and chunksize without loading all data into memory
1891 and write out SD files, type:
1892
1893 % RDKitFilterTorsionStrainEnergyAlerts.py --mp yes --mpParams
1894 "inputDataMode,lazy,numProcesses,4,chunkSize,8" -i Sample3D.sdf
1895 -o Sample3DOut.sdf
1896
1897 To list information about default torsion library file without performing any
1898 filtering, type:
1899
1900 % RDKitFilterTorsionStrainEnergyAlerts.py -l
1901
1902 To list information about a local torsion library XML file without performing
1903 any, filtering, type:
1904
1905 % RDKitFilterTorsionStrainEnergyAlerts.py --torsionEnergyLibraryFile
1906 TorsionStrainEnergyLibrary.xml -l
1907
1908 Author:
1909 Manish Sud (msud@san.rr.com)
1910
1911 Collaborator:
1912 Pat Walters
1913
1914 See also:
1915 RDKitFilterChEMBLAlerts.py, RDKitFilterPAINS.py, RDKitFilterTorsionLibraryAlerts.py,
1916 RDKitConvertFileFormat.py, RDKitSearchSMARTS.py
1917
1918 Copyright:
1919 Copyright (C) 2025 Manish Sud. All rights reserved.
1920
1921 This script uses the torsion strain energy library developed by Gu, S.;
1922 Smith, M. S.; Yang, Y.; Irwin, J. J.; Shoichet, B. K. [ Ref 153 ].
1923
1924 The torsion strain enegy library is based on the Torsion Library jointly
1925 developed by the University of Hamburg, Center for Bioinformatics,
1926 Hamburg, Germany and F. Hoffmann-La-Roche Ltd., Basel, Switzerland.
1927
1928 The functionality available in this script is implemented using RDKit, an
1929 open source toolkit for cheminformatics developed by Greg Landrum.
1930
1931 This file is part of MayaChemTools.
1932
1933 MayaChemTools is free software; you can redistribute it and/or modify it under
1934 the terms of the GNU Lesser General Public License as published by the Free
1935 Software Foundation; either version 3 of the License, or (at your option) any
1936 later version.
1937
1938 """
1939
1940 if __name__ == "__main__":
1941 main()