MayaChemTools

    1 #!/bin/env python
    2 #
    3 # File: RDKitFilterTorsionStrainEnergyAlerts.py
    4 # Author: Manish Sud <msud@san.rr.com>
    5 #
    6 # Collaborator: Pat Walters
    7 #
    8 # Copyright (C) 2024 Manish Sud. All rights reserved.
    9 #
   10 # This script uses the torsion strain energy library developed by Gu, S.;
   11 # Smith, M. S.; Yang, Y.; Irwin, J. J.; Shoichet, B. K. [ Ref 153 ].
   12 #
   13 # The torsion strain enegy library is based on the Torsion Library jointly
   14 # developed by the University of Hamburg, Center for Bioinformatics,
   15 # Hamburg, Germany and F. Hoffmann-La-Roche Ltd., Basel, Switzerland.
   16 #
   17 # The functionality available in this script is implemented using RDKit, an
   18 # open source toolkit for cheminformatics developed by Greg Landrum.
   19 #
   20 # This file is part of MayaChemTools.
   21 #
   22 # MayaChemTools is free software; you can redistribute it and/or modify it under
   23 # the terms of the GNU Lesser General Public License as published by the Free
   24 # Software Foundation; either version 3 of the License, or (at your option) any
   25 # later version.
   26 #
   27 # MayaChemTools is distributed in the hope that it will be useful, but without
   28 # any warranty; without even the implied warranty of merchantability of fitness
   29 # for a particular purpose.  See the GNU Lesser General Public License for more
   30 # details.
   31 #
   32 # You should have received a copy of the GNU Lesser General Public License
   33 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
   34 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
   35 # Boston, MA, 02111-1307, USA.
   36 #
   37 
   38 from __future__ import print_function
   39 
   40 # Add local python path to the global path and import standard library modules...
   41 import os
   42 import sys;  sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python"))
   43 import time
   44 import re
   45 import glob
   46 import multiprocessing as mp
   47 import math
   48 
   49 # RDKit imports...
   50 try:
   51     from rdkit import rdBase
   52     from rdkit import Chem
   53     from rdkit.Chem import rdMolTransforms
   54 except ImportError as ErrMsg:
   55     sys.stderr.write("\nFailed to import RDKit module/package: %s\n" % ErrMsg)
   56     sys.stderr.write("Check/update your RDKit environment and try again.\n\n")
   57     sys.exit(1)
   58 
   59 # MayaChemTools imports...
   60 try:
   61     from docopt import docopt
   62     import MiscUtil
   63     import RDKitUtil
   64     from TorsionAlerts.TorsionStrainEnergyAlerts import TorsionStrainEnergyAlerts
   65 except ImportError as ErrMsg:
   66     sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg)
   67     sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n")
   68     sys.exit(1)
   69 
   70 ScriptName = os.path.basename(sys.argv[0])
   71 Options = {}
   72 OptionsInfo = {}
   73 
   74 def main():
   75     """Start execution of the script."""
   76     
   77     MiscUtil.PrintInfo("\n%s (RDKit v%s; MayaChemTools v%s; %s): Starting...\n" % (ScriptName, rdBase.rdkitVersion, MiscUtil.GetMayaChemToolsVersion(), time.asctime()))
   78     
   79     (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime()
   80     
   81     # Retrieve command line arguments and options...
   82     RetrieveOptions()
   83     
   84     if  Options["--list"]:
   85         # Handle listing of torsion library information...
   86         ProcessListTorsionLibraryOption()
   87     else:
   88         # Process and validate command line arguments and options...
   89         ProcessOptions()
   90         
   91         # Perform actions required by the script...
   92         PerformFiltering()
   93     
   94     MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName)
   95     MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime))
   96 
   97 def PerformFiltering():
   98     """Filter molecules using SMARTS torsion rules in the torsion strain energy
   99     library file."""
  100 
  101     # Setup a molecule reader...
  102     MiscUtil.PrintInfo("\nProcessing file %s..." % OptionsInfo["Infile"])
  103     Mols  = RDKitUtil.ReadMolecules(OptionsInfo["Infile"], **OptionsInfo["InfileParams"])
  104     
  105     MolCount, ValidMolCount, RemainingMolCount, WriteFailedCount = ProcessMolecules(Mols)
  106 
  107     MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount)
  108     MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount)
  109     MiscUtil.PrintInfo("Number of molecules failed during writing: %d" % WriteFailedCount)
  110     MiscUtil.PrintInfo("Number of ignored molecules: %d" % (MolCount - ValidMolCount + WriteFailedCount))
  111 
  112     MiscUtil.PrintInfo("\nNumber of remaining molecules: %d" % RemainingMolCount)
  113     MiscUtil.PrintInfo("Number of filtered molecules: %d" % (ValidMolCount - RemainingMolCount))
  114 
  115 def ProcessMolecules(Mols):
  116     """Process and filter molecules."""
  117     
  118     if OptionsInfo["MPMode"]:
  119         return ProcessMoleculesUsingMultipleProcesses(Mols)
  120     else:
  121         return ProcessMoleculesUsingSingleProcess(Mols)
  122 
  123 def ProcessMoleculesUsingSingleProcess(Mols):
  124     """Process and filter molecules using a single process."""
  125     
  126     # Instantiate torsion strain energy alerts class...
  127     TorsionStrainEnergyAlertsHandle = InstantiateTorsionStrainEnergyAlertsClass()
  128     
  129     MiscUtil.PrintInfo("\nFiltering molecules...")
  130     
  131     OutfileFilteredMode = OptionsInfo["OutfileFilteredMode"]
  132 
  133     # Set up writers...
  134     OutfilesWriters = SetupOutfilesWriters()
  135     
  136     WriterRemaining = OutfilesWriters["WriterRemaining"]
  137     WriterFiltered = OutfilesWriters["WriterFiltered"]
  138     WriterAlertSummary = OutfilesWriters["WriterAlertSummary"]
  139     
  140     # Initialize alerts summary info...
  141     TorsionAlertsSummaryInfo = InitializeTorsionAlertsSummaryInfo()
  142 
  143     (MolCount, ValidMolCount, RemainingMolCount, WriteFailedCount, FilteredMolWriteCount) = [0] * 5
  144     for Mol in Mols:
  145         MolCount += 1
  146         
  147         if Mol is None:
  148             continue
  149         
  150         if RDKitUtil.IsMolEmpty(Mol):
  151             MiscUtil.PrintWarning("Ignoring empty molecule: %s" % RDKitUtil.GetMolName(Mol, MolCount))
  152             continue
  153 
  154         # Check for 3D flag...
  155         if not Mol.GetConformer().Is3D():
  156             MiscUtil.PrintWarning("3D tag is not set. Ignoring molecule: %s\n" % RDKitUtil.GetMolName(Mol, MolCount))
  157             continue
  158         
  159         ValidMolCount += 1
  160         
  161         # Identify torsion library alerts for rotatable bonds..
  162         RotBondsAlertsStatus, RotBondsAlertsInfo = TorsionStrainEnergyAlertsHandle.IdentifyTorsionLibraryAlertsForRotatableBonds(Mol)
  163 
  164         TrackTorsionAlertsSummaryInfo(TorsionAlertsSummaryInfo, RotBondsAlertsInfo)
  165         
  166         # Write out filtered and remaining molecules...
  167         WriteStatus = True
  168         if RotBondsAlertsStatus:
  169             if OutfileFilteredMode:
  170                 WriteStatus = WriteMolecule(WriterFiltered, Mol, RotBondsAlertsInfo)
  171                 if WriteStatus:
  172                     FilteredMolWriteCount += 1
  173         else:
  174             RemainingMolCount += 1
  175             WriteStatus = WriteMolecule(WriterRemaining, Mol, RotBondsAlertsInfo)
  176         
  177         if not WriteStatus:
  178             WriteFailedCount += 1
  179 
  180     WriteTorsionAlertsSummaryInfo(WriterAlertSummary, TorsionAlertsSummaryInfo)
  181     CloseOutfilesWriters(OutfilesWriters)
  182 
  183     if FilteredMolWriteCount:
  184         WriteTorsionAlertsFilteredByRulesInfo(TorsionAlertsSummaryInfo)
  185     
  186     return (MolCount, ValidMolCount, RemainingMolCount, WriteFailedCount)
  187 
  188 def ProcessMoleculesUsingMultipleProcesses(Mols):
  189     """Process and filter molecules using multiprocessing."""
  190     
  191     MiscUtil.PrintInfo("\nFiltering molecules using multiprocessing...")
  192     
  193     MPParams = OptionsInfo["MPParams"]
  194     OutfileFilteredMode = OptionsInfo["OutfileFilteredMode"]
  195 
  196     # Instantiate torsion strain energy alerts class to list torsion library information...
  197     TorsionStrainEnergyAlertsHandle = InstantiateTorsionStrainEnergyAlertsClass()
  198     
  199     # Set up writers...
  200     OutfilesWriters = SetupOutfilesWriters()
  201     
  202     WriterRemaining = OutfilesWriters["WriterRemaining"]
  203     WriterFiltered = OutfilesWriters["WriterFiltered"]
  204     WriterAlertSummary = OutfilesWriters["WriterAlertSummary"]
  205     
  206     # Initialize alerts summary info...
  207     TorsionAlertsSummaryInfo = InitializeTorsionAlertsSummaryInfo()
  208 
  209     # Setup data for initializing a worker process...
  210     MiscUtil.PrintInfo("\nEncoding options info and rotatable bond pattern molecule...")
  211     InitializeWorkerProcessArgs = (MiscUtil.ObjectToBase64EncodedString(Options), MiscUtil.ObjectToBase64EncodedString(OptionsInfo))
  212 
  213     # Setup a encoded mols data iterable for a worker process...
  214     WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(Mols)
  215     
  216     # Setup process pool along with data initialization for each process...
  217     MiscUtil.PrintInfo("\nConfiguring multiprocessing using %s method..." % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()"))
  218     MiscUtil.PrintInfo("NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n" % (MPParams["NumProcesses"], MPParams["InputDataMode"], ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"])))
  219     
  220     ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeWorkerProcess, InitializeWorkerProcessArgs)
  221     
  222     # Start processing...
  223     if re.match("^Lazy$", MPParams["InputDataMode"], re.I):
  224         Results = ProcessPool.imap(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
  225     elif re.match("^InMemory$", MPParams["InputDataMode"], re.I):
  226         Results = ProcessPool.map(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
  227     else:
  228         MiscUtil.PrintError("The value, %s, specified for \"--inputDataMode\" is not supported." % (MPParams["InputDataMode"]))
  229     
  230     (MolCount, ValidMolCount, RemainingMolCount, WriteFailedCount, FilteredMolWriteCount) = [0] * 5
  231     for Result in Results:
  232         MolCount += 1
  233         MolIndex, EncodedMol, RotBondsAlertsStatus, RotBondsAlertsInfo = Result
  234         
  235         if EncodedMol is None:
  236             continue
  237         ValidMolCount += 1
  238         
  239         Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
  240         
  241         TrackTorsionAlertsSummaryInfo(TorsionAlertsSummaryInfo, RotBondsAlertsInfo)
  242         
  243         # Write out filtered and remaining molecules...
  244         WriteStatus = True
  245         if RotBondsAlertsStatus:
  246             if OutfileFilteredMode:
  247                 WriteStatus = WriteMolecule(WriterFiltered, Mol, RotBondsAlertsInfo)
  248                 if WriteStatus:
  249                     FilteredMolWriteCount += 1
  250         else:
  251             RemainingMolCount += 1
  252             WriteStatus = WriteMolecule(WriterRemaining, Mol, RotBondsAlertsInfo)
  253 
  254         if not WriteStatus:
  255             WriteFailedCount += 1
  256     
  257     WriteTorsionAlertsSummaryInfo(WriterAlertSummary, TorsionAlertsSummaryInfo)
  258     CloseOutfilesWriters(OutfilesWriters)
  259 
  260     if FilteredMolWriteCount:
  261         WriteTorsionAlertsFilteredByRulesInfo(TorsionAlertsSummaryInfo)
  262     
  263     return (MolCount, ValidMolCount, RemainingMolCount, WriteFailedCount)
  264 
  265 def InitializeWorkerProcess(*EncodedArgs):
  266     """Initialize data for a worker process."""
  267     
  268     global Options, OptionsInfo
  269     
  270     MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid())
  271     
  272     # Decode Options and OptionInfo...
  273     Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0])
  274     OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1])
  275 
  276     # Instantiate torsion strain energy alerts class...
  277     OptionsInfo["TorsionStrainEnergyAlertsHandle"] = InstantiateTorsionStrainEnergyAlertsClass(Quiet = True)
  278 
  279 def WorkerProcess(EncodedMolInfo):
  280     """Process data for a worker process."""
  281 
  282     MolIndex, EncodedMol = EncodedMolInfo
  283     
  284     if EncodedMol is None:
  285         return [MolIndex, None, False, None]
  286     
  287     Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
  288     if RDKitUtil.IsMolEmpty(Mol):
  289         MolName = RDKitUtil.GetMolName(Mol, (MolIndex + 1))
  290         MiscUtil.PrintWarning("Ignoring empty molecule: %s" % MolName)
  291         return [MolIndex, None, False, None]
  292         
  293     # Check for 3D flag...
  294     if not Mol.GetConformer().Is3D():
  295         MolName = RDKitUtil.GetMolName(Mol, (MolIndex + 1))
  296         MiscUtil.PrintWarning("3D tag is not set. Ignoring molecule: %s\n" % MolName)
  297         return [MolIndex, None, False, None]
  298     
  299     # Identify torsion library alerts for rotatable bonds..
  300     TorsionStrainEnergyAlertsHandle = OptionsInfo["TorsionStrainEnergyAlertsHandle"]
  301     RotBondsAlertsStatus, RotBondsAlertsInfo = TorsionStrainEnergyAlertsHandle.IdentifyTorsionLibraryAlertsForRotatableBonds(Mol)
  302 
  303     return [MolIndex, EncodedMol, RotBondsAlertsStatus, RotBondsAlertsInfo]
  304     
  305 def InitializeTorsionAlertsSummaryInfo():
  306     """Initialize torsion alerts summary."""
  307 
  308     if OptionsInfo["CountMode"]:
  309         return None
  310     
  311     if not OptionsInfo["TrackAlertsSummaryInfo"]:
  312         return None
  313     
  314     TorsionAlertsSummaryInfo = {}
  315     TorsionAlertsSummaryInfo["RuleIDs"] = []
  316 
  317     for DataLabel in ["SMARTSToRuleIDs", "RuleSMARTS", "HierarchyClassName", "HierarchySubClassName", "EnergyMethod", "MaxSingleEnergyAlertTypes", "MaxSingleEnergyAlertTypesMolCount"]:
  318         TorsionAlertsSummaryInfo[DataLabel] = {}
  319     
  320     return TorsionAlertsSummaryInfo
  321 
  322 def TrackTorsionAlertsSummaryInfo(TorsionAlertsSummaryInfo, RotBondsAlertsInfo):
  323     """Track torsion alerts summary information for matched torsion rules in a
  324     molecule."""
  325     
  326     if OptionsInfo["CountMode"]:
  327         return
  328     
  329     if not OptionsInfo["TrackAlertsSummaryInfo"]:
  330         return
  331 
  332     if RotBondsAlertsInfo is None:
  333         return
  334     
  335     MolAlertsInfo = {}
  336     MolAlertsInfo["RuleIDs"] = []
  337     MolAlertsInfo["MaxSingleEnergyAlertTypes"] = {}
  338 
  339     for ID in RotBondsAlertsInfo["IDs"]:
  340         if not RotBondsAlertsInfo["MatchStatus"][ID]:
  341             continue
  342         
  343         if SkipRotatableBondAlertInfo(ID, RotBondsAlertsInfo):
  344             continue
  345 
  346         MaxSingleEnergyAlertType = SetupMaxSingleEnergyAlertStatusValue(RotBondsAlertsInfo["MaxSingleEnergyAlertStatus"][ID])
  347         
  348         TorsionRuleNodeID = RotBondsAlertsInfo["TorsionRuleNodeID"][ID]
  349         TorsionRuleSMARTS = RotBondsAlertsInfo["TorsionRuleSMARTS"][ID]
  350         
  351         # Track data for torsion alert summary information across molecules...
  352         if TorsionRuleNodeID not in TorsionAlertsSummaryInfo["RuleSMARTS"]:
  353             TorsionAlertsSummaryInfo["RuleIDs"].append(TorsionRuleNodeID)
  354             TorsionAlertsSummaryInfo["SMARTSToRuleIDs"][TorsionRuleSMARTS] = TorsionRuleNodeID
  355             
  356             TorsionAlertsSummaryInfo["RuleSMARTS"][TorsionRuleNodeID] = TorsionRuleSMARTS
  357             TorsionAlertsSummaryInfo["HierarchyClassName"][TorsionRuleNodeID] = RotBondsAlertsInfo["HierarchyClassName"][ID]
  358             TorsionAlertsSummaryInfo["HierarchySubClassName"][TorsionRuleNodeID] = RotBondsAlertsInfo["HierarchySubClassName"][ID]
  359             
  360             TorsionAlertsSummaryInfo["EnergyMethod"][TorsionRuleNodeID] = RotBondsAlertsInfo["EnergyMethod"][ID]
  361             
  362             # Initialize number of alert types across all molecules...
  363             TorsionAlertsSummaryInfo["MaxSingleEnergyAlertTypes"][TorsionRuleNodeID] = {}
  364             
  365             # Initialize number of molecules flagged by each alert type...
  366             TorsionAlertsSummaryInfo["MaxSingleEnergyAlertTypesMolCount"][TorsionRuleNodeID] = {}
  367         
  368         if MaxSingleEnergyAlertType not in TorsionAlertsSummaryInfo["MaxSingleEnergyAlertTypes"][TorsionRuleNodeID]:
  369             TorsionAlertsSummaryInfo["MaxSingleEnergyAlertTypes"][TorsionRuleNodeID][MaxSingleEnergyAlertType] = 0
  370             TorsionAlertsSummaryInfo["MaxSingleEnergyAlertTypesMolCount"][TorsionRuleNodeID][MaxSingleEnergyAlertType] = 0
  371         
  372         TorsionAlertsSummaryInfo["MaxSingleEnergyAlertTypes"][TorsionRuleNodeID][MaxSingleEnergyAlertType] += 1
  373         
  374         # Track data for torsion alert information in a molecule...
  375         if TorsionRuleNodeID not in MolAlertsInfo["MaxSingleEnergyAlertTypes"]:
  376             MolAlertsInfo["RuleIDs"].append(TorsionRuleNodeID)
  377             MolAlertsInfo["MaxSingleEnergyAlertTypes"][TorsionRuleNodeID] = {}
  378         
  379         if MaxSingleEnergyAlertType not in MolAlertsInfo["MaxSingleEnergyAlertTypes"][TorsionRuleNodeID]:
  380             MolAlertsInfo["MaxSingleEnergyAlertTypes"][TorsionRuleNodeID][MaxSingleEnergyAlertType] = 0
  381         MolAlertsInfo["MaxSingleEnergyAlertTypes"][TorsionRuleNodeID][MaxSingleEnergyAlertType] += 1
  382     
  383     # Track number of molecules flagged by a specific torsion alert...
  384     for TorsionRuleNodeID in MolAlertsInfo["RuleIDs"]:
  385         for MaxSingleEnergyAlertType in MolAlertsInfo["MaxSingleEnergyAlertTypes"][TorsionRuleNodeID]:
  386             if MolAlertsInfo["MaxSingleEnergyAlertTypes"][TorsionRuleNodeID][MaxSingleEnergyAlertType]:
  387                 TorsionAlertsSummaryInfo["MaxSingleEnergyAlertTypesMolCount"][TorsionRuleNodeID][MaxSingleEnergyAlertType] += 1
  388 
  389 def WriteTorsionAlertsSummaryInfo(Writer, TorsionAlertsSummaryInfo):
  390     """Write out torsion alerts summary informatio to a CSV file."""
  391     
  392     if OptionsInfo["CountMode"]:
  393         return
  394     
  395     if not OptionsInfo["OutfileSummaryMode"]:
  396         return
  397 
  398     if len(TorsionAlertsSummaryInfo["RuleIDs"]) == 0:
  399         return
  400     
  401     # Write headers...
  402     QuoteValues = True
  403     Values = ["TorsionRule", "HierarchyClass", "HierarchySubClass", "EnergyMethod", "MaxSingleEnergyTorsionAlertTypes", "MaxSingleEnergyTorsionAlertCount", "MaxSingleEnergyTorsionAlertMolCount"]
  404     Writer.write("%s\n" % MiscUtil.JoinWords(Values, ",", QuoteValues))
  405     
  406     SortedRuleIDs = GetSortedTorsionAlertsSummaryInfoRuleIDs(TorsionAlertsSummaryInfo)
  407     
  408     # Write alerts information...
  409     for ID in SortedRuleIDs:
  410         # Remove any double quotes in SMARTS...
  411         RuleSMARTS = TorsionAlertsSummaryInfo["RuleSMARTS"][ID]
  412         RuleSMARTS = re.sub("\"", "", RuleSMARTS, re.I)
  413         
  414         HierarchyClassName = TorsionAlertsSummaryInfo["HierarchyClassName"][ID]
  415         HierarchySubClassName = TorsionAlertsSummaryInfo["HierarchySubClassName"][ID]
  416         
  417         EnergyMethod = TorsionAlertsSummaryInfo["EnergyMethod"][ID]
  418         
  419         MaxSingleEnergyAlertTypes = []
  420         MaxSingleEnergyAlertTypesCount = []
  421         MaxSingleEnergyAlertTypesMolCount = []
  422         for MaxSingleEnergyAlertType in sorted(TorsionAlertsSummaryInfo["MaxSingleEnergyAlertTypes"][ID]):
  423             MaxSingleEnergyAlertTypes.append(MaxSingleEnergyAlertType)
  424             MaxSingleEnergyAlertTypesCount.append("%s" % TorsionAlertsSummaryInfo["MaxSingleEnergyAlertTypes"][ID][MaxSingleEnergyAlertType])
  425             MaxSingleEnergyAlertTypesMolCount.append("%s" % TorsionAlertsSummaryInfo["MaxSingleEnergyAlertTypesMolCount"][ID][MaxSingleEnergyAlertType])
  426         
  427         Values = [RuleSMARTS, HierarchyClassName, HierarchySubClassName, EnergyMethod, "%s" % MiscUtil.JoinWords(MaxSingleEnergyAlertTypes, ","), "%s" % (MiscUtil.JoinWords(MaxSingleEnergyAlertTypesCount, ",")), "%s" % (MiscUtil.JoinWords(MaxSingleEnergyAlertTypesMolCount, ","))]
  428         Writer.write("%s\n" % MiscUtil.JoinWords(Values, ",", QuoteValues))
  429 
  430 def GetSortedTorsionAlertsSummaryInfoRuleIDs(TorsionAlertsSummaryInfo):
  431     """Sort torsion rule IDs by  alert types molecule count in descending order."""
  432 
  433     SortedRuleIDs = []
  434     
  435     RuleIDs = TorsionAlertsSummaryInfo["RuleIDs"]
  436     if len(RuleIDs) == 0:
  437         return SortedRuleIDs
  438     
  439     # Setup a map from AlertTypesMolCount to IDs for sorting alerts...
  440     RuleIDs = TorsionAlertsSummaryInfo["RuleIDs"]
  441     MolCountMap = {}
  442     for ID in RuleIDs:
  443         MolCount = 0
  444         for AlertType in sorted(TorsionAlertsSummaryInfo["MaxSingleEnergyAlertTypes"][ID]):
  445             MolCount += TorsionAlertsSummaryInfo["MaxSingleEnergyAlertTypesMolCount"][ID][AlertType]
  446         MolCountMap[ID] = MolCount
  447 
  448     SortedRuleIDs = sorted(RuleIDs, key = lambda ID: MolCountMap[ID], reverse = True)
  449     
  450     return SortedRuleIDs
  451 
  452 def WriteTorsionAlertsFilteredByRulesInfo(TorsionAlertsSummaryInfo):
  453     """Write out torsion alerts SD files for individual torsion rules."""
  454     
  455     if OptionsInfo["CountMode"]:
  456         return
  457     
  458     if not OptionsInfo["OutfilesFilteredByRulesMode"]:
  459         return
  460 
  461     if len(TorsionAlertsSummaryInfo["RuleIDs"]) == 0:
  462         return
  463 
  464     # Setup a molecule reader for filtered molecules...
  465     FilteredMols  = RDKitUtil.ReadMolecules(OptionsInfo["OutfileFiltered"], **OptionsInfo["InfileParams"])
  466 
  467     # Get torsion rule IDs for writing out filtered SD files for individual torsion alert rules... 
  468     TorsionRuleIDs = GetTorsionAlertsFilteredByRuleFilesRuleIDs(TorsionAlertsSummaryInfo)
  469 
  470     # Setup writers...
  471     ByRuleOutfilesWriters = SetupByRuleOutfilesWriters(TorsionRuleIDs)
  472     
  473     for Mol in FilteredMols:
  474         # Retrieve torsion alerts info...
  475         TorsionAlertsInfo = RetrieveTorsionAlertsInfo(Mol, TorsionAlertsSummaryInfo)
  476         if TorsionAlertsInfo is None:
  477             continue
  478         
  479         for TorsionRuleID in TorsionRuleIDs:
  480             if TorsionRuleID not in TorsionAlertsInfo["RuleSMARTS"]:
  481                 continue
  482             
  483             WriteMoleculeFilteredByRuleID(ByRuleOutfilesWriters[TorsionRuleID], Mol, TorsionRuleID, TorsionAlertsSummaryInfo, TorsionAlertsInfo)
  484         
  485     CloseByRuleOutfilesWriters(ByRuleOutfilesWriters)
  486 
  487 def GetTorsionAlertsFilteredByRuleFilesRuleIDs(TorsionAlertsSummaryInfo):
  488     """Get torsion rule IDs for writing out individual SD files filtered by torsion alert rules."""
  489     
  490     # Get torsion rule IDs triggering torsion alerts sorted in the order from the most to
  491     # the least number of unique molecules...
  492     RuleIDs = GetSortedTorsionAlertsSummaryInfoRuleIDs(TorsionAlertsSummaryInfo)
  493 
  494     # Select torsion rule IDs for writing out SD files...
  495     if not OptionsInfo["OutfilesFilteredByRulesAllMode"]:
  496         MaxRuleIDs = OptionsInfo["OutfilesFilteredByRulesMaxCount"]
  497         if MaxRuleIDs < len(RuleIDs):
  498             RuleIDs = RuleIDs[0:MaxRuleIDs]
  499     
  500     return RuleIDs
  501 
  502 def RetrieveTorsionAlertsInfo(Mol, TorsionAlertsSummaryInfo):
  503     """Parse torsion alerts data field value to retrieve alerts information for rotatable bonds."""
  504     
  505     TorsionAlertsLabel = OptionsInfo["SDFieldIDsToLabels"]["TorsionAlertsLabel"]
  506     TorsionAlerts = Mol.GetProp(TorsionAlertsLabel) if Mol.HasProp(TorsionAlertsLabel) else None
  507     
  508     if TorsionAlerts is None or len(TorsionAlerts) == 0:
  509         return None
  510 
  511     # Initialize for tracking by rule IDs...
  512     TorsionAlertsInfo = {}
  513     TorsionAlertsInfo["RuleIDs"] = []
  514     
  515     TorsionAlertsInfo["RuleSMARTS"] = {}
  516     TorsionAlertsInfo["HierarchyClassName"] = {}
  517     TorsionAlertsInfo["HierarchySubClassName"] = {}
  518     TorsionAlertsInfo["EnergyMethod"] = {}
  519     
  520     TorsionAlertsInfo["AtomIndices"] = {}
  521     TorsionAlertsInfo["TorsionAtomIndices"] = {}
  522     TorsionAlertsInfo["TorsionAngle"] = {}
  523     
  524     TorsionAlertsInfo["Energy"] = {}
  525     TorsionAlertsInfo["EnergyLowerBound"] = {}
  526     TorsionAlertsInfo["EnergyUpperBound"] = {}
  527 
  528     TorsionAlertsInfo["AngleNotObserved"] = {}
  529     TorsionAlertsInfo["MaxSingleEnergyAlertType"] = {}
  530     
  531     TorsionAlertsInfo["AnglesNotObservedCount"] = {}
  532     TorsionAlertsInfo["MaxSingleEnergyAlertsCount"] = {}
  533     
  534     ValuesDelimiter = OptionsInfo["IntraSetValuesDelim"]
  535     TorsionAlertsSetSize = 12
  536     
  537     TorsionAlertsWords = TorsionAlerts.split()
  538     if len(TorsionAlertsWords) % TorsionAlertsSetSize:
  539         MiscUtil.PrintError("The number of space delimited values, %s, for TorsionAlerts data field in filtered SD file must be a multiple of %s." % (len(TorsionAlertsWords), TorsionAlertsSetSize))
  540 
  541     ID = 0
  542     for Index in range(0, len(TorsionAlertsWords), TorsionAlertsSetSize):
  543         ID += 1
  544         
  545         RotBondIndices, TorsionIndices, TorsionAngle, Energy, EnergyLowerBound, EnergyUpperBound, HierarchyClass, HierarchySubClass, TorsionRule, EnergyMethod, AngleNotObserved, MaxSingleEnergyAlertType = TorsionAlertsWords[Index: Index + TorsionAlertsSetSize]
  546         RotBondIndices = RotBondIndices.split(ValuesDelimiter)
  547         TorsionIndices = TorsionIndices.split(ValuesDelimiter)
  548 
  549         if TorsionRule not in TorsionAlertsSummaryInfo["SMARTSToRuleIDs"]:
  550             MiscUtil.PrintWarning("The SMARTS pattern, %s, for TorsionAlerts data field in filtered SD file doesn't map to any torsion rule..." % TorsionRule)
  551             continue
  552         TorsionRuleNodeID = TorsionAlertsSummaryInfo["SMARTSToRuleIDs"][TorsionRule]
  553     
  554         # Track data for torsion alerts in a molecule...
  555         if TorsionRuleNodeID not in TorsionAlertsInfo["RuleSMARTS"]:
  556             TorsionAlertsInfo["RuleIDs"].append(TorsionRuleNodeID)
  557 
  558             TorsionAlertsInfo["RuleSMARTS"][TorsionRuleNodeID] = TorsionRule
  559             TorsionAlertsInfo["HierarchyClassName"][TorsionRuleNodeID] = HierarchyClass
  560             TorsionAlertsInfo["HierarchySubClassName"][TorsionRuleNodeID] = HierarchySubClass
  561             TorsionAlertsInfo["EnergyMethod"][TorsionRuleNodeID] = EnergyMethod
  562 
  563             TorsionAlertsInfo["AtomIndices"][TorsionRuleNodeID] = []
  564             TorsionAlertsInfo["TorsionAtomIndices"][TorsionRuleNodeID] = []
  565             TorsionAlertsInfo["TorsionAngle"][TorsionRuleNodeID] = []
  566     
  567             TorsionAlertsInfo["Energy"][TorsionRuleNodeID] = []
  568             TorsionAlertsInfo["EnergyLowerBound"][TorsionRuleNodeID] = []
  569             TorsionAlertsInfo["EnergyUpperBound"][TorsionRuleNodeID] = []
  570             TorsionAlertsInfo["AngleNotObserved"][TorsionRuleNodeID] = []
  571             TorsionAlertsInfo["MaxSingleEnergyAlertType"][TorsionRuleNodeID] = []
  572             
  573             TorsionAlertsInfo["AnglesNotObservedCount"][TorsionRuleNodeID] = 0
  574             TorsionAlertsInfo["MaxSingleEnergyAlertsCount"][TorsionRuleNodeID] = 0
  575             
  576         # Track multiple values for a rule ID...
  577         TorsionAlertsInfo["AtomIndices"][TorsionRuleNodeID].append(RotBondIndices)
  578         TorsionAlertsInfo["TorsionAtomIndices"][TorsionRuleNodeID].append(TorsionIndices)
  579         TorsionAlertsInfo["TorsionAngle"][TorsionRuleNodeID].append(TorsionAngle)
  580         
  581         TorsionAlertsInfo["Energy"][TorsionRuleNodeID].append(Energy)
  582         TorsionAlertsInfo["EnergyLowerBound"][TorsionRuleNodeID].append(EnergyLowerBound)
  583         TorsionAlertsInfo["EnergyUpperBound"][TorsionRuleNodeID].append(EnergyUpperBound)
  584         TorsionAlertsInfo["AngleNotObserved"][TorsionRuleNodeID].append(AngleNotObserved)
  585         
  586         TorsionAlertsInfo["MaxSingleEnergyAlertType"][TorsionRuleNodeID].append(MaxSingleEnergyAlertType)
  587         
  588         # Count angles not observer for a rule ID...
  589         if AngleNotObserved == 'Yes':
  590             TorsionAlertsInfo["AnglesNotObservedCount"][TorsionRuleNodeID] += 1
  591 
  592         # Count max single energy alert for a rule ID...
  593         if MaxSingleEnergyAlertType == 'Yes':
  594             TorsionAlertsInfo["MaxSingleEnergyAlertsCount"][TorsionRuleNodeID] += 1
  595     
  596     return TorsionAlertsInfo
  597 
  598 def WriteMolecule(Writer, Mol, RotBondsAlertsInfo):
  599     """Write out molecule."""
  600     
  601     if OptionsInfo["CountMode"]:
  602         return True
  603 
  604     SetupMolPropertiesForAlertsInformation(Mol, RotBondsAlertsInfo)
  605 
  606     try:
  607         Writer.write(Mol)
  608     except Exception as ErrMsg:
  609         MiscUtil.PrintWarning("Failed to write molecule %s:\n%s\n" % (RDKitUtil.GetMolName(Mol), ErrMsg))
  610         return False
  611     
  612     return True
  613 
  614 def SetupMolPropertiesForAlertsInformation(Mol, RotBondsAlertsInfo):
  615     """Setup molecule properties containing alerts information for rotatable bonds."""
  616 
  617     if not OptionsInfo["OutfileAlerts"]:
  618         return
  619 
  620     SDFieldIDsToLabels = OptionsInfo["SDFieldIDsToLabels"]
  621     Precision = OptionsInfo["Precision"]
  622     
  623     # Setup rotatable bonds count...
  624     RotBondsCount = 0
  625     if RotBondsAlertsInfo is not None:
  626         RotBondsCount =  len(RotBondsAlertsInfo["IDs"])
  627     Mol.SetProp(SDFieldIDsToLabels["RotBondsCountLabel"],  "%s" % RotBondsCount)
  628 
  629     if RotBondsAlertsInfo is not None:
  630         # Setup total energy along with lower and upper bounds...
  631         Mol.SetProp(SDFieldIDsToLabels["TotalEnergyLabel"],  "%s" % SetupEnergyValueForSDField(RotBondsAlertsInfo["TotalEnergy"], Precision))
  632         Mol.SetProp(SDFieldIDsToLabels["TotalEnergyLowerBoundCILabel"],  "%s" % SetupEnergyValueForSDField(RotBondsAlertsInfo["TotalEnergyLowerBound"], Precision))
  633         Mol.SetProp(SDFieldIDsToLabels["TotalEnergyUpperBoundCILabel"],  "%s" % SetupEnergyValueForSDField(RotBondsAlertsInfo["TotalEnergyUpperBound"], Precision))
  634         
  635         # Setup max single energy and alert count...
  636         if OptionsInfo["MaxSingleEnergyMode"] or OptionsInfo["TotalOrMaxSingleEnergyMode"]:
  637             Mol.SetProp(SDFieldIDsToLabels["MaxSingleEnergyLabel"],  "%s" % SetupEnergyValueForSDField(RotBondsAlertsInfo["MaxSingleEnergy"], Precision))
  638             Mol.SetProp(SDFieldIDsToLabels["MaxSingleEnergyAlertsCountLabel"],  "%s" % ("NA" if RotBondsAlertsInfo["MaxSingleEnergyAlertsCount"] is None else RotBondsAlertsInfo["MaxSingleEnergyAlertsCount"]))
  639             
  640         Mol.SetProp(SDFieldIDsToLabels["AnglesNotObservedCountLabel"],  "%s" % ("NA" if RotBondsAlertsInfo["AnglesNotObservedCount"] is None else RotBondsAlertsInfo["AnglesNotObservedCount"]))
  641     
  642     
  643     # Setup alert information for rotatable bonds...
  644     AlertsInfoValues = []
  645 
  646     # Delimiter for multiple values corresponding to specific set of information for
  647     # a rotatable bond. For example: TorsionAtomIndices
  648     ValuesDelim = OptionsInfo["IntraSetValuesDelim"]
  649 
  650     # Delimiter for various values for a rotatable bond...
  651     RotBondValuesDelim = OptionsInfo["InterSetValuesDelim"]
  652     
  653     # Delimiter for values corresponding to multiple rotatable bonds...
  654     AlertsInfoValuesDelim = OptionsInfo["InterSetValuesDelim"]
  655     
  656     if RotBondsAlertsInfo is not None:
  657         for ID in RotBondsAlertsInfo["IDs"]:
  658             if not RotBondsAlertsInfo["MatchStatus"][ID]:
  659                 continue
  660 
  661             if SkipRotatableBondAlertInfo(ID, RotBondsAlertsInfo):
  662                 continue
  663                 
  664             RotBondValues = []
  665             
  666             # Bond atom indices...
  667             Values = ["%s" % Value for Value in RotBondsAlertsInfo["AtomIndices"][ID]]
  668             RotBondValues.append(ValuesDelim.join(Values))
  669     
  670             # Torsion atom indices...
  671             TorsionAtomIndices = SetupTorsionAtomIndicesValues(RotBondsAlertsInfo["TorsionAtomIndices"][ID], ValuesDelim)
  672             RotBondValues.append(TorsionAtomIndices)
  673 
  674             # Torsion angle...
  675             RotBondValues.append("%.2f" % RotBondsAlertsInfo["TorsionAngle"][ID])
  676 
  677             # Energy along with its lower and upper bound confidence interval...
  678             RotBondValues.append(SetupEnergyValueForSDField(RotBondsAlertsInfo["Energy"][ID], Precision))
  679             RotBondValues.append(SetupEnergyValueForSDField(RotBondsAlertsInfo["EnergyLowerBound"][ID], Precision))
  680             RotBondValues.append(SetupEnergyValueForSDField(RotBondsAlertsInfo["EnergyUpperBound"][ID], Precision))
  681             
  682             # Hierarchy class and subclass names...
  683             RotBondValues.append("%s" % RotBondsAlertsInfo["HierarchyClassName"][ID])
  684             RotBondValues.append("%s" % RotBondsAlertsInfo["HierarchySubClassName"][ID])
  685             
  686             # Torsion rule SMARTS...
  687             RotBondValues.append("%s" % RotBondsAlertsInfo["TorsionRuleSMARTS"][ID])
  688             
  689             # Energy method...
  690             RotBondValues.append("%s" % RotBondsAlertsInfo["EnergyMethod"][ID])
  691 
  692             # Angle not observed...
  693             RotBondValues.append("%s" % SetupAngleNotObservedValue(RotBondsAlertsInfo["AngleNotObserved"][ID]))
  694             
  695             # Max single energy alert status...
  696             RotBondValues.append("%s" % SetupMaxSingleEnergyAlertStatusValue(RotBondsAlertsInfo["MaxSingleEnergyAlertStatus"][ID]))
  697                 
  698             # Track joined values for a rotatable bond...
  699             AlertsInfoValues.append("%s" % RotBondValuesDelim.join(RotBondValues))
  700     
  701     if len(AlertsInfoValues):
  702         Mol.SetProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionAlertsLabel"], "%s" % ("%s" % AlertsInfoValuesDelim.join(AlertsInfoValues)))
  703 
  704 def WriteMoleculeFilteredByRuleID(Writer, Mol, TorsionRuleID, TorsionAlertsSummaryInfo, TorsionAlertsInfo):
  705     """Write out molecule."""
  706     
  707     if OptionsInfo["CountMode"]:
  708         return
  709 
  710     SetupMolPropertiesForFilteredByRuleIDAlertsInformation(Mol, TorsionRuleID, TorsionAlertsSummaryInfo, TorsionAlertsInfo)
  711         
  712     Writer.write(Mol)
  713 
  714 def SetupMolPropertiesForFilteredByRuleIDAlertsInformation(Mol, TorsionRuleID, TorsionAlertsSummaryInfo, TorsionAlertsInfo):
  715     """Setup molecule properties containing alerts information for torsion alerts
  716     fileted by Rule IDs."""
  717 
  718     # Delete torsion alerts information for rotatable bonds...
  719     if Mol.HasProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionAlertsLabel"]):
  720         Mol.ClearProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionAlertsLabel"])
  721 
  722     # Delimiter for values...
  723     IntraSetValuesDelim = OptionsInfo["IntraSetValuesDelim"]
  724     InterSetValuesDelim = OptionsInfo["InterSetValuesDelim"]
  725 
  726     # Setup alert rule information...
  727     AlertRuleInfoValues = []
  728     
  729     AlertRuleInfoValues.append("%s" % TorsionAlertsInfo["HierarchyClassName"][TorsionRuleID])
  730     AlertRuleInfoValues.append("%s" % TorsionAlertsInfo["HierarchySubClassName"][TorsionRuleID])
  731     
  732     AlertRuleInfoValues.append("%s" % TorsionAlertsInfo["RuleSMARTS"][TorsionRuleID])
  733     AlertRuleInfoValues.append("%s" % TorsionAlertsInfo["EnergyMethod"][TorsionRuleID])
  734     
  735     Mol.SetProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionRuleLabel"], "%s" % ("%s" % InterSetValuesDelim.join(AlertRuleInfoValues)))
  736 
  737     # Setup max single energy alert count for torsion rule...
  738     Mol.SetProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionRuleMaxSingleEnergyAlertsCountLabel"],  "%s" % TorsionAlertsInfo["MaxSingleEnergyAlertsCount"][TorsionRuleID])
  739     
  740     # Setup angle not observed count for torsion rule...
  741     Mol.SetProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionRuleAnglesNotObservedCountLabel"],  "%s" % TorsionAlertsInfo["AnglesNotObservedCount"][TorsionRuleID])
  742     
  743     # Setup torsion rule alerts...
  744     # "TorsionRuleAlertsLabel": "TorsionRuleAlerts (RotBondIndices TorsionIndices TorsionAngle Energy EnergyLowerBoundCI EnergyUpperBoundCI EnergyMethod AngleNotObserved MaxSingleEnergyAlert)
  745     AlertsInfoValues = []
  746     for Index in range(0, len(TorsionAlertsInfo["AtomIndices"][TorsionRuleID])):
  747         RotBondInfoValues = []
  748         
  749         # Bond atom indices...
  750         Values = ["%s" % Value for Value in TorsionAlertsInfo["AtomIndices"][TorsionRuleID][Index]]
  751         RotBondInfoValues.append(IntraSetValuesDelim.join(Values))
  752         
  753         # Torsion atom indices retrieved from the filtered SD file and stored as strings...
  754         Values = ["%s" % Value for Value in TorsionAlertsInfo["TorsionAtomIndices"][TorsionRuleID][Index]]
  755         RotBondInfoValues.append(IntraSetValuesDelim.join(Values))
  756         
  757         # Torsion angle...
  758         RotBondInfoValues.append(TorsionAlertsInfo["TorsionAngle"][TorsionRuleID][Index])
  759 
  760         # Energy and its bounds...
  761         RotBondInfoValues.append(TorsionAlertsInfo["Energy"][TorsionRuleID][Index])
  762         RotBondInfoValues.append(TorsionAlertsInfo["EnergyLowerBound"][TorsionRuleID][Index])
  763         RotBondInfoValues.append(TorsionAlertsInfo["EnergyUpperBound"][TorsionRuleID][Index])
  764         
  765         # Angle not observed......
  766         RotBondInfoValues.append(TorsionAlertsInfo["AngleNotObserved"][TorsionRuleID][Index])
  767         
  768         # Max single energy alert type...
  769         RotBondInfoValues.append(TorsionAlertsInfo["MaxSingleEnergyAlertType"][TorsionRuleID][Index])
  770         
  771         # Track alerts informaiton...
  772         AlertsInfoValues.append("%s" % InterSetValuesDelim.join(RotBondInfoValues))
  773         
  774     Mol.SetProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionRuleAlertsLabel"],  "%s" % (InterSetValuesDelim.join(AlertsInfoValues)))
  775     
  776 def SkipRotatableBondAlertInfo(ID, RotBondsAlertsInfo):
  777     """Skip rotatble bond alert info for a specific bond during writing to output files."""
  778 
  779     if not OptionsInfo["OutfileAlertsOnly"]:
  780         return False
  781     
  782     if RotBondsAlertsInfo["RotBondsAlertsStatus"] is None:
  783         return True
  784 
  785     Status = False
  786     if OptionsInfo["TotalEnergyMode"]:
  787         if not RotBondsAlertsInfo["RotBondsAlertsStatus"]:
  788             Status = True
  789     elif OptionsInfo["MaxSingleEnergyMode"]:
  790         if RotBondsAlertsInfo["MaxSingleEnergyAlertStatus"][ID] is None or not RotBondsAlertsInfo["MaxSingleEnergyAlertStatus"][ID]:
  791             Status = True
  792     elif OptionsInfo["TotalOrMaxSingleEnergyMode"]:
  793         if not RotBondsAlertsInfo["RotBondsAlertsStatus"]:
  794             Status = True
  795     
  796     return Status
  797 
  798 def SetupEnergyValueForSDField(Value, Precision):
  799     """Setup energy value for SD field."""
  800     
  801     if Value is None or math.isnan(Value) or math.isinf(Value):
  802         return "NA"
  803 
  804     return "%.*f" % (Precision, Value)
  805     
  806 def SetupAngleNotObservedValue(Value):
  807     """Setup angle not observed value."""
  808     
  809     if Value is None:
  810         return "NA"
  811 
  812     return "Yes" if Value else "No"
  813 
  814 def SetupMaxSingleEnergyAlertStatusValue(Value):
  815     """Setup max single energy alert status value."""
  816     
  817     if Value is None:
  818         return "NA"
  819 
  820     return "Yes" if Value else "No"
  821 
  822 def SetupTorsionAtomIndicesValues(TorsionAtomIndicesList, ValuesDelim):
  823     """Setup torsion atom indices value for output files."""
  824 
  825     Values = ["%s" % Value for Value in TorsionAtomIndicesList]
  826     
  827     return ValuesDelim.join(Values)
  828     
  829 def SetupOutfilesWriters():
  830     """Setup molecule and summary writers."""
  831 
  832     OutfilesWriters = {"WriterRemaining": None, "WriterFiltered": None, "WriterAlertSummary": None}
  833 
  834     # Writers for SD files...
  835     WriterRemaining, WriterFiltered = SetupMoleculeWriters()
  836     OutfilesWriters["WriterRemaining"] = WriterRemaining
  837     OutfilesWriters["WriterFiltered"] = WriterFiltered
  838     
  839     # Writer for alert summary CSV file...
  840     WriterAlertSummary = SetupAlertSummaryWriter()
  841     OutfilesWriters["WriterAlertSummary"] = WriterAlertSummary
  842 
  843     return OutfilesWriters
  844 
  845 def SetupMoleculeWriters():
  846     """Setup molecule writers."""
  847     
  848     Writer = None
  849     WriterFiltered = None
  850 
  851     if OptionsInfo["CountMode"]:
  852         return (Writer, WriterFiltered)
  853 
  854     Writer = RDKitUtil.MoleculesWriter(OptionsInfo["Outfile"], **OptionsInfo["OutfileParams"])
  855     if Writer is None:
  856         MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OptionsInfo["Outfile"])
  857     MiscUtil.PrintInfo("\nGenerating file %s..." % OptionsInfo["Outfile"])
  858     
  859     if OptionsInfo["OutfileFilteredMode"]:
  860         WriterFiltered = RDKitUtil.MoleculesWriter(OptionsInfo["OutfileFiltered"], **OptionsInfo["OutfileParams"])
  861         if WriterFiltered is None:
  862             MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OptionsInfo["OutfileFiltered"])
  863         MiscUtil.PrintInfo("Generating file %s..." % OptionsInfo["OutfileFiltered"])
  864     
  865     return (Writer, WriterFiltered)
  866 
  867 def SetupAlertSummaryWriter():
  868     """Setup a alert summary writer."""
  869     
  870     Writer = None
  871     
  872     if OptionsInfo["CountMode"]:
  873         return Writer
  874         
  875     if not OptionsInfo["OutfileSummaryMode"]:
  876         return Writer
  877     
  878     Outfile = OptionsInfo["OutfileSummary"]
  879     Writer = open(Outfile, "w")
  880     if Writer is None:
  881         MiscUtil.PrintError("Failed to setup a writer for output fie %s " % Outfile)
  882     
  883     MiscUtil.PrintInfo("Generating file %s..." % Outfile)
  884     
  885     return Writer
  886     
  887 def CloseOutfilesWriters(OutfilesWriters):
  888     """Close outfile writers."""
  889 
  890     for WriterType, Writer in OutfilesWriters.items():
  891         if Writer is not None:
  892             Writer.close()
  893 
  894 def SetupByRuleOutfilesWriters(RuleIDs):
  895     """Setup by rule outfiles writers."""
  896 
  897     # Initialize...
  898     OutfilesWriters = {}
  899     for RuleID in RuleIDs:
  900         OutfilesWriters[RuleID] = None
  901     
  902     if OptionsInfo["CountMode"]:
  903         return OutfilesWriters
  904         
  905     if not OptionsInfo["OutfilesFilteredByRulesMode"]:
  906         return OutfilesWriters
  907     
  908     FileDir, FileName, FileExt = MiscUtil.ParseFileName(Options["--outfile"])
  909     OutfilesRoot = "%s_Filtered_TopRule" % FileName
  910     OutfilesExt = "sdf"
  911 
  912     MsgTxt = "all" if OptionsInfo["OutfilesFilteredByRulesAllMode"] else "top %s" % OptionsInfo["OutfilesFilteredByRulesMaxCount"]
  913     MiscUtil.PrintInfo("\nGenerating output files %s*.%s for %s torsion rules triggering alerts..." % (OutfilesRoot, OutfilesExt, MsgTxt))
  914     
  915     # Delete any existing output files...
  916     Outfiles = glob.glob("%s*.%s" % (OutfilesRoot, OutfilesExt))
  917     if len(Outfiles):
  918         MiscUtil.PrintInfo("Deleting existing output files %s*.%s..." % (OutfilesRoot, OutfilesExt))
  919         for Outfile in Outfiles:
  920             try:
  921                 os.remove(Outfile)
  922             except Exception as ErrMsg:
  923                 MiscUtil.PrintWarning("Failed to delete file: %s" % ErrMsg)
  924     
  925     RuleIndex = 0
  926     for RuleID in RuleIDs:
  927         RuleIndex += 1
  928         Outfile = "%s%s.%s" % (OutfilesRoot, RuleIndex, OutfilesExt)
  929         Writer = RDKitUtil.MoleculesWriter(Outfile, **OptionsInfo["OutfileParams"])
  930         if Writer is None:
  931             MiscUtil.PrintError("Failed to setup a writer for output fie %s " % Outfile)
  932             
  933         OutfilesWriters[RuleID] = Writer
  934 
  935     return OutfilesWriters
  936 
  937 def CloseByRuleOutfilesWriters(OutfilesWriters):
  938     """Close by rule outfile writers."""
  939 
  940     for RuleID, Writer in OutfilesWriters.items():
  941         if Writer is not None:
  942             Writer.close()
  943 
  944 def InstantiateTorsionStrainEnergyAlertsClass(Quiet = False):
  945     """Initialize torsion strain energy alerts class."""
  946 
  947     try:
  948         TorsionStrainEnergyAlertsHandle = TorsionStrainEnergyAlerts(AlertsMode = OptionsInfo["AlertsMode"], TotalEnergyCutoff = OptionsInfo["TotalEnergyCutoff"], MaxSingleEnergyCutoff = OptionsInfo["MaxSingleEnergyCutoff"], RotBondsSMARTSMode = OptionsInfo["RotBondsSMARTSMode"], RotBondsSMARTSPattern = OptionsInfo["RotBondsSMARTSPattern"], TorsionLibraryFilePath = OptionsInfo["TorsionEnergyLibraryFile"], AlertTorsionsNotObserved = OptionsInfo["FilterTorsionsNotObserved"])
  949     except Exception as ErrMsg:
  950         MiscUtil.PrintError("Failed to instantiate TorsionStrainEnergyAlerts:\n%s\n" % (ErrMsg))
  951 
  952     if not Quiet:
  953         MiscUtil.PrintInfo("\nRetrieving data from torsion strain energy library file %s..." % TorsionStrainEnergyAlertsHandle.GetTorsionLibraryFilePath())
  954         TorsionStrainEnergyAlertsHandle.ListTorsionLibraryInfo()
  955 
  956     return TorsionStrainEnergyAlertsHandle
  957 
  958 def ProcessRotatableBondsSMARTSMode():
  959     """"Process SMARTS pattern for rotatable bonds."""
  960 
  961     RotBondsMode = OptionsInfo["RotBondsSMARTSMode"]
  962     
  963     RotBondsSMARTSPattern = None
  964     RotBondsSMARTSPatternSpecified = OptionsInfo["RotBondsSMARTSPatternSpecified"]
  965     
  966     if re.match("^(NonStrict|SemiStrict|Strict)$", RotBondsMode, re.I):
  967         RotBondsSMARTSPattern = None
  968     elif re.match("Specify", RotBondsMode, re.I):
  969         RotBondsSMARTSPatternSpecified = RotBondsSMARTSPatternSpecified.strip()
  970         if not len(RotBondsSMARTSPatternSpecified):
  971             MiscUtil.PrintError("Empty value specified for SMILES/SMARTS pattern in  \"--rotBondsSMARTSPattern\" option, %s." % RotBondsMode)
  972         
  973         RotBondsPatternMol = Chem.MolFromSmarts(RotBondsSMARTSPatternSpecified)
  974         if RotBondsPatternMol is None:
  975             MiscUtil.PrintError("Failed to create rotatable bonds pattern molecule. The rotatable bonds SMARTS pattern, \"%s\", specified using \"--rotBondsSMARTSPattern\" option is not valid." % (RotBondsSMARTSPatternSpecified))
  976     else:
  977         MiscUtil.PrintError("The value, %s, specified for option \"-r, --rotBondsSMARTSMode\" is not valid. " % RotBondsMode)
  978     
  979     OptionsInfo["RotBondsSMARTSPattern"] = RotBondsSMARTSPattern
  980 
  981 def ProcessSDFieldLabelsOption():
  982     """Process SD data field label option."""
  983 
  984     ParamsOptionName = "--outfileSDFieldLabels"
  985     ParamsOptionValue = Options["--outfileSDFieldLabels"]
  986 
  987     ParamsIDsToLabels = {"RotBondsCountLabel": "RotBondsCount", "TotalEnergyLabel": "TotalEnergy", "TotalEnergyLowerBoundCILabel": "TotalEnergyLowerBoundCI", "TotalEnergyUpperBoundCILabel": "TotalEnergyUpperBoundCI", "MaxSingleEnergyLabel": "MaxSingleEnergy", "MaxSingleEnergyAlertsCountLabel": "MaxSingleEnergyAlertsCount", "AnglesNotObservedCountLabel": "AnglesNotObservedCount", "TorsionAlertsLabel": "TorsionAlerts(RotBondIndices TorsionIndices TorsionAngle Energy EnergyLowerBoundCI EnergyUpperBoundCI HierarchyClass HierarchySubClass TorsionRule EnergyMethod AngleNotObserved MaxSingleEnergyAlert)", "TorsionRuleLabel": "TorsionRule (HierarchyClass HierarchySubClass TorsionRule EnergyMethod)", "TorsionRuleMaxSingleEnergyAlertsCountLabel": "TorsionRuleMaxSingleEnergyAlertsCount", "TorsionRuleAnglesNotObservedCountLabel": "TorsionRuleAnglesNotObservedCount", "TorsionRuleAlertsLabel": "TorsionRuleAlerts (RotBondIndices TorsionIndices TorsionAngle Energy EnergyLowerBoundCI EnergyUpperBoundCI AngleNotObserved MaxSingleEnergyAlert)"}
  988 
  989     if re.match("^auto$", ParamsOptionValue, re.I):
  990         OptionsInfo["SDFieldIDsToLabels"] = ParamsIDsToLabels
  991         return
  992     
  993     # Setup a canonical paramater names...
  994     ValidParamNames = []
  995     CanonicalParamNamesMap = {}
  996     for ParamName in sorted(ParamsIDsToLabels):
  997         ValidParamNames.append(ParamName)
  998         CanonicalParamNamesMap[ParamName.lower()] = ParamName
  999     
 1000     ParamsOptionValue = ParamsOptionValue.strip()
 1001     if not ParamsOptionValue:
 1002         PrintError("No valid parameter name and value pairs specified using \"%s\" option" % ParamsOptionName)
 1003     
 1004     ParamsOptionValueWords = ParamsOptionValue.split(",")
 1005     if len(ParamsOptionValueWords) % 2:
 1006         MiscUtil.PrintError("The number of comma delimited paramater names and values, %d, specified using \"%s\" option must be an even number." % (len(ParamsOptionValueWords), ParamsOptionName))
 1007     
 1008     # Validate paramater name and value pairs...
 1009     for Index in range(0, len(ParamsOptionValueWords), 2):
 1010         Name = ParamsOptionValueWords[Index].strip()
 1011         Value = ParamsOptionValueWords[Index + 1].strip()
 1012 
 1013         CanonicalName = Name.lower()
 1014         if  not CanonicalName in CanonicalParamNamesMap:
 1015             MiscUtil.PrintError("The parameter name, %s, specified using \"%s\" is not a valid name. Supported parameter names: %s" % (Name, ParamsOptionName, " ".join(ValidParamNames)))
 1016 
 1017         ParamName = CanonicalParamNamesMap[CanonicalName]
 1018         ParamValue = Value
 1019         
 1020         # Set value...
 1021         ParamsIDsToLabels[ParamName] = ParamValue
 1022     
 1023     OptionsInfo["SDFieldIDsToLabels"] = ParamsIDsToLabels
 1024 
 1025 def ProcessOptions():
 1026     """Process and validate command line arguments and options."""
 1027     
 1028     MiscUtil.PrintInfo("Processing options...")
 1029 
 1030     # Validate options...
 1031     ValidateOptions()
 1032     
 1033     TotalEnergyMode, MaxSingleEnergyMode, TotalOrMaxSingleEnergyMode = [False] * 3
 1034     AlertsMode = Options["--alertsMode"]
 1035     if re.match("^TotalEnergy$", AlertsMode, re.I):
 1036         TotalEnergyMode = True
 1037     elif re.match("^MaxSingleEnergy$", AlertsMode, re.I):
 1038         MaxSingleEnergyMode = True
 1039     elif re.match("^TotalOrMaxSingleEnergy$", AlertsMode, re.I):
 1040         TotalOrMaxSingleEnergyMode = True
 1041     OptionsInfo["AlertsMode"] = AlertsMode
 1042     OptionsInfo["TotalEnergyMode"] = TotalEnergyMode
 1043     OptionsInfo["MaxSingleEnergyMode"] = MaxSingleEnergyMode
 1044     OptionsInfo["TotalOrMaxSingleEnergyMode"] = TotalOrMaxSingleEnergyMode
 1045     
 1046     OptionsInfo["FilterTorsionsNotObserved"] = True if re.match("^yes$", Options["--filterTorsionsNotObserved"], re.I) else False
 1047     
 1048     OptionsInfo["MaxSingleEnergyCutoff"] = float(Options["--alertsMaxSingleEnergyCutoff"])
 1049     OptionsInfo["TotalEnergyCutoff"] = float(Options["--alertsTotalEnergyCutoff"])
 1050     
 1051     OptionsInfo["Infile"] = Options["--infile"]
 1052     ParamsDefaultInfoOverride = {"RemoveHydrogens": False}
 1053     OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters("--infileParams", Options["--infileParams"], InfileName = Options["--infile"], ParamsDefaultInfo = ParamsDefaultInfoOverride)
 1054     
 1055     OptionsInfo["Outfile"] = Options["--outfile"]
 1056     OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters("--outfileParams", Options["--outfileParams"], Options["--infile"], Options["--outfile"])
 1057     
 1058     FileDir, FileName, FileExt = MiscUtil.ParseFileName(Options["--outfile"])
 1059     OutfileFiltered = "%s_Filtered.%s" % (FileName, FileExt)
 1060     OptionsInfo["OutfileFiltered"] = OutfileFiltered
 1061     OptionsInfo["OutfileFilteredMode"] = True if re.match("^yes$", Options["--outfileFiltered"], re.I) else False
 1062 
 1063     OptionsInfo["OutfileSummary"] = "%s_AlertsSummary.csv" % (FileName)
 1064 
 1065     OutfileSummaryMode = Options["--outfileSummary"]
 1066     if re.match("^auto$", OutfileSummaryMode, re.I):
 1067         OutfileSummaryMode = 'yes' if re.match("^MaxSingleEnergy$", Options["--alertsMode"], re.I) else 'no'
 1068     OptionsInfo["OutfileSummaryMode"] = True if re.match("^yes$", OutfileSummaryMode, re.I) else False
 1069     
 1070     if re.match("^yes$", Options["--outfileSummary"], re.I):
 1071         if not re.match("^MaxSingleEnergy$", Options["--alertsMode"], re.I):
 1072             MiscUtil.PrintError("The value \"%s\" specified for \"--outfileSummary\" option is not valid. The specified value is only allowed during \"MaxSingleEnergy\" value of \"-a, --alertsMode\" option." % (Options["--outfileSummary"]))
 1073     
 1074     OutfilesFilteredByRulesMode = Options["--outfilesFilteredByRules"]
 1075     if re.match("^auto$", OutfilesFilteredByRulesMode, re.I):
 1076         OutfilesFilteredByRulesMode = 'yes' if re.match("^MaxSingleEnergy$", Options["--alertsMode"], re.I) else 'no'
 1077     OptionsInfo["OutfilesFilteredByRulesMode"] = True if re.match("^yes$", OutfilesFilteredByRulesMode, re.I) else False
 1078     
 1079     if re.match("^yes$", Options["--outfilesFilteredByRules"], re.I):
 1080         if not re.match("^MaxSingleEnergy$", Options["--alertsMode"], re.I):
 1081             MiscUtil.PrintError("The value \"%s\" specified for \"--outfilesFilteredByRules\" option is not valid. The specified value is only allowed during \"MaxSingleEnergy\" value of \"-a, --alertsMode\" option." % (Options["--outfileSummary"]))
 1082     
 1083     OptionsInfo["TrackAlertsSummaryInfo"] = True if (OptionsInfo["OutfileSummaryMode"] or OptionsInfo["OutfilesFilteredByRulesMode"]) else False
 1084     
 1085     OutfilesFilteredByRulesMaxCount = Options["--outfilesFilteredByRulesMaxCount"]
 1086     if not re.match("^All$", OutfilesFilteredByRulesMaxCount, re.I):
 1087         OutfilesFilteredByRulesMaxCount = int(OutfilesFilteredByRulesMaxCount)
 1088     OptionsInfo["OutfilesFilteredByRulesMaxCount"] = OutfilesFilteredByRulesMaxCount
 1089     OptionsInfo["OutfilesFilteredByRulesAllMode"] = True if re.match("^All$", Options["--outfilesFilteredByRulesMaxCount"], re.I) else False
 1090     
 1091     OptionsInfo["OutfileAlerts"] = True if re.match("^yes$", Options["--outfileAlerts"], re.I) else False
 1092     
 1093     if re.match("^yes$", Options["--outfilesFilteredByRules"], re.I):
 1094         if not re.match("^yes$", Options["--outfileAlerts"], re.I):
 1095             MiscUtil.PrintError("The value \"%s\" specified for \"--outfilesFilteredByRules\" option is not valid. The specified value is only allowed during \"yes\" value of \"--outfileAlerts\" option." % (Options["--outfilesFilteredByRules"]))
 1096     
 1097     OptionsInfo["OutfileAlertsMode"] = Options["--outfileAlertsMode"]
 1098     OptionsInfo["OutfileAlertsOnly"] = True if re.match("^AlertsOnly$", Options["--outfileAlertsMode"], re.I) else False
 1099     
 1100     ProcessSDFieldLabelsOption()
 1101     
 1102     OptionsInfo["Overwrite"] = Options["--overwrite"]
 1103     OptionsInfo["CountMode"] = True if re.match("^count$", Options["--mode"], re.I) else False
 1104 
 1105     OptionsInfo["Precision"] = int(Options["--precision"])
 1106     
 1107     OptionsInfo["MPMode"] = True if re.match("^yes$", Options["--mp"], re.I) else False
 1108     OptionsInfo["MPParams"] = MiscUtil.ProcessOptionMultiprocessingParameters("--mpParams", Options["--mpParams"])
 1109     
 1110     OptionsInfo["RotBondsSMARTSMode"] = Options["--rotBondsSMARTSMode"]
 1111     OptionsInfo["RotBondsSMARTSPatternSpecified"] = Options["--rotBondsSMARTSPattern"]
 1112     ProcessRotatableBondsSMARTSMode()
 1113     
 1114     OptionsInfo["TorsionEnergyLibraryFile"] = Options["--torsionEnergyLibraryFile"]
 1115     
 1116     # Setup delimiter for writing out torsion alert information to output files...
 1117     OptionsInfo["IntraSetValuesDelim"] = ","
 1118     OptionsInfo["InterSetValuesDelim"] = " "
 1119     
 1120 def RetrieveOptions():
 1121     """Retrieve command line arguments and options."""
 1122     
 1123     # Get options...
 1124     global Options
 1125     Options = docopt(_docoptUsage_)
 1126     
 1127     # Set current working directory to the specified directory...
 1128     WorkingDir = Options["--workingdir"]
 1129     if WorkingDir:
 1130         os.chdir(WorkingDir)
 1131     
 1132     # Handle examples option...
 1133     if "--examples" in Options and Options["--examples"]:
 1134         MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_))
 1135         sys.exit(0)
 1136     
 1137 def ProcessListTorsionLibraryOption():
 1138     """Process list torsion library information."""
 1139 
 1140     # Validate and process dataFile option for listing torsion library information...
 1141     OptionsInfo["TorsionEnergyLibraryFile"] = Options["--torsionEnergyLibraryFile"]
 1142     if not re.match("^auto$", Options["--torsionEnergyLibraryFile"], re.I):
 1143         MiscUtil.ValidateOptionFilePath("-t, --torsionEnergyLibraryFile", Options["--torsionEnergyLibraryFile"])
 1144 
 1145     # Instantiate TorsionStrainEnergyAlerts using defaults...
 1146     TorsionStrainEnergyAlertsHandle = TorsionStrainEnergyAlerts(TorsionLibraryFilePath = OptionsInfo["TorsionEnergyLibraryFile"])
 1147     MiscUtil.PrintInfo("\nRetrieving data from torsion strain energy library file %s..." % TorsionStrainEnergyAlertsHandle.GetTorsionLibraryFilePath())
 1148     TorsionStrainEnergyAlertsHandle.ListTorsionLibraryInfo()
 1149 
 1150 def ValidateOptions():
 1151     """Validate option values."""
 1152 
 1153     MiscUtil.ValidateOptionTextValue("-a, --alertsMode", Options["--alertsMode"], "TotalEnergy MaxSingleEnergy TotalOrMaxSingleEnergy")
 1154     
 1155     MiscUtil.ValidateOptionFloatValue("--alertsMaxSingleEnergyCutoff", Options["--alertsMaxSingleEnergyCutoff"], {">": 0.0})
 1156     MiscUtil.ValidateOptionFloatValue("--alertsTotalEnergyCutoff", Options["--alertsTotalEnergyCutoff"], {">": 0.0})
 1157     
 1158     MiscUtil.ValidateOptionTextValue("--filterTorsionsNotObserved", Options["--filterTorsionsNotObserved"], "yes no")
 1159     
 1160     MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"])
 1161     MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "sdf sd mol")
 1162     
 1163     MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "sdf sd")
 1164     if re.match("^filter$", Options["--mode"], re.I):
 1165         MiscUtil.ValidateOptionsOutputFileOverwrite("-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"])
 1166         MiscUtil.ValidateOptionsDistinctFileNames("-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"])
 1167 
 1168     MiscUtil.ValidateOptionTextValue("--outfileFiltered", Options["--outfileFiltered"], "yes no")
 1169     
 1170     MiscUtil.ValidateOptionTextValue("--outfilesFilteredByRules", Options["--outfilesFilteredByRules"], "yes no auto")
 1171     if not re.match("^All$", Options["--outfilesFilteredByRulesMaxCount"], re.I):
 1172         MiscUtil.ValidateOptionIntegerValue("--outfilesFilteredByRulesMaxCount", Options["--outfilesFilteredByRulesMaxCount"], {">": 0})
 1173     
 1174     MiscUtil.ValidateOptionTextValue("--outfileSummary", Options["--outfileSummary"], "yes no auto")
 1175     MiscUtil.ValidateOptionTextValue("--outfileAlerts", Options["--outfileAlerts"], "yes no")
 1176     MiscUtil.ValidateOptionTextValue("--outfileAlertsMode", Options["--outfileAlertsMode"], "All AlertsOnly")
 1177     
 1178     MiscUtil.ValidateOptionTextValue("-m, --mode", Options["--mode"], "filter count")
 1179     if re.match("^filter$", Options["--mode"], re.I):
 1180         if not Options["--outfile"]:
 1181             MiscUtil.PrintError("The outfile must be specified using \"-o, --outfile\" during \"filter\" value of \"-m, --mode\" option")
 1182         
 1183     MiscUtil.ValidateOptionTextValue("--mp", Options["--mp"], "yes no")
 1184     
 1185     MiscUtil.ValidateOptionIntegerValue("-p, --precision", Options["--precision"], {">": 0})
 1186     
 1187     MiscUtil.ValidateOptionTextValue("-r, --rotBondsSMARTSMode", Options["--rotBondsSMARTSMode"], "NonStrict SemiStrict Strict Specify")
 1188     if re.match("^Specify$", Options["--rotBondsSMARTSMode"], re.I):
 1189         if not Options["--rotBondsSMARTSPattern"]:
 1190             MiscUtil.PrintError("The SMARTS pattern must be specified using \"--rotBondsSMARTSPattern\" during \"Specify\" value of \"-r, --rotBondsSMARTS\" option")
 1191     else:
 1192         if Options["--rotBondsSMARTSPattern"]:
 1193             MiscUtil.PrintError("The SMARTS pattern must not be specified using \"--rotBondsSMARTSPattern\" during \"%s\" value of \"-r, --rotBondsSMARTS\" option" % (Options["--rotBondsSMARTSMode"]))
 1194     
 1195     if not re.match("^auto$", Options["--torsionEnergyLibraryFile"], re.I):
 1196         MiscUtil.ValidateOptionFilePath("-t, --torsionEnergyLibraryFile", Options["--torsionEnergyLibraryFile"])
 1197 
 1198 # Setup a usage string for docopt...
 1199 _docoptUsage_ = """
 1200 RDKitFilterTorsionStrainEnergyAlerts.py - Filter torsion strain energy library alerts
 1201 
 1202 Usage:
 1203     RDKitFilterTorsionStrainEnergyAlerts.py [--alertsMode <TotalEnergy, MaxSingleEnergy, or TotalOrMaxSingleEnergy>]
 1204                                             [--alertsMaxSingleEnergyCutoff <Number>] [--alertsTotalEnergyCutoff <Number>]
 1205                                             [--filterTorsionsNotObserved <yes or no>] [--infileParams <Name,Value,...>] [--mode <filter or count>]
 1206                                             [--mp <yes or no>] [--mpParams <Name,Value,...>] [--outfileAlerts <yes or no>]
 1207                                             [--outfileAlertsMode <All or AlertsOnly>] [--outfileFiltered <yes or no>]
 1208                                             [--outfilesFilteredByRules <yes or no>] [--outfilesFilteredByRulesMaxCount <All or number>]
 1209                                             [--outfileSummary <yes or no>] [--outfileSDFieldLabels <Type,Label,...>] [--outfileParams <Name,Value,...>]
 1210                                             [--overwrite] [--precision <number>] [ --rotBondsSMARTSMode <NonStrict, SemiStrict,...>]
 1211                                             [--rotBondsSMARTSPattern <SMARTS>] [--torsionEnergyLibraryFile <FileName or auto>]
 1212                                             [-w <dir>] -i <infile> -o <outfile>
 1213     RDKitFilterTorsionStrainEnergyAlerts.py [--torsionEnergyLibraryFile <FileName or auto>] -l | --list
 1214     RDKitFilterTorsionStrainEnergyAlerts.py -h | --help | -e | --examples
 1215 
 1216 Description:
 1217     Filter strained molecules from an input file for torsion strain energy library
 1218     [ Ref 153 ] alerts by matching rotatable bonds against SMARTS patterns specified
 1219     for torsion rules in a torsion energy library file and write out appropriate
 1220     molecules to output files. The molecules must have 3D coordinates in input file.
 1221     The default torsion strain energy library file, TorsionStrainEnergyLibrary.xml,
 1222     is available under MAYACHEMTOOLS/lib/python/TorsionAlerts directory.
 1223     
 1224     The data in torsion strain energy library file is organized in a hierarchical
 1225     manner. It consists of one generic class and six specific classes at the highest
 1226     level. Each class contains multiple subclasses corresponding to named functional
 1227     groups or substructure patterns. The subclasses consist of torsion rules sorted
 1228     from specific to generic torsion patterns. The torsion rule, in turn, contains a
 1229     list of peak values for torsion angles and two tolerance values. A pair of tolerance
 1230     values define torsion bins around a torsion peak value.
 1231     
 1232     A strain energy calculation method, 'exact' or 'approximate' [ Ref 153 ], is 
 1233     associated with each torsion rule for calculating torsion strain energy. The 'exact'
 1234     stain energy calculation relies on the energy bins available under the energy histogram
 1235     consisting of 36 bins covering angles from -180 to 180. The width of each bin is 10
 1236     degree. The energy bins are are defined at the right end points. The first and the
 1237     last energy bins correspond to -170 and 180 respectively. The torsion angle is mapped
 1238     to a energy bin. An angle offset is calculated for the torsion angle from the the right
 1239     end point angle of the bin. The strain energy is estimated for the angle offset based
 1240     on the energy difference between the current and previous bins. The torsion strain
 1241     energy, in terms of torsion energy units (TEUs), corresponds to the sum of bin strain
 1242     energy and the angle offset strain energy.
 1243         
 1244         Energy = BinEnergyDiff/10.0 * BinAngleOffset + BinEnergy[BinNum]
 1245         
 1246         Where:
 1247         
 1248         BinEnergyDiff = BinEnergy[BinNum] - BinEnergy[PreviousBinNum]
 1249         BinAngleOffset = TorsionAngle - BinAngleRightSide
 1250         
 1251     The 'approximate' strain energy calculation relies on the angle difference between a
 1252     torsion angle and the torsion peaks observed for the torsion rules in the torsion
 1253     energy library. The torsion angle is matched to a torsion peak based on the value of
 1254     torsion angle difference. It must be less than or equal to the value for the second
 1255     tolerance 'tolerance2'. Otherwise, the torsion angle is not observed in the torsion
 1256     energy library and a value of 'NA' is assigned for torsion energy along with the lower
 1257     and upper bounds on energy at 95% confidence interval. The 'approximate' torsion
 1258     energy (TEUs) for observed torsion angle is calculated using the following formula:
 1259         
 1260         Energy = beta_1 * (AngleDiff ** 2) + beta_2 * (AngleDiff ** 4)
 1261         
 1262     The coefficients 'beta_1' and 'beta_2' are available for the observed angles in 
 1263     the torsion strain energy library. The 'AngleDiff' is the difference between the
 1264     torsion angle and the matched torsion peak.
 1265     
 1266     For example:
 1267          
 1268         <library>
 1269             <hierarchyClass id1="G" id2="G" name="GG">
 1270             ...
 1271             </hierarchyClass>
 1272             <hierarchyClass id1="C" id2="O" name="CO">
 1273                 <hierarchySubClass name="Ester bond I" smarts="O=[C:2][O:3]">
 1274                     <torsionRule method="exact" smarts=
 1275                         "[O:1]=[C:2]!@[O:3]~[CH0:4]">
 1276                         <angleList>
 1277                             <angle score="56.52" tolerance1="20.00"
 1278                             tolerance2="25.00" value="0.0"/>
 1279                         </angleList>
 1280                         <histogram>
 1281                             <bin count="1"/>
 1282                             ...
 1283                         </histogram>
 1284                         <histogram_shifted>
 1285                             <bin count="0"/>
 1286                             ...
 1287                         </histogram_shifted>
 1288                         <histogram_converted>
 1289                             <bin energy="4.67... lower="2.14..." upper="Inf"/>
 1290                             ...
 1291                             <bin energy="1.86..." lower="1.58..." upper="2.40..."/>
 1292                             ...
 1293                            </histogram_converted>
 1294                     </torsionRule>
 1295                     <torsionRule method="approximate" smarts=
 1296                         "[cH0:1][c:2]([cH0])!@[O:3][p:4]">
 1297                         <angleList>
 1298                         <angle beta_1="0.002..." beta_2="-7.843...e-07"
 1299                             score="27.14" theta_0="-90.0" tolerance1="30.00"
 1300                             tolerance2="45.00" value="-90.0"/>
 1301                         ...
 1302                         </angleList>
 1303                         <histogram>
 1304                             <bin count="0"/>
 1305                              ...
 1306                         </histogram>
 1307                         <histogram_shifted>
 1308                             <bin count="0"/>
 1309                             ...
 1310                         </histogram_shifted>
 1311                     </torsionRule>
 1312                 ...
 1313              ...
 1314             </hierarchyClass>
 1315              <hierarchyClass id1="N" id2="C" name="NC">
 1316              ...
 1317             </hierarchyClass>
 1318             <hierarchyClass id1="S" id2="N" name="SN">
 1319             ...
 1320             </hierarchyClass>
 1321             <hierarchyClass id1="C" id2="S" name="CS">
 1322             ...
 1323             </hierarchyClass>
 1324             <hierarchyClass id1="C" id2="C" name="CC">
 1325             ...
 1326             </hierarchyClass>
 1327             <hierarchyClass id1="S" id2="S" name="SS">
 1328              ...
 1329             </hierarchyClass>
 1330         </library>
 1331         
 1332     The rotatable bonds in a 3D molecule are identified using a default SMARTS pattern.
 1333     A custom SMARTS pattern may be optionally specified to detect rotatable bonds.
 1334     Each rotatable bond is matched to a torsion rule in the torsion strain energy library.
 1335     The strain energy is calculated for each rotatable bond using the calculation
 1336     method, 'exact' or 'approximate', associated with the matched torsion rule.
 1337     
 1338     The total strain energy (TEUs) of a molecule corresponds to the sum of  'exact' and
 1339     'approximate' strain energies calculated for all matched rotatable bonds in the
 1340     molecule. The total strain energy is set to 'NA' for molecules containing a 'approximate'
 1341     energy estimate for a torsion angle not observed in the torsion energy library. In
 1342     addition, the lower and upper bounds on energy at 95% confidence interval are
 1343     set to 'NA'.
 1344     
 1345     The following output files are generated after the filtering:
 1346         
 1347         <OutfileRoot>.sdf
 1348         <OutfileRoot>_Filtered.sdf
 1349         <OutfileRoot>_AlertsSummary.csv
 1350         <OutfileRoot>_Filtered_TopRule*.sdf
 1351         
 1352     The last two set of outfile files, <OutfileRoot>_AlertsSummary.csv and
 1353     <OutfileRoot>_<OutfileRoot>_AlertsSummary.csv, are only generated during filtering
 1354     by 'MaxSingleEnergy'.
 1355     
 1356     The supported input file formats are: Mol (.mol), SD (.sdf, .sd)
 1357     
 1358     The supported output file formats are: SD (.sdf, .sd)
 1359 
 1360 Options:
 1361     -a, --alertsMode <TotalEnergy,...>  [default: TotalEnergy]
 1362         Torsion strain energy library alert types to use for filtering molecules
 1363         containing rotatable bonds based on the calculated values for the total
 1364         torsion strain energy of a molecule and  the maximum single strain
 1365         energy of a rotatable bond in a molecule.
 1366         
 1367         Possible values: TotalEnergy, MaxSingleEnergy, or TotalOrMaxSingleEnergy
 1368         
 1369         The strain energy cutoff values in terms of torsion energy units (TEUs) are
 1370         used to filter molecules as shown below:
 1371             
 1372             AlertsMode                AlertsEnergyCutoffs (TEUs)
 1373             
 1374             TotalEnergy               >= TotalEnergyCutoff
 1375             
 1376             MaxSingleEnergy           >= MaxSingleEnergyCutoff
 1377             
 1378             TotalOrMaxSingleEnergy    >= TotalEnergyCutoff
 1379                                       or >= MaxSingleEnergyCutoff
 1380         
 1381     --alertsMaxSingleEnergyCutoff <Number>  [default: 1.8]
 1382         Maximum single strain energy (TEUs) cutoff [ Ref 153 ] for filtering molecules
 1383         based on the maximum value of a single strain energy of a rotatable bond
 1384         in  a molecule. This option is used during 'MaxSingleEnergy' or
 1385         'TotalOrMaxSingleEnergy' values of '-a, --alertsMode' option.
 1386         
 1387         The maximum single strain energy must be greater than or equal to the
 1388         specified cutoff value for filtering molecules.
 1389     --alertsTotalEnergyCutoff <Number>  [default: 6.0]
 1390         Total strain strain energy (TEUs) cutoff [ Ref 153 ] for filtering molecules
 1391         based on total strain energy for all rotatable bonds in a molecule. This
 1392         option is used during 'TotalEnergy' or 'TotalOrMaxSingleEnergy'
 1393         values of '-a, --alertsMode' option.
 1394         
 1395         The total strain energy must be greater than or equal to the specified
 1396         cutoff value for filtering molecules.
 1397     --filterTorsionsNotObserved <yes or no>  [default: no]
 1398         Filter molecules containing torsion angles not observed in torsion strain
 1399         energy library. It's not possible to calculate torsion strain energies for
 1400         these torsions during 'approximate' match to a specified torsion in the
 1401         library.
 1402         
 1403         The 'approximate' strain energy calculation relies on the angle difference
 1404         between a torsion angle and the torsion peaks observed for the torsion
 1405         rules in the torsion energy library. The torsion angle is matched to a
 1406         torsion peak based on the value of torsion angle difference. It must be
 1407         less than or equal to the value for the second tolerance 'tolerance2'.
 1408         Otherwise, the torsion angle is not observed in the torsion energy library
 1409         and a value of 'NA' is assigned for torsion energy along with the lower and
 1410         upper bounds on energy at 95% confidence interval.
 1411     -e, --examples
 1412         Print examples.
 1413     -h, --help
 1414         Print this help message.
 1415     -i, --infile <infile>
 1416         Input file name.
 1417     --infileParams <Name,Value,...>  [default: auto]
 1418         A comma delimited list of parameter name and value pairs for reading
 1419         molecules from files. The supported parameter names for different file
 1420         formats, along with their default values, are shown below:
 1421             
 1422             SD, MOL: removeHydrogens,no,sanitize,yes,strictParsing,yes
 1423             
 1424     -l, --list
 1425         List torsion library information without performing any filtering.
 1426     -m, --mode <filter or count>  [default: filter]
 1427         Specify whether to filter molecules for torsion strain energy library [ Ref 153 ]
 1428         alerts by matching rotatable bonds against SMARTS patterns specified for
 1429         torsion rules to calculate torsion strain energies and write out the rest
 1430         of the molecules to an outfile or simply count the number of matched
 1431         molecules marked for filtering.
 1432     --mp <yes or no>  [default: no]
 1433         Use multiprocessing.
 1434          
 1435         By default, input data is retrieved in a lazy manner via mp.Pool.imap()
 1436         function employing lazy RDKit data iterable. This allows processing of
 1437         arbitrary large data sets without any additional requirements memory.
 1438         
 1439         All input data may be optionally loaded into memory by mp.Pool.map()
 1440         before starting worker processes in a process pool by setting the value
 1441         of 'inputDataMode' to 'InMemory' in '--mpParams' option.
 1442         
 1443         A word to the wise: The default 'chunkSize' value of 1 during 'Lazy' input
 1444         data mode may adversely impact the performance. The '--mpParams' section
 1445         provides additional information to tune the value of 'chunkSize'.
 1446     --mpParams <Name,Value,...>  [default: auto]
 1447         A comma delimited list of parameter name and value pairs to configure
 1448         multiprocessing.
 1449         
 1450         The supported parameter names along with their default and possible
 1451         values are shown below:
 1452         
 1453             chunkSize, auto
 1454             inputDataMode, Lazy   [ Possible values: InMemory or Lazy ]
 1455             numProcesses, auto   [ Default: mp.cpu_count() ]
 1456         
 1457         These parameters are used by the following functions to configure and
 1458         control the behavior of multiprocessing: mp.Pool(), mp.Pool.map(), and
 1459         mp.Pool.imap().
 1460         
 1461         The chunkSize determines chunks of input data passed to each worker
 1462         process in a process pool by mp.Pool.map() and mp.Pool.imap() functions.
 1463         The default value of chunkSize is dependent on the value of 'inputDataMode'.
 1464         
 1465         The mp.Pool.map() function, invoked during 'InMemory' input data mode,
 1466         automatically converts RDKit data iterable into a list, loads all data into
 1467         memory, and calculates the default chunkSize using the following method
 1468         as shown in its code:
 1469         
 1470             chunkSize, extra = divmod(len(dataIterable), len(numProcesses) * 4)
 1471             if extra: chunkSize += 1
 1472         
 1473         For example, the default chunkSize will be 7 for a pool of 4 worker processes
 1474         and 100 data items.
 1475         
 1476         The mp.Pool.imap() function, invoked during 'Lazy' input data mode, employs
 1477         'lazy' RDKit data iterable to retrieve data as needed, without loading all the
 1478         data into memory. Consequently, the size of input data is not known a priori.
 1479         It's not possible to estimate an optimal value for the chunkSize. The default 
 1480         chunkSize is set to 1.
 1481         
 1482         The default value for the chunkSize during 'Lazy' data mode may adversely
 1483         impact the performance due to the overhead associated with exchanging
 1484         small chunks of data. It is generally a good idea to explicitly set chunkSize to
 1485         a larger value during 'Lazy' input data mode, based on the size of your input
 1486         data and number of processes in the process pool.
 1487         
 1488         The mp.Pool.map() function waits for all worker processes to process all
 1489         the data and return the results. The mp.Pool.imap() function, however,
 1490         returns the the results obtained from worker processes as soon as the
 1491         results become available for specified chunks of data.
 1492         
 1493         The order of data in the results returned by both mp.Pool.map() and 
 1494         mp.Pool.imap() functions always corresponds to the input data.
 1495     -o, --outfile <outfile>
 1496         Output file name.
 1497     --outfileAlerts <yes or no>  [default: yes]
 1498         Write out alerts information to SD output files.
 1499     --outfileAlertsMode <All or AlertsOnly>  [default: AlertsOnly]
 1500         Write alerts information to SD output files for all alerts or only for alerts
 1501         specified by '--AlertsMode' option. Possible values: All or AlertsOnly
 1502         This option is only valid for 'Yes' value of '--outfileAlerts' option.
 1503         
 1504         The following alerts information is added to SD output files using
 1505         'TorsionAlerts' data field:
 1506             
 1507             RotBondIndices TorsionIndices TorsionAngle
 1508             Energy EnergyLowerBoundCI EnergyUpperBoundCI
 1509             HierarchyClass HierarchySubClass TorsionRule
 1510             EnergyMethod AngleNotObserved MaxSingleEnergyAlert
 1511             
 1512         The following data filelds are added to SD output files based on the value of
 1513         '--AlertsMode' option:
 1514             
 1515             TotalEnergy
 1516             TotalEnergyLowerBoundCI
 1517             TotalEnergyUpperBoundCI
 1518             
 1519             MaxSingleEnergy
 1520             MaxSingleEnergyAlertsCount
 1521             
 1522             AnglesNotObservedCount
 1523             
 1524         The 'RotBondsCount' is always added to SD output files containing both
 1525         remaining and filtered molecules.
 1526         
 1527         Format:
 1528             
 1529             > <RotBondsCount>
 1530             Number
 1531             
 1532             > <TotalEnergy>
 1533             Number
 1534             
 1535             > <TotalEnergyLowerBoundCI>
 1536             Number
 1537             
 1538             > <TotalEnergyUpperBoundCI>
 1539             Number
 1540             
 1541             > <MaxSingleEnergy>
 1542             Number
 1543             
 1544             > <MaxSingleEnergyAlertsCount>
 1545             Number
 1546             
 1547             > <AnglesNotObservedCount>
 1548             Number
 1549             
 1550             > <TorsionAlerts (RotBondIndices TorsionIndices TorsionAngle
 1551                 Energy EnergyLowerBoundCI EnergyUpperBoundCI
 1552                 HierarchyClass HierarchySubClass TorsionRule
 1553                 EnergyMethod AngleNotObserved MaxSingleEnergyAlert)>
 1554             AtomIndex2,AtomIndex3  AtomIndex1,AtomIndex2,AtomIndex3,AtomIndex4
 1555             Angle  Energy EnergyLowerBoundCI EnergyUpperBoundCI
 1556             ClassName SubClassName SMARTS EnergyMethod Yes|No|NA Yes|No|NA
 1557             ... ... ...
 1558             ... ... ...
 1559             
 1560         A set of 12 values is written out as value of 'TorsionAlerts' data field for
 1561         each torsion in a molecule. The space character is used as a delimiter
 1562         to separate values with in a set and across set. The comma character
 1563         is used to delimit multiple values for each value in a set.
 1564         
 1565         The 'RotBondIndices' and 'TorsionIndices' contain 2 and 4 comma delimited
 1566         values representing atom indices for a rotatable bond and the matched
 1567         torsion.
 1568         
 1569         The 'Energy' value is the estimated strain energy for the matched torsion.
 1570         The 'EnergyLowerBoundCI' and 'EnergyUpperBoundCI' represent lower and
 1571         bound energy estimates at 95% confidence interval. The 'EnergyMethod',
 1572         exact or approximate, corresponds to the method employed to estimate
 1573         torsion strain energy.
 1574         
 1575         The 'AngleNotObserved' is only valid for 'approximate' value of 'EnergyMethod'.
 1576         It has three possible values: Yes, No, or NA. The 'Yes' value indicates that
 1577         the 'TorsionAngle' is outside the 'tolerance2' of all peaks for the matched
 1578         torsion rule in the torsion library.
 1579         
 1580         The 'MaxSingleEnergyAlert' is valid for the following values of '-a, --alertsMode'
 1581         option: 'MaxSingleEnergy' or 'TotalOrMaxSingleEnergy'. It has three possible
 1582         values: Yes, No, or NA. It's set to 'NA' for 'Yes' or 'NA' values of
 1583         'AngleNotObserved'. The 'Yes' value indicates that the estimated torsion
 1584         energy is greater than the specified value for '--alertsMaxSingleEnergyCutoff'
 1585         option.
 1586         
 1587         For example:
 1588             
 1589             >  <RotBondsCount>  (1) 
 1590             14
 1591             
 1592             >  <TotalEnergy>  (1) 
 1593             6.8065
 1594             
 1595             >  <TotalEnergyLowerBoundCI>  (1) 
 1596             5.9340
 1597             
 1598             >  <TotalEnergyUpperBoundCI>  (1) 
 1599             NA
 1600             
 1601             >  <MaxSingleEnergy>  (1) 
 1602             1.7108
 1603             
 1604             >  <MaxSingleEnergyAlertsCount>  (1) 
 1605             0
 1606             
 1607             >  <AnglesNotObservedCount>  (1) 
 1608             0
 1609              
 1610             >  <TorsionAlerts(RotBondIndices TorsionIndices TorsionAngle Energy
 1611                 EnergyLowerBoundCI EnergyUpperBoundCI HierarchyClass
 1612                 HierarchySubClass TorsionRule EnergyMethod AngleNotObserved
 1613                 MaxSingleEnergyAlert)>  (1) 
 1614             2,1 48,2,1,0 61.90 0.0159 -0.0320 0.0674 CO Ether [O:1][CX4:2]!
 1615             @[O:3][CX4:4] Exact NA No 2,3 1,2,3,4 109.12 1.5640 1.1175 NA CC
 1616             None/[CX4][CX3] [O:1][CX4:2]!@[CX3:3]=[O:4] Exact NA No
 1617             ... ... ...
 1618             
 1619     --outfileFiltered <yes or no>  [default: yes]
 1620         Write out a file containing filtered molecules. Its name is automatically
 1621         generated from the specified output file. Default: <OutfileRoot>_
 1622         Filtered.<OutfileExt>.
 1623     --outfilesFilteredByRules <yes or no>  [default: auto]
 1624         Write out SD files containing filtered molecules for individual torsion
 1625         rules triggering alerts in molecules. The name of SD files are automatically
 1626         generated from the specified output file. Default file names: <OutfileRoot>_
 1627         Filtered_TopRule*.sdf.
 1628         
 1629         Default value: 'yes' for 'MaxSingleEnergy' of '-a, --alertsMode' option';
 1630         otherwise, 'no'.
 1631         
 1632         The output files are only generated for 'MaxSingleEnergy' of
 1633         '-a, --alertsMode' option.
 1634         
 1635         The following alerts information is added to SD output files:
 1636             
 1637             > <RotBondsCount>
 1638             Number
 1639             
 1640             > <TotalEnergy>
 1641             Number
 1642             
 1643             > <TotalEnergyLowerBoundCI>
 1644             Number
 1645             
 1646             > <TotalEnergyUpperBoundCI>
 1647             Number
 1648             
 1649             > <MaxSingleEnergy>
 1650             Number
 1651             
 1652             > <MaxSingleEnergyAlertsCount>
 1653             Number
 1654             
 1655             > <AnglesNotObservedCount>
 1656             Number
 1657             
 1658             >  <TorsionRule (HierarchyClass HierarchySubClass TorsionRule
 1659                 EnergyMethod)> 
 1660             ClassName SubClassName EnergyMethod SMARTS
 1661              ... ... ...
 1662             
 1663             > <TorsionRuleMaxSingleEnergyAlertsCount>
 1664             Number
 1665             
 1666             > <TorsionRuleAnglesNotObservedCount>
 1667             Number
 1668             
 1669             >  <TorsionRuleAlerts (RotBondIndices TorsionIndices TorsionAngle
 1670                 Energy EnergyLowerBoundCI EnergyUpperBoundCI
 1671                 AngleNotObserved MaxSingleEnergyAlert)>
 1672             AtomIndex2,AtomIndex3  AtomIndex1,AtomIndex2,AtomIndex3,AtomIndex4
 1673             Angle Energy EnergyLowerBoundCI EnergyUpperBoundCI EnergyMethod
 1674             Yes|No|NA Yes|No|NA
 1675              ... ... ...
 1676             
 1677         For example:
 1678             
 1679             >  <RotBondsCount>  (1) 
 1680             8
 1681             
 1682             >  <TotalEnergy>  (1) 
 1683             6.1889
 1684             
 1685             >  <TotalEnergyLowerBoundCI>  (1) 
 1686             5.1940
 1687             
 1688             >  <TotalEnergyUpperBoundCI>  (1) 
 1689             NA
 1690             
 1691             >  <MaxSingleEnergy>  (1) 
 1692             1.9576
 1693             
 1694             >  <MaxSingleEnergyAlertsCount>  (1) 
 1695             1
 1696             
 1697             >  <AnglesNotObservedCount>  (1) 
 1698             0
 1699             
 1700             >  <TorsionRule (HierarchyClass HierarchySubClass TorsionRule
 1701                 EnergyMethod)>  (1) 
 1702             CC None/[CX4:2][CX4:3] [!#1:1][CX4:2]!@[CX4:3][!#1:4] Exact
 1703             
 1704             >  <TorsionRuleMaxSingleEnergyAlertsCount>  (1) 
 1705             0
 1706             
 1707             >  <TorsionRuleAnglesNotObservedCount>  (1) 
 1708             0
 1709             
 1710             >  <TorsionRuleAlerts (RotBondIndices TorsionIndices TorsionAngle
 1711                 Energy EnergyLowerBoundCI EnergyUpperBoundCI AngleNotObserved
 1712                MaxSingleEnergyAlert)>  (1) 
 1713             1,3 0,1,3,4 72.63 0.8946 0.8756 0.9145 NA No
 1714             
 1715     --outfilesFilteredByRulesMaxCount <All or number>  [default: 10]
 1716         Write out SD files containing filtered molecules for specified number of
 1717         top N torsion rules triggering alerts for the largest number of molecules
 1718         or for all torsion rules triggering alerts across all molecules.
 1719         
 1720         These output files are only generated for 'MaxSingleEnergy' value of
 1721         '-a, --alertsMode' option.
 1722     --outfileSummary <yes or no>  [default: auto]
 1723         Write out a CVS text file containing summary of torsions rules responsible
 1724         for triggering torsion alerts. Its name is automatically generated from the
 1725         specified output file. Default: <OutfileRoot>_AlertsSummary.csv.
 1726         
 1727         Default value: 'yes' for 'MaxSingleEnergy' of '-a, --alertsMode' option';
 1728         otherwise, 'no'.
 1729         
 1730         The summary output file is only generated for 'MaxSingleEnergy' of
 1731         '-a, --alertsMode' option.
 1732         
 1733         The following alerts information is written to summary text file:
 1734             
 1735             TorsionRule, HierarchyClass, HierarchySubClass, EnergyMethod,
 1736             MaxSingleEnergyTorsionAlertTypes, MaxSingleEnergyTorsionAlertCount,
 1737             MaxSingleEnergyTorsionAlertMolCount
 1738              
 1739         The double quotes characters are removed from SMART patterns before
 1740         before writing them to a CSV file. In addition, the torsion rules are sorted by
 1741         TorsionAlertMolCount.
 1742     --outfileSDFieldLabels <Type,Label,...>  [default: auto]
 1743         A comma delimited list of SD data field type and label value pairs for writing
 1744         torsion alerts information along with molecules to SD files.
 1745         
 1746         The supported SD data field label type along with their default values are
 1747         shown below:
 1748             
 1749             For all SD files:
 1750             
 1751             RotBondsCountLabel, RotBondsCount,
 1752             
 1753             TotalEnergyLabel, TotalEnergy,
 1754             TotalEnergyLowerBoundCILabel, TotalEnergyLowerBoundCI,
 1755             TotalEnergyUpperBoundCILabel, TotalEnergyUpperBoundCI,
 1756             
 1757             MaxSingleEnergyLabel, MaxSingleEnergy,
 1758             MaxSingleEnergyAlertsCountLabel,
 1759                 MaxSingleEnergyAlertsCount
 1760             
 1761             AnglesNotObservedCountLabel,
 1762                 AnglesNotObservedCount
 1763             
 1764             TorsionAlertsLabel, TorsionAlerts(RotBondIndices TorsionIndices
 1765                 TorsionAngle Energy EnergyLowerBoundCI EnergyUpperBoundCI
 1766                 HierarchyClass HierarchySubClass TorsionRule
 1767                 EnergyMethod AngleNotObserved)
 1768             
 1769             For individual SD files filtered by torsion rules:
 1770             
 1771             TorsionRuleLabel, TorsionRule (HierarchyClass HierarchySubClass
 1772                 EnergyMethod TorsionRule)
 1773             TorsionRuleMaxSingleEnergyAlertsCountLabel,
 1774                 TorsionRuleMaxSingleEnergyAlertsCount,
 1775             TorsionRuleAnglesNotObservedCountLabel,
 1776                 TorsionRuleAnglesNotObservedCount,
 1777             TorsionRuleAlertsLabel, TorsionRuleAlerts (RotBondIndices
 1778                 TorsionIndices TorsionAngle Energy EnergyLowerBoundCI
 1779                 EnergyUpperBoundCI EnergyMethod AngleObserved)
 1780             
 1781     --outfileParams <Name,Value,...>  [default: auto]
 1782         A comma delimited list of parameter name and value pairs for writing
 1783         molecules to files. The supported parameter names for different file
 1784         formats, along with their default values, are shown below:
 1785             
 1786             SD: kekulize,yes,forceV3000,no
 1787             
 1788     --overwrite
 1789         Overwrite existing files.
 1790     --precision <number>  [default: 4]
 1791         Floating point precision for writing torsion strain energy values.
 1792     -r, --rotBondsSMARTSMode <NonStrict, SemiStrict,...>  [default: SemiStrict]
 1793         SMARTS pattern to use for identifying rotatable bonds in a molecule
 1794         for matching against torsion rules in the torsion library. Possible values:
 1795         NonStrict, SemiStrict, Strict or Specify. The rotatable bond SMARTS matches
 1796         are filtered to ensure that each atom in the rotatable bond is attached to
 1797         at least two heavy atoms.
 1798         
 1799         The following SMARTS patterns are used to identify rotatable bonds for
 1800         different modes:
 1801             
 1802             NonStrict: [!$(*#*)&!D1]-&!@[!$(*#*)&!D1]
 1803             
 1804             SemiStrict:
 1805             [!$(*#*)&!D1&!$(C(F)(F)F)&!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)
 1806             &!$(C([CH3])([CH3])[CH3])]-!@[!$(*#*)&!D1&!$(C(F)(F)F)
 1807             &!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)&!$(C([CH3])([CH3])[CH3])]
 1808             
 1809             Strict:
 1810             [!$(*#*)&!D1&!$(C(F)(F)F)&!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)
 1811             &!$(C([CH3])([CH3])[CH3])&!$([CD3](=[N,O,S])-!@[#7,O,S!D1])
 1812             &!$([#7,O,S!D1]-!@[CD3]=[N,O,S])&!$([CD3](=[N+])-!@[#7!D1])
 1813             &!$([#7!D1]-!@[CD3]=[N+])]-!@[!$(*#*)&!D1&!$(C(F)(F)F)
 1814             &!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)&!$(C([CH3])([CH3])[CH3])]
 1815             
 1816         The 'NonStrict' and 'Strict' SMARTS patterns are available in RDKit. The 
 1817         'NonStrict' SMARTS pattern corresponds to original Daylight SMARTS
 1818         specification for rotatable bonds. The 'SemiStrict' SMARTS pattern is 
 1819         derived from 'Strict' SMARTS pattern for its usage in this script.
 1820         
 1821         You may use any arbitrary SMARTS pattern to identify rotatable bonds by
 1822         choosing 'Specify' value for '-r, --rotBondsSMARTSMode' option and providing its
 1823         value via '--rotBondsSMARTSPattern' option.
 1824     --rotBondsSMARTSPattern <SMARTS>
 1825         SMARTS pattern for identifying rotatable bonds. This option is only valid
 1826         for 'Specify' value of '-r, --rotBondsSMARTSMode' option.
 1827     -t, --torsionEnergyLibraryFile <FileName or auto>  [default: auto]
 1828         Specify a XML file name containing data for torsion starin energy library
 1829         hierarchy or use default file, TorsionEnergyLibrary.xml, available in
 1830         MAYACHEMTOOLS/lib/Python/TorsionAlerts directory.
 1831         
 1832         The format of data in local XML file must match format of the data in Torsion
 1833         Library [ Ref 153 ] file available in MAYACHEMTOOLS directory.
 1834     -w, --workingdir <dir>
 1835         Location of working directory which defaults to the current directory.
 1836 
 1837 Examples:
 1838     To filter molecules containing rotatable bonds with total strain energy value
 1839     of >= 6.0 (TEUs) based on torsion rules in the torsion energy library and write
 1840     write out SD files containing remaining and filtered molecules, type:
 1841 
 1842         % RDKitFilterTorsionStrainEnergyAlerts.py -i Sample3D.sdf
 1843           -o Sample3DOut.sdf
 1844 
 1845     To filter molecules containing any rotatable bonds with strain energy value of
 1846     >= 1.8 (TEUs) based on torsion rules in the torsion energy library and write out
 1847     SD files containing remaining and filtered molecules, and individual SD files for
 1848     torsion rules triggering alerts along with appropriate torsion information for
 1849     red alerts, type:
 1850 
 1851         % RDKitFilterTorsionStrainEnergyAlerts.py -a MaxSingleEnergy
 1852           -i Sample3D.sdf -o Sample3DOut.sdf
 1853 
 1854     To filter molecules containing rotatable bonds with total strain energy value
 1855     of >= 6.0 (TEUs) or any single strain energy value of >= 1.8 (TEUs) and write out
 1856     SD files containing remaining and filtered molecules, type:
 1857 
 1858         % RDKitFilterTorsionStrainEnergyAlerts.py -a TotalOrMaxSingleEnergy
 1859           -i Sample3D.sdf -o Sample3DOut.sdf
 1860 
 1861     To filter molecules containing rotatable bonds with specific cutoff values for
 1862     total or single torsion strain energy and write out SD files containing
 1863     remaining and filtered molecules, type:
 1864 
 1865         % RDKitFilterTorsionStrainEnergyAlerts.py -a TotalOrMaxSingleEnergy
 1866           -i Sample3D.sdf -o Sample3DOut.sdf --alertsTotalEnergyCutoff 6.0
 1867           --alertsMaxSingleEnergyCutoff 1.8
 1868 
 1869     To run the first example for filtering molecules and writing out torsion
 1870     information for all alert types to SD files, type:
 1871 
 1872         % RDKitFilterTorsionStrainEnergyAlerts.py -i Sample3D.sdf
 1873           -o Sample3DOut.sdf --outfileAlertsMode All
 1874 
 1875     To run the first example for filtering molecules in multiprocessing mode on
 1876     all available CPUs without loading all data into memory and write out SD files,
 1877     type:
 1878 
 1879         % RDKitFilterTorsionStrainEnergyAlerts.py --mp yes -i Sample3D.sdf
 1880          -o Sample3DOut.sdf
 1881 
 1882     To run the first example for filtering molecules in multiprocessing mode on
 1883     all available CPUs by loading all data into memory and write out a SD files,
 1884     type:
 1885 
 1886         % RDKitFilterTorsionStrainEnergyAlerts.py  --mp yes --mpParams
 1887           "inputDataMode, InMemory" -i Sample3D.sdf  -o Sample3DOut.sdf
 1888 
 1889     To run the first example for filtering molecules in multiprocessing mode on
 1890     specific number of CPUs and chunksize without loading all data into memory
 1891     and write out SD files, type:
 1892 
 1893         % RDKitFilterTorsionStrainEnergyAlerts.py --mp yes --mpParams
 1894           "inputDataMode,lazy,numProcesses,4,chunkSize,8"  -i Sample3D.sdf
 1895           -o Sample3DOut.sdf
 1896 
 1897     To list information about default torsion library file without performing any
 1898     filtering, type:
 1899 
 1900         % RDKitFilterTorsionStrainEnergyAlerts.py -l
 1901 
 1902     To list information about a local torsion library XML file without performing
 1903     any, filtering, type:
 1904 
 1905         % RDKitFilterTorsionStrainEnergyAlerts.py --torsionEnergyLibraryFile
 1906           TorsionStrainEnergyLibrary.xml -l
 1907 
 1908 Author:
 1909     Manish Sud (msud@san.rr.com)
 1910 
 1911 Collaborator:
 1912     Pat Walters
 1913 
 1914 See also:
 1915     RDKitFilterChEMBLAlerts.py, RDKitFilterPAINS.py, RDKitFilterTorsionLibraryAlerts.py,
 1916     RDKitConvertFileFormat.py, RDKitSearchSMARTS.py
 1917 
 1918 Copyright:
 1919     Copyright (C) 2024 Manish Sud. All rights reserved.
 1920 
 1921     This script uses the torsion strain energy library developed by Gu, S.;
 1922     Smith, M. S.; Yang, Y.; Irwin, J. J.; Shoichet, B. K. [ Ref 153 ].
 1923 
 1924     The torsion strain enegy library is based on the Torsion Library jointly
 1925     developed by the University of Hamburg, Center for Bioinformatics,
 1926     Hamburg, Germany and F. Hoffmann-La-Roche Ltd., Basel, Switzerland.
 1927 
 1928     The functionality available in this script is implemented using RDKit, an
 1929     open source toolkit for cheminformatics developed by Greg Landrum.
 1930 
 1931     This file is part of MayaChemTools.
 1932 
 1933     MayaChemTools is free software; you can redistribute it and/or modify it under
 1934     the terms of the GNU Lesser General Public License as published by the Free
 1935     Software Foundation; either version 3 of the License, or (at your option) any
 1936     later version.
 1937 
 1938 """
 1939 
 1940 if __name__ == "__main__":
 1941     main()