MayaChemTools

    1 #!/bin/env python
    2 #
    3 # File: RDKitFilterTorsionLibraryAlerts.py
    4 # Author: Manish Sud <msud@san.rr.com>
    5 #
    6 # Collaborator: Pat Walters
    7 #
    8 # Acknowledgments: Wolfgang Guba, Patrick Penner, and Levi Pierce
    9 #
   10 # Copyright (C) 2026 Manish Sud. All rights reserved.
   11 #
   12 # This script uses the Torsion Library jointly developed by the University
   13 # of Hamburg, Center for Bioinformatics, Hamburg, Germany and
   14 # F. Hoffmann-La-Roche Ltd., Basel, Switzerland.
   15 #
   16 # The functionality available in this script is implemented using RDKit, an
   17 # open source toolkit for cheminformatics developed by Greg Landrum.
   18 #
   19 # This file is part of MayaChemTools.
   20 #
   21 # MayaChemTools is free software; you can redistribute it and/or modify it under
   22 # the terms of the GNU Lesser General Public License as published by the Free
   23 # Software Foundation; either version 3 of the License, or (at your option) any
   24 # later version.
   25 #
   26 # MayaChemTools is distributed in the hope that it will be useful, but without
   27 # any warranty; without even the implied warranty of merchantability of fitness
   28 # for a particular purpose.  See the GNU Lesser General Public License for more
   29 # details.
   30 #
   31 # You should have received a copy of the GNU Lesser General Public License
   32 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
   33 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
   34 # Boston, MA, 02111-1307, USA.
   35 #
   36 
   37 from __future__ import print_function
   38 
   39 import os
   40 import sys
   41 import time
   42 import re
   43 import glob
   44 import multiprocessing as mp
   45 
   46 # RDKit imports...
   47 try:
   48     from rdkit import rdBase
   49     from rdkit import Chem
   50 except ImportError as ErrMsg:
   51     sys.stderr.write("\nFailed to import RDKit module/package: %s\n" % ErrMsg)
   52     sys.stderr.write("Check/update your RDKit environment and try again.\n\n")
   53     sys.exit(1)
   54 
   55 # MayaChemTools imports...
   56 sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python"))
   57 try:
   58     from docopt import docopt
   59     import MiscUtil
   60     import RDKitUtil
   61     from TorsionAlerts.TorsionLibraryAlerts import TorsionLibraryAlerts
   62 except ImportError as ErrMsg:
   63     sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg)
   64     sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n")
   65     sys.exit(1)
   66 
   67 ScriptName = os.path.basename(sys.argv[0])
   68 Options = {}
   69 OptionsInfo = {}
   70 
   71 
   72 def main():
   73     """Start execution of the script."""
   74 
   75     MiscUtil.PrintInfo(
   76         "\n%s (RDKit v%s; MayaChemTools v%s; %s): Starting...\n"
   77         % (ScriptName, rdBase.rdkitVersion, MiscUtil.GetMayaChemToolsVersion(), time.asctime())
   78     )
   79 
   80     (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime()
   81 
   82     # Retrieve command line arguments and options...
   83     RetrieveOptions()
   84 
   85     if Options["--list"]:
   86         # Handle listing of torsion library information...
   87         ProcessListTorsionLibraryOption()
   88     else:
   89         # Process and validate command line arguments and options...
   90         ProcessOptions()
   91 
   92         # Perform actions required by the script...
   93         PerformFiltering()
   94 
   95     MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName)
   96     MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime))
   97 
   98 
   99 def PerformFiltering():
  100     """Filter molecules using SMARTS torsion rules in the torsion library file."""
  101 
  102     # Setup a molecule reader...
  103     MiscUtil.PrintInfo("\nProcessing file %s..." % OptionsInfo["Infile"])
  104     Mols = RDKitUtil.ReadMolecules(OptionsInfo["Infile"], **OptionsInfo["InfileParams"])
  105 
  106     MolCount, ValidMolCount, RemainingMolCount, WriteFailedCount = ProcessMolecules(Mols)
  107 
  108     MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount)
  109     MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount)
  110     MiscUtil.PrintInfo("Number of molecules failed during writing: %d" % WriteFailedCount)
  111     MiscUtil.PrintInfo("Number of ignored molecules: %d" % (MolCount - ValidMolCount + WriteFailedCount))
  112 
  113     MiscUtil.PrintInfo("\nNumber of remaining molecules: %d" % RemainingMolCount)
  114     MiscUtil.PrintInfo("Number of filtered molecules: %d" % (ValidMolCount - RemainingMolCount))
  115 
  116 
  117 def ProcessMolecules(Mols):
  118     """Process and filter molecules."""
  119 
  120     if OptionsInfo["MPMode"]:
  121         return ProcessMoleculesUsingMultipleProcesses(Mols)
  122     else:
  123         return ProcessMoleculesUsingSingleProcess(Mols)
  124 
  125 
  126 def ProcessMoleculesUsingSingleProcess(Mols):
  127     """Process and filter molecules using a single process."""
  128 
  129     # Instantiate torsion library alerts class...
  130     TorsionLibraryAlertsHandle = InstantiateTorsionLibraryAlertsClass()
  131 
  132     MiscUtil.PrintInfo("\nFiltering molecules...")
  133 
  134     OutfileFilteredMode = OptionsInfo["OutfileFilteredMode"]
  135 
  136     # Set up writers...
  137     OutfilesWriters = SetupOutfilesWriters()
  138 
  139     WriterRemaining = OutfilesWriters["WriterRemaining"]
  140     WriterFiltered = OutfilesWriters["WriterFiltered"]
  141     WriterAlertSummary = OutfilesWriters["WriterAlertSummary"]
  142 
  143     # Initialize alerts summary info...
  144     TorsionAlertsSummaryInfo = InitializeTorsionAlertsSummaryInfo()
  145 
  146     (MolCount, ValidMolCount, RemainingMolCount, WriteFailedCount, FilteredMolWriteCount) = [0] * 5
  147     for Mol in Mols:
  148         MolCount += 1
  149 
  150         if Mol is None:
  151             continue
  152 
  153         if RDKitUtil.IsMolEmpty(Mol):
  154             MiscUtil.PrintWarning("Ignoring empty molecule: %s" % RDKitUtil.GetMolName(Mol, MolCount))
  155             continue
  156 
  157         # Check for 3D flag...
  158         if not Mol.GetConformer().Is3D():
  159             MiscUtil.PrintWarning("3D tag is not set. Ignoring molecule: %s\n" % RDKitUtil.GetMolName(Mol, MolCount))
  160             continue
  161 
  162         ValidMolCount += 1
  163 
  164         # Identify torsion library alerts for rotatable bonds..
  165         RotBondsAlertsStatus, RotBondsAlertsInfo = (
  166             TorsionLibraryAlertsHandle.IdentifyTorsionLibraryAlertsForRotatableBonds(Mol)
  167         )
  168 
  169         TrackTorsionAlertsSummaryInfo(TorsionAlertsSummaryInfo, RotBondsAlertsInfo)
  170 
  171         # Write out filtered and remaining molecules...
  172         WriteStatus = True
  173         if RotBondsAlertsStatus:
  174             if OutfileFilteredMode:
  175                 WriteStatus = WriteMolecule(WriterFiltered, Mol, RotBondsAlertsInfo)
  176                 if WriteStatus:
  177                     FilteredMolWriteCount += 1
  178         else:
  179             RemainingMolCount += 1
  180             WriteStatus = WriteMolecule(WriterRemaining, Mol, RotBondsAlertsInfo)
  181 
  182         if not WriteStatus:
  183             WriteFailedCount += 1
  184 
  185     WriteTorsionAlertsSummaryInfo(WriterAlertSummary, TorsionAlertsSummaryInfo)
  186     CloseOutfilesWriters(OutfilesWriters)
  187 
  188     if FilteredMolWriteCount:
  189         WriteTorsionAlertsFilteredByRulesInfo(TorsionAlertsSummaryInfo)
  190 
  191     return (MolCount, ValidMolCount, RemainingMolCount, WriteFailedCount)
  192 
  193 
  194 def ProcessMoleculesUsingMultipleProcesses(Mols):
  195     """Process and filter molecules using multiprocessing."""
  196 
  197     MiscUtil.PrintInfo("\nFiltering molecules using multiprocessing...")
  198 
  199     MPParams = OptionsInfo["MPParams"]
  200     OutfileFilteredMode = OptionsInfo["OutfileFilteredMode"]
  201 
  202     # Instantiate torsion strain energy alerts class to list torsion library information...
  203     InstantiateTorsionLibraryAlertsClass()
  204 
  205     # Set up writers...
  206     OutfilesWriters = SetupOutfilesWriters()
  207 
  208     WriterRemaining = OutfilesWriters["WriterRemaining"]
  209     WriterFiltered = OutfilesWriters["WriterFiltered"]
  210     WriterAlertSummary = OutfilesWriters["WriterAlertSummary"]
  211 
  212     # Initialize alerts summary info...
  213     TorsionAlertsSummaryInfo = InitializeTorsionAlertsSummaryInfo()
  214 
  215     # Setup data for initializing a worker process...
  216     MiscUtil.PrintInfo("Encoding options info and rotatable bond pattern molecule...")
  217     InitializeWorkerProcessArgs = (
  218         MiscUtil.ObjectToBase64EncodedString(Options),
  219         MiscUtil.ObjectToBase64EncodedString(OptionsInfo),
  220     )
  221 
  222     # Setup a encoded mols data iterable for a worker process...
  223     WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(Mols)
  224 
  225     # Setup process pool along with data initialization for each process...
  226     MiscUtil.PrintInfo(
  227         "\nConfiguring multiprocessing using %s method..."
  228         % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()")
  229     )
  230     MiscUtil.PrintInfo(
  231         "NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n"
  232         % (
  233             MPParams["NumProcesses"],
  234             MPParams["InputDataMode"],
  235             ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"]),
  236         )
  237     )
  238 
  239     ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeWorkerProcess, InitializeWorkerProcessArgs)
  240 
  241     # Start processing...
  242     if re.match("^Lazy$", MPParams["InputDataMode"], re.I):
  243         Results = ProcessPool.imap(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
  244     elif re.match("^InMemory$", MPParams["InputDataMode"], re.I):
  245         Results = ProcessPool.map(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
  246     else:
  247         MiscUtil.PrintError(
  248             'The value, %s, specified for "--inputDataMode" is not supported.' % (MPParams["InputDataMode"])
  249         )
  250 
  251     (MolCount, ValidMolCount, RemainingMolCount, WriteFailedCount, FilteredMolWriteCount) = [0] * 5
  252     for Result in Results:
  253         MolCount += 1
  254         MolIndex, EncodedMol, RotBondsAlertsStatus, RotBondsAlertsInfo = Result
  255 
  256         if EncodedMol is None:
  257             continue
  258         ValidMolCount += 1
  259 
  260         Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
  261 
  262         TrackTorsionAlertsSummaryInfo(TorsionAlertsSummaryInfo, RotBondsAlertsInfo)
  263 
  264         # Write out filtered and remaining molecules...
  265         WriteStatus = True
  266         if RotBondsAlertsStatus:
  267             if OutfileFilteredMode:
  268                 WriteStatus = WriteMolecule(WriterFiltered, Mol, RotBondsAlertsInfo)
  269                 if WriteStatus:
  270                     FilteredMolWriteCount += 1
  271         else:
  272             RemainingMolCount += 1
  273             WriteStatus = WriteMolecule(WriterRemaining, Mol, RotBondsAlertsInfo)
  274 
  275         if not WriteStatus:
  276             WriteFailedCount += 1
  277 
  278     WriteTorsionAlertsSummaryInfo(WriterAlertSummary, TorsionAlertsSummaryInfo)
  279     CloseOutfilesWriters(OutfilesWriters)
  280 
  281     if FilteredMolWriteCount:
  282         WriteTorsionAlertsFilteredByRulesInfo(TorsionAlertsSummaryInfo)
  283 
  284     return (MolCount, ValidMolCount, RemainingMolCount, WriteFailedCount)
  285 
  286 
  287 def InitializeWorkerProcess(*EncodedArgs):
  288     """Initialize data for a worker process."""
  289 
  290     global Options, OptionsInfo
  291 
  292     MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid())
  293 
  294     # Decode Options and OptionInfo...
  295     Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0])
  296     OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1])
  297 
  298     # Instantiate torsion slibrary alerts class...
  299     OptionsInfo["TorsionLibraryAlertsHandle"] = InstantiateTorsionLibraryAlertsClass(Quiet=True)
  300 
  301 
  302 def WorkerProcess(EncodedMolInfo):
  303     """Process data for a worker process."""
  304 
  305     MolIndex, EncodedMol = EncodedMolInfo
  306 
  307     if EncodedMol is None:
  308         return [MolIndex, None, False, None]
  309 
  310     Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
  311     if RDKitUtil.IsMolEmpty(Mol):
  312         MolName = RDKitUtil.GetMolName(Mol, (MolIndex + 1))
  313         MiscUtil.PrintWarning("Ignoring empty molecule: %s" % MolName)
  314         return [MolIndex, None, False, None]
  315 
  316     # Check for 3D flag...
  317     if not Mol.GetConformer().Is3D():
  318         MolName = RDKitUtil.GetMolName(Mol, (MolIndex + 1))
  319         MiscUtil.PrintWarning("3D tag is not set. Ignoring molecule: %s\n" % MolName)
  320         return [MolIndex, None, False, None]
  321 
  322     # Identify torsion library alerts for rotatable bonds..
  323     TorsionTorsionLibraryAlertsHandle = OptionsInfo["TorsionLibraryAlertsHandle"]
  324     RotBondsAlertsStatus, RotBondsAlertsInfo = (
  325         TorsionTorsionLibraryAlertsHandle.IdentifyTorsionLibraryAlertsForRotatableBonds(Mol)
  326     )
  327 
  328     return [MolIndex, EncodedMol, RotBondsAlertsStatus, RotBondsAlertsInfo]
  329 
  330 
  331 def InitializeTorsionAlertsSummaryInfo():
  332     """Initialize torsion alerts summary."""
  333 
  334     if OptionsInfo["CountMode"]:
  335         return None
  336 
  337     if not OptionsInfo["TrackAlertsSummaryInfo"]:
  338         return None
  339 
  340     TorsionAlertsSummaryInfo = {}
  341     TorsionAlertsSummaryInfo["RuleIDs"] = []
  342 
  343     for DataLabel in [
  344         "SMARTSToRuleIDs",
  345         "RuleSMARTS",
  346         "HierarchyClassName",
  347         "HierarchySubClassName",
  348         "TorsionRulePeaks",
  349         "TorsionRuleTolerances1",
  350         "TorsionRuleTolerances2",
  351         "AlertTypes",
  352         "AlertTypesMolCount",
  353     ]:
  354         TorsionAlertsSummaryInfo[DataLabel] = {}
  355 
  356     return TorsionAlertsSummaryInfo
  357 
  358 
  359 def TrackTorsionAlertsSummaryInfo(TorsionAlertsSummaryInfo, RotBondsAlertsInfo):
  360     """Track torsion alerts summary information for matched torsion rules in a
  361     molecule."""
  362 
  363     if OptionsInfo["CountMode"]:
  364         return
  365 
  366     if not OptionsInfo["TrackAlertsSummaryInfo"]:
  367         return
  368 
  369     if RotBondsAlertsInfo is None:
  370         return
  371 
  372     MolAlertsInfo = {}
  373     MolAlertsInfo["RuleIDs"] = []
  374     MolAlertsInfo["AlertTypes"] = {}
  375 
  376     for ID in RotBondsAlertsInfo["IDs"]:
  377         if not RotBondsAlertsInfo["MatchStatus"][ID]:
  378             continue
  379 
  380         if OptionsInfo["OutfileAlertsOnly"]:
  381             if RotBondsAlertsInfo["AlertTypes"][ID] not in OptionsInfo["SpecifiedAlertsModeList"]:
  382                 continue
  383 
  384         AlertType = RotBondsAlertsInfo["AlertTypes"][ID]
  385         TorsionRuleNodeID = RotBondsAlertsInfo["TorsionRuleNodeID"][ID]
  386         TorsionRuleSMARTS = RotBondsAlertsInfo["TorsionRuleSMARTS"][ID]
  387 
  388         # Track data for torsion alert summary information across molecules...
  389         if TorsionRuleNodeID not in TorsionAlertsSummaryInfo["RuleSMARTS"]:
  390             TorsionAlertsSummaryInfo["RuleIDs"].append(TorsionRuleNodeID)
  391             TorsionAlertsSummaryInfo["SMARTSToRuleIDs"][TorsionRuleSMARTS] = TorsionRuleNodeID
  392 
  393             TorsionAlertsSummaryInfo["RuleSMARTS"][TorsionRuleNodeID] = TorsionRuleSMARTS
  394             TorsionAlertsSummaryInfo["HierarchyClassName"][TorsionRuleNodeID] = RotBondsAlertsInfo[
  395                 "HierarchyClassNames"
  396             ][ID]
  397             TorsionAlertsSummaryInfo["HierarchySubClassName"][TorsionRuleNodeID] = RotBondsAlertsInfo[
  398                 "HierarchySubClassNames"
  399             ][ID]
  400 
  401             TorsionAlertsSummaryInfo["TorsionRulePeaks"][TorsionRuleNodeID] = RotBondsAlertsInfo["TorsionRulePeaks"][ID]
  402             TorsionAlertsSummaryInfo["TorsionRuleTolerances1"][TorsionRuleNodeID] = RotBondsAlertsInfo[
  403                 "TorsionRuleTolerances1"
  404             ][ID]
  405             TorsionAlertsSummaryInfo["TorsionRuleTolerances2"][TorsionRuleNodeID] = RotBondsAlertsInfo[
  406                 "TorsionRuleTolerances2"
  407             ][ID]
  408 
  409             # Initialize number of alert types across all molecules...
  410             TorsionAlertsSummaryInfo["AlertTypes"][TorsionRuleNodeID] = {}
  411 
  412             # Initialize number of molecules flagged by each alert type...
  413             TorsionAlertsSummaryInfo["AlertTypesMolCount"][TorsionRuleNodeID] = {}
  414 
  415         if AlertType not in TorsionAlertsSummaryInfo["AlertTypes"][TorsionRuleNodeID]:
  416             TorsionAlertsSummaryInfo["AlertTypes"][TorsionRuleNodeID][AlertType] = 0
  417             TorsionAlertsSummaryInfo["AlertTypesMolCount"][TorsionRuleNodeID][AlertType] = 0
  418 
  419         TorsionAlertsSummaryInfo["AlertTypes"][TorsionRuleNodeID][AlertType] += 1
  420 
  421         # Track data for torsion alert information in a molecule...
  422         if TorsionRuleNodeID not in MolAlertsInfo["AlertTypes"]:
  423             MolAlertsInfo["RuleIDs"].append(TorsionRuleNodeID)
  424             MolAlertsInfo["AlertTypes"][TorsionRuleNodeID] = {}
  425 
  426         if AlertType not in MolAlertsInfo["AlertTypes"][TorsionRuleNodeID]:
  427             MolAlertsInfo["AlertTypes"][TorsionRuleNodeID][AlertType] = 0
  428         MolAlertsInfo["AlertTypes"][TorsionRuleNodeID][AlertType] += 1
  429 
  430     # Track number of molecules flagged by a specific torsion alert...
  431     for TorsionRuleNodeID in MolAlertsInfo["RuleIDs"]:
  432         for AlertType in MolAlertsInfo["AlertTypes"][TorsionRuleNodeID]:
  433             if MolAlertsInfo["AlertTypes"][TorsionRuleNodeID][AlertType]:
  434                 TorsionAlertsSummaryInfo["AlertTypesMolCount"][TorsionRuleNodeID][AlertType] += 1
  435 
  436 
  437 def WriteTorsionAlertsSummaryInfo(Writer, TorsionAlertsSummaryInfo):
  438     """Write out torsion alerts summary informatio to a CSV file."""
  439 
  440     if OptionsInfo["CountMode"]:
  441         return
  442 
  443     if not OptionsInfo["OutfileSummaryMode"]:
  444         return
  445 
  446     if len(TorsionAlertsSummaryInfo["RuleIDs"]) == 0:
  447         return
  448 
  449     # Write headers...
  450     QuoteValues = True
  451     Values = [
  452         "TorsionRule",
  453         "TorsionPeaks",
  454         "Tolerances1",
  455         "Tolerances2",
  456         "HierarchyClass",
  457         "HierarchySubClass",
  458         "TorsionAlertTypes",
  459         "TorsionAlertCount",
  460         "TorsionAlertMolCount",
  461     ]
  462     Writer.write("%s\n" % MiscUtil.JoinWords(Values, ",", QuoteValues))
  463 
  464     SortedRuleIDs = GetSortedTorsionAlertsSummaryInfoRuleIDs(TorsionAlertsSummaryInfo)
  465 
  466     # Write alerts information...
  467     for ID in SortedRuleIDs:
  468         # Remove any double quotes in SMARTS...
  469         RuleSMARTS = TorsionAlertsSummaryInfo["RuleSMARTS"][ID]
  470         RuleSMARTS = re.sub('"', "", RuleSMARTS, re.I)
  471 
  472         HierarchyClassName = TorsionAlertsSummaryInfo["HierarchyClassName"][ID]
  473         HierarchySubClassName = TorsionAlertsSummaryInfo["HierarchySubClassName"][ID]
  474 
  475         TorsionPeaks = MiscUtil.JoinWords(
  476             ["%s" % Value for Value in TorsionAlertsSummaryInfo["TorsionRulePeaks"][ID]], ","
  477         )
  478         TorsionRuleTolerances1 = MiscUtil.JoinWords(
  479             ["%s" % Value for Value in TorsionAlertsSummaryInfo["TorsionRuleTolerances1"][ID]], ","
  480         )
  481         TorsionRuleTolerances2 = MiscUtil.JoinWords(
  482             ["%s" % Value for Value in TorsionAlertsSummaryInfo["TorsionRuleTolerances2"][ID]], ","
  483         )
  484 
  485         AlertTypes = []
  486         AlertTypeCount = []
  487         AlertTypeMolCount = []
  488         for AlertType in sorted(TorsionAlertsSummaryInfo["AlertTypes"][ID]):
  489             AlertTypes.append(AlertType)
  490             AlertTypeCount.append("%s" % TorsionAlertsSummaryInfo["AlertTypes"][ID][AlertType])
  491             AlertTypeMolCount.append("%s" % TorsionAlertsSummaryInfo["AlertTypesMolCount"][ID][AlertType])
  492 
  493         Values = [
  494             RuleSMARTS,
  495             TorsionPeaks,
  496             TorsionRuleTolerances1,
  497             TorsionRuleTolerances2,
  498             HierarchyClassName,
  499             HierarchySubClassName,
  500             "%s" % MiscUtil.JoinWords(AlertTypes, ","),
  501             "%s" % (MiscUtil.JoinWords(AlertTypeCount, ",")),
  502             "%s" % (MiscUtil.JoinWords(AlertTypeMolCount, ",")),
  503         ]
  504         Writer.write("%s\n" % MiscUtil.JoinWords(Values, ",", QuoteValues))
  505 
  506 
  507 def GetSortedTorsionAlertsSummaryInfoRuleIDs(TorsionAlertsSummaryInfo):
  508     """Sort torsion rule IDs by  alert types molecule count in descending order."""
  509 
  510     SortedRuleIDs = []
  511 
  512     RuleIDs = TorsionAlertsSummaryInfo["RuleIDs"]
  513     if len(RuleIDs) == 0:
  514         return SortedRuleIDs
  515 
  516     # Setup a map from AlertTypesMolCount to IDs for sorting alerts...
  517     RuleIDs = TorsionAlertsSummaryInfo["RuleIDs"]
  518     MolCountMap = {}
  519     for ID in RuleIDs:
  520         MolCount = 0
  521         for AlertType in sorted(TorsionAlertsSummaryInfo["AlertTypes"][ID]):
  522             MolCount += TorsionAlertsSummaryInfo["AlertTypesMolCount"][ID][AlertType]
  523         MolCountMap[ID] = MolCount
  524 
  525     SortedRuleIDs = sorted(RuleIDs, key=lambda ID: MolCountMap[ID], reverse=True)
  526 
  527     return SortedRuleIDs
  528 
  529 
  530 def WriteTorsionAlertsFilteredByRulesInfo(TorsionAlertsSummaryInfo):
  531     """Write out torsion alerts SD files for individual torsion rules."""
  532 
  533     if OptionsInfo["CountMode"]:
  534         return
  535 
  536     if not OptionsInfo["OutfilesFilteredByRulesMode"]:
  537         return
  538 
  539     if len(TorsionAlertsSummaryInfo["RuleIDs"]) == 0:
  540         return
  541 
  542     # Setup a molecule reader for filtered molecules...
  543     FilteredMols = RDKitUtil.ReadMolecules(OptionsInfo["OutfileFiltered"], **OptionsInfo["InfileParams"])
  544 
  545     # Get torsion rule IDs for writing out filtered SD files for individual torsion alert rules...
  546     TorsionRuleIDs = GetTorsionAlertsFilteredByRuleFilesRuleIDs(TorsionAlertsSummaryInfo)
  547 
  548     # Setup writers...
  549     ByRuleOutfilesWriters = SetupByRuleOutfilesWriters(TorsionRuleIDs)
  550 
  551     for Mol in FilteredMols:
  552         # Retrieve torsion alerts info...
  553         TorsionAlertsInfo = RetrieveTorsionAlertsInfo(Mol, TorsionAlertsSummaryInfo)
  554         if TorsionAlertsInfo is None:
  555             continue
  556 
  557         for TorsionRuleID in TorsionRuleIDs:
  558             if TorsionRuleID not in TorsionAlertsInfo["RuleSMARTS"]:
  559                 continue
  560 
  561             WriteMoleculeFilteredByRuleID(
  562                 ByRuleOutfilesWriters[TorsionRuleID], Mol, TorsionRuleID, TorsionAlertsSummaryInfo, TorsionAlertsInfo
  563             )
  564 
  565     CloseByRuleOutfilesWriters(ByRuleOutfilesWriters)
  566 
  567 
  568 def GetTorsionAlertsFilteredByRuleFilesRuleIDs(TorsionAlertsSummaryInfo):
  569     """Get torsion rule IDs for writing out individual SD files filtered by torsion alert rules."""
  570 
  571     # Get torsion rule IDs triggering torsion alerts sorted in the order from the most to
  572     # the least number of unique molecules...
  573     RuleIDs = GetSortedTorsionAlertsSummaryInfoRuleIDs(TorsionAlertsSummaryInfo)
  574 
  575     # Select torsion rule IDs for writing out SD files...
  576     if not OptionsInfo["OutfilesFilteredByRulesAllMode"]:
  577         MaxRuleIDs = OptionsInfo["OutfilesFilteredByRulesMaxCount"]
  578         if MaxRuleIDs < len(RuleIDs):
  579             RuleIDs = RuleIDs[0:MaxRuleIDs]
  580 
  581     return RuleIDs
  582 
  583 
  584 def RetrieveTorsionAlertsInfo(Mol, TorsionAlertsSummaryInfo):
  585     """Parse torsion alerts data field value to retrieve alerts information for rotatable bonds."""
  586 
  587     TorsionAlertsLabel = OptionsInfo["SDFieldIDsToLabels"]["TorsionAlertsLabel"]
  588     TorsionAlerts = Mol.GetProp(TorsionAlertsLabel) if Mol.HasProp(TorsionAlertsLabel) else None
  589 
  590     if TorsionAlerts is None or len(TorsionAlerts) == 0:
  591         return None
  592 
  593     # Initialize for tracking by rule IDs...
  594     TorsionAlertsInfo = {}
  595     TorsionAlertsInfo["RuleIDs"] = []
  596 
  597     for DataLabel in [
  598         "RuleSMARTS",
  599         "HierarchyClassName",
  600         "HierarchySubClassName",
  601         "TorsionRulePeaks",
  602         "TorsionRuleTolerances1",
  603         "TorsionRuleTolerances2",
  604         "AlertTypes",
  605         "AtomIndices",
  606         "TorsionAtomIndices",
  607         "TorsionAngles",
  608         "TorsionAngleViolations",
  609         "AlertTypesCount",
  610     ]:
  611         TorsionAlertsInfo[DataLabel] = {}
  612 
  613     ValuesDelimiter = OptionsInfo["IntraSetValuesDelim"]
  614     TorsionAlertsSetSize = 11
  615 
  616     TorsionAlertsWords = TorsionAlerts.split()
  617     if len(TorsionAlertsWords) % TorsionAlertsSetSize:
  618         MiscUtil.PrintError(
  619             "The number of space delimited values, %s, for TorsionAlerts data field in filtered SD file must be a multiple of %s."
  620             % (len(TorsionAlertsWords), TorsionAlertsSetSize)
  621         )
  622 
  623     ID = 0
  624     for Index in range(0, len(TorsionAlertsWords), TorsionAlertsSetSize):
  625         ID += 1
  626 
  627         (
  628             RotBondIndices,
  629             TorsionAlertType,
  630             TorsionIndices,
  631             TorsionAngle,
  632             TorsionAngleViolation,
  633             HierarchyClass,
  634             HierarchySubClass,
  635             TorsionPeaks,
  636             Tolerances1,
  637             Tolerances2,
  638             TorsionRule,
  639         ) = TorsionAlertsWords[Index : Index + TorsionAlertsSetSize]
  640         RotBondIndices = RotBondIndices.split(ValuesDelimiter)
  641         TorsionIndices = TorsionIndices.split(ValuesDelimiter)
  642         TorsionPeaks = TorsionPeaks.split(ValuesDelimiter)
  643         Tolerances1 = Tolerances1.split(ValuesDelimiter)
  644         Tolerances2 = Tolerances2.split(ValuesDelimiter)
  645 
  646         if TorsionRule not in TorsionAlertsSummaryInfo["SMARTSToRuleIDs"]:
  647             MiscUtil.PrintWarning(
  648                 "The SMARTS pattern, %s, for TorsionAlerts data field in filtered SD file doesn't map to any torsion rule..."
  649                 % TorsionRule
  650             )
  651             continue
  652         TorsionRuleNodeID = TorsionAlertsSummaryInfo["SMARTSToRuleIDs"][TorsionRule]
  653 
  654         # Track data for torsion alerts in a molecule...
  655         if TorsionRuleNodeID not in TorsionAlertsInfo["RuleSMARTS"]:
  656             TorsionAlertsInfo["RuleIDs"].append(TorsionRuleNodeID)
  657 
  658             TorsionAlertsInfo["RuleSMARTS"][TorsionRuleNodeID] = TorsionRule
  659             TorsionAlertsInfo["HierarchyClassName"][TorsionRuleNodeID] = HierarchyClass
  660             TorsionAlertsInfo["HierarchySubClassName"][TorsionRuleNodeID] = HierarchySubClass
  661             TorsionAlertsInfo["TorsionRulePeaks"][TorsionRuleNodeID] = TorsionPeaks
  662             TorsionAlertsInfo["TorsionRuleTolerances1"][TorsionRuleNodeID] = Tolerances1
  663             TorsionAlertsInfo["TorsionRuleTolerances2"][TorsionRuleNodeID] = Tolerances2
  664 
  665             TorsionAlertsInfo["AlertTypes"][TorsionRuleNodeID] = []
  666             TorsionAlertsInfo["AtomIndices"][TorsionRuleNodeID] = []
  667             TorsionAlertsInfo["TorsionAtomIndices"][TorsionRuleNodeID] = []
  668             TorsionAlertsInfo["TorsionAngles"][TorsionRuleNodeID] = []
  669             TorsionAlertsInfo["TorsionAngleViolations"][TorsionRuleNodeID] = []
  670 
  671             TorsionAlertsInfo["AlertTypesCount"][TorsionRuleNodeID] = {}
  672 
  673         # Track multiple values for a rule ID...
  674         TorsionAlertsInfo["AlertTypes"][TorsionRuleNodeID].append(TorsionAlertType)
  675         TorsionAlertsInfo["AtomIndices"][TorsionRuleNodeID].append(RotBondIndices)
  676         TorsionAlertsInfo["TorsionAtomIndices"][TorsionRuleNodeID].append(TorsionIndices)
  677         TorsionAlertsInfo["TorsionAngles"][TorsionRuleNodeID].append(TorsionAngle)
  678         TorsionAlertsInfo["TorsionAngleViolations"][TorsionRuleNodeID].append(TorsionAngleViolation)
  679 
  680         # Count alert type for a rule ID...
  681         if TorsionAlertType not in TorsionAlertsInfo["AlertTypesCount"][TorsionRuleNodeID]:
  682             TorsionAlertsInfo["AlertTypesCount"][TorsionRuleNodeID][TorsionAlertType] = 0
  683         TorsionAlertsInfo["AlertTypesCount"][TorsionRuleNodeID][TorsionAlertType] += 1
  684 
  685     return TorsionAlertsInfo
  686 
  687 
  688 def WriteMolecule(Writer, Mol, RotBondsAlertsInfo):
  689     """Write out molecule."""
  690 
  691     if OptionsInfo["CountMode"]:
  692         return True
  693 
  694     SetupMolPropertiesForAlertsInformation(Mol, RotBondsAlertsInfo)
  695 
  696     try:
  697         Writer.write(Mol)
  698     except Exception as ErrMsg:
  699         MiscUtil.PrintWarning("Failed to write molecule %s:\n%s\n" % (RDKitUtil.GetMolName(Mol), ErrMsg))
  700         return False
  701 
  702     return True
  703 
  704 
  705 def SetupMolPropertiesForAlertsInformation(Mol, RotBondsAlertsInfo):
  706     """Setup molecule properties containing alerts information for rotatable bonds."""
  707 
  708     if not OptionsInfo["OutfileAlerts"]:
  709         return
  710 
  711     # Setup rotatable bonds count...
  712     RotBondsCount = 0
  713     if RotBondsAlertsInfo is not None:
  714         RotBondsCount = len(RotBondsAlertsInfo["IDs"])
  715     Mol.SetProp(OptionsInfo["SDFieldIDsToLabels"]["RotBondsCountLabel"], "%s" % RotBondsCount)
  716 
  717     # Setup alert counts for rotatable bonds...
  718     AlertsCount = []
  719     if RotBondsAlertsInfo is not None:
  720         for AlertType in ["Green", "Orange", "Red"]:
  721             if AlertType in RotBondsAlertsInfo["Count"]:
  722                 AlertsCount.append("%s" % RotBondsAlertsInfo["Count"][AlertType])
  723             else:
  724                 AlertsCount.append("0")
  725 
  726     if len(AlertsCount):
  727         Mol.SetProp(
  728             OptionsInfo["SDFieldIDsToLabels"]["TorsionAlertsCountLabel"], "%s" % MiscUtil.JoinWords(AlertsCount, " ")
  729         )
  730 
  731     # Setup alert information for rotatable bonds...
  732     AlertsInfoValues = []
  733 
  734     # Delimiter for multiple values corresponding to specific set of information for
  735     # a rotatable bond. For example: TorsionAtomIndices
  736     ValuesDelim = OptionsInfo["IntraSetValuesDelim"]
  737 
  738     # Delimiter for various values for a rotatable bond...
  739     RotBondValuesDelim = OptionsInfo["InterSetValuesDelim"]
  740 
  741     # Delimiter for values corresponding to multiple rotatable bonds...
  742     AlertsInfoValuesDelim = OptionsInfo["InterSetValuesDelim"]
  743 
  744     if RotBondsAlertsInfo is not None:
  745         for ID in RotBondsAlertsInfo["IDs"]:
  746             if not RotBondsAlertsInfo["MatchStatus"][ID]:
  747                 continue
  748 
  749             if OptionsInfo["OutfileAlertsOnly"]:
  750                 if RotBondsAlertsInfo["AlertTypes"][ID] not in OptionsInfo["SpecifiedAlertsModeList"]:
  751                     continue
  752 
  753             RotBondValues = []
  754 
  755             # Bond atom indices...
  756             Values = ["%s" % Value for Value in RotBondsAlertsInfo["AtomIndices"][ID]]
  757             RotBondValues.append(ValuesDelim.join(Values))
  758 
  759             # Alert type...
  760             RotBondValues.append(RotBondsAlertsInfo["AlertTypes"][ID])
  761 
  762             # Torsion atom indices...
  763             TorsionAtomIndices = SetupTorsionAtomIndicesValues(
  764                 RotBondsAlertsInfo["TorsionAtomIndices"][ID], ValuesDelim
  765             )
  766             RotBondValues.append(TorsionAtomIndices)
  767 
  768             # Torsion angle...
  769             RotBondValues.append("%.2f" % RotBondsAlertsInfo["TorsionAngles"][ID])
  770 
  771             # Torsion angle violation...
  772             RotBondValues.append("%.2f" % RotBondsAlertsInfo["TorsionAngleViolations"][ID])
  773 
  774             # Hierarchy class and subclass names...
  775             RotBondValues.append("%s" % RotBondsAlertsInfo["HierarchyClassNames"][ID])
  776             RotBondValues.append("%s" % RotBondsAlertsInfo["HierarchySubClassNames"][ID])
  777 
  778             # Torsion rule peaks...
  779             Values = ["%s" % Value for Value in RotBondsAlertsInfo["TorsionRulePeaks"][ID]]
  780             RotBondValues.append(ValuesDelim.join(Values))
  781 
  782             # Torsion rule tolerances...
  783             Values = ["%s" % Value for Value in RotBondsAlertsInfo["TorsionRuleTolerances1"][ID]]
  784             RotBondValues.append(ValuesDelim.join(Values))
  785             Values = ["%s" % Value for Value in RotBondsAlertsInfo["TorsionRuleTolerances2"][ID]]
  786             RotBondValues.append(ValuesDelim.join(Values))
  787 
  788             # Torsion rule SMARTS...
  789             RotBondValues.append("%s" % RotBondsAlertsInfo["TorsionRuleSMARTS"][ID])
  790 
  791             # Track joined values for a rotatable bond...
  792             AlertsInfoValues.append("%s" % RotBondValuesDelim.join(RotBondValues))
  793 
  794     if len(AlertsInfoValues):
  795         Mol.SetProp(
  796             OptionsInfo["SDFieldIDsToLabels"]["TorsionAlertsLabel"],
  797             "%s" % ("%s" % AlertsInfoValuesDelim.join(AlertsInfoValues)),
  798         )
  799 
  800 
  801 def WriteMoleculeFilteredByRuleID(Writer, Mol, TorsionRuleID, TorsionAlertsSummaryInfo, TorsionAlertsInfo):
  802     """Write out molecule."""
  803 
  804     if OptionsInfo["CountMode"]:
  805         return
  806 
  807     SetupMolPropertiesForFilteredByRuleIDAlertsInformation(
  808         Mol, TorsionRuleID, TorsionAlertsSummaryInfo, TorsionAlertsInfo
  809     )
  810 
  811     Writer.write(Mol)
  812 
  813 
  814 def SetupMolPropertiesForFilteredByRuleIDAlertsInformation(
  815     Mol, TorsionRuleID, TorsionAlertsSummaryInfo, TorsionAlertsInfo
  816 ):
  817     """Setup molecule properties containing alerts information for torsion alerts
  818     fileted by Rule IDs."""
  819 
  820     # Delete torsion alerts information for rotatable bonds...
  821     if Mol.HasProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionAlertsLabel"]):
  822         Mol.ClearProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionAlertsLabel"])
  823 
  824     # Delimiter for values...
  825     IntraSetValuesDelim = OptionsInfo["IntraSetValuesDelim"]
  826     InterSetValuesDelim = OptionsInfo["InterSetValuesDelim"]
  827 
  828     # Setup alert rule information...
  829     AlertRuleInfoValues = []
  830 
  831     AlertRuleInfoValues.append("%s" % TorsionAlertsInfo["HierarchyClassName"][TorsionRuleID])
  832     AlertRuleInfoValues.append("%s" % TorsionAlertsInfo["HierarchySubClassName"][TorsionRuleID])
  833 
  834     Values = ["%s" % Value for Value in TorsionAlertsInfo["TorsionRulePeaks"][TorsionRuleID]]
  835     AlertRuleInfoValues.append(IntraSetValuesDelim.join(Values))
  836 
  837     Values = ["%s" % Value for Value in TorsionAlertsInfo["TorsionRuleTolerances1"][TorsionRuleID]]
  838     AlertRuleInfoValues.append(IntraSetValuesDelim.join(Values))
  839     Values = ["%s" % Value for Value in TorsionAlertsInfo["TorsionRuleTolerances2"][TorsionRuleID]]
  840     AlertRuleInfoValues.append(IntraSetValuesDelim.join(Values))
  841 
  842     AlertRuleInfoValues.append("%s" % TorsionAlertsInfo["RuleSMARTS"][TorsionRuleID])
  843 
  844     Mol.SetProp(
  845         OptionsInfo["SDFieldIDsToLabels"]["TorsionRuleLabel"],
  846         "%s" % ("%s" % InterSetValuesDelim.join(AlertRuleInfoValues)),
  847     )
  848 
  849     # Setup alerts count for torsion rule...
  850     AlertsCount = []
  851     for AlertType in ["Green", "Orange", "Red"]:
  852         if AlertType in TorsionAlertsInfo["AlertTypesCount"][TorsionRuleID]:
  853             AlertsCount.append("%s" % TorsionAlertsInfo["AlertTypesCount"][TorsionRuleID][AlertType])
  854         else:
  855             AlertsCount.append("0")
  856 
  857     Mol.SetProp(
  858         OptionsInfo["SDFieldIDsToLabels"]["TorsionRuleAlertsCountLabel"], "%s" % (InterSetValuesDelim.join(AlertsCount))
  859     )
  860 
  861     # Setup torsion rule alerts...
  862     AlertsInfoValues = []
  863     for Index in range(0, len(TorsionAlertsInfo["AlertTypes"][TorsionRuleID])):
  864         RotBondInfoValues = []
  865 
  866         # Bond atom indices...
  867         Values = ["%s" % Value for Value in TorsionAlertsInfo["AtomIndices"][TorsionRuleID][Index]]
  868         RotBondInfoValues.append(IntraSetValuesDelim.join(Values))
  869 
  870         # Alert type...
  871         RotBondInfoValues.append(TorsionAlertsInfo["AlertTypes"][TorsionRuleID][Index])
  872 
  873         # Torsion atom indices retrieved from the filtered SD file and stored as strings...
  874         Values = ["%s" % Value for Value in TorsionAlertsInfo["TorsionAtomIndices"][TorsionRuleID][Index]]
  875         RotBondInfoValues.append(IntraSetValuesDelim.join(Values))
  876 
  877         # Torsion angle...
  878         RotBondInfoValues.append(TorsionAlertsInfo["TorsionAngles"][TorsionRuleID][Index])
  879 
  880         # Torsion angle violation...
  881         RotBondInfoValues.append(TorsionAlertsInfo["TorsionAngleViolations"][TorsionRuleID][Index])
  882 
  883         # Track alerts informaiton...
  884         AlertsInfoValues.append("%s" % InterSetValuesDelim.join(RotBondInfoValues))
  885 
  886     Mol.SetProp(
  887         OptionsInfo["SDFieldIDsToLabels"]["TorsionRuleAlertsLabel"], "%s" % (InterSetValuesDelim.join(AlertsInfoValues))
  888     )
  889 
  890     # Setup torsion rule alert max angle violation...
  891     TorsionAngleViolations = [float(Angle) for Angle in TorsionAlertsInfo["TorsionAngleViolations"][TorsionRuleID]]
  892     Mol.SetProp(
  893         OptionsInfo["SDFieldIDsToLabels"]["TorsionRuleMaxAngleViolationLabel"], "%.2f" % (max(TorsionAngleViolations))
  894     )
  895 
  896 
  897 def SetupTorsionAtomIndicesValues(TorsionAtomIndicesList, ValuesDelim):
  898     """Setup torsion atom indices value for output files."""
  899 
  900     # Check for any list values in the list of torsion atom indices used as placeholders
  901     # for positions of lone pairs in torsion rules containing  N_lp...
  902     TorsionAtomsInfo = []
  903     for Value in TorsionAtomIndicesList:
  904         if type(Value) is list:
  905             TorsionAtomsInfo.append("N_lp")
  906         else:
  907             TorsionAtomsInfo.append(Value)
  908 
  909     Values = ["%s" % Value for Value in TorsionAtomsInfo]
  910 
  911     return ValuesDelim.join(Values)
  912 
  913 
  914 def SetupOutfilesWriters():
  915     """Setup molecule and summary writers."""
  916 
  917     OutfilesWriters = {"WriterRemaining": None, "WriterFiltered": None, "WriterAlertSummary": None}
  918 
  919     # Writers for SD files...
  920     WriterRemaining, WriterFiltered = SetupMoleculeWriters()
  921     OutfilesWriters["WriterRemaining"] = WriterRemaining
  922     OutfilesWriters["WriterFiltered"] = WriterFiltered
  923 
  924     # Writer for alert summary CSV file...
  925     WriterAlertSummary = SetupAlertSummaryWriter()
  926     OutfilesWriters["WriterAlertSummary"] = WriterAlertSummary
  927 
  928     return OutfilesWriters
  929 
  930 
  931 def SetupMoleculeWriters():
  932     """Setup molecule writers."""
  933 
  934     Writer = None
  935     WriterFiltered = None
  936 
  937     if OptionsInfo["CountMode"]:
  938         return (Writer, WriterFiltered)
  939 
  940     Writer = RDKitUtil.MoleculesWriter(OptionsInfo["Outfile"], **OptionsInfo["OutfileParams"])
  941     if Writer is None:
  942         MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OptionsInfo["Outfile"])
  943     MiscUtil.PrintInfo("\nGenerating file %s..." % OptionsInfo["Outfile"])
  944 
  945     if OptionsInfo["OutfileFilteredMode"]:
  946         WriterFiltered = RDKitUtil.MoleculesWriter(OptionsInfo["OutfileFiltered"], **OptionsInfo["OutfileParams"])
  947         if WriterFiltered is None:
  948             MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OptionsInfo["OutfileFiltered"])
  949         MiscUtil.PrintInfo("Generating file %s..." % OptionsInfo["OutfileFiltered"])
  950 
  951     return (Writer, WriterFiltered)
  952 
  953 
  954 def SetupAlertSummaryWriter():
  955     """Setup a alert summary writer."""
  956 
  957     Writer = None
  958 
  959     if OptionsInfo["CountMode"]:
  960         return Writer
  961 
  962     if not OptionsInfo["OutfileSummaryMode"]:
  963         return Writer
  964 
  965     Outfile = OptionsInfo["OutfileSummary"]
  966     Writer = open(Outfile, "w")
  967     if Writer is None:
  968         MiscUtil.PrintError("Failed to setup a writer for output fie %s " % Outfile)
  969 
  970     MiscUtil.PrintInfo("Generating file %s..." % Outfile)
  971 
  972     return Writer
  973 
  974 
  975 def CloseOutfilesWriters(OutfilesWriters):
  976     """Close outfile writers."""
  977 
  978     for WriterType, Writer in OutfilesWriters.items():
  979         if Writer is not None:
  980             Writer.close()
  981 
  982 
  983 def SetupByRuleOutfilesWriters(RuleIDs):
  984     """Setup by rule outfiles writers."""
  985 
  986     # Initialize...
  987     OutfilesWriters = {}
  988     for RuleID in RuleIDs:
  989         OutfilesWriters[RuleID] = None
  990 
  991     if OptionsInfo["CountMode"]:
  992         return OutfilesWriters
  993 
  994     if not OptionsInfo["OutfilesFilteredByRulesMode"]:
  995         return OutfilesWriters
  996 
  997     FileDir, FileName, FileExt = MiscUtil.ParseFileName(Options["--outfile"])
  998     OutfilesRoot = "%s_Filtered_TopRule" % FileName
  999     OutfilesExt = "sdf"
 1000 
 1001     MsgTxt = (
 1002         "all"
 1003         if OptionsInfo["OutfilesFilteredByRulesAllMode"]
 1004         else "top %s" % OptionsInfo["OutfilesFilteredByRulesMaxCount"]
 1005     )
 1006     MiscUtil.PrintInfo(
 1007         "\nGenerating output files %s*.%s for %s torsion rules triggering alerts..."
 1008         % (OutfilesRoot, OutfilesExt, MsgTxt)
 1009     )
 1010 
 1011     # Delete any existing output files...
 1012     Outfiles = glob.glob("%s*.%s" % (OutfilesRoot, OutfilesExt))
 1013     if len(Outfiles):
 1014         MiscUtil.PrintInfo("Deleting existing output files %s*.%s..." % (OutfilesRoot, OutfilesExt))
 1015         for Outfile in Outfiles:
 1016             try:
 1017                 os.remove(Outfile)
 1018             except Exception as ErrMsg:
 1019                 MiscUtil.PrintWarning("Failed to delete file: %s" % ErrMsg)
 1020 
 1021     RuleIndex = 0
 1022     for RuleID in RuleIDs:
 1023         RuleIndex += 1
 1024         Outfile = "%s%s.%s" % (OutfilesRoot, RuleIndex, OutfilesExt)
 1025         Writer = RDKitUtil.MoleculesWriter(Outfile, **OptionsInfo["OutfileParams"])
 1026         if Writer is None:
 1027             MiscUtil.PrintError("Failed to setup a writer for output fie %s " % Outfile)
 1028 
 1029         OutfilesWriters[RuleID] = Writer
 1030 
 1031     return OutfilesWriters
 1032 
 1033 
 1034 def CloseByRuleOutfilesWriters(OutfilesWriters):
 1035     """Close by rule outfile writers."""
 1036 
 1037     for RuleID, Writer in OutfilesWriters.items():
 1038         if Writer is not None:
 1039             Writer.close()
 1040 
 1041 
 1042 def InstantiateTorsionLibraryAlertsClass(Quiet=False):
 1043     """Initialize torsion library alerts class."""
 1044 
 1045     try:
 1046         TorsionLibraryAlertsHandle = TorsionLibraryAlerts(
 1047             AlertsMode=OptionsInfo["AlertsMode"],
 1048             MinAlertsCount=OptionsInfo["MinAlertsCount"],
 1049             NitrogenLonePairAllowHydrogenNbrs=OptionsInfo["NitrogenLonePairParams"]["AllowHydrogenNbrs"],
 1050             NitrogenLonePairPlanarityTolerance=OptionsInfo["NitrogenLonePairParams"]["PlanarityTolerance"],
 1051             RotBondsSMARTSMode=OptionsInfo["RotBondsSMARTSMode"],
 1052             RotBondsSMARTSPattern=OptionsInfo["RotBondsSMARTSPattern"],
 1053             TorsionLibraryFilePath=OptionsInfo["TorsionLibraryFile"],
 1054         )
 1055     except Exception as ErrMsg:
 1056         MiscUtil.PrintError("Failed to instantiate TorsionLibraryAlerts:\n%s\n" % (ErrMsg))
 1057 
 1058     if not Quiet:
 1059         MiscUtil.PrintInfo(
 1060             "\nRetrieving data from library file %s..." % TorsionLibraryAlertsHandle.GetTorsionLibraryFilePath()
 1061         )
 1062         TorsionLibraryAlertsHandle.ListTorsionLibraryInfo()
 1063 
 1064     return TorsionLibraryAlertsHandle
 1065 
 1066 
 1067 def ProcessRotatableBondsSMARTSMode():
 1068     """ "Process SMARTS pattern for rotatable bonds."""
 1069 
 1070     RotBondsMode = OptionsInfo["RotBondsSMARTSMode"]
 1071 
 1072     RotBondsSMARTSPattern = None
 1073     RotBondsSMARTSPatternSpecified = OptionsInfo["RotBondsSMARTSPatternSpecified"]
 1074 
 1075     if re.match("^(NonStrict|SemiStrict|Strict)$", RotBondsMode, re.I):
 1076         RotBondsSMARTSPattern = None
 1077     elif re.match("Specify", RotBondsMode, re.I):
 1078         RotBondsSMARTSPatternSpecified = RotBondsSMARTSPatternSpecified.strip()
 1079         if not len(RotBondsSMARTSPatternSpecified):
 1080             MiscUtil.PrintError(
 1081                 'Empty value specified for SMILES/SMARTS pattern in  "--rotBondsSMARTSPattern" option, %s.'
 1082                 % RotBondsMode
 1083             )
 1084 
 1085         RotBondsPatternMol = Chem.MolFromSmarts(RotBondsSMARTSPatternSpecified)
 1086         if RotBondsPatternMol is None:
 1087             MiscUtil.PrintError(
 1088                 'Failed to create rotatable bonds pattern molecule. The rotatable bonds SMARTS pattern, "%s", specified using "--rotBondsSMARTSPattern" option is not valid.'
 1089                 % (RotBondsSMARTSPatternSpecified)
 1090             )
 1091     else:
 1092         MiscUtil.PrintError(
 1093             'The value, %s, specified for option "-r, --rotBondsSMARTSMode" is not valid. ' % RotBondsMode
 1094         )
 1095 
 1096     OptionsInfo["RotBondsSMARTSPattern"] = RotBondsSMARTSPattern
 1097 
 1098 
 1099 def ProcessSDFieldLabelsOption():
 1100     """Process SD data field label option."""
 1101 
 1102     ParamsOptionName = "--outfileSDFieldLabels"
 1103     ParamsOptionValue = Options["--outfileSDFieldLabels"]
 1104 
 1105     ParamsIDsToLabels = {
 1106         "RotBondsCountLabel": "RotBondsCount",
 1107         "TorsionAlertsCountLabel": "TorsionAlertsCount (Green Orange Red)",
 1108         "TorsionAlertsLabel": "TorsionAlerts (RotBondIndices TorsionAlert TorsionIndices TorsionAngle TorsionAngleViolation HierarchyClass HierarchySubClass TorsionPeaks Tolerances1 Tolerances2 TorsionRule)",
 1109         "TorsionRuleLabel": "TorsionRule (HierarchyClass HierarchySubClass TorsionPeaks Tolerances1 Tolerances2 TorsionRule)",
 1110         "TorsionRuleAlertsCountLabel": "TorsionRuleAlertsCount (Green Orange Red)",
 1111         "TorsionRuleAlertsLabel": "TorsionRuleAlerts (RotBondIndices TorsionAlert TorsionIndices TorsionAngle TorsionAngleViolation)",
 1112         "TorsionRuleMaxAngleViolationLabel": "TorsionRuleMaxAngleViolation",
 1113     }
 1114 
 1115     if re.match("^auto$", ParamsOptionValue, re.I):
 1116         OptionsInfo["SDFieldIDsToLabels"] = ParamsIDsToLabels
 1117         return
 1118 
 1119     # Setup a canonical paramater names...
 1120     ValidParamNames = []
 1121     CanonicalParamNamesMap = {}
 1122     for ParamName in sorted(ParamsIDsToLabels):
 1123         ValidParamNames.append(ParamName)
 1124         CanonicalParamNamesMap[ParamName.lower()] = ParamName
 1125 
 1126     ParamsOptionValue = ParamsOptionValue.strip()
 1127     if not ParamsOptionValue:
 1128         MiscUtil.PrintError('No valid parameter name and value pairs specified using "%s" option' % ParamsOptionName)
 1129 
 1130     ParamsOptionValueWords = ParamsOptionValue.split(",")
 1131     if len(ParamsOptionValueWords) % 2:
 1132         MiscUtil.PrintError(
 1133             'The number of comma delimited paramater names and values, %d, specified using "%s" option must be an even number.'
 1134             % (len(ParamsOptionValueWords), ParamsOptionName)
 1135         )
 1136 
 1137     # Validate paramater name and value pairs...
 1138     for Index in range(0, len(ParamsOptionValueWords), 2):
 1139         Name = ParamsOptionValueWords[Index].strip()
 1140         Value = ParamsOptionValueWords[Index + 1].strip()
 1141 
 1142         CanonicalName = Name.lower()
 1143         if CanonicalName not in CanonicalParamNamesMap:
 1144             MiscUtil.PrintError(
 1145                 'The parameter name, %s, specified using "%s" is not a valid name. Supported parameter names: %s'
 1146                 % (Name, ParamsOptionName, " ".join(ValidParamNames))
 1147             )
 1148 
 1149         ParamName = CanonicalParamNamesMap[CanonicalName]
 1150         ParamValue = Value
 1151 
 1152         # Set value...
 1153         ParamsIDsToLabels[ParamName] = ParamValue
 1154 
 1155     OptionsInfo["SDFieldIDsToLabels"] = ParamsIDsToLabels
 1156 
 1157 
 1158 def ProcessOptionNitrogenLonePairParameters():
 1159     """Process nitrogen lone pair parameters option."""
 1160 
 1161     ParamsOptionName = "--nitrogenLonePairParams"
 1162     ParamsOptionValue = Options["--nitrogenLonePairParams"]
 1163 
 1164     ParamsInfo = {
 1165         "AllowHydrogenNbrs": True,
 1166         "PlanarityTolerance": 1.0,
 1167     }
 1168 
 1169     if re.match("^auto$", ParamsOptionValue, re.I):
 1170         OptionsInfo["NitrogenLonePairParams"] = ParamsInfo
 1171         return
 1172 
 1173     # Setup a canonical paramater names...
 1174     ValidParamNames = []
 1175     CanonicalParamNamesMap = {}
 1176     for ParamName in sorted(ParamsInfo):
 1177         ValidParamNames.append(ParamName)
 1178         CanonicalParamNamesMap[ParamName.lower()] = ParamName
 1179 
 1180     ParamsOptionValue = ParamsOptionValue.strip()
 1181     if not ParamsOptionValue:
 1182         MiscUtil.PrintError('No valid parameter name and value pairs specified using "%s" option' % ParamsOptionName)
 1183 
 1184     ParamsOptionValueWords = ParamsOptionValue.split(",")
 1185     if len(ParamsOptionValueWords) % 2:
 1186         MiscUtil.PrintError(
 1187             'The number of comma delimited paramater names and values, %d, specified using "%s" option must be an even number.'
 1188             % (len(ParamsOptionValueWords), ParamsOptionName)
 1189         )
 1190 
 1191     # Validate paramater name and value pairs...
 1192     for Index in range(0, len(ParamsOptionValueWords), 2):
 1193         Name = ParamsOptionValueWords[Index].strip()
 1194         Value = ParamsOptionValueWords[Index + 1].strip()
 1195 
 1196         CanonicalName = Name.lower()
 1197         if CanonicalName not in CanonicalParamNamesMap:
 1198             MiscUtil.PrintError(
 1199                 'The parameter name, %s, specified using "%s" is not a valid name. Supported parameter names: %s'
 1200                 % (Name, ParamsOptionName, " ".join(ValidParamNames))
 1201             )
 1202 
 1203         ParamName = CanonicalParamNamesMap[CanonicalName]
 1204         ParamValue = Value
 1205 
 1206         if re.match("^PlanarityTolerance$", ParamName, re.I):
 1207             Value = float(Value)
 1208             if Value < 0:
 1209                 MiscUtil.PrintError(
 1210                     'The parameter value, %s, specified for parameter name, %s, using "%s" option is not a valid value. Supported values: >= 0'
 1211                     % (Value, Name, ParamsOptionName)
 1212                 )
 1213             ParamValue = Value
 1214         elif re.match("^AllowHydrogenNbrs$", ParamName, re.I):
 1215             if not re.match("^(yes|no)$", Value, re.I):
 1216                 MiscUtil.PrintError(
 1217                     'The parameter value, %s, specified for parameter name, %s, using "%s" option is not a valid value. Supported values: yes or no'
 1218                     % (Value, Name, ParamsOptionName)
 1219                 )
 1220             ParamValue = True if re.match("^yes$", Value, re.I) else False
 1221 
 1222         # Set value...
 1223         ParamsInfo[ParamName] = ParamValue
 1224 
 1225     OptionsInfo["NitrogenLonePairParams"] = ParamsInfo
 1226 
 1227 
 1228 def ProcessOptions():
 1229     """Process and validate command line arguments and options."""
 1230 
 1231     MiscUtil.PrintInfo("Processing options...")
 1232 
 1233     # Validate options...
 1234     ValidateOptions()
 1235 
 1236     OptionsInfo["Infile"] = Options["--infile"]
 1237     ParamsDefaultInfoOverride = {"RemoveHydrogens": False}
 1238     OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters(
 1239         "--infileParams",
 1240         Options["--infileParams"],
 1241         InfileName=Options["--infile"],
 1242         ParamsDefaultInfo=ParamsDefaultInfoOverride,
 1243     )
 1244 
 1245     OptionsInfo["Outfile"] = Options["--outfile"]
 1246     OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters(
 1247         "--outfileParams", Options["--outfileParams"], Options["--infile"], Options["--outfile"]
 1248     )
 1249 
 1250     FileDir, FileName, FileExt = MiscUtil.ParseFileName(Options["--outfile"])
 1251     OutfileFiltered = "%s_Filtered.%s" % (FileName, FileExt)
 1252     OptionsInfo["OutfileFiltered"] = OutfileFiltered
 1253     OptionsInfo["OutfileFilteredMode"] = True if re.match("^yes$", Options["--outfileFiltered"], re.I) else False
 1254 
 1255     OutfileSummary = "%s_AlertsSummary.csv" % (FileName)
 1256     OptionsInfo["OutfileSummary"] = OutfileSummary
 1257     OptionsInfo["OutfileSummaryMode"] = True if re.match("^yes$", Options["--outfileSummary"], re.I) else False
 1258 
 1259     OptionsInfo["OutfilesFilteredByRulesMode"] = (
 1260         True if re.match("^yes$", Options["--outfilesFilteredByRules"], re.I) else False
 1261     )
 1262     OptionsInfo["TrackAlertsSummaryInfo"] = (
 1263         True if (OptionsInfo["OutfileSummaryMode"] or OptionsInfo["OutfilesFilteredByRulesMode"]) else False
 1264     )
 1265 
 1266     OutfilesFilteredByRulesMaxCount = Options["--outfilesFilteredByRulesMaxCount"]
 1267     if not re.match("^All$", OutfilesFilteredByRulesMaxCount, re.I):
 1268         OutfilesFilteredByRulesMaxCount = int(OutfilesFilteredByRulesMaxCount)
 1269     OptionsInfo["OutfilesFilteredByRulesMaxCount"] = OutfilesFilteredByRulesMaxCount
 1270     OptionsInfo["OutfilesFilteredByRulesAllMode"] = (
 1271         True if re.match("^All$", Options["--outfilesFilteredByRulesMaxCount"], re.I) else False
 1272     )
 1273 
 1274     OptionsInfo["OutfileAlerts"] = True if re.match("^yes$", Options["--outfileAlerts"], re.I) else False
 1275 
 1276     if re.match("^yes$", Options["--outfilesFilteredByRules"], re.I):
 1277         if not re.match("^yes$", Options["--outfileAlerts"], re.I):
 1278             MiscUtil.PrintError(
 1279                 'The value "%s" specified for "--outfilesFilteredByRules" option is not valid. The specified value is only allowed during "yes" value of "--outfileAlerts" option.'
 1280                 % (Options["--outfilesFilteredByRules"])
 1281             )
 1282 
 1283     OptionsInfo["OutfileAlertsMode"] = Options["--outfileAlertsMode"]
 1284     OptionsInfo["OutfileAlertsOnly"] = True if re.match("^AlertsOnly$", Options["--outfileAlertsMode"], re.I) else False
 1285 
 1286     ProcessSDFieldLabelsOption()
 1287 
 1288     OptionsInfo["Overwrite"] = Options["--overwrite"]
 1289     OptionsInfo["CountMode"] = True if re.match("^count$", Options["--mode"], re.I) else False
 1290 
 1291     OptionsInfo["MPMode"] = True if re.match("^yes$", Options["--mp"], re.I) else False
 1292     OptionsInfo["MPParams"] = MiscUtil.ProcessOptionMultiprocessingParameters("--mpParams", Options["--mpParams"])
 1293 
 1294     ProcessOptionNitrogenLonePairParameters()
 1295 
 1296     OptionsInfo["AlertsMode"] = Options["--alertsMode"]
 1297     OptionsInfo["SpecifiedAlertsModeList"] = []
 1298     if re.match("^Red$", Options["--alertsMode"], re.I):
 1299         OptionsInfo["SpecifiedAlertsModeList"].append("Red")
 1300     elif re.match("^RedAndOrange$", Options["--alertsMode"], re.I):
 1301         OptionsInfo["SpecifiedAlertsModeList"].append("Red")
 1302         OptionsInfo["SpecifiedAlertsModeList"].append("Orange")
 1303 
 1304     OptionsInfo["MinAlertsCount"] = int(Options["--alertsMinCount"])
 1305 
 1306     OptionsInfo["RotBondsSMARTSMode"] = Options["--rotBondsSMARTSMode"]
 1307     OptionsInfo["RotBondsSMARTSPatternSpecified"] = Options["--rotBondsSMARTSPattern"]
 1308     ProcessRotatableBondsSMARTSMode()
 1309 
 1310     OptionsInfo["TorsionLibraryFile"] = Options["--torsionLibraryFile"]
 1311 
 1312     # Setup delimiter for writing out torsion alert information to output files...
 1313     OptionsInfo["IntraSetValuesDelim"] = ","
 1314     OptionsInfo["InterSetValuesDelim"] = " "
 1315 
 1316 
 1317 def RetrieveOptions():
 1318     """Retrieve command line arguments and options."""
 1319 
 1320     # Get options...
 1321     global Options
 1322     Options = docopt(_docoptUsage_)
 1323 
 1324     # Set current working directory to the specified directory...
 1325     WorkingDir = Options["--workingdir"]
 1326     if WorkingDir:
 1327         os.chdir(WorkingDir)
 1328 
 1329     # Handle examples option...
 1330     if "--examples" in Options and Options["--examples"]:
 1331         MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_))
 1332         sys.exit(0)
 1333 
 1334 
 1335 def ProcessListTorsionLibraryOption():
 1336     """Process list torsion library information."""
 1337 
 1338     # Validate and process dataFile option for listing torsion library information...
 1339     OptionsInfo["TorsionLibraryFile"] = Options["--torsionLibraryFile"]
 1340     if not re.match("^auto$", Options["--torsionLibraryFile"], re.I):
 1341         MiscUtil.ValidateOptionFilePath("-t, --torsionLibraryFile", Options["--torsionLibraryFile"])
 1342 
 1343     # Instantiate TorsionLibraryAlerts using defaults...
 1344     TorsionLibraryAlertsHandle = TorsionLibraryAlerts(TorsionLibraryFilePath=OptionsInfo["TorsionLibraryFile"])
 1345     MiscUtil.PrintInfo(
 1346         "\nRetrieving data from torsion library file %s..." % TorsionLibraryAlertsHandle.GetTorsionLibraryFilePath()
 1347     )
 1348     TorsionLibraryAlertsHandle.ListTorsionLibraryInfo()
 1349 
 1350 
 1351 def ValidateOptions():
 1352     """Validate option values."""
 1353 
 1354     MiscUtil.ValidateOptionTextValue("-a, --alertsMode", Options["--alertsMode"], "Red RedAndOrange")
 1355     MiscUtil.ValidateOptionIntegerValue("--alertsMinCount", Options["--alertsMinCount"], {">=": 1})
 1356 
 1357     MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"])
 1358     MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "sdf sd mol")
 1359 
 1360     MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "sdf sd")
 1361     if re.match("^filter$", Options["--mode"], re.I):
 1362         MiscUtil.ValidateOptionsOutputFileOverwrite(
 1363             "-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"]
 1364         )
 1365         MiscUtil.ValidateOptionsDistinctFileNames(
 1366             "-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"]
 1367         )
 1368 
 1369     MiscUtil.ValidateOptionTextValue("--outfileFiltered", Options["--outfileFiltered"], "yes no")
 1370 
 1371     MiscUtil.ValidateOptionTextValue("--outfilesFilteredByRules", Options["--outfilesFilteredByRules"], "yes no")
 1372     if not re.match("^All$", Options["--outfilesFilteredByRulesMaxCount"], re.I):
 1373         MiscUtil.ValidateOptionIntegerValue(
 1374             "--outfilesFilteredByRulesMaxCount", Options["--outfilesFilteredByRulesMaxCount"], {">": 0}
 1375         )
 1376 
 1377     MiscUtil.ValidateOptionTextValue("--outfileSummary", Options["--outfileSummary"], "yes no")
 1378     MiscUtil.ValidateOptionTextValue("--outfileAlerts", Options["--outfileAlerts"], "yes no")
 1379     MiscUtil.ValidateOptionTextValue("--outfileAlertsMode", Options["--outfileAlertsMode"], "All AlertsOnly")
 1380 
 1381     MiscUtil.ValidateOptionTextValue("-m, --mode", Options["--mode"], "filter count")
 1382     if re.match("^filter$", Options["--mode"], re.I):
 1383         if not Options["--outfile"]:
 1384             MiscUtil.PrintError(
 1385                 'The outfile must be specified using "-o, --outfile" during "filter" value of "-m, --mode" option'
 1386             )
 1387 
 1388     MiscUtil.ValidateOptionTextValue("--mp", Options["--mp"], "yes no")
 1389 
 1390     MiscUtil.ValidateOptionTextValue(
 1391         "-r, --rotBondsSMARTSMode", Options["--rotBondsSMARTSMode"], "NonStrict SemiStrict Strict Specify"
 1392     )
 1393     if re.match("^Specify$", Options["--rotBondsSMARTSMode"], re.I):
 1394         if not Options["--rotBondsSMARTSPattern"]:
 1395             MiscUtil.PrintError(
 1396                 'The SMARTS pattern must be specified using "--rotBondsSMARTSPattern" during "Specify" value of "-r, --rotBondsSMARTS" option'
 1397             )
 1398 
 1399     if not re.match("^auto$", Options["--torsionLibraryFile"], re.I):
 1400         MiscUtil.ValidateOptionFilePath("-t, --torsionLibraryFile", Options["--torsionLibraryFile"])
 1401 
 1402 
 1403 # Setup a usage string for docopt...
 1404 _docoptUsage_ = """
 1405 RDKitFilterTorsionLibraryAlerts.py - Filter torsion library alerts
 1406 
 1407 Usage:
 1408     RDKitFilterTorsionLibraryAlerts.py  [--alertsMode <Red, RedAndOrange>] [--alertsMinCount <Number>]
 1409                                         [--infileParams <Name,Value,...>] [--mode <filter or count>] [--mp <yes or no>] [--mpParams <Name,Value,...>]
 1410                                         [--nitrogenLonePairParams <Name,Value,...>] [--outfileAlerts <yes or no>]
 1411                                         [--outfileAlertsMode <All or AlertsOnly>] [--outfileFiltered <yes or no>]
 1412                                         [--outfilesFilteredByRules <yes or no>] [--outfilesFilteredByRulesMaxCount <All or number>]
 1413                                         [--outfileSummary <yes or no>] [--outfileSDFieldLabels <Type,Label,...>]
 1414                                         [--outfileParams <Name,Value,...>] [--overwrite] [ --rotBondsSMARTSMode <NonStrict, SemiStrict,...>]
 1415                                         [--rotBondsSMARTSPattern <SMARTS>] [--torsionLibraryFile <FileName or auto>] [-w <dir>] -i <infile> -o <outfile>
 1416     RDKitFilterTorsionLibraryAlerts.py [--torsionLibraryFile <FileName or auto>] -l | --list
 1417     RDKitFilterTorsionLibraryAlerts.py -h | --help | -e | --examples
 1418 
 1419 Description:
 1420     Filter strained molecules from an input file for torsion library [ Ref 146, 152, 159 ]
 1421     alerts by matching rotatable bonds against SMARTS patterns specified for torsion
 1422     rules in a torsion library file and write out appropriate molecules to output
 1423     files. The molecules must have 3D coordinates in input file.
 1424     
 1425     The default torsion library file, TorsionLibrary.xml, is available under
 1426     MAYACHEMTOOLS/lib/python/TorsionAlerts directory.
 1427     
 1428     The data in torsion library file is organized in a hierarchical manner. It consists
 1429     of one generic class and six specific classes at the highest level. Each class
 1430     contains multiple subclasses corresponding to named functional groups or
 1431     substructure patterns. The subclasses consist of torsion rules sorted from
 1432     specific to generic torsion patterns. The torsion rule, in turn, contains a list
 1433     of peak values for torsion angles and two tolerance values. A pair of tolerance
 1434     values define torsion bins around a torsion peak value. For example:
 1435         
 1436         <library>
 1437             <hierarchyClass name="GG" id1="G" id2="G">
 1438             ...
 1439             </hierarchyClass>
 1440             <hierarchyClass name="CO" id1="C" id2="O">
 1441                 <hierarchySubClass name="Ester bond I" smarts="O=[C:2][O:3]">
 1442                     <torsionRule smarts="[O:1]=[C:2]!@[O:3]~[CH0:4]">
 1443                         <angleList>
 1444                             <angle value="0.0" tolerance1="20.00"
 1445                             tolerance2="25.00" score="56.52"/>
 1446                         </angleList>
 1447                     </torsionRule>
 1448                     ...
 1449                 ...
 1450              ...
 1451             </hierarchyClass>
 1452             <hierarchyClass name="NC" id1="N" id2="C">
 1453              ...
 1454             </hierarchyClass>
 1455             <hierarchyClass name="SN" id1="S" id2="N">
 1456             ...
 1457             </hierarchyClass>
 1458             <hierarchyClass name="CS" id1="C" id2="S">
 1459             ...
 1460             </hierarchyClass>
 1461             <hierarchyClass name="CC" id1="C" id2="C">
 1462             ...
 1463             </hierarchyClass>
 1464             <hierarchyClass name="SS" id1="S" id2="S">
 1465              ...
 1466             </hierarchyClass>
 1467         </library>
 1468         
 1469     The rotatable bonds in a 3D molecule are identified using a default SMARTS pattern.
 1470     A custom SMARTS pattern may be optionally specified to detect rotatable bonds.
 1471     Each rotatable bond is matched to a torsion rule in the torsion library and
 1472     assigned one of the following three alert categories: Green, Orange or Red. The 
 1473     rotatable bond is marked Green or Orange for the measured angle of the torsion
 1474     pattern within the first or second tolerance bins around a torsion peak.
 1475     Otherwise, it's marked Red implying that the measured angle is not observed in
 1476     the structure databases employed to generate the torsion library.
 1477 
 1478     The following output files are generated after the filtering:
 1479         
 1480         <OutfileRoot>.sdf
 1481         <OutfileRoot>_Filtered.sdf
 1482         <OutfileRoot>_AlertsSummary.csv
 1483         <OutfileRoot>_Filtered_TopRule*.sdf
 1484         
 1485     The supported input file formats are: Mol (.mol), SD (.sdf, .sd)
 1486 
 1487     The supported output file formats are: SD (.sdf, .sd)
 1488 
 1489 Options:
 1490     -a, --alertsMode <Red, RedAndOrange>  [default: Red]
 1491         Torsion library alert types to use for filtering molecules containing
 1492         rotatable bonds marked with Green, Orange, or Red alerts. Possible
 1493         values: Red or RedAndOrange.
 1494     --alertsMinCount <Number>  [default: 1]
 1495         Minimum number of rotatable bond alerts in a molecule for filtering the
 1496         molecule.
 1497     -e, --examples
 1498         Print examples.
 1499     -h, --help
 1500         Print this help message.
 1501     -i, --infile <infile>
 1502         Input file name.
 1503     --infileParams <Name,Value,...>  [default: auto]
 1504         A comma delimited list of parameter name and value pairs for reading
 1505         molecules from files. The supported parameter names for different file
 1506         formats, along with their default values, are shown below:
 1507             
 1508             SD, MOL: removeHydrogens,no,sanitize,yes,strictParsing,yes
 1509             
 1510     -l, --list
 1511         List torsion library information without performing any filtering.
 1512     -m, --mode <filter or count>  [default: filter]
 1513         Specify whether to filter molecules for torsion library [ Ref 146, 152, 159 ] alerts
 1514         by matching rotatable bonds against SMARTS patterns specified for torsion
 1515         rules and write out the rest of the molecules to an outfile or simply count
 1516         the number of matched molecules marked for filtering.
 1517     --mp <yes or no>  [default: no]
 1518         Use multiprocessing.
 1519          
 1520         By default, input data is retrieved in a lazy manner via mp.Pool.imap()
 1521         function employing lazy RDKit data iterable. This allows processing of
 1522         arbitrary large data sets without any additional requirements memory.
 1523         
 1524         All input data may be optionally loaded into memory by mp.Pool.map()
 1525         before starting worker processes in a process pool by setting the value
 1526         of 'inputDataMode' to 'InMemory' in '--mpParams' option.
 1527         
 1528         A word to the wise: The default 'chunkSize' value of 1 during 'Lazy' input
 1529         data mode may adversely impact the performance. The '--mpParams' section
 1530         provides additional information to tune the value of 'chunkSize'.
 1531     --mpParams <Name,Value,...>  [default: auto]
 1532         A comma delimited list of parameter name and value pairs to configure
 1533         multiprocessing.
 1534         
 1535         The supported parameter names along with their default and possible
 1536         values are shown below:
 1537         
 1538             chunkSize, auto
 1539             inputDataMode, Lazy   [ Possible values: InMemory or Lazy ]
 1540             numProcesses, auto   [ Default: mp.cpu_count() ]
 1541         
 1542         These parameters are used by the following functions to configure and
 1543         control the behavior of multiprocessing: mp.Pool(), mp.Pool.map(), and
 1544         mp.Pool.imap().
 1545         
 1546         The chunkSize determines chunks of input data passed to each worker
 1547         process in a process pool by mp.Pool.map() and mp.Pool.imap() functions.
 1548         The default value of chunkSize is dependent on the value of 'inputDataMode'.
 1549         
 1550         The mp.Pool.map() function, invoked during 'InMemory' input data mode,
 1551         automatically converts RDKit data iterable into a list, loads all data into
 1552         memory, and calculates the default chunkSize using the following method
 1553         as shown in its code:
 1554         
 1555             chunkSize, extra = divmod(len(dataIterable), len(numProcesses) * 4)
 1556             if extra: chunkSize += 1
 1557         
 1558         For example, the default chunkSize will be 7 for a pool of 4 worker processes
 1559         and 100 data items.
 1560         
 1561         The mp.Pool.imap() function, invoked during 'Lazy' input data mode, employs
 1562         'lazy' RDKit data iterable to retrieve data as needed, without loading all the
 1563         data into memory. Consequently, the size of input data is not known a priori.
 1564         It's not possible to estimate an optimal value for the chunkSize. The default 
 1565         chunkSize is set to 1.
 1566         
 1567         The default value for the chunkSize during 'Lazy' data mode may adversely
 1568         impact the performance due to the overhead associated with exchanging
 1569         small chunks of data. It is generally a good idea to explicitly set chunkSize to
 1570         a larger value during 'Lazy' input data mode, based on the size of your input
 1571         data and number of processes in the process pool.
 1572         
 1573         The mp.Pool.map() function waits for all worker processes to process all
 1574         the data and return the results. The mp.Pool.imap() function, however,
 1575         returns the the results obtained from worker processes as soon as the
 1576         results become available for specified chunks of data.
 1577         
 1578         The order of data in the results returned by both mp.Pool.map() and 
 1579         mp.Pool.imap() functions always corresponds to the input data.
 1580     -n, --nitrogenLonePairParams <Name,Value,...>  [default: auto]
 1581         A comma delimited list of parameter name and value pairs to match
 1582         torsion SMARTS patterns containing non-standard construct 'N_lp'
 1583         corresponding to nitrogen lone pair.
 1584         
 1585         The supported parameter names along with their default and possible
 1586         values are shown below:
 1587         
 1588             allowHydrogenNbrs, yes   [ Possible values: yes or no ]
 1589             planarityTolerance, 1  [Possible values: >=0] 
 1590             
 1591         These parameters are used during the matching of torsion rules containing
 1592         'N_lp' in their SMARTS patterns. The 'allowHydrogensNbrs' allows the use
 1593         hydrogen neighbors attached to nitrogen during the determination of its
 1594         planarity. The 'planarityTolerance' in degrees represents the tolerance
 1595         allowed for nitrogen to be considered coplanar with its three neighbors.
 1596         
 1597         The torsion rules containing 'N_lp' in their SMARTS patterns are categorized
 1598         into the following two types of rules:
 1599          
 1600             TypeOne:  
 1601             
 1602             [CX4:1][CX4H2:2]!@[NX3;"N_lp":3][CX4:4]
 1603             [C:1][CX4H2:2]!@[NX3;"N_lp":3][C:4]
 1604             ... ... ...
 1605          
 1606             TypeTwo:  
 1607             
 1608             [!#1:1][CX4:2]!@[NX3;"N_lp":3]
 1609             [C:1][$(S(=O)=O):2]!@["N_lp":3]
 1610             ... ... ...
 1611             
 1612         The torsions are matched to torsion rules containing 'N_lp' using specified
 1613         SMARTS patterns without the 'N_lp' along with additional constraints using
 1614         the following methodology:
 1615             
 1616             TypeOne:  
 1617             
 1618             . SMARTS pattern must contain four mapped atoms and the third
 1619                 mapped atom must be a nitrogen matched with 'NX3:3'
 1620             . Nitrogen atom must have 3 neighbors. The 'allowHydrogens'
 1621                 parameter controls inclusion of hydrogens as its neighbors.
 1622             . Nitrogen atom and its 3 neighbors must be coplanar.
 1623                 'planarityTolerance' parameter provides tolerance in degrees
 1624                 for nitrogen to be considered coplanar with its 3 neighbors.
 1625             
 1626             TypeTwo:  
 1627             
 1628             . SMARTS pattern must contain three mapped atoms and the third
 1629                 mapped atom must be a nitrogen matched with 'NX3:3'. The 
 1630                 third mapped atom may contain only 'N_lp:3' The missing 'NX3'
 1631                 is automatically detected.
 1632             . Nitrogen atom must have 3 neighbors. 'allowHydrogens'
 1633                 parameter controls inclusion of hydrogens as neighbors.
 1634             . Nitrogen atom and its 3 neighbors must not be coplanar.
 1635                 'planarityTolerance' parameter provides tolerance in degrees
 1636                 for nitrogen to be considered coplanar with its 3 neighbors.
 1637             . Nitrogen lone pair position equivalent to VSEPR theory is
 1638                 determined based on the position of nitrogen and its neighbors.
 1639                 A vector normal to 3 nitrogen neighbors is calculated and added
 1640                 to the coordinates of nitrogen atom to determine the approximate
 1641                 position of the lone pair. It is used as the fourth position to
 1642                 calculate the torsion angle.
 1643             
 1644     -o, --outfile <outfile>
 1645         Output file name.
 1646     --outfileAlerts <yes or no>  [default: yes]
 1647         Write out alerts information to SD output files.
 1648     --outfileAlertsMode <All or AlertsOnly>  [default: AlertsOnly]
 1649         Write alerts information to SD output files for all alerts or only for alerts
 1650         specified by '--AlertsMode' option. Possible values: All or AlertsOnly
 1651         This option is only valid for 'Yes' value of '--outfileAlerts' option.
 1652         
 1653         The following alerts information is added to SD output files using
 1654         'TorsionAlerts' data field:
 1655             
 1656             RotBondIndices TorsionAlert TorsionIndices TorsionAngle
 1657             TorsionAngleViolation HierarchyClass HierarchySubClass
 1658             TorsionRule TorsionPeaks Tolerances1 Tolerances2
 1659             
 1660         The 'RotBondsCount' and 'TorsionAlertsCount' data fields are always added
 1661         to SD output files containing both remaining and filtered molecules.
 1662         
 1663         Format:
 1664             
 1665             > <RotBondsCount>
 1666             Number
 1667             
 1668             > <TorsionAlertsCount (Green Orange Red)>
 1669             Number Number Number
 1670             
 1671             > <TorsionAlerts (RotBondIndices TorsionAlert TorsionIndices
 1672                 TorsionAngle TorsionAngleViolation HierarchyClass
 1673                 HierarchySubClass TorsionPeaks Tolerances1 Tolerances2
 1674                 TorsionRule)>
 1675             AtomIndex2,AtomIndex3  AlertType AtomIndex1,AtomIndex2,AtomIndex3,
 1676             AtomIndex4 Angle AngleViolation ClassName SubClassName
 1677             CommaDelimPeakValues CommaDelimTol1Values CommDelimTol2Values
 1678             SMARTS ... ... ...
 1679              ... ... ...
 1680             
 1681         A set of 11 values is written out as value of 'TorsionAlerts' data field for
 1682         each torsion in a molecule. The space character is used as a delimiter
 1683         to separate values with in a set and across set. The comma character
 1684         is used to delimit multiple values for each value in a set.
 1685         
 1686         The 'RotBondIndices' and 'TorsionIndices' contain 2 and 4 comma delimited
 1687         values representing atom indices for a rotatable bond and matched torsion.
 1688         The 'TorsionPeaks',  'Tolerances1', and 'Tolerances2' contain same number
 1689         of comma delimited values corresponding to  torsion angle peaks and
 1690         tolerance intervals specified in torsion library. For example:
 1691             
 1692             ... ... ...
 1693             >  <RotBondsCount>  (1) 
 1694             7
 1695             
 1696             >  <TorsionAlertsCount (Green Orange Red)>  (1) 
 1697             3 2 2
 1698             
 1699             >  <TorsionAlerts (RotBondIndices TorsionAlert TorsionIndices
 1700                 TorsionAngle TorsionAngleViolation HierarchyClass
 1701                 HierarchySubClass TorsionPeaks Tolerances1 Tolerances2
 1702                 TorsionRule)>
 1703             1,2 Red 32,2,1,0 0.13 149.87 NC Anilines 180.0 10.0 30.0 [cH0:1][c:2]
 1704             ([cH,nX2H0])!@[NX3H1:3][CX4:4] 8,9 Red 10,9,8,28 -0.85 GG
 1705             None -90.0,90.0 30.0,30.0 60.0,60.0 [cH1:1][a:2]([cH1])!@[a:3]
 1706             ([cH0])[cH0:4]
 1707             ... ... ...
 1708             
 1709     --outfileFiltered <yes or no>  [default: yes]
 1710         Write out a file containing filtered molecules. Its name is automatically
 1711         generated from the specified output file. Default: <OutfileRoot>_
 1712         Filtered.<OutfileExt>.
 1713     --outfilesFilteredByRules <yes or no>  [default: yes]
 1714         Write out SD files containing filtered molecules for individual torsion
 1715         rules triggering alerts in molecules. The name of SD files are automatically
 1716         generated from the specified output file. Default file names: <OutfileRoot>_
 1717         Filtered_TopRule*.sdf
 1718                 
 1719         The following alerts information is added to SD output files:
 1720             
 1721             > <RotBondsCount>
 1722             Number
 1723             
 1724             >  <TorsionAlertsCount (Green Orange Red)> 
 1725             Number Number Number
 1726             
 1727             >  <TorsionRule (HierarchyClass HierarchySubClass TorsionPeaks
 1728                 Tolerances1 Tolerances2 TorsionRule)> 
 1729             ClassName SubClassName CommaDelimPeakValues CommaDelimTol1Values
 1730             CommDelimTol2Values SMARTS ... ... ...
 1731              ... ... ...
 1732             
 1733             > <TorsionRuleAlertsCount (Green Orange Red)>
 1734             Number Number Number
 1735             
 1736             >  <TorsionRuleAlerts (RotBondIndices TorsionAlert TorsionIndices
 1737                 TorsionAngle TorsionAngleViolation)>
 1738             AtomIndex2,AtomIndex3  AlertType AtomIndex1,AtomIndex2,AtomIndex3,
 1739             AtomIndex4 Angle AngleViolation ... ... ...
 1740             
 1741             >  <TorsionRuleMaxAngleViolation>
 1742             Number
 1743              ... ... ...
 1744             
 1745         For example:
 1746             
 1747             ... ... ...
 1748             >  <RotBondsCount>  (1) 
 1749             7
 1750              
 1751             >  <TorsionAlertsCount (Green Orange Red)>  (1) 
 1752             3 2 2
 1753             
 1754             >  <TorsionRule (HierarchyClass HierarchySubClass TorsionPeaks
 1755                 Tolerances1 Tolerances2 TorsionRule)>  (1) 
 1756             NC Anilines 180.0 10.0 30.0 [cH0:1][c:2]([cH,nX2H0])!@[NX3H1:3][CX4:4]
 1757             
 1758             >  <TorsionRuleAlertsCount (Green Orange Red)>  (1) 
 1759             0 0 1
 1760             
 1761             >  <TorsionRuleAlerts (RotBondIndices TorsionAlert TorsionIndices
 1762                 TorsionAngle TorsionAngleViolation)>  (1) 
 1763             1,2 Red 32,2,1,0 0.13 149.87
 1764             
 1765             >  <TorsionRuleMaxAngleViolation>  (1) 
 1766             149.87
 1767             ... ... ...
 1768             
 1769     --outfilesFilteredByRulesMaxCount <All or number>  [default: 10]
 1770         Write out SD files containing filtered molecules for specified number of
 1771         top N torsion rules triggering alerts for the largest number of molecules
 1772         or for all torsion rules triggering alerts across all molecules.
 1773     --outfileSummary <yes or no>  [default: yes] 
 1774         Write out a CVS text file containing summary of torsions rules responsible
 1775         for triggering torsion alerts. Its name is automatically generated from the
 1776         specified output file. Default: <OutfileRoot>_AlertsSummary.csv.
 1777         
 1778         The following alerts information is written to summary text file:
 1779             
 1780             TorsionRule, TorsionPeaks, Tolerances1, Tolerances2,
 1781             HierarchyClass, HierarchySubClass, TorsionAlertType,
 1782             TorsionAlertCount, TorsionAlertMolCount
 1783              
 1784         The double quotes characters are removed from SMART patterns before
 1785         before writing them to a CSV file. In addition, the torsion rules are sorted by
 1786         TorsionAlertMolCount. For example:
 1787             
 1788             "TorsionRule","TorsionPeaks","Tolerances1","Tolerances2",
 1789                 "HierarchyClass","HierarchySubClass","TorsionAlertTypes",
 1790                 "TorsionAlertCount","TorsionAlertMolCount"
 1791             "[!#1:1][CX4H2:2]!@[CX4H2:3][!#1:4]","-60.0,60.0,180.0",
 1792                 "20.0,20.0,20.0","30.0,30.0,30.0","CC","None/[CX4:2][CX4:3]",
 1793                 "Red","16","11"
 1794             ... ... ...
 1795             
 1796     --outfileSDFieldLabels <Type,Label,...>  [default: auto]
 1797         A comma delimited list of SD data field type and label value pairs for writing
 1798         torsion alerts information along with molecules to SD files.
 1799         
 1800         The supported SD data field label type along with their default values are
 1801         shown below:
 1802             
 1803             For all SD files:
 1804             
 1805             RotBondsCountLabel, RotBondsCount
 1806             TorsionAlertsCountLabel, TorsionAlertsCount (Green Orange Red)
 1807             TorsionAlertsLabel, TorsionAlerts (RotBondIndices TorsionAlert
 1808                 TorsionIndices TorsionAngle TorsionAngleViolation
 1809                 HierarchyClass HierarchySubClass TorsionPeaks Tolerances1
 1810                 Tolerances2 TorsionRule)
 1811             
 1812             For individual SD files filtered by torsion rules:
 1813             
 1814             TorsionRuleLabel, TorsionRule (HierarchyClass HierarchySubClass
 1815                 TorsionPeaks Tolerances1 Tolerances2 TorsionRule)
 1816             TorsionRuleAlertsCountLabel, TorsionRuleAlertsCount (Green Orange
 1817                 Red)
 1818             TorsionRuleAlertsLabel, TorsionRuleAlerts (RotBondIndices
 1819                 TorsionAlert TorsionIndices TorsionAngle TorsionAngleViolation)
 1820             TorsionRuleMaxAngleViolationLabel, TorsionRuleMaxAngleViolation
 1821             
 1822     --outfileParams <Name,Value,...>  [default: auto]
 1823         A comma delimited list of parameter name and value pairs for writing
 1824         molecules to files. The supported parameter names for different file
 1825         formats, along with their default values, are shown below:
 1826             
 1827             SD: kekulize,yes,forceV3000,no
 1828             
 1829     --overwrite
 1830         Overwrite existing files.
 1831     -r, --rotBondsSMARTSMode <NonStrict, SemiStrict,...>  [default: SemiStrict]
 1832         SMARTS pattern to use for identifying rotatable bonds in a molecule
 1833         for matching against torsion rules in the torsion library. Possible values:
 1834         NonStrict, SemiStrict, Strict or Specify. The rotatable bond SMARTS matches
 1835         are filtered to ensure that each atom in the rotatable bond is attached to
 1836         at least two heavy atoms.
 1837         
 1838         The following SMARTS patterns are used to identify rotatable bonds for
 1839         different modes:
 1840             
 1841             NonStrict: [!$(*#*)&!D1]-&!@[!$(*#*)&!D1]
 1842             
 1843             SemiStrict:
 1844             [!$(*#*)&!D1&!$(C(F)(F)F)&!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)
 1845             &!$(C([CH3])([CH3])[CH3])]-!@[!$(*#*)&!D1&!$(C(F)(F)F)
 1846             &!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)&!$(C([CH3])([CH3])[CH3])]
 1847             
 1848             Strict:
 1849             [!$(*#*)&!D1&!$(C(F)(F)F)&!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)
 1850             &!$(C([CH3])([CH3])[CH3])&!$([CD3](=[N,O,S])-!@[#7,O,S!D1])
 1851             &!$([#7,O,S!D1]-!@[CD3]=[N,O,S])&!$([CD3](=[N+])-!@[#7!D1])
 1852             &!$([#7!D1]-!@[CD3]=[N+])]-!@[!$(*#*)&!D1&!$(C(F)(F)F)
 1853             &!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)&!$(C([CH3])([CH3])[CH3])]
 1854             
 1855         The 'NonStrict' and 'Strict' SMARTS patterns are available in RDKit. The 
 1856         'NonStrict' SMARTS pattern corresponds to original Daylight SMARTS
 1857          specification for rotatable bonds. The 'SemiStrict' SMARTS pattern is 
 1858          derived from 'Strict' SMARTS patterns for its usage in this script.
 1859         
 1860         You may use any arbitrary SMARTS pattern to identify rotatable bonds by
 1861         choosing 'Specify' value for '-r, --rotBondsSMARTSMode' option and providing its
 1862         value via '--rotBondsSMARTSPattern' option.
 1863     --rotBondsSMARTSPattern <SMARTS>
 1864         SMARTS pattern for identifying rotatable bonds. This option is only valid
 1865         for 'Specify' value of '-r, --rotBondsSMARTSMode' option.
 1866     -t, --torsionLibraryFile <FileName or auto>  [default: auto]
 1867         Specify a XML file name containing data for torsion library hierarchy
 1868         or use default file, TorsionLibrary.xml, available in
 1869         MAYACHEMTOOLS/lib/Python/TorsionAlerts directory.
 1870         
 1871         The format of data in local XML file must match format of the data in Torsion
 1872         Library [ Ref 146, 152, 159 ] file available in MAYACHEMTOOLS directory.
 1873     -w, --workingdir <dir>
 1874         Location of working directory which defaults to the current directory.
 1875 
 1876 Examples:
 1877     To filter molecules containing any rotatable bonds marked with Red alerts
 1878     based on torsion rules in the torsion library and write out SD files containing
 1879     remaining and filtered molecules, and individual SD files for torsion rules
 1880     triggering alerts along with appropriate torsion information for red alerts,
 1881     type:
 1882 
 1883         % RDKitFilterTorsionLibraryAlerts.py -i Sample3D.sdf -o Sample3DOut.sdf
 1884 
 1885     To run the first example for only counting number of alerts without writing
 1886     out any SD files, type:
 1887 
 1888         % RDKitFilterTorsionLibraryAlerts.py -m count -i Sample3D.sdf -o
 1889           Sample3DOut.sdf
 1890     
 1891     To run the first example for filtertering molecules marked with Orange or
 1892     Red alerts and write out SD files, tye:
 1893 
 1894         % RDKitFilterTorsionLibraryAlerts.py -m Filter --alertsMode RedAndOrange
 1895           -i Sample3D.sdf -o Sample3DOut.sdf
 1896     
 1897     To run the first example for filtering molecules and writing out torsion
 1898     information for all alert types to SD files, type:
 1899 
 1900         % RDKitFilterTorsionLibraryAlerts.py --outfileAlertsMode All
 1901           -i Sample3D.sdf -o Sample3DOut.sdf
 1902 
 1903     To run the first example for filtering molecules in multiprocessing mode on
 1904     all available CPUs without loading all data into memory and write out SD files,
 1905     type:
 1906 
 1907         % RDKitFilterTorsionLibraryAlerts.py --mp yes -i Sample3D.sdf
 1908          -o Sample3DOut.sdf
 1909 
 1910     To run the first example for filtering molecules in multiprocessing mode on
 1911     all available CPUs by loading all data into memory and write out a SD files,
 1912     type:
 1913 
 1914         % RDKitFilterTorsionLibraryAlerts.py  --mp yes --mpParams
 1915           "inputDataMode, InMemory" -i Sample3D.sdf  -o Sample3DOut.sdf
 1916 
 1917     To run the first example for filtering molecules in multiprocessing mode on
 1918     specific number of CPUs and chunksize without loading all data into memory
 1919     and write out SD files, type:
 1920 
 1921         % RDKitFilterTorsionLibraryAlerts.py --mp yes --mpParams
 1922           "inputDataMode,lazy,numProcesses,4,chunkSize,8"  -i Sample3D.sdf
 1923           -o Sample3DOut.sdf
 1924 
 1925     To list information about default torsion library file without performing any
 1926     filtering, type:
 1927 
 1928         % RDKitFilterTorsionLibraryAlerts.py -l
 1929 
 1930     To list information about a local torsion library XML file without performing
 1931     any, filtering, type:
 1932 
 1933         % RDKitFilterTorsionLibraryAlerts.py --torsionLibraryFile
 1934           TorsionLibrary.xml -l
 1935 
 1936 Author:
 1937     Manish Sud (msud@san.rr.com)
 1938 
 1939 Collaborator:
 1940     Pat Walters
 1941 
 1942 Acknowledgments:
 1943     Wolfgang Guba, Patrick Penner, and Levi Pierce
 1944 
 1945 See also:
 1946     RDKitFilterChEMBLAlerts.py, RDKitFilterPAINS.py, RDKitFilterTorsionStrainEnergyAlerts.py,
 1947     RDKitConvertFileFormat.py, RDKitSearchSMARTS.py
 1948 
 1949 Copyright:
 1950     Copyright (C) 2026 Manish Sud. All rights reserved.
 1951 
 1952     This script uses the Torsion Library jointly developed by the University
 1953     of Hamburg, Center for Bioinformatics, Hamburg, Germany and
 1954     F. Hoffmann-La-Roche Ltd., Basel, Switzerland.
 1955 
 1956     The functionality available in this script is implemented using RDKit, an
 1957     open source toolkit for cheminformatics developed by Greg Landrum.
 1958 
 1959     This file is part of MayaChemTools.
 1960 
 1961     MayaChemTools is free software; you can redistribute it and/or modify it under
 1962     the terms of the GNU Lesser General Public License as published by the Free
 1963     Software Foundation; either version 3 of the License, or (at your option) any
 1964     later version.
 1965 
 1966 """
 1967 
 1968 if __name__ == "__main__":
 1969     main()