MayaChemTools

    1 #!/bin/env python
    2 #
    3 # File: RDKitFilterTorsionLibraryAlerts.py
    4 # Author: Manish Sud <msud@san.rr.com>
    5 #
    6 # Collaborator: Pat Walters
    7 #
    8 # Acknowledgments: Wolfgang Guba, Patrick Penner, and Levi Pierce
    9 #
   10 # Copyright (C) 2024 Manish Sud. All rights reserved.
   11 #
   12 # This script uses the Torsion Library jointly developed by the University
   13 # of Hamburg, Center for Bioinformatics, Hamburg, Germany and
   14 # F. Hoffmann-La-Roche Ltd., Basel, Switzerland.
   15 #
   16 # The functionality available in this script is implemented using RDKit, an
   17 # open source toolkit for cheminformatics developed by Greg Landrum.
   18 #
   19 # This file is part of MayaChemTools.
   20 #
   21 # MayaChemTools is free software; you can redistribute it and/or modify it under
   22 # the terms of the GNU Lesser General Public License as published by the Free
   23 # Software Foundation; either version 3 of the License, or (at your option) any
   24 # later version.
   25 #
   26 # MayaChemTools is distributed in the hope that it will be useful, but without
   27 # any warranty; without even the implied warranty of merchantability of fitness
   28 # for a particular purpose.  See the GNU Lesser General Public License for more
   29 # details.
   30 #
   31 # You should have received a copy of the GNU Lesser General Public License
   32 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
   33 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
   34 # Boston, MA, 02111-1307, USA.
   35 #
   36 
   37 from __future__ import print_function
   38 
   39 # Add local python path to the global path and import standard library modules...
   40 import os
   41 import sys;  sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python"))
   42 import time
   43 import re
   44 import glob
   45 import multiprocessing as mp
   46 
   47 # RDKit imports...
   48 try:
   49     from rdkit import rdBase
   50     from rdkit import Chem
   51     from rdkit.Chem import rdMolTransforms
   52 except ImportError as ErrMsg:
   53     sys.stderr.write("\nFailed to import RDKit module/package: %s\n" % ErrMsg)
   54     sys.stderr.write("Check/update your RDKit environment and try again.\n\n")
   55     sys.exit(1)
   56 
   57 # MayaChemTools imports...
   58 try:
   59     from docopt import docopt
   60     import MiscUtil
   61     import RDKitUtil
   62     from TorsionAlerts.TorsionLibraryAlerts import TorsionLibraryAlerts
   63 except ImportError as ErrMsg:
   64     sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg)
   65     sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n")
   66     sys.exit(1)
   67 
   68 ScriptName = os.path.basename(sys.argv[0])
   69 Options = {}
   70 OptionsInfo = {}
   71 
   72 def main():
   73     """Start execution of the script."""
   74     
   75     MiscUtil.PrintInfo("\n%s (RDKit v%s; MayaChemTools v%s; %s): Starting...\n" % (ScriptName, rdBase.rdkitVersion, MiscUtil.GetMayaChemToolsVersion(), time.asctime()))
   76     
   77     (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime()
   78     
   79     # Retrieve command line arguments and options...
   80     RetrieveOptions()
   81     
   82     if  Options["--list"]:
   83         # Handle listing of torsion library information...
   84         ProcessListTorsionLibraryOption()
   85     else:
   86         # Process and validate command line arguments and options...
   87         ProcessOptions()
   88         
   89         # Perform actions required by the script...
   90         PerformFiltering()
   91     
   92     MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName)
   93     MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime))
   94 
   95 def PerformFiltering():
   96     """Filter molecules using SMARTS torsion rules in the torsion library file."""
   97 
   98     # Setup a molecule reader...
   99     MiscUtil.PrintInfo("\nProcessing file %s..." % OptionsInfo["Infile"])
  100     Mols  = RDKitUtil.ReadMolecules(OptionsInfo["Infile"], **OptionsInfo["InfileParams"])
  101     
  102     MolCount, ValidMolCount, RemainingMolCount, WriteFailedCount = ProcessMolecules(Mols)
  103 
  104     MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount)
  105     MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount)
  106     MiscUtil.PrintInfo("Number of molecules failed during writing: %d" % WriteFailedCount)
  107     MiscUtil.PrintInfo("Number of ignored molecules: %d" % (MolCount - ValidMolCount + WriteFailedCount))
  108 
  109     MiscUtil.PrintInfo("\nNumber of remaining molecules: %d" % RemainingMolCount)
  110     MiscUtil.PrintInfo("Number of filtered molecules: %d" % (ValidMolCount - RemainingMolCount))
  111 
  112 def ProcessMolecules(Mols):
  113     """Process and filter molecules."""
  114     
  115     if OptionsInfo["MPMode"]:
  116         return ProcessMoleculesUsingMultipleProcesses(Mols)
  117     else:
  118         return ProcessMoleculesUsingSingleProcess(Mols)
  119 
  120 def ProcessMoleculesUsingSingleProcess(Mols):
  121     """Process and filter molecules using a single process."""
  122     
  123     # Instantiate torsion library alerts class...
  124     TorsionLibraryAlertsHandle = InstantiateTorsionLibraryAlertsClass()
  125     
  126     MiscUtil.PrintInfo("\nFiltering molecules...")
  127     
  128     OutfileFilteredMode = OptionsInfo["OutfileFilteredMode"]
  129 
  130     # Set up writers...
  131     OutfilesWriters = SetupOutfilesWriters()
  132     
  133     WriterRemaining = OutfilesWriters["WriterRemaining"]
  134     WriterFiltered = OutfilesWriters["WriterFiltered"]
  135     WriterAlertSummary = OutfilesWriters["WriterAlertSummary"]
  136     
  137     # Initialize alerts summary info...
  138     TorsionAlertsSummaryInfo = InitializeTorsionAlertsSummaryInfo()
  139 
  140     (MolCount, ValidMolCount, RemainingMolCount, WriteFailedCount, FilteredMolWriteCount) = [0] * 5
  141     for Mol in Mols:
  142         MolCount += 1
  143         
  144         if Mol is None:
  145             continue
  146         
  147         if RDKitUtil.IsMolEmpty(Mol):
  148             MiscUtil.PrintWarning("Ignoring empty molecule: %s" % RDKitUtil.GetMolName(Mol, MolCount))
  149             continue
  150 
  151         # Check for 3D flag...
  152         if not Mol.GetConformer().Is3D():
  153             MiscUtil.PrintWarning("3D tag is not set. Ignoring molecule: %s\n" % RDKitUtil.GetMolName(Mol, MolCount))
  154             continue
  155         
  156         ValidMolCount += 1
  157         
  158         # Identify torsion library alerts for rotatable bonds..
  159         RotBondsAlertsStatus, RotBondsAlertsInfo = TorsionLibraryAlertsHandle.IdentifyTorsionLibraryAlertsForRotatableBonds(Mol)
  160         
  161         TrackTorsionAlertsSummaryInfo(TorsionAlertsSummaryInfo, RotBondsAlertsInfo)
  162         
  163         # Write out filtered and remaining molecules...
  164         WriteStatus = True
  165         if RotBondsAlertsStatus:
  166             if OutfileFilteredMode:
  167                 WriteStatus = WriteMolecule(WriterFiltered, Mol, RotBondsAlertsInfo)
  168                 if WriteStatus:
  169                     FilteredMolWriteCount += 1
  170         else:
  171             RemainingMolCount += 1
  172             WriteStatus = WriteMolecule(WriterRemaining, Mol, RotBondsAlertsInfo)
  173         
  174         if not WriteStatus:
  175             WriteFailedCount += 1
  176 
  177     WriteTorsionAlertsSummaryInfo(WriterAlertSummary, TorsionAlertsSummaryInfo)
  178     CloseOutfilesWriters(OutfilesWriters)
  179 
  180     if FilteredMolWriteCount:
  181         WriteTorsionAlertsFilteredByRulesInfo(TorsionAlertsSummaryInfo)
  182     
  183     return (MolCount, ValidMolCount, RemainingMolCount, WriteFailedCount)
  184 
  185 def ProcessMoleculesUsingMultipleProcesses(Mols):
  186     """Process and filter molecules using multiprocessing."""
  187 
  188     MiscUtil.PrintInfo("\nFiltering molecules using multiprocessing...")
  189     
  190     MPParams = OptionsInfo["MPParams"]
  191     OutfileFilteredMode = OptionsInfo["OutfileFilteredMode"]
  192     
  193     # Instantiate torsion strain energy alerts class to list torsion library information...
  194     TorsionLibraryAlertsHandle = InstantiateTorsionLibraryAlertsClass()
  195     
  196     # Set up writers...
  197     OutfilesWriters = SetupOutfilesWriters()
  198     
  199     WriterRemaining = OutfilesWriters["WriterRemaining"]
  200     WriterFiltered = OutfilesWriters["WriterFiltered"]
  201     WriterAlertSummary = OutfilesWriters["WriterAlertSummary"]
  202     
  203     # Initialize alerts summary info...
  204     TorsionAlertsSummaryInfo = InitializeTorsionAlertsSummaryInfo()
  205     
  206     # Setup data for initializing a worker process...
  207     MiscUtil.PrintInfo("Encoding options info and rotatable bond pattern molecule...")
  208     InitializeWorkerProcessArgs = (MiscUtil.ObjectToBase64EncodedString(Options), MiscUtil.ObjectToBase64EncodedString(OptionsInfo))
  209 
  210     # Setup a encoded mols data iterable for a worker process...
  211     WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(Mols)
  212     
  213     # Setup process pool along with data initialization for each process...
  214     MiscUtil.PrintInfo("\nConfiguring multiprocessing using %s method..." % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()"))
  215     MiscUtil.PrintInfo("NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n" % (MPParams["NumProcesses"], MPParams["InputDataMode"], ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"])))
  216     
  217     ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeWorkerProcess, InitializeWorkerProcessArgs)
  218     
  219     # Start processing...
  220     if re.match("^Lazy$", MPParams["InputDataMode"], re.I):
  221         Results = ProcessPool.imap(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
  222     elif re.match("^InMemory$", MPParams["InputDataMode"], re.I):
  223         Results = ProcessPool.map(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
  224     else:
  225         MiscUtil.PrintError("The value, %s, specified for \"--inputDataMode\" is not supported." % (MPParams["InputDataMode"]))
  226     
  227     (MolCount, ValidMolCount, RemainingMolCount, WriteFailedCount, FilteredMolWriteCount) = [0] * 5
  228     for Result in Results:
  229         MolCount += 1
  230         MolIndex, EncodedMol, RotBondsAlertsStatus, RotBondsAlertsInfo = Result
  231         
  232         if EncodedMol is None:
  233             continue
  234         ValidMolCount += 1
  235         
  236         Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
  237         
  238         TrackTorsionAlertsSummaryInfo(TorsionAlertsSummaryInfo, RotBondsAlertsInfo)
  239         
  240         # Write out filtered and remaining molecules...
  241         WriteStatus = True
  242         if RotBondsAlertsStatus:
  243             if OutfileFilteredMode:
  244                 WriteStatus = WriteMolecule(WriterFiltered, Mol, RotBondsAlertsInfo)
  245                 if WriteStatus:
  246                     FilteredMolWriteCount += 1
  247         else:
  248             RemainingMolCount += 1
  249             WriteStatus = WriteMolecule(WriterRemaining, Mol, RotBondsAlertsInfo)
  250         
  251         if not WriteStatus:
  252             WriteFailedCount += 1
  253     
  254     WriteTorsionAlertsSummaryInfo(WriterAlertSummary, TorsionAlertsSummaryInfo)
  255     CloseOutfilesWriters(OutfilesWriters)
  256     
  257     if FilteredMolWriteCount:
  258         WriteTorsionAlertsFilteredByRulesInfo(TorsionAlertsSummaryInfo)
  259     
  260     return (MolCount, ValidMolCount, RemainingMolCount, WriteFailedCount)
  261 
  262 def InitializeWorkerProcess(*EncodedArgs):
  263     """Initialize data for a worker process."""
  264     
  265     global Options, OptionsInfo
  266 
  267     MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid())
  268 
  269     # Decode Options and OptionInfo...
  270     Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0])
  271     OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1])
  272 
  273     # Instantiate torsion slibrary alerts class...
  274     OptionsInfo["TorsionLibraryAlertsHandle"] = InstantiateTorsionLibraryAlertsClass(Quiet = True)
  275 
  276 def WorkerProcess(EncodedMolInfo):
  277     """Process data for a worker process."""
  278 
  279     MolIndex, EncodedMol = EncodedMolInfo
  280     
  281     if EncodedMol is None:
  282         return [MolIndex, None, False, None]
  283     
  284     Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
  285     if RDKitUtil.IsMolEmpty(Mol):
  286         MolName = RDKitUtil.GetMolName(Mol, (MolIndex + 1))
  287         MiscUtil.PrintWarning("Ignoring empty molecule: %s" % MolName)
  288         return [MolIndex, None, False, None]
  289         
  290     # Check for 3D flag...
  291     if not Mol.GetConformer().Is3D():
  292         MolName = RDKitUtil.GetMolName(Mol, (MolIndex + 1))
  293         MiscUtil.PrintWarning("3D tag is not set. Ignoring molecule: %s\n" % MolName)
  294         return [MolIndex, None, False, None]
  295     
  296     # Identify torsion library alerts for rotatable bonds..
  297     TorsionTorsionLibraryAlertsHandle = OptionsInfo["TorsionLibraryAlertsHandle"]
  298     RotBondsAlertsStatus, RotBondsAlertsInfo = TorsionTorsionLibraryAlertsHandle.IdentifyTorsionLibraryAlertsForRotatableBonds(Mol)
  299 
  300     return [MolIndex, EncodedMol, RotBondsAlertsStatus, RotBondsAlertsInfo]
  301 
  302 def InitializeTorsionAlertsSummaryInfo():
  303     """Initialize torsion alerts summary."""
  304 
  305     if OptionsInfo["CountMode"]:
  306         return None
  307     
  308     if not OptionsInfo["TrackAlertsSummaryInfo"]:
  309         return None
  310     
  311     TorsionAlertsSummaryInfo = {}
  312     TorsionAlertsSummaryInfo["RuleIDs"] = []
  313 
  314     for DataLabel in ["SMARTSToRuleIDs", "RuleSMARTS", "HierarchyClassName", "HierarchySubClassName", "TorsionRulePeaks", "TorsionRuleTolerances1", "TorsionRuleTolerances2", "AlertTypes", "AlertTypesMolCount"]:
  315         TorsionAlertsSummaryInfo[DataLabel] = {}
  316         
  317     return TorsionAlertsSummaryInfo
  318 
  319 def TrackTorsionAlertsSummaryInfo(TorsionAlertsSummaryInfo, RotBondsAlertsInfo):
  320     """Track torsion alerts summary information for matched torsion rules in a
  321     molecule."""
  322     
  323     if OptionsInfo["CountMode"]:
  324         return
  325     
  326     if not OptionsInfo["TrackAlertsSummaryInfo"]:
  327         return
  328 
  329     if RotBondsAlertsInfo is None:
  330         return
  331 
  332     MolAlertsInfo = {}
  333     MolAlertsInfo["RuleIDs"] = []
  334     MolAlertsInfo["AlertTypes"] = {}
  335     
  336     for ID in RotBondsAlertsInfo["IDs"]:
  337         if not RotBondsAlertsInfo["MatchStatus"][ID]:
  338             continue
  339 
  340         if OptionsInfo["OutfileAlertsOnly"]:
  341             if RotBondsAlertsInfo["AlertTypes"][ID] not in OptionsInfo["SpecifiedAlertsModeList"]:
  342                 continue
  343         
  344         AlertType = RotBondsAlertsInfo["AlertTypes"][ID]
  345         TorsionRuleNodeID = RotBondsAlertsInfo["TorsionRuleNodeID"][ID]
  346         TorsionRuleSMARTS = RotBondsAlertsInfo["TorsionRuleSMARTS"][ID]
  347 
  348         # Track data for torsion alert summary information across molecules...
  349         if TorsionRuleNodeID not in TorsionAlertsSummaryInfo["RuleSMARTS"]:
  350             TorsionAlertsSummaryInfo["RuleIDs"].append(TorsionRuleNodeID)
  351             TorsionAlertsSummaryInfo["SMARTSToRuleIDs"][TorsionRuleSMARTS] = TorsionRuleNodeID
  352             
  353             TorsionAlertsSummaryInfo["RuleSMARTS"][TorsionRuleNodeID] = TorsionRuleSMARTS
  354             TorsionAlertsSummaryInfo["HierarchyClassName"][TorsionRuleNodeID] = RotBondsAlertsInfo["HierarchyClassNames"][ID]
  355             TorsionAlertsSummaryInfo["HierarchySubClassName"][TorsionRuleNodeID] = RotBondsAlertsInfo["HierarchySubClassNames"][ID]
  356 
  357             TorsionAlertsSummaryInfo["TorsionRulePeaks"][TorsionRuleNodeID] = RotBondsAlertsInfo["TorsionRulePeaks"][ID]
  358             TorsionAlertsSummaryInfo["TorsionRuleTolerances1"][TorsionRuleNodeID] = RotBondsAlertsInfo["TorsionRuleTolerances1"][ID]
  359             TorsionAlertsSummaryInfo["TorsionRuleTolerances2"][TorsionRuleNodeID] = RotBondsAlertsInfo["TorsionRuleTolerances2"][ID]
  360             
  361             # Initialize number of alert types across all molecules...
  362             TorsionAlertsSummaryInfo["AlertTypes"][TorsionRuleNodeID] = {}
  363             
  364             # Initialize number of molecules flagged by each alert type...
  365             TorsionAlertsSummaryInfo["AlertTypesMolCount"][TorsionRuleNodeID] = {}
  366         
  367         if AlertType not in TorsionAlertsSummaryInfo["AlertTypes"][TorsionRuleNodeID]:
  368             TorsionAlertsSummaryInfo["AlertTypes"][TorsionRuleNodeID][AlertType] = 0
  369             TorsionAlertsSummaryInfo["AlertTypesMolCount"][TorsionRuleNodeID][AlertType] = 0
  370         
  371         TorsionAlertsSummaryInfo["AlertTypes"][TorsionRuleNodeID][AlertType] += 1
  372 
  373         # Track data for torsion alert information in a molecule...
  374         if TorsionRuleNodeID not in MolAlertsInfo["AlertTypes"]:
  375             MolAlertsInfo["RuleIDs"].append(TorsionRuleNodeID)
  376             MolAlertsInfo["AlertTypes"][TorsionRuleNodeID] = {}
  377         
  378         if AlertType not in MolAlertsInfo["AlertTypes"][TorsionRuleNodeID]:
  379             MolAlertsInfo["AlertTypes"][TorsionRuleNodeID][AlertType] = 0
  380         MolAlertsInfo["AlertTypes"][TorsionRuleNodeID][AlertType] += 1
  381 
  382     # Track number of molecules flagged by a specific torsion alert...
  383     for TorsionRuleNodeID in MolAlertsInfo["RuleIDs"]:
  384         for AlertType in MolAlertsInfo["AlertTypes"][TorsionRuleNodeID]:
  385             if MolAlertsInfo["AlertTypes"][TorsionRuleNodeID][AlertType]:
  386                 TorsionAlertsSummaryInfo["AlertTypesMolCount"][TorsionRuleNodeID][AlertType] += 1
  387 
  388 def WriteTorsionAlertsSummaryInfo(Writer, TorsionAlertsSummaryInfo):
  389     """Write out torsion alerts summary informatio to a CSV file."""
  390     
  391     if OptionsInfo["CountMode"]:
  392         return
  393     
  394     if not OptionsInfo["OutfileSummaryMode"]:
  395         return
  396 
  397     if len(TorsionAlertsSummaryInfo["RuleIDs"]) == 0:
  398         return
  399     
  400     # Write headers...
  401     QuoteValues = True
  402     Values = ["TorsionRule", "TorsionPeaks", "Tolerances1", "Tolerances2", "HierarchyClass", "HierarchySubClass", "TorsionAlertTypes", "TorsionAlertCount", "TorsionAlertMolCount"]
  403     Writer.write("%s\n" % MiscUtil.JoinWords(Values, ",", QuoteValues))
  404 
  405     SortedRuleIDs = GetSortedTorsionAlertsSummaryInfoRuleIDs(TorsionAlertsSummaryInfo)
  406 
  407     # Write alerts information...
  408     for ID in SortedRuleIDs:
  409         # Remove any double quotes in SMARTS...
  410         RuleSMARTS = TorsionAlertsSummaryInfo["RuleSMARTS"][ID]
  411         RuleSMARTS = re.sub("\"", "", RuleSMARTS, re.I)
  412         
  413         HierarchyClassName = TorsionAlertsSummaryInfo["HierarchyClassName"][ID]
  414         HierarchySubClassName = TorsionAlertsSummaryInfo["HierarchySubClassName"][ID]
  415 
  416         TorsionPeaks = MiscUtil.JoinWords(["%s" % Value for Value in TorsionAlertsSummaryInfo["TorsionRulePeaks"][ID]], ",")
  417         TorsionRuleTolerances1 = MiscUtil.JoinWords(["%s" % Value for Value in TorsionAlertsSummaryInfo["TorsionRuleTolerances1"][ID]], ",")
  418         TorsionRuleTolerances2 = MiscUtil.JoinWords(["%s" % Value for Value in TorsionAlertsSummaryInfo["TorsionRuleTolerances2"][ID]], ",")
  419         
  420         AlertTypes = []
  421         AlertTypeCount = []
  422         AlertTypeMolCount = []
  423         for AlertType in sorted(TorsionAlertsSummaryInfo["AlertTypes"][ID]):
  424             AlertTypes.append(AlertType)
  425             AlertTypeCount.append("%s" % TorsionAlertsSummaryInfo["AlertTypes"][ID][AlertType])
  426             AlertTypeMolCount.append("%s" % TorsionAlertsSummaryInfo["AlertTypesMolCount"][ID][AlertType])
  427         
  428         Values = [RuleSMARTS, TorsionPeaks, TorsionRuleTolerances1, TorsionRuleTolerances2, HierarchyClassName, HierarchySubClassName, "%s" % MiscUtil.JoinWords(AlertTypes, ","), "%s" % (MiscUtil.JoinWords(AlertTypeCount, ",")), "%s" % (MiscUtil.JoinWords(AlertTypeMolCount, ","))]
  429         Writer.write("%s\n" % MiscUtil.JoinWords(Values, ",", QuoteValues))
  430 
  431 def GetSortedTorsionAlertsSummaryInfoRuleIDs(TorsionAlertsSummaryInfo):
  432     """Sort torsion rule IDs by  alert types molecule count in descending order."""
  433 
  434     SortedRuleIDs = []
  435     
  436     RuleIDs = TorsionAlertsSummaryInfo["RuleIDs"]
  437     if len(RuleIDs) == 0:
  438         return SortedRuleIDs
  439     
  440     # Setup a map from AlertTypesMolCount to IDs for sorting alerts...
  441     RuleIDs = TorsionAlertsSummaryInfo["RuleIDs"]
  442     MolCountMap = {}
  443     for ID in RuleIDs:
  444         MolCount = 0
  445         for AlertType in sorted(TorsionAlertsSummaryInfo["AlertTypes"][ID]):
  446             MolCount += TorsionAlertsSummaryInfo["AlertTypesMolCount"][ID][AlertType]
  447         MolCountMap[ID] = MolCount
  448 
  449     SortedRuleIDs = sorted(RuleIDs, key = lambda ID: MolCountMap[ID], reverse = True)
  450     
  451     return SortedRuleIDs
  452 
  453 def WriteTorsionAlertsFilteredByRulesInfo(TorsionAlertsSummaryInfo):
  454     """Write out torsion alerts SD files for individual torsion rules."""
  455     
  456     if OptionsInfo["CountMode"]:
  457         return
  458     
  459     if not OptionsInfo["OutfilesFilteredByRulesMode"]:
  460         return
  461 
  462     if len(TorsionAlertsSummaryInfo["RuleIDs"]) == 0:
  463         return
  464 
  465     # Setup a molecule reader for filtered molecules...
  466     FilteredMols  = RDKitUtil.ReadMolecules(OptionsInfo["OutfileFiltered"], **OptionsInfo["InfileParams"])
  467 
  468     # Get torsion rule IDs for writing out filtered SD files for individual torsion alert rules... 
  469     TorsionRuleIDs = GetTorsionAlertsFilteredByRuleFilesRuleIDs(TorsionAlertsSummaryInfo)
  470 
  471     # Setup writers...
  472     ByRuleOutfilesWriters = SetupByRuleOutfilesWriters(TorsionRuleIDs)
  473     
  474     for Mol in FilteredMols:
  475         # Retrieve torsion alerts info...
  476         TorsionAlertsInfo = RetrieveTorsionAlertsInfo(Mol, TorsionAlertsSummaryInfo)
  477         if TorsionAlertsInfo is None:
  478             continue
  479         
  480         for TorsionRuleID in TorsionRuleIDs:
  481             if TorsionRuleID not in TorsionAlertsInfo["RuleSMARTS"]:
  482                 continue
  483             
  484             WriteMoleculeFilteredByRuleID(ByRuleOutfilesWriters[TorsionRuleID], Mol, TorsionRuleID, TorsionAlertsSummaryInfo, TorsionAlertsInfo)
  485         
  486     CloseByRuleOutfilesWriters(ByRuleOutfilesWriters)
  487     
  488 def GetTorsionAlertsFilteredByRuleFilesRuleIDs(TorsionAlertsSummaryInfo):
  489     """Get torsion rule IDs for writing out individual SD files filtered by torsion alert rules."""
  490     
  491     # Get torsion rule IDs triggering torsion alerts sorted in the order from the most to
  492     # the least number of unique molecules...
  493     RuleIDs = GetSortedTorsionAlertsSummaryInfoRuleIDs(TorsionAlertsSummaryInfo)
  494 
  495     # Select torsion rule IDs for writing out SD files...
  496     if not OptionsInfo["OutfilesFilteredByRulesAllMode"]:
  497         MaxRuleIDs = OptionsInfo["OutfilesFilteredByRulesMaxCount"]
  498         if MaxRuleIDs < len(RuleIDs):
  499             RuleIDs = RuleIDs[0:MaxRuleIDs]
  500     
  501     return RuleIDs
  502 
  503 def RetrieveTorsionAlertsInfo(Mol, TorsionAlertsSummaryInfo):
  504     """Parse torsion alerts data field value to retrieve alerts information for rotatable bonds."""
  505     
  506     TorsionAlertsLabel = OptionsInfo["SDFieldIDsToLabels"]["TorsionAlertsLabel"]
  507     TorsionAlerts = Mol.GetProp(TorsionAlertsLabel) if Mol.HasProp(TorsionAlertsLabel) else None
  508     
  509     if TorsionAlerts is None or len(TorsionAlerts) == 0:
  510         return None
  511 
  512     # Initialize for tracking by rule IDs...
  513     TorsionAlertsInfo = {}
  514     TorsionAlertsInfo["RuleIDs"] = []
  515     
  516     for DataLabel in ["RuleSMARTS", "HierarchyClassName", "HierarchySubClassName", "TorsionRulePeaks", "TorsionRuleTolerances1", "TorsionRuleTolerances2", "AlertTypes", "AtomIndices", "TorsionAtomIndices", "TorsionAngles", "TorsionAngleViolations", "AlertTypesCount"]:
  517         TorsionAlertsInfo[DataLabel] = {}
  518         
  519     ValuesDelimiter = OptionsInfo["IntraSetValuesDelim"]
  520     TorsionAlertsSetSize = 11
  521     
  522     TorsionAlertsWords = TorsionAlerts.split()
  523     if len(TorsionAlertsWords) % TorsionAlertsSetSize:
  524         MiscUtil.PrintError("The number of space delimited values, %s, for TorsionAlerts data field in filtered SD file must be a multiple of %s." % (len(TorsionAlertsWords), TorsionAlertsSetSize))
  525 
  526     ID = 0
  527     for Index in range(0, len(TorsionAlertsWords), TorsionAlertsSetSize):
  528         ID += 1
  529         
  530         RotBondIndices, TorsionAlertType, TorsionIndices, TorsionAngle, TorsionAngleViolation, HierarchyClass, HierarchySubClass, TorsionPeaks, Tolerances1, Tolerances2, TorsionRule = TorsionAlertsWords[Index: Index + TorsionAlertsSetSize]
  531         RotBondIndices = RotBondIndices.split(ValuesDelimiter)
  532         TorsionIndices = TorsionIndices.split(ValuesDelimiter)
  533         TorsionPeaks = TorsionPeaks.split(ValuesDelimiter)
  534         Tolerances1 = Tolerances1.split(ValuesDelimiter)
  535         Tolerances2 = Tolerances2.split(ValuesDelimiter)
  536 
  537         if TorsionRule not in TorsionAlertsSummaryInfo["SMARTSToRuleIDs"]:
  538             MiscUtil.PrintWarning("The SMARTS pattern, %s, for TorsionAlerts data field in filtered SD file doesn't map to any torsion rule..." % TorsionRule)
  539             continue
  540         TorsionRuleNodeID = TorsionAlertsSummaryInfo["SMARTSToRuleIDs"][TorsionRule]
  541 
  542         # Track data for torsion alerts in a molecule...
  543         if TorsionRuleNodeID not in TorsionAlertsInfo["RuleSMARTS"]:
  544             TorsionAlertsInfo["RuleIDs"].append(TorsionRuleNodeID)
  545 
  546             TorsionAlertsInfo["RuleSMARTS"][TorsionRuleNodeID] = TorsionRule
  547             TorsionAlertsInfo["HierarchyClassName"][TorsionRuleNodeID] = HierarchyClass
  548             TorsionAlertsInfo["HierarchySubClassName"][TorsionRuleNodeID] = HierarchySubClass
  549             TorsionAlertsInfo["TorsionRulePeaks"][TorsionRuleNodeID] = TorsionPeaks
  550             TorsionAlertsInfo["TorsionRuleTolerances1"][TorsionRuleNodeID] = Tolerances1
  551             TorsionAlertsInfo["TorsionRuleTolerances2"][TorsionRuleNodeID] = Tolerances2
  552             
  553             TorsionAlertsInfo["AlertTypes"][TorsionRuleNodeID] = []
  554             TorsionAlertsInfo["AtomIndices"][TorsionRuleNodeID] = []
  555             TorsionAlertsInfo["TorsionAtomIndices"][TorsionRuleNodeID] = []
  556             TorsionAlertsInfo["TorsionAngles"][TorsionRuleNodeID] = []
  557             TorsionAlertsInfo["TorsionAngleViolations"][TorsionRuleNodeID] = []
  558 
  559             TorsionAlertsInfo["AlertTypesCount"][TorsionRuleNodeID] = {}
  560             
  561         # Track multiple values for a rule ID...
  562         TorsionAlertsInfo["AlertTypes"][TorsionRuleNodeID].append(TorsionAlertType)
  563         TorsionAlertsInfo["AtomIndices"][TorsionRuleNodeID].append(RotBondIndices)
  564         TorsionAlertsInfo["TorsionAtomIndices"][TorsionRuleNodeID].append(TorsionIndices)
  565         TorsionAlertsInfo["TorsionAngles"][TorsionRuleNodeID].append(TorsionAngle)
  566         TorsionAlertsInfo["TorsionAngleViolations"][TorsionRuleNodeID].append(TorsionAngleViolation)
  567         
  568         # Count alert type for a rule ID...
  569         if TorsionAlertType not in TorsionAlertsInfo["AlertTypesCount"][TorsionRuleNodeID]:
  570             TorsionAlertsInfo["AlertTypesCount"][TorsionRuleNodeID][TorsionAlertType] = 0
  571         TorsionAlertsInfo["AlertTypesCount"][TorsionRuleNodeID][TorsionAlertType] += 1
  572         
  573     return TorsionAlertsInfo
  574     
  575 def WriteMolecule(Writer, Mol, RotBondsAlertsInfo):
  576     """Write out molecule."""
  577     
  578     if OptionsInfo["CountMode"]:
  579         return True
  580 
  581     SetupMolPropertiesForAlertsInformation(Mol, RotBondsAlertsInfo)
  582 
  583     try:
  584         Writer.write(Mol)
  585     except Exception as ErrMsg:
  586         MiscUtil.PrintWarning("Failed to write molecule %s:\n%s\n" % (RDKitUtil.GetMolName(Mol), ErrMsg))
  587         return False
  588     
  589     return True
  590 
  591 def SetupMolPropertiesForAlertsInformation(Mol, RotBondsAlertsInfo):
  592     """Setup molecule properties containing alerts information for rotatable bonds."""
  593 
  594     if not OptionsInfo["OutfileAlerts"]:
  595         return
  596     
  597     # Setup rotatable bonds count...
  598     RotBondsCount = 0
  599     if RotBondsAlertsInfo is not None:
  600         RotBondsCount =  len(RotBondsAlertsInfo["IDs"])
  601     Mol.SetProp(OptionsInfo["SDFieldIDsToLabels"]["RotBondsCountLabel"],  "%s" % RotBondsCount)
  602     
  603     # Setup alert counts for rotatable bonds...
  604     AlertsCount = []
  605     if RotBondsAlertsInfo is not None:
  606         for AlertType in ["Green", "Orange", "Red"]:
  607             if AlertType in RotBondsAlertsInfo["Count"]:
  608                 AlertsCount.append("%s" % RotBondsAlertsInfo["Count"][AlertType])
  609             else:
  610                 AlertsCount.append("0")
  611     
  612     if len(AlertsCount):
  613         Mol.SetProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionAlertsCountLabel"],  "%s" % MiscUtil.JoinWords(AlertsCount, " "))
  614 
  615     # Setup alert information for rotatable bonds...
  616     AlertsInfoValues = []
  617 
  618     # Delimiter for multiple values corresponding to specific set of information for
  619     # a rotatable bond. For example: TorsionAtomIndices
  620     ValuesDelim = OptionsInfo["IntraSetValuesDelim"]
  621 
  622     # Delimiter for various values for a rotatable bond...
  623     RotBondValuesDelim = OptionsInfo["InterSetValuesDelim"]
  624     
  625     # Delimiter for values corresponding to multiple rotatable bonds...
  626     AlertsInfoValuesDelim = OptionsInfo["InterSetValuesDelim"]
  627     
  628     if RotBondsAlertsInfo is not None:
  629         for ID in RotBondsAlertsInfo["IDs"]:
  630             if not RotBondsAlertsInfo["MatchStatus"][ID]:
  631                 continue
  632             
  633             if OptionsInfo["OutfileAlertsOnly"]:
  634                 if RotBondsAlertsInfo["AlertTypes"][ID] not in OptionsInfo["SpecifiedAlertsModeList"]:
  635                     continue
  636             
  637             RotBondValues = []
  638             
  639             # Bond atom indices...
  640             Values = ["%s" % Value for Value in RotBondsAlertsInfo["AtomIndices"][ID]]
  641             RotBondValues.append(ValuesDelim.join(Values))
  642 
  643             # Alert type...
  644             RotBondValues.append(RotBondsAlertsInfo["AlertTypes"][ID])
  645 
  646             # Torsion atom indices...
  647             TorsionAtomIndices = SetupTorsionAtomIndicesValues(RotBondsAlertsInfo["TorsionAtomIndices"][ID], ValuesDelim)
  648             RotBondValues.append(TorsionAtomIndices)
  649 
  650             # Torsion angle...
  651             RotBondValues.append("%.2f" % RotBondsAlertsInfo["TorsionAngles"][ID])
  652 
  653             # Torsion angle violation...
  654             RotBondValues.append("%.2f" % RotBondsAlertsInfo["TorsionAngleViolations"][ID])
  655 
  656             # Hierarchy class and subclass names...
  657             RotBondValues.append("%s" % RotBondsAlertsInfo["HierarchyClassNames"][ID])
  658             RotBondValues.append("%s" % RotBondsAlertsInfo["HierarchySubClassNames"][ID])
  659 
  660             # Torsion rule peaks...
  661             Values = ["%s" % Value for Value in RotBondsAlertsInfo["TorsionRulePeaks"][ID]]
  662             RotBondValues.append(ValuesDelim.join(Values))
  663             
  664             # Torsion rule tolerances...
  665             Values = ["%s" % Value for Value in RotBondsAlertsInfo["TorsionRuleTolerances1"][ID]]
  666             RotBondValues.append(ValuesDelim.join(Values))
  667             Values = ["%s" % Value for Value in RotBondsAlertsInfo["TorsionRuleTolerances2"][ID]]
  668             RotBondValues.append(ValuesDelim.join(Values))
  669             
  670             # Torsion rule SMARTS...
  671             RotBondValues.append("%s" % RotBondsAlertsInfo["TorsionRuleSMARTS"][ID])
  672 
  673             # Track joined values for a rotatable bond...
  674             AlertsInfoValues.append("%s" % RotBondValuesDelim.join(RotBondValues))
  675 
  676     if len(AlertsInfoValues):
  677         Mol.SetProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionAlertsLabel"], "%s" % ("%s" % AlertsInfoValuesDelim.join(AlertsInfoValues)))
  678     
  679 def WriteMoleculeFilteredByRuleID(Writer, Mol, TorsionRuleID, TorsionAlertsSummaryInfo, TorsionAlertsInfo):
  680     """Write out molecule."""
  681     
  682     if OptionsInfo["CountMode"]:
  683         return
  684 
  685     SetupMolPropertiesForFilteredByRuleIDAlertsInformation(Mol, TorsionRuleID, TorsionAlertsSummaryInfo, TorsionAlertsInfo)
  686         
  687     Writer.write(Mol)
  688 
  689 def SetupMolPropertiesForFilteredByRuleIDAlertsInformation(Mol, TorsionRuleID, TorsionAlertsSummaryInfo, TorsionAlertsInfo):
  690     """Setup molecule properties containing alerts information for torsion alerts
  691     fileted by Rule IDs."""
  692 
  693     # Delete torsion alerts information for rotatable bonds...
  694     if Mol.HasProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionAlertsLabel"]):
  695         Mol.ClearProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionAlertsLabel"])
  696 
  697     # Delimiter for values...
  698     IntraSetValuesDelim = OptionsInfo["IntraSetValuesDelim"]
  699     InterSetValuesDelim = OptionsInfo["InterSetValuesDelim"]
  700     
  701     # Setup alert rule information...
  702     AlertRuleInfoValues = []
  703     
  704     AlertRuleInfoValues.append("%s" % TorsionAlertsInfo["HierarchyClassName"][TorsionRuleID])
  705     AlertRuleInfoValues.append("%s" % TorsionAlertsInfo["HierarchySubClassName"][TorsionRuleID])
  706     
  707     Values = ["%s" % Value for Value in TorsionAlertsInfo["TorsionRulePeaks"][TorsionRuleID]]
  708     AlertRuleInfoValues.append(IntraSetValuesDelim.join(Values))
  709     
  710     Values = ["%s" % Value for Value in TorsionAlertsInfo["TorsionRuleTolerances1"][TorsionRuleID]]
  711     AlertRuleInfoValues.append(IntraSetValuesDelim.join(Values))
  712     Values = ["%s" % Value for Value in TorsionAlertsInfo["TorsionRuleTolerances2"][TorsionRuleID]]
  713     AlertRuleInfoValues.append(IntraSetValuesDelim.join(Values))
  714     
  715     AlertRuleInfoValues.append("%s" % TorsionAlertsInfo["RuleSMARTS"][TorsionRuleID])
  716     
  717     Mol.SetProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionRuleLabel"], "%s" % ("%s" % InterSetValuesDelim.join(AlertRuleInfoValues)))
  718 
  719     # Setup alerts count for torsion rule...
  720     AlertsCount = []
  721     for AlertType in ["Green", "Orange", "Red"]:
  722         if AlertType in TorsionAlertsInfo["AlertTypesCount"][TorsionRuleID]:
  723             AlertsCount.append("%s" % TorsionAlertsInfo["AlertTypesCount"][TorsionRuleID][AlertType])
  724         else:
  725             AlertsCount.append("0")
  726     
  727     Mol.SetProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionRuleAlertsCountLabel"],  "%s" % (InterSetValuesDelim.join(AlertsCount)))
  728     
  729     # Setup torsion rule alerts...
  730     AlertsInfoValues = []
  731     for Index in range(0, len(TorsionAlertsInfo["AlertTypes"][TorsionRuleID])):
  732         RotBondInfoValues = []
  733         
  734         # Bond atom indices...
  735         Values = ["%s" % Value for Value in TorsionAlertsInfo["AtomIndices"][TorsionRuleID][Index]]
  736         RotBondInfoValues.append(IntraSetValuesDelim.join(Values))
  737         
  738         # Alert type...
  739         RotBondInfoValues.append(TorsionAlertsInfo["AlertTypes"][TorsionRuleID][Index])
  740         
  741         # Torsion atom indices retrieved from the filtered SD file and stored as strings...
  742         Values = ["%s" % Value for Value in TorsionAlertsInfo["TorsionAtomIndices"][TorsionRuleID][Index]]
  743         RotBondInfoValues.append(IntraSetValuesDelim.join(Values))
  744         
  745         # Torsion angle...
  746         RotBondInfoValues.append(TorsionAlertsInfo["TorsionAngles"][TorsionRuleID][Index])
  747         
  748         # Torsion angle violation...
  749         RotBondInfoValues.append(TorsionAlertsInfo["TorsionAngleViolations"][TorsionRuleID][Index])
  750 
  751         # Track alerts informaiton...
  752         AlertsInfoValues.append("%s" % InterSetValuesDelim.join(RotBondInfoValues))
  753     
  754     Mol.SetProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionRuleAlertsLabel"],  "%s" % (InterSetValuesDelim.join(AlertsInfoValues)))
  755     
  756     # Setup torsion rule alert max angle violation...
  757     TorsionAngleViolations = [float(Angle) for Angle in TorsionAlertsInfo["TorsionAngleViolations"][TorsionRuleID]]
  758     Mol.SetProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionRuleMaxAngleViolationLabel"],  "%.2f" % (max(TorsionAngleViolations)))
  759 
  760 def SetupTorsionAtomIndicesValues(TorsionAtomIndicesList, ValuesDelim):
  761     """Setup torsion atom indices value for output files."""
  762 
  763     # Check for any list values in the list of torsion atom indices used as placeholders
  764     # for positions of lone pairs in torsion rules containing  N_lp...
  765     TorsionAtomsInfo = []
  766     for Value in TorsionAtomIndicesList:
  767         if type(Value) is list:
  768             TorsionAtomsInfo.append("N_lp")
  769         else:
  770             TorsionAtomsInfo.append(Value)
  771             
  772     Values = ["%s" % Value for Value in TorsionAtomsInfo]
  773     
  774     return ValuesDelim.join(Values)
  775     
  776 def SetupOutfilesWriters():
  777     """Setup molecule and summary writers."""
  778 
  779     OutfilesWriters = {"WriterRemaining": None, "WriterFiltered": None, "WriterAlertSummary": None}
  780 
  781     # Writers for SD files...
  782     WriterRemaining, WriterFiltered = SetupMoleculeWriters()
  783     OutfilesWriters["WriterRemaining"] = WriterRemaining
  784     OutfilesWriters["WriterFiltered"] = WriterFiltered
  785     
  786     # Writer for alert summary CSV file...
  787     WriterAlertSummary = SetupAlertSummaryWriter()
  788     OutfilesWriters["WriterAlertSummary"] = WriterAlertSummary
  789 
  790     return OutfilesWriters
  791 
  792 def SetupMoleculeWriters():
  793     """Setup molecule writers."""
  794     
  795     Writer = None
  796     WriterFiltered = None
  797 
  798     if OptionsInfo["CountMode"]:
  799         return (Writer, WriterFiltered)
  800 
  801     Writer = RDKitUtil.MoleculesWriter(OptionsInfo["Outfile"], **OptionsInfo["OutfileParams"])
  802     if Writer is None:
  803         MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OptionsInfo["Outfile"])
  804     MiscUtil.PrintInfo("\nGenerating file %s..." % OptionsInfo["Outfile"])
  805     
  806     if OptionsInfo["OutfileFilteredMode"]:
  807         WriterFiltered = RDKitUtil.MoleculesWriter(OptionsInfo["OutfileFiltered"], **OptionsInfo["OutfileParams"])
  808         if WriterFiltered is None:
  809             MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OptionsInfo["OutfileFiltered"])
  810         MiscUtil.PrintInfo("Generating file %s..." % OptionsInfo["OutfileFiltered"])
  811     
  812     return (Writer, WriterFiltered)
  813 
  814 def SetupAlertSummaryWriter():
  815     """Setup a alert summary writer."""
  816     
  817     Writer = None
  818     
  819     if OptionsInfo["CountMode"]:
  820         return Writer
  821         
  822     if not OptionsInfo["OutfileSummaryMode"]:
  823         return Writer
  824     
  825     Outfile = OptionsInfo["OutfileSummary"]
  826     Writer = open(Outfile, "w")
  827     if Writer is None:
  828         MiscUtil.PrintError("Failed to setup a writer for output fie %s " % Outfile)
  829     
  830     MiscUtil.PrintInfo("Generating file %s..." % Outfile)
  831     
  832     return Writer
  833     
  834 def CloseOutfilesWriters(OutfilesWriters):
  835     """Close outfile writers."""
  836 
  837     for WriterType, Writer in OutfilesWriters.items():
  838         if Writer is not None:
  839             Writer.close()
  840 
  841 def SetupByRuleOutfilesWriters(RuleIDs):
  842     """Setup by rule outfiles writers."""
  843 
  844     # Initialize...
  845     OutfilesWriters = {}
  846     for RuleID in RuleIDs:
  847         OutfilesWriters[RuleID] = None
  848     
  849     if OptionsInfo["CountMode"]:
  850         return OutfilesWriters
  851         
  852     if not OptionsInfo["OutfilesFilteredByRulesMode"]:
  853         return OutfilesWriters
  854     
  855     FileDir, FileName, FileExt = MiscUtil.ParseFileName(Options["--outfile"])
  856     OutfilesRoot = "%s_Filtered_TopRule" % FileName
  857     OutfilesExt = "sdf"
  858 
  859     MsgTxt = "all" if OptionsInfo["OutfilesFilteredByRulesAllMode"] else "top %s" % OptionsInfo["OutfilesFilteredByRulesMaxCount"]
  860     MiscUtil.PrintInfo("\nGenerating output files %s*.%s for %s torsion rules triggering alerts..." % (OutfilesRoot, OutfilesExt, MsgTxt))
  861     
  862     # Delete any existing output files...
  863     Outfiles = glob.glob("%s*.%s" % (OutfilesRoot, OutfilesExt))
  864     if len(Outfiles):
  865         MiscUtil.PrintInfo("Deleting existing output files %s*.%s..." % (OutfilesRoot, OutfilesExt))
  866         for Outfile in Outfiles:
  867             try:
  868                 os.remove(Outfile)
  869             except Exception as ErrMsg:
  870                 MiscUtil.PrintWarning("Failed to delete file: %s" % ErrMsg)
  871     
  872     RuleIndex = 0
  873     for RuleID in RuleIDs:
  874         RuleIndex += 1
  875         Outfile = "%s%s.%s" % (OutfilesRoot, RuleIndex, OutfilesExt)
  876         Writer = RDKitUtil.MoleculesWriter(Outfile, **OptionsInfo["OutfileParams"])
  877         if Writer is None:
  878             MiscUtil.PrintError("Failed to setup a writer for output fie %s " % Outfile)
  879             
  880         OutfilesWriters[RuleID] = Writer
  881 
  882     return OutfilesWriters
  883 
  884 def CloseByRuleOutfilesWriters(OutfilesWriters):
  885     """Close by rule outfile writers."""
  886 
  887     for RuleID, Writer in OutfilesWriters.items():
  888         if Writer is not None:
  889             Writer.close()
  890 
  891 def InstantiateTorsionLibraryAlertsClass(Quiet = False):
  892     """Initialize torsion library alerts class."""
  893 
  894     try:
  895         TorsionLibraryAlertsHandle = TorsionLibraryAlerts(AlertsMode = OptionsInfo["AlertsMode"], MinAlertsCount = OptionsInfo["MinAlertsCount"], NitrogenLonePairAllowHydrogenNbrs = OptionsInfo["NitrogenLonePairParams"]["AllowHydrogenNbrs"], NitrogenLonePairPlanarityTolerance = OptionsInfo["NitrogenLonePairParams"]["PlanarityTolerance"], RotBondsSMARTSMode = OptionsInfo["RotBondsSMARTSMode"], RotBondsSMARTSPattern = OptionsInfo["RotBondsSMARTSPattern"], TorsionLibraryFilePath = OptionsInfo["TorsionLibraryFile"])
  896     except Exception as ErrMsg:
  897         MiscUtil.PrintError("Failed to instantiate TorsionLibraryAlerts:\n%s\n" % (ErrMsg))
  898 
  899     if not Quiet:
  900         MiscUtil.PrintInfo("\nRetrieving data from library file %s..." % TorsionLibraryAlertsHandle.GetTorsionLibraryFilePath())
  901         TorsionLibraryAlertsHandle.ListTorsionLibraryInfo()
  902 
  903     return TorsionLibraryAlertsHandle
  904 
  905 
  906 def ProcessRotatableBondsSMARTSMode():
  907     """"Process SMARTS pattern for rotatable bonds."""
  908 
  909     RotBondsMode = OptionsInfo["RotBondsSMARTSMode"]
  910     
  911     RotBondsSMARTSPattern = None
  912     RotBondsSMARTSPatternSpecified = OptionsInfo["RotBondsSMARTSPatternSpecified"]
  913     
  914     if re.match("^(NonStrict|SemiStrict|Strict)$", RotBondsMode, re.I):
  915         RotBondsSMARTSPattern = None
  916     elif re.match("Specify", RotBondsMode, re.I):
  917         RotBondsSMARTSPatternSpecified = RotBondsSMARTSPatternSpecified.strip()
  918         if not len(RotBondsSMARTSPatternSpecified):
  919             MiscUtil.PrintError("Empty value specified for SMILES/SMARTS pattern in  \"--rotBondsSMARTSPattern\" option, %s." % RotBondsMode)
  920         
  921         RotBondsPatternMol = Chem.MolFromSmarts(RotBondsSMARTSPatternSpecified)
  922         if RotBondsPatternMol is None:
  923             MiscUtil.PrintError("Failed to create rotatable bonds pattern molecule. The rotatable bonds SMARTS pattern, \"%s\", specified using \"--rotBondsSMARTSPattern\" option is not valid." % (RotBondsSMARTSPatternSpecified))
  924     else:
  925         MiscUtil.PrintError("The value, %s, specified for option \"-r, --rotBondsSMARTSMode\" is not valid. " % RotBondsMode)
  926     
  927     OptionsInfo["RotBondsSMARTSPattern"] = RotBondsSMARTSPattern
  928 
  929 def ProcessSDFieldLabelsOption():
  930     """Process SD data field label option."""
  931 
  932     ParamsOptionName = "--outfileSDFieldLabels"
  933     ParamsOptionValue = Options["--outfileSDFieldLabels"]
  934     
  935     ParamsIDsToLabels = {"RotBondsCountLabel": "RotBondsCount", "TorsionAlertsCountLabel": "TorsionAlertsCount (Green Orange Red)", "TorsionAlertsLabel": "TorsionAlerts (RotBondIndices TorsionAlert TorsionIndices TorsionAngle TorsionAngleViolation HierarchyClass HierarchySubClass TorsionPeaks Tolerances1 Tolerances2 TorsionRule)", "TorsionRuleLabel": "TorsionRule (HierarchyClass HierarchySubClass TorsionPeaks Tolerances1 Tolerances2 TorsionRule)", "TorsionRuleAlertsCountLabel": "TorsionRuleAlertsCount (Green Orange Red)", "TorsionRuleAlertsLabel": "TorsionRuleAlerts (RotBondIndices TorsionAlert TorsionIndices TorsionAngle TorsionAngleViolation)", "TorsionRuleMaxAngleViolationLabel": "TorsionRuleMaxAngleViolation"}
  936     
  937     if re.match("^auto$", ParamsOptionValue, re.I):
  938         OptionsInfo["SDFieldIDsToLabels"] = ParamsIDsToLabels
  939         return
  940     
  941     # Setup a canonical paramater names...
  942     ValidParamNames = []
  943     CanonicalParamNamesMap = {}
  944     for ParamName in sorted(ParamsIDsToLabels):
  945         ValidParamNames.append(ParamName)
  946         CanonicalParamNamesMap[ParamName.lower()] = ParamName
  947     
  948     ParamsOptionValue = ParamsOptionValue.strip()
  949     if not ParamsOptionValue:
  950         PrintError("No valid parameter name and value pairs specified using \"%s\" option" % ParamsOptionName)
  951     
  952     ParamsOptionValueWords = ParamsOptionValue.split(",")
  953     if len(ParamsOptionValueWords) % 2:
  954         MiscUtil.PrintError("The number of comma delimited paramater names and values, %d, specified using \"%s\" option must be an even number." % (len(ParamsOptionValueWords), ParamsOptionName))
  955     
  956     # Validate paramater name and value pairs...
  957     for Index in range(0, len(ParamsOptionValueWords), 2):
  958         Name = ParamsOptionValueWords[Index].strip()
  959         Value = ParamsOptionValueWords[Index + 1].strip()
  960 
  961         CanonicalName = Name.lower()
  962         if  not CanonicalName in CanonicalParamNamesMap:
  963             MiscUtil.PrintError("The parameter name, %s, specified using \"%s\" is not a valid name. Supported parameter names: %s" % (Name, ParamsOptionName, " ".join(ValidParamNames)))
  964 
  965         ParamName = CanonicalParamNamesMap[CanonicalName]
  966         ParamValue = Value
  967         
  968         # Set value...
  969         ParamsIDsToLabels[ParamName] = ParamValue
  970     
  971     OptionsInfo["SDFieldIDsToLabels"] = ParamsIDsToLabels
  972 
  973 def ProcessOptionNitrogenLonePairParameters():
  974     """Process nitrogen lone pair parameters option."""
  975 
  976     ParamsOptionName = "--nitrogenLonePairParams"
  977     ParamsOptionValue = Options["--nitrogenLonePairParams"]
  978     
  979     ParamsInfo = {"AllowHydrogenNbrs": True, "PlanarityTolerance": 1.0,}
  980     
  981     if re.match("^auto$", ParamsOptionValue, re.I):
  982         OptionsInfo["NitrogenLonePairParams"] = ParamsInfo
  983         return
  984     
  985     # Setup a canonical paramater names...
  986     ValidParamNames = []
  987     CanonicalParamNamesMap = {}
  988     for ParamName in sorted(ParamsInfo):
  989         ValidParamNames.append(ParamName)
  990         CanonicalParamNamesMap[ParamName.lower()] = ParamName
  991     
  992     ParamsOptionValue = ParamsOptionValue.strip()
  993     if not ParamsOptionValue:
  994         PrintError("No valid parameter name and value pairs specified using \"%s\" option" % ParamsOptionName)
  995     
  996     ParamsOptionValueWords = ParamsOptionValue.split(",")
  997     if len(ParamsOptionValueWords) % 2:
  998         MiscUtil.PrintError("The number of comma delimited paramater names and values, %d, specified using \"%s\" option must be an even number." % (len(ParamsOptionValueWords), ParamsOptionName))
  999     
 1000     # Validate paramater name and value pairs...
 1001     for Index in range(0, len(ParamsOptionValueWords), 2):
 1002         Name = ParamsOptionValueWords[Index].strip()
 1003         Value = ParamsOptionValueWords[Index + 1].strip()
 1004 
 1005         CanonicalName = Name.lower()
 1006         if  not CanonicalName in CanonicalParamNamesMap:
 1007             MiscUtil.PrintError("The parameter name, %s, specified using \"%s\" is not a valid name. Supported parameter names: %s" % (Name, ParamsOptionName, " ".join(ValidParamNames)))
 1008 
 1009         ParamName = CanonicalParamNamesMap[CanonicalName]
 1010         ParamValue = Value
 1011         
 1012         if re.match("^PlanarityTolerance$", ParamName, re.I):
 1013             Value = float(Value)
 1014             if Value < 0:
 1015                 MiscUtil.PrintError("The parameter value, %s, specified for parameter name, %s, using \"%s\" option is not a valid value. Supported values: >= 0" % (Value, Name, ParamsOptionName))
 1016             ParamValue = Value
 1017         elif re.match("^AllowHydrogenNbrs$", ParamName, re.I):
 1018             if not re.match("^(yes|no)$", Value, re.I):
 1019                 MiscUtil.PrintError("The parameter value, %s, specified for parameter name, %s, using \"%s\" option is not a valid value. Supported values: yes or no" % (Value, Name, ParamsOptionName))
 1020             ParamValue = True if re.match("^yes$", Value, re.I) else False
 1021             
 1022         # Set value...
 1023         ParamsInfo[ParamName] = ParamValue
 1024     
 1025     OptionsInfo["NitrogenLonePairParams"] = ParamsInfo
 1026     
 1027 def ProcessOptions():
 1028     """Process and validate command line arguments and options."""
 1029     
 1030     MiscUtil.PrintInfo("Processing options...")
 1031 
 1032     # Validate options...
 1033     ValidateOptions()
 1034     
 1035     OptionsInfo["Infile"] = Options["--infile"]
 1036     ParamsDefaultInfoOverride = {"RemoveHydrogens": False}
 1037     OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters("--infileParams", Options["--infileParams"], InfileName = Options["--infile"], ParamsDefaultInfo = ParamsDefaultInfoOverride)
 1038     
 1039     OptionsInfo["Outfile"] = Options["--outfile"]
 1040     OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters("--outfileParams", Options["--outfileParams"], Options["--infile"], Options["--outfile"])
 1041     
 1042     FileDir, FileName, FileExt = MiscUtil.ParseFileName(Options["--outfile"])
 1043     OutfileFiltered = "%s_Filtered.%s" % (FileName, FileExt)
 1044     OptionsInfo["OutfileFiltered"] = OutfileFiltered
 1045     OptionsInfo["OutfileFilteredMode"] = True if re.match("^yes$", Options["--outfileFiltered"], re.I) else False
 1046     
 1047     OutfileSummary = "%s_AlertsSummary.csv" % (FileName)
 1048     OptionsInfo["OutfileSummary"] = OutfileSummary
 1049     OptionsInfo["OutfileSummaryMode"] = True if re.match("^yes$", Options["--outfileSummary"], re.I) else False
 1050 
 1051     OptionsInfo["OutfilesFilteredByRulesMode"] = True if re.match("^yes$", Options["--outfilesFilteredByRules"], re.I) else False
 1052     OptionsInfo["TrackAlertsSummaryInfo"] = True if (OptionsInfo["OutfileSummaryMode"] or OptionsInfo["OutfilesFilteredByRulesMode"]) else False
 1053 
 1054     OutfilesFilteredByRulesMaxCount = Options["--outfilesFilteredByRulesMaxCount"]
 1055     if not re.match("^All$", OutfilesFilteredByRulesMaxCount, re.I):
 1056         OutfilesFilteredByRulesMaxCount = int(OutfilesFilteredByRulesMaxCount)
 1057     OptionsInfo["OutfilesFilteredByRulesMaxCount"] = OutfilesFilteredByRulesMaxCount
 1058     OptionsInfo["OutfilesFilteredByRulesAllMode"] = True if re.match("^All$", Options["--outfilesFilteredByRulesMaxCount"], re.I) else False
 1059     
 1060     OptionsInfo["OutfileAlerts"] = True if re.match("^yes$", Options["--outfileAlerts"], re.I) else False
 1061 
 1062     if re.match("^yes$", Options["--outfilesFilteredByRules"], re.I):
 1063         if not re.match("^yes$", Options["--outfileAlerts"], re.I):
 1064             MiscUtil.PrintError("The value \"%s\" specified for \"--outfilesFilteredByRules\" option is not valid. The specified value is only allowed during \"yes\" value of \"--outfileAlerts\" option." % (Options["--outfilesFilteredByRules"]))
 1065     
 1066     OptionsInfo["OutfileAlertsMode"] = Options["--outfileAlertsMode"]
 1067     OptionsInfo["OutfileAlertsOnly"] = True if re.match("^AlertsOnly$", Options["--outfileAlertsMode"], re.I) else False
 1068 
 1069     ProcessSDFieldLabelsOption()
 1070     
 1071     OptionsInfo["Overwrite"] = Options["--overwrite"]
 1072     OptionsInfo["CountMode"] = True if re.match("^count$", Options["--mode"], re.I) else False
 1073 
 1074     OptionsInfo["MPMode"] = True if re.match("^yes$", Options["--mp"], re.I) else False
 1075     OptionsInfo["MPParams"] = MiscUtil.ProcessOptionMultiprocessingParameters("--mpParams", Options["--mpParams"])
 1076 
 1077     ProcessOptionNitrogenLonePairParameters()
 1078     
 1079     OptionsInfo["AlertsMode"] = Options["--alertsMode"]
 1080     OptionsInfo["SpecifiedAlertsModeList"] = []
 1081     if re.match("^Red$", Options["--alertsMode"], re.I):
 1082         OptionsInfo["SpecifiedAlertsModeList"].append("Red")
 1083     elif re.match("^RedAndOrange$", Options["--alertsMode"], re.I):
 1084         OptionsInfo["SpecifiedAlertsModeList"].append("Red")
 1085         OptionsInfo["SpecifiedAlertsModeList"].append("Orange")
 1086     
 1087     OptionsInfo["MinAlertsCount"] = int(Options["--alertsMinCount"])
 1088 
 1089     OptionsInfo["RotBondsSMARTSMode"] = Options["--rotBondsSMARTSMode"]
 1090     OptionsInfo["RotBondsSMARTSPatternSpecified"] = Options["--rotBondsSMARTSPattern"]
 1091     ProcessRotatableBondsSMARTSMode()
 1092 
 1093     OptionsInfo["TorsionLibraryFile"] = Options["--torsionLibraryFile"]
 1094 
 1095     # Setup delimiter for writing out torsion alert information to output files...
 1096     OptionsInfo["IntraSetValuesDelim"] = ","
 1097     OptionsInfo["InterSetValuesDelim"] = " "
 1098 
 1099 def RetrieveOptions():
 1100     """Retrieve command line arguments and options."""
 1101     
 1102     # Get options...
 1103     global Options
 1104     Options = docopt(_docoptUsage_)
 1105 
 1106     # Set current working directory to the specified directory...
 1107     WorkingDir = Options["--workingdir"]
 1108     if WorkingDir:
 1109         os.chdir(WorkingDir)
 1110     
 1111     # Handle examples option...
 1112     if "--examples" in Options and Options["--examples"]:
 1113         MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_))
 1114         sys.exit(0)
 1115     
 1116 def ProcessListTorsionLibraryOption():
 1117     """Process list torsion library information."""
 1118 
 1119     # Validate and process dataFile option for listing torsion library information...
 1120     OptionsInfo["TorsionLibraryFile"] = Options["--torsionLibraryFile"]
 1121     if not re.match("^auto$", Options["--torsionLibraryFile"], re.I):
 1122         MiscUtil.ValidateOptionFilePath("-t, --torsionLibraryFile", Options["--torsionLibraryFile"])
 1123 
 1124     # Instantiate TorsionLibraryAlerts using defaults...
 1125     TorsionLibraryAlertsHandle = TorsionLibraryAlerts(TorsionLibraryFilePath = OptionsInfo["TorsionLibraryFile"])
 1126     MiscUtil.PrintInfo("\nRetrieving data from torsion library file %s..." % TorsionLibraryAlertsHandle.GetTorsionLibraryFilePath())
 1127     TorsionLibraryAlertsHandle.ListTorsionLibraryInfo()
 1128 
 1129 def ValidateOptions():
 1130     """Validate option values."""
 1131 
 1132     MiscUtil.ValidateOptionTextValue("-a, --alertsMode", Options["--alertsMode"], "Red RedAndOrange")
 1133     MiscUtil.ValidateOptionIntegerValue("--alertsMinCount", Options["--alertsMinCount"], {">=": 1})
 1134     
 1135     MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"])
 1136     MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "sdf sd mol")
 1137     
 1138     MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "sdf sd")
 1139     if re.match("^filter$", Options["--mode"], re.I):
 1140         MiscUtil.ValidateOptionsOutputFileOverwrite("-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"])
 1141         MiscUtil.ValidateOptionsDistinctFileNames("-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"])
 1142 
 1143     MiscUtil.ValidateOptionTextValue("--outfileFiltered", Options["--outfileFiltered"], "yes no")
 1144     
 1145     MiscUtil.ValidateOptionTextValue("--outfilesFilteredByRules", Options["--outfilesFilteredByRules"], "yes no")
 1146     if not re.match("^All$", Options["--outfilesFilteredByRulesMaxCount"], re.I):
 1147         MiscUtil.ValidateOptionIntegerValue("--outfilesFilteredByRulesMaxCount", Options["--outfilesFilteredByRulesMaxCount"], {">": 0})
 1148     
 1149     MiscUtil.ValidateOptionTextValue("--outfileSummary", Options["--outfileSummary"], "yes no")
 1150     MiscUtil.ValidateOptionTextValue("--outfileAlerts", Options["--outfileAlerts"], "yes no")
 1151     MiscUtil.ValidateOptionTextValue("--outfileAlertsMode", Options["--outfileAlertsMode"], "All AlertsOnly")
 1152     
 1153     MiscUtil.ValidateOptionTextValue("-m, --mode", Options["--mode"], "filter count")
 1154     if re.match("^filter$", Options["--mode"], re.I):
 1155         if not Options["--outfile"]:
 1156             MiscUtil.PrintError("The outfile must be specified using \"-o, --outfile\" during \"filter\" value of \"-m, --mode\" option")
 1157         
 1158     MiscUtil.ValidateOptionTextValue("--mp", Options["--mp"], "yes no")
 1159     
 1160     MiscUtil.ValidateOptionTextValue("-r, --rotBondsSMARTSMode", Options["--rotBondsSMARTSMode"], "NonStrict SemiStrict Strict Specify")
 1161     if re.match("^Specify$", Options["--rotBondsSMARTSMode"], re.I):
 1162         if not Options["--rotBondsSMARTSPattern"]:
 1163             MiscUtil.PrintError("The SMARTS pattern must be specified using \"--rotBondsSMARTSPattern\" during \"Specify\" value of \"-r, --rotBondsSMARTS\" option")
 1164     
 1165     if not re.match("^auto$", Options["--torsionLibraryFile"], re.I):
 1166         MiscUtil.ValidateOptionFilePath("-t, --torsionLibraryFile", Options["--torsionLibraryFile"])
 1167 
 1168 # Setup a usage string for docopt...
 1169 _docoptUsage_ = """
 1170 RDKitFilterTorsionLibraryAlerts.py - Filter torsion library alerts
 1171 
 1172 Usage:
 1173     RDKitFilterTorsionLibraryAlerts.py  [--alertsMode <Red, RedAndOrange>] [--alertsMinCount <Number>]
 1174                                         [--infileParams <Name,Value,...>] [--mode <filter or count>] [--mp <yes or no>] [--mpParams <Name,Value,...>]
 1175                                         [--nitrogenLonePairParams <Name,Value,...>] [--outfileAlerts <yes or no>]
 1176                                         [--outfileAlertsMode <All or AlertsOnly>] [--outfileFiltered <yes or no>]
 1177                                         [--outfilesFilteredByRules <yes or no>] [--outfilesFilteredByRulesMaxCount <All or number>]
 1178                                         [--outfileSummary <yes or no>] [--outfileSDFieldLabels <Type,Label,...>]
 1179                                         [--outfileParams <Name,Value,...>] [--overwrite] [ --rotBondsSMARTSMode <NonStrict, SemiStrict,...>]
 1180                                         [--rotBondsSMARTSPattern <SMARTS>] [--torsionLibraryFile <FileName or auto>] [-w <dir>] -i <infile> -o <outfile>
 1181     RDKitFilterTorsionLibraryAlerts.py [--torsionLibraryFile <FileName or auto>] -l | --list
 1182     RDKitFilterTorsionLibraryAlerts.py -h | --help | -e | --examples
 1183 
 1184 Description:
 1185     Filter strained molecules from an input file for torsion library [ Ref 146, 152, 159 ]
 1186     alerts by matching rotatable bonds against SMARTS patterns specified for torsion
 1187     rules in a torsion library file and write out appropriate molecules to output
 1188     files. The molecules must have 3D coordinates in input file.
 1189     
 1190     The default torsion library file, TorsionLibrary.xml, is available under
 1191     MAYACHEMTOOLS/lib/python/TorsionAlerts directory.
 1192     
 1193     The data in torsion library file is organized in a hierarchical manner. It consists
 1194     of one generic class and six specific classes at the highest level. Each class
 1195     contains multiple subclasses corresponding to named functional groups or
 1196     substructure patterns. The subclasses consist of torsion rules sorted from
 1197     specific to generic torsion patterns. The torsion rule, in turn, contains a list
 1198     of peak values for torsion angles and two tolerance values. A pair of tolerance
 1199     values define torsion bins around a torsion peak value. For example:
 1200          
 1201         <library>
 1202             <hierarchyClass name="GG" id1="G" id2="G">
 1203             ...
 1204             </hierarchyClass>
 1205             <hierarchyClass name="CO" id1="C" id2="O">
 1206                 <hierarchySubClass name="Ester bond I" smarts="O=[C:2][O:3]">
 1207                     <torsionRule smarts="[O:1]=[C:2]!@[O:3]~[CH0:4]">
 1208                         <angleList>
 1209                             <angle value="0.0" tolerance1="20.00"
 1210                              tolerance2="25.00" score="56.52"/>
 1211                         </angleList>
 1212                     </torsionRule>
 1213                     ...
 1214                 ...
 1215              ...
 1216             </hierarchyClass>
 1217             <hierarchyClass name="NC" id1="N" id2="C">
 1218              ...
 1219             </hierarchyClass>
 1220             <hierarchyClass name="SN" id1="S" id2="N">
 1221             ...
 1222             </hierarchyClass>
 1223             <hierarchyClass name="CS" id1="C" id2="S">
 1224             ...
 1225             </hierarchyClass>
 1226             <hierarchyClass name="CC" id1="C" id2="C">
 1227             ...
 1228             </hierarchyClass>
 1229             <hierarchyClass name="SS" id1="S" id2="S">
 1230              ...
 1231             </hierarchyClass>
 1232         </library>
 1233         
 1234     The rotatable bonds in a 3D molecule are identified using a default SMARTS pattern.
 1235     A custom SMARTS pattern may be optionally specified to detect rotatable bonds.
 1236     Each rotatable bond is matched to a torsion rule in the torsion library and
 1237     assigned one of the following three alert categories: Green, Orange or Red. The 
 1238     rotatable bond is marked Green or Orange for the measured angle of the torsion
 1239     pattern within the first or second tolerance bins around a torsion peak.
 1240     Otherwise, it's marked Red implying that the measured angle is not observed in
 1241     the structure databases employed to generate the torsion library.
 1242 
 1243     The following output files are generated after the filtering:
 1244         
 1245         <OutfileRoot>.sdf
 1246         <OutfileRoot>_Filtered.sdf
 1247         <OutfileRoot>_AlertsSummary.csv
 1248         <OutfileRoot>_Filtered_TopRule*.sdf
 1249         
 1250     The supported input file formats are: Mol (.mol), SD (.sdf, .sd)
 1251 
 1252     The supported output file formats are: SD (.sdf, .sd)
 1253 
 1254 Options:
 1255     -a, --alertsMode <Red, RedAndOrange>  [default: Red]
 1256         Torsion library alert types to use for filtering molecules containing
 1257         rotatable bonds marked with Green, Orange, or Red alerts. Possible
 1258         values: Red or RedAndOrange.
 1259     --alertsMinCount <Number>  [default: 1]
 1260         Minimum number of rotatable bond alerts in a molecule for filtering the
 1261         molecule.
 1262     -e, --examples
 1263         Print examples.
 1264     -h, --help
 1265         Print this help message.
 1266     -i, --infile <infile>
 1267         Input file name.
 1268     --infileParams <Name,Value,...>  [default: auto]
 1269         A comma delimited list of parameter name and value pairs for reading
 1270         molecules from files. The supported parameter names for different file
 1271         formats, along with their default values, are shown below:
 1272             
 1273             SD, MOL: removeHydrogens,no,sanitize,yes,strictParsing,yes
 1274             
 1275     -l, --list
 1276         List torsion library information without performing any filtering.
 1277     -m, --mode <filter or count>  [default: filter]
 1278         Specify whether to filter molecules for torsion library [ Ref 146, 152, 159 ] alerts
 1279         by matching rotatable bonds against SMARTS patterns specified for torsion
 1280         rules and write out the rest of the molecules to an outfile or simply count
 1281         the number of matched molecules marked for filtering.
 1282     --mp <yes or no>  [default: no]
 1283         Use multiprocessing.
 1284          
 1285         By default, input data is retrieved in a lazy manner via mp.Pool.imap()
 1286         function employing lazy RDKit data iterable. This allows processing of
 1287         arbitrary large data sets without any additional requirements memory.
 1288         
 1289         All input data may be optionally loaded into memory by mp.Pool.map()
 1290         before starting worker processes in a process pool by setting the value
 1291         of 'inputDataMode' to 'InMemory' in '--mpParams' option.
 1292         
 1293         A word to the wise: The default 'chunkSize' value of 1 during 'Lazy' input
 1294         data mode may adversely impact the performance. The '--mpParams' section
 1295         provides additional information to tune the value of 'chunkSize'.
 1296     --mpParams <Name,Value,...>  [default: auto]
 1297         A comma delimited list of parameter name and value pairs to configure
 1298         multiprocessing.
 1299         
 1300         The supported parameter names along with their default and possible
 1301         values are shown below:
 1302         
 1303             chunkSize, auto
 1304             inputDataMode, Lazy   [ Possible values: InMemory or Lazy ]
 1305             numProcesses, auto   [ Default: mp.cpu_count() ]
 1306         
 1307         These parameters are used by the following functions to configure and
 1308         control the behavior of multiprocessing: mp.Pool(), mp.Pool.map(), and
 1309         mp.Pool.imap().
 1310         
 1311         The chunkSize determines chunks of input data passed to each worker
 1312         process in a process pool by mp.Pool.map() and mp.Pool.imap() functions.
 1313         The default value of chunkSize is dependent on the value of 'inputDataMode'.
 1314         
 1315         The mp.Pool.map() function, invoked during 'InMemory' input data mode,
 1316         automatically converts RDKit data iterable into a list, loads all data into
 1317         memory, and calculates the default chunkSize using the following method
 1318         as shown in its code:
 1319         
 1320             chunkSize, extra = divmod(len(dataIterable), len(numProcesses) * 4)
 1321             if extra: chunkSize += 1
 1322         
 1323         For example, the default chunkSize will be 7 for a pool of 4 worker processes
 1324         and 100 data items.
 1325         
 1326         The mp.Pool.imap() function, invoked during 'Lazy' input data mode, employs
 1327         'lazy' RDKit data iterable to retrieve data as needed, without loading all the
 1328         data into memory. Consequently, the size of input data is not known a priori.
 1329         It's not possible to estimate an optimal value for the chunkSize. The default 
 1330         chunkSize is set to 1.
 1331         
 1332         The default value for the chunkSize during 'Lazy' data mode may adversely
 1333         impact the performance due to the overhead associated with exchanging
 1334         small chunks of data. It is generally a good idea to explicitly set chunkSize to
 1335         a larger value during 'Lazy' input data mode, based on the size of your input
 1336         data and number of processes in the process pool.
 1337         
 1338         The mp.Pool.map() function waits for all worker processes to process all
 1339         the data and return the results. The mp.Pool.imap() function, however,
 1340         returns the the results obtained from worker processes as soon as the
 1341         results become available for specified chunks of data.
 1342         
 1343         The order of data in the results returned by both mp.Pool.map() and 
 1344         mp.Pool.imap() functions always corresponds to the input data.
 1345     -n, --nitrogenLonePairParams <Name,Value,...>  [default: auto]
 1346         A comma delimited list of parameter name and value pairs to match
 1347         torsion SMARTS patterns containing non-standard construct 'N_lp'
 1348         corresponding to nitrogen lone pair.
 1349         
 1350         The supported parameter names along with their default and possible
 1351         values are shown below:
 1352         
 1353             allowHydrogenNbrs, yes   [ Possible values: yes or no ]
 1354             planarityTolerance, 1  [Possible values: >=0] 
 1355             
 1356         These parameters are used during the matching of torsion rules containing
 1357         'N_lp' in their SMARTS patterns. The 'allowHydrogensNbrs' allows the use
 1358         hydrogen neighbors attached to nitrogen during the determination of its
 1359         planarity. The 'planarityTolerance' in degrees represents the tolerance
 1360         allowed for nitrogen to be considered coplanar with its three neighbors.
 1361         
 1362         The torsion rules containing 'N_lp' in their SMARTS patterns are categorized
 1363         into the following two types of rules:
 1364          
 1365             TypeOne:  
 1366             
 1367             [CX4:1][CX4H2:2]!@[NX3;"N_lp":3][CX4:4]
 1368             [C:1][CX4H2:2]!@[NX3;"N_lp":3][C:4]
 1369             ... ... ...
 1370          
 1371             TypeTwo:  
 1372             
 1373             [!#1:1][CX4:2]!@[NX3;"N_lp":3]
 1374             [C:1][$(S(=O)=O):2]!@["N_lp":3]
 1375             ... ... ...
 1376             
 1377         The torsions are matched to torsion rules containing 'N_lp' using specified
 1378         SMARTS patterns without the 'N_lp' along with additional constraints using
 1379         the following methodology:
 1380             
 1381             TypeOne:  
 1382             
 1383             . SMARTS pattern must contain four mapped atoms and the third
 1384                 mapped atom must be a nitrogen matched with 'NX3:3'
 1385             . Nitrogen atom must have 3 neighbors. The 'allowHydrogens'
 1386                 parameter controls inclusion of hydrogens as its neighbors.
 1387             . Nitrogen atom and its 3 neighbors must be coplanar.
 1388                 'planarityTolerance' parameter provides tolerance in degrees
 1389                 for nitrogen to be considered coplanar with its 3 neighbors.
 1390             
 1391             TypeTwo:  
 1392             
 1393             . SMARTS pattern must contain three mapped atoms and the third
 1394                 mapped atom must be a nitrogen matched with 'NX3:3'. The 
 1395                 third mapped atom may contain only 'N_lp:3' The missing 'NX3'
 1396                 is automatically detected.
 1397             . Nitrogen atom must have 3 neighbors. 'allowHydrogens'
 1398                 parameter controls inclusion of hydrogens as neighbors.
 1399             . Nitrogen atom and its 3 neighbors must not be coplanar.
 1400                 'planarityTolerance' parameter provides tolerance in degrees
 1401                 for nitrogen to be considered coplanar with its 3 neighbors.
 1402             . Nitrogen lone pair position equivalent to VSEPR theory is
 1403                 determined based on the position of nitrogen and its neighbors.
 1404                 A vector normal to 3 nitrogen neighbors is calculated and added
 1405                 to the coordinates of nitrogen atom to determine the approximate
 1406                 position of the lone pair. It is used as the fourth position to
 1407                 calculate the torsion angle.
 1408             
 1409     -o, --outfile <outfile>
 1410         Output file name.
 1411     --outfileAlerts <yes or no>  [default: yes]
 1412         Write out alerts information to SD output files.
 1413     --outfileAlertsMode <All or AlertsOnly>  [default: AlertsOnly]
 1414         Write alerts information to SD output files for all alerts or only for alerts
 1415         specified by '--AlertsMode' option. Possible values: All or AlertsOnly
 1416         This option is only valid for 'Yes' value of '--outfileAlerts' option.
 1417         
 1418         The following alerts information is added to SD output files using
 1419         'TorsionAlerts' data field:
 1420             
 1421             RotBondIndices TorsionAlert TorsionIndices TorsionAngle
 1422             TorsionAngleViolation HierarchyClass HierarchySubClass
 1423             TorsionRule TorsionPeaks Tolerances1 Tolerances2
 1424             
 1425         The 'RotBondsCount' and 'TorsionAlertsCount' data fields are always added
 1426         to SD output files containing both remaining and filtered molecules.
 1427         
 1428         Format:
 1429             
 1430             > <RotBondsCount>
 1431             Number
 1432             
 1433             > <TorsionAlertsCount (Green Orange Red)>
 1434             Number Number Number
 1435             
 1436             > <TorsionAlerts (RotBondIndices TorsionAlert TorsionIndices
 1437                 TorsionAngle TorsionAngleViolation HierarchyClass
 1438                 HierarchySubClass TorsionPeaks Tolerances1 Tolerances2
 1439                 TorsionRule)>
 1440             AtomIndex2,AtomIndex3  AlertType AtomIndex1,AtomIndex2,AtomIndex3,
 1441             AtomIndex4 Angle AngleViolation ClassName SubClassName
 1442             CommaDelimPeakValues CommaDelimTol1Values CommDelimTol2Values
 1443             SMARTS ... ... ...
 1444              ... ... ...
 1445             
 1446         A set of 11 values is written out as value of 'TorsionAlerts' data field for
 1447         each torsion in a molecule. The space character is used as a delimiter
 1448         to separate values with in a set and across set. The comma character
 1449         is used to delimit multiple values for each value in a set.
 1450         
 1451         The 'RotBondIndices' and 'TorsionIndices' contain 2 and 4 comma delimited
 1452         values representing atom indices for a rotatable bond and matched torsion.
 1453         The 'TorsionPeaks',  'Tolerances1', and 'Tolerances2' contain same number
 1454         of comma delimited values corresponding to  torsion angle peaks and
 1455         tolerance intervals specified in torsion library. For example:
 1456             
 1457             ... ... ...
 1458             >  <RotBondsCount>  (1) 
 1459             7
 1460             
 1461             >  <TorsionAlertsCount (Green Orange Red)>  (1) 
 1462             3 2 2
 1463             
 1464             >  <TorsionAlerts (RotBondIndices TorsionAlert TorsionIndices
 1465                 TorsionAngle TorsionAngleViolation HierarchyClass
 1466                 HierarchySubClass TorsionPeaks Tolerances1 Tolerances2
 1467                 TorsionRule)>
 1468             1,2 Red 32,2,1,0 0.13 149.87 NC Anilines 180.0 10.0 30.0 [cH0:1][c:2]
 1469             ([cH,nX2H0])!@[NX3H1:3][CX4:4] 8,9 Red 10,9,8,28 -0.85 GG
 1470             None -90.0,90.0 30.0,30.0 60.0,60.0 [cH1:1][a:2]([cH1])!@[a:3]
 1471             ([cH0])[cH0:4]
 1472             ... ... ...
 1473             
 1474     --outfileFiltered <yes or no>  [default: yes]
 1475         Write out a file containing filtered molecules. Its name is automatically
 1476         generated from the specified output file. Default: <OutfileRoot>_
 1477         Filtered.<OutfileExt>.
 1478     --outfilesFilteredByRules <yes or no>  [default: yes]
 1479         Write out SD files containing filtered molecules for individual torsion
 1480         rules triggering alerts in molecules. The name of SD files are automatically
 1481         generated from the specified output file. Default file names: <OutfileRoot>_
 1482         Filtered_TopRule*.sdf
 1483                 
 1484         The following alerts information is added to SD output files:
 1485             
 1486             > <RotBondsCount>
 1487             Number
 1488             
 1489             >  <TorsionAlertsCount (Green Orange Red)> 
 1490             Number Number Number
 1491             
 1492             >  <TorsionRule (HierarchyClass HierarchySubClass TorsionPeaks
 1493                 Tolerances1 Tolerances2 TorsionRule)> 
 1494             ClassName SubClassName CommaDelimPeakValues CommaDelimTol1Values
 1495             CommDelimTol2Values SMARTS ... ... ...
 1496              ... ... ...
 1497             
 1498             > <TorsionRuleAlertsCount (Green Orange Red)>
 1499             Number Number Number
 1500             
 1501             >  <TorsionRuleAlerts (RotBondIndices TorsionAlert TorsionIndices
 1502                 TorsionAngle TorsionAngleViolation)>
 1503             AtomIndex2,AtomIndex3  AlertType AtomIndex1,AtomIndex2,AtomIndex3,
 1504             AtomIndex4 Angle AngleViolation ... ... ...
 1505             
 1506             >  <TorsionRuleMaxAngleViolation>
 1507             Number
 1508              ... ... ...
 1509             
 1510         For example:
 1511             
 1512             ... ... ...
 1513             >  <RotBondsCount>  (1) 
 1514             7
 1515              
 1516             >  <TorsionAlertsCount (Green Orange Red)>  (1) 
 1517             3 2 2
 1518             
 1519             >  <TorsionRule (HierarchyClass HierarchySubClass TorsionPeaks
 1520                 Tolerances1 Tolerances2 TorsionRule)>  (1) 
 1521             NC Anilines 180.0 10.0 30.0 [cH0:1][c:2]([cH,nX2H0])!@[NX3H1:3][CX4:4]
 1522             
 1523             >  <TorsionRuleAlertsCount (Green Orange Red)>  (1) 
 1524             0 0 1
 1525             
 1526             >  <TorsionRuleAlerts (RotBondIndices TorsionAlert TorsionIndices
 1527                 TorsionAngle TorsionAngleViolation)>  (1) 
 1528             1,2 Red 32,2,1,0 0.13 149.87
 1529             
 1530             >  <TorsionRuleMaxAngleViolation>  (1) 
 1531             149.87
 1532             ... ... ...
 1533             
 1534     --outfilesFilteredByRulesMaxCount <All or number>  [default: 10]
 1535         Write out SD files containing filtered molecules for specified number of
 1536         top N torsion rules triggering alerts for the largest number of molecules
 1537         or for all torsion rules triggering alerts across all molecules.
 1538     --outfileSummary <yes or no>  [default: yes] 
 1539         Write out a CVS text file containing summary of torsions rules responsible
 1540         for triggering torsion alerts. Its name is automatically generated from the
 1541         specified output file. Default: <OutfileRoot>_AlertsSummary.csv.
 1542         
 1543         The following alerts information is written to summary text file:
 1544             
 1545             TorsionRule, TorsionPeaks, Tolerances1, Tolerances2,
 1546             HierarchyClass, HierarchySubClass, TorsionAlertType,
 1547             TorsionAlertCount, TorsionAlertMolCount
 1548              
 1549         The double quotes characters are removed from SMART patterns before
 1550         before writing them to a CSV file. In addition, the torsion rules are sorted by
 1551         TorsionAlertMolCount. For example:
 1552             
 1553             "TorsionRule","TorsionPeaks","Tolerances1","Tolerances2",
 1554                 "HierarchyClass","HierarchySubClass","TorsionAlertTypes",
 1555                 "TorsionAlertCount","TorsionAlertMolCount"
 1556             "[!#1:1][CX4H2:2]!@[CX4H2:3][!#1:4]","-60.0,60.0,180.0",
 1557                 "20.0,20.0,20.0","30.0,30.0,30.0","CC","None/[CX4:2][CX4:3]",
 1558                 "Red","16","11"
 1559             ... ... ...
 1560             
 1561     --outfileSDFieldLabels <Type,Label,...>  [default: auto]
 1562         A comma delimited list of SD data field type and label value pairs for writing
 1563         torsion alerts information along with molecules to SD files.
 1564         
 1565         The supported SD data field label type along with their default values are
 1566         shown below:
 1567             
 1568             For all SD files:
 1569             
 1570             RotBondsCountLabel, RotBondsCount
 1571             TorsionAlertsCountLabel, TorsionAlertsCount (Green Orange Red)
 1572             TorsionAlertsLabel, TorsionAlerts (RotBondIndices TorsionAlert
 1573                 TorsionIndices TorsionAngle TorsionAngleViolation
 1574                 HierarchyClass HierarchySubClass TorsionPeaks Tolerances1
 1575                 Tolerances2 TorsionRule)
 1576             
 1577             For individual SD files filtered by torsion rules:
 1578             
 1579             TorsionRuleLabel, TorsionRule (HierarchyClass HierarchySubClass
 1580                 TorsionPeaks Tolerances1 Tolerances2 TorsionRule)
 1581             TorsionRuleAlertsCountLabel, TorsionRuleAlertsCount (Green Orange
 1582                 Red)
 1583             TorsionRuleAlertsLabel, TorsionRuleAlerts (RotBondIndices
 1584                 TorsionAlert TorsionIndices TorsionAngle TorsionAngleViolation)
 1585             TorsionRuleMaxAngleViolationLabel, TorsionRuleMaxAngleViolation
 1586             
 1587     --outfileParams <Name,Value,...>  [default: auto]
 1588         A comma delimited list of parameter name and value pairs for writing
 1589         molecules to files. The supported parameter names for different file
 1590         formats, along with their default values, are shown below:
 1591             
 1592             SD: kekulize,yes,forceV3000,no
 1593             
 1594     --overwrite
 1595         Overwrite existing files.
 1596     -r, --rotBondsSMARTSMode <NonStrict, SemiStrict,...>  [default: SemiStrict]
 1597         SMARTS pattern to use for identifying rotatable bonds in a molecule
 1598         for matching against torsion rules in the torsion library. Possible values:
 1599         NonStrict, SemiStrict, Strict or Specify. The rotatable bond SMARTS matches
 1600         are filtered to ensure that each atom in the rotatable bond is attached to
 1601         at least two heavy atoms.
 1602         
 1603         The following SMARTS patterns are used to identify rotatable bonds for
 1604         different modes:
 1605             
 1606             NonStrict: [!$(*#*)&!D1]-&!@[!$(*#*)&!D1]
 1607             
 1608             SemiStrict:
 1609             [!$(*#*)&!D1&!$(C(F)(F)F)&!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)
 1610             &!$(C([CH3])([CH3])[CH3])]-!@[!$(*#*)&!D1&!$(C(F)(F)F)
 1611             &!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)&!$(C([CH3])([CH3])[CH3])]
 1612             
 1613             Strict:
 1614             [!$(*#*)&!D1&!$(C(F)(F)F)&!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)
 1615             &!$(C([CH3])([CH3])[CH3])&!$([CD3](=[N,O,S])-!@[#7,O,S!D1])
 1616             &!$([#7,O,S!D1]-!@[CD3]=[N,O,S])&!$([CD3](=[N+])-!@[#7!D1])
 1617             &!$([#7!D1]-!@[CD3]=[N+])]-!@[!$(*#*)&!D1&!$(C(F)(F)F)
 1618             &!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)&!$(C([CH3])([CH3])[CH3])]
 1619             
 1620         The 'NonStrict' and 'Strict' SMARTS patterns are available in RDKit. The 
 1621         'NonStrict' SMARTS pattern corresponds to original Daylight SMARTS
 1622          specification for rotatable bonds. The 'SemiStrict' SMARTS pattern is 
 1623          derived from 'Strict' SMARTS patterns for its usage in this script.
 1624         
 1625         You may use any arbitrary SMARTS pattern to identify rotatable bonds by
 1626         choosing 'Specify' value for '-r, --rotBondsSMARTSMode' option and providing its
 1627         value via '--rotBondsSMARTSPattern' option.
 1628     --rotBondsSMARTSPattern <SMARTS>
 1629         SMARTS pattern for identifying rotatable bonds. This option is only valid
 1630         for 'Specify' value of '-r, --rotBondsSMARTSMode' option.
 1631     -t, --torsionLibraryFile <FileName or auto>  [default: auto]
 1632         Specify a XML file name containing data for torsion library hierarchy
 1633         or use default file, TorsionLibrary.xml, available in
 1634         MAYACHEMTOOLS/lib/Python/TorsionAlerts directory.
 1635         
 1636         The format of data in local XML file must match format of the data in Torsion
 1637         Library [ Ref 146, 152, 159 ] file available in MAYACHEMTOOLS directory.
 1638     -w, --workingdir <dir>
 1639         Location of working directory which defaults to the current directory.
 1640 
 1641 Examples:
 1642     To filter molecules containing any rotatable bonds marked with Red alerts
 1643     based on torsion rules in the torsion library and write out SD files containing
 1644     remaining and filtered molecules, and individual SD files for torsion rules
 1645     triggering alerts along with appropriate torsion information for red alerts,
 1646     type:
 1647 
 1648         % RDKitFilterTorsionLibraryAlerts.py -i Sample3D.sdf -o Sample3DOut.sdf
 1649 
 1650     To run the first example for only counting number of alerts without writing
 1651     out any SD files, type:
 1652 
 1653         % RDKitFilterTorsionLibraryAlerts.py -m count -i Sample3D.sdf -o
 1654           Sample3DOut.sdf
 1655     
 1656     To run the first example for filtertering molecules marked with Orange or
 1657     Red alerts and write out SD files, tye:
 1658 
 1659         % RDKitFilterTorsionLibraryAlerts.py -m Filter --alertsMode RedAndOrange
 1660           -i Sample3D.sdf -o Sample3DOut.sdf
 1661     
 1662     To run the first example for filtering molecules and writing out torsion
 1663     information for all alert types to SD files, type:
 1664 
 1665         % RDKitFilterTorsionLibraryAlerts.py --outfileAlertsMode All
 1666           -i Sample3D.sdf -o Sample3DOut.sdf
 1667 
 1668     To run the first example for filtering molecules in multiprocessing mode on
 1669     all available CPUs without loading all data into memory and write out SD files,
 1670     type:
 1671 
 1672         % RDKitFilterTorsionLibraryAlerts.py --mp yes -i Sample3D.sdf
 1673          -o Sample3DOut.sdf
 1674 
 1675     To run the first example for filtering molecules in multiprocessing mode on
 1676     all available CPUs by loading all data into memory and write out a SD files,
 1677     type:
 1678 
 1679         % RDKitFilterTorsionLibraryAlerts.py  --mp yes --mpParams
 1680           "inputDataMode, InMemory" -i Sample3D.sdf  -o Sample3DOut.sdf
 1681 
 1682     To run the first example for filtering molecules in multiprocessing mode on
 1683     specific number of CPUs and chunksize without loading all data into memory
 1684     and write out SD files, type:
 1685 
 1686         % RDKitFilterTorsionLibraryAlerts.py --mp yes --mpParams
 1687           "inputDataMode,lazy,numProcesses,4,chunkSize,8"  -i Sample3D.sdf
 1688           -o Sample3DOut.sdf
 1689 
 1690     To list information about default torsion library file without performing any
 1691     filtering, type:
 1692 
 1693         % RDKitFilterTorsionLibraryAlerts.py -l
 1694 
 1695     To list information about a local torsion library XML file without performing
 1696     any, filtering, type:
 1697 
 1698         % RDKitFilterTorsionLibraryAlerts.py --torsionLibraryFile
 1699           TorsionLibrary.xml -l
 1700 
 1701 Author:
 1702     Manish Sud (msud@san.rr.com)
 1703 
 1704 Collaborator:
 1705     Pat Walters
 1706 
 1707 Acknowledgments:
 1708     Wolfgang Guba, Patrick Penner, and Levi Pierce
 1709 
 1710 See also:
 1711     RDKitFilterChEMBLAlerts.py, RDKitFilterPAINS.py, RDKitFilterTorsionStrainEnergyAlerts.py,
 1712     RDKitConvertFileFormat.py, RDKitSearchSMARTS.py
 1713 
 1714 Copyright:
 1715     Copyright (C) 2024 Manish Sud. All rights reserved.
 1716 
 1717     This script uses the Torsion Library jointly developed by the University
 1718     of Hamburg, Center for Bioinformatics, Hamburg, Germany and
 1719     F. Hoffmann-La-Roche Ltd., Basel, Switzerland.
 1720 
 1721     The functionality available in this script is implemented using RDKit, an
 1722     open source toolkit for cheminformatics developed by Greg Landrum.
 1723 
 1724     This file is part of MayaChemTools.
 1725 
 1726     MayaChemTools is free software; you can redistribute it and/or modify it under
 1727     the terms of the GNU Lesser General Public License as published by the Free
 1728     Software Foundation; either version 3 of the License, or (at your option) any
 1729     later version.
 1730 
 1731 """
 1732 
 1733 if __name__ == "__main__":
 1734     main()