MayaChemTools

    1 #!/bin/env python
    2 #
    3 # File: RDKitFilterTorsionLibraryAlerts.py
    4 # Author: Manish Sud <msud@san.rr.com>
    5 #
    6 # Collaborator: Pat Walters
    7 #
    8 # Acknowledgments: Wolfgang Guba, Patrick Penner, and Levi Pierce
    9 #
   10 # Copyright (C) 2024 Manish Sud. All rights reserved.
   11 #
   12 # This script uses the Torsion Library jointly developed by the University
   13 # of Hamburg, Center for Bioinformatics, Hamburg, Germany and
   14 # F. Hoffmann-La-Roche Ltd., Basel, Switzerland.
   15 #
   16 # The functionality available in this script is implemented using RDKit, an
   17 # open source toolkit for cheminformatics developed by Greg Landrum.
   18 #
   19 # This file is part of MayaChemTools.
   20 #
   21 # MayaChemTools is free software; you can redistribute it and/or modify it under
   22 # the terms of the GNU Lesser General Public License as published by the Free
   23 # Software Foundation; either version 3 of the License, or (at your option) any
   24 # later version.
   25 #
   26 # MayaChemTools is distributed in the hope that it will be useful, but without
   27 # any warranty; without even the implied warranty of merchantability or fitness
   28 # for a particular purpose.  See the GNU Lesser General Public License for more
   29 # details.
   30 #
   31 # You should have received a copy of the GNU Lesser General Public License
   32 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
   33 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
   34 # Boston, MA, 02111-1307, USA.
   35 #
   36 
   37 from __future__ import print_function
   38 
   39 # Add local python path to the global path and import standard library modules...
   40 import os
   41 import sys;  sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python"))
   42 import time
   43 import re
   44 import glob
   45 import multiprocessing as mp
   46 
   47 # RDKit imports...
   48 try:
   49     from rdkit import rdBase
   50     from rdkit import Chem
   51     from rdkit.Chem import rdMolTransforms
   52 except ImportError as ErrMsg:
   53     sys.stderr.write("\nFailed to import RDKit module/package: %s\n" % ErrMsg)
   54     sys.stderr.write("Check/update your RDKit environment and try again.\n\n")
   55     sys.exit(1)
   56 
   57 # MayaChemTools imports...
   58 try:
   59     from docopt import docopt
   60     import MiscUtil
   61     import RDKitUtil
   62     from TorsionAlerts.TorsionLibraryAlerts import TorsionLibraryAlerts
   63 except ImportError as ErrMsg:
   64     sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg)
   65     sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n")
   66     sys.exit(1)
   67 
# Name of the running script, used in start-up and completion messages...
ScriptName = os.path.basename(sys.argv[0])

# Module-level option containers. Both start empty; InitializeWorkerProcess()
# rebinds them in each worker process from Base64 encoded copies...
Options = {}
OptionsInfo = {}
   71 
   72 def main():
   73     """Start execution of the script."""
   74     
   75     MiscUtil.PrintInfo("\n%s (RDKit v%s; MayaChemTools v%s; %s): Starting...\n" % (ScriptName, rdBase.rdkitVersion, MiscUtil.GetMayaChemToolsVersion(), time.asctime()))
   76     
   77     (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime()
   78     
   79     # Retrieve command line arguments and options...
   80     RetrieveOptions()
   81     
   82     if  Options["--list"]:
   83         # Handle listing of torsion library information...
   84         ProcessListTorsionLibraryOption()
   85     else:
   86         # Process and validate command line arguments and options...
   87         ProcessOptions()
   88         
   89         # Perform actions required by the script...
   90         PerformFiltering()
   91     
   92     MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName)
   93     MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime))
   94 
   95 def PerformFiltering():
   96     """Filter molecules using SMARTS torsion rules in the torsion library file."""
   97 
   98     # Setup a molecule reader...
   99     MiscUtil.PrintInfo("\nProcessing file %s..." % OptionsInfo["Infile"])
  100     Mols  = RDKitUtil.ReadMolecules(OptionsInfo["Infile"], **OptionsInfo["InfileParams"])
  101     
  102     MolCount, ValidMolCount, RemainingMolCount, WriteFailedCount = ProcessMolecules(Mols)
  103 
  104     MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount)
  105     MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount)
  106     MiscUtil.PrintInfo("Number of molecules failed during writing: %d" % WriteFailedCount)
  107     MiscUtil.PrintInfo("Number of ignored molecules: %d" % (MolCount - ValidMolCount + WriteFailedCount))
  108 
  109     MiscUtil.PrintInfo("\nNumber of remaining molecules: %d" % RemainingMolCount)
  110     MiscUtil.PrintInfo("Number of filtered molecules: %d" % (ValidMolCount - RemainingMolCount))
  111 
  112 def ProcessMolecules(Mols):
  113     """Process and filter molecules."""
  114     
  115     if OptionsInfo["MPMode"]:
  116         return ProcessMoleculesUsingMultipleProcesses(Mols)
  117     else:
  118         return ProcessMoleculesUsingSingleProcess(Mols)
  119 
  120 def ProcessMoleculesUsingSingleProcess(Mols):
  121     """Process and filter molecules using a single process."""
  122     
  123     # Instantiate torsion library alerts class...
  124     TorsionLibraryAlertsHandle = InstantiateTorsionLibraryAlertsClass()
  125     
  126     MiscUtil.PrintInfo("\nFiltering molecules...")
  127     
  128     OutfileFilteredMode = OptionsInfo["OutfileFilteredMode"]
  129 
  130     # Set up writers...
  131     OutfilesWriters = SetupOutfilesWriters()
  132     
  133     WriterRemaining = OutfilesWriters["WriterRemaining"]
  134     WriterFiltered = OutfilesWriters["WriterFiltered"]
  135     WriterAlertSummary = OutfilesWriters["WriterAlertSummary"]
  136     
  137     # Initialize alerts summary info...
  138     TorsionAlertsSummaryInfo = InitializeTorsionAlertsSummaryInfo()
  139 
  140     (MolCount, ValidMolCount, RemainingMolCount, WriteFailedCount, FilteredMolWriteCount) = [0] * 5
  141     for Mol in Mols:
  142         MolCount += 1
  143         
  144         if Mol is None:
  145             continue
  146         
  147         if RDKitUtil.IsMolEmpty(Mol):
  148             MiscUtil.PrintWarning("Ignoring empty molecule: %s" % RDKitUtil.GetMolName(Mol, MolCount))
  149             continue
  150 
  151         # Check for 3D flag...
  152         if not Mol.GetConformer().Is3D():
  153             MiscUtil.PrintWarning("3D tag is not set. Ignoring molecule: %s\n" % RDKitUtil.GetMolName(Mol, MolCount))
  154             continue
  155         
  156         ValidMolCount += 1
  157         
  158         # Identify torsion library alerts for rotatable bonds..
  159         RotBondsAlertsStatus, RotBondsAlertsInfo = TorsionLibraryAlertsHandle.IdentifyTorsionLibraryAlertsForRotatableBonds(Mol)
  160         
  161         TrackTorsionAlertsSummaryInfo(TorsionAlertsSummaryInfo, RotBondsAlertsInfo)
  162         
  163         # Write out filtered and remaining molecules...
  164         WriteStatus = True
  165         if RotBondsAlertsStatus:
  166             if OutfileFilteredMode:
  167                 WriteStatus = WriteMolecule(WriterFiltered, Mol, RotBondsAlertsInfo)
  168                 if WriteStatus:
  169                     FilteredMolWriteCount += 1
  170         else:
  171             RemainingMolCount += 1
  172             WriteStatus = WriteMolecule(WriterRemaining, Mol, RotBondsAlertsInfo)
  173         
  174         if not WriteStatus:
  175             WriteFailedCount += 1
  176 
  177     WriteTorsionAlertsSummaryInfo(WriterAlertSummary, TorsionAlertsSummaryInfo)
  178     CloseOutfilesWriters(OutfilesWriters)
  179 
  180     if FilteredMolWriteCount:
  181         WriteTorsionAlertsFilteredByRulesInfo(TorsionAlertsSummaryInfo)
  182     
  183     return (MolCount, ValidMolCount, RemainingMolCount, WriteFailedCount)
  184 
  185 def ProcessMoleculesUsingMultipleProcesses(Mols):
  186     """Process and filter molecules using multiprocessing."""
  187 
  188     MiscUtil.PrintInfo("\nFiltering molecules using multiprocessing...")
  189     
  190     MPParams = OptionsInfo["MPParams"]
  191     OutfileFilteredMode = OptionsInfo["OutfileFilteredMode"]
  192     
  193     # Instantiate torsion strain energy alerts class to list torsion library information...
  194     TorsionLibraryAlertsHandle = InstantiateTorsionLibraryAlertsClass()
  195     
  196     # Set up writers...
  197     OutfilesWriters = SetupOutfilesWriters()
  198     
  199     WriterRemaining = OutfilesWriters["WriterRemaining"]
  200     WriterFiltered = OutfilesWriters["WriterFiltered"]
  201     WriterAlertSummary = OutfilesWriters["WriterAlertSummary"]
  202     
  203     # Initialize alerts summary info...
  204     TorsionAlertsSummaryInfo = InitializeTorsionAlertsSummaryInfo()
  205     
  206     # Setup data for initializing a worker process...
  207     MiscUtil.PrintInfo("Encoding options info and rotatable bond pattern molecule...")
  208     InitializeWorkerProcessArgs = (MiscUtil.ObjectToBase64EncodedString(Options), MiscUtil.ObjectToBase64EncodedString(OptionsInfo))
  209 
  210     # Setup a encoded mols data iterable for a worker process...
  211     WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(Mols)
  212     
  213     # Setup process pool along with data initialization for each process...
  214     MiscUtil.PrintInfo("\nConfiguring multiprocessing using %s method..." % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()"))
  215     MiscUtil.PrintInfo("NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n" % (MPParams["NumProcesses"], MPParams["InputDataMode"], ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"])))
  216     
  217     ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeWorkerProcess, InitializeWorkerProcessArgs)
  218     
  219     # Start processing...
  220     if re.match("^Lazy$", MPParams["InputDataMode"], re.I):
  221         Results = ProcessPool.imap(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
  222     elif re.match("^InMemory$", MPParams["InputDataMode"], re.I):
  223         Results = ProcessPool.map(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
  224     else:
  225         MiscUtil.PrintError("The value, %s, specified for \"--inputDataMode\" is not supported." % (MPParams["InputDataMode"]))
  226     
  227     (MolCount, ValidMolCount, RemainingMolCount, WriteFailedCount, FilteredMolWriteCount) = [0] * 5
  228     for Result in Results:
  229         MolCount += 1
  230         MolIndex, EncodedMol, RotBondsAlertsStatus, RotBondsAlertsInfo = Result
  231         
  232         if EncodedMol is None:
  233             continue
  234         ValidMolCount += 1
  235         
  236         Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
  237         
  238         TrackTorsionAlertsSummaryInfo(TorsionAlertsSummaryInfo, RotBondsAlertsInfo)
  239         
  240         # Write out filtered and remaining molecules...
  241         WriteStatus = True
  242         if RotBondsAlertsStatus:
  243             if OutfileFilteredMode:
  244                 WriteStatus = WriteMolecule(WriterFiltered, Mol, RotBondsAlertsInfo)
  245                 if WriteStatus:
  246                     FilteredMolWriteCount += 1
  247         else:
  248             RemainingMolCount += 1
  249             WriteStatus = WriteMolecule(WriterRemaining, Mol, RotBondsAlertsInfo)
  250         
  251         if not WriteStatus:
  252             WriteFailedCount += 1
  253     
  254     WriteTorsionAlertsSummaryInfo(WriterAlertSummary, TorsionAlertsSummaryInfo)
  255     CloseOutfilesWriters(OutfilesWriters)
  256     
  257     if FilteredMolWriteCount:
  258         WriteTorsionAlertsFilteredByRulesInfo(TorsionAlertsSummaryInfo)
  259     
  260     return (MolCount, ValidMolCount, RemainingMolCount, WriteFailedCount)
  261 
  262 def InitializeWorkerProcess(*EncodedArgs):
  263     """Initialize data for a worker process."""
  264     
  265     global Options, OptionsInfo
  266 
  267     MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid())
  268 
  269     # Decode Options and OptionInfo...
  270     Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0])
  271     OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1])
  272 
  273     # Instantiate torsion slibrary alerts class...
  274     OptionsInfo["TorsionLibraryAlertsHandle"] = InstantiateTorsionLibraryAlertsClass(Quiet = True)
  275 
  276 def WorkerProcess(EncodedMolInfo):
  277     """Process data for a worker process."""
  278 
  279     MolIndex, EncodedMol = EncodedMolInfo
  280     
  281     if EncodedMol is None:
  282         return [MolIndex, None, False, None]
  283     
  284     Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
  285     if RDKitUtil.IsMolEmpty(Mol):
  286         MolName = RDKitUtil.GetMolName(Mol, (MolIndex + 1))
  287         MiscUtil.PrintWarning("Ignoring empty molecule: %s" % MolName)
  288         return [MolIndex, None, False, None]
  289         
  290     # Check for 3D flag...
  291     if not Mol.GetConformer().Is3D():
  292         MolName = RDKitUtil.GetMolName(Mol, (MolIndex + 1))
  293         MiscUtil.PrintWarning("3D tag is not set. Ignoring molecule: %s\n" % MolName)
  294         return [MolIndex, None, False, None]
  295     
  296     # Identify torsion library alerts for rotatable bonds..
  297     TorsionTorsionLibraryAlertsHandle = OptionsInfo["TorsionLibraryAlertsHandle"]
  298     RotBondsAlertsStatus, RotBondsAlertsInfo = TorsionTorsionLibraryAlertsHandle.IdentifyTorsionLibraryAlertsForRotatableBonds(Mol)
  299 
  300     return [MolIndex, EncodedMol, RotBondsAlertsStatus, RotBondsAlertsInfo]
  301 
  302 def InitializeTorsionAlertsSummaryInfo():
  303     """Initialize torsion alerts summary."""
  304 
  305     if OptionsInfo["CountMode"]:
  306         return None
  307     
  308     if not OptionsInfo["TrackAlertsSummaryInfo"]:
  309         return None
  310     
  311     TorsionAlertsSummaryInfo = {}
  312     TorsionAlertsSummaryInfo["RuleIDs"] = []
  313 
  314     for DataLabel in ["SMARTSToRuleIDs", "RuleSMARTS", "HierarchyClassName", "HierarchySubClassName", "TorsionRulePeaks", "TorsionRuleTolerances1", "TorsionRuleTolerances2", "AlertTypes", "AlertTypesMolCount"]:
  315         TorsionAlertsSummaryInfo[DataLabel] = {}
  316         
  317     return TorsionAlertsSummaryInfo
  318 
  319 def TrackTorsionAlertsSummaryInfo(TorsionAlertsSummaryInfo, RotBondsAlertsInfo):
  320     """Track torsion alerts summary information for matched torsion rules in a
  321     molecule."""
  322     
  323     if OptionsInfo["CountMode"]:
  324         return
  325     
  326     if not OptionsInfo["TrackAlertsSummaryInfo"]:
  327         return
  328 
  329     if RotBondsAlertsInfo is None:
  330         return
  331 
  332     MolAlertsInfo = {}
  333     MolAlertsInfo["RuleIDs"] = []
  334     MolAlertsInfo["AlertTypes"] = {}
  335     
  336     for ID in RotBondsAlertsInfo["IDs"]:
  337         if not RotBondsAlertsInfo["MatchStatus"][ID]:
  338             continue
  339 
  340         if OptionsInfo["OutfileAlertsOnly"]:
  341             if RotBondsAlertsInfo["AlertTypes"][ID] not in OptionsInfo["SpecifiedAlertsModeList"]:
  342                 continue
  343         
  344         AlertType = RotBondsAlertsInfo["AlertTypes"][ID]
  345         TorsionRuleNodeID = RotBondsAlertsInfo["TorsionRuleNodeID"][ID]
  346         TorsionRuleSMARTS = RotBondsAlertsInfo["TorsionRuleSMARTS"][ID]
  347 
  348         # Track data for torsion alert summary information across molecules...
  349         if TorsionRuleNodeID not in TorsionAlertsSummaryInfo["RuleSMARTS"]:
  350             TorsionAlertsSummaryInfo["RuleIDs"].append(TorsionRuleNodeID)
  351             TorsionAlertsSummaryInfo["SMARTSToRuleIDs"][TorsionRuleSMARTS] = TorsionRuleNodeID
  352             
  353             TorsionAlertsSummaryInfo["RuleSMARTS"][TorsionRuleNodeID] = TorsionRuleSMARTS
  354             TorsionAlertsSummaryInfo["HierarchyClassName"][TorsionRuleNodeID] = RotBondsAlertsInfo["HierarchyClassNames"][ID]
  355             TorsionAlertsSummaryInfo["HierarchySubClassName"][TorsionRuleNodeID] = RotBondsAlertsInfo["HierarchySubClassNames"][ID]
  356 
  357             TorsionAlertsSummaryInfo["TorsionRulePeaks"][TorsionRuleNodeID] = RotBondsAlertsInfo["TorsionRulePeaks"][ID]
  358             TorsionAlertsSummaryInfo["TorsionRuleTolerances1"][TorsionRuleNodeID] = RotBondsAlertsInfo["TorsionRuleTolerances1"][ID]
  359             TorsionAlertsSummaryInfo["TorsionRuleTolerances2"][TorsionRuleNodeID] = RotBondsAlertsInfo["TorsionRuleTolerances2"][ID]
  360             
  361             # Initialize number of alert types across all molecules...
  362             TorsionAlertsSummaryInfo["AlertTypes"][TorsionRuleNodeID] = {}
  363             
  364             # Initialize number of molecules flagged by each alert type...
  365             TorsionAlertsSummaryInfo["AlertTypesMolCount"][TorsionRuleNodeID] = {}
  366         
  367         if AlertType not in TorsionAlertsSummaryInfo["AlertTypes"][TorsionRuleNodeID]:
  368             TorsionAlertsSummaryInfo["AlertTypes"][TorsionRuleNodeID][AlertType] = 0
  369             TorsionAlertsSummaryInfo["AlertTypesMolCount"][TorsionRuleNodeID][AlertType] = 0
  370         
  371         TorsionAlertsSummaryInfo["AlertTypes"][TorsionRuleNodeID][AlertType] += 1
  372 
  373         # Track data for torsion alert information in a molecule...
  374         if TorsionRuleNodeID not in MolAlertsInfo["AlertTypes"]:
  375             MolAlertsInfo["RuleIDs"].append(TorsionRuleNodeID)
  376             MolAlertsInfo["AlertTypes"][TorsionRuleNodeID] = {}
  377         
  378         if AlertType not in MolAlertsInfo["AlertTypes"][TorsionRuleNodeID]:
  379             MolAlertsInfo["AlertTypes"][TorsionRuleNodeID][AlertType] = 0
  380         MolAlertsInfo["AlertTypes"][TorsionRuleNodeID][AlertType] += 1
  381 
  382     # Track number of molecules flagged by a specific torsion alert...
  383     for TorsionRuleNodeID in MolAlertsInfo["RuleIDs"]:
  384         for AlertType in MolAlertsInfo["AlertTypes"][TorsionRuleNodeID]:
  385             if MolAlertsInfo["AlertTypes"][TorsionRuleNodeID][AlertType]:
  386                 TorsionAlertsSummaryInfo["AlertTypesMolCount"][TorsionRuleNodeID][AlertType] += 1
  387 
  388 def WriteTorsionAlertsSummaryInfo(Writer, TorsionAlertsSummaryInfo):
  389     """Write out torsion alerts summary informatio to a CSV file."""
  390     
  391     if OptionsInfo["CountMode"]:
  392         return
  393     
  394     if not OptionsInfo["OutfileSummaryMode"]:
  395         return
  396 
  397     if len(TorsionAlertsSummaryInfo["RuleIDs"]) == 0:
  398         return
  399     
  400     # Write headers...
  401     QuoteValues = True
  402     Values = ["TorsionRule", "TorsionPeaks", "Tolerances1", "Tolerances2", "HierarchyClass", "HierarchySubClass", "TorsionAlertTypes", "TorsionAlertCount", "TorsionAlertMolCount"]
  403     Writer.write("%s\n" % MiscUtil.JoinWords(Values, ",", QuoteValues))
  404 
  405     SortedRuleIDs = GetSortedTorsionAlertsSummaryInfoRuleIDs(TorsionAlertsSummaryInfo)
  406 
  407     # Write alerts information...
  408     for ID in SortedRuleIDs:
  409         # Remove any double quotes in SMARTS...
  410         RuleSMARTS = TorsionAlertsSummaryInfo["RuleSMARTS"][ID]
  411         RuleSMARTS = re.sub("\"", "", RuleSMARTS, re.I)
  412         
  413         HierarchyClassName = TorsionAlertsSummaryInfo["HierarchyClassName"][ID]
  414         HierarchySubClassName = TorsionAlertsSummaryInfo["HierarchySubClassName"][ID]
  415 
  416         TorsionPeaks = MiscUtil.JoinWords(["%s" % Value for Value in TorsionAlertsSummaryInfo["TorsionRulePeaks"][ID]], ",")
  417         TorsionRuleTolerances1 = MiscUtil.JoinWords(["%s" % Value for Value in TorsionAlertsSummaryInfo["TorsionRuleTolerances1"][ID]], ",")
  418         TorsionRuleTolerances2 = MiscUtil.JoinWords(["%s" % Value for Value in TorsionAlertsSummaryInfo["TorsionRuleTolerances2"][ID]], ",")
  419         
  420         AlertTypes = []
  421         AlertTypeCount = []
  422         AlertTypeMolCount = []
  423         for AlertType in sorted(TorsionAlertsSummaryInfo["AlertTypes"][ID]):
  424             AlertTypes.append(AlertType)
  425             AlertTypeCount.append("%s" % TorsionAlertsSummaryInfo["AlertTypes"][ID][AlertType])
  426             AlertTypeMolCount.append("%s" % TorsionAlertsSummaryInfo["AlertTypesMolCount"][ID][AlertType])
  427         
  428         Values = [RuleSMARTS, TorsionPeaks, TorsionRuleTolerances1, TorsionRuleTolerances2, HierarchyClassName, HierarchySubClassName, "%s" % MiscUtil.JoinWords(AlertTypes, ","), "%s" % (MiscUtil.JoinWords(AlertTypeCount, ",")), "%s" % (MiscUtil.JoinWords(AlertTypeMolCount, ","))]
  429         Writer.write("%s\n" % MiscUtil.JoinWords(Values, ",", QuoteValues))
  430 
  431 def GetSortedTorsionAlertsSummaryInfoRuleIDs(TorsionAlertsSummaryInfo):
  432     """Sort torsion rule IDs by  alert types molecule count in descending order."""
  433 
  434     SortedRuleIDs = []
  435     
  436     RuleIDs = TorsionAlertsSummaryInfo["RuleIDs"]
  437     if len(RuleIDs) == 0:
  438         return SortedRuleIDs
  439     
  440     # Setup a map from AlertTypesMolCount to IDs for sorting alerts...
  441     RuleIDs = TorsionAlertsSummaryInfo["RuleIDs"]
  442     MolCountMap = {}
  443     for ID in RuleIDs:
  444         MolCount = 0
  445         for AlertType in sorted(TorsionAlertsSummaryInfo["AlertTypes"][ID]):
  446             MolCount += TorsionAlertsSummaryInfo["AlertTypesMolCount"][ID][AlertType]
  447         MolCountMap[ID] = MolCount
  448 
  449     SortedRuleIDs = sorted(RuleIDs, key = lambda ID: MolCountMap[ID], reverse = True)
  450     
  451     return SortedRuleIDs
  452 
  453 def WriteTorsionAlertsFilteredByRulesInfo(TorsionAlertsSummaryInfo):
  454     """Write out torsion alerts SD files for individual torsion rules."""
  455     
  456     if OptionsInfo["CountMode"]:
  457         return
  458     
  459     if not OptionsInfo["OutfilesFilteredByRulesMode"]:
  460         return
  461 
  462     if len(TorsionAlertsSummaryInfo["RuleIDs"]) == 0:
  463         return
  464 
  465     # Setup a molecule reader for filtered molecules...
  466     FilteredMols  = RDKitUtil.ReadMolecules(OptionsInfo["OutfileFiltered"], **OptionsInfo["InfileParams"])
  467 
  468     # Get torsion rule IDs for writing out filtered SD files for individual torsion alert rules... 
  469     TorsionRuleIDs = GetTorsionAlertsFilteredByRuleFilesRuleIDs(TorsionAlertsSummaryInfo)
  470 
  471     # Setup writers...
  472     ByRuleOutfilesWriters = SetupByRuleOutfilesWriters(TorsionRuleIDs)
  473     
  474     for Mol in FilteredMols:
  475         # Retrieve torsion alerts info...
  476         TorsionAlertsInfo = RetrieveTorsionAlertsInfo(Mol, TorsionAlertsSummaryInfo)
  477         if TorsionAlertsInfo is None:
  478             continue
  479         
  480         for TorsionRuleID in TorsionRuleIDs:
  481             if TorsionRuleID not in TorsionAlertsInfo["RuleSMARTS"]:
  482                 continue
  483             
  484             WriteMoleculeFilteredByRuleID(ByRuleOutfilesWriters[TorsionRuleID], Mol, TorsionRuleID, TorsionAlertsSummaryInfo, TorsionAlertsInfo)
  485         
  486     CloseByRuleOutfilesWriters(ByRuleOutfilesWriters)
  487     
  488 def GetTorsionAlertsFilteredByRuleFilesRuleIDs(TorsionAlertsSummaryInfo):
  489     """Get torsion rule IDs for writing out individual SD files filtered by torsion alert rules."""
  490     
  491     # Get torsion rule IDs triggering torsion alerts sorted in the order from the most to
  492     # the least number of unique molecules...
  493     RuleIDs = GetSortedTorsionAlertsSummaryInfoRuleIDs(TorsionAlertsSummaryInfo)
  494 
  495     # Select torsion rule IDs for writing out SD files...
  496     if not OptionsInfo["OutfilesFilteredByRulesAllMode"]:
  497         MaxRuleIDs = OptionsInfo["OutfilesFilteredByRulesMaxCount"]
  498         if MaxRuleIDs < len(RuleIDs):
  499             RuleIDs = RuleIDs[0:MaxRuleIDs]
  500     
  501     return RuleIDs
  502 
  503 def RetrieveTorsionAlertsInfo(Mol, TorsionAlertsSummaryInfo):
  504     """Parse torsion alerts data field value to retrieve alerts information for rotatable bonds."""
  505     
  506     TorsionAlertsLabel = OptionsInfo["SDFieldIDsToLabels"]["TorsionAlertsLabel"]
  507     TorsionAlerts = Mol.GetProp(TorsionAlertsLabel) if Mol.HasProp(TorsionAlertsLabel) else None
  508     
  509     if TorsionAlerts is None or len(TorsionAlerts) == 0:
  510         return None
  511 
  512     # Initialize for tracking by rule IDs...
  513     TorsionAlertsInfo = {}
  514     TorsionAlertsInfo["RuleIDs"] = []
  515     
  516     for DataLabel in ["RuleSMARTS", "HierarchyClassName", "HierarchySubClassName", "TorsionRulePeaks", "TorsionRuleTolerances1", "TorsionRuleTolerances2", "AlertTypes", "AtomIndices", "TorsionAtomIndices", "TorsionAngles", "TorsionAngleViolations", "AlertTypesCount"]:
  517         TorsionAlertsInfo[DataLabel] = {}
  518         
  519     ValuesDelimiter = OptionsInfo["IntraSetValuesDelim"]
  520     TorsionAlertsSetSize = 11
  521     
  522     TorsionAlertsWords = TorsionAlerts.split()
  523     if len(TorsionAlertsWords) % TorsionAlertsSetSize:
  524         MiscUtil.PrintError("The number of space delimited values, %s, for TorsionAlerts data field in filtered SD file must be a multiple of %s." % (len(TorsionAlertsWords), TorsionAlertsSetSize))
  525 
  526     ID = 0
  527     for Index in range(0, len(TorsionAlertsWords), TorsionAlertsSetSize):
  528         ID += 1
  529         
  530         RotBondIndices, TorsionAlertType, TorsionIndices, TorsionAngle, TorsionAngleViolation, HierarchyClass, HierarchySubClass, TorsionPeaks, Tolerances1, Tolerances2, TorsionRule = TorsionAlertsWords[Index: Index + TorsionAlertsSetSize]
  531         RotBondIndices = RotBondIndices.split(ValuesDelimiter)
  532         TorsionIndices = TorsionIndices.split(ValuesDelimiter)
  533         TorsionPeaks = TorsionPeaks.split(ValuesDelimiter)
  534         Tolerances1 = Tolerances1.split(ValuesDelimiter)
  535         Tolerances2 = Tolerances2.split(ValuesDelimiter)
  536 
  537         if TorsionRule not in TorsionAlertsSummaryInfo["SMARTSToRuleIDs"]:
  538             MiscUtil.PrintWarning("The SMARTS pattern, %s, for TorsionAlerts data field in filtered SD file doesn't map to any torsion rule..." % TorsionRule)
  539             continue
  540         TorsionRuleNodeID = TorsionAlertsSummaryInfo["SMARTSToRuleIDs"][TorsionRule]
  541 
  542         # Track data for torsion alerts in a molecule...
  543         if TorsionRuleNodeID not in TorsionAlertsInfo["RuleSMARTS"]:
  544             TorsionAlertsInfo["RuleIDs"].append(TorsionRuleNodeID)
  545 
  546             TorsionAlertsInfo["RuleSMARTS"][TorsionRuleNodeID] = TorsionRule
  547             TorsionAlertsInfo["HierarchyClassName"][TorsionRuleNodeID] = HierarchyClass
  548             TorsionAlertsInfo["HierarchySubClassName"][TorsionRuleNodeID] = HierarchySubClass
  549             TorsionAlertsInfo["TorsionRulePeaks"][TorsionRuleNodeID] = TorsionPeaks
  550             TorsionAlertsInfo["TorsionRuleTolerances1"][TorsionRuleNodeID] = Tolerances1
  551             TorsionAlertsInfo["TorsionRuleTolerances2"][TorsionRuleNodeID] = Tolerances2
  552             
  553             TorsionAlertsInfo["AlertTypes"][TorsionRuleNodeID] = []
  554             TorsionAlertsInfo["AtomIndices"][TorsionRuleNodeID] = []
  555             TorsionAlertsInfo["TorsionAtomIndices"][TorsionRuleNodeID] = []
  556             TorsionAlertsInfo["TorsionAngles"][TorsionRuleNodeID] = []
  557             TorsionAlertsInfo["TorsionAngleViolations"][TorsionRuleNodeID] = []
  558 
  559             TorsionAlertsInfo["AlertTypesCount"][TorsionRuleNodeID] = {}
  560             
  561         # Track multiple values for a rule ID...
  562         TorsionAlertsInfo["AlertTypes"][TorsionRuleNodeID].append(TorsionAlertType)
  563         TorsionAlertsInfo["AtomIndices"][TorsionRuleNodeID].append(RotBondIndices)
  564         TorsionAlertsInfo["TorsionAtomIndices"][TorsionRuleNodeID].append(TorsionIndices)
  565         TorsionAlertsInfo["TorsionAngles"][TorsionRuleNodeID].append(TorsionAngle)
  566         TorsionAlertsInfo["TorsionAngleViolations"][TorsionRuleNodeID].append(TorsionAngleViolation)
  567         
  568         # Count alert type for a rule ID...
  569         if TorsionAlertType not in TorsionAlertsInfo["AlertTypesCount"][TorsionRuleNodeID]:
  570             TorsionAlertsInfo["AlertTypesCount"][TorsionRuleNodeID][TorsionAlertType] = 0
  571         TorsionAlertsInfo["AlertTypesCount"][TorsionRuleNodeID][TorsionAlertType] += 1
  572         
  573     return TorsionAlertsInfo
  574     
  575 def WriteMolecule(Writer, Mol, RotBondsAlertsInfo):
  576     """Write out molecule."""
  577     
  578     if OptionsInfo["CountMode"]:
  579         return True
  580 
  581     SetupMolPropertiesForAlertsInformation(Mol, RotBondsAlertsInfo)
  582 
  583     try:
  584         Writer.write(Mol)
  585     except Exception as ErrMsg:
  586         MiscUtil.PrintWarning("Failed to write molecule %s:\n%s\n" % (RDKitUtil.GetMolName(Mol), ErrMsg))
  587         return False
  588     
  589     return True
  590 
  591 def SetupMolPropertiesForAlertsInformation(Mol, RotBondsAlertsInfo):
  592     """Setup molecule properties containing alerts information for rotatable bonds."""
  593 
  594     if not OptionsInfo["OutfileAlerts"]:
  595         return
  596     
  597     # Setup rotatable bonds count...
  598     RotBondsCount = 0
  599     if RotBondsAlertsInfo is not None:
  600         RotBondsCount =  len(RotBondsAlertsInfo["IDs"])
  601     Mol.SetProp(OptionsInfo["SDFieldIDsToLabels"]["RotBondsCountLabel"],  "%s" % RotBondsCount)
  602     
  603     # Setup alert counts for rotatable bonds...
  604     AlertsCount = []
  605     if RotBondsAlertsInfo is not None:
  606         for AlertType in ["Green", "Orange", "Red"]:
  607             if AlertType in RotBondsAlertsInfo["Count"]:
  608                 AlertsCount.append("%s" % RotBondsAlertsInfo["Count"][AlertType])
  609             else:
  610                 AlertsCount.append("0")
  611     
  612     if len(AlertsCount):
  613         Mol.SetProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionAlertsCountLabel"],  "%s" % MiscUtil.JoinWords(AlertsCount, " "))
  614 
  615     # Setup alert information for rotatable bonds...
  616     AlertsInfoValues = []
  617 
  618     # Delimiter for multiple values corresponding to specific set of information for
  619     # a rotatable bond. For example: TorsionAtomIndices
  620     ValuesDelim = OptionsInfo["IntraSetValuesDelim"]
  621 
  622     # Delimiter for various values for a rotatable bond...
  623     RotBondValuesDelim = OptionsInfo["InterSetValuesDelim"]
  624     
  625     # Delimiter for values corresponding to multiple rotatable bonds...
  626     AlertsInfoValuesDelim = OptionsInfo["InterSetValuesDelim"]
  627     
  628     if RotBondsAlertsInfo is not None:
  629         for ID in RotBondsAlertsInfo["IDs"]:
  630             if not RotBondsAlertsInfo["MatchStatus"][ID]:
  631                 continue
  632             
  633             if OptionsInfo["OutfileAlertsOnly"]:
  634                 if RotBondsAlertsInfo["AlertTypes"][ID] not in OptionsInfo["SpecifiedAlertsModeList"]:
  635                     continue
  636             
  637             RotBondValues = []
  638             
  639             # Bond atom indices...
  640             Values = ["%s" % Value for Value in RotBondsAlertsInfo["AtomIndices"][ID]]
  641             RotBondValues.append(ValuesDelim.join(Values))
  642 
  643             # Alert type...
  644             RotBondValues.append(RotBondsAlertsInfo["AlertTypes"][ID])
  645 
  646             # Torsion atom indices...
  647             TorsionAtomIndices = SetupTorsionAtomIndicesValues(RotBondsAlertsInfo["TorsionAtomIndices"][ID], ValuesDelim)
  648             RotBondValues.append(TorsionAtomIndices)
  649 
  650             # Torsion angle...
  651             RotBondValues.append("%.2f" % RotBondsAlertsInfo["TorsionAngles"][ID])
  652 
  653             # Torsion angle violation...
  654             RotBondValues.append("%.2f" % RotBondsAlertsInfo["TorsionAngleViolations"][ID])
  655 
  656             # Hierarchy class and subclass names...
  657             RotBondValues.append("%s" % RotBondsAlertsInfo["HierarchyClassNames"][ID])
  658             RotBondValues.append("%s" % RotBondsAlertsInfo["HierarchySubClassNames"][ID])
  659 
  660             # Torsion rule peaks...
  661             Values = ["%s" % Value for Value in RotBondsAlertsInfo["TorsionRulePeaks"][ID]]
  662             RotBondValues.append(ValuesDelim.join(Values))
  663             
  664             # Torsion rule tolerances...
  665             Values = ["%s" % Value for Value in RotBondsAlertsInfo["TorsionRuleTolerances1"][ID]]
  666             RotBondValues.append(ValuesDelim.join(Values))
  667             Values = ["%s" % Value for Value in RotBondsAlertsInfo["TorsionRuleTolerances2"][ID]]
  668             RotBondValues.append(ValuesDelim.join(Values))
  669             
  670             # Torsion rule SMARTS...
  671             RotBondValues.append("%s" % RotBondsAlertsInfo["TorsionRuleSMARTS"][ID])
  672 
  673             # Track joined values for a rotatable bond...
  674             AlertsInfoValues.append("%s" % RotBondValuesDelim.join(RotBondValues))
  675 
  676     if len(AlertsInfoValues):
  677         Mol.SetProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionAlertsLabel"], "%s" % ("%s" % AlertsInfoValuesDelim.join(AlertsInfoValues)))
  678     
  679 def WriteMoleculeFilteredByRuleID(Writer, Mol, TorsionRuleID, TorsionAlertsSummaryInfo, TorsionAlertsInfo):
  680     """Write out molecule."""
  681     
  682     if OptionsInfo["CountMode"]:
  683         return
  684 
  685     SetupMolPropertiesForFilteredByRuleIDAlertsInformation(Mol, TorsionRuleID, TorsionAlertsSummaryInfo, TorsionAlertsInfo)
  686         
  687     Writer.write(Mol)
  688 
  689 def SetupMolPropertiesForFilteredByRuleIDAlertsInformation(Mol, TorsionRuleID, TorsionAlertsSummaryInfo, TorsionAlertsInfo):
  690     """Setup molecule properties containing alerts information for torsion alerts
  691     fileted by Rule IDs."""
  692 
  693     # Delete torsion alerts information for rotatable bonds...
  694     if Mol.HasProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionAlertsLabel"]):
  695         Mol.ClearProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionAlertsLabel"])
  696 
  697     # Delimiter for values...
  698     IntraSetValuesDelim = OptionsInfo["IntraSetValuesDelim"]
  699     InterSetValuesDelim = OptionsInfo["InterSetValuesDelim"]
  700     
  701     # Setup alert rule information...
  702     AlertRuleInfoValues = []
  703     
  704     AlertRuleInfoValues.append("%s" % TorsionAlertsInfo["HierarchyClassName"][TorsionRuleID])
  705     AlertRuleInfoValues.append("%s" % TorsionAlertsInfo["HierarchySubClassName"][TorsionRuleID])
  706     
  707     Values = ["%s" % Value for Value in TorsionAlertsInfo["TorsionRulePeaks"][TorsionRuleID]]
  708     AlertRuleInfoValues.append(IntraSetValuesDelim.join(Values))
  709     
  710     Values = ["%s" % Value for Value in TorsionAlertsInfo["TorsionRuleTolerances1"][TorsionRuleID]]
  711     AlertRuleInfoValues.append(IntraSetValuesDelim.join(Values))
  712     Values = ["%s" % Value for Value in TorsionAlertsInfo["TorsionRuleTolerances2"][TorsionRuleID]]
  713     AlertRuleInfoValues.append(IntraSetValuesDelim.join(Values))
  714     
  715     AlertRuleInfoValues.append("%s" % TorsionAlertsInfo["RuleSMARTS"][TorsionRuleID])
  716     
  717     Mol.SetProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionRuleLabel"], "%s" % ("%s" % InterSetValuesDelim.join(AlertRuleInfoValues)))
  718 
  719     # Setup alerts count for torsion rule...
  720     AlertsCount = []
  721     for AlertType in ["Green", "Orange", "Red"]:
  722         if AlertType in TorsionAlertsInfo["AlertTypesCount"][TorsionRuleID]:
  723             AlertsCount.append("%s" % TorsionAlertsInfo["AlertTypesCount"][TorsionRuleID][AlertType])
  724         else:
  725             AlertsCount.append("0")
  726     
  727     Mol.SetProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionRuleAlertsCountLabel"],  "%s" % (InterSetValuesDelim.join(AlertsCount)))
  728     
  729     # Setup torsion rule alerts...
  730     AlertsInfoValues = []
  731     for Index in range(0, len(TorsionAlertsInfo["AlertTypes"][TorsionRuleID])):
  732         RotBondInfoValues = []
  733         
  734         # Bond atom indices...
  735         Values = ["%s" % Value for Value in TorsionAlertsInfo["AtomIndices"][TorsionRuleID][Index]]
  736         RotBondInfoValues.append(IntraSetValuesDelim.join(Values))
  737         
  738         # Alert type...
  739         RotBondInfoValues.append(TorsionAlertsInfo["AlertTypes"][TorsionRuleID][Index])
  740         
  741         # Torsion atom indices retrieved from the filtered SD file and stored as strings...
  742         Values = ["%s" % Value for Value in TorsionAlertsInfo["TorsionAtomIndices"][TorsionRuleID][Index]]
  743         RotBondInfoValues.append(IntraSetValuesDelim.join(Values))
  744         
  745         # Torsion angle...
  746         RotBondInfoValues.append(TorsionAlertsInfo["TorsionAngles"][TorsionRuleID][Index])
  747         
  748         # Torsion angle violation...
  749         RotBondInfoValues.append(TorsionAlertsInfo["TorsionAngleViolations"][TorsionRuleID][Index])
  750 
  751         # Track alerts informaiton...
  752         AlertsInfoValues.append("%s" % InterSetValuesDelim.join(RotBondInfoValues))
  753     
  754     Mol.SetProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionRuleAlertsLabel"],  "%s" % (InterSetValuesDelim.join(AlertsInfoValues)))
  755     
  756     # Setup torsion rule alert max angle violation...
  757     TorsionAngleViolations = [float(Angle) for Angle in TorsionAlertsInfo["TorsionAngleViolations"][TorsionRuleID]]
  758     Mol.SetProp(OptionsInfo["SDFieldIDsToLabels"]["TorsionRuleMaxAngleViolationLabel"],  "%.2f" % (max(TorsionAngleViolations)))
  759 
  760 def SetupTorsionAtomIndicesValues(TorsionAtomIndicesList, ValuesDelim):
  761     """Setup torsion atom indices value for output files."""
  762 
  763     # Check for any list values in the list of torsion atom indices used as placeholders
  764     # for positions of lone pairs in torsion rules containing  N_lp...
  765     TorsionAtomsInfo = []
  766     for Value in TorsionAtomIndicesList:
  767         if type(Value) is list:
  768             TorsionAtomsInfo.append("N_lp")
  769         else:
  770             TorsionAtomsInfo.append(Value)
  771             
  772     Values = ["%s" % Value for Value in TorsionAtomsInfo]
  773     
  774     return ValuesDelim.join(Values)
  775     
  776 def SetupOutfilesWriters():
  777     """Setup molecule and summary writers."""
  778 
  779     OutfilesWriters = {"WriterRemaining": None, "WriterFiltered": None, "WriterAlertSummary": None}
  780 
  781     # Writers for SD files...
  782     WriterRemaining, WriterFiltered = SetupMoleculeWriters()
  783     OutfilesWriters["WriterRemaining"] = WriterRemaining
  784     OutfilesWriters["WriterFiltered"] = WriterFiltered
  785     
  786     # Writer for alert summary CSV file...
  787     WriterAlertSummary = SetupAlertSummaryWriter()
  788     OutfilesWriters["WriterAlertSummary"] = WriterAlertSummary
  789 
  790     return OutfilesWriters
  791 
  792 def SetupMoleculeWriters():
  793     """Setup molecule writers."""
  794     
  795     Writer = None
  796     WriterFiltered = None
  797 
  798     if OptionsInfo["CountMode"]:
  799         return (Writer, WriterFiltered)
  800 
  801     Writer = RDKitUtil.MoleculesWriter(OptionsInfo["Outfile"], **OptionsInfo["OutfileParams"])
  802     if Writer is None:
  803         MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OptionsInfo["Outfile"])
  804     MiscUtil.PrintInfo("\nGenerating file %s..." % OptionsInfo["Outfile"])
  805     
  806     if OptionsInfo["OutfileFilteredMode"]:
  807         WriterFiltered = RDKitUtil.MoleculesWriter(OptionsInfo["OutfileFiltered"], **OptionsInfo["OutfileParams"])
  808         if WriterFiltered is None:
  809             MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OptionsInfo["OutfileFiltered"])
  810         MiscUtil.PrintInfo("Generating file %s..." % OptionsInfo["OutfileFiltered"])
  811     
  812     return (Writer, WriterFiltered)
  813 
  814 def SetupAlertSummaryWriter():
  815     """Setup a alert summary writer."""
  816     
  817     Writer = None
  818     
  819     if OptionsInfo["CountMode"]:
  820         return Writer
  821         
  822     if not OptionsInfo["OutfileSummaryMode"]:
  823         return Writer
  824     
  825     Outfile = OptionsInfo["OutfileSummary"]
  826     Writer = open(Outfile, "w")
  827     if Writer is None:
  828         MiscUtil.PrintError("Failed to setup a writer for output fie %s " % Outfile)
  829     
  830     MiscUtil.PrintInfo("Generating file %s..." % Outfile)
  831     
  832     return Writer
  833     
  834 def CloseOutfilesWriters(OutfilesWriters):
  835     """Close outfile writers."""
  836 
  837     for WriterType, Writer in OutfilesWriters.items():
  838         if Writer is not None:
  839             Writer.close()
  840 
  841 def SetupByRuleOutfilesWriters(RuleIDs):
  842     """Setup by rule outfiles writers."""
  843 
  844     # Initialize...
  845     OutfilesWriters = {}
  846     for RuleID in RuleIDs:
  847         OutfilesWriters[RuleID] = None
  848     
  849     if OptionsInfo["CountMode"]:
  850         return OutfilesWriters
  851         
  852     if not OptionsInfo["OutfilesFilteredByRulesMode"]:
  853         return OutfilesWriters
  854     
  855     FileDir, FileName, FileExt = MiscUtil.ParseFileName(Options["--outfile"])
  856     OutfilesRoot = "%s_Filtered_TopRule" % FileName
  857     OutfilesExt = "sdf"
  858 
  859     MsgTxt = "all" if OptionsInfo["OutfilesFilteredByRulesAllMode"] else "top %s" % OptionsInfo["OutfilesFilteredByRulesMaxCount"]
  860     MiscUtil.PrintInfo("\nGenerating output files %s*.%s for %s torsion rules triggering alerts..." % (OutfilesRoot, OutfilesExt, MsgTxt))
  861     
  862     # Delete any existing output files...
  863     Outfiles = glob.glob("%s*.%s" % (OutfilesRoot, OutfilesExt))
  864     if len(Outfiles):
  865         MiscUtil.PrintInfo("Deleting existing output files %s*.%s..." % (OutfilesRoot, OutfilesExt))
  866         for Outfile in Outfiles:
  867             try:
  868                 os.remove(Outfile)
  869             except Exception as ErrMsg:
  870                 MiscUtil.PrintWarning("Failed to delete file: %s" % ErrMsg)
  871     
  872     RuleIndex = 0
  873     for RuleID in RuleIDs:
  874         RuleIndex += 1
  875         Outfile = "%s%s.%s" % (OutfilesRoot, RuleIndex, OutfilesExt)
  876         Writer = RDKitUtil.MoleculesWriter(Outfile, **OptionsInfo["OutfileParams"])
  877         if Writer is None:
  878             MiscUtil.PrintError("Failed to setup a writer for output fie %s " % Outfile)
  879             
  880         OutfilesWriters[RuleID] = Writer
  881 
  882     return OutfilesWriters
  883 
  884 def CloseByRuleOutfilesWriters(OutfilesWriters):
  885     """Close by rule outfile writers."""
  886 
  887     for RuleID, Writer in OutfilesWriters.items():
  888         if Writer is not None:
  889             Writer.close()
  890 
  891 def InstantiateTorsionLibraryAlertsClass(Quiet = False):
  892     """Initialize torsion library alerts class."""
  893 
  894     try:
  895         TorsionLibraryAlertsHandle = TorsionLibraryAlerts(AlertsMode = OptionsInfo["AlertsMode"], MinAlertsCount = OptionsInfo["MinAlertsCount"], NitrogenLonePairAllowHydrogenNbrs = OptionsInfo["NitrogenLonePairParams"]["AllowHydrogenNbrs"], NitrogenLonePairPlanarityTolerance = OptionsInfo["NitrogenLonePairParams"]["PlanarityTolerance"], RotBondsSMARTSMode = OptionsInfo["RotBondsSMARTSMode"], RotBondsSMARTSPattern = OptionsInfo["RotBondsSMARTSPattern"], TorsionLibraryFilePath = OptionsInfo["TorsionLibraryFile"])
  896     except Exception as ErrMsg:
  897         MiscUtil.PrintError("Failed to instantiate TorsionLibraryAlerts:\n%s\n" % (ErrMsg))
  898 
  899     if not Quiet:
  900         MiscUtil.PrintInfo("\nRetrieving data from library file %s..." % TorsionLibraryAlertsHandle.GetTorsionLibraryFilePath())
  901         TorsionLibraryAlertsHandle.ListTorsionLibraryInfo()
  902 
  903     return TorsionLibraryAlertsHandle
  904 
  905 
  906 def ProcessRotatableBondsSMARTSMode():
  907     """"Process SMARTS pattern for rotatable bonds."""
  908 
  909     RotBondsMode = OptionsInfo["RotBondsSMARTSMode"]
  910     
  911     RotBondsSMARTSPattern = None
  912     RotBondsSMARTSPatternSpecified = OptionsInfo["RotBondsSMARTSPatternSpecified"]
  913     
  914     if re.match("^(NonStrict|SemiStrict|Strict)$", RotBondsMode, re.I):
  915         RotBondsSMARTSPattern = None
  916     elif re.match("Specify", RotBondsMode, re.I):
  917         RotBondsSMARTSPatternSpecified = RotBondsSMARTSPatternSpecified.strip()
  918         if not len(RotBondsSMARTSPatternSpecified):
  919             MiscUtil.PrintError("Empty value specified for SMILES/SMARTS pattern in  \"--rotBondsSMARTSPattern\" option, %s." % RotBondsMode)
  920         
  921         RotBondsPatternMol = Chem.MolFromSmarts(RotBondsSMARTSPatternSpecified)
  922         if RotBondsPatternMol is None:
  923             MiscUtil.PrintError("Failed to create rotatable bonds pattern molecule. The rotatable bonds SMARTS pattern, \"%s\", specified using \"--rotBondsSMARTSPattern\" option is not valid." % (RotBondsSMARTSPatternSpecified))
  924     else:
  925         MiscUtil.PrintError("The value, %s, specified for option \"-r, --rotBondsSMARTSMode\" is not valid. " % RotBondsMode)
  926     
  927     OptionsInfo["RotBondsSMARTSPattern"] = RotBondsSMARTSPattern
  928 
  929 
  930 
  931 def ProcessSDFieldLabelsOption():
  932     """Process SD data field label option."""
  933 
  934     ParamsOptionName = "--outfileSDFieldLabels"
  935     ParamsOptionValue = Options["--outfileSDFieldLabels"]
  936     
  937     ParamsIDsToLabels = {"RotBondsCountLabel": "RotBondsCount", "TorsionAlertsCountLabel": "TorsionAlertsCount (Green Orange Red)", "TorsionAlertsLabel": "TorsionAlerts (RotBondIndices TorsionAlert TorsionIndices TorsionAngle TorsionAngleViolation HierarchyClass HierarchySubClass TorsionPeaks Tolerances1 Tolerances2 TorsionRule)", "TorsionRuleLabel": "TorsionRule (HierarchyClass HierarchySubClass TorsionPeaks Tolerances1 Tolerances2 TorsionRule)", "TorsionRuleAlertsCountLabel": "TorsionRuleAlertsCount (Green Orange Red)", "TorsionRuleAlertsLabel": "TorsionRuleAlerts (RotBondIndices TorsionAlert TorsionIndices TorsionAngle TorsionAngleViolation)", "TorsionRuleMaxAngleViolationLabel": "TorsionRuleMaxAngleViolation"}
  938     
  939     if re.match("^auto$", ParamsOptionValue, re.I):
  940         OptionsInfo["SDFieldIDsToLabels"] = ParamsIDsToLabels
  941         return
  942     
  943     # Setup a canonical paramater names...
  944     ValidParamNames = []
  945     CanonicalParamNamesMap = {}
  946     for ParamName in sorted(ParamsIDsToLabels):
  947         ValidParamNames.append(ParamName)
  948         CanonicalParamNamesMap[ParamName.lower()] = ParamName
  949     
  950     ParamsOptionValue = ParamsOptionValue.strip()
  951     if not ParamsOptionValue:
  952         PrintError("No valid parameter name and value pairs specified using \"%s\" option" % ParamsOptionName)
  953     
  954     ParamsOptionValueWords = ParamsOptionValue.split(",")
  955     if len(ParamsOptionValueWords) % 2:
  956         MiscUtil.PrintError("The number of comma delimited paramater names and values, %d, specified using \"%s\" option must be an even number." % (len(ParamsOptionValueWords), ParamsOptionName))
  957     
  958     # Validate paramater name and value pairs...
  959     for Index in range(0, len(ParamsOptionValueWords), 2):
  960         Name = ParamsOptionValueWords[Index].strip()
  961         Value = ParamsOptionValueWords[Index + 1].strip()
  962 
  963         CanonicalName = Name.lower()
  964         if  not CanonicalName in CanonicalParamNamesMap:
  965             MiscUtil.PrintError("The parameter name, %s, specified using \"%s\" is not a valid name. Supported parameter names: %s" % (Name, ParamsOptionName, " ".join(ValidParamNames)))
  966 
  967         ParamName = CanonicalParamNamesMap[CanonicalName]
  968         ParamValue = Value
  969         
  970         # Set value...
  971         ParamsIDsToLabels[ParamName] = ParamValue
  972     
  973     OptionsInfo["SDFieldIDsToLabels"] = ParamsIDsToLabels
  974 
  975 def ProcessOptionNitrogenLonePairParameters():
  976     """Process nitrogen lone pair parameters option."""
  977 
  978     ParamsOptionName = "--nitrogenLonePairParams"
  979     ParamsOptionValue = Options["--nitrogenLonePairParams"]
  980     
  981     ParamsInfo = {"AllowHydrogenNbrs": True, "PlanarityTolerance": 1.0,}
  982     
  983     if re.match("^auto$", ParamsOptionValue, re.I):
  984         OptionsInfo["NitrogenLonePairParams"] = ParamsInfo
  985         return
  986     
  987     # Setup a canonical paramater names...
  988     ValidParamNames = []
  989     CanonicalParamNamesMap = {}
  990     for ParamName in sorted(ParamsInfo):
  991         ValidParamNames.append(ParamName)
  992         CanonicalParamNamesMap[ParamName.lower()] = ParamName
  993     
  994     ParamsOptionValue = ParamsOptionValue.strip()
  995     if not ParamsOptionValue:
  996         PrintError("No valid parameter name and value pairs specified using \"%s\" option" % ParamsOptionName)
  997     
  998     ParamsOptionValueWords = ParamsOptionValue.split(",")
  999     if len(ParamsOptionValueWords) % 2:
 1000         MiscUtil.PrintError("The number of comma delimited paramater names and values, %d, specified using \"%s\" option must be an even number." % (len(ParamsOptionValueWords), ParamsOptionName))
 1001     
 1002     # Validate paramater name and value pairs...
 1003     for Index in range(0, len(ParamsOptionValueWords), 2):
 1004         Name = ParamsOptionValueWords[Index].strip()
 1005         Value = ParamsOptionValueWords[Index + 1].strip()
 1006 
 1007         CanonicalName = Name.lower()
 1008         if  not CanonicalName in CanonicalParamNamesMap:
 1009             MiscUtil.PrintError("The parameter name, %s, specified using \"%s\" is not a valid name. Supported parameter names: %s" % (Name, ParamsOptionName, " ".join(ValidParamNames)))
 1010 
 1011         ParamName = CanonicalParamNamesMap[CanonicalName]
 1012         ParamValue = Value
 1013         
 1014         if re.match("^PlanarityTolerance$", ParamName, re.I):
 1015             Value = float(Value)
 1016             if Value < 0:
 1017                 MiscUtil.PrintError("The parameter value, %s, specified for parameter name, %s, using \"%s\" option is not a valid value. Supported values: >= 0" % (Value, Name, ParamsOptionName))
 1018             ParamValue = Value
 1019         elif re.match("^AllowHydrogenNbrs$", ParamName, re.I):
 1020             if not re.match("^(yes|no)$", Value, re.I):
 1021                 MiscUtil.PrintError("The parameter value, %s, specified for parameter name, %s, using \"%s\" option is not a valid value. Supported values: yes or no" % (Value, Name, ParamsOptionName))
 1022             ParamValue = True if re.match("^yes$", Value, re.I) else False
 1023             
 1024         # Set value...
 1025         ParamsInfo[ParamName] = ParamValue
 1026     
 1027     OptionsInfo["NitrogenLonePairParams"] = ParamsInfo
 1028     
 1029 def ProcessOptions():
 1030     """Process and validate command line arguments and options."""
 1031     
 1032     MiscUtil.PrintInfo("Processing options...")
 1033 
 1034     # Validate options...
 1035     ValidateOptions()
 1036     
 1037     OptionsInfo["Infile"] = Options["--infile"]
 1038     ParamsDefaultInfoOverride = {"RemoveHydrogens": False}
 1039     OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters("--infileParams", Options["--infileParams"], InfileName = Options["--infile"], ParamsDefaultInfo = ParamsDefaultInfoOverride)
 1040     
 1041     OptionsInfo["Outfile"] = Options["--outfile"]
 1042     OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters("--outfileParams", Options["--outfileParams"], Options["--infile"], Options["--outfile"])
 1043     
 1044     FileDir, FileName, FileExt = MiscUtil.ParseFileName(Options["--outfile"])
 1045     OutfileFiltered = "%s_Filtered.%s" % (FileName, FileExt)
 1046     OptionsInfo["OutfileFiltered"] = OutfileFiltered
 1047     OptionsInfo["OutfileFilteredMode"] = True if re.match("^yes$", Options["--outfileFiltered"], re.I) else False
 1048     
 1049     OutfileSummary = "%s_AlertsSummary.csv" % (FileName)
 1050     OptionsInfo["OutfileSummary"] = OutfileSummary
 1051     OptionsInfo["OutfileSummaryMode"] = True if re.match("^yes$", Options["--outfileSummary"], re.I) else False
 1052 
 1053     OptionsInfo["OutfilesFilteredByRulesMode"] = True if re.match("^yes$", Options["--outfilesFilteredByRules"], re.I) else False
 1054     OptionsInfo["TrackAlertsSummaryInfo"] = True if (OptionsInfo["OutfileSummaryMode"] or OptionsInfo["OutfilesFilteredByRulesMode"]) else False
 1055 
 1056     OutfilesFilteredByRulesMaxCount = Options["--outfilesFilteredByRulesMaxCount"]
 1057     if not re.match("^All$", OutfilesFilteredByRulesMaxCount, re.I):
 1058         OutfilesFilteredByRulesMaxCount = int(OutfilesFilteredByRulesMaxCount)
 1059     OptionsInfo["OutfilesFilteredByRulesMaxCount"] = OutfilesFilteredByRulesMaxCount
 1060     OptionsInfo["OutfilesFilteredByRulesAllMode"] = True if re.match("^All$", Options["--outfilesFilteredByRulesMaxCount"], re.I) else False
 1061     
 1062     OptionsInfo["OutfileAlerts"] = True if re.match("^yes$", Options["--outfileAlerts"], re.I) else False
 1063 
 1064     if re.match("^yes$", Options["--outfilesFilteredByRules"], re.I):
 1065         if not re.match("^yes$", Options["--outfileAlerts"], re.I):
 1066             MiscUtil.PrintError("The value \"%s\" specified for \"--outfilesFilteredByRules\" option is not valid. The specified value is only allowed during \"yes\" value of \"--outfileAlerts\" option." % (Options["--outfilesFilteredByRules"]))
 1067     
 1068     OptionsInfo["OutfileAlertsMode"] = Options["--outfileAlertsMode"]
 1069     OptionsInfo["OutfileAlertsOnly"] = True if re.match("^AlertsOnly$", Options["--outfileAlertsMode"], re.I) else False
 1070 
 1071     ProcessSDFieldLabelsOption()
 1072     
 1073     OptionsInfo["Overwrite"] = Options["--overwrite"]
 1074     OptionsInfo["CountMode"] = True if re.match("^count$", Options["--mode"], re.I) else False
 1075 
 1076     OptionsInfo["MPMode"] = True if re.match("^yes$", Options["--mp"], re.I) else False
 1077     OptionsInfo["MPParams"] = MiscUtil.ProcessOptionMultiprocessingParameters("--mpParams", Options["--mpParams"])
 1078 
 1079     ProcessOptionNitrogenLonePairParameters()
 1080     
 1081     OptionsInfo["AlertsMode"] = Options["--alertsMode"]
 1082     OptionsInfo["SpecifiedAlertsModeList"] = []
 1083     if re.match("^Red$", Options["--alertsMode"], re.I):
 1084         OptionsInfo["SpecifiedAlertsModeList"].append("Red")
 1085     elif re.match("^RedAndOrange$", Options["--alertsMode"], re.I):
 1086         OptionsInfo["SpecifiedAlertsModeList"].append("Red")
 1087         OptionsInfo["SpecifiedAlertsModeList"].append("Orange")
 1088     
 1089     OptionsInfo["MinAlertsCount"] = int(Options["--alertsMinCount"])
 1090 
 1091     OptionsInfo["RotBondsSMARTSMode"] = Options["--rotBondsSMARTSMode"]
 1092     OptionsInfo["RotBondsSMARTSPatternSpecified"] = Options["--rotBondsSMARTSPattern"]
 1093     ProcessRotatableBondsSMARTSMode()
 1094 
 1095     OptionsInfo["TorsionLibraryFile"] = Options["--torsionLibraryFile"]
 1096 
 1097     # Setup delimiter for writing out torsion alert information to output files...
 1098     OptionsInfo["IntraSetValuesDelim"] = ","
 1099     OptionsInfo["InterSetValuesDelim"] = " "
 1100 
 1101 def RetrieveOptions():
 1102     """Retrieve command line arguments and options."""
 1103     
 1104     # Get options...
 1105     global Options
 1106     Options = docopt(_docoptUsage_)
 1107 
 1108     # Set current working directory to the specified directory...
 1109     WorkingDir = Options["--workingdir"]
 1110     if WorkingDir:
 1111         os.chdir(WorkingDir)
 1112     
 1113     # Handle examples option...
 1114     if "--examples" in Options and Options["--examples"]:
 1115         MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_))
 1116         sys.exit(0)
 1117     
 1118 def ProcessListTorsionLibraryOption():
 1119     """Process list torsion library information."""
 1120 
 1121     # Validate and process dataFile option for listing torsion library information...
 1122     OptionsInfo["TorsionLibraryFile"] = Options["--torsionLibraryFile"]
 1123     if not re.match("^auto$", Options["--torsionLibraryFile"], re.I):
 1124         MiscUtil.ValidateOptionFilePath("-t, --torsionLibraryFile", Options["--torsionLibraryFile"])
 1125 
 1126     # Instantiate TorsionLibraryAlerts using defaults...
 1127     TorsionLibraryAlertsHandle = TorsionLibraryAlerts(TorsionLibraryFilePath = OptionsInfo["TorsionLibraryFile"])
 1128     MiscUtil.PrintInfo("\nRetrieving data from torsion library file %s..." % TorsionLibraryAlertsHandle.GetTorsionLibraryFilePath())
 1129     TorsionLibraryAlertsHandle.ListTorsionLibraryInfo()
 1130 
 1131 def ValidateOptions():
 1132     """Validate option values."""
 1133 
 1134     MiscUtil.ValidateOptionTextValue("-a, --alertsMode", Options["--alertsMode"], "Red RedAndOrange")
 1135     MiscUtil.ValidateOptionIntegerValue("--alertsMinCount", Options["--alertsMinCount"], {">=": 1})
 1136     
 1137     MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"])
 1138     MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "sdf sd mol")
 1139     
 1140     MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "sdf sd")
 1141     if re.match("^filter$", Options["--mode"], re.I):
 1142         MiscUtil.ValidateOptionsOutputFileOverwrite("-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"])
 1143         MiscUtil.ValidateOptionsDistinctFileNames("-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"])
 1144 
 1145     MiscUtil.ValidateOptionTextValue("--outfileFiltered", Options["--outfileFiltered"], "yes no")
 1146     
 1147     MiscUtil.ValidateOptionTextValue("--outfilesFilteredByRules", Options["--outfilesFilteredByRules"], "yes no")
 1148     if not re.match("^All$", Options["--outfilesFilteredByRulesMaxCount"], re.I):
 1149         MiscUtil.ValidateOptionIntegerValue("--outfilesFilteredByRulesMaxCount", Options["--outfilesFilteredByRulesMaxCount"], {">": 0})
 1150     
 1151     MiscUtil.ValidateOptionTextValue("--outfileSummary", Options["--outfileSummary"], "yes no")
 1152     MiscUtil.ValidateOptionTextValue("--outfileAlerts", Options["--outfileAlerts"], "yes no")
 1153     MiscUtil.ValidateOptionTextValue("--outfileAlertsMode", Options["--outfileAlertsMode"], "All AlertsOnly")
 1154     
 1155     MiscUtil.ValidateOptionTextValue("-m, --mode", Options["--mode"], "filter count")
 1156     if re.match("^filter$", Options["--mode"], re.I):
 1157         if not Options["--outfile"]:
 1158             MiscUtil.PrintError("The outfile must be specified using \"-o, --outfile\" during \"filter\" value of \"-m, --mode\" option")
 1159         
 1160     MiscUtil.ValidateOptionTextValue("--mp", Options["--mp"], "yes no")
 1161     
 1162     MiscUtil.ValidateOptionTextValue("-r, --rotBondsSMARTSMode", Options["--rotBondsSMARTSMode"], "NonStrict SemiStrict Strict Specify")
 1163     if re.match("^Specify$", Options["--rotBondsSMARTSMode"], re.I):
 1164         if not Options["--rotBondsSMARTSPattern"]:
 1165             MiscUtil.PrintError("The SMARTS pattern must be specified using \"--rotBondsSMARTSPattern\" during \"Specify\" value of \"-r, --rotBondsSMARTS\" option")
 1166     
 1167     if not re.match("^auto$", Options["--torsionLibraryFile"], re.I):
 1168         MiscUtil.ValidateOptionFilePath("-t, --torsionLibraryFile", Options["--torsionLibraryFile"])
 1169 
 1170 # Setup a usage string for docopt...
 1171 _docoptUsage_ = """
 1172 RDKitFilterTorsionLibraryAlerts.py - Filter torsion library alerts
 1173 
 1174 Usage:
 1175     RDKitFilterTorsionLibraryAlerts.py  [--alertsMode <Red, RedAndOrange>] [--alertsMinCount <Number>]
 1176                                         [--infileParams <Name,Value,...>] [--mode <filter or count>] [--mp <yes or no>] [--mpParams <Name,Value,...>]
 1177                                         [--nitrogenLonePairParams <Name,Value,...>] [--outfileAlerts <yes or no>]
 1178                                         [--outfileAlertsMode <All or AlertsOnly>] [--outfileFiltered <yes or no>]
 1179                                         [--outfilesFilteredByRules <yes or no>] [--outfilesFilteredByRulesMaxCount <All or number>]
 1180                                         [--outfileSummary <yes or no>] [--outfileSDFieldLabels <Type,Label,...>]
 1181                                         [--outfileParams <Name,Value,...>] [--overwrite] [ --rotBondsSMARTSMode <NonStrict, SemiStrict,...>]
 1182                                         [--rotBondsSMARTSPattern <SMARTS>] [--torsionLibraryFile <FileName or auto>] [-w <dir>] -i <infile> -o <outfile>
 1183     RDKitFilterTorsionLibraryAlerts.py [--torsionLibraryFile <FileName or auto>] -l | --list
 1184     RDKitFilterTorsionLibraryAlerts.py -h | --help | -e | --examples
 1185 
 1186 Description:
 1187     Filter strained molecules from an input file for torsion library [ Ref 146, 152, 159 ]
 1188     alerts by matching rotatable bonds against SMARTS patterns specified for torsion
 1189     rules in a torsion library file and write out appropriate molecules to output
 1190     files. The molecules must have 3D coordinates in input file.
 1191     
 1192     The default torsion library file, TorsionLibrary.xml, is available under
 1193     MAYACHEMTOOLS/lib/python/TorsionAlerts directory.
 1194     
 1195     The data in torsion library file is organized in a hierarchical manner. It consists
 1196     of one generic class and six specific classes at the highest level. Each class
 1197     contains multiple subclasses corresponding to named functional groups or
 1198     substructure patterns. The subclasses consist of torsion rules sorted from
 1199     specific to generic torsion patterns. The torsion rule, in turn, contains a list
 1200     of peak values for torsion angles and two tolerance values. A pair of tolerance
 1201     values define torsion bins around a torsion peak value. For example:
 1202          
 1203         <library>
 1204             <hierarchyClass name="GG" id1="G" id2="G">
 1205             ...
 1206             </hierarchyClass>
 1207             <hierarchyClass name="CO" id1="C" id2="O">
 1208                 <hierarchySubClass name="Ester bond I" smarts="O=[C:2][O:3]">
 1209                     <torsionRule smarts="[O:1]=[C:2]!@[O:3]~[CH0:4]">
 1210                         <angleList>
 1211                             <angle value="0.0" tolerance1="20.00"
 1212                              tolerance2="25.00" score="56.52"/>
 1213                         </angleList>
 1214                     </torsionRule>
 1215                     ...
 1216                 ...
 1217              ...
 1218             </hierarchyClass>
 1219             <hierarchyClass name="NC" id1="N" id2="C">
 1220              ...
 1221             </hierarchyClass>
 1222             <hierarchyClass name="SN" id1="S" id2="N">
 1223             ...
 1224             </hierarchyClass>
 1225             <hierarchyClass name="CS" id1="C" id2="S">
 1226             ...
 1227             </hierarchyClass>
 1228             <hierarchyClass name="CC" id1="C" id2="C">
 1229             ...
 1230             </hierarchyClass>
 1231             <hierarchyClass name="SS" id1="S" id2="S">
 1232              ...
 1233             </hierarchyClass>
 1234         </library>
 1235         
 1236     The rotatable bonds in a 3D molecule are identified using a default SMARTS pattern.
 1237     A custom SMARTS pattern may be optionally specified to detect rotatable bonds.
 1238     Each rotatable bond is matched to a torsion rule in the torsion library and
 1239     assigned one of the following three alert categories: Green, Orange or Red. The 
 1240     rotatable bond is marked Green or Orange for the measured angle of the torsion
 1241     pattern within the first or second tolerance bins around a torsion peak.
 1242     Otherwise, it's marked Red implying that the measured angle is not observed in
 1243     the structure databases employed to generate the torsion library.
 1244 
 1245     The following output files are generated after the filtering:
 1246         
 1247         <OutfileRoot>.sdf
 1248         <OutfileRoot>_Filtered.sdf
 1249         <OutfileRoot>_AlertsSummary.csv
 1250         <OutfileRoot>_Filtered_TopRule*.sdf
 1251         
 1252     The supported input file formats are: Mol (.mol), SD (.sdf, .sd)
 1253 
 1254     The supported output file formats are: SD (.sdf, .sd)
 1255 
 1256 Options:
 1257     -a, --alertsMode <Red, RedAndOrange>  [default: Red]
 1258         Torsion library alert types to use for filtering molecules containing
 1259         rotatable bonds marked with Green, Orange, or Red alerts. Possible
 1260         values: Red or RedAndOrange.
 1261     --alertsMinCount <Number>  [default: 1]
 1262         Minimum number of rotatable bond alerts in a molecule for filtering the
 1263         molecule.
 1264     -e, --examples
 1265         Print examples.
 1266     -h, --help
 1267         Print this help message.
 1268     -i, --infile <infile>
 1269         Input file name.
 1270     --infileParams <Name,Value,...>  [default: auto]
 1271         A comma delimited list of parameter name and value pairs for reading
 1272         molecules from files. The supported parameter names for different file
 1273         formats, along with their default values, are shown below:
 1274             
 1275             SD, MOL: removeHydrogens,no,sanitize,yes,strictParsing,yes
 1276             
 1277     -l, --list
 1278         List torsion library information without performing any filtering.
 1279     -m, --mode <filter or count>  [default: filter]
 1280         Specify whether to filter molecules for torsion library [ Ref 146, 152, 159 ] alerts
 1281         by matching rotatable bonds against SMARTS patterns specified for torsion
 1282         rules and write out the rest of the molecules to an outfile or simply count
 1283         the number of matched molecules marked for filtering.
 1284     --mp <yes or no>  [default: no]
 1285         Use multiprocessing.
 1286          
 1287         By default, input data is retrieved in a lazy manner via mp.Pool.imap()
 1288         function employing lazy RDKit data iterable. This allows processing of
 1289         arbitrarily large data sets without any additional memory requirements.
 1290         
 1291         All input data may be optionally loaded into memory by mp.Pool.map()
 1292         before starting worker processes in a process pool by setting the value
 1293         of 'inputDataMode' to 'InMemory' in '--mpParams' option.
 1294         
 1295         A word to the wise: The default 'chunkSize' value of 1 during 'Lazy' input
 1296         data mode may adversely impact the performance. The '--mpParams' section
 1297         provides additional information to tune the value of 'chunkSize'.
 1298     --mpParams <Name,Value,...>  [default: auto]
 1299         A comma delimited list of parameter name and value pairs to configure
 1300         multiprocessing.
 1301         
 1302         The supported parameter names along with their default and possible
 1303         values are shown below:
 1304         
 1305             chunkSize, auto
 1306             inputDataMode, Lazy   [ Possible values: InMemory or Lazy ]
 1307             numProcesses, auto   [ Default: mp.cpu_count() ]
 1308         
 1309         These parameters are used by the following functions to configure and
 1310         control the behavior of multiprocessing: mp.Pool(), mp.Pool.map(), and
 1311         mp.Pool.imap().
 1312         
 1313         The chunkSize determines chunks of input data passed to each worker
 1314         process in a process pool by mp.Pool.map() and mp.Pool.imap() functions.
 1315         The default value of chunkSize is dependent on the value of 'inputDataMode'.
 1316         
 1317         The mp.Pool.map() function, invoked during 'InMemory' input data mode,
 1318         automatically converts RDKit data iterable into a list, loads all data into
 1319         memory, and calculates the default chunkSize using the following method
 1320         as shown in its code:
 1321         
 1322             chunkSize, extra = divmod(len(dataIterable), numProcesses * 4)
 1323             if extra: chunkSize += 1
 1324         
 1325         For example, the default chunkSize will be 7 for a pool of 4 worker processes
 1326         and 100 data items.
 1327         
 1328         The mp.Pool.imap() function, invoked during 'Lazy' input data mode, employs
 1329         'lazy' RDKit data iterable to retrieve data as needed, without loading all the
 1330         data into memory. Consequently, the size of input data is not known a priori.
 1331         It's not possible to estimate an optimal value for the chunkSize. The default 
 1332         chunkSize is set to 1.
 1333         
 1334         The default value for the chunkSize during 'Lazy' data mode may adversely
 1335         impact the performance due to the overhead associated with exchanging
 1336         small chunks of data. It is generally a good idea to explicitly set chunkSize to
 1337         a larger value during 'Lazy' input data mode, based on the size of your input
 1338         data and number of processes in the process pool.
 1339         
 1340         The mp.Pool.map() function waits for all worker processes to process all
 1341         the data and return the results. The mp.Pool.imap() function, however,
 1342         returns the results obtained from worker processes as soon as the
 1343         results become available for specified chunks of data.
 1344         
 1345         The order of data in the results returned by both mp.Pool.map() and 
 1346         mp.Pool.imap() functions always corresponds to the input data.
 1347     -n, --nitrogenLonePairParams <Name,Value,...>  [default: auto]
 1348         A comma delimited list of parameter name and value pairs to match
 1349         torsion SMARTS patterns containing non-standard construct 'N_lp'
 1350         corresponding to nitrogen lone pair.
 1351         
 1352         The supported parameter names along with their default and possible
 1353         values are shown below:
 1354         
 1355             allowHydrogenNbrs, yes   [ Possible values: yes or no ]
 1356             planarityTolerance, 1  [Possible values: >=0] 
 1357             
 1358         These parameters are used during the matching of torsion rules containing
 1359         'N_lp' in their SMARTS patterns. The 'allowHydrogenNbrs' allows the use
 1360         of hydrogen neighbors attached to nitrogen during the determination of its
 1361         planarity. The 'planarityTolerance' in degrees represents the tolerance
 1362         allowed for nitrogen to be considered coplanar with its three neighbors.
 1363         
 1364         The torsion rules containing 'N_lp' in their SMARTS patterns are categorized
 1365         into the following two types of rules:
 1366          
 1367             TypeOne:  
 1368             
 1369             [CX4:1][CX4H2:2]!@[NX3;"N_lp":3][CX4:4]
 1370             [C:1][CX4H2:2]!@[NX3;"N_lp":3][C:4]
 1371             ... ... ...
 1372          
 1373             TypeTwo:  
 1374             
 1375             [!#1:1][CX4:2]!@[NX3;"N_lp":3]
 1376             [C:1][$(S(=O)=O):2]!@["N_lp":3]
 1377             ... ... ...
 1378             
 1379         The torsions are matched to torsion rules containing 'N_lp' using specified
 1380         SMARTS patterns without the 'N_lp' along with additional constraints using
 1381         the following methodology:
 1382             
 1383             TypeOne:  
 1384             
 1385             . SMARTS pattern must contain four mapped atoms and the third
 1386                 mapped atom must be a nitrogen matched with 'NX3:3'
 1387             . Nitrogen atom must have 3 neighbors. The 'allowHydrogenNbrs'
 1388                 parameter controls inclusion of hydrogens as its neighbors.
 1389             . Nitrogen atom and its 3 neighbors must be coplanar.
 1390                 'planarityTolerance' parameter provides tolerance in degrees
 1391                 for nitrogen to be considered coplanar with its 3 neighbors.
 1392             
 1393             TypeTwo:  
 1394             
 1395             . SMARTS pattern must contain three mapped atoms and the third
 1396                 mapped atom must be a nitrogen matched with 'NX3:3'. The 
 1397                 mapped atom may contain only 'N_lp:3'. The missing 'NX3'
 1398                 is automatically detected.
 1399             . Nitrogen atom must have 3 neighbors. 'allowHydrogenNbrs'
 1400                 parameter controls inclusion of hydrogens as neighbors.
 1401             . Nitrogen atom and its 3 neighbors must not be coplanar.
 1402                 'planarityTolerance' parameter provides tolerance in degrees
 1403                 for nitrogen to be considered coplanar with its 3 neighbors.
 1404             . Nitrogen lone pair position equivalent to VSEPR theory is
 1405                 determined based on the position of nitrogen and its neighbors.
 1406                 A vector normal to 3 nitrogen neighbors is calculated and added
 1407                 to the coordinates of nitrogen atom to determine the approximate
 1408                 position of the lone pair. It is used as the fourth position to
 1409                 calculate the torsion angle.
 1410             
 1411     -o, --outfile <outfile>
 1412         Output file name.
 1413     --outfileAlerts <yes or no>  [default: yes]
 1414         Write out alerts information to SD output files.
 1415     --outfileAlertsMode <All or AlertsOnly>  [default: AlertsOnly]
 1416         Write alerts information to SD output files for all alerts or only for alerts
 1417         specified by '--alertsMode' option. Possible values: All or AlertsOnly.
 1418         This option is only valid for 'Yes' value of '--outfileAlerts' option.
 1419         
 1420         The following alerts information is added to SD output files using
 1421         'TorsionAlerts' data field:
 1422             
 1423             RotBondIndices TorsionAlert TorsionIndices TorsionAngle
 1424             TorsionAngleViolation HierarchyClass HierarchySubClass
 1425             TorsionRule TorsionPeaks Tolerances1 Tolerances2
 1426             
 1427         The 'RotBondsCount' and 'TorsionAlertsCount' data fields are always added
 1428         to SD output files containing both remaining and filtered molecules.
 1429         
 1430         Format:
 1431             
 1432             > <RotBondsCount>
 1433             Number
 1434             
 1435             > <TorsionAlertsCount (Green Orange Red)>
 1436             Number Number Number
 1437             
 1438             > <TorsionAlerts (RotBondIndices TorsionAlert TorsionIndices
 1439                 TorsionAngle TorsionAngleViolation HierarchyClass
 1440                 HierarchySubClass TorsionPeaks Tolerances1 Tolerances2
 1441                 TorsionRule)>
 1442             AtomIndex2,AtomIndex3  AlertType AtomIndex1,AtomIndex2,AtomIndex3,
 1443             AtomIndex4 Angle AngleViolation ClassName SubClassName
 1444             CommaDelimPeakValues CommaDelimTol1Values CommaDelimTol2Values
 1445             SMARTS ... ... ...
 1446              ... ... ...
 1447             
 1448         A set of 11 values is written out as value of 'TorsionAlerts' data field for
 1449         each torsion in a molecule. The space character is used as a delimiter
 1450         to separate values within a set and across sets. The comma character
 1451         is used to delimit multiple values for each value in a set.
 1452         
 1453         The 'RotBondIndices' and 'TorsionIndices' contain 2 and 4 comma delimited
 1454         values representing atom indices for a rotatable bond and matched torsion.
 1455         The 'TorsionPeaks',  'Tolerances1', and 'Tolerances2' contain same number
 1456         of comma delimited values corresponding to  torsion angle peaks and
 1457         tolerance intervals specified in torsion library. For example:
 1458             
 1459             ... ... ...
 1460             >  <RotBondsCount>  (1) 
 1461             7
 1462             
 1463             >  <TorsionAlertsCount (Green Orange Red)>  (1) 
 1464             3 2 2
 1465             
 1466             >  <TorsionAlerts (RotBondIndices TorsionAlert TorsionIndices
 1467                 TorsionAngle TorsionAngleViolation HierarchyClass
 1468                 HierarchySubClass TorsionPeaks Tolerances1 Tolerances2
 1469                 TorsionRule)>
 1470             1,2 Red 32,2,1,0 0.13 149.87 NC Anilines 180.0 10.0 30.0 [cH0:1][c:2]
 1471             ([cH,nX2H0])!@[NX3H1:3][CX4:4] 8,9 Red 10,9,8,28 -0.85 GG
 1472             None -90.0,90.0 30.0,30.0 60.0,60.0 [cH1:1][a:2]([cH1])!@[a:3]
 1473             ([cH0])[cH0:4]
 1474             ... ... ...
 1475             
 1476     --outfileFiltered <yes or no>  [default: yes]
 1477         Write out a file containing filtered molecules. Its name is automatically
 1478         generated from the specified output file. Default: <OutfileRoot>_
 1479         Filtered.<OutfileExt>.
 1480     --outfilesFilteredByRules <yes or no>  [default: yes]
 1481         Write out SD files containing filtered molecules for individual torsion
 1482         rules triggering alerts in molecules. The names of SD files are automatically
 1483         generated from the specified output file. Default file names: <OutfileRoot>_
 1484         Filtered_TopRule*.sdf
 1485                 
 1486         The following alerts information is added to SD output files:
 1487             
 1488             > <RotBondsCount>
 1489             Number
 1490             
 1491             >  <TorsionAlertsCount (Green Orange Red)> 
 1492             Number Number Number
 1493             
 1494             >  <TorsionRule (HierarchyClass HierarchySubClass TorsionPeaks
 1495                 Tolerances1 Tolerances2 TorsionRule)> 
 1496             ClassName SubClassName CommaDelimPeakValues CommaDelimTol1Values
 1497             CommaDelimTol2Values SMARTS ... ... ...
 1498              ... ... ...
 1499             
 1500             > <TorsionRuleAlertsCount (Green Orange Red)>
 1501             Number Number Number
 1502             
 1503             >  <TorsionRuleAlerts (RotBondIndices TorsionAlert TorsionIndices
 1504                 TorsionAngle TorsionAngleViolation)>
 1505             AtomIndex2,AtomIndex3  AlertType AtomIndex1,AtomIndex2,AtomIndex3,
 1506             AtomIndex4 Angle AngleViolation ... ... ...
 1507             
 1508             >  <TorsionRuleMaxAngleViolation>
 1509             Number
 1510              ... ... ...
 1511             
 1512         For example:
 1513             
 1514             ... ... ...
 1515             >  <RotBondsCount>  (1) 
 1516             7
 1517              
 1518             >  <TorsionAlertsCount (Green Orange Red)>  (1) 
 1519             3 2 2
 1520             
 1521             >  <TorsionRule (HierarchyClass HierarchySubClass TorsionPeaks
 1522                 Tolerances1 Tolerances2 TorsionRule)>  (1) 
 1523             NC Anilines 180.0 10.0 30.0 [cH0:1][c:2]([cH,nX2H0])!@[NX3H1:3][CX4:4]
 1524             
 1525             >  <TorsionRuleAlertsCount (Green Orange Red)>  (1) 
 1526             0 0 1
 1527             
 1528             >  <TorsionRuleAlerts (RotBondIndices TorsionAlert TorsionIndices
 1529                 TorsionAngle TorsionAngleViolation)>  (1) 
 1530             1,2 Red 32,2,1,0 0.13 149.87
 1531             
 1532             >  <TorsionRuleMaxAngleViolation>  (1) 
 1533             149.87
 1534             ... ... ...
 1535             
 1536     --outfilesFilteredByRulesMaxCount <All or number>  [default: 10]
 1537         Write out SD files containing filtered molecules for specified number of
 1538         top N torsion rules triggering alerts for the largest number of molecules
 1539         or for all torsion rules triggering alerts across all molecules.
 1540     --outfileSummary <yes or no>  [default: yes] 
 1541         Write out a CSV text file containing summary of torsion rules responsible
 1542         for triggering torsion alerts. Its name is automatically generated from the
 1543         specified output file. Default: <OutfileRoot>_AlertsSummary.csv.
 1544         
 1545         The following alerts information is written to summary text file:
 1546             
 1547             TorsionRule, TorsionPeaks, Tolerances1, Tolerances2,
 1548             HierarchyClass, HierarchySubClass, TorsionAlertType,
 1549             TorsionAlertCount, TorsionAlertMolCount
 1550              
 1551         The double quote characters are removed from SMARTS patterns before
 1552         writing them to a CSV file. In addition, the torsion rules are sorted by
 1553         TorsionAlertMolCount. For example:
 1554             
 1555             "TorsionRule","TorsionPeaks","Tolerances1","Tolerances2",
 1556                 "HierarchyClass","HierarchySubClass","TorsionAlertTypes",
 1557                 "TorsionAlertCount","TorsionAlertMolCount"
 1558             "[!#1:1][CX4H2:2]!@[CX4H2:3][!#1:4]","-60.0,60.0,180.0",
 1559                 "20.0,20.0,20.0","30.0,30.0,30.0","CC","None/[CX4:2][CX4:3]",
 1560                 "Red","16","11"
 1561             ... ... ...
 1562             
 1563     --outfileSDFieldLabels <Type,Label,...>  [default: auto]
 1564         A comma delimited list of SD data field type and label value pairs for writing
 1565         torsion alerts information along with molecules to SD files.
 1566         
 1567         The supported SD data field label types along with their default values are
 1568         shown below:
 1569             
 1570             For all SD files:
 1571             
 1572             RotBondsCountLabel, RotBondsCount
 1573             TorsionAlertsCountLabel, TorsionAlertsCount (Green Orange Red)
 1574             TorsionAlertsLabel, TorsionAlerts (RotBondIndices TorsionAlert
 1575                 TorsionIndices TorsionAngle TorsionAngleViolation
 1576                 HierarchyClass HierarchySubClass TorsionPeaks Tolerances1
 1577                 Tolerances2 TorsionRule)
 1578             
 1579             For individual SD files filtered by torsion rules:
 1580             
 1581             TorsionRuleLabel, TorsionRule (HierarchyClass HierarchySubClass
 1582                 TorsionPeaks Tolerances1 Tolerances2 TorsionRule)
 1583             TorsionRuleAlertsCountLabel, TorsionRuleAlertsCount (Green Orange
 1584                 Red)
 1585             TorsionRuleAlertsLabel, TorsionRuleAlerts (RotBondIndices
 1586                 TorsionAlert TorsionIndices TorsionAngle TorsionAngleViolation)
 1587             TorsionRuleMaxAngleViolationLabel, TorsionRuleMaxAngleViolation
 1588             
 1589     --outfileParams <Name,Value,...>  [default: auto]
 1590         A comma delimited list of parameter name and value pairs for writing
 1591         molecules to files. The supported parameter names for different file
 1592         formats, along with their default values, are shown below:
 1593             
 1594             SD: kekulize,yes,forceV3000,no
 1595             
 1596     --overwrite
 1597         Overwrite existing files.
 1598     -r, --rotBondsSMARTSMode <NonStrict, SemiStrict,...>  [default: SemiStrict]
 1599         SMARTS pattern to use for identifying rotatable bonds in a molecule
 1600         for matching against torsion rules in the torsion library. Possible values:
 1601         NonStrict, SemiStrict, Strict or Specify. The rotatable bond SMARTS matches
 1602         are filtered to ensure that each atom in the rotatable bond is attached to
 1603         at least two heavy atoms.
 1604         
 1605         The following SMARTS patterns are used to identify rotatable bonds for
 1606         different modes:
 1607             
 1608             NonStrict: [!$(*#*)&!D1]-&!@[!$(*#*)&!D1]
 1609             
 1610             SemiStrict:
 1611             [!$(*#*)&!D1&!$(C(F)(F)F)&!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)
 1612             &!$(C([CH3])([CH3])[CH3])]-!@[!$(*#*)&!D1&!$(C(F)(F)F)
 1613             &!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)&!$(C([CH3])([CH3])[CH3])]
 1614             
 1615             Strict:
 1616             [!$(*#*)&!D1&!$(C(F)(F)F)&!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)
 1617             &!$(C([CH3])([CH3])[CH3])&!$([CD3](=[N,O,S])-!@[#7,O,S!D1])
 1618             &!$([#7,O,S!D1]-!@[CD3]=[N,O,S])&!$([CD3](=[N+])-!@[#7!D1])
 1619             &!$([#7!D1]-!@[CD3]=[N+])]-!@[!$(*#*)&!D1&!$(C(F)(F)F)
 1620             &!$(C(Cl)(Cl)Cl)&!$(C(Br)(Br)Br)&!$(C([CH3])([CH3])[CH3])]
 1621             
 1622         The 'NonStrict' and 'Strict' SMARTS patterns are available in RDKit. The 
 1623         'NonStrict' SMARTS pattern corresponds to original Daylight SMARTS
 1624          specification for rotatable bonds. The 'SemiStrict' SMARTS pattern is 
 1625          derived from 'Strict' SMARTS patterns for its usage in this script.
 1626         
 1627         You may use any arbitrary SMARTS pattern to identify rotatable bonds by
 1628         choosing 'Specify' value for '-r, --rotBondsSMARTSMode' option and providing its
 1629         value via '--rotBondsSMARTSPattern' option.
 1630     --rotBondsSMARTSPattern <SMARTS>
 1631         SMARTS pattern for identifying rotatable bonds. This option is only valid
 1632         for 'Specify' value of '-r, --rotBondsSMARTSMode' option.
 1633     -t, --torsionLibraryFile <FileName or auto>  [default: auto]
 1634         Specify an XML file name containing data for torsion library hierarchy
 1635         or use default file, TorsionLibrary.xml, available in
 1636         MAYACHEMTOOLS/lib/Python/TorsionAlerts directory.
 1637         
 1638         The format of data in local XML file must match format of the data in Torsion
 1639         Library [ Ref 146, 152, 159 ] file available in MAYACHEMTOOLS directory.
 1640     -w, --workingdir <dir>
 1641         Location of working directory which defaults to the current directory.
 1642 
 1643 Examples:
 1644     To filter molecules containing any rotatable bonds marked with Red alerts
 1645     based on torsion rules in the torsion library and write out SD files containing
 1646     remaining and filtered molecules, and individual SD files for torsion rules
 1647     triggering alerts along with appropriate torsion information for red alerts,
 1648     type:
 1649 
 1650         % RDKitFilterTorsionLibraryAlerts.py -i Sample3D.sdf -o Sample3DOut.sdf
 1651 
 1652     To run the first example for only counting number of alerts without writing
 1653     out any SD files, type:
 1654 
 1655         % RDKitFilterTorsionLibraryAlerts.py -m count -i Sample3D.sdf -o
 1656           Sample3DOut.sdf
 1657     
 1658     To run the first example for filtering molecules marked with Orange or
 1659     Red alerts and write out SD files, type:
 1660 
 1661         % RDKitFilterTorsionLibraryAlerts.py -m Filter --alertsMode RedAndOrange
 1662           -i Sample3D.sdf -o Sample3DOut.sdf
 1663     
 1664     To run the first example for filtering molecules and writing out torsion
 1665     information for all alert types to SD files, type:
 1666 
 1667         % RDKitFilterTorsionLibraryAlerts.py --outfileAlertsMode All
 1668           -i Sample3D.sdf -o Sample3DOut.sdf
 1669 
 1670     To run the first example for filtering molecules in multiprocessing mode on
 1671     all available CPUs without loading all data into memory and write out SD files,
 1672     type:
 1673 
 1674         % RDKitFilterTorsionLibraryAlerts.py --mp yes -i Sample3D.sdf
 1675          -o Sample3DOut.sdf
 1676 
 1677     To run the first example for filtering molecules in multiprocessing mode on
 1678     all available CPUs by loading all data into memory and write out SD files,
 1679     type:
 1680 
 1681         % RDKitFilterTorsionLibraryAlerts.py  --mp yes --mpParams
 1682           "inputDataMode, InMemory" -i Sample3D.sdf  -o Sample3DOut.sdf
 1683 
 1684     To run the first example for filtering molecules in multiprocessing mode on
 1685     specific number of CPUs and chunksize without loading all data into memory
 1686     and write out SD files, type:
 1687 
 1688         % RDKitFilterTorsionLibraryAlerts.py --mp yes --mpParams
 1689           "inputDataMode,lazy,numProcesses,4,chunkSize,8"  -i Sample3D.sdf
 1690           -o Sample3DOut.sdf
 1691 
 1692     To list information about default torsion library file without performing any
 1693     filtering, type:
 1694 
 1695         % RDKitFilterTorsionLibraryAlerts.py -l
 1696 
 1697     To list information about a local torsion library XML file without performing
 1698     any filtering, type:
 1699 
 1700         % RDKitFilterTorsionLibraryAlerts.py --torsionLibraryFile
 1701           TorsionLibrary.xml -l
 1702 
 1703 Author:
 1704     Manish Sud (msud@san.rr.com)
 1705 
 1706 Collaborator:
 1707     Pat Walters
 1708 
 1709 Acknowledgments:
 1710     Wolfgang Guba, Patrick Penner, and Levi Pierce
 1711 
 1712 See also:
 1713     RDKitFilterChEMBLAlerts.py, RDKitFilterPAINS.py, RDKitFilterTorsionStrainEnergyAlerts.py,
 1714     RDKitConvertFileFormat.py, RDKitSearchSMARTS.py
 1715 
 1716 Copyright:
 1717     Copyright (C) 2024 Manish Sud. All rights reserved.
 1718 
 1719     This script uses the Torsion Library jointly developed by the University
 1720     of Hamburg, Center for Bioinformatics, Hamburg, Germany and
 1721     F. Hoffmann-La-Roche Ltd., Basel, Switzerland.
 1722 
 1723     The functionality available in this script is implemented using RDKit, an
 1724     open source toolkit for cheminformatics developed by Greg Landrum.
 1725 
 1726     This file is part of MayaChemTools.
 1727 
 1728     MayaChemTools is free software; you can redistribute it and/or modify it under
 1729     the terms of the GNU Lesser General Public License as published by the Free
 1730     Software Foundation; either version 3 of the License, or (at your option) any
 1731     later version.
 1732 
 1733 """
 1734 
 1735 if __name__ == "__main__":
 1736     main()