MayaChemTools

    1 #!/bin/env python
    2 #
    3 # File: RDKitDrawMoleculesAndDataTable.py
    4 # Author: Manish Sud <msud@san.rr.com>
    5 #
    6 # Copyright (C) 2021 Manish Sud. All rights reserved.
    7 #
    8 # The functionality available in this script is implemented using RDKit, an
    9 # open source toolkit for cheminformatics developed by Greg Landrum.
   10 #
   11 # This file is part of MayaChemTools.
   12 #
   13 # MayaChemTools is free software; you can redistribute it and/or modify it under
   14 # the terms of the GNU Lesser General Public License as published by the Free
   15 # Software Foundation; either version 3 of the License, or (at your option) any
   16 # later version.
   17 #
   18 # MayaChemTools is distributed in the hope that it will be useful, but without
# any warranty; without even the implied warranty of merchantability or fitness
   20 # for a particular purpose.  See the GNU Lesser General Public License for more
   21 # details.
   22 #
   23 # You should have received a copy of the GNU Lesser General Public License
   24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
   25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
   26 # Boston, MA, 02111-1307, USA.
   27 #
   28 
   29 from __future__ import print_function
   30 
   31 # Add local python path to the global path and import standard library modules...
   32 import os
   33 import sys;  sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python"))
   34 import time
   35 import re
   36 import random
   37 
   38 # RDKit imports...
   39 try:
   40     from rdkit import rdBase
   41     from rdkit import Chem
   42     from rdkit.Chem import AllChem
   43 except ImportError as ErrMsg:
   44     sys.stderr.write("\nFailed to import RDKit module/package: %s\n" % ErrMsg)
   45     sys.stderr.write("Check/update your RDKit environment and try again.\n\n")
   46     sys.exit(1)
   47 
   48 # MayaChemTools imports...
   49 try:
   50     from docopt import docopt
   51     import MiscUtil
   52     import RDKitUtil
   53 except ImportError as ErrMsg:
   54     sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg)
   55     sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n")
   56     sys.exit(1)
   57 
# Base name of the script as invoked; used in the progress messages printed
# by main().
ScriptName = os.path.basename(sys.argv[0])
# Module-level option store. It is not read or written in the visible part of
# the file (presumably filled in by RetrieveOptions() - TODO confirm).
Options = {}
# Processed option values which the functions below read by key, e.g.
# OptionsInfo["Infile"] and OptionsInfo["Outfile"] (presumably filled in by
# ProcessOptions() - TODO confirm).
OptionsInfo = {}
   61 
   62 def main():
   63     """Start execution of the script"""
   64     
   65     MiscUtil.PrintInfo("\n%s (RDK v%s; %s): Starting...\n" % (ScriptName, rdBase.rdkitVersion, time.asctime()))
   66     
   67     (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime()
   68     
   69     # Retrieve command line arguments and options...
   70     RetrieveOptions()
   71     
   72     # Process and validate command line arguments and options...
   73     ProcessOptions()
   74     
   75     # Perform actions required by the script...
   76     GenerateMoleculesAndDataTable()
   77     
   78     MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName)
   79     MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime))
   80 
   81 def GenerateMoleculesAndDataTable():
   82     """Generate a HTML table containing molecules and alphanumerical data."""
   83     
   84     # Retrieve data...
   85     ValidMols = RetrieveMoleculesAndData()
   86 
   87     # Setup data type map...
   88     DataMap = IdentifyStructureAndNumericalData(ValidMols)
   89 
   90     # Validate data labels used to specify highlighting data...
   91     ValidateSpecifiedDataLabels(DataMap)
   92 
   93     # Validate show molecule name option...
   94     ValidateShowMolNameOption(DataMap)
   95     
   96     # Compute 2D coordinates before alignment...
   97     if OptionsInfo["Compute2DCoords"]:
   98         MiscUtil.PrintInfo("\nComputing 2D coordinates for primary structure data...")
   99         for Mol in ValidMols:
  100             AllChem.Compute2DCoords(Mol)
  101             
  102     # Perform alignment to a common template for primary molecular structure data...
  103     PerformAlignment(ValidMols)
  104 
  105     # Write out a HTML file...
  106     WriteHTMLTableFile(ValidMols, DataMap)
  107 
  108 def WriteHTMLTableFile(ValidMols, DataMap):
  109     """Write out a HTML table file."""
  110     
  111     Outfile = OptionsInfo["Outfile"]
  112     
  113     Writer = open(Outfile, "w")
  114     if Writer is None:
  115         MiscUtil.PrintError("Failed to setup a writer for output fie %s " % Outfile)
  116     
  117     MiscUtil.PrintInfo("\nGenerating file %s..." % Outfile)
  118 
  119     WriteHTMLPageHeader(Writer, DataMap)
  120     WriteHTMLPageTitle(Writer)
  121     
  122     WriteHTMLTableHeader(Writer)
  123     WriteHTMLTableRows(Writer, ValidMols, DataMap)
  124     WriteHTMLTableEnd(Writer)
  125     
  126     WriteHTMLPageFooter(Writer)
  127     WriteHTMLPageEnd(Writer)
  128     
  129     if Writer is not None:
  130         Writer.close()
  131 
  132 def WriteHTMLTableRows(Writer, ValidMols, DataMap):
  133     """Write out HTML table rows."""
  134 
  135     WriteTableHeaderRow(Writer, ValidMols, DataMap)
  136     WriteTableDataRows(Writer, ValidMols, DataMap)
  137     WriteTableFooterRow(Writer, ValidMols, DataMap)
  138 
  139 def WriteTableDataRows(Writer, ValidMols, DataMap):
  140     """Write out table data row."""
  141 
  142     Writer.write("""        <tbody>\n""")
  143     
  144     MolCount = 0
  145     for Mol in ValidMols:
  146         MolCount += 1
  147         Writer.write("""          <tr>\n""")
  148 
  149         if OptionsInfo["CounterCol"]:
  150             Writer.write("""            <td></td>\n""")
  151             
  152         SetupPrimaryStructureTableData(Writer, Mol)
  153         
  154         if OptionsInfo["ShowMolName"]:
  155             MolName = RDKitUtil.GetMolName(Mol, MolCount)
  156             WrappedMolName = MiscUtil.WrapText(MolName, "<br/>", OptionsInfo["WrapTextWidth"])
  157             Writer.write("""            <td>%s</td>\n""" % WrappedMolName)
  158 
  159         # Set up rest of the data..
  160         AvailableDataLabelsMap = Mol.GetPropsAsDict(includePrivate = False, includeComputed = False)
  161         for DataLabel in DataMap["DataLabels"]:
  162             if not DataLabel in AvailableDataLabelsMap:
  163                 Writer.write("""            <td></td>\n""")
  164                 continue
  165 
  166             # Check for empty value...
  167             DataValue = "%s" % AvailableDataLabelsMap[DataLabel]
  168             DataValue = DataValue.strip()
  169             if MiscUtil.IsEmpty(DataValue):
  170                 Writer.write("""            <td></td>\n""")
  171                 continue
  172             
  173             if DataMap["StructureDataMap"][DataLabel]:
  174                 SetupNonPrimaryStructureTableData(Writer, DataLabel, DataValue, DataMap)
  175             else:
  176                 SetupAlphanumericTableData(Writer, DataLabel, DataValue, DataMap)
  177         
  178         Writer.write("""          </tr>\n""")
  179         
  180     Writer.write("""        </tbody>\n""")
  181 
  182 def SetupPrimaryStructureTableData(Writer, Mol):
  183     """Set up an inline SVG image for primary structure data for a table cell."""
  184     
  185     HightlightAtomList = SetupAtomListToHighlight(Mol, "Structure")
  186     SVGImageTag = SetupMolInLineSVGImageTag(Mol, HightlightAtomList)
  187     
  188     Writer.write("""            <td bgcolor="white"><%s></td>\n""" % SVGImageTag)
  189 
  190 def SetupNonPrimaryStructureTableData(Writer, DataLabel, DataValue, DataMap):
  191     """Set up an inline SVG image for non primary structure data cell."""
  192 
  193     WrappedDataValue = DataValue
  194     if OptionsInfo["WrapText"]:
  195         WrappedDataValue = MiscUtil.WrapText(DataValue, "<br/>", OptionsInfo["WrapTextWidth"])
  196 
  197     if DataMap["SMILESDataMap"][DataLabel]:
  198         Mol = Chem.MolFromSmiles(DataValue, sanitize = False)
  199         Mol.UpdatePropertyCache(strict = False)
  200     else:
  201         MiscUtil.PrintWarning("\nIgnoring uknown structure data column type with column label %s: %s\n" % (DataLabel, DataValue))
  202         Writer.write("""            <td>%s</td>\n""" % WrappedDataValue)
  203         return
  204 
  205     if Mol is None:
  206         MiscUtil.PrintWarning("\nSMILES parsing failed for data label %s: %s\n" % (DataLabel, DataValue))
  207         Writer.write("""            <td>%s</td>\n""" % WrappedDataValue)
  208         return
  209     elif  not Mol.GetNumHeavyAtoms():
  210         Writer.write("""            <td>%s</td>\n""" % WrappedDataValue)
  211         return
  212     elif AllChem.Compute2DCoords(Mol) < 0:
  213         Writer.write("""            <td>%s</td>\n""" % WrappedDataValue)
  214         return
  215     
  216     HightlightAtomList = SetupAtomListToHighlight(Mol, DataLabel)
  217     SVGImageTag = SetupMolInLineSVGImageTag(Mol, HightlightAtomList)
  218     
  219     Writer.write("""            <td bgcolor="white"><%s></td>\n""" % SVGImageTag)
  220 
  221 def SetupAlphanumericTableData(Writer, DataLabel, DataValue, DataMap):
  222     """Set up alphanumeric data."""
  223 
  224     BackgroundColor, BackgroundColorType = GetAlphanumeircValueHighlightBackgroundColor(DataLabel, DataValue, DataMap)
  225     SetupAlphanumericTableDataValue(Writer, DataValue, BackgroundColor, BackgroundColorType)
  226 
  227 def WriteTableHeaderRow(Writer, ValidMols, DataMap):
  228     """Write out table header row."""
  229 
  230     TableHeaderStyle = OptionsInfo["TableHeaderStyle"]
  231     if TableHeaderStyle is None:
  232         Writer.write("""      <thead>\n""")
  233         Writer.write("""        <tr>\n""")
  234     elif re.match("^(thead|table)", TableHeaderStyle):
  235         Writer.write("""      <thead class="%s">\n""" % TableHeaderStyle)
  236         Writer.write("""        <tr>\n""")
  237     else:
  238         Writer.write("""      <thead>\n""")
  239         Writer.write("""        <tr bgcolor="%s"\n""" % TableHeaderStyle)
  240 
  241     if OptionsInfo["CounterCol"]:
  242         Writer.write("""          <th></th>\n""")
  243     Writer.write("""          <th>Structure</th>\n""")
  244     if OptionsInfo["ShowMolName"]:
  245         Writer.write("""          <th>%s</th>\n""" % OptionsInfo["ShowMolNameDataLabel"])
  246     
  247     # Write out rest of the column headers...
  248     for DataLabel in DataMap["DataLabels"]:
  249         Writer.write("""          <th>%s</th>\n""" % DataLabel)
  250         
  251     Writer.write("""        </tr>\n""")
  252     Writer.write("""      </thead>\n""")
  253 
  254 def WriteTableFooterRow(Writer, ValidMols, DataMap):
  255     """Write out table footer row."""
  256 
  257     if not OptionsInfo["TableFooter"]:
  258         return
  259     
  260     Writer.write("""      <tfoot>\n""")
  261     Writer.write("""        <tr>\n""")
  262 
  263     if OptionsInfo["CounterCol"]:
  264         Writer.write("""          <td></td>\n""")
  265     Writer.write("""          <td>Structure</td>\n""")
  266     if OptionsInfo["ShowMolName"]:
  267         Writer.write("""          <td>%s</td>\n""" % OptionsInfo["ShowMolNameDataLabel"])
  268 
  269     # Write out rest of the column headers...
  270     for DataLabel in DataMap["DataLabels"]:
  271         Writer.write("""          <td>%s</td>\n""" % DataLabel)
  272         
  273     Writer.write("""        </tr>\n""")
  274     Writer.write("""      </tfoot>\n""")
  275 
  276 def WriteHTMLPageHeader(Writer, DataMap):
  277     """Write out HTML page header."""
  278 
  279     # Collect column indices containing counter and structure data to disable
  280     # sorting and searching. In addition, set up a list to exclude counter and
  281     # primary structure columns from column visibility pulldown along with
  282     # any other columns...
  283     #
  284     if OptionsInfo["CounterCol"]:
  285         StrColIndicesList = ["0", "1"]
  286         ColVisibilityExcludeColIndicesList = ["0", "1"]
  287         ColIndexOffset = 2
  288         FreezeLeftColumns = "2"
  289     else:
  290         StrColIndicesList = ["0"]
  291         ColVisibilityExcludeColIndicesList = ["0"]
  292         ColIndexOffset = 1
  293         FreezeLeftColumns = "1"
  294 
  295     if OptionsInfo["ShowMolName"]:
  296         ColIndexOffset += 1
  297         
  298     MaxColVisColCount = OptionsInfo["ColVisibilityCtrlMax"]
  299     MaxDataColVisColCount = MaxColVisColCount - len(ColVisibilityExcludeColIndicesList)
  300     MaxDataColVisColCount = MaxColVisColCount
  301 
  302     DataColVisibilityExclude = False
  303     ColCount = len(DataMap["DataLabels"])
  304     if OptionsInfo["ColVisibility"]:
  305         if ColCount > MaxDataColVisColCount:
  306             DataColVisibilityExclude = True
  307             MiscUtil.PrintWarning("The number of data columns, %d, is more than %d. Only first %d data columns will be available in column visibility pulldown." % (ColCount, MaxColVisColCount, MaxColVisColCount))
  308     
  309     DisplayButtons = False
  310     if OptionsInfo["ColVisibility"]:
  311         if ColCount > 0 or OptionsInfo["ShowMolName"]:
  312             DisplayButtons = True
  313     
  314     FreezeCols = False
  315     if (OptionsInfo["FreezeCols"] and OptionsInfo["ScrollX"]):
  316         FreezeCols = True
  317     
  318     for Index, DataLabel in enumerate(DataMap["DataLabels"]):
  319         if DataMap["StructureDataMap"][DataLabel]:
  320             StrColIndex = Index + ColIndexOffset
  321             StrColIndicesList.append("%s" % StrColIndex)
  322         
  323         if OptionsInfo["ColVisibility"]:
  324             if Index >= MaxDataColVisColCount:
  325                 ColIndex = Index + ColIndexOffset
  326                 ColVisibilityExcludeColIndicesList.append("%s" %ColIndex)
  327             
  328     StrColIndices = MiscUtil.JoinWords(StrColIndicesList, ", ")
  329     ColVisibilityExcludeColIndices = MiscUtil.JoinWords(ColVisibilityExcludeColIndicesList, ", ")
  330 
  331     Paging = "true" if OptionsInfo["Paging"] else "false"
  332     PageLength = "%d" % OptionsInfo["PageLength"]
  333     PagingType = "\"%s\"" % OptionsInfo["PagingType"]
  334 
  335     ScrollX = "true" if OptionsInfo["ScrollX"] else "false"
  336     
  337     ScrollY = ""
  338     if OptionsInfo["ScrollY"]:
  339         if re.search("vh$", OptionsInfo["ScrollYSize"]):
  340             ScrollY = "\"%s\"" % OptionsInfo["ScrollYSize"]
  341         else:
  342             ScrollY = "%s" % OptionsInfo["ScrollYSize"]
  343     
  344     RegexSearch = "true" if  OptionsInfo["RegexSearch"] else "false"
  345 
  346     # Start HTML header...
  347     Title = "Molecules and data table" if OptionsInfo["Header"] is None else OptionsInfo["Header"]
  348 
  349     Writer.write("""\
  350 <!doctype html>
  351 <html lang="en">
  352 <head>
  353     <title>%s</title>
  354     <meta charset="utf-8">
  355     <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
  356     <link rel="stylesheet" type="text/css" href="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0/css/bootstrap.min.css">
  357     <link rel="stylesheet" type="text/css" href="https://cdn.datatables.net/1.10.16/css/dataTables.bootstrap4.min.css">
  358   
  359 """ % (Title))
  360 
  361     if (FreezeCols):
  362         Writer.write("""\
  363     <link rel="stylesheet" type="text/css" href="https://cdn.datatables.net/fixedcolumns/3.2.4/css/fixedColumns.bootstrap4.min.css">
  364 """)
  365     
  366     if (OptionsInfo["KeysNavigation"]):
  367         Writer.write("""\
  368     <link rel="stylesheet" type="text/css" href="https://cdn.datatables.net/keytable/2.3.2/css/keyTable.bootstrap4.min.css">
  369 """)
  370     
  371     Writer.write("""\
  372 
  373     <script type="text/javascript" language="javascript" src="https://code.jquery.com/jquery-1.12.4.js"></script>
  374     <script type="text/javascript" language="javascript" src="https://cdn.datatables.net/1.10.16/js/jquery.dataTables.min.js"></script>
  375     <script type="text/javascript" language="javascript" src="https://cdn.datatables.net/1.10.16/js/dataTables.bootstrap4.min.js"></script>
  376 
  377 """)
  378 
  379     if DisplayButtons:
  380         Writer.write("""\
  381     <script type="text/javascript" language="javascript" src="https://cdn.datatables.net/buttons/1.5.1/js/dataTables.buttons.min.js"></script>
  382     <script type="text/javascript" language="javascript" src="https://cdn.datatables.net/buttons/1.5.1/js/buttons.bootstrap4.min.js"></script>
  383     <script type="text/javascript" language="javascript" src="https://cdn.datatables.net/buttons/1.5.1/js/buttons.colVis.min.js"></script>
  384 
  385 """)
  386     
  387     if (FreezeCols):
  388         Writer.write("""\
  389     <script type="text/javascript" language="javascript" src="https://cdn.datatables.net/fixedcolumns/3.2.4/js/dataTables.fixedColumns.min.js"></script>
  390 """)
  391     
  392     if (OptionsInfo["KeysNavigation"]):
  393         Writer.write("""\
  394     <script type="text/javascript" language="javascript" src="https://cdn.datatables.net/keytable/2.3.2/js/dataTables.keyTable.min.js"></script>
  395 """)
  396     
  397     # Intialize table using Bootstrap, DataTables and JQuery frameworks...
  398     Writer.write("""\
  399 
  400     <script type="text/javascript" class="init">
  401 
  402 $(document).ready(function() {
  403     var MolsAndDataTable = $('#MolsAndDataTable').DataTable( {
  404         "columnDefs": [
  405             {
  406                 "orderable": false,
  407                 "searchable": false,
  408                 "targets": [%s]
  409             },
  410 """ % (StrColIndices))
  411 
  412     if OptionsInfo["ColVisibility"]:
  413         Writer.write("""\
  414             {
  415                 "className": "noColVisCtrl",
  416                 "targets": [%s]
  417             }
  418 """ % (ColVisibilityExcludeColIndices))
  419 
  420     Writer.write("""\
  421         ],
  422 """)
  423     
  424     # Setup column visibility control pulldown by excluding counter column
  425     # and primary structure column from the list...
  426     #
  427     if OptionsInfo["ColVisibility"]:
  428         # Set up dom for button display...
  429         if OptionsInfo["Paging"]:
  430             Writer.write("""\
  431         "dom":  "<'row'<'col'l><'col'B><'col'f>>" +
  432             "<'row'<'col-sm-12'tr>>" +
  433             "<'row'<'col-sm-5'i><'col-sm-7'p>>",
  434 """)
  435         else:
  436             Writer.write("""\
  437         "dom":  "<'row'<'col-sm-6'B><'col-sm-6'f>>" +
  438             "<'row'<'col-sm-12'tr>>" +
  439             "<'row'<'col-sm-5'i><'col-sm-7'p>>",
  440 """)
  441         # Set up buttons...
  442         Writer.write("""\
  443         "buttons": [
  444             {
  445                 "extend": "colvis",
  446                 "text": "Column visibility",
  447                 "className": "btn btn-outline-light text-dark",
  448                 "columns": ":not(.noColVisCtrl)",
  449 """)
  450         if not DataColVisibilityExclude:
  451             Writer.write("""\
  452                 "prefixButtons": [ "colvisRestore" ],
  453 """)
  454         
  455         Writer.write("""\
  456                 "columnText": function ( dt, colIndex, colLabel ) {
  457                     return (colIndex + 1) + ": " + colLabel;
  458                 },
  459             }
  460         ],
  461 """)
  462 
  463     # Write out rest of the variables for DataTables...
  464     if FreezeCols:
  465         Writer.write("""\
  466         "fixedColumns": {
  467             "leftColumns": %s
  468         },
  469 """ % (FreezeLeftColumns))
  470     
  471     if (OptionsInfo["KeysNavigation"]):
  472         Writer.write("""\
  473         "keys": true,
  474 """)
  475     
  476     Writer.write("""\
  477         "pageLength": %s,
  478         "lengthMenu": [ [10, 15, 25, 50, 100, 500, 1000, -1], [10, 15, 25, 50, 100, 500, 1000, "All"] ],
  479         "paging": %s,
  480         "pagingType": %s,
  481         "scrollX": %s,
  482         "scrollY": %s,
  483         "scrollCollapse": true,
  484         "order": [],
  485         "search" : {"regex" : %s},
  486     } );
  487 """ % (PageLength, Paging, PagingType, ScrollX, ScrollY, RegexSearch))
  488     
  489     if OptionsInfo["CounterCol"]:
  490         Writer.write("""\
  491     MolsAndDataTable.on( 'order.dt search.dt', function () {
  492         MolsAndDataTable.column(0, {search:'applied', order:'applied'}).nodes().each( function (cell, rowIndex) {
  493             cell.innerHTML = rowIndex + 1;
  494         } );
  495     } ).draw();
  496 """)
  497     
  498     # End of Javacscript code...
  499     Writer.write("""\
  500 } );
  501 
  502     </script>
  503 """)
  504 
  505     # Finish up HTML header...
  506     Writer.write("""\
  507   
  508 </head>
  509 <body>
  510   <div class="container-fluid">
  511     <br/>
  512 """)
  513 
  514 def WriteHTMLPageEnd(Writer):
  515     """Write out HTML page end."""
  516 
  517     Writer.write("""\
  518   </div>
  519 </body>
  520 </html>
  521 """)
  522 
  523 def WriteHTMLPageTitle(Writer):
  524     """Write out HTML page title."""
  525 
  526     if OptionsInfo["Header"] is None:
  527         return
  528 
  529     Writer.write("""    <%s class="text-center">%s</%s>\n""" % (OptionsInfo["HeaderStyle"], OptionsInfo["Header"], OptionsInfo["HeaderStyle"]))
  530 
  531 def WriteHTMLPageFooter(Writer):
  532     """Write out HTML page footer."""
  533 
  534     if OptionsInfo["Footer"] is None:
  535         return
  536 
  537     Writer.write("""    <br/>\n    <p class="%s">%s</p>\n""" % (OptionsInfo["FooterClass"], OptionsInfo["Footer"]))
  538 
  539 def WriteHTMLTableHeader(Writer):
  540     """Write out HTML table header."""
  541 
  542     if OptionsInfo["TableStyle"] is None:
  543         Writer.write("""\n    <table id="MolsAndDataTable" cellspacing="0" width="100%">\n""")
  544     else:
  545         Writer.write("""    <table id="MolsAndDataTable" class="%s" cellspacing="0" width="100%s">\n""" % (OptionsInfo["TableStyle"], "%"))
  546         
  547 def WriteHTMLTableEnd(Writer):
  548     """Write out HTML table end."""
  549 
  550     Writer.write("""    </table>\n\n""")
  551 
  552 def RetrieveMoleculesAndData():
  553     """Retrieve molecules and data from input file."""
  554 
  555     MiscUtil.PrintInfo("\nReading file %s..." % OptionsInfo["Infile"])
  556 
  557     if MiscUtil.CheckFileExt(OptionsInfo["Infile"] ,"smi csv tsv txt"):
  558         # Check for the presence of SMILES column name in title line...
  559         Infile = open(OptionsInfo["Infile"], "r")
  560         if Infile is None:
  561             MiscUtil.PrintError("Couldn't open file %s..." % OptionsInfo["Infile"])
  562         Line = Infile.readline()
  563         Infile.close()
  564         
  565         if not re.search("SMILES", Line, re.I):
  566             MiscUtil.PrintError("The input file, %s, must contain a title line containing a column name with SMILES in its name." % OptionsInfo["Infile"])
  567     
  568     if MiscUtil.CheckFileExt(OptionsInfo["Infile"],"sdf sd smi"):
  569         ValidMols, MolCount, ValidMolCount  = RDKitUtil.ReadAndValidateMolecules(OptionsInfo["Infile"], **OptionsInfo["InfileParams"])
  570     else:
  571         ValidMols, MolCount, ValidMolCount  = RetrieveMoleculesFromTextFile(OptionsInfo["Infile"])
  572     
  573     MiscUtil.PrintInfo("Total number of molecules: %d" % MolCount)
  574     MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount)
  575     MiscUtil.PrintInfo("Number of ignored molecules: %d" % (MolCount - ValidMolCount))
  576 
  577     return ValidMols
  578 
  579 def RetrieveMoleculesFromTextFile(Infile):
  580     """Retrieve molecules from a CSV/TSV text file."""
  581 
  582     # Read and parse text lines...
  583     Delimiter = "," if MiscUtil.CheckFileExt(Infile ,"csv") else "\t"
  584     QuoteChar = '"'
  585     IgnoreHeaderLine = False
  586     TextLinesWords = MiscUtil.GetTextLinesWords(Infile, Delimiter, QuoteChar, IgnoreHeaderLine)
  587 
  588     # Process column names...
  589     ColNames = TextLinesWords[0]
  590     ColCount = len(ColNames)
  591     
  592     MolColIndex = None
  593     MolDataColIndices = []
  594 
  595     FirstSMILES = True
  596     for ColIndex in range(0, ColCount):
  597         if re.search("SMILES", ColNames[ColIndex], re.I) and FirstSMILES:
  598             MolColIndex = ColIndex
  599             FirstSMILES = False
  600             continue
  601         
  602         MolDataColIndices.append(ColIndex)
  603     
  604     if MolColIndex is None:
  605             MiscUtil.PrintError("The input file, %s, must contain a title line containing a column name with SMILES in its name." % Infile)
  606 
  607     ValidMols = []
  608     MolCount = 0
  609 
  610     Sanitize = OptionsInfo["InfileParams"]["Sanitize"]
  611     
  612     # Process data lines...
  613     for LineIndex in range(1, len(TextLinesWords)):
  614         MolCount += 1
  615         LineWords = TextLinesWords[LineIndex]
  616         if len(LineWords) != ColCount:
  617             MiscUtil.PrintWarning("Ignoring text line number %d: Number of columns, %d, must match number of columns, %d, in title line.\nLine: %s" % (MolCount, len(LineWords), ColCount, Delimiter.join(LineWords)))
  618             continue
  619 
  620         # Process molecule column...
  621         MolSMILES = LineWords[MolColIndex]
  622         Mol = Chem.MolFromSmiles(MolSMILES, sanitize = Sanitize)
  623         if Mol is None:
  624             MiscUtil.PrintWarning("Ignoring text line number %d: SMILES parsing failed\nLine: %s" % (MolCount, Delimiter.join(LineWords)))
  625             continue
  626 
  627         # Process molecule data columns...
  628         for ColIndex in MolDataColIndices:
  629             Name = ColNames[ColIndex]
  630             Value = LineWords[ColIndex]
  631             Mol.SetProp(Name, Value)
  632         
  633         ValidMols.append(Mol)
  634     
  635     ValidMolCount = len(ValidMols)
  636     
  637     return (ValidMols, MolCount, ValidMolCount)
  638 
  639 def IdentifyStructureAndNumericalData(ValidMols):
  640     """Identify structure and alphanumerical data."""
  641 
  642     DataMap = {}
  643     DataMap["DataLabels"] = []
  644     DataMap["DataLabelsMap"] = {}
  645     DataMap["CanonicalDataLabelsMap"] = {}
  646     
  647     DataMap["StructureDataMap"] = {}
  648     DataMap["SMILESDataMap"] = {}
  649 
  650     # Retrieve all possible data labels...
  651     if MiscUtil.CheckFileExt(OptionsInfo["Infile"] ,"smi csv tsv txt"):
  652         # First molecule contains all possible data fields...
  653         Mol = ValidMols[0]
  654         ProcessMolDataLabels(ValidMols[0], DataMap)
  655     else:
  656         # Go over all molecules to identify unique data labels...
  657         MiscUtil.PrintInfo("\nRetrieving unique data labels for data in file %s..." % OptionsInfo["Infile"])
  658         for Mol in ValidMols:
  659             ProcessMolDataLabels(Mol, DataMap)
  660 
  661     return DataMap
  662 
  663 def ProcessMolDataLabels(Mol, DataMap):
  664     """Process data label to identify and track its type"""
  665     
  666     for DataLabel in Mol.GetPropNames(includePrivate = False, includeComputed = False):
  667         if DataLabel in DataMap["DataLabelsMap"]:
  668             continue
  669 
  670         # Track labels...
  671         DataMap["DataLabels"].append(DataLabel)
  672         DataMap["DataLabelsMap"][DataLabel] = DataLabel
  673         DataMap["CanonicalDataLabelsMap"][DataLabel.lower()] = DataLabel
  674         
  675         DataMap["StructureDataMap"][DataLabel] = False
  676         DataMap["SMILESDataMap"][DataLabel] = False
  677         
  678         if re.search("SMILES", DataLabel, re.I):
  679             DataMap["StructureDataMap"][DataLabel] = True
  680             DataMap["SMILESDataMap"][DataLabel] = True
  681 
  682 def ValidateShowMolNameOption(DataMap):
  683     """Validate show molecule name option. """
  684     
  685     if not OptionsInfo["ShowMolName"]:
  686         return
  687 
  688     if not MiscUtil.CheckFileExt(OptionsInfo["Infile"],"sdf sd smi"):
  689         OptionsInfo["ShowMolName"] = False
  690         return
  691 
  692     CanonicalDataLabel = OptionsInfo["ShowMolNameDataLabel"].lower()
  693     if CanonicalDataLabel in DataMap["CanonicalDataLabelsMap"]:
  694         OptionsInfo["ShowMolName"] = False
  695         if not OptionsInfo["ShowMolNameAuto"]:
  696             MiscUtil.PrintWarning("Ignoring \"--showMolName\" option: Data label \"Name\" corresponding to molecule name is already present in input file.")
  697 
  698 def ValidateSpecifiedDataLabels(DataMap):
  699     """Validate data labels used to specify highlighting data. """
  700     
  701     ValidateSpecifiedDataLabelsForHighlightSMARTS(DataMap)
  702     
  703     ValidateSpecifiedDataLabelsForHighlightValues(DataMap)
  704     ValidateSpecifiedDataLabelsForHighlightRanges(DataMap)
  705     ValidateSpecifiedDataLabelsForHighlightClasses(DataMap)
  706     
  707 def ValidateSpecifiedDataLabelsForHighlightSMARTS(DataMap):
  708     """Validate data labels used to specify highlighting SMARTS option. """
  709 
  710     if OptionsInfo["HighlightSMARTSAllMode"]:
  711         return
  712 
  713     for DataLabel in OptionsInfo["HighlightSMARTSDataLabels"]:
  714         if re.match("^Structure$", DataLabel, re.I):
  715             continue
  716         
  717         CanonicalDataLabel = DataLabel.lower()
  718         if not CanonicalDataLabel in DataMap["CanonicalDataLabelsMap"]:
  719             MiscUtil.PrintError("The data label specified, %s, using option \"--highlightSMARTS\" doesn't exist in input file." % DataLabel)
  720         
  721         Label = DataMap["CanonicalDataLabelsMap"][CanonicalDataLabel]
  722         if not DataMap["StructureDataMap"][Label]:
  723             MiscUtil.PrintError("The data label specified, %s, using option \"--highlightSMARTS\" doesn't correspond to structure data: Valid structure data labels: SMILES in data label." % DataLabel)
  724             
  725 def ValidateSpecifiedDataLabelsForHighlightValues(DataMap):
  726     """Validate data labels used to specify highlighting values option. """
  727 
  728     ValidateDataLabels("--highlightValues", DataMap, OptionsInfo["HighlightValuesLabels"])
  729 
  730 def ValidateSpecifiedDataLabelsForHighlightRanges(DataMap):
  731     """Validate data labels used to specify highlighting ranges option. """
  732     
  733     ValidateDataLabels("--highlightRanges", DataMap, OptionsInfo["HighlightRangesLabels"])
  734 
  735 def ValidateSpecifiedDataLabelsForHighlightClasses(DataMap):
  736     """Validate data labels used to specify highlighting classes option. """
  737 
  738     if OptionsInfo["HighlightClassesRules"] is None:
  739         return
  740     
  741     ValidDataLabelsList = []
  742     NotValidDataLabelsList = []
  743     for Label in OptionsInfo["HighlightClassesLabels"]:
  744         ValidCanonicalLabel = None
  745         
  746         for LabelSynonym in OptionsInfo["HighlightClassesSynonymsMap"][Label]:
  747             CanonicalLabel = LabelSynonym.lower()
  748             
  749             # Is this label already in use...
  750             if CanonicalLabel in OptionsInfo["HighlightValuesCanonicalLabelsMap"]:
  751                 MiscUtil.PrintInfo("")
  752                 MiscUtil.PrintWarning("The data label, %s, for class, %s , in option \"--highlightValuesClasses\" has already been used in \"--highlightValues\" option. It'll be ignored during highlighting." % (LabelSynonym, OptionsInfo["HighlightClasses"]))
  753                 continue
  754 
  755             if CanonicalLabel in OptionsInfo["HighlightRangesCanonicalLabelsMap"]:
  756                 MiscUtil.PrintInfo("")
  757                 MiscUtil.PrintWarning("The data label, %s, for class, %s , in option \"--highlightValuesClasses\" has already been used in \"--highlightValuesRanges\" option. It'll be ignored during highlighting." % (LabelSynonym, OptionsInfo["HighlightClasses"]))
  758                 continue
  759             
  760             # Is this label present in input file...
  761             if  CanonicalLabel in DataMap["CanonicalDataLabelsMap"]:
  762                 ValidCanonicalLabel = CanonicalLabel
  763                 break
  764         
  765         if ValidCanonicalLabel is None:
  766             MiscUtil.PrintWarning("The data label or its synonyms - %s - for class, %s , in option \"--highlightValuesClasses\" either don't exist in input file or have already been used for highlighting in option \"--highlightValuesClasses\" or \"--highlightValuesRanges\". It'll be ignored during highlighting." % (MiscUtil.JoinWords(OptionsInfo["HighlightClassesSynonymsMap"][Label], ", "), OptionsInfo["HighlightClasses"]))
  767             NotValidDataLabelsList.append(Label)
  768             continue
  769         
  770         # Track label...
  771         OptionsInfo["HighlightClassesCanonicalLabelsMap"][ValidCanonicalLabel] = Label
  772         ValidDataLabelsList.append(DataMap["CanonicalDataLabelsMap"][ValidCanonicalLabel])
  773 
  774     ValidDataLabelsCount = len(ValidDataLabelsList)
  775     NotValidDataLabelsCount = len(NotValidDataLabelsList)
  776     DataLabelsCount = len(OptionsInfo["HighlightClassesLabels"])
  777     
  778     if ValidDataLabelsCount == 0:
  779         MiscUtil.PrintInfo("")
  780         MiscUtil.PrintWarning("The data labels and their synonyms for class, %s , in option \"--highlightValuesClasses\" either don't exists in input file or have already been used for highlighting in option \"--highlightValuesClasses\" or \"--highlightValuesRanges\". No class highlighting will be performed. Missing data labels:  %s" % (OptionsInfo["HighlightClasses"], MiscUtil.JoinWords(OptionsInfo["HighlightClassesLabels"], ", ")))
  781     elif  ValidDataLabelsCount < DataLabelsCount:
  782         MiscUtil.PrintInfo("")
  783         MiscUtil.PrintWarning("The class, %s, based highlighting specified using \"--highlightValuesClasses\" option will be performed using only, %d, out of, %d, data labels: %s\nThe rest of the data label(s) - %s - either don't exist in the input file or have aready been used for highlighting in option \"--highlightValuesClasses\" or \"--highlightValuesRanges\"." % (OptionsInfo["HighlightClasses"], ValidDataLabelsCount,  DataLabelsCount,  MiscUtil.JoinWords(ValidDataLabelsList, ", "),  MiscUtil.JoinWords(NotValidDataLabelsList, ", ") ))
  784         
  785 def ValidateDataLabels(OptionName, DataMap, DataLabels):
  786     """Validate data labels."""
  787     
  788     for DataLabel in DataLabels:
  789         if re.match("^Structure$", DataLabel, re.I):
  790             MiscUtil.PrintError("The data label specified, %s, using option \"-%s\" must not correspond to structure data. Structure label is not allowed." % (DataLabel, OptionName))
  791             
  792         CanonicalDataLabel = DataLabel.lower()
  793         if not CanonicalDataLabel in DataMap["CanonicalDataLabelsMap"]:
  794             MiscUtil.PrintError("The data label specified, %s, using option \"%s\" doesn't exist in input file." % (DataLabel, OptionName))
  795         
  796         Label = DataMap["CanonicalDataLabelsMap"][CanonicalDataLabel]
  797         if DataMap["StructureDataMap"][Label]:
  798             MiscUtil.PrintError("The data label specified, %s, using option \"%s\" must not correspond to structure data: Valid structure data labels contain \"SMILES\" in their name.." % (DataLabel, OptionName))
  799 
  800 def SetupMolInLineSVGImageTag(Mol, HightlightAtomList):
  801     """Setup a inline SVG image tag for molecule."""
  802 
  803     SVGText = RDKitUtil.GetInlineSVGForMolecule(Mol, OptionsInfo["MolImageWidth"], OptionsInfo["MolImageHeight"], AtomListToHighlight = HightlightAtomList, Base64Encoded = OptionsInfo["MolImageEncoded"])
  804 
  805     if OptionsInfo["MolImageEncoded"]:
  806         SVGInlineImageTag = "img src=\"data:image/svg+xml;base64,\n%s\"" % SVGText
  807     else:
  808         SVGInlineImageTag = "img src=\"data:image/svg+xml;charset=UTF-8,\n%s\"" % SVGText
  809     
  810     return SVGInlineImageTag
  811 
  812 def SetupAtomListToHighlight(Mol, DataLabel):
  813     """Set up atom list to highlight using specified SMARTS patterns."""
  814 
  815     HighlightAtomList = None
  816     if OptionsInfo["HighlightSMARTS"] is None:
  817         return  HighlightAtomList
  818 
  819     if OptionsInfo["HighlightSMARTSAllMode"]:
  820         PatternMol = OptionsInfo["HighlightSMARTSPatternMol"]
  821     else:
  822         CanonicalDataLabel = DataLabel.lower()
  823         if not CanonicalDataLabel in OptionsInfo["HighlightSMARTSCanonicalDataLabelsMap"]:
  824             return  HighlightAtomList
  825         
  826         Label = OptionsInfo["HighlightSMARTSCanonicalDataLabelsMap"][CanonicalDataLabel]
  827         PatternMol = OptionsInfo["HighlightSMARTSPatternMolsMap"][Label]
  828         
  829     # Get matched atom lists and flatten it...
  830     MatchedAtomsLists = Mol.GetSubstructMatches(PatternMol)
  831     MatchedAtoms = [ Atom for AtomsList in MatchedAtomsLists for Atom in AtomsList]
  832 
  833     if len(MatchedAtoms):
  834         HighlightAtomList = MatchedAtoms
  835     
  836     return  HighlightAtomList
  837 
  838 def GetAlphanumeircValueHighlightBackgroundColor(DataLabel, DataValue, DataMap):
  839     """Get background highlight color for a value."""
  840 
  841     BackgroundColor = None
  842     BackgroundColorType = None
  843     
  844     CanonicalDataLabel =DataLabel.lower()
  845     if CanonicalDataLabel in OptionsInfo["HighlightValuesCanonicalLabelsMap"]:
  846         return GetBackgroundColorUsingHighlightValuesMode(DataLabel, DataValue, DataMap)
  847     elif CanonicalDataLabel in OptionsInfo["HighlightRangesCanonicalLabelsMap"]:
  848         return GetBackgroundColorUsingHighlightRangesMode(DataLabel, DataValue, DataMap)
  849     elif CanonicalDataLabel in OptionsInfo["HighlightClassesCanonicalLabelsMap"]:
  850         return GetBackgroundColorUsingHighlightClassesMode(DataLabel, DataValue, DataMap)
  851     elif OptionsInfo["HighlightClassesRandom"]:
  852         return GetBackgroundColorUsingRandomMode(DataLabel, DataValue, DataMap)
  853     
  854     return (BackgroundColor, BackgroundColorType)
  855     
  856 def GetBackgroundColorUsingHighlightValuesMode(DataLabel, DataValue, DataMap):
  857     """Get background highlight color for a value."""
  858 
  859     BackgroundColor = None
  860     BackgroundColorType = None
  861 
  862     CanonicalDataLabel =DataLabel.lower()
  863     if not CanonicalDataLabel in OptionsInfo["HighlightValuesCanonicalLabelsMap"]:
  864         return (BackgroundColor, BackgroundColorType)
  865 
  866     Label = OptionsInfo["HighlightValuesCanonicalLabelsMap"][CanonicalDataLabel]
  867     DataType = OptionsInfo["HighlightValuesTypesMap"][Label]
  868     Criterion = OptionsInfo["HighlightValuesCriteriaMap"][Label]
  869     CriterionValue = OptionsInfo["HighlightValuesCriteriaValuesMap"][Label]
  870 
  871     return GetBackgroundColorForHighlightingValue(DataLabel, DataValue, DataType, Criterion, CriterionValue)
  872     
  873 def GetBackgroundColorUsingHighlightClassesMode(DataLabel, DataValue, DataMap):
  874     """Get background highlight color for a value."""
  875 
  876     BackgroundColor = None
  877     BackgroundColorType = None
  878 
  879     CanonicalDataLabel =DataLabel.lower()
  880     if not CanonicalDataLabel in OptionsInfo["HighlightClassesCanonicalLabelsMap"]:
  881         return (BackgroundColor, BackgroundColorType)
  882 
  883     Label = OptionsInfo["HighlightClassesCanonicalLabelsMap"][CanonicalDataLabel]
  884     DataType = OptionsInfo["HighlightClassesTypesMap"][Label]
  885     Criterion = OptionsInfo["HighlightClassesCriteriaMap"][Label]
  886     CriterionValue = OptionsInfo["HighlightClassesCriteriaValuesMap"][Label]
  887 
  888     return GetBackgroundColorForHighlightingValue(DataLabel, DataValue, DataType, Criterion, CriterionValue)
  889 
  890 def GetBackgroundColorForHighlightingValue(DataLabel, DataValue, DataType, Criterion, CriterionValue):
  891     """Get background color for highlighting a value."""
  892     
  893     ValueOkay = False
  894     if re.match("^numeric$", DataType, re.I):
  895         if not MiscUtil.IsNumber(DataValue):
  896             MiscUtil.PrintWarning("Ignoring data value, %s, for data label, %s, during numeric highlighting: It must be a number" % (DataValue, DataLabel))
  897             return (BackgroundColor, BackgroundColorType)
  898         
  899         DataValue = float(DataValue)
  900         if re.match("^gt$", Criterion, re.I):
  901             ValueOkay = True if DataValue > CriterionValue else False
  902         elif re.match("^lt$", Criterion, re.I):
  903             ValueOkay = True if DataValue < CriterionValue else False
  904         elif re.match("^ge$", Criterion, re.I):
  905             ValueOkay = True if DataValue >= CriterionValue else False
  906         elif re.match("^le$", Criterion, re.I):
  907             ValueOkay = True if DataValue <= CriterionValue else False
  908         elif re.match("^eq$", Criterion, re.I):
  909             ValueOkay = True if DataValue == CriterionValue else False
  910         elif re.match("^ne$", Criterion, re.I):
  911             ValueOkay = True if DataValue != CriterionValue else False
  912         else:
  913             return (BackgroundColor, BackgroundColorType)
  914     elif re.match("^text$", DataType, re.I):
  915         DataValue = "%s" % DataValue
  916         if re.match("^gt$", Criterion, re.I):
  917             ValueOkay = True if DataValue > CriterionValue else False
  918         elif re.match("^lt$", Criterion, re.I):
  919             ValueOkay = True if DataValue < CriterionValue else False
  920         elif re.match("^ge$", Criterion, re.I):
  921             ValueOkay = True if DataValue >= CriterionValue else False
  922         elif re.match("^le$", Criterion, re.I):
  923             ValueOkay = True if DataValue <= CriterionValue else False
  924         elif re.match("^eq$", Criterion, re.I):
  925             ValueOkay = True if DataValue == CriterionValue else False
  926         elif re.match("^ne$", Criterion, re.I):
  927             ValueOkay = True if DataValue != CriterionValue else False
  928         else:
  929             return (BackgroundColor, BackgroundColorType)
  930     elif re.match("^regex$", DataType, re.I):
  931         DataValue = "%s" % DataValue
  932         if re.match("^eq$", Criterion, re.I):
  933             ValueOkay = True if re.search("%s" % CriterionValue, DataValue, re.I) else False
  934         elif re.match("^ne$", Criterion, re.I):
  935             ValueOkay = False if re.search("%s" % CriterionValue, DataValue, re.I) else True
  936         else:
  937             return (BackgroundColor, BackgroundColorType)
  938 
  939     BackgroundColor = OptionsInfo["HighlightColorsList"][0] if ValueOkay else OptionsInfo["HighlightColorsList"][1]
  940     BackgroundColorType = OptionsInfo["HighlightColorsType"]
  941     
  942     return (BackgroundColor, BackgroundColorType)
  943 
  944 def GetBackgroundColorUsingHighlightRangesMode(DataLabel, DataValue, DataMap):
  945     """Get background highlight color for value range."""
  946 
  947     BackgroundColor = None
  948     BackgroundColorType = None
  949 
  950     CanonicalDataLabel =DataLabel.lower()
  951     if not CanonicalDataLabel in OptionsInfo["HighlightRangesCanonicalLabelsMap"]:
  952         return (BackgroundColor, BackgroundColorType)
  953 
  954     Label = OptionsInfo["HighlightRangesCanonicalLabelsMap"][CanonicalDataLabel]
  955     DataType = OptionsInfo["HighlightRangesTypesMap"][Label]
  956     CriterionLower = OptionsInfo["HighlightRangesCriteriaLowerMap"][Label]
  957     CriterionLowerValue = OptionsInfo["HighlightRangesCriteriaLowerValuesMap"][Label]
  958     CriterionUpper = OptionsInfo["HighlightRangesCriteriaUpperMap"][Label]
  959     CriterionUpperValue = OptionsInfo["HighlightRangesCriteriaUpperValuesMap"][Label]
  960     
  961     if re.match("^numeric$", DataType, re.I):
  962         if not MiscUtil.IsNumber(DataValue):
  963             MiscUtil.PrintWarning("Ignoring data value, %s, for data label, %s, during numeric highlighting: It must be a number" % (DataValue, DataLabel))
  964             return (BackgroundColor, BackgroundColorType)
  965         
  966         DataValue = float(DataValue)
  967         ColorIndex = 1
  968         
  969         if DataValue < CriterionLowerValue and re.match("^lt$", CriterionLower, re.I):
  970             ColorIndex = 0
  971         elif DataValue <= CriterionLowerValue and re.match("^le$", CriterionLower, re.I):
  972             ColorIndex = 0
  973         elif DataValue > CriterionUpperValue and re.match("^gt$", CriterionUpper, re.I):
  974             ColorIndex = 2
  975         elif DataValue >= CriterionUpperValue and re.match("^ge$", CriterionUpper, re.I):
  976             ColorIndex = 2
  977     elif re.match("^text$", DataType, re.I):
  978         DataValue = "%s" % DataValue
  979         ColorIndex = 1
  980         
  981         if DataValue < CriterionLowerValue and re.match("^lt$", CriterionLower, re.I):
  982             ColorIndex = 0
  983         elif DataValue <= CriterionLowerValue and re.match("^le$", CriterionLower, re.I):
  984             ColorIndex = 0
  985         elif DataValue > CriterionUpperValue and re.match("^gt$", CriterionUpper, re.I):
  986             ColorIndex = 2
  987         elif DataValue >= CriterionUpperValue and re.match("^ge$", CriterionUpper, re.I):
  988             ColorIndex = 2
  989     else:
  990         return (BackgroundColor, BackgroundColorType)
  991 
  992     BackgroundColor = OptionsInfo["HighlightColorsRangesList"][ColorIndex]
  993     BackgroundColorType = OptionsInfo["HighlightColorsRangesType"]
  994     
  995     return (BackgroundColor, BackgroundColorType)
  996 
  997 def GetBackgroundColorUsingRandomMode(DataLabel, DataValue, DataMap):
  998     """Get a random background highlight color for a value."""
  999 
 1000     BackgroundColor = random.choice(OptionsInfo["HighlightColorsRandomList"])
 1001     BackgroundColorType = OptionsInfo["HighlightColorsRandomType"]
 1002     
 1003     return (BackgroundColor, BackgroundColorType)
 1004     
 1005 def SetupAlphanumericTableDataValue(Writer, DataValue, BackgroundColor, BackgroundColorType):
 1006     """Set up alphanumeric data value for a table cell."""
 1007     
 1008     WrappedDataValue = "%s" % DataValue
 1009 
 1010     # Look for new lines...
 1011     Delim = "<br/>"
 1012     if re.search("(\r\n|\r|\n)", WrappedDataValue):
 1013         WrappedDataValue = re.sub("(\r\n|\r|\n)", "<br/>", DataValue)
 1014 
 1015     # Wrap text...
 1016     if OptionsInfo["WrapText"] and len(WrappedDataValue) > OptionsInfo["WrapTextWidth"]:
 1017         WrappedDataLines = []
 1018         for DataLine in WrappedDataValue.split("<br/>"):
 1019             WrappedDataLine = MiscUtil.WrapText(DataLine, "<br/>", OptionsInfo["WrapTextWidth"])
 1020             WrappedDataLines.append(WrappedDataLine)
 1021         
 1022         WrappedDataValue = "<br/>".join(WrappedDataLines)
 1023     
 1024     # Highlight value...
 1025     if BackgroundColor is not None:
 1026         ColorTypeTag = GetBackgroundColorTypeTagForTableValue(BackgroundColor, BackgroundColorType)
 1027         Writer.write("""            <td %s = "%s">%s</td>\n""" % (ColorTypeTag, BackgroundColor, WrappedDataValue))
 1028     else:
 1029         Writer.write("""            <td>%s</td>\n""" % WrappedDataValue)
 1030 
 1031 def GetBackgroundColorTypeTagForTableValue(Color, ColorType):
 1032     """Setup color type tage for setting background of a table value."""
 1033 
 1034     ColorTypeTag = "class" if re.match("^colorclass", ColorType, re.I) else "bgcolor"
 1035     
 1036     return ColorTypeTag
 1037     
 1038 def PerformAlignment(ValidMols):
 1039     """Perform alignment to a common template specified by a SMARTS pattern."""
 1040     
 1041     if OptionsInfo["AlignmentSMARTSPattern"] is None:
 1042         return
 1043     
 1044     MiscUtil.PrintInfo("\nPerforming alignment for primary structure data...")
 1045     
 1046     PatternMol = Chem.MolFromSmarts(OptionsInfo["AlignmentSMARTSPattern"])
 1047     AllChem.Compute2DCoords(PatternMol)
 1048         
 1049     MatchedValidMols = [ValidMol for ValidMol in ValidMols if ValidMol.HasSubstructMatch(PatternMol)]
 1050     for ValidMol in MatchedValidMols:
 1051         AllChem.GenerateDepictionMatching2DStructure(ValidMol, PatternMol)
 1052 
 1053 def ProcessHighlightSMARTSOption():
 1054     """Process highlight SMARTS option"""
 1055 
 1056     OptionsInfo["HighlightSMARTS"]  = None
 1057     OptionsInfo["HighlightSMARTSAllMode"]  = False
 1058     OptionsInfo["HighlightSMARTSPatternMol"]  = None
 1059     
 1060     OptionsInfo["HighlightSMARTSDataLabels"]  = []
 1061     OptionsInfo["HighlightSMARTSDataLabelsMap"]  = {}
 1062     
 1063     OptionsInfo["HighlightSMARTSCanonicalDataLabelsMap"]  = {}
 1064     OptionsInfo["HighlightSMARTSPatternsMap"]  = {}
 1065     OptionsInfo["HighlightSMARTSPatternMolsMap"]  = {}
 1066 
 1067     OptionsInfo["HighlightSMARTSDelim"]  = Options["--highlightSMARTSDelim"]
 1068     
 1069     if re.match("^None$", Options["--highlightSMARTS"], re.I):
 1070         # Nothing to proecess...
 1071         return
 1072     
 1073     HighlightSMARTS = Options["--highlightSMARTS"].strip()
 1074     if not HighlightSMARTS:
 1075         MiscUtil.PrintError("No valid values specified using \"--highlightSMARTS\" option.")
 1076 
 1077     OptionsInfo["HighlightSMARTS"]  = HighlightSMARTS
 1078     HighlightSMARTSWords = HighlightSMARTS.split(OptionsInfo["HighlightSMARTSDelim"])
 1079     
 1080     if len(HighlightSMARTSWords) == 1:
 1081         PatternMol = Chem.MolFromSmarts(HighlightSMARTS)
 1082         if PatternMol is None:
 1083             MiscUtil.PrintError("The value specified, %s, using option \"--highlightSMARTS\" is not a valid SMARTS: Failed to create pattern molecule" % Options["--highlightSMARTS"])
 1084         OptionsInfo["HighlightSMARTSAllMode"]  = True
 1085         OptionsInfo["HighlightSMARTSPatternMol"]  = PatternMol
 1086         return
 1087     
 1088     if len(HighlightSMARTSWords) % 2:
 1089         MiscUtil.PrintError("The number of comma delimited paramater names and values, %d, specified using \"--highlightSMARTS\" option must be an even number." % (len(HighlightSMARTSWords)))
 1090     
 1091     HighlightSMARTSAllMode = False
 1092             
 1093     for Index in range(0, len(HighlightSMARTSWords), 2):
 1094         DataLabel = HighlightSMARTSWords[Index].strip()
 1095         SMARTSPattern = HighlightSMARTSWords[Index + 1].strip()
 1096                 
 1097         PatternMol = Chem.MolFromSmarts(SMARTSPattern)
 1098         if PatternMol is None:
 1099             MiscUtil.PrintError("The value specified, %s, using option \"--highlightSMARTS\" is not a valid SMARTS: Failed to create pattern molecule" % Options["--highlightSMARTS"])
 1100 
 1101         if DataLabel in OptionsInfo["HighlightSMARTSDataLabelsMap"]:
 1102             MiscUtil.PrintError("The datalabel, %s, specified in pair, \"%s, %s\", using option \"--highlightSMARTS\" is not a valid: Multiple occurences of data label" % (DataLabel, DataLabel, SMARTSPattern))
 1103             
 1104         OptionsInfo["HighlightSMARTSDataLabels"].append(DataLabel)
 1105         OptionsInfo["HighlightSMARTSDataLabelsMap"][DataLabel] = DataLabel
 1106         OptionsInfo["HighlightSMARTSCanonicalDataLabelsMap"][DataLabel.lower()] = DataLabel
 1107         OptionsInfo["HighlightSMARTSPatternsMap"][DataLabel] = SMARTSPattern
 1108         OptionsInfo["HighlightSMARTSPatternMolsMap"][DataLabel] = PatternMol
 1109 
 1110 def ProcessHighlightDataOptions():
 1111     """Process highlight values and colors option"""
 1112     
 1113     ProcessHighlightValuesOption()
 1114     ProcessHighlightValuesRangesOption()
 1115     ProcessHighlightValuesClassesOption()
 1116     
 1117     ProcessHighlightColorsOption()
 1118     ProcessHighlightColorsRangesOption()
 1119     ProcessHighlightColorsRandomOption()
 1120 
 1121 def ProcessHighlightValuesOption():
 1122     """Process highlight values option"""
 1123 
 1124     OptionsInfo["HighlightValues"]  = None
 1125     OptionsInfo["HighlightValuesLabels"]  = []
 1126     
 1127     OptionsInfo["HighlightValuesLabelsMap"]  = {}
 1128     OptionsInfo["HighlightValuesCanonicalLabelsMap"]  = {}
 1129     
 1130     OptionsInfo["HighlightValuesTypesMap"]  = {}
 1131     OptionsInfo["HighlightValuesCriteriaMap"]  = {}
 1132     OptionsInfo["HighlightValuesCriteriaValuesMap"]  = {}
 1133 
 1134     HighlightValues = Options["--highlightValues"].strip()
 1135     if re.match("^None$", HighlightValues, re.I):
 1136         return
 1137     
 1138     OptionsInfo["HighlightValues"]  = HighlightValues
 1139     HighlightValuesWords = HighlightValues.split(",")
 1140     
 1141     if len(HighlightValuesWords) % 4:
 1142         MiscUtil.PrintError("The number of comma delimited paramater names and values, %d, specified using \"--highlightValues\" option must be a multiple of 4." % (len(HighlightValuesWords)))
 1143 
 1144     for Index in range(0, len(HighlightValuesWords), 4):
 1145         DataLabel = HighlightValuesWords[Index].strip()
 1146         DataType = HighlightValuesWords[Index + 1].strip()
 1147         DataCriterion = HighlightValuesWords[Index + 2].strip()
 1148         DataValue = HighlightValuesWords[Index + 3].strip()
 1149         
 1150         if not re.match("^(numeric|text|regex)$", DataType, re.I):
 1151             MiscUtil.PrintError("The data type, %s, specified in quratet \"%s,%s,%s,%s\", using \"--highlightValues\" option is not valid. Supported values: numeric, regex or text." % (DataType, DataLabel, DataType, DataCriterion, DataValue))
 1152         
 1153         if re.match("^regex$", DataType, re.I):
 1154             if not re.match("^(eq|ne)$", DataCriterion, re.I):
 1155                 MiscUtil.PrintError("The data criterion, %s, specified in quratet \"%s,%s,%s,%s\", using \"--highlightValues\" option is not valid. Supported values: eq or ne" % (DataType, DataLabel, DataType, DataCriterion, DataValue))
 1156         else:
 1157             if not re.match("^(gt|lt|ge|le|eq|ne)$", DataCriterion, re.I):
 1158                 MiscUtil.PrintError("The data criterion, %s, specified in quratet \"%s,%s,%s,%s\", using \"--highlightValues\" option is not valid. Supported values: gt, lt, ge, le, eq, or  ne." % (DataType, DataLabel, DataType, DataCriterion, DataValue))
 1159 
 1160         # Check criterion value...
 1161         if re.match("^numeric$", DataType, re.I):
 1162             if not MiscUtil.IsNumber(DataValue):
 1163                 MiscUtil.PrintError("The data value, %s, specified in quratet \"%s,%s,%s,%s\", using \"--highlightValues\" option is not valid. It must be a number for data type, %s" % (DataType, DataLabel, DataType, DataCriterion, DataValue, DataType))
 1164             DataValue = float(DataValue)
 1165         
 1166         # Track values...
 1167         if DataLabel in OptionsInfo["HighlightValuesLabelsMap"]:
 1168             MiscUtil.PrintError("The data label, %s, specified in quratet \"%s,%s,%s,%s\", using \"--highlightValues\" option is not valid: Multiple occurences of data label" % (DataLabel, DataLabel, DataType, DataCriterion, DataValue))
 1169 
 1170         OptionsInfo["HighlightValuesLabels"].append(DataLabel)
 1171         OptionsInfo["HighlightValuesLabelsMap"][DataLabel]  = DataLabel
 1172         OptionsInfo["HighlightValuesCanonicalLabelsMap"][DataLabel.lower()]  = DataLabel
 1173         
 1174         OptionsInfo["HighlightValuesTypesMap"][DataLabel]  = DataType
 1175         OptionsInfo["HighlightValuesCriteriaMap"][DataLabel]  = DataCriterion 
 1176         OptionsInfo["HighlightValuesCriteriaValuesMap"][DataLabel]  = DataValue
 1177 
 1178 def ProcessHighlightValuesRangesOption():
 1179     """Process highlight values ranges option"""
 1180     
 1181     OptionsInfo["HighlightRanges"]  = None
 1182     OptionsInfo["HighlightRangesLabels"]  = []
 1183     
 1184     OptionsInfo["HighlightRangesLabelsMap"]  = {}
 1185     OptionsInfo["HighlightRangesCanonicalLabelsMap"]  = {}
 1186     
 1187     OptionsInfo["HighlightRangesTypesMap"]  = {}
 1188     OptionsInfo["HighlightRangesCriteriaLowerMap"]  = {}
 1189     OptionsInfo["HighlightRangesCriteriaLowerValuesMap"]  = {}
 1190     OptionsInfo["HighlightRangesCriteriaUpperMap"]  = {}
 1191     OptionsInfo["HighlightRangesCriteriaUpperValuesMap"]  = {}
 1192 
 1193     HighlightRanges = Options["--highlightValuesRanges"].strip()
 1194     if re.match("^None$", HighlightRanges, re.I):
 1195         return
 1196     
 1197     OptionsInfo["HighlightRanges"]  = HighlightRanges
 1198     HighlightRangesWords = HighlightRanges.split(",")
 1199     
 1200     if len(HighlightRangesWords) % 6:
 1201         MiscUtil.PrintError("The number of comma delimited paramater names and values, %d, specified in sextet \"%s\" using \"--highlightValuesRanges\" option must be a multiple of 6." % (len(HighlightRangesWords), HighlightRanges))
 1202 
 1203     for Index in range(0, len(HighlightRangesWords), 6):
 1204         DataLabel = HighlightRangesWords[Index].strip()
 1205         DataType = HighlightRangesWords[Index + 1].strip()
 1206         LowerBoundDataCriterion = HighlightRangesWords[Index + 2].strip()
 1207         LowerBoundDataValue = HighlightRangesWords[Index + 3].strip()
 1208         UpperBoundDataCriterion = HighlightRangesWords[Index + 4].strip()
 1209         UpperBoundDataValue = HighlightRangesWords[Index + 5].strip()
 1210 
 1211         SpecifiedSextet = "%s,%s,%s,%s,%s,%s" % (DataLabel, DataType, LowerBoundDataCriterion, LowerBoundDataValue, UpperBoundDataCriterion, UpperBoundDataValue)
 1212 
 1213         CanonicalDataLabel = DataLabel.lower()
 1214         if CanonicalDataLabel in OptionsInfo["HighlightValuesCanonicalLabelsMap"]:
 1215             MiscUtil.PrintError("The data label specified, %s, using option \"--highlightRanges\" has already been used in \"--highlightValues\" option" % DataLabel)
 1216         
 1217         if not re.match("^(numeric|text)$", DataType, re.I):
 1218             MiscUtil.PrintError("The data type, %s, specified in sextet \"%s\" using \"--highlightValuesRanges\" option is not valid. Supported values: numeric text." % (DataType, SpecifiedSextet))
 1219         
 1220         if not re.match("^(lt|le)$", LowerBoundDataCriterion, re.I):
 1221             MiscUtil.PrintError("The lower bound criterion, %s, specified in sextet \"%s\" using \"--highlightValuesRanges\" option is not valid. Supported values: lt or le." % (LowerBoundDataCriterion, SpecifiedSextet))
 1222         
 1223         if not re.match("^(gt|ge)$", UpperBoundDataCriterion, re.I):
 1224             MiscUtil.PrintError("The upper bound criterion, %s, specified in sextet \"%s\" using \"--highlightValuesRanges\" option is not valid. Supported values: gt or ge." % (UpperBoundDataCriterion, SpecifiedSextet))
 1225 
 1226         if re.match("^numeric$", DataType, re.I):
 1227             if not MiscUtil.IsNumber(LowerBoundDataValue):
 1228                 MiscUtil.PrintError("The lower bound data value, %s, specified in sextet \"%s\", using \"--highlightValuesRanges\" option is not valid. It must be a number for \"%s\" data type." % (LowerBoundDataValue, SpecifiedSextet, DataType))
 1229             
 1230             if not MiscUtil.IsNumber(UpperBoundDataValue):
 1231                 MiscUtil.PrintError("The upper bound data value, %s, specified in sextet \"%s\", using \"--highlightValuesRanges\" option is not valid. It must be a number for \"%s\"data type." % (UpperBoundDataValue, SpecifiedSextet, DataType))
 1232             
 1233             if float(LowerBoundDataValue) >= float(UpperBoundDataValue):
 1234                 MiscUtil.PrintError("The lower bound data value, %s, must be less than upper bound value, %s, specified in sextet \"%s\" using \"--highlightValuesRanges\" option." % (LowerBoundDataValue, UpperBoundDataValue,  SpecifiedSextet))
 1235             
 1236             LowerBoundDataValue = float(LowerBoundDataValue)
 1237             UpperBoundDataValue = float(UpperBoundDataValue)
 1238         else:
 1239             if LowerBoundDataValue >= UpperBoundDataValue:
 1240                 MiscUtil.PrintError("The lower bound data value, %s, must be less than upper bound value, %s, specified in sextet \"%s\", using \"--highlightValuesRanges\" option is not valid. It must be a number for data type, %s" % (LowerBoundDataValue, UpperBoundDataValue,  SpecifiedSextet, DataType))
 1241 
 1242         # Track values...
 1243         if DataLabel in OptionsInfo["HighlightRangesLabelsMap"]:
 1244             MiscUtil.PrintError("The data label, %s, specified in sextet \"%s\", using \"--highlightValuesRanges\" option is not valid. Multiple occurences of data label" % (DataLabel, SpecifiedSextet))
 1245 
 1246         OptionsInfo["HighlightRangesLabels"].append(DataLabel)
 1247         OptionsInfo["HighlightRangesLabelsMap"][DataLabel]  = DataLabel
 1248         OptionsInfo["HighlightRangesCanonicalLabelsMap"][CanonicalDataLabel]  = DataLabel
 1249         
 1250         OptionsInfo["HighlightRangesTypesMap"][DataLabel]  = DataType
 1251         
 1252         OptionsInfo["HighlightRangesCriteriaLowerMap"][DataLabel]  = LowerBoundDataCriterion 
 1253         OptionsInfo["HighlightRangesCriteriaLowerValuesMap"][DataLabel]  = LowerBoundDataValue
 1254         OptionsInfo["HighlightRangesCriteriaUpperMap"][DataLabel]  = UpperBoundDataCriterion 
 1255         OptionsInfo["HighlightRangesCriteriaUpperValuesMap"][DataLabel]  = UpperBoundDataValue
 1256 
 1257 def ProcessHighlightValuesClassesOption():
 1258     """Process highlight values classes option"""
 1259     
 1260     OptionsInfo["HighlightClasses"]  = None
 1261     OptionsInfo["HighlightClassesRules"]  = None
 1262     OptionsInfo["HighlightClassesSynonymsMap"]  = None
 1263     OptionsInfo["HighlightClassesRandom"]  = False
 1264 
 1265     OptionsInfo["HighlightClassesLabels"]  = []
 1266     OptionsInfo["HighlightClassesLabelsMap"]  = {}
 1267     OptionsInfo["HighlightClassesCanonicalLabelsMap"]  = {}
 1268     
 1269     OptionsInfo["HighlightClassesTypesMap"]  = {}
 1270     OptionsInfo["HighlightClassesCriteriaMap"]  = {}
 1271     OptionsInfo["HighlightClassesCriteriaValuesMap"]  = {}
 1272     
 1273     HighlightClasses = Options["--highlightValuesClasses"].strip()
 1274     if re.match("^None$", HighlightClasses, re.I):
 1275         return
 1276 
 1277     OptionsInfo["HighlightClasses"]  = HighlightClasses
 1278     
 1279     if re.match("^RuleOf5$", HighlightClasses, re.I):
 1280         HighlightClassessRules = "MolecularWeight,numeric,le,500,HydrogenBondDonors,numeric,le,5,HydrogenBondAcceptors,numeric,le,10,LogP,numeric,le,5"
 1281     elif re.match("^RuleOf3$", HighlightClasses, re.I):
 1282         HighlightClassessRules = "MolecularWeight,numeric,le,300,HydrogenBondDonors,numeric,le,3,HydrogenBondAcceptors,numeric,le,3,LogP,numeric,le,3,RotatableBonds,numeric,le,3,TPSA,numeric,le,60"
 1283     elif re.match("^DrugLike$", HighlightClasses, re.I):
 1284         HighlightClassessRules = "MolecularWeight,numeric,le,500,HydrogenBondDonors,numeric,le,5,HydrogenBondAcceptors,numeric,le,10,LogP,numeric,le,5,RotatableBonds,numeric,le,10,TPSA,numeric,le,140"
 1285     elif re.match("^Random$", HighlightClasses, re.I):
 1286         if OptionsInfo["HighlightValues"] is not None:
 1287             MiscUtil.PrintError("The value specified, %s, using option \"--highlightValuesClasses\" is not allowed in conjunction with \"--highlightValues\" option." % HighlightClasses)
 1288         if OptionsInfo["HighlightRanges"] is not None:
 1289             MiscUtil.PrintError("The value specified, %s, using option \"--highlightValuesClasses\" is not allowed in conjunction with \"--highlightRanges\" option ." % HighlightClasses)
 1290             
 1291         OptionsInfo["HighlightClassesRandom"]  = True
 1292         return
 1293     else:
 1294         MiscUtil.PrintError("The value specified, %d, using option \"--highlightValuesClasses\" is not supported." % HighlightClasses)
 1295         return
 1296         
 1297     OptionsInfo["HighlightClassesRules"]  = HighlightClassessRules
 1298 
 1299     # Process rules for highlighting values...
 1300     HighlightClassesWords = HighlightClassessRules.split(",")
 1301     for Index in range(0, len(HighlightClassesWords), 4):
 1302         DataLabel = HighlightClassesWords[Index].strip()
 1303         DataType = HighlightClassesWords[Index + 1].strip()
 1304         DataCriterion = HighlightClassesWords[Index + 2].strip()
 1305         DataValue = HighlightClassesWords[Index + 3].strip()
 1306         
 1307         DataValue = float(DataValue)
 1308 
 1309         if DataLabel in OptionsInfo["HighlightClassesLabelsMap"]:
 1310             MiscUtil.PrintWarning("Ignoring duplicate datalabel, %s, specified in highlighting values rule for class, %s, in \"--highlightClassesValue\" option..." % (DataLabel, HighlightClasses))
 1311             continue
 1312             
 1313         OptionsInfo["HighlightClassesLabels"].append(DataLabel)
 1314         OptionsInfo["HighlightClassesLabelsMap"][DataLabel] = DataLabel
 1315         
 1316         OptionsInfo["HighlightClassesTypesMap"][DataLabel] = DataType
 1317         OptionsInfo["HighlightClassesCriteriaMap"][DataLabel] = DataCriterion
 1318         OptionsInfo["HighlightClassesCriteriaValuesMap"][DataLabel] = DataValue
 1319     
 1320     # Set up synonyms for data labels corresponding to physicochemical properties
 1321     # calculated by MayaChemTools and RDKit...
 1322     OptionsInfo["HighlightClassesSynonymsMap"]  = {}
 1323     OptionsInfo["HighlightClassesSynonymsMap"]["MolecularWeight"]  = ["MolecularWeight", "MolWt"]
 1324     OptionsInfo["HighlightClassesSynonymsMap"]["HydrogenBondDonors"]  = ["HydrogenBondDonors", "NHOHCount"]
 1325     OptionsInfo["HighlightClassesSynonymsMap"]["HydrogenBondAcceptors"]  = ["HydrogenBondAcceptors", "NOCount"]
 1326     OptionsInfo["HighlightClassesSynonymsMap"]["LogP"]  = ["SLogP", "MolLogP"]
 1327     OptionsInfo["HighlightClassesSynonymsMap"]["RotatableBonds"]  = ["RotatableBonds", "NumRotatableBonds"]
 1328     OptionsInfo["HighlightClassesSynonymsMap"]["TPSA"]  = ["TPSA", "TPSA"]
 1329 
 1330 def ProcessHighlightColorsOption():
 1331     """Process highlight colors option"""
 1332 
 1333     OptionsInfo["HighlightColors"] = None
 1334     OptionsInfo["HighlightColorsType"] = None
 1335     OptionsInfo["HighlightColorsList"] = None
 1336 
 1337     HighlightColors = "colorclass,table-success, table-danger"
 1338     if not re.match("^auto$", Options["--highlightColors"], re.I):
 1339         HighlightColors = Options["--highlightColors"].strip()
 1340         if MiscUtil.IsEmpty(HighlightColors):
 1341             MiscUtil.PrintError("The value specified using \"--highlightColors\" is empty.")
 1342 
 1343     OptionsInfo["HighlightColors"] = re.sub(" ", "", HighlightColors)
 1344     HighlightColorsList = [Color.lower() for Color in OptionsInfo["HighlightColors"].split(",")]
 1345         
 1346     if len(HighlightColorsList) != 3:
 1347         MiscUtil.PrintError("The number of comma delimited paramater names and values, %d, specified using \"--highlightColors\" option must be 3." % (len(HighlightColorsList)))
 1348         
 1349     ColorsType, Color1, Color2 = HighlightColorsList
 1350     if not re.match("^(colorclass|colorspec)$", ColorsType, re.I):
 1351         MiscUtil.PrintError("The color type, %s, specified using \"--highlightColors\" option is not valid. Supported values: colorclass or colorspec." % ColorsType)
 1352 
 1353     ColorsList = [Color1, Color2]
 1354     if re.match("^colorclass$", ColorsType, re.I):
 1355         CheckOptionTableClassColorValues("--highlightColors", ColorsList)
 1356         
 1357     OptionsInfo["HighlightColorsList"] = ColorsList
 1358     OptionsInfo["HighlightColorsType"] = ColorsType
 1359 
 1360 def ProcessHighlightColorsRangesOption():
 1361     """Process highlight colors ranges option"""
 1362 
 1363     OptionsInfo["HighlightColorsRanges"] = None
 1364     OptionsInfo["HighlightColorsRangesType"] = None
 1365     OptionsInfo["HighlightColorsRangesList"] = None
 1366 
 1367     HighlightColors = "colorclass,table-success, table-warning, table-danger"
 1368     if not re.match("^auto$", Options["--highlightColorsRanges"], re.I):
 1369         HighlightColors = Options["--highlightColorsRanges"].strip()
 1370         if MiscUtil.IsEmpty(HighlightColors):
 1371             MiscUtil.PrintError("The value specified using \"--highlightColorsRanges\" is empty.")
 1372 
 1373     OptionsInfo["HighlightColorsRanges"] = re.sub(" ", "", HighlightColors)
 1374     HighlightColorsList = [Color.lower() for Color in OptionsInfo["HighlightColorsRanges"].split(",")]
 1375         
 1376     if len(HighlightColorsList) != 4:
 1377         MiscUtil.PrintError("The number of comma delimited paramater names and values, %d, specified using \"--highlightColorsRanges\" option must be 4." % (len(HighlightColorsList)))
 1378     
 1379     ColorsType, Color1, Color2, Color3 = HighlightColorsList
 1380     if not re.match("^(colorclass|colorspec)$", ColorsType, re.I):
 1381         MiscUtil.PrintError("The color type, %s, specified using \"--highlightColorsRanges\" option is not valid. Supported values: colorclass or colorspec." % ColorsType)
 1382     
 1383     ColorsList = [Color1, Color2, Color3]
 1384     if re.match("^colorclass$", ColorsType, re.I):
 1385         CheckOptionTableClassColorValues("--highlightColorsRanges", ColorsList)
 1386     
 1387     OptionsInfo["HighlightColorsRangesList"] = ColorsList
 1388     OptionsInfo["HighlightColorsRangesType"] = ColorsType
 1389 
 1390 def ProcessHighlightColorsRandomOption():
 1391     """Process highlight colors random option"""
 1392 
 1393     OptionsInfo["HighlightColorsRandom"] = None
 1394     OptionsInfo["HighlightColorsRandomType"] = None
 1395     OptionsInfo["HighlightColorsRandomList"] = None
 1396 
 1397     HighlightColors = "colorclass,table-primary,table-success,table-danger,table-info,table-warning,table-secondary"
 1398     if not re.match("^auto$", Options["--highlightColorsRandom"], re.I):
 1399         HighlightColors = Options["--highlightColorsRandom"].strip()
 1400         if MiscUtil.IsEmpty(HighlightColors):
 1401             MiscUtil.PrintError("The value specified using \"--highlightColorsRandom\" is empty.")
 1402 
 1403     OptionsInfo["HighlightColorsRandom"] = re.sub(" ", "", HighlightColors)
 1404     HighlightColorsList = [Color.lower() for Color in OptionsInfo["HighlightColorsRandom"].split(",")]
 1405         
 1406     if len(HighlightColorsList) <= 1:
 1407         MiscUtil.PrintError("The number of comma delimited paramater names and values, %d, specified using \"--highlightColorsRandom\" option must be > 1." % (len(HighlightColorsList)))
 1408         
 1409     ColorsType = HighlightColorsList[0]
 1410     ColorsList = HighlightColorsList[1:]
 1411     
 1412     if not re.match("^(colorclass|colorspec)$", ColorsType, re.I):
 1413         MiscUtil.PrintError("The color type, %s, specified using \"--highlightColorsRandim\" option is not valid. Supported values: colorclass or colorspec." % ColorsType)
 1414 
 1415     if re.match("^colorclass$", ColorsType, re.I):
 1416         CheckOptionTableClassColorValues("--highlightColorsRandom", ColorsList)
 1417         
 1418     OptionsInfo["HighlightColorsRandomList"] = ColorsList
 1419     OptionsInfo["HighlightColorsRandomType"] = ColorsType
 1420 
 1421 def CheckOptionTableClassColorValues(OptionName, ColorsList):
 1422     """Check names of table color classes and issue a warning for unknown names."""
 1423 
 1424     TableClassColors = ["thead-dark", "thead-light", "table-primary", "table-success", "table-danger", "table-info", "table-warning", "table-active", "table-secondary", "table-light", "table-dark", "bg-primary", "bg-success", "bg-danger",  "bg-info", "bg-warning", "bg-secondary", "bg-dark", "bg-light"]
 1425 
 1426     for Color in ColorsList:
 1427         if not Color in TableClassColors:
 1428             MiscUtil.PrintWarning("The color class name, %s, specified using option \"%s\" appears to be a unknown name..." % (Color, OptionName))
 1429         
 1430 def ProcessOptions():
 1431     """Process and validate command line arguments and options"""
 1432     
 1433     MiscUtil.PrintInfo("Processing options...")
 1434     
 1435     # Validate options...
 1436     ValidateOptions()
 1437     
 1438     OptionsInfo["Infile"] = Options["--infile"]
 1439     OptionsInfo["Outfile"] = Options["--outfile"]
 1440     OptionsInfo["Overwrite"] = Options["--overwrite"]
 1441     
 1442     # No need for any RDKit specific --outfileParams....
 1443     OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters("--infileParams", Options["--infileParams"], OptionsInfo["Infile"])
 1444 
 1445     AlignmentSMARTSPattern = None
 1446     if not re.match("^None$", Options["--alignmentSMARTS"], re.I):
 1447         AlignmentSMARTSPattern = Options["--alignmentSMARTS"]
 1448     OptionsInfo["AlignmentSMARTSPattern"]  = AlignmentSMARTSPattern
 1449     
 1450     Compute2DCoords = True
 1451     if re.match("^no$", Options["--compute2DCoords"], re.I):
 1452         Compute2DCoords = False
 1453     OptionsInfo["Compute2DCoords"]  = Compute2DCoords
 1454 
 1455     CounterCol = True
 1456     if re.match("^no$", Options["--counterCol"], re.I):
 1457         CounterCol = False
 1458     OptionsInfo["CounterCol"]  = CounterCol
 1459     
 1460     ColVisibility = True
 1461     if re.match("^no$", Options["--colVisibility"], re.I):
 1462         ColVisibility = False
 1463     OptionsInfo["ColVisibility"]  = ColVisibility
 1464     
 1465     OptionsInfo["ColVisibilityCtrlMax"]  = int(Options["--colVisibilityCtrlMax"])
 1466     
 1467     Footer = None
 1468     if not re.match("^None$", Options["--footer"], re.I):
 1469         Footer = Options["--footer"]
 1470     OptionsInfo["Footer"]  = Footer
 1471 
 1472     FooterClass = Options["--footerClass"].strip()
 1473     if MiscUtil.IsEmpty(FooterClass):
 1474         MiscUtil.PrintError("The value specified using option \"--footerClass\" is empty.")
 1475     OptionsInfo["FooterClass"]  = FooterClass
 1476     
 1477     FreezeCols = True
 1478     if re.match("^no$", Options["--freezeCols"], re.I):
 1479         FreezeCols = False
 1480     OptionsInfo["FreezeCols"]  = FreezeCols
 1481     
 1482     Header = None
 1483     if not re.match("^None$", Options["--header"], re.I):
 1484         Header = Options["--header"]
 1485     OptionsInfo["Header"]  = Header
 1486     
 1487     HeaderStyle = Options["--headerStyle"].strip()
 1488     if MiscUtil.IsEmpty(HeaderStyle):
 1489         MiscUtil.PrintError("The value specified using option \"--headerStyle\" is empty.")
 1490     OptionsInfo["HeaderStyle"]  = HeaderStyle
 1491 
 1492     ProcessHighlightSMARTSOption()
 1493     ProcessHighlightDataOptions()
 1494     
 1495     OptionsInfo["KeysNavigation"] = True
 1496     if re.match("^no$", Options["--keysNavigation"], re.I):
 1497         OptionsInfo["KeysNavigation"] = False
 1498     
 1499     SizeValues = Options["--molImageSize"].split(",")
 1500     OptionsInfo["MolImageWidth"] = int(SizeValues[0])
 1501     OptionsInfo["MolImageHeight"] = int(SizeValues[1])
 1502     
 1503     OptionsInfo["MolImageEncoded"] = True
 1504     if re.match("^no$", Options["--molImageEncoded"], re.I):
 1505         OptionsInfo["MolImageEncoded"] = False
 1506     
 1507     OptionsInfo["Paging"] = True
 1508     if re.match("^no$", Options["--paging"], re.I):
 1509         OptionsInfo["Paging"] = False
 1510     
 1511     PagingType = Options["--pagingType"]
 1512     if not re.match("^(numbers|simple|simple_numbers|full|full_numbers|simple_number)$", Options["--pagingType"], re.I):
 1513         MiscUtil.PrintWarning("The paging type name, %s, specified using option \"--pagingType\" appears to be a unknown type..." % (PagingType))
 1514     OptionsInfo["PagingType"] = PagingType.lower()
 1515     
 1516     OptionsInfo["PageLength"] = int(Options["--pageLength"])
 1517     
 1518     OptionsInfo["RegexSearch"] = True
 1519     if re.match("^no$", Options["--regexSearch"], re.I):
 1520         OptionsInfo["RegexSearch"] = False
 1521     
 1522     OptionsInfo["ShowMolName"] = True
 1523     OptionsInfo["ShowMolNameDataLabel"] = "Name"
 1524     if re.match("^no$", Options["--showMolName"], re.I):
 1525         OptionsInfo["ShowMolName"] = False
 1526     
 1527     OptionsInfo["ShowMolNameAuto"] = True if re.match("^auto$", Options["--showMolName"], re.I) else False
 1528 
 1529     OptionsInfo["ScrollX"] = True
 1530     if re.match("^no$", Options["--scrollX"], re.I):
 1531         OptionsInfo["ScrollX"] = False
 1532         
 1533     OptionsInfo["ScrollY"] = True
 1534     if re.match("^no$", Options["--scrollY"], re.I):
 1535         OptionsInfo["ScrollY"] = False
 1536 
 1537     OptionsInfo["ScrollYSize"] = Options["--scrollYSize"]
 1538     if re.match("vh$", Options["--scrollYSize"], re.I):
 1539         ScrollYSize = int(re.sub("vh$", "", Options["--scrollYSize"]))
 1540         if ScrollYSize <= 0:
 1541             MiscUtil.PrintError("The value specified, %s, for option \"--scrollYSize\" is not valid. Supported value: > 0 followed by \"vh\"" % Options["--scrollYSize"])
 1542     
 1543     TableStyle = None
 1544     if not re.match("^None$", Options["--tableStyle"], re.I):
 1545         if re.match("^All$", Options["--tableStyle"], re.I):
 1546             TableStyle = "table table-striped table-bordered table-hover table-dark"
 1547         else:
 1548             TableStyle = re.sub(" ", "", Options["--tableStyle"])
 1549             for Style in [Style for Style in TableStyle.split(",")]:
 1550                 if not re.match("^(table|table-striped|table-bordered|table-hover|table-dark|table-sm)$", Style, re.I):
 1551                     MiscUtil.PrintWarning("The table style name, %s, specified using option \"-t, --tableStyle\" appears to be a unknown style..." % (Style))
 1552             TableStyle = re.sub(",", " ", TableStyle.lower())
 1553     OptionsInfo["TableStyle"]  = TableStyle
 1554 
 1555     TableHeaderStyle = None
 1556     if not re.match("^None$", Options["--tableHeaderStyle"], re.I):
 1557         TableHeaderStyle = Options["--tableHeaderStyle"]
 1558         TableHeaderStyle = TableHeaderStyle.lower()
 1559         CheckOptionTableClassColorValues("--tableHeaderStyle", [TableHeaderStyle])
 1560     OptionsInfo["TableHeaderStyle"]  = TableHeaderStyle
 1561     
 1562     OptionsInfo["TableFooter"] = True
 1563     if re.match("^no$", Options["--tableFooter"], re.I):
 1564         OptionsInfo["TableFooter"] = False
 1565 
 1566     OptionsInfo["WrapText"] = True
 1567     if re.match("^no$", Options["--wrapText"], re.I):
 1568         OptionsInfo["WrapText"] = False
 1569 
 1570     OptionsInfo["WrapTextWidth"] = int(Options["--wrapTextWidth"])
 1571 
 1572 def RetrieveOptions():
 1573     """Retrieve command line arguments and options"""
 1574     
 1575     # Get options...
 1576     global Options
 1577     Options = docopt(_docoptUsage_)
 1578     
 1579     # Set current working directory to the specified directory...
 1580     WorkingDir = Options["--workingdir"]
 1581     if WorkingDir:
 1582         os.chdir(WorkingDir)
 1583     
 1584     # Handle examples option...
 1585     if "--examples" in Options and Options["--examples"]:
 1586         MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_))
 1587         sys.exit(0)
 1588 
 1589 def ValidateOptions():
 1590     """Validate option values"""
 1591     
 1592     if not re.match("^None$", Options["--alignmentSMARTS"], re.I):
 1593         PatternMol = Chem.MolFromSmarts(Options["--alignmentSMARTS"])
 1594         if PatternMol is None:
 1595             MiscUtil.PrintError("The value specified, %s, using option \"--alignmentSMARTS\" is not a valid SMARTS: Failed to create pattern molecule" % Options["--alignmentSMARTS"])
 1596     
 1597     MiscUtil.ValidateOptionTextValue("-c, --compute2DCoords", Options["--compute2DCoords"], "yes no auto")
 1598     
 1599     MiscUtil.ValidateOptionTextValue("--counterCol", Options["--counterCol"], "yes no")
 1600     MiscUtil.ValidateOptionTextValue("--colVisibility", Options["--colVisibility"], "yes no")
 1601     MiscUtil.ValidateOptionIntegerValue("--colVisibilityCtrlMax", Options["--colVisibilityCtrlMax"], {">": 0})
 1602     
 1603     MiscUtil.ValidateOptionTextValue("--freezeCols", Options["--freezeCols"], "yes no")
 1604     MiscUtil.ValidateOptionTextValue("--highlightValuesClasses", Options["--highlightValuesClasses"], "RuleOf5 RuleOf3 DrugLike Random None")
 1605     
 1606     MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"])
 1607     MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "sdf sd mol smi csv tsv txt")
 1608 
 1609     MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "html")
 1610     MiscUtil.ValidateOptionsOutputFileOverwrite("-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"])
 1611     MiscUtil.ValidateOptionsDistinctFileNames("-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"])
 1612     
 1613     MiscUtil.ValidateOptionTextValue("-k, --keysNavigation", Options["--keysNavigation"], "yes no")
 1614     
 1615     MiscUtil.ValidateOptionNumberValues("-m, --molImageSize", Options["--molImageSize"], 2, ",", "integer", {">": 0})
 1616     MiscUtil.ValidateOptionTextValue("--molImageEncoded", Options["--molImageEncoded"], "yes no")
 1617     
 1618     MiscUtil.ValidateOptionTextValue("-p, --paging", Options["--paging"], "yes no")
 1619     MiscUtil.ValidateOptionIntegerValue("--pageLength", Options["--pageLength"], {">": 0})
 1620     MiscUtil.ValidateOptionTextValue("-r, --regexSearch", Options["--regexSearch"], "yes no")
 1621     
 1622     MiscUtil.ValidateOptionTextValue("--showMolName", Options["--showMolName"], "yes no auto")
 1623     
 1624     MiscUtil.ValidateOptionTextValue("--scrollX", Options["--scrollX"], "yes no")
 1625     MiscUtil.ValidateOptionTextValue("--scrollY", Options["--scrollY"], "yes no")
 1626     if not re.search("vh$", Options["--scrollYSize"], re.I):
 1627         MiscUtil.ValidateOptionIntegerValue("--scrollYSize", Options["--scrollYSize"], {">": 0})
 1628     
 1629     MiscUtil.ValidateOptionTextValue("--tableFooter", Options["--tableFooter"], "yes no")
 1630     
 1631     MiscUtil.ValidateOptionTextValue("--wrapText", Options["--wrapText"], "yes no")
 1632     MiscUtil.ValidateOptionIntegerValue("--wrapTextWidth", Options["--wrapTextWidth"], {">": 0})
 1633 
 1634 # Setup a usage string for docopt...
 1635 _docoptUsage_ = """
 1636 RDKitDrawMoleculesAndDataTable.py - Generate a HTML data table
 1637 
 1638 Usage:
 1639     RDKitDrawMoleculesAndDataTable.py [--alignmentSMARTS <SMARTS>]
 1640                              [--compute2DCoords <yes or  no>] [--counterCol <yes or no>]
 1641                              [--colVisibility <yes or no>] [--colVisibilityCtrlMax <number>] [--footer <text>]
 1642                              [--footerClass <text>] [--freezeCols <yes or no>] [--header <text>]
 1643                              [--headerStyle <text>] [--highlightSMARTS <SMARTS,...>] [--highlightSMARTSDelim <text>]
 1644                              [--highlightValues <datalabel,datatype,criterion,value,...>]
 1645                              [--highlightValuesRanges <datalabel,datatype,criterion1,vaue1,criterion2,value2...>]
 1646                              [--highlightValuesClasses <RuleOf5,RuleOf3,...>]
 1647                              [--highlightColors <colortype,color1,color2>]
 1648                              [--highlightColorsRanges <colortype,color1,color2,color3>]
 1649                              [--highlightColorsRandom <colottype,color1,color2,...>]
 1650                              [--infileParams <Name,Value,...>] [--keysNavigation <yes or no>]
 1651                              [--molImageSize <width,height>] [--molImageEncoded <yes or no> ] [--overwrite]
 1652                              [--paging <yes or no>] [--pagingType <numbers,simple, ...>] [--pageLength <number>]
 1653                              [--regexSearch <yes or no>] [--showMolName <yes or no>]
 1654                              [--scrollX <yes or no>] [--scrollY <yes or no>] [--scrollYSize <number>]
 1655                              [--tableStyle <table,table-striped,...>] [--tableFooter <yes or no>]
 1656                              [--tableHeaderStyle <thead-dark,thead-light,...>] [--wrapText <yes or no>] 
 1657                              [--wrapTextWidth <number>] [-w <dir>] -i <infile> -o <outfile>
 1658     RDKitDrawMoleculesAndDataTable.py -h | --help | -e | --examples
 1659 
 1660 Description:
 1661     Generate an interactive HTML table with columns corresponding to molecules
 1662     and available alphanumerical data in an input file. The drawing of molecules are
 1663     embedded in the columns as in line SVG images.
 1664 
 1665     The interactive HTML table may contain multiple columns with drawing of
 1666     molecules. These columns are automatically generated for each data field in SD
 1667     file or a column name in SMILES and CSV/TSV file containing SMILES
 1668     string in their names. The first molecular drawing column in the HTML table
 1669     represents primary molecular structure data available in an input file. It
 1670     corresponds to MOL block is SD file or a first column containing SMILES string
 1671     in its name in SMILES and CSV/TSV files.
 1672  
 1673     The interactive table requires internet access for viewing in a browser and
 1674     employs the following frameworks: JQuery, Bootstrap, and DataTable. It provides
 1675     the following functionality: sorting by columns, page length control, page 
 1676     navigation, searching data with regular expressions, and horizontal/vertical
 1677     scrolling, row highlighting during hovering, a counter column, freezing of primary
 1678     structure and counter columns, and column visibility control.
 1679 
 1680     The supported input file formats are: Mol (.mol), SD (.sdf, .sd), SMILES (.smi),
 1681     CSV/TSV (.csv, .tsv, .txt)
 1682 
 1683     The supported output file format is HTML (.html).
 1684 
 1685 Options:
 1686     -a, --alignmentSMARTS <SMARTS>  [default: none]
 1687         SMARTS pattern for aligning molecules to a common template. This option is
 1688         only used for primary molecular data in SD, SMILES and CSV/TSV files. It is 
 1689         ignored for all other molecular coordinates corresponding to data fields in SD
 1690         file or columns in SMILES and CSV/TSV files containing SMILES string in their
 1691         names.
 1692     -c, --compute2DCoords <yes or no>  [default: yes]
 1693         Compute 2D coordinates of molecules before drawing. Default: yes for SMILES
 1694         strings in SMILES, CSV/TSV, and SD file data fields. In addition, 2D coordinated are
 1695         always calculated for molecules corresponding to data fields in SD file or columns
 1696         in SMILES and CSV/TSV files containing SMILES string in their names.
 1697     --counterCol <yes or no>  [default: yes]
 1698         Show a counter column as the first column in the table. It contains the position
 1699         for each row in the table.
 1700     --colVisibility <yes or no>  [default: yes]
 1701         Show a dropdown button to toggle visibility of columns in the table. The counter
 1702         and primary structure columns are excluded from the list.
 1703     --colVisibilityCtrlMax <number>  [default: 25]
 1704         Maximum number of columns to show in column visibility dropdown button. The
 1705         rest of the data columns are not listed in the dropdown and are shown in the table.
 1706         A word to the wise: The display of too many columns appear to hang interactive
 1707         Javascript framework for Bootstrap and DataTables.
 1708     --freezeCols <yes or no>  [default: yes]
 1709         Lock counter and primary structure columns in place during horizontal scrolling.
 1710     --footer <text>  [default: none]
 1711         Footer text to insert at the bottom of the HTML page after the table.
 1712     --footerClass <text>  [default: small text-center text-muted]
 1713         Footer class style to use with <p> tag.
 1714     -e, --examples
 1715         Print examples.
 1716     -h, --help
 1717         Print this help message.
 1718     --header <text>  [default: none]
 1719         Header text to insert at the top of the HTML page before the table.
 1720     --headerStyle <text>  [default: h5]
 1721         Header style to use. Possible values: h1 to h6.
 1722     --highlightSMARTS <SMARTS,...>  [default: none]
 1723         SMARTS pattern for highlighting atoms and bonds in molecules. All matched
 1724         substructures are highlighted.
 1725         
 1726         The SMARTS string is used to highlight atoms and bonds in drawing of
 1727         molecules present in a HTML table across multiple columns. These columns
 1728         correspond to data field labels in SD file or a column name in SMILES and
 1729         CSV/TSV file containing SMILES string in their names. The first molecular
 1730         drawing column in HTML table corresponds to primary molecular structure
 1731         data available in an input file. It is identified by a label 'Structure' across
 1732         all input formats.
 1733         
 1734         A single SMARTS string is used to highlight a common substructure across
 1735         all columns containing drawing of molecules in HTML table.
 1736         
 1737         Format:
 1738             
 1739             SMARTS
 1740             Structure,SMARTS1,DataLabel,SMARTS2,...
 1741             Structure,SMARTS1,Collabel,SMARTS2,...
 1742             
 1743         Example:
 1744             
 1745             c1ccccc1
 1746             Structure,c1ccccc1,SMILESR1,c1ccccc1,SMILESR2,c1ccccc1
 1747             
 1748     --highlightSMARTSDelim <text>  [default: ,]
 1749         Delimiter for parsing SMARTS patterns specified using '--highlightSMARTS'
 1750         option. Default: ',' comma character. Possible value: Any arbitrary text or
 1751         a valid character. You may use arbitrary text as a delimiter to handle
 1752         presence of special characters such as comma, semicolon, tilde etc. in
 1753         SMARTS patterns. 
 1754     --highlightValues <datalabel,datatype,criterion,value,...>  [default: none]
 1755         Highlighting methodology to use for highlighting  alphanumerical data
 1756         corresponding to data fields in SD file or column names in SMILES and
 1757         CSV/TSV text files.
 1758         
 1759         Input text contains these quartets: DataLabel, DataType, Criterion, Value.
 1760         Possible datatype values: numeric, text. Possible criterion values for numeric
 1761         and text: gt, lt, ge, le.
 1762         
 1763         The 'datalabel' corresponds to either data field label in SD file or column name
 1764         in SMILES and CSV/TSV text files.
 1765         
 1766         Examples:
 1767             
 1768             MolecularWeight,numeric,le,500
 1769             MolecularWeight,numeric,le,450,SLogP,numeric,le,5
 1770             Name,text,eq,Aspirin
 1771             Name,regex,eq,acid|amine
 1772             
 1773     --highlightValuesRanges <datalabel,datatype,...>  [default: none]
 1774         Highlighting methodology to use for highlighting ranges of alphanumerical
 1775         data corresponding to data fields in SD file or column names in SMILES and
 1776         CSV/TSV text files.
 1777         
 1778         Input text contains these sextets: DataLabel, DataType, CriterionLowerBound,
 1779         LowerBoundValue, CriterionUpperBound, UpperBoundValue.
 1780         
 1781         Possible datatype values: numeric or text. Possible criterion values: Lower
 1782         bound value - lt, le; Upper bound value: gt, ge.
 1783         
 1784         The 'datalabel' corresponds to either data field label in SD file or column name
 1785         in SMILES and CSV/TSV text files.
 1786         
 1787         Examples:
 1788             
 1789             MolecularWeight,numeric,lt,450,gt,1000
 1790             MolecularWeight,numeric,lt,450,gt,1000,SLogP,numeric,lt,0,gt,5
 1791             
 1792     --highlightValuesClasses <RuleOf5,RuleOf3,...>  [default: none]
 1793         Highlighting methodology to use for highlighting ranges of numerical data
 1794         data corresponding to specific set of data fields in SD file or column names in
 1795         SMILES and CSV/TSV text files. Possible values: RuleOf5, RuleOf3, DrugLike,
 1796         Random.
 1797         
 1798         The following value classes are supported: RuleOf5, RuleOf3, LeadLike, DrugLike.
 1799         LeadLike is equivalent to RuleOf3.
 1800         
 1801         Each supported class encompasses a specific set of data labels along with
 1802         appropriate criteria to compare and highlight column values, except for
 1803         'Random' class. The data labels in these classes are automatically associated
 1804         with appropriate data fields in SD file or column names in SMILES and CSV/TSV
 1805         text files.
 1806         
 1807         No data labels are associated with 'Random' class. It is used to highlight
 1808         available alphanumeric data by randomly selecting a highlight color from the
 1809         list of colors specified using '--highlightColorsRandom' option. The 'Random'
 1810         class value is not allowed in conjunction with '--highlightValues' or
 1811         '--highlightValuesRanges'.
 1812         
 1813         The rules to highlight values for the supported classes are as follows.
 1814         
 1815         RuleOf5 [ Ref 91 ]:
 1816          
 1817             MolecularWeight,numeric,le,500 (MolecularWeight <= 500)
 1818             HydrogenBondDonors,numeric,le,5 (HydrogenBondDonors <= 5)
 1819             HydrogenBondAcceptors,numeric,le,10 (HydrogenBondAcceptors <= 10)
 1820             LogP,numeric,le,5 (LogP <= 5)
 1821          
 1822         RuleOf3 or LeadLike [ Ref 92 ]:
 1823          
 1824             MolecularWeight,numeric,le,300 (MolecularWeight <= 300)
 1825             HydrogenBondDonors,numeric,le,3 (HydrogenBondDonors <= 3)
 1826             HydrogenBondAcceptors,numeric,le,3 (HydrogenBondAcceptors <= 3)
 1827             LogP,numeric,le,3 (LogP <= 3)
 1828             RotatableBonds,numeric,le,3 (RotatableBonds <= 3)
 1829             TPSA,numeric,le,60 (TPSA <= 60)
 1830          
 1831         DrugLike:
 1832          
 1833             MolecularWeight,numeric,le,500 (MolecularWeight <= 500)
 1834             HydrogenBondDonors,numeric,le,5 (HydrogenBondDonors <= 5)
 1835             HydrogenBondAcceptors,numeric,le,10 (HydrogenBondAcceptors <= 10)
 1836             LogP,numeric,le,5 (LogP <= 5)
 1837             RotatableBonds,numeric,le,10 (RotatableBonds <= 10)
 1838             TPSA,numeric,le,140 (TPSA <= 140)
 1839             
 1840         The following synonyms are automatically detected for data labels used
 1841         by MayaChemTools and RDKit packages during the calculation of
 1842         physicochemical properties.
 1843         
 1844         MayaChemTools: MolecularWeight, HydrogenBondDonors, HydrogenBondAcceptors,
 1845         SLogP, RotatableBonds, TPSA.
 1846             
 1847         RDKit: MolWt,  NHOHCount, NOCount, MolLogP, NumRotatableBonds, TPSA
 1848         
 1849     --highlightColors <colortype,color1,color2>  [default: auto]
 1850         Background colors used to highlight column values based on criterion
 1851         specified by '--highlightValues' and '--highlightValuesClasses' options. Default
 1852         value: colorclass,table-success, table-danger.
 1853         
 1854         The first color is used to highlight column values that satisfy the specified
 1855         criterion for the column. The second color highlights the rest of the values
 1856         in the column. 
 1857         
 1858         Possible values for colortype: colorclass or colorspec.
 1859         
 1860         Any valid bootstrap contextual color class is supported for 'colorclass'
 1861         color type. For example: table-primary (Blue), table-success (Green),
 1862         table-danger (Red), table-info (Light blue), table-warning (Orange),
 1863         table-secondary (Grey), table-light (Light grey), and  table-dark (Dark grey).
 1864         
 1865         The following bootstrap color classes may also be used: bg-primary, bg-success,
 1866         bg-danger, bg-info, bg-warning, bg-secondary.
 1867         
 1868         Any valid color name or hexadecimal color specification is supported for
 1869         'colorspec' color type. For example: red, green, blue, #ff0000, #00ff00, #0000ff.
 1870     --highlightColorsRanges <colortype,color1,color2,color3>  [default: auto]
 1871         Background colors used to highlight column values using criteria specified
 1872         by '--highlightValuesRanges' option. Default value:  colorclass, table-success,
 1873         table-warning, table-danger.
 1874         
 1875         The first and third color are used to highlight column values lower and higher
 1876         than the specified values for the lower and upper bound. The middle color highlights
 1877         the rest of the values in the column.
 1878         
 1879         The supported color type and values are explained in the section for '--highlightColors'.
 1880     --highlightColorsRandom <colortype,color1,color2,...>  [default: auto]
 1881         Background color list to use for randomly selecting a color to highlight
 1882         column values during 'Random' value of '--highlightValuesClasses' option.
 1883         
 1884         Default value:  colorclass,table-primary,table-success,table-danger,table-info,
 1885         table-warning,table-secondary.
 1886         
 1887         The supported color type and values are explained in the section for '--highlightColors'.
 1888     -i, --infile <infile>
 1889         Input file name.
 1890     --infileParams <Name,Value,...>  [default: auto]
 1891         A comma delimited list of parameter name and value pairs for reading
 1892         molecules from files. The supported parameter names for different file
 1893         formats, along with their default values, are shown below:
 1894             
 1895             SD, MOL: removeHydrogens,yes,sanitize,yes,strictParsing,yes
 1896             SMILES: smilesColumn,1,smilesNameColumn,2,smilesDelimiter,space,
 1897                 sanitize,yes
 1898             
 1899         Possible values for smilesDelimiter: space, comma or tab.
 1900     -k, --keysNavigation <yes or no>  [default: yes]
 1901         Provide Excel like keyboard cell navigation for the table.
 1902     -m, --molImageSize <width,height>  [default: 200,150]
 1903         Image size of a molecule in pixels.
 1904     --molImageEncoded <yes or no>  [default: yes]
 1905         Base64 encode SVG image of a molecule for inline embedding in a HTML page.
 1906         The inline SVG image may fail to display in browsers without encoding.
 1907     -o, --outfile <outfile>
 1908         Output file name.
 1909     --overwrite
 1910         Overwrite existing files.
 1911     -p, --paging <yes or no>  [default: yes]
 1912         Provide page navigation for browsing data in the table.
 1913     --pagingType <numbers, simple, ...>  [default: full_numbers]
 1914         Type of page navigation. Possible values: numbers, simple, simple_numbers,
 1915         full, full_numbers, or first_last_numbers.
 1916             
 1917             numbers - Page number buttons only
 1918             simple - 'Previous' and 'Next' buttons only
 1919             simple_numbers - 'Previous' and 'Next' buttons, plus page numbers
 1920             full - 'First', 'Previous', 'Next' and 'Last' buttons
 1921             full_numbers - 'First', 'Previous', 'Next' and 'Last' buttons, plus
 1922                 page numbers
 1923             first_last_numbers - 'First' and 'Last' buttons, plus page numbers
 1924             
 1925     --pageLength <number>  [default: 15]
 1926         Number of rows to show per page.
 1927     -r, --regexSearch <yes or no>  [default: yes]
 1928         Allow regular expression search through alphanumerical data in the table.
 1929     -s, --showMolName <yes or no>  [default: auto]
 1930         Show molecule names in a column next to the column corresponding to primary
 1931         structure data in SD and SMILES file. The default value is yes for SD and SMILES file.
 1932         This option is ignored for CSV/TSV text files.
 1933     --scrollX <yes or no>  [default: yes]
 1934         Provide horizontal scroll bar in the table as needed.
 1935     --scrollY <yes or no>  [default: yes]
 1936         Provide vertical scroll bar in the table as needed.
 1937     --scrollYSize <number>  [default: 75vh]
 1938         Maximum height of table viewport either in pixels or percentage of the browser
 1939         window height before providing a vertical scroll bar. Default: 75% of the height of
 1940         browser window.
 1941     -t, --tableStyle <table,table-striped,...>  [default: table,table-hover,table-sm]
 1942         Style of table. Possible values: table, table-striped, table-bordered,
 1943         table-hover, table-dark, table-sm, none, or All. Default: 'table,table-hover,table-sm'.
 1944         A comma delimited list of any valid Bootstrap table styles is also supported.
 1945     --tableFooter <yes or no>  [default: yes]
 1946         Show column headers at the end of the table.
 1947     --tableHeaderStyle <thead-dark,thead-light,...>  [default: thead-dark]
 1948         Style of table header. Possible values: thead-dark, thead-light, or none.
 1949         The names of the following contextual color classes are also supported:
 1950         table-primary (Blue), table-success (Green), table-danger (Red), table-info
 1951         (Light blue), table-warning (Orange), table-active (Grey), table-light (Light
 1952         grey), and  table-dark (Dark grey).
 1953     -w, --workingdir <dir>
 1954         Location of working directory which defaults to the current directory.
 1955     --wrapText <yes or no>  [default: yes]
 1956         Wrap alphanumeric text using <br/> delimiter for display in a HTML table.
 1957     --wrapTextWidth <number>  [default: 40]
 1958         Maximum width in characters before wrapping alphanumeric text for display
 1959         in a HTML table.
 1960 
 1961 Examples:
 1962     To generate a HTML table containing structure and alphanumeric data for
 1963     molecules in a SD file along with all the bells and whistles to interact with
 1964     the table, type:
 1965 
 1966         % RDKitDrawMoleculesAndDataTable.py -i Sample.sdf -o SampleOut.html
 1967 
 1968     To generate a HTML table containing structure and alphanumeric data for
 1969     molecules in a SMILES file along with all the bells and whistles to interact
 1970     with the table, type:
 1971 
 1972         % RDKitDrawMoleculesAndDataTable.py  -i Sample.smi -o SampleOut.html
 1973 
 1974     To generate a HTML table containing multiple structure columns for molecules
 1975     in a CSV file along with all the bells and whistles to interact with the table, type:
 1976 
 1977         % RDKitDrawMoleculesAndDataTable.py -i SampleSeriesRGroupsD3R.csv
 1978           -o SampleSeriesRGroupsD3ROut.html
 1979 
 1980     To generate a HTML table containing structure and alphanumeric data for
 1981     molecules in a SD file without any bells and whistles to interact with
 1982     the table, type:
 1983 
 1984         % RDKitDrawMoleculesAndDataTable.py --colVisibility no --freezeCols no
 1985           --keysNavigation no --paging no --regexSearch no --scrollX no
 1986           --scrollY no -i Sample.sdf -o SampleOut.html
 1987 
 1988     To generate a HTML table containing structure and alphanumeric data for
 1989     molecules in a SD file along with highlighting molecular weight values
 1990     using a specified criterion, type:
 1991 
 1992         % RDKitDrawMoleculesAndDataTable.py  --highlightValues
 1993           "MolecularWeight,numeric,le,500" -i Sample.sdf -o SampleOut.html
 1994 
 1995     To generate a HTML table containing structure and alphanumeric data for
 1996     molecules in a SD file along with highlighting range of molecular weight values
 1997     using a specified criterion, type:
 1998 
 1999         % RDKitDrawMoleculesAndDataTable.py  --highlightValuesRanges
 2000           "MolecularWeight,numeric,lt,400,gt,500" -i Sample.sdf -o SampleOut.html
 2001 
 2002     To generate a HTML table containing structure and alphanumeric data for
 2003     molecules in a SD file along with highlighting molecular weight values and
 2004     ranges of SLogP values using a specified criterion and color schemes, type:
 2005 
 2006         % RDKitDrawMoleculesAndDataTable.py  --highlightValues
 2007           "MolecularWeight,numeric,le,500" --highlightValuesRanges
 2008           "SLogP,numeric,lt,0,gt,5" --highlightColors "colorclass,table-success,
 2009           table-danger" --highlightColorsRanges "colorclass,table-danger,
 2010           table-success,table-warning" -i Sample.sdf -o SampleOut.html
 2011 
 2012     To generate a HTML table containing structure and alphanumeric data for
 2013     molecules in a SD file along with highlighting RuleOf5 physicochemical
 2014     properties using a pre-defined set of criteria, type:
 2015 
 2016         % RDKitDrawMoleculesAndDataTable.py  --highlightValuesClasses RuleOf5
 2017           -i Sample.sdf -o SampleOut.html
 2018 
 2019     To generate a HTML table containing structure and alphanumeric data for
 2020     molecules in a SD file along with all the bells and whistles to interact
 2021     with the table and highlight a specific SMARTS pattern in molecules, type:
 2022 
 2023         % RDKitDrawMoleculesAndDataTable.py  --highlightSMARTS "c1ccccc1"
 2024           -i Sample.sdf -o SampleOut.html
 2025 
 2026     To generate a HTML table containing structure and alphanumeric data for
 2027     molecules in a SD file along with highlighting of values using random colors
 2028     from a default list of colors, type:
 2029 
 2030         % RDKitDrawMoleculesAndDataTable.py --highlightValuesClasses Random
 2031           -i Sample.sdf -o SampleOut.html
 2032 
 2033     To generate a HTML table containing structure and alphanumeric data for
 2034     molecules in a SD file along with highlighting of values using random colors
 2035     from a specified list of colors, type:
 2036 
 2037         % RDKitDrawMoleculesAndDataTable.py --highlightValuesClasses Random
 2038           --highlightColorsRandom "colorspec,Lavender,MediumPurple,SkyBlue,
 2039           CornflowerBlue,LightGreen,MediumSeaGreen,Orange,Coral,Khaki,Gold,
 2040           Salmon,LightPink,Aquamarine,MediumTurquoise,LightGray" 
 2041           -i Sample.sdf -o SampleOut.html
 2042 
 2043     To generate a HTML table containing structure and alphanumeric data for
 2044     molecules in a SMILES file using specific columns, type:
 2045 
 2046         % RDKitDrawMoleculesAndDataTable.py --infileParams "smilesDelimiter,
 2047           comma, smilesColumn,1,smilesNameColumn,2"
 2048           -i SampleSMILES.csv -o SampleOut.html
 2049 
 2050 Author:
 2051     Manish Sud(msud@san.rr.com)
 2052 
 2053 See also:
 2054     RDKitConvertFileFormat.py, RDKitDrawMolecules.py, RDKitRemoveDuplicateMolecules.py,
 2055     RDKitSearchFunctionalGroups.py, RDKitSearchSMARTS.py
 2056 
 2057 Copyright:
 2058     Copyright (C) 2021 Manish Sud. All rights reserved.
 2059 
 2060     The functionality available in this script is implemented using RDKit, an
 2061     open source toolkit for cheminformatics developed by Greg Landrum.
 2062 
 2063     This file is part of MayaChemTools.
 2064 
 2065     MayaChemTools is free software; you can redistribute it and/or modify it under
 2066     the terms of the GNU Lesser General Public License as published by the Free
 2067     Software Foundation; either version 3 of the License, or (at your option) any
 2068     later version.
 2069 
 2070 """
 2071 
 2072 if __name__ == "__main__":
 2073     main()