1 #!/bin/env python 2 # 3 # File: RDKitDrawMoleculesAndDataTable.py 4 # Author: Manish Sud <msud@san.rr.com> 5 # 6 # Copyright (C) 2024 Manish Sud. All rights reserved. 7 # 8 # The functionality available in this script is implemented using RDKit, an 9 # open source toolkit for cheminformatics developed by Greg Landrum. 10 # 11 # This file is part of MayaChemTools. 12 # 13 # MayaChemTools is free software; you can redistribute it and/or modify it under 14 # the terms of the GNU Lesser General Public License as published by the Free 15 # Software Foundation; either version 3 of the License, or (at your option) any 16 # later version. 17 # 18 # MayaChemTools is distributed in the hope that it will be useful, but without 19 # any warranty; without even the implied warranty of merchantability of fitness 20 # for a particular purpose. See the GNU Lesser General Public License for more 21 # details. 22 # 23 # You should have received a copy of the GNU Lesser General Public License 24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or 25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, 26 # Boston, MA, 02111-1307, USA. 27 # 28 29 from __future__ import print_function 30 31 # Add local python path to the global path and import standard library modules... 32 import os 33 import sys; sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python")) 34 import time 35 import re 36 import random 37 38 # RDKit imports... 39 try: 40 from rdkit import rdBase 41 from rdkit import Chem 42 from rdkit.Chem import AllChem 43 except ImportError as ErrMsg: 44 sys.stderr.write("\nFailed to import RDKit module/package: %s\n" % ErrMsg) 45 sys.stderr.write("Check/update your RDKit environment and try again.\n\n") 46 sys.exit(1) 47 48 # MayaChemTools imports... 49 try: 50 from docopt import docopt 51 import MiscUtil 52 import RDKitUtil 53 except ImportError as ErrMsg: 54 sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg) 55 sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n") 56 sys.exit(1) 57 58 ScriptName = os.path.basename(sys.argv[0]) 59 Options = {} 60 OptionsInfo = {} 61 62 def main(): 63 """Start execution of the script.""" 64 65 MiscUtil.PrintInfo("\n%s (RDKit v%s; MayaChemTools v%s; %s): Starting...\n" % (ScriptName, rdBase.rdkitVersion, MiscUtil.GetMayaChemToolsVersion(), time.asctime())) 66 67 (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime() 68 69 # Retrieve command line arguments and options... 70 RetrieveOptions() 71 72 # Process and validate command line arguments and options... 73 ProcessOptions() 74 75 # Perform actions required by the script... 76 GenerateMoleculesAndDataTable() 77 78 MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName) 79 MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime)) 80 81 def GenerateMoleculesAndDataTable(): 82 """Generate a HTML table containing molecules and alphanumerical data.""" 83 84 # Retrieve data... 85 ValidMols = RetrieveMoleculesAndData() 86 87 # Setup data type map... 88 DataMap = IdentifyStructureAndNumericalData(ValidMols) 89 90 # Validate data labels used to specify highlighting data... 91 ValidateSpecifiedDataLabels(DataMap) 92 93 # Validate show molecule name option... 94 ValidateShowMolNameOption(DataMap) 95 96 # Compute 2D coordinates before alignment... 97 if OptionsInfo["Compute2DCoords"]: 98 MiscUtil.PrintInfo("\nComputing 2D coordinates for primary structure data...") 99 for Mol in ValidMols: 100 AllChem.Compute2DCoords(Mol) 101 102 # Perform alignment to a common template for primary molecular structure data... 103 PerformAlignment(ValidMols) 104 105 # Write out a HTML file... 106 WriteHTMLTableFile(ValidMols, DataMap) 107 108 def WriteHTMLTableFile(ValidMols, DataMap): 109 """Write out a HTML table file.""" 110 111 Outfile = OptionsInfo["Outfile"] 112 113 Writer = open(Outfile, "w") 114 if Writer is None: 115 MiscUtil.PrintError("Failed to setup a writer for output fie %s " % Outfile) 116 117 MiscUtil.PrintInfo("\nGenerating file %s..." % Outfile) 118 119 WriteHTMLPageHeader(Writer, DataMap) 120 WriteHTMLPageTitle(Writer) 121 122 WriteHTMLTableHeader(Writer) 123 WriteHTMLTableRows(Writer, ValidMols, DataMap) 124 WriteHTMLTableEnd(Writer) 125 126 WriteHTMLPageFooter(Writer) 127 WriteHTMLPageEnd(Writer) 128 129 if Writer is not None: 130 Writer.close() 131 132 def WriteHTMLTableRows(Writer, ValidMols, DataMap): 133 """Write out HTML table rows.""" 134 135 WriteTableHeaderRow(Writer, ValidMols, DataMap) 136 WriteTableDataRows(Writer, ValidMols, DataMap) 137 WriteTableFooterRow(Writer, ValidMols, DataMap) 138 139 def WriteTableDataRows(Writer, ValidMols, DataMap): 140 """Write out table data row.""" 141 142 Writer.write(""" <tbody>\n""") 143 144 MolCount = 0 145 for Mol in ValidMols: 146 MolCount += 1 147 Writer.write(""" <tr>\n""") 148 149 if OptionsInfo["CounterCol"]: 150 Writer.write(""" <td></td>\n""") 151 152 SetupPrimaryStructureTableData(Writer, Mol) 153 154 if OptionsInfo["ShowMolName"]: 155 MolName = RDKitUtil.GetMolName(Mol, MolCount) 156 WrappedMolName = MiscUtil.WrapText(MolName, "<br/>", OptionsInfo["WrapTextWidth"]) 157 Writer.write(""" <td>%s</td>\n""" % WrappedMolName) 158 159 # Set up rest of the data.. 160 AvailableDataLabelsMap = Mol.GetPropsAsDict(includePrivate = False, includeComputed = False) 161 for DataLabel in DataMap["DataLabels"]: 162 if not DataLabel in AvailableDataLabelsMap: 163 Writer.write(""" <td></td>\n""") 164 continue 165 166 # Check for empty value... 167 DataValue = "%s" % AvailableDataLabelsMap[DataLabel] 168 DataValue = DataValue.strip() 169 if MiscUtil.IsEmpty(DataValue): 170 Writer.write(""" <td></td>\n""") 171 continue 172 173 if DataMap["StructureDataMap"][DataLabel]: 174 SetupNonPrimaryStructureTableData(Writer, DataLabel, DataValue, DataMap) 175 else: 176 SetupAlphanumericTableData(Writer, DataLabel, DataValue, DataMap) 177 178 Writer.write(""" </tr>\n""") 179 180 Writer.write(""" </tbody>\n""") 181 182 def SetupPrimaryStructureTableData(Writer, Mol): 183 """Set up an inline SVG image for primary structure data for a table cell.""" 184 185 HightlightAtomList = SetupAtomListToHighlight(Mol, "Structure") 186 SVGImageTag = SetupMolInLineSVGImageTag(Mol, HightlightAtomList) 187 188 Writer.write(""" <td bgcolor="white"><%s></td>\n""" % SVGImageTag) 189 190 def SetupNonPrimaryStructureTableData(Writer, DataLabel, DataValue, DataMap): 191 """Set up an inline SVG image for non primary structure data cell.""" 192 193 WrappedDataValue = DataValue 194 if OptionsInfo["WrapText"]: 195 WrappedDataValue = MiscUtil.WrapText(DataValue, "<br/>", OptionsInfo["WrapTextWidth"]) 196 197 if DataMap["SMILESDataMap"][DataLabel]: 198 Mol = Chem.MolFromSmiles(DataValue, sanitize = False) 199 Mol.UpdatePropertyCache(strict = False) 200 else: 201 MiscUtil.PrintWarning("\nIgnoring uknown structure data column type with column label %s: %s\n" % (DataLabel, DataValue)) 202 Writer.write(""" <td>%s</td>\n""" % WrappedDataValue) 203 return 204 205 if Mol is None: 206 MiscUtil.PrintWarning("\nSMILES parsing failed for data label %s: %s\n" % (DataLabel, DataValue)) 207 Writer.write(""" <td>%s</td>\n""" % WrappedDataValue) 208 return 209 elif not Mol.GetNumHeavyAtoms(): 210 Writer.write(""" <td>%s</td>\n""" % WrappedDataValue) 211 return 212 elif AllChem.Compute2DCoords(Mol) < 0: 213 Writer.write(""" <td>%s</td>\n""" % WrappedDataValue) 214 return 215 216 HightlightAtomList = SetupAtomListToHighlight(Mol, DataLabel) 217 SVGImageTag = SetupMolInLineSVGImageTag(Mol, HightlightAtomList) 218 219 Writer.write(""" <td bgcolor="white"><%s></td>\n""" % SVGImageTag) 220 221 def SetupAlphanumericTableData(Writer, DataLabel, DataValue, DataMap): 222 """Set up alphanumeric data.""" 223 224 BackgroundColor, BackgroundColorType = GetAlphanumeircValueHighlightBackgroundColor(DataLabel, DataValue, DataMap) 225 SetupAlphanumericTableDataValue(Writer, DataValue, BackgroundColor, BackgroundColorType) 226 227 def WriteTableHeaderRow(Writer, ValidMols, DataMap): 228 """Write out table header row.""" 229 230 TableHeaderStyle = OptionsInfo["TableHeaderStyle"] 231 if TableHeaderStyle is None: 232 Writer.write(""" <thead>\n""") 233 Writer.write(""" <tr>\n""") 234 elif re.match("^(thead|table)", TableHeaderStyle): 235 Writer.write(""" <thead class="%s">\n""" % TableHeaderStyle) 236 Writer.write(""" <tr>\n""") 237 else: 238 Writer.write(""" <thead>\n""") 239 Writer.write(""" <tr bgcolor="%s"\n""" % TableHeaderStyle) 240 241 if OptionsInfo["CounterCol"]: 242 Writer.write(""" <th></th>\n""") 243 Writer.write(""" <th>Structure</th>\n""") 244 if OptionsInfo["ShowMolName"]: 245 Writer.write(""" <th>%s</th>\n""" % OptionsInfo["ShowMolNameDataLabel"]) 246 247 # Write out rest of the column headers... 248 for DataLabel in DataMap["DataLabels"]: 249 Writer.write(""" <th>%s</th>\n""" % DataLabel) 250 251 Writer.write(""" </tr>\n""") 252 Writer.write(""" </thead>\n""") 253 254 def WriteTableFooterRow(Writer, ValidMols, DataMap): 255 """Write out table footer row.""" 256 257 if not OptionsInfo["TableFooter"]: 258 return 259 260 Writer.write(""" <tfoot>\n""") 261 Writer.write(""" <tr>\n""") 262 263 if OptionsInfo["CounterCol"]: 264 Writer.write(""" <td></td>\n""") 265 Writer.write(""" <td>Structure</td>\n""") 266 if OptionsInfo["ShowMolName"]: 267 Writer.write(""" <td>%s</td>\n""" % OptionsInfo["ShowMolNameDataLabel"]) 268 269 # Write out rest of the column headers... 270 for DataLabel in DataMap["DataLabels"]: 271 Writer.write(""" <td>%s</td>\n""" % DataLabel) 272 273 Writer.write(""" </tr>\n""") 274 Writer.write(""" </tfoot>\n""") 275 276 def WriteHTMLPageHeader(Writer, DataMap): 277 """Write out HTML page header.""" 278 279 # Collect column indices containing counter and structure data to disable 280 # sorting and searching. In addition, set up a list to exclude counter and 281 # primary structure columns from column visibility pulldown along with 282 # any other columns... 283 # 284 if OptionsInfo["CounterCol"]: 285 StrColIndicesList = ["0", "1"] 286 ColVisibilityExcludeColIndicesList = ["0", "1"] 287 ColIndexOffset = 2 288 FreezeLeftColumns = "2" 289 else: 290 StrColIndicesList = ["0"] 291 ColVisibilityExcludeColIndicesList = ["0"] 292 ColIndexOffset = 1 293 FreezeLeftColumns = "1" 294 295 if OptionsInfo["ShowMolName"]: 296 ColIndexOffset += 1 297 298 MaxColVisColCount = OptionsInfo["ColVisibilityCtrlMax"] 299 MaxDataColVisColCount = MaxColVisColCount - len(ColVisibilityExcludeColIndicesList) 300 MaxDataColVisColCount = MaxColVisColCount 301 302 DataColVisibilityExclude = False 303 ColCount = len(DataMap["DataLabels"]) 304 if OptionsInfo["ColVisibility"]: 305 if ColCount > MaxDataColVisColCount: 306 DataColVisibilityExclude = True 307 MiscUtil.PrintWarning("The number of data columns, %d, is more than %d. Only first %d data columns will be available in column visibility pulldown." % (ColCount, MaxColVisColCount, MaxColVisColCount)) 308 309 DisplayButtons = False 310 if OptionsInfo["ColVisibility"]: 311 if ColCount > 0 or OptionsInfo["ShowMolName"]: 312 DisplayButtons = True 313 314 FreezeCols = False 315 if (OptionsInfo["FreezeCols"] and OptionsInfo["ScrollX"]): 316 FreezeCols = True 317 318 for Index, DataLabel in enumerate(DataMap["DataLabels"]): 319 if DataMap["StructureDataMap"][DataLabel]: 320 StrColIndex = Index + ColIndexOffset 321 StrColIndicesList.append("%s" % StrColIndex) 322 323 if OptionsInfo["ColVisibility"]: 324 if Index >= MaxDataColVisColCount: 325 ColIndex = Index + ColIndexOffset 326 ColVisibilityExcludeColIndicesList.append("%s" %ColIndex) 327 328 StrColIndices = MiscUtil.JoinWords(StrColIndicesList, ", ") 329 ColVisibilityExcludeColIndices = MiscUtil.JoinWords(ColVisibilityExcludeColIndicesList, ", ") 330 331 Paging = "true" if OptionsInfo["Paging"] else "false" 332 PageLength = "%d" % OptionsInfo["PageLength"] 333 PagingType = "\"%s\"" % OptionsInfo["PagingType"] 334 335 ScrollX = "true" if OptionsInfo["ScrollX"] else "false" 336 337 ScrollY = "" 338 if OptionsInfo["ScrollY"]: 339 if re.search("vh$", OptionsInfo["ScrollYSize"]): 340 ScrollY = "\"%s\"" % OptionsInfo["ScrollYSize"] 341 else: 342 ScrollY = "%s" % OptionsInfo["ScrollYSize"] 343 344 RegexSearch = "true" if OptionsInfo["RegexSearch"] else "false" 345 346 # Start HTML header... 347 Title = "Molecules and data table" if OptionsInfo["Header"] is None else OptionsInfo["Header"] 348 349 Writer.write("""\ 350 <!doctype html> 351 <html lang="en"> 352 <head> 353 <title>%s</title> 354 <meta charset="utf-8"> 355 <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no"> 356 <link rel="stylesheet" type="text/css" href="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0/css/bootstrap.min.css"> 357 <link rel="stylesheet" type="text/css" href="https://cdn.datatables.net/1.10.16/css/dataTables.bootstrap4.min.css"> 358 359 """ % (Title)) 360 361 if (FreezeCols): 362 Writer.write("""\ 363 <link rel="stylesheet" type="text/css" href="https://cdn.datatables.net/fixedcolumns/3.2.4/css/fixedColumns.bootstrap4.min.css"> 364 """) 365 366 if (OptionsInfo["KeysNavigation"]): 367 Writer.write("""\ 368 <link rel="stylesheet" type="text/css" href="https://cdn.datatables.net/keytable/2.3.2/css/keyTable.bootstrap4.min.css"> 369 """) 370 371 Writer.write("""\ 372 373 <script type="text/javascript" language="javascript" src="https://code.jquery.com/jquery-1.12.4.js"></script> 374 <script type="text/javascript" language="javascript" src="https://cdn.datatables.net/1.10.16/js/jquery.dataTables.min.js"></script> 375 <script type="text/javascript" language="javascript" src="https://cdn.datatables.net/1.10.16/js/dataTables.bootstrap4.min.js"></script> 376 377 """) 378 379 if DisplayButtons: 380 Writer.write("""\ 381 <script type="text/javascript" language="javascript" src="https://cdn.datatables.net/buttons/1.5.1/js/dataTables.buttons.min.js"></script> 382 <script type="text/javascript" language="javascript" src="https://cdn.datatables.net/buttons/1.5.1/js/buttons.bootstrap4.min.js"></script> 383 <script type="text/javascript" language="javascript" src="https://cdn.datatables.net/buttons/1.5.1/js/buttons.colVis.min.js"></script> 384 385 """) 386 387 if (FreezeCols): 388 Writer.write("""\ 389 <script type="text/javascript" language="javascript" src="https://cdn.datatables.net/fixedcolumns/3.2.4/js/dataTables.fixedColumns.min.js"></script> 390 """) 391 392 if (OptionsInfo["KeysNavigation"]): 393 Writer.write("""\ 394 <script type="text/javascript" language="javascript" src="https://cdn.datatables.net/keytable/2.3.2/js/dataTables.keyTable.min.js"></script> 395 """) 396 397 # Intialize table using Bootstrap, DataTables and JQuery frameworks... 398 Writer.write("""\ 399 400 <script type="text/javascript" class="init"> 401 402 $(document).ready(function() { 403 var MolsAndDataTable = $('#MolsAndDataTable').DataTable( { 404 "columnDefs": [ 405 { 406 "orderable": false, 407 "searchable": false, 408 "targets": [%s] 409 }, 410 """ % (StrColIndices)) 411 412 if OptionsInfo["ColVisibility"]: 413 Writer.write("""\ 414 { 415 "className": "noColVisCtrl", 416 "targets": [%s] 417 } 418 """ % (ColVisibilityExcludeColIndices)) 419 420 Writer.write("""\ 421 ], 422 """) 423 424 # Setup column visibility control pulldown by excluding counter column 425 # and primary structure column from the list... 426 # 427 if OptionsInfo["ColVisibility"]: 428 # Set up dom for button display... 429 if OptionsInfo["Paging"]: 430 Writer.write("""\ 431 "dom": "<'row'<'col'l><'col'B><'col'f>>" + 432 "<'row'<'col-sm-12'tr>>" + 433 "<'row'<'col-sm-5'i><'col-sm-7'p>>", 434 """) 435 else: 436 Writer.write("""\ 437 "dom": "<'row'<'col-sm-6'B><'col-sm-6'f>>" + 438 "<'row'<'col-sm-12'tr>>" + 439 "<'row'<'col-sm-5'i><'col-sm-7'p>>", 440 """) 441 # Set up buttons... 442 Writer.write("""\ 443 "buttons": [ 444 { 445 "extend": "colvis", 446 "text": "Column visibility", 447 "className": "btn btn-outline-light text-dark", 448 "columns": ":not(.noColVisCtrl)", 449 """) 450 if not DataColVisibilityExclude: 451 Writer.write("""\ 452 "prefixButtons": [ "colvisRestore" ], 453 """) 454 455 Writer.write("""\ 456 "columnText": function ( dt, colIndex, colLabel ) { 457 return (colIndex + 1) + ": " + colLabel; 458 }, 459 } 460 ], 461 """) 462 463 # Write out rest of the variables for DataTables... 464 if FreezeCols: 465 Writer.write("""\ 466 "fixedColumns": { 467 "leftColumns": %s 468 }, 469 """ % (FreezeLeftColumns)) 470 471 if (OptionsInfo["KeysNavigation"]): 472 Writer.write("""\ 473 "keys": true, 474 """) 475 476 Writer.write("""\ 477 "pageLength": %s, 478 "lengthMenu": [ [10, 15, 25, 50, 100, 500, 1000, -1], [10, 15, 25, 50, 100, 500, 1000, "All"] ], 479 "paging": %s, 480 "pagingType": %s, 481 "scrollX": %s, 482 "scrollY": %s, 483 "scrollCollapse": true, 484 "order": [], 485 "search" : {"regex" : %s}, 486 } ); 487 """ % (PageLength, Paging, PagingType, ScrollX, ScrollY, RegexSearch)) 488 489 if OptionsInfo["CounterCol"]: 490 Writer.write("""\ 491 MolsAndDataTable.on( 'order.dt search.dt', function () { 492 MolsAndDataTable.column(0, {search:'applied', order:'applied'}).nodes().each( function (cell, rowIndex) { 493 cell.innerHTML = rowIndex + 1; 494 } ); 495 } ).draw(); 496 """) 497 498 # End of Javacscript code... 499 Writer.write("""\ 500 } ); 501 502 </script> 503 """) 504 505 # Finish up HTML header... 506 Writer.write("""\ 507 508 </head> 509 <body> 510 <div class="container-fluid"> 511 <br/> 512 """) 513 514 def WriteHTMLPageEnd(Writer): 515 """Write out HTML page end.""" 516 517 Writer.write("""\ 518 </div> 519 </body> 520 </html> 521 """) 522 523 def WriteHTMLPageTitle(Writer): 524 """Write out HTML page title.""" 525 526 if OptionsInfo["Header"] is None: 527 return 528 529 Writer.write(""" <%s class="text-center">%s</%s>\n""" % (OptionsInfo["HeaderStyle"], OptionsInfo["Header"], OptionsInfo["HeaderStyle"])) 530 531 def WriteHTMLPageFooter(Writer): 532 """Write out HTML page footer.""" 533 534 if OptionsInfo["Footer"] is None: 535 return 536 537 Writer.write(""" <br/>\n <p class="%s">%s</p>\n""" % (OptionsInfo["FooterClass"], OptionsInfo["Footer"])) 538 539 def WriteHTMLTableHeader(Writer): 540 """Write out HTML table header.""" 541 542 if OptionsInfo["TableStyle"] is None: 543 Writer.write("""\n <table id="MolsAndDataTable" cellspacing="0" width="100%">\n""") 544 else: 545 Writer.write(""" <table id="MolsAndDataTable" class="%s" cellspacing="0" width="100%s">\n""" % (OptionsInfo["TableStyle"], "%")) 546 547 def WriteHTMLTableEnd(Writer): 548 """Write out HTML table end.""" 549 550 Writer.write(""" </table>\n\n""") 551 552 def RetrieveMoleculesAndData(): 553 """Retrieve molecules and data from input file.""" 554 555 MiscUtil.PrintInfo("\nReading file %s..." % OptionsInfo["Infile"]) 556 557 if MiscUtil.CheckFileExt(OptionsInfo["Infile"] ,"smi csv tsv txt"): 558 # Check for the presence of SMILES column name in title line... 559 Infile = open(OptionsInfo["Infile"], "r") 560 if Infile is None: 561 MiscUtil.PrintError("Couldn't open file %s..." % OptionsInfo["Infile"]) 562 Line = Infile.readline() 563 Infile.close() 564 565 if not re.search("SMILES", Line, re.I): 566 MiscUtil.PrintError("The input file, %s, must contain a title line containing a column name with SMILES in its name." % OptionsInfo["Infile"]) 567 568 if MiscUtil.CheckFileExt(OptionsInfo["Infile"],"sdf sd smi"): 569 ValidMols, MolCount, ValidMolCount = RDKitUtil.ReadAndValidateMolecules(OptionsInfo["Infile"], **OptionsInfo["InfileParams"]) 570 else: 571 ValidMols, MolCount, ValidMolCount = RetrieveMoleculesFromTextFile(OptionsInfo["Infile"]) 572 573 MiscUtil.PrintInfo("Total number of molecules: %d" % MolCount) 574 MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount) 575 MiscUtil.PrintInfo("Number of ignored molecules: %d" % (MolCount - ValidMolCount)) 576 577 return ValidMols 578 579 def RetrieveMoleculesFromTextFile(Infile): 580 """Retrieve molecules from a CSV/TSV text file.""" 581 582 # Read and parse text lines... 583 Delimiter = "," if MiscUtil.CheckFileExt(Infile ,"csv") else "\t" 584 QuoteChar = '"' 585 IgnoreHeaderLine = False 586 TextLinesWords = MiscUtil.GetTextLinesWords(Infile, Delimiter, QuoteChar, IgnoreHeaderLine) 587 588 # Process column names... 589 ColNames = TextLinesWords[0] 590 ColCount = len(ColNames) 591 592 MolColIndex = None 593 MolDataColIndices = [] 594 595 FirstSMILES = True 596 for ColIndex in range(0, ColCount): 597 if re.search("SMILES", ColNames[ColIndex], re.I) and FirstSMILES: 598 MolColIndex = ColIndex 599 FirstSMILES = False 600 continue 601 602 MolDataColIndices.append(ColIndex) 603 604 if MolColIndex is None: 605 MiscUtil.PrintError("The input file, %s, must contain a title line containing a column name with SMILES in its name." % Infile) 606 607 ValidMols = [] 608 MolCount = 0 609 610 Sanitize = OptionsInfo["InfileParams"]["Sanitize"] 611 612 # Process data lines... 613 for LineIndex in range(1, len(TextLinesWords)): 614 MolCount += 1 615 LineWords = TextLinesWords[LineIndex] 616 if len(LineWords) != ColCount: 617 MiscUtil.PrintWarning("Ignoring text line number %d: Number of columns, %d, must match number of columns, %d, in title line.\nLine: %s" % (MolCount, len(LineWords), ColCount, Delimiter.join(LineWords))) 618 continue 619 620 # Process molecule column... 621 MolSMILES = LineWords[MolColIndex] 622 Mol = Chem.MolFromSmiles(MolSMILES, sanitize = Sanitize) 623 if Mol is None: 624 MiscUtil.PrintWarning("Ignoring text line number %d: SMILES parsing failed\nLine: %s" % (MolCount, Delimiter.join(LineWords))) 625 continue 626 627 # Process molecule data columns... 628 for ColIndex in MolDataColIndices: 629 Name = ColNames[ColIndex] 630 Value = LineWords[ColIndex] 631 Mol.SetProp(Name, Value) 632 633 ValidMols.append(Mol) 634 635 ValidMolCount = len(ValidMols) 636 637 return (ValidMols, MolCount, ValidMolCount) 638 639 def IdentifyStructureAndNumericalData(ValidMols): 640 """Identify structure and alphanumerical data.""" 641 642 DataMap = {} 643 DataMap["DataLabels"] = [] 644 DataMap["DataLabelsMap"] = {} 645 DataMap["CanonicalDataLabelsMap"] = {} 646 647 DataMap["StructureDataMap"] = {} 648 DataMap["SMILESDataMap"] = {} 649 650 # Retrieve all possible data labels... 651 if MiscUtil.CheckFileExt(OptionsInfo["Infile"] ,"smi csv tsv txt"): 652 # First molecule contains all possible data fields... 653 Mol = ValidMols[0] 654 ProcessMolDataLabels(ValidMols[0], DataMap) 655 else: 656 # Go over all molecules to identify unique data labels... 657 MiscUtil.PrintInfo("\nRetrieving unique data labels for data in file %s..." % OptionsInfo["Infile"]) 658 for Mol in ValidMols: 659 ProcessMolDataLabels(Mol, DataMap) 660 661 return DataMap 662 663 def ProcessMolDataLabels(Mol, DataMap): 664 """Process data label to identify and track its type.""" 665 666 for DataLabel in Mol.GetPropNames(includePrivate = False, includeComputed = False): 667 if DataLabel in DataMap["DataLabelsMap"]: 668 continue 669 670 # Track labels... 671 DataMap["DataLabels"].append(DataLabel) 672 DataMap["DataLabelsMap"][DataLabel] = DataLabel 673 DataMap["CanonicalDataLabelsMap"][DataLabel.lower()] = DataLabel 674 675 DataMap["StructureDataMap"][DataLabel] = False 676 DataMap["SMILESDataMap"][DataLabel] = False 677 678 if re.search("SMILES", DataLabel, re.I): 679 DataMap["StructureDataMap"][DataLabel] = True 680 DataMap["SMILESDataMap"][DataLabel] = True 681 682 def ValidateShowMolNameOption(DataMap): 683 """Validate show molecule name option.""" 684 685 if not OptionsInfo["ShowMolName"]: 686 return 687 688 if not MiscUtil.CheckFileExt(OptionsInfo["Infile"],"sdf sd smi"): 689 OptionsInfo["ShowMolName"] = False 690 return 691 692 CanonicalDataLabel = OptionsInfo["ShowMolNameDataLabel"].lower() 693 if CanonicalDataLabel in DataMap["CanonicalDataLabelsMap"]: 694 OptionsInfo["ShowMolName"] = False 695 if not OptionsInfo["ShowMolNameAuto"]: 696 MiscUtil.PrintWarning("Ignoring \"--showMolName\" option: Data label \"Name\" corresponding to molecule name is already present in input file.") 697 698 def ValidateSpecifiedDataLabels(DataMap): 699 """Validate data labels used to specify highlighting data.""" 700 701 ValidateSpecifiedDataLabelsForHighlightSMARTS(DataMap) 702 703 ValidateSpecifiedDataLabelsForHighlightValues(DataMap) 704 ValidateSpecifiedDataLabelsForHighlightRanges(DataMap) 705 ValidateSpecifiedDataLabelsForHighlightClasses(DataMap) 706 707 def ValidateSpecifiedDataLabelsForHighlightSMARTS(DataMap): 708 """Validate data labels used to specify highlighting SMARTS option.""" 709 710 if OptionsInfo["HighlightSMARTSAllMode"]: 711 return 712 713 for DataLabel in OptionsInfo["HighlightSMARTSDataLabels"]: 714 if re.match("^Structure$", DataLabel, re.I): 715 continue 716 717 CanonicalDataLabel = DataLabel.lower() 718 if not CanonicalDataLabel in DataMap["CanonicalDataLabelsMap"]: 719 MiscUtil.PrintError("The data label specified, %s, using option \"--highlightSMARTS\" doesn't exist in input file." % DataLabel) 720 721 Label = DataMap["CanonicalDataLabelsMap"][CanonicalDataLabel] 722 if not DataMap["StructureDataMap"][Label]: 723 MiscUtil.PrintError("The data label specified, %s, using option \"--highlightSMARTS\" doesn't correspond to structure data: Valid structure data labels: SMILES in data label." % DataLabel) 724 725 def ValidateSpecifiedDataLabelsForHighlightValues(DataMap): 726 """Validate data labels used to specify highlighting values option.""" 727 728 ValidateDataLabels("--highlightValues", DataMap, OptionsInfo["HighlightValuesLabels"]) 729 730 def ValidateSpecifiedDataLabelsForHighlightRanges(DataMap): 731 """Validate data labels used to specify highlighting ranges option.""" 732 733 ValidateDataLabels("--highlightRanges", DataMap, OptionsInfo["HighlightRangesLabels"]) 734 735 def ValidateSpecifiedDataLabelsForHighlightClasses(DataMap): 736 """Validate data labels used to specify highlighting classes option.""" 737 738 if OptionsInfo["HighlightClassesRules"] is None: 739 return 740 741 ValidDataLabelsList = [] 742 NotValidDataLabelsList = [] 743 for Label in OptionsInfo["HighlightClassesLabels"]: 744 ValidCanonicalLabel = None 745 746 for LabelSynonym in OptionsInfo["HighlightClassesSynonymsMap"][Label]: 747 CanonicalLabel = LabelSynonym.lower() 748 749 # Is this label already in use... 750 if CanonicalLabel in OptionsInfo["HighlightValuesCanonicalLabelsMap"]: 751 MiscUtil.PrintInfo("") 752 MiscUtil.PrintWarning("The data label, %s, for class, %s , in option \"--highlightValuesClasses\" has already been used in \"--highlightValues\" option. It'll be ignored during highlighting." % (LabelSynonym, OptionsInfo["HighlightClasses"])) 753 continue 754 755 if CanonicalLabel in OptionsInfo["HighlightRangesCanonicalLabelsMap"]: 756 MiscUtil.PrintInfo("") 757 MiscUtil.PrintWarning("The data label, %s, for class, %s , in option \"--highlightValuesClasses\" has already been used in \"--highlightValuesRanges\" option. It'll be ignored during highlighting." % (LabelSynonym, OptionsInfo["HighlightClasses"])) 758 continue 759 760 # Is this label present in input file... 761 if CanonicalLabel in DataMap["CanonicalDataLabelsMap"]: 762 ValidCanonicalLabel = CanonicalLabel 763 break 764 765 if ValidCanonicalLabel is None: 766 MiscUtil.PrintWarning("The data label or its synonyms - %s - for class, %s , in option \"--highlightValuesClasses\" either don't exist in input file or have already been used for highlighting in option \"--highlightValuesClasses\" or \"--highlightValuesRanges\". It'll be ignored during highlighting." % (MiscUtil.JoinWords(OptionsInfo["HighlightClassesSynonymsMap"][Label], ", "), OptionsInfo["HighlightClasses"])) 767 NotValidDataLabelsList.append(Label) 768 continue 769 770 # Track label... 771 OptionsInfo["HighlightClassesCanonicalLabelsMap"][ValidCanonicalLabel] = Label 772 ValidDataLabelsList.append(DataMap["CanonicalDataLabelsMap"][ValidCanonicalLabel]) 773 774 ValidDataLabelsCount = len(ValidDataLabelsList) 775 NotValidDataLabelsCount = len(NotValidDataLabelsList) 776 DataLabelsCount = len(OptionsInfo["HighlightClassesLabels"]) 777 778 if ValidDataLabelsCount == 0: 779 MiscUtil.PrintInfo("") 780 MiscUtil.PrintWarning("The data labels and their synonyms for class, %s , in option \"--highlightValuesClasses\" either don't exists in input file or have already been used for highlighting in option \"--highlightValuesClasses\" or \"--highlightValuesRanges\". No class highlighting will be performed. Missing data labels: %s" % (OptionsInfo["HighlightClasses"], MiscUtil.JoinWords(OptionsInfo["HighlightClassesLabels"], ", "))) 781 elif ValidDataLabelsCount < DataLabelsCount: 782 MiscUtil.PrintInfo("") 783 MiscUtil.PrintWarning("The class, %s, based highlighting specified using \"--highlightValuesClasses\" option will be performed using only, %d, out of, %d, data labels: %s\nThe rest of the data label(s) - %s - either don't exist in the input file or have aready been used for highlighting in option \"--highlightValuesClasses\" or \"--highlightValuesRanges\"." % (OptionsInfo["HighlightClasses"], ValidDataLabelsCount, DataLabelsCount, MiscUtil.JoinWords(ValidDataLabelsList, ", "), MiscUtil.JoinWords(NotValidDataLabelsList, ", ") )) 784 785 def ValidateDataLabels(OptionName, DataMap, DataLabels): 786 """Validate data labels.""" 787 788 for DataLabel in DataLabels: 789 if re.match("^Structure$", DataLabel, re.I): 790 MiscUtil.PrintError("The data label specified, %s, using option \"-%s\" must not correspond to structure data. Structure label is not allowed." % (DataLabel, OptionName)) 791 792 CanonicalDataLabel = DataLabel.lower() 793 if not CanonicalDataLabel in DataMap["CanonicalDataLabelsMap"]: 794 MiscUtil.PrintError("The data label specified, %s, using option \"%s\" doesn't exist in input file." % (DataLabel, OptionName)) 795 796 Label = DataMap["CanonicalDataLabelsMap"][CanonicalDataLabel] 797 if DataMap["StructureDataMap"][Label]: 798 MiscUtil.PrintError("The data label specified, %s, using option \"%s\" must not correspond to structure data: Valid structure data labels contain \"SMILES\" in their name.." % (DataLabel, OptionName)) 799 800 def SetupMolInLineSVGImageTag(Mol, HightlightAtomList): 801 """Setup a inline SVG image tag for molecule.""" 802 803 SVGText = RDKitUtil.GetInlineSVGForMolecule(Mol, OptionsInfo["MolImageWidth"], OptionsInfo["MolImageHeight"], AtomListToHighlight = HightlightAtomList, Base64Encoded = OptionsInfo["MolImageEncoded"]) 804 805 if OptionsInfo["MolImageEncoded"]: 806 SVGInlineImageTag = "img src=\"data:image/svg+xml;base64,\n%s\"" % SVGText 807 else: 808 SVGInlineImageTag = "img src=\"data:image/svg+xml;charset=UTF-8,\n%s\"" % SVGText 809 810 return SVGInlineImageTag 811 812 def SetupAtomListToHighlight(Mol, DataLabel): 813 """Set up atom list to highlight using specified SMARTS patterns.""" 814 815 HighlightAtomList = None 816 if OptionsInfo["HighlightSMARTS"] is None: 817 return HighlightAtomList 818 819 if OptionsInfo["HighlightSMARTSAllMode"]: 820 PatternMol = OptionsInfo["HighlightSMARTSPatternMol"] 821 else: 822 CanonicalDataLabel = DataLabel.lower() 823 if not CanonicalDataLabel in OptionsInfo["HighlightSMARTSCanonicalDataLabelsMap"]: 824 return HighlightAtomList 825 826 Label = OptionsInfo["HighlightSMARTSCanonicalDataLabelsMap"][CanonicalDataLabel] 827 PatternMol = OptionsInfo["HighlightSMARTSPatternMolsMap"][Label] 828 829 # Get matched atom lists and flatten it... 830 MatchedAtomsLists = Mol.GetSubstructMatches(PatternMol) 831 MatchedAtoms = [ Atom for AtomsList in MatchedAtomsLists for Atom in AtomsList] 832 833 if len(MatchedAtoms): 834 HighlightAtomList = MatchedAtoms 835 836 return HighlightAtomList 837 838 def GetAlphanumeircValueHighlightBackgroundColor(DataLabel, DataValue, DataMap): 839 """Get background highlight color for a value.""" 840 841 BackgroundColor = None 842 BackgroundColorType = None 843 844 CanonicalDataLabel =DataLabel.lower() 845 if CanonicalDataLabel in OptionsInfo["HighlightValuesCanonicalLabelsMap"]: 846 return GetBackgroundColorUsingHighlightValuesMode(DataLabel, DataValue, DataMap) 847 elif CanonicalDataLabel in OptionsInfo["HighlightRangesCanonicalLabelsMap"]: 848 return GetBackgroundColorUsingHighlightRangesMode(DataLabel, DataValue, DataMap) 849 elif CanonicalDataLabel in OptionsInfo["HighlightClassesCanonicalLabelsMap"]: 850 return GetBackgroundColorUsingHighlightClassesMode(DataLabel, DataValue, DataMap) 851 elif OptionsInfo["HighlightClassesRandom"]: 852 return GetBackgroundColorUsingRandomMode(DataLabel, DataValue, DataMap) 853 854 return (BackgroundColor, BackgroundColorType) 855 856 def GetBackgroundColorUsingHighlightValuesMode(DataLabel, DataValue, DataMap): 857 """Get background highlight color for a value.""" 858 859 BackgroundColor = None 860 BackgroundColorType = None 861 862 CanonicalDataLabel =DataLabel.lower() 863 if not CanonicalDataLabel in OptionsInfo["HighlightValuesCanonicalLabelsMap"]: 864 return (BackgroundColor, BackgroundColorType) 865 866 Label = OptionsInfo["HighlightValuesCanonicalLabelsMap"][CanonicalDataLabel] 867 DataType = OptionsInfo["HighlightValuesTypesMap"][Label] 868 Criterion = OptionsInfo["HighlightValuesCriteriaMap"][Label] 869 CriterionValue = OptionsInfo["HighlightValuesCriteriaValuesMap"][Label] 870 871 return GetBackgroundColorForHighlightingValue(DataLabel, DataValue, DataType, Criterion, CriterionValue) 872 873 def GetBackgroundColorUsingHighlightClassesMode(DataLabel, DataValue, DataMap): 874 """Get background highlight color for a value.""" 875 876 BackgroundColor = None 877 BackgroundColorType = None 878 879 CanonicalDataLabel =DataLabel.lower() 880 if not CanonicalDataLabel in OptionsInfo["HighlightClassesCanonicalLabelsMap"]: 881 return (BackgroundColor, BackgroundColorType) 882 883 Label = OptionsInfo["HighlightClassesCanonicalLabelsMap"][CanonicalDataLabel] 884 DataType = OptionsInfo["HighlightClassesTypesMap"][Label] 885 Criterion = OptionsInfo["HighlightClassesCriteriaMap"][Label] 886 CriterionValue = OptionsInfo["HighlightClassesCriteriaValuesMap"][Label] 887 888 return GetBackgroundColorForHighlightingValue(DataLabel, DataValue, DataType, Criterion, CriterionValue) 889 890 def GetBackgroundColorForHighlightingValue(DataLabel, DataValue, DataType, Criterion, CriterionValue): 891 """Get background color for highlighting a value.""" 892 893 ValueOkay = False 894 if re.match("^numeric$", DataType, re.I): 895 if not MiscUtil.IsNumber(DataValue): 896 MiscUtil.PrintWarning("Ignoring data value, %s, for data label, %s, during numeric highlighting: It must be a number" % (DataValue, DataLabel)) 897 return (BackgroundColor, BackgroundColorType) 898 899 DataValue = float(DataValue) 900 if re.match("^gt$", Criterion, re.I): 901 ValueOkay = True if DataValue > CriterionValue else False 902 elif re.match("^lt$", Criterion, re.I): 903 ValueOkay = True if DataValue < CriterionValue else False 904 elif re.match("^ge$", Criterion, re.I): 905 ValueOkay = True if DataValue >= CriterionValue else False 906 elif re.match("^le$", Criterion, re.I): 907 ValueOkay = True if DataValue <= CriterionValue else False 908 elif re.match("^eq$", Criterion, re.I): 909 ValueOkay = True if DataValue == CriterionValue else False 910 elif re.match("^ne$", Criterion, re.I): 911 ValueOkay = True if DataValue != CriterionValue else False 912 else: 913 return (BackgroundColor, BackgroundColorType) 914 elif re.match("^text$", DataType, re.I): 915 DataValue = "%s" % DataValue 916 if re.match("^gt$", Criterion, re.I): 917 ValueOkay = True if DataValue > CriterionValue else False 918 elif re.match("^lt$", Criterion, re.I): 919 ValueOkay = True if DataValue < CriterionValue else False 920 elif re.match("^ge$", Criterion, re.I): 921 ValueOkay = True if DataValue >= CriterionValue else False 922 elif re.match("^le$", Criterion, re.I): 923 ValueOkay = True if DataValue <= CriterionValue else False 924 elif re.match("^eq$", Criterion, re.I): 925 ValueOkay = True if DataValue == CriterionValue else False 926 elif re.match("^ne$", Criterion, re.I): 927 ValueOkay = True if DataValue != CriterionValue else False 928 else: 929 return (BackgroundColor, BackgroundColorType) 930 elif re.match("^regex$", DataType, re.I): 931 DataValue = "%s" % DataValue 932 if re.match("^eq$", Criterion, re.I): 933 ValueOkay = True if re.search("%s" % CriterionValue, DataValue, re.I) else False 934 elif re.match("^ne$", Criterion, re.I): 935 ValueOkay = False if re.search("%s" % CriterionValue, DataValue, re.I) else True 936 else: 937 return (BackgroundColor, BackgroundColorType) 938 939 BackgroundColor = OptionsInfo["HighlightColorsList"][0] if ValueOkay else OptionsInfo["HighlightColorsList"][1] 940 BackgroundColorType = OptionsInfo["HighlightColorsType"] 941 942 return (BackgroundColor, BackgroundColorType) 943 944 def GetBackgroundColorUsingHighlightRangesMode(DataLabel, DataValue, DataMap): 945 """Get background highlight color for value range.""" 946 947 BackgroundColor = None 948 BackgroundColorType = None 949 950 CanonicalDataLabel =DataLabel.lower() 951 if not CanonicalDataLabel in OptionsInfo["HighlightRangesCanonicalLabelsMap"]: 952 return (BackgroundColor, BackgroundColorType) 953 954 Label = OptionsInfo["HighlightRangesCanonicalLabelsMap"][CanonicalDataLabel] 955 DataType = OptionsInfo["HighlightRangesTypesMap"][Label] 956 CriterionLower = OptionsInfo["HighlightRangesCriteriaLowerMap"][Label] 957 CriterionLowerValue = OptionsInfo["HighlightRangesCriteriaLowerValuesMap"][Label] 958 CriterionUpper = OptionsInfo["HighlightRangesCriteriaUpperMap"][Label] 959 CriterionUpperValue = OptionsInfo["HighlightRangesCriteriaUpperValuesMap"][Label] 960 961 if re.match("^numeric$", DataType, re.I): 962 if not MiscUtil.IsNumber(DataValue): 963 MiscUtil.PrintWarning("Ignoring data value, %s, for data label, %s, during numeric highlighting: It must be a number" % (DataValue, DataLabel)) 964 return (BackgroundColor, BackgroundColorType) 965 966 DataValue = float(DataValue) 967 ColorIndex = 1 968 969 if DataValue < CriterionLowerValue and re.match("^lt$", CriterionLower, re.I): 970 ColorIndex = 0 971 elif DataValue <= CriterionLowerValue and re.match("^le$", CriterionLower, re.I): 972 ColorIndex = 0 973 elif DataValue > CriterionUpperValue and re.match("^gt$", CriterionUpper, re.I): 974 ColorIndex = 2 975 elif DataValue >= CriterionUpperValue and re.match("^ge$", CriterionUpper, re.I): 976 ColorIndex = 2 977 elif re.match("^text$", DataType, re.I): 978 DataValue = "%s" % DataValue 979 ColorIndex = 1 980 981 if DataValue < CriterionLowerValue and re.match("^lt$", CriterionLower, re.I): 982 ColorIndex = 0 983 elif DataValue <= CriterionLowerValue and re.match("^le$", CriterionLower, re.I): 984 ColorIndex = 0 985 elif DataValue > CriterionUpperValue and re.match("^gt$", CriterionUpper, re.I): 986 ColorIndex = 2 987 elif DataValue >= CriterionUpperValue and re.match("^ge$", CriterionUpper, re.I): 988 ColorIndex = 2 989 else: 990 return (BackgroundColor, BackgroundColorType) 991 992 BackgroundColor = OptionsInfo["HighlightColorsRangesList"][ColorIndex] 993 BackgroundColorType = OptionsInfo["HighlightColorsRangesType"] 994 995 return (BackgroundColor, BackgroundColorType) 996 997 def GetBackgroundColorUsingRandomMode(DataLabel, DataValue, DataMap): 998 """Get a random background highlight color for a value.""" 999 1000 BackgroundColor = random.choice(OptionsInfo["HighlightColorsRandomList"]) 1001 BackgroundColorType = OptionsInfo["HighlightColorsRandomType"] 1002 1003 return (BackgroundColor, BackgroundColorType) 1004 1005 def SetupAlphanumericTableDataValue(Writer, DataValue, BackgroundColor, BackgroundColorType): 1006 """Set up alphanumeric data value for a table cell.""" 1007 1008 WrappedDataValue = "%s" % DataValue 1009 1010 # Look for new lines... 1011 Delim = "<br/>" 1012 if re.search("(\r\n|\r|\n)", WrappedDataValue): 1013 WrappedDataValue = re.sub("(\r\n|\r|\n)", "<br/>", DataValue) 1014 1015 # Wrap text... 1016 if OptionsInfo["WrapText"] and len(WrappedDataValue) > OptionsInfo["WrapTextWidth"]: 1017 WrappedDataLines = [] 1018 for DataLine in WrappedDataValue.split("<br/>"): 1019 WrappedDataLine = MiscUtil.WrapText(DataLine, "<br/>", OptionsInfo["WrapTextWidth"]) 1020 WrappedDataLines.append(WrappedDataLine) 1021 1022 WrappedDataValue = "<br/>".join(WrappedDataLines) 1023 1024 # Highlight value... 1025 if BackgroundColor is not None: 1026 ColorTypeTag = GetBackgroundColorTypeTagForTableValue(BackgroundColor, BackgroundColorType) 1027 Writer.write(""" <td %s = "%s">%s</td>\n""" % (ColorTypeTag, BackgroundColor, WrappedDataValue)) 1028 else: 1029 Writer.write(""" <td>%s</td>\n""" % WrappedDataValue) 1030 1031 def GetBackgroundColorTypeTagForTableValue(Color, ColorType): 1032 """Setup color type tage for setting background of a table value.""" 1033 1034 ColorTypeTag = "class" if re.match("^colorclass", ColorType, re.I) else "bgcolor" 1035 1036 return ColorTypeTag 1037 1038 def PerformAlignment(ValidMols): 1039 """Perform alignment to a common template specified by a SMARTS pattern.""" 1040 1041 if OptionsInfo["AlignmentSMARTSPattern"] is None: 1042 return 1043 1044 MiscUtil.PrintInfo("\nPerforming alignment for primary structure data...") 1045 1046 PatternMol = Chem.MolFromSmarts(OptionsInfo["AlignmentSMARTSPattern"]) 1047 AllChem.Compute2DCoords(PatternMol) 1048 1049 MatchedValidMols = [ValidMol for ValidMol in ValidMols if ValidMol.HasSubstructMatch(PatternMol)] 1050 for ValidMol in MatchedValidMols: 1051 AllChem.GenerateDepictionMatching2DStructure(ValidMol, PatternMol) 1052 1053 def ProcessHighlightSMARTSOption(): 1054 """Process highlight SMARTS option.""" 1055 1056 OptionsInfo["HighlightSMARTS"] = None 1057 OptionsInfo["HighlightSMARTSAllMode"] = False 1058 OptionsInfo["HighlightSMARTSPatternMol"] = None 1059 1060 OptionsInfo["HighlightSMARTSDataLabels"] = [] 1061 OptionsInfo["HighlightSMARTSDataLabelsMap"] = {} 1062 1063 OptionsInfo["HighlightSMARTSCanonicalDataLabelsMap"] = {} 1064 OptionsInfo["HighlightSMARTSPatternsMap"] = {} 1065 OptionsInfo["HighlightSMARTSPatternMolsMap"] = {} 1066 1067 OptionsInfo["HighlightSMARTSDelim"] = Options["--highlightSMARTSDelim"] 1068 1069 if re.match("^None$", Options["--highlightSMARTS"], re.I): 1070 # Nothing to proecess... 1071 return 1072 1073 HighlightSMARTS = Options["--highlightSMARTS"].strip() 1074 if not HighlightSMARTS: 1075 MiscUtil.PrintError("No valid values specified using \"--highlightSMARTS\" option.") 1076 1077 OptionsInfo["HighlightSMARTS"] = HighlightSMARTS 1078 HighlightSMARTSWords = HighlightSMARTS.split(OptionsInfo["HighlightSMARTSDelim"]) 1079 1080 if len(HighlightSMARTSWords) == 1: 1081 PatternMol = Chem.MolFromSmarts(HighlightSMARTS) 1082 if PatternMol is None: 1083 MiscUtil.PrintError("The value specified, %s, using option \"--highlightSMARTS\" is not a valid SMARTS: Failed to create pattern molecule" % Options["--highlightSMARTS"]) 1084 OptionsInfo["HighlightSMARTSAllMode"] = True 1085 OptionsInfo["HighlightSMARTSPatternMol"] = PatternMol 1086 return 1087 1088 if len(HighlightSMARTSWords) % 2: 1089 MiscUtil.PrintError("The number of comma delimited paramater names and values, %d, specified using \"--highlightSMARTS\" option must be an even number." % (len(HighlightSMARTSWords))) 1090 1091 HighlightSMARTSAllMode = False 1092 1093 for Index in range(0, len(HighlightSMARTSWords), 2): 1094 DataLabel = HighlightSMARTSWords[Index].strip() 1095 SMARTSPattern = HighlightSMARTSWords[Index + 1].strip() 1096 1097 PatternMol = Chem.MolFromSmarts(SMARTSPattern) 1098 if PatternMol is None: 1099 MiscUtil.PrintError("The value specified, %s, using option \"--highlightSMARTS\" is not a valid SMARTS: Failed to create pattern molecule" % Options["--highlightSMARTS"]) 1100 1101 if DataLabel in OptionsInfo["HighlightSMARTSDataLabelsMap"]: 1102 MiscUtil.PrintError("The datalabel, %s, specified in pair, \"%s, %s\", using option \"--highlightSMARTS\" is not a valid: Multiple occurences of data label" % (DataLabel, DataLabel, SMARTSPattern)) 1103 1104 OptionsInfo["HighlightSMARTSDataLabels"].append(DataLabel) 1105 OptionsInfo["HighlightSMARTSDataLabelsMap"][DataLabel] = DataLabel 1106 OptionsInfo["HighlightSMARTSCanonicalDataLabelsMap"][DataLabel.lower()] = DataLabel 1107 OptionsInfo["HighlightSMARTSPatternsMap"][DataLabel] = SMARTSPattern 1108 OptionsInfo["HighlightSMARTSPatternMolsMap"][DataLabel] = PatternMol 1109 1110 def ProcessHighlightDataOptions(): 1111 """Process highlight values and colors option.""" 1112 1113 ProcessHighlightValuesOption() 1114 ProcessHighlightValuesRangesOption() 1115 ProcessHighlightValuesClassesOption() 1116 1117 ProcessHighlightColorsOption() 1118 ProcessHighlightColorsRangesOption() 1119 ProcessHighlightColorsRandomOption() 1120 1121 def ProcessHighlightValuesOption(): 1122 """Process highlight values option.""" 1123 1124 OptionsInfo["HighlightValues"] = None 1125 OptionsInfo["HighlightValuesLabels"] = [] 1126 1127 OptionsInfo["HighlightValuesLabelsMap"] = {} 1128 OptionsInfo["HighlightValuesCanonicalLabelsMap"] = {} 1129 1130 OptionsInfo["HighlightValuesTypesMap"] = {} 1131 OptionsInfo["HighlightValuesCriteriaMap"] = {} 1132 OptionsInfo["HighlightValuesCriteriaValuesMap"] = {} 1133 1134 HighlightValues = Options["--highlightValues"].strip() 1135 if re.match("^None$", HighlightValues, re.I): 1136 return 1137 1138 OptionsInfo["HighlightValues"] = HighlightValues 1139 HighlightValuesWords = HighlightValues.split(",") 1140 1141 if len(HighlightValuesWords) % 4: 1142 MiscUtil.PrintError("The number of comma delimited paramater names and values, %d, specified using \"--highlightValues\" option must be a multiple of 4." % (len(HighlightValuesWords))) 1143 1144 for Index in range(0, len(HighlightValuesWords), 4): 1145 DataLabel = HighlightValuesWords[Index].strip() 1146 DataType = HighlightValuesWords[Index + 1].strip() 1147 DataCriterion = HighlightValuesWords[Index + 2].strip() 1148 DataValue = HighlightValuesWords[Index + 3].strip() 1149 1150 if not re.match("^(numeric|text|regex)$", DataType, re.I): 1151 MiscUtil.PrintError("The data type, %s, specified in quratet \"%s,%s,%s,%s\", using \"--highlightValues\" option is not valid. Supported values: numeric, regex or text." % (DataType, DataLabel, DataType, DataCriterion, DataValue)) 1152 1153 if re.match("^regex$", DataType, re.I): 1154 if not re.match("^(eq|ne)$", DataCriterion, re.I): 1155 MiscUtil.PrintError("The data criterion, %s, specified in quratet \"%s,%s,%s,%s\", using \"--highlightValues\" option is not valid. Supported values: eq or ne" % (DataType, DataLabel, DataType, DataCriterion, DataValue)) 1156 else: 1157 if not re.match("^(gt|lt|ge|le|eq|ne)$", DataCriterion, re.I): 1158 MiscUtil.PrintError("The data criterion, %s, specified in quratet \"%s,%s,%s,%s\", using \"--highlightValues\" option is not valid. Supported values: gt, lt, ge, le, eq, or ne." % (DataType, DataLabel, DataType, DataCriterion, DataValue)) 1159 1160 # Check criterion value... 1161 if re.match("^numeric$", DataType, re.I): 1162 if not MiscUtil.IsNumber(DataValue): 1163 MiscUtil.PrintError("The data value, %s, specified in quratet \"%s,%s,%s,%s\", using \"--highlightValues\" option is not valid. It must be a number for data type, %s" % (DataType, DataLabel, DataType, DataCriterion, DataValue, DataType)) 1164 DataValue = float(DataValue) 1165 1166 # Track values... 1167 if DataLabel in OptionsInfo["HighlightValuesLabelsMap"]: 1168 MiscUtil.PrintError("The data label, %s, specified in quratet \"%s,%s,%s,%s\", using \"--highlightValues\" option is not valid: Multiple occurences of data label" % (DataLabel, DataLabel, DataType, DataCriterion, DataValue)) 1169 1170 OptionsInfo["HighlightValuesLabels"].append(DataLabel) 1171 OptionsInfo["HighlightValuesLabelsMap"][DataLabel] = DataLabel 1172 OptionsInfo["HighlightValuesCanonicalLabelsMap"][DataLabel.lower()] = DataLabel 1173 1174 OptionsInfo["HighlightValuesTypesMap"][DataLabel] = DataType 1175 OptionsInfo["HighlightValuesCriteriaMap"][DataLabel] = DataCriterion 1176 OptionsInfo["HighlightValuesCriteriaValuesMap"][DataLabel] = DataValue 1177 1178 def ProcessHighlightValuesRangesOption(): 1179 """Process highlight values ranges option.""" 1180 1181 OptionsInfo["HighlightRanges"] = None 1182 OptionsInfo["HighlightRangesLabels"] = [] 1183 1184 OptionsInfo["HighlightRangesLabelsMap"] = {} 1185 OptionsInfo["HighlightRangesCanonicalLabelsMap"] = {} 1186 1187 OptionsInfo["HighlightRangesTypesMap"] = {} 1188 OptionsInfo["HighlightRangesCriteriaLowerMap"] = {} 1189 OptionsInfo["HighlightRangesCriteriaLowerValuesMap"] = {} 1190 OptionsInfo["HighlightRangesCriteriaUpperMap"] = {} 1191 OptionsInfo["HighlightRangesCriteriaUpperValuesMap"] = {} 1192 1193 HighlightRanges = Options["--highlightValuesRanges"].strip() 1194 if re.match("^None$", HighlightRanges, re.I): 1195 return 1196 1197 OptionsInfo["HighlightRanges"] = HighlightRanges 1198 HighlightRangesWords = HighlightRanges.split(",") 1199 1200 if len(HighlightRangesWords) % 6: 1201 MiscUtil.PrintError("The number of comma delimited paramater names and values, %d, specified in sextet \"%s\" using \"--highlightValuesRanges\" option must be a multiple of 6." % (len(HighlightRangesWords), HighlightRanges)) 1202 1203 for Index in range(0, len(HighlightRangesWords), 6): 1204 DataLabel = HighlightRangesWords[Index].strip() 1205 DataType = HighlightRangesWords[Index + 1].strip() 1206 LowerBoundDataCriterion = HighlightRangesWords[Index + 2].strip() 1207 LowerBoundDataValue = HighlightRangesWords[Index + 3].strip() 1208 UpperBoundDataCriterion = HighlightRangesWords[Index + 4].strip() 1209 UpperBoundDataValue = HighlightRangesWords[Index + 5].strip() 1210 1211 SpecifiedSextet = "%s,%s,%s,%s,%s,%s" % (DataLabel, DataType, LowerBoundDataCriterion, LowerBoundDataValue, UpperBoundDataCriterion, UpperBoundDataValue) 1212 1213 CanonicalDataLabel = DataLabel.lower() 1214 if CanonicalDataLabel in OptionsInfo["HighlightValuesCanonicalLabelsMap"]: 1215 MiscUtil.PrintError("The data label specified, %s, using option \"--highlightRanges\" has already been used in \"--highlightValues\" option" % DataLabel) 1216 1217 if not re.match("^(numeric|text)$", DataType, re.I): 1218 MiscUtil.PrintError("The data type, %s, specified in sextet \"%s\" using \"--highlightValuesRanges\" option is not valid. Supported values: numeric text." % (DataType, SpecifiedSextet)) 1219 1220 if not re.match("^(lt|le)$", LowerBoundDataCriterion, re.I): 1221 MiscUtil.PrintError("The lower bound criterion, %s, specified in sextet \"%s\" using \"--highlightValuesRanges\" option is not valid. Supported values: lt or le." % (LowerBoundDataCriterion, SpecifiedSextet)) 1222 1223 if not re.match("^(gt|ge)$", UpperBoundDataCriterion, re.I): 1224 MiscUtil.PrintError("The upper bound criterion, %s, specified in sextet \"%s\" using \"--highlightValuesRanges\" option is not valid. Supported values: gt or ge." % (UpperBoundDataCriterion, SpecifiedSextet)) 1225 1226 if re.match("^numeric$", DataType, re.I): 1227 if not MiscUtil.IsNumber(LowerBoundDataValue): 1228 MiscUtil.PrintError("The lower bound data value, %s, specified in sextet \"%s\", using \"--highlightValuesRanges\" option is not valid. It must be a number for \"%s\" data type." % (LowerBoundDataValue, SpecifiedSextet, DataType)) 1229 1230 if not MiscUtil.IsNumber(UpperBoundDataValue): 1231 MiscUtil.PrintError("The upper bound data value, %s, specified in sextet \"%s\", using \"--highlightValuesRanges\" option is not valid. It must be a number for \"%s\"data type." % (UpperBoundDataValue, SpecifiedSextet, DataType)) 1232 1233 if float(LowerBoundDataValue) >= float(UpperBoundDataValue): 1234 MiscUtil.PrintError("The lower bound data value, %s, must be less than upper bound value, %s, specified in sextet \"%s\" using \"--highlightValuesRanges\" option." % (LowerBoundDataValue, UpperBoundDataValue, SpecifiedSextet)) 1235 1236 LowerBoundDataValue = float(LowerBoundDataValue) 1237 UpperBoundDataValue = float(UpperBoundDataValue) 1238 else: 1239 if LowerBoundDataValue >= UpperBoundDataValue: 1240 MiscUtil.PrintError("The lower bound data value, %s, must be less than upper bound value, %s, specified in sextet \"%s\", using \"--highlightValuesRanges\" option is not valid. It must be a number for data type, %s" % (LowerBoundDataValue, UpperBoundDataValue, SpecifiedSextet, DataType)) 1241 1242 # Track values... 1243 if DataLabel in OptionsInfo["HighlightRangesLabelsMap"]: 1244 MiscUtil.PrintError("The data label, %s, specified in sextet \"%s\", using \"--highlightValuesRanges\" option is not valid. Multiple occurences of data label" % (DataLabel, SpecifiedSextet)) 1245 1246 OptionsInfo["HighlightRangesLabels"].append(DataLabel) 1247 OptionsInfo["HighlightRangesLabelsMap"][DataLabel] = DataLabel 1248 OptionsInfo["HighlightRangesCanonicalLabelsMap"][CanonicalDataLabel] = DataLabel 1249 1250 OptionsInfo["HighlightRangesTypesMap"][DataLabel] = DataType 1251 1252 OptionsInfo["HighlightRangesCriteriaLowerMap"][DataLabel] = LowerBoundDataCriterion 1253 OptionsInfo["HighlightRangesCriteriaLowerValuesMap"][DataLabel] = LowerBoundDataValue 1254 OptionsInfo["HighlightRangesCriteriaUpperMap"][DataLabel] = UpperBoundDataCriterion 1255 OptionsInfo["HighlightRangesCriteriaUpperValuesMap"][DataLabel] = UpperBoundDataValue 1256 1257 def ProcessHighlightValuesClassesOption(): 1258 """Process highlight values classes option.""" 1259 1260 OptionsInfo["HighlightClasses"] = None 1261 OptionsInfo["HighlightClassesRules"] = None 1262 OptionsInfo["HighlightClassesSynonymsMap"] = None 1263 OptionsInfo["HighlightClassesRandom"] = False 1264 1265 OptionsInfo["HighlightClassesLabels"] = [] 1266 OptionsInfo["HighlightClassesLabelsMap"] = {} 1267 OptionsInfo["HighlightClassesCanonicalLabelsMap"] = {} 1268 1269 OptionsInfo["HighlightClassesTypesMap"] = {} 1270 OptionsInfo["HighlightClassesCriteriaMap"] = {} 1271 OptionsInfo["HighlightClassesCriteriaValuesMap"] = {} 1272 1273 HighlightClasses = Options["--highlightValuesClasses"].strip() 1274 if re.match("^None$", HighlightClasses, re.I): 1275 return 1276 1277 OptionsInfo["HighlightClasses"] = HighlightClasses 1278 1279 if re.match("^RuleOf5$", HighlightClasses, re.I): 1280 HighlightClassessRules = "MolecularWeight,numeric,le,500,HydrogenBondDonors,numeric,le,5,HydrogenBondAcceptors,numeric,le,10,LogP,numeric,le,5" 1281 elif re.match("^RuleOf3$", HighlightClasses, re.I): 1282 HighlightClassessRules = "MolecularWeight,numeric,le,300,HydrogenBondDonors,numeric,le,3,HydrogenBondAcceptors,numeric,le,3,LogP,numeric,le,3,RotatableBonds,numeric,le,3,TPSA,numeric,le,60" 1283 elif re.match("^DrugLike$", HighlightClasses, re.I): 1284 HighlightClassessRules = "MolecularWeight,numeric,le,500,HydrogenBondDonors,numeric,le,5,HydrogenBondAcceptors,numeric,le,10,LogP,numeric,le,5,RotatableBonds,numeric,le,10,TPSA,numeric,le,140" 1285 elif re.match("^Random$", HighlightClasses, re.I): 1286 if OptionsInfo["HighlightValues"] is not None: 1287 MiscUtil.PrintError("The value specified, %s, using option \"--highlightValuesClasses\" is not allowed in conjunction with \"--highlightValues\" option." % HighlightClasses) 1288 if OptionsInfo["HighlightRanges"] is not None: 1289 MiscUtil.PrintError("The value specified, %s, using option \"--highlightValuesClasses\" is not allowed in conjunction with \"--highlightRanges\" option ." % HighlightClasses) 1290 1291 OptionsInfo["HighlightClassesRandom"] = True 1292 return 1293 else: 1294 MiscUtil.PrintError("The value specified, %d, using option \"--highlightValuesClasses\" is not supported." % HighlightClasses) 1295 return 1296 1297 OptionsInfo["HighlightClassesRules"] = HighlightClassessRules 1298 1299 # Process rules for highlighting values... 1300 HighlightClassesWords = HighlightClassessRules.split(",") 1301 for Index in range(0, len(HighlightClassesWords), 4): 1302 DataLabel = HighlightClassesWords[Index].strip() 1303 DataType = HighlightClassesWords[Index + 1].strip() 1304 DataCriterion = HighlightClassesWords[Index + 2].strip() 1305 DataValue = HighlightClassesWords[Index + 3].strip() 1306 1307 DataValue = float(DataValue) 1308 1309 if DataLabel in OptionsInfo["HighlightClassesLabelsMap"]: 1310 MiscUtil.PrintWarning("Ignoring duplicate datalabel, %s, specified in highlighting values rule for class, %s, in \"--highlightClassesValue\" option..." % (DataLabel, HighlightClasses)) 1311 continue 1312 1313 OptionsInfo["HighlightClassesLabels"].append(DataLabel) 1314 OptionsInfo["HighlightClassesLabelsMap"][DataLabel] = DataLabel 1315 1316 OptionsInfo["HighlightClassesTypesMap"][DataLabel] = DataType 1317 OptionsInfo["HighlightClassesCriteriaMap"][DataLabel] = DataCriterion 1318 OptionsInfo["HighlightClassesCriteriaValuesMap"][DataLabel] = DataValue 1319 1320 # Set up synonyms for data labels corresponding to physicochemical properties 1321 # calculated by MayaChemTools and RDKit... 1322 OptionsInfo["HighlightClassesSynonymsMap"] = {} 1323 OptionsInfo["HighlightClassesSynonymsMap"]["MolecularWeight"] = ["MolecularWeight", "MolWt"] 1324 OptionsInfo["HighlightClassesSynonymsMap"]["HydrogenBondDonors"] = ["HydrogenBondDonors", "NHOHCount"] 1325 OptionsInfo["HighlightClassesSynonymsMap"]["HydrogenBondAcceptors"] = ["HydrogenBondAcceptors", "NOCount"] 1326 OptionsInfo["HighlightClassesSynonymsMap"]["LogP"] = ["SLogP", "MolLogP"] 1327 OptionsInfo["HighlightClassesSynonymsMap"]["RotatableBonds"] = ["RotatableBonds", "NumRotatableBonds"] 1328 OptionsInfo["HighlightClassesSynonymsMap"]["TPSA"] = ["TPSA", "TPSA"] 1329 1330 def ProcessHighlightColorsOption(): 1331 """Process highlight colors option.""" 1332 1333 OptionsInfo["HighlightColors"] = None 1334 OptionsInfo["HighlightColorsType"] = None 1335 OptionsInfo["HighlightColorsList"] = None 1336 1337 HighlightColors = "colorclass,table-success, table-danger" 1338 if not re.match("^auto$", Options["--highlightColors"], re.I): 1339 HighlightColors = Options["--highlightColors"].strip() 1340 if MiscUtil.IsEmpty(HighlightColors): 1341 MiscUtil.PrintError("The value specified using \"--highlightColors\" is empty.") 1342 1343 OptionsInfo["HighlightColors"] = re.sub(" ", "", HighlightColors) 1344 HighlightColorsList = [Color.lower() for Color in OptionsInfo["HighlightColors"].split(",")] 1345 1346 if len(HighlightColorsList) != 3: 1347 MiscUtil.PrintError("The number of comma delimited paramater names and values, %d, specified using \"--highlightColors\" option must be 3." % (len(HighlightColorsList))) 1348 1349 ColorsType, Color1, Color2 = HighlightColorsList 1350 if not re.match("^(colorclass|colorspec)$", ColorsType, re.I): 1351 MiscUtil.PrintError("The color type, %s, specified using \"--highlightColors\" option is not valid. Supported values: colorclass or colorspec." % ColorsType) 1352 1353 ColorsList = [Color1, Color2] 1354 if re.match("^colorclass$", ColorsType, re.I): 1355 CheckOptionTableClassColorValues("--highlightColors", ColorsList) 1356 1357 OptionsInfo["HighlightColorsList"] = ColorsList 1358 OptionsInfo["HighlightColorsType"] = ColorsType 1359 1360 def ProcessHighlightColorsRangesOption(): 1361 """Process highlight colors ranges option.""" 1362 1363 OptionsInfo["HighlightColorsRanges"] = None 1364 OptionsInfo["HighlightColorsRangesType"] = None 1365 OptionsInfo["HighlightColorsRangesList"] = None 1366 1367 HighlightColors = "colorclass,table-success, table-warning, table-danger" 1368 if not re.match("^auto$", Options["--highlightColorsRanges"], re.I): 1369 HighlightColors = Options["--highlightColorsRanges"].strip() 1370 if MiscUtil.IsEmpty(HighlightColors): 1371 MiscUtil.PrintError("The value specified using \"--highlightColorsRanges\" is empty.") 1372 1373 OptionsInfo["HighlightColorsRanges"] = re.sub(" ", "", HighlightColors) 1374 HighlightColorsList = [Color.lower() for Color in OptionsInfo["HighlightColorsRanges"].split(",")] 1375 1376 if len(HighlightColorsList) != 4: 1377 MiscUtil.PrintError("The number of comma delimited paramater names and values, %d, specified using \"--highlightColorsRanges\" option must be 4." % (len(HighlightColorsList))) 1378 1379 ColorsType, Color1, Color2, Color3 = HighlightColorsList 1380 if not re.match("^(colorclass|colorspec)$", ColorsType, re.I): 1381 MiscUtil.PrintError("The color type, %s, specified using \"--highlightColorsRanges\" option is not valid. Supported values: colorclass or colorspec." % ColorsType) 1382 1383 ColorsList = [Color1, Color2, Color3] 1384 if re.match("^colorclass$", ColorsType, re.I): 1385 CheckOptionTableClassColorValues("--highlightColorsRanges", ColorsList) 1386 1387 OptionsInfo["HighlightColorsRangesList"] = ColorsList 1388 OptionsInfo["HighlightColorsRangesType"] = ColorsType 1389 1390 def ProcessHighlightColorsRandomOption(): 1391 """Process highlight colors random option.""" 1392 1393 OptionsInfo["HighlightColorsRandom"] = None 1394 OptionsInfo["HighlightColorsRandomType"] = None 1395 OptionsInfo["HighlightColorsRandomList"] = None 1396 1397 HighlightColors = "colorclass,table-primary,table-success,table-danger,table-info,table-warning,table-secondary" 1398 if not re.match("^auto$", Options["--highlightColorsRandom"], re.I): 1399 HighlightColors = Options["--highlightColorsRandom"].strip() 1400 if MiscUtil.IsEmpty(HighlightColors): 1401 MiscUtil.PrintError("The value specified using \"--highlightColorsRandom\" is empty.") 1402 1403 OptionsInfo["HighlightColorsRandom"] = re.sub(" ", "", HighlightColors) 1404 HighlightColorsList = [Color.lower() for Color in OptionsInfo["HighlightColorsRandom"].split(",")] 1405 1406 if len(HighlightColorsList) <= 1: 1407 MiscUtil.PrintError("The number of comma delimited paramater names and values, %d, specified using \"--highlightColorsRandom\" option must be > 1." % (len(HighlightColorsList))) 1408 1409 ColorsType = HighlightColorsList[0] 1410 ColorsList = HighlightColorsList[1:] 1411 1412 if not re.match("^(colorclass|colorspec)$", ColorsType, re.I): 1413 MiscUtil.PrintError("The color type, %s, specified using \"--highlightColorsRandim\" option is not valid. Supported values: colorclass or colorspec." % ColorsType) 1414 1415 if re.match("^colorclass$", ColorsType, re.I): 1416 CheckOptionTableClassColorValues("--highlightColorsRandom", ColorsList) 1417 1418 OptionsInfo["HighlightColorsRandomList"] = ColorsList 1419 OptionsInfo["HighlightColorsRandomType"] = ColorsType 1420 1421 def CheckOptionTableClassColorValues(OptionName, ColorsList): 1422 """Check names of table color classes and issue a warning for unknown names.""" 1423 1424 TableClassColors = ["thead-dark", "thead-light", "table-primary", "table-success", "table-danger", "table-info", "table-warning", "table-active", "table-secondary", "table-light", "table-dark", "bg-primary", "bg-success", "bg-danger", "bg-info", "bg-warning", "bg-secondary", "bg-dark", "bg-light"] 1425 1426 for Color in ColorsList: 1427 if not Color in TableClassColors: 1428 MiscUtil.PrintWarning("The color class name, %s, specified using option \"%s\" appears to be a unknown name..." % (Color, OptionName)) 1429 1430 def ProcessOptions(): 1431 """Process and validate command line arguments and options.""" 1432 1433 MiscUtil.PrintInfo("Processing options...") 1434 1435 # Validate options... 1436 ValidateOptions() 1437 1438 OptionsInfo["Infile"] = Options["--infile"] 1439 OptionsInfo["Outfile"] = Options["--outfile"] 1440 OptionsInfo["Overwrite"] = Options["--overwrite"] 1441 1442 # No need for any RDKit specific --outfileParams.... 1443 OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters("--infileParams", Options["--infileParams"], OptionsInfo["Infile"]) 1444 1445 AlignmentSMARTSPattern = None 1446 if not re.match("^None$", Options["--alignmentSMARTS"], re.I): 1447 AlignmentSMARTSPattern = Options["--alignmentSMARTS"] 1448 OptionsInfo["AlignmentSMARTSPattern"] = AlignmentSMARTSPattern 1449 1450 Compute2DCoords = True 1451 if re.match("^no$", Options["--compute2DCoords"], re.I): 1452 Compute2DCoords = False 1453 OptionsInfo["Compute2DCoords"] = Compute2DCoords 1454 1455 CounterCol = True 1456 if re.match("^no$", Options["--counterCol"], re.I): 1457 CounterCol = False 1458 OptionsInfo["CounterCol"] = CounterCol 1459 1460 ColVisibility = True 1461 if re.match("^no$", Options["--colVisibility"], re.I): 1462 ColVisibility = False 1463 OptionsInfo["ColVisibility"] = ColVisibility 1464 1465 OptionsInfo["ColVisibilityCtrlMax"] = int(Options["--colVisibilityCtrlMax"]) 1466 1467 Footer = None 1468 if not re.match("^None$", Options["--footer"], re.I): 1469 Footer = Options["--footer"] 1470 OptionsInfo["Footer"] = Footer 1471 1472 FooterClass = Options["--footerClass"].strip() 1473 if MiscUtil.IsEmpty(FooterClass): 1474 MiscUtil.PrintError("The value specified using option \"--footerClass\" is empty.") 1475 OptionsInfo["FooterClass"] = FooterClass 1476 1477 FreezeCols = True 1478 if re.match("^no$", Options["--freezeCols"], re.I): 1479 FreezeCols = False 1480 OptionsInfo["FreezeCols"] = FreezeCols 1481 1482 Header = None 1483 if not re.match("^None$", Options["--header"], re.I): 1484 Header = Options["--header"] 1485 OptionsInfo["Header"] = Header 1486 1487 HeaderStyle = Options["--headerStyle"].strip() 1488 if MiscUtil.IsEmpty(HeaderStyle): 1489 MiscUtil.PrintError("The value specified using option \"--headerStyle\" is empty.") 1490 OptionsInfo["HeaderStyle"] = HeaderStyle 1491 1492 ProcessHighlightSMARTSOption() 1493 ProcessHighlightDataOptions() 1494 1495 OptionsInfo["KeysNavigation"] = True 1496 if re.match("^no$", Options["--keysNavigation"], re.I): 1497 OptionsInfo["KeysNavigation"] = False 1498 1499 SizeValues = Options["--molImageSize"].split(",") 1500 OptionsInfo["MolImageWidth"] = int(SizeValues[0]) 1501 OptionsInfo["MolImageHeight"] = int(SizeValues[1]) 1502 1503 OptionsInfo["MolImageEncoded"] = True 1504 if re.match("^no$", Options["--molImageEncoded"], re.I): 1505 OptionsInfo["MolImageEncoded"] = False 1506 1507 OptionsInfo["Paging"] = True 1508 if re.match("^no$", Options["--paging"], re.I): 1509 OptionsInfo["Paging"] = False 1510 1511 PagingType = Options["--pagingType"] 1512 if not re.match("^(numbers|simple|simple_numbers|full|full_numbers|simple_number)$", Options["--pagingType"], re.I): 1513 MiscUtil.PrintWarning("The paging type name, %s, specified using option \"--pagingType\" appears to be a unknown type..." % (PagingType)) 1514 OptionsInfo["PagingType"] = PagingType.lower() 1515 1516 OptionsInfo["PageLength"] = int(Options["--pageLength"]) 1517 1518 OptionsInfo["RegexSearch"] = True 1519 if re.match("^no$", Options["--regexSearch"], re.I): 1520 OptionsInfo["RegexSearch"] = False 1521 1522 OptionsInfo["ShowMolName"] = True 1523 OptionsInfo["ShowMolNameDataLabel"] = "Name" 1524 if re.match("^no$", Options["--showMolName"], re.I): 1525 OptionsInfo["ShowMolName"] = False 1526 1527 OptionsInfo["ShowMolNameAuto"] = True if re.match("^auto$", Options["--showMolName"], re.I) else False 1528 1529 OptionsInfo["ScrollX"] = True 1530 if re.match("^no$", Options["--scrollX"], re.I): 1531 OptionsInfo["ScrollX"] = False 1532 1533 OptionsInfo["ScrollY"] = True 1534 if re.match("^no$", Options["--scrollY"], re.I): 1535 OptionsInfo["ScrollY"] = False 1536 1537 OptionsInfo["ScrollYSize"] = Options["--scrollYSize"] 1538 if re.match("vh$", Options["--scrollYSize"], re.I): 1539 ScrollYSize = int(re.sub("vh$", "", Options["--scrollYSize"])) 1540 if ScrollYSize <= 0: 1541 MiscUtil.PrintError("The value specified, %s, for option \"--scrollYSize\" is not valid. Supported value: > 0 followed by \"vh\"" % Options["--scrollYSize"]) 1542 1543 TableStyle = None 1544 if not re.match("^None$", Options["--tableStyle"], re.I): 1545 if re.match("^All$", Options["--tableStyle"], re.I): 1546 TableStyle = "table table-striped table-bordered table-hover table-dark" 1547 else: 1548 TableStyle = re.sub(" ", "", Options["--tableStyle"]) 1549 for Style in [Style for Style in TableStyle.split(",")]: 1550 if not re.match("^(table|table-striped|table-bordered|table-hover|table-dark|table-sm)$", Style, re.I): 1551 MiscUtil.PrintWarning("The table style name, %s, specified using option \"-t, --tableStyle\" appears to be a unknown style..." % (Style)) 1552 TableStyle = re.sub(",", " ", TableStyle.lower()) 1553 OptionsInfo["TableStyle"] = TableStyle 1554 1555 TableHeaderStyle = None 1556 if not re.match("^None$", Options["--tableHeaderStyle"], re.I): 1557 TableHeaderStyle = Options["--tableHeaderStyle"] 1558 TableHeaderStyle = TableHeaderStyle.lower() 1559 CheckOptionTableClassColorValues("--tableHeaderStyle", [TableHeaderStyle]) 1560 OptionsInfo["TableHeaderStyle"] = TableHeaderStyle 1561 1562 OptionsInfo["TableFooter"] = True 1563 if re.match("^no$", Options["--tableFooter"], re.I): 1564 OptionsInfo["TableFooter"] = False 1565 1566 OptionsInfo["WrapText"] = True 1567 if re.match("^no$", Options["--wrapText"], re.I): 1568 OptionsInfo["WrapText"] = False 1569 1570 OptionsInfo["WrapTextWidth"] = int(Options["--wrapTextWidth"]) 1571 1572 def RetrieveOptions(): 1573 """Retrieve command line arguments and options.""" 1574 1575 # Get options... 1576 global Options 1577 Options = docopt(_docoptUsage_) 1578 1579 # Set current working directory to the specified directory... 1580 WorkingDir = Options["--workingdir"] 1581 if WorkingDir: 1582 os.chdir(WorkingDir) 1583 1584 # Handle examples option... 1585 if "--examples" in Options and Options["--examples"]: 1586 MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_)) 1587 sys.exit(0) 1588 1589 def ValidateOptions(): 1590 """Validate option values.""" 1591 1592 if not re.match("^None$", Options["--alignmentSMARTS"], re.I): 1593 PatternMol = Chem.MolFromSmarts(Options["--alignmentSMARTS"]) 1594 if PatternMol is None: 1595 MiscUtil.PrintError("The value specified, %s, using option \"--alignmentSMARTS\" is not a valid SMARTS: Failed to create pattern molecule" % Options["--alignmentSMARTS"]) 1596 1597 MiscUtil.ValidateOptionTextValue("-c, --compute2DCoords", Options["--compute2DCoords"], "yes no auto") 1598 1599 MiscUtil.ValidateOptionTextValue("--counterCol", Options["--counterCol"], "yes no") 1600 MiscUtil.ValidateOptionTextValue("--colVisibility", Options["--colVisibility"], "yes no") 1601 MiscUtil.ValidateOptionIntegerValue("--colVisibilityCtrlMax", Options["--colVisibilityCtrlMax"], {">": 0}) 1602 1603 MiscUtil.ValidateOptionTextValue("--freezeCols", Options["--freezeCols"], "yes no") 1604 MiscUtil.ValidateOptionTextValue("--highlightValuesClasses", Options["--highlightValuesClasses"], "RuleOf5 RuleOf3 DrugLike Random None") 1605 1606 MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"]) 1607 MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "sdf sd mol smi csv tsv txt") 1608 1609 MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "html") 1610 MiscUtil.ValidateOptionsOutputFileOverwrite("-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"]) 1611 MiscUtil.ValidateOptionsDistinctFileNames("-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"]) 1612 1613 MiscUtil.ValidateOptionTextValue("-k, --keysNavigation", Options["--keysNavigation"], "yes no") 1614 1615 MiscUtil.ValidateOptionNumberValues("-m, --molImageSize", Options["--molImageSize"], 2, ",", "integer", {">": 0}) 1616 MiscUtil.ValidateOptionTextValue("--molImageEncoded", Options["--molImageEncoded"], "yes no") 1617 1618 MiscUtil.ValidateOptionTextValue("-p, --paging", Options["--paging"], "yes no") 1619 MiscUtil.ValidateOptionIntegerValue("--pageLength", Options["--pageLength"], {">": 0}) 1620 MiscUtil.ValidateOptionTextValue("-r, --regexSearch", Options["--regexSearch"], "yes no") 1621 1622 MiscUtil.ValidateOptionTextValue("--showMolName", Options["--showMolName"], "yes no auto") 1623 1624 MiscUtil.ValidateOptionTextValue("--scrollX", Options["--scrollX"], "yes no") 1625 MiscUtil.ValidateOptionTextValue("--scrollY", Options["--scrollY"], "yes no") 1626 if not re.search("vh$", Options["--scrollYSize"], re.I): 1627 MiscUtil.ValidateOptionIntegerValue("--scrollYSize", Options["--scrollYSize"], {">": 0}) 1628 1629 MiscUtil.ValidateOptionTextValue("--tableFooter", Options["--tableFooter"], "yes no") 1630 1631 MiscUtil.ValidateOptionTextValue("--wrapText", Options["--wrapText"], "yes no") 1632 MiscUtil.ValidateOptionIntegerValue("--wrapTextWidth", Options["--wrapTextWidth"], {">": 0}) 1633 1634 # Setup a usage string for docopt... 1635 _docoptUsage_ = """ 1636 RDKitDrawMoleculesAndDataTable.py - Generate a HTML data table 1637 1638 Usage: 1639 RDKitDrawMoleculesAndDataTable.py [--alignmentSMARTS <SMARTS>] 1640 [--compute2DCoords <yes or no>] [--counterCol <yes or no>] 1641 [--colVisibility <yes or no>] [--colVisibilityCtrlMax <number>] [--footer <text>] 1642 [--footerClass <text>] [--freezeCols <yes or no>] [--header <text>] 1643 [--headerStyle <text>] [--highlightSMARTS <SMARTS,...>] [--highlightSMARTSDelim <text>] 1644 [--highlightValues <datalabel,datatype,criterion,value,...>] 1645 [--highlightValuesRanges <datalabel,datatype,criterion1,vaue1,criterion2,value2...>] 1646 [--highlightValuesClasses <RuleOf5,RuleOf3,...>] 1647 [--highlightColors <colortype,color1,color2>] 1648 [--highlightColorsRanges <colortype,color1,color2,color3>] 1649 [--highlightColorsRandom <colottype,color1,color2,...>] 1650 [--infileParams <Name,Value,...>] [--keysNavigation <yes or no>] 1651 [--molImageSize <width,height>] [--molImageEncoded <yes or no> ] [--overwrite] 1652 [--paging <yes or no>] [--pagingType <numbers,simple, ...>] [--pageLength <number>] 1653 [--regexSearch <yes or no>] [--showMolName <yes or no>] 1654 [--scrollX <yes or no>] [--scrollY <yes or no>] [--scrollYSize <number>] 1655 [--tableStyle <table,table-striped,...>] [--tableFooter <yes or no>] 1656 [--tableHeaderStyle <thead-dark,thead-light,...>] [--wrapText <yes or no>] 1657 [--wrapTextWidth <number>] [-w <dir>] -i <infile> -o <outfile> 1658 RDKitDrawMoleculesAndDataTable.py -h | --help | -e | --examples 1659 1660 Description: 1661 Generate an interactive HTML table with columns corresponding to molecules 1662 and available alphanumerical data in an input file. The drawing of molecules are 1663 embedded in the columns as in line SVG images. 1664 1665 The interactive HTML table may contain multiple columns with drawing of 1666 molecules. These columns are automatically generated for each data field in SD 1667 file or a column name in SMILES and CSV/TSV file containing SMILES 1668 string in their names. The first molecular drawing column in the HTML table 1669 represents primary molecular structure data available in an input file. It 1670 corresponds to MOL block is SD file or a first column containing SMILES string 1671 in its name in SMILES and CSV/TSV files. 1672 1673 The interactive table requires internet access for viewing in a browser and 1674 employs the following frameworks: JQuery, Bootstrap, and DataTable. It provides 1675 the following functionality: sorting by columns, page length control, page 1676 navigation, searching data with regular expressions, and horizontal/vertical 1677 scrolling, row highlighting during hovering, a counter column, freezing of primary 1678 structure and counter columns, and column visibility control. 1679 1680 The supported input file formats are: Mol (.mol), SD (.sdf, .sd), SMILES (.smi), 1681 CSV/TSV (.csv, .tsv, .txt) 1682 1683 The supported output file format is HTML (.html). 1684 1685 Options: 1686 -a, --alignmentSMARTS <SMARTS> [default: none] 1687 SMARTS pattern for aligning molecules to a common template. This option is 1688 only used for primary molecular data in SD, SMILES and CSV/TSV files. It is 1689 ignored for all other molecular coordinates corresponding to data fields in SD 1690 file or columns in SMILES and CSV/TSV files containing SMILES string in their 1691 names. 1692 -c, --compute2DCoords <yes or no> [default: yes] 1693 Compute 2D coordinates of molecules before drawing. Default: yes for SMILES 1694 strings in SMILES, CSV/TSV, and SD file data fields. In addition, 2D coordinated are 1695 always calculated for molecules corresponding to data fields in SD file or columns 1696 in SMILES and CSV/TSV files containing SMILES string in their names. 1697 --counterCol <yes or no> [default: yes] 1698 Show a counter column as the first column in the table. It contains the position 1699 for each row in the table. 1700 --colVisibility <yes or no> [default: yes] 1701 Show a dropdown button to toggle visibility of columns in the table. The counter 1702 and primary structure columns are excluded from the list. 1703 --colVisibilityCtrlMax <number> [default: 25] 1704 Maximum number of columns to show in column visibility dropdown button. The 1705 rest of the data columns are not listed in the dropdown and are shown in the table. 1706 A word to the wise: The display of too many columns appear to hang interactive 1707 Javascript framework for Bootstrap and DataTables. 1708 --freezeCols <yes or no> [default: yes] 1709 Lock counter and primary structure columns in place during horizontal scrolling. 1710 --footer <text> [default: none] 1711 Footer text to insert at the bottom of the HTML page after the table. 1712 --footerClass <text> [default: small text-center text-muted] 1713 Footer class style to use with <p> tag. 1714 -e, --examples 1715 Print examples. 1716 -h, --help 1717 Print this help message. 1718 --header <text> [default: none] 1719 Header text to insert at the top of the HTML page before the table. 1720 --headerStyle <text> [default: h5] 1721 Header style to use. Possible values: h1 to h6. 1722 --highlightSMARTS <SMARTS,...> [default: none] 1723 SMARTS pattern for highlighting atoms and bonds in molecules. All matched 1724 substructures are highlighted. 1725 1726 The SMARTS string is used to highlight atoms and bonds in drawing of 1727 molecules present in a HTML table across multiple columns. These columns 1728 correspond to data field labels in SD file or a column name in SMILES and 1729 CSV/TSV file containing SMILES string in their names. The first molecular 1730 drawing column in HTML table corresponds to primary molecular structure 1731 data available in an input file. It is identified by a label 'Structure' across 1732 all input formats. 1733 1734 A single SMARTS string is used to highlight a common substructure across 1735 all columns containing drawing of molecules in HTML table. 1736 1737 Format: 1738 1739 SMARTS 1740 Structure,SMARTS1,DataLabel,SMARTS2,... 1741 Structure,SMARTS1,Collabel,SMARTS2,... 1742 1743 Example: 1744 1745 c1ccccc1 1746 Structure,c1ccccc1,SMILESR1,c1ccccc1,SMILESR2,c1ccccc1 1747 1748 --highlightSMARTSDelim <text> [default: ,] 1749 Delimiter for parsing SMARTS patterns specified using '--highlightSMARTS' 1750 option. Default: ',' comma character. Possible value: Any arbitrary text or 1751 a valid character. You may use arbitrary text as a delimiter to handle 1752 presence of special characters such as comma, semicolon, tilde etc. in 1753 SMARTS patterns. 1754 --highlightValues <datalabel,datatype,criterion,value,...> [default: none] 1755 Highlighting methodology to use for highlighting alphanumerical data 1756 corresponding to data fields in SD file or column names in SMILES and 1757 CSV/TSV text files. 1758 1759 Input text contains these quartets: DataLabel, DataType, Criterion, Value. 1760 Possible datatype values: numeric, text. Possible criterion values for numeric 1761 and text: gt, lt, ge, le. 1762 1763 The 'datalabel' corresponds to either data field label in SD file or column name 1764 in SMILES and CSV/TSV text files. 1765 1766 Examples: 1767 1768 MolecularWeight,numeric,le,500 1769 MolecularWeight,numeric,le,450,SLogP,numeric,le,5 1770 Name,text,eq,Aspirin 1771 Name,regex,eq,acid|amine 1772 1773 --highlightValuesRanges <datalabel,datatype,...> [default: none] 1774 Highlighting methodology to use for highlighting ranges of alphanumerical 1775 data corresponding to data fields in SD file or column names in SMILES and 1776 CSV/TSV text files. 1777 1778 Input text contains these sextets: DataLabel, DataType, CriterionLowerBound, 1779 LowerBoundValue, CriterionUpperBound, UpperBoundValue. 1780 1781 Possible datatype values: numeric or text. Possible criterion values: Lower 1782 bound value - lt, le; Upper bound value: gt, ge. 1783 1784 The 'datalabel' corresponds to either data field label in SD file or column name 1785 in SMILES and CSV/TSV text files. 1786 1787 Examples: 1788 1789 MolecularWeight,numeric,lt,450,gt,1000 1790 MolecularWeight,numeric,lt,450,gt,1000,SLogP,numeric,lt,0,gt,5 1791 1792 --highlightValuesClasses <RuleOf5,RuleOf3,...> [default: none] 1793 Highlighting methodology to use for highlighting ranges of numerical data 1794 data corresponding to specific set of data fields in SD file or column names in 1795 SMILES and CSV/TSV text files. Possible values: RuleOf5, RuleOf3, DrugLike, 1796 Random. 1797 1798 The following value classes are supported: RuleOf5, RuleOf3, LeadLike, DrugLike. 1799 LeadLike is equivalent to RuleOf3. 1800 1801 Each supported class encompasses a specific set of data labels along with 1802 appropriate criteria to compare and highlight column values, except for 1803 'Random' class. The data labels in these classes are automatically associated 1804 with appropriate data fields in SD file or column names in SMILES and CSV/TSV 1805 text files. 1806 1807 No data labels are associated with 'Random' class. It is used to highlight 1808 available alphanumeric data by randomly selecting a highlight color from the 1809 list of colors specified using '--highlightColorsRandom' option. The 'Random' 1810 class value is not allowed in conjunction with '--highlightValues' or 1811 '--highlightValuesRanges'. 1812 1813 The rules to highlight values for the supported classes are as follows. 1814 1815 RuleOf5 [ Ref 91 ]: 1816 1817 MolecularWeight,numeric,le,500 (MolecularWeight <= 500) 1818 HydrogenBondDonors,numeric,le,5 (HydrogenBondDonors <= 5) 1819 HydrogenBondAcceptors,numeric,le,10 (HydrogenBondAcceptors <= 10) 1820 LogP,numeric,le,5 (LogP <= 5) 1821 1822 RuleOf3 or LeadLike [ Ref 92 ]: 1823 1824 MolecularWeight,numeric,le,300 (MolecularWeight <= 300) 1825 HydrogenBondDonors,numeric,le,3 (HydrogenBondDonors <= 3) 1826 HydrogenBondAcceptors,numeric,le,3 (HydrogenBondAcceptors <= 3) 1827 LogP,numeric,le,3 (LogP <= 3) 1828 RotatableBonds,numeric,le,3 (RotatableBonds <= 3) 1829 TPSA,numeric,le,60 (TPSA <= 60) 1830 1831 DrugLike: 1832 1833 MolecularWeight,numeric,le,500 (MolecularWeight <= 500) 1834 HydrogenBondDonors,numeric,le,5 (HydrogenBondDonors <= 5) 1835 HydrogenBondAcceptors,numeric,le,10 (HydrogenBondAcceptors <= 10) 1836 LogP,numeric,le,5 (LogP <= 5) 1837 RotatableBonds,numeric,le,10 (RotatableBonds <= 10) 1838 TPSA,numeric,le,140 (TPSA <= 140) 1839 1840 The following synonyms are automatically detected for data labels used 1841 by MayaChemTools and RDKit packages during the calculation of 1842 physicochemical properties. 1843 1844 MayaChemTools: MolecularWeight, HydrogenBondDonors, HydrogenBondAcceptors, 1845 SLogP, RotatableBonds, TPSA. 1846 1847 RDKit: MolWt, NHOHCount, NOCount, MolLogP, NumRotatableBonds, TPSA 1848 1849 --highlightColors <colortype,color1,color2> [default: auto] 1850 Background colors used to highlight column values based on criterion 1851 specified by '--highlightValues' and '--highlightColorsClasses' option. Default 1852 value: colorclass,table-success, table-danger. 1853 1854 The first color is used to highlight column values that satisfy the specified 1855 criterion for the column. The second color highlights the rest of the values 1856 in the column. 1857 1858 Possible values for colortype: colorclass or colorspec. 1859 1860 Any valid bootstrap contextual color class is supported for 'colorclass' 1861 color type. For example: table-primary (Blue), table-success (Green), 1862 table-danger (Red), table-info (Light blue), table-warning (Orange), 1863 table-secondary (Grey), table-light (Light grey), and table-dark (Dark grey). 1864 1865 The following bootstrap color classes may also used: bg-primary bg-success, 1866 bg-danger bg-info, bg-warning, bg-secondary. 1867 1868 Any valid color name or hexadecimal color specification is supported for 1869 'colorspec' color type: For example: red, green, blue, #ff000, #00ff00, #0000ff. 1870 --highlightColorsRanges <colortype,color1,color2,color3> [default: auto] 1871 Background colors used to highlight column values using criteria specified 1872 by '--highlightValuesRanges' option. Default value: colorclass, table-success, 1873 table-warning, table-danger. 1874 1875 The first and third color are used to highlight column values lower and higher 1876 than the specified values for the lower and upper bound. The middle color highlights 1877 the rest of the values in the column. 1878 1879 The supported color type and values are explained in the section for '--highlightColors'. 1880 --highlightColorsRandom <colortype,color1,color2,...> [default: auto] 1881 Background color list to use for randomly selecting a color to highlight 1882 column values during 'Random" value of '--highlightValuesClasses' option. 1883 1884 Default value: colorclass,table-primary,table-success,table-danger,table-info, 1885 table-warning,table-secondary. 1886 1887 The supported color type and values are explained in the section for '--highlightColors'. 1888 -i, --infile <infile> 1889 Input file name. 1890 --infileParams <Name,Value,...> [default: auto] 1891 A comma delimited list of parameter name and value pairs for reading 1892 molecules from files. The supported parameter names for different file 1893 formats, along with their default values, are shown below: 1894 1895 SD, MOL: removeHydrogens,yes,sanitize,yes,strictParsing,yes 1896 SMILES: smilesColumn,1,smilesNameColumn,2,smilesDelimiter,space, 1897 sanitize,yes 1898 1899 Possible values for smilesDelimiter: space, comma or tab. 1900 -k, --keysNavigation <yes or no> [default: yes] 1901 Provide Excel like keyboard cell navigation for the table. 1902 -m, --molImageSize <width,height> [default: 200,150] 1903 Image size of a molecule in pixels. 1904 --molImageEncoded <yes or no> [default: yes] 1905 Base64 encode SVG image of a molecule for inline embedding in a HTML page. 1906 The inline SVG image may fail to display in browsers without encoding. 1907 -o, --outfile <outfile> 1908 Output file name. 1909 --overwrite 1910 Overwrite existing files. 1911 -p, --paging <yes or no> [default: yes] 1912 Provide page navigation for browsing data in the table. 1913 --pagingType <numbers, simple, ...> [default: full_numbers] 1914 Type of page navigation. Possible values: numbers, simple, simple_numbers, 1915 full, full_numbers, or first_last_numbers. 1916 1917 numbers - Page number buttons only 1918 simple - 'Previous' and 'Next' buttons only 1919 simple_numbers - 'Previous' and 'Next' buttons, plus page numbers 1920 full - 'First', 'Previous', 'Next' and 'Last' buttons 1921 full_numbers - 'First', 'Previous', 'Next' and 'Last' buttons, plus 1922 page numbers 1923 first_last_numbers - 'First' and 'Last' buttons, plus page numbers 1924 1925 --pageLength <number> [default: 15] 1926 Number of rows to show per page. 1927 -r, --regexSearch <yes or no> [default: yes] 1928 Allow regular expression search through alphanumerical data in the table. 1929 -s, --showMolName <yes or no> [default: auto] 1930 Show molecule names in a column next to the column corresponding to primary 1931 structure data in SD and SMILES file. The default value is yes for SD and SMILES file. 1932 This option is ignored for CSV/TSV text files. 1933 --scrollX <yes or no> [default: yes] 1934 Provide horizontal scroll bar in the table as needed. 1935 --scrollY <yes or no> [default: yes] 1936 Provide vertical scroll bar in the table as needed. 1937 --scrollYSize <number> [default: 75vh] 1938 Maximum height of table viewport either in pixels or percentage of the browser 1939 window height before providing a vertical scroll bar. Default: 75% of the height of 1940 browser window. 1941 -t, --tableStyle <table,table-striped,...> [default: table,table-hover,table-sm] 1942 Style of table. Possible values: table, table-striped, table-bordered, 1943 table-hover, table-dark, table-sm, none, or All. Default: 'table,table-hover'. A 1944 comma delimited list of any valid Bootstrap table styles is also supported. 1945 --tableFooter <yes or no> [default: yes] 1946 Show column headers at the end of the table. 1947 --tableHeaderStyle <thead-dark,thead-light,...> [default: thead-dark] 1948 Style of table header. Possible values: thead-dark, thead-light, or none. 1949 The names of the following contextual color classes are also supported: 1950 table-primary (Blue), table-success (Green), table-danger (Red), table-info 1951 (Light blue), table-warning (Orange), table-active (Grey), table-light (Light 1952 grey), and table-dark (Dark grey). 1953 -w, --workingdir <dir> 1954 Location of working directory which defaults to the current directory. 1955 --wrapText <yes or no> [default: yes] 1956 Wrap alphanumeric text using <br/> delimiter for display in a HTML table. 1957 --wrapTextWidth <number> [default: 40] 1958 Maximum width in characters before wraping alphanumeric text for display 1959 in a HTML table. 1960 1961 Examples: 1962 To generate a HTML table containing structure and alphanumeric data for 1963 molecules in a SD file along with all the bells and whistles to interact with 1964 the table, type: 1965 1966 % RDKitDrawMoleculesAndDataTable.py -i Sample.sdf -o SampleOut.html 1967 1968 To generate a HTML table containing structure and alphanumeric data for 1969 molecules in a SMILES file along with all the bells and whistles to interact 1970 with the table, type: 1971 1972 % RDKitDrawMoleculesAndDataTable.py -i Sample.smi -o SampleOut.html 1973 1974 To generate a HTML table containing multiple structure columns for molecules 1975 in a CSV file along with all the bells and whistles to interact with the table, type: 1976 1977 % RDKitDrawMoleculesAndDataTable.py -i SampleSeriesRGroupsD3R.csv 1978 -o SampleSeriesRGroupsD3ROut.html 1979 1980 To generate a HTML table containing structure and alphanumeric data for 1981 molecules in a SD file along without any bells and whistles to interact with 1982 the table, type: 1983 1984 % RDKitDrawMoleculesAndDataTable.py --colVisibility no --freezeCols no 1985 --keysNavigation no --paging no --regexSearch no --scrollX no 1986 --scrollY no -i Sample.sdf -o SampleOut.html 1987 1988 To generate a HTML table containing structure and alphanumeric data for 1989 molecules in a SD file along with highlighting molecular weight values 1990 using a specified criterion, type: 1991 1992 % RDKitDrawMoleculesAndDataTable.py --highlightValues 1993 "MolecularWeight,numeric,le,500" -i Sample.sdf -o SampleOut.html 1994 1995 To generate a HTML table containing structure and alphanumeric data for 1996 molecules in a SD file along with highlighting range of molecular weight values 1997 using a specified criterion, type: 1998 1999 % RDKitDrawMoleculesAndDataTable.py --highlightValuesRanges 2000 "MolecularWeight,numeric,lt,400,gt,500" -i Sample.sdf -o SampleOut.html 2001 2002 To generate a HTML table containing structure and alphanumeric data for 2003 molecules in a SD file along with highlighting molecular weight values and 2004 ranges of SLogP values using a specified criterion and color schemes, type: 2005 2006 % RDKitDrawMoleculesAndDataTable.py --highlightValues 2007 "MolecularWeight,numeric,le,500" --highlightValuesRanges 2008 "SLogP,numeric,lt,0,gt,5" --highlightColors "colorclass,table-success, 2009 table-danger" --highlightColorsRanges "colorclass,table-danger, 2010 table-success,table-warning" -i Sample.sdf -o SampleOut.html 2011 2012 To generate a HTML table containing structure and alphanumeric data for 2013 molecules in a SD file along with highlighting RuleOf5 physicochemical 2014 properties using a pre-defined set of criteria, type: 2015 2016 % RDKitDrawMoleculesAndDataTable.py --highlightValuesClasses RuleOf5 2017 -i Sample.sdf -o SampleOut.html 2018 2019 To generate a HTML table containing structure and alphanumeric data for 2020 molecules in a SD file along with all the bells and whistles to interact 2021 with the table and highlight a specific SMARTS pattern in molecules, type: 2022 2023 % RDKitDrawMoleculesAndDataTable.py --highlightSMARTS "c1ccccc1" 2024 -i Sample.sdf -o SampleOut.html 2025 2026 To generate a HTML table containing structure and alphanumeric data for 2027 molecules in a SD file along with highlighting of values using random colors 2028 from a default list of colors, type: 2029 2030 % RDKitDrawMoleculesAndDataTable.py --highlightValuesClasses Random 2031 -i Sample.sdf -o SampleOut.html 2032 2033 To generate a HTML table containing structure and alphanumeric data for 2034 molecules in a SD file along with highlighting of values using random colors 2035 from a specified list of colors, type: 2036 2037 % RDKitDrawMoleculesAndDataTable.py --highlightValuesClasses Random 2038 --highlightColorsRandom "colorspec,Lavendar,MediumPurple,SkyBlue, 2039 CornflowerBlue,LightGreen,MediumSeaGreen,Orange,Coral,Khaki,Gold, 2040 Salmon,LightPink,Aquamarine,MediumTurquoise,LightGray" 2041 -i Sample.sdf -o SampleOut.html 2042 2043 To generate a HTML table containing structure and alphanumeric data for 2044 molecules in a SMILES file specific columns, type: 2045 2046 % RDKitDrawMoleculesAndDataTable.py --infileParams "smilesDelimiter, 2047 comma, smilesColumn,1,smilesNameColumn,2" 2048 -i SampleSMILES.csv -o SampleOut.html 2049 2050 Author: 2051 Manish Sud(msud@san.rr.com) 2052 2053 See also: 2054 RDKitConvertFileFormat.py, RDKitDrawMolecules.py, RDKitRemoveDuplicateMolecules.py, 2055 RDKitSearchFunctionalGroups.py, RDKitSearchSMARTS.py 2056 2057 Copyright: 2058 Copyright (C) 2024 Manish Sud. All rights reserved. 2059 2060 The functionality available in this script is implemented using RDKit, an 2061 open source toolkit for cheminformatics developed by Greg Landrum. 2062 2063 This file is part of MayaChemTools. 2064 2065 MayaChemTools is free software; you can redistribute it and/or modify it under 2066 the terms of the GNU Lesser General Public License as published by the Free 2067 Software Foundation; either version 3 of the License, or (at your option) any 2068 later version. 2069 2070 """ 2071 2072 if __name__ == "__main__": 2073 main()