MayaChemTools

   1 #!/bin/env python
   2 #
   3 # File: RDKitEnumerateCompoundLibrary.py
   4 # Author: Manish Sud <msud@san.rr.com>
   5 #
   6 # Copyright (C) 2019 Manish Sud. All rights reserved.
   7 #
   8 # The functionality available in this script is implemented using RDKit, an
   9 # open source toolkit for cheminformatics developed by Greg Landrum.
  10 #
  11 # This file is part of MayaChemTools.
  12 #
  13 # MayaChemTools is free software; you can redistribute it and/or modify it under
  14 # the terms of the GNU Lesser General Public License as published by the Free
  15 # Software Foundation; either version 3 of the License, or (at your option) any
  16 # later version.
  17 #
  18 # MayaChemTools is distributed in the hope that it will be useful, but without
  19 # any warranty; without even the implied warranty of merchantability of fitness
  20 # for a particular purpose.  See the GNU Lesser General Public License for more
  21 # details.
  22 #
  23 # You should have received a copy of the GNU Lesser General Public License
  24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  26 # Boston, MA, 02111-1307, USA.
  27 #
  28 
  29 from __future__ import print_function
  30 
  31 # Add local python path to the global path and import standard library modules...
  32 import os
  33 import sys;  sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python"))
  34 import time
  35 import re
  36 
  37 # RDKit imports...
  38 try:
  39     from rdkit import rdBase
  40     from rdkit import Chem
  41     from rdkit.Chem import AllChem
  42     from rdkit.Chem import FunctionalGroups
  43 except ImportError as ErrMsg:
  44     sys.stderr.write("\nFailed to import RDKit module/package: %s\n" % ErrMsg)
  45     sys.stderr.write("Check/update your RDKit environment and try again.\n\n")
  46     sys.exit(1)
  47 
  48 # MayaChemTools imports...
  49 try:
  50     from docopt import docopt
  51     import MiscUtil
  52     import RDKitUtil
  53 except ImportError as ErrMsg:
  54     sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg)
  55     sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n")
  56     sys.exit(1)
  57 
# Base name of the script as invoked; used in the start and end status messages...
ScriptName = os.path.basename(sys.argv[0])
# Raw command line options; rebound to the docopt result in RetrieveOptions()...
Options = {}
# Processed option values; filled in by ProcessOptions() and read by the other functions...
OptionsInfo = {}

# Reaction names data; RetrieveReactionNamesInfo() stores a 'Names' list and a
# 'SMIRKSPattern' dictionary, keyed by reaction name, in this dictionary...
RxnNamesMap = {}
  63 
  64 def main():
  65     """Start execution of the script"""
  66     
  67     MiscUtil.PrintInfo("\n%s (RDK v%s; %s): Starting...\n" % (ScriptName, rdBase.rdkitVersion, time.asctime()))
  68     
  69     (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime()
  70     
  71     # Retrieve command line arguments and options...
  72     RetrieveOptions()
  73     
  74     # Process and validate command line arguments and options...
  75     ProcessOptions()
  76     
  77     # Perform actions required by the script...
  78     PerformChemicalLibraryEnumeration()
  79     
  80     MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName)
  81     MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime))
  82 
def PerformChemicalLibraryEnumeration():
    """Process reaction names information and enumerate the compound library."""

    # Returns without doing anything unless the reaction is specified by name...
    ProcessReactionNamesInfo()
    PerformEnumeration()
  88 
  89 def PerformEnumeration():
  90     """Enumerate virutal compound library."""
  91 
  92     ReactantFilesList = OptionsInfo["ReactantFilesList"]
  93     Outfile = OptionsInfo["Outfile"]
  94 
  95     RxnByNameMode = OptionsInfo["RxnByNameMode"]
  96     if RxnByNameMode:
  97         RxnSMIRKSPattern = OptionsInfo["RxnNameSMIRKS"]
  98     else:
  99         RxnSMIRKSPattern = OptionsInfo["SpecifiedSMIRKS"]
 100 
 101     # Set up a reaction and match number of reactants in rxn SMIRKS against number of
 102     # reactant files...
 103     Rxn = AllChem.ReactionFromSmarts(RxnSMIRKSPattern)
 104     RxnReactantsCount = Rxn.GetNumReactantTemplates()
 105 
 106     ReactantFilesList = OptionsInfo["ReactantFilesList"]
 107     ReactantFilesCount = len(ReactantFilesList)
 108     if  ReactantFilesCount != RxnReactantsCount:
 109         MiscUtil.PrintError("The number of specified reactant files, %d, must match number of rectants, %d, in reaction SMIRKS" % (ReactantFilesCount, RxnReactantsCount))
 110         
 111     # Retrieve reactant molecules...
 112     ReactantsMolsList = RetrieveReactantsMolecules()
 113     
 114     # Set up  a molecule writer...
 115     Writer = None
 116     Writer = RDKitUtil.MoleculesWriter(Outfile, **OptionsInfo["OutfileParams"])
 117     if Writer is None:
 118         MiscUtil.PrintError("Failed to setup a writer for output fie %s " % Outfile)
 119 
 120     MiscUtil.PrintInfo("\nGenerating file %s..." % Outfile)
 121 
 122     # Set up reaction...
 123     ReturnReactants = False
 124     if OptionsInfo["UseReactantNames"]:
 125         ReturnReactants = True
 126     RxnProducts = AllChem.EnumerateLibraryFromReaction(Rxn, ReactantsMolsList, ReturnReactants)
 127 
 128     # Generate product molecules and write them out...
 129     
 130     Compute2DCoords = OptionsInfo["Compute2DCoords"]
 131     Sanitize = OptionsInfo["Sanitize"]
 132     
 133     ProdMolCount = 0
 134     ValidProdMolCount = 0
 135     
 136     if ReturnReactants:
 137         for Products, Reactants in list(RxnProducts):
 138             for ProdMol in Products:
 139                 ProdMolCount += 1
 140 
 141                 # Set product name...
 142                 ReactantMolNames = [ReactantMol.GetProp("_Name") for ReactantMol in Reactants]
 143                 Delimiter = "_"
 144                 ProdMolName = Delimiter.join(ReactantMolNames) + "_Prod%d" % ProdMolCount
 145                 ProdMol.SetProp("_Name", ProdMolName)
 146 
 147                 Status = WriteProductMolecule(Writer, ProdMol, Sanitize, Compute2DCoords)
 148                 if Status:
 149                     ValidProdMolCount += 1
 150     else:
 151         for Products in list(RxnProducts):
 152             for ProdMol in Products:
 153                 ProdMolCount += 1
 154 
 155                 # Set product name...
 156                 ProdMolName = "Prod%d" % ProdMolCount
 157                 ProdMol.SetProp("_Name", ProdMolName)
 158                 
 159                 Status = WriteProductMolecule(Writer, ProdMol, Sanitize, Compute2DCoords)
 160                 if Status:
 161                     ValidProdMolCount += 1
 162 
 163     if Writer is not None:
 164         Writer.close()
 165     
 166     if ValidProdMolCount:
 167         MiscUtil.PrintInfo("\nTotal number of product molecules: %d" % ProdMolCount)
 168         MiscUtil.PrintInfo("Number of valid product molecules: %d" % ValidProdMolCount)
 169         MiscUtil.PrintInfo("Number of ignored product molecules: %d" % (ProdMolCount - ValidProdMolCount))
 170     else:
 171         MiscUtil.PrintInfo("\nThe compound library enumeration failed to generate any product molecules.\nCheck to make sure the reactants specified in input files match their corresponding specifications in reaction SMIRKS and try again.")
 172 
 173 def WriteProductMolecule(Writer, ProdMol, Sanitize, Compute2DCoords):
 174     """Prepare and write out product  molecule."""
 175 
 176     try:
 177         if Sanitize:
 178             Chem.SanitizeMol(ProdMol)
 179     except (RuntimeError, ValueError):
 180         MiscUtil.PrintWarning("Ignoring product molecule: Failed to sanitize...\n")
 181         return False
 182 
 183     try:
 184         if Compute2DCoords:
 185             AllChem.Compute2DCoords(ProdMol)
 186     except (RuntimeError, ValueError):
 187         MiscUtil.PrintWarning("Ignoring product molecule: Failed to compute 2D coordinates...\n")
 188         return False
 189 
 190     Writer.write(ProdMol)
 191 
 192     return True
 193 
 194 def RetrieveReactantsMolecules():
 195     """Retrieve reactant molecules from each reactant file and return a list containing lists of molecules
 196     for each reactant file."""
 197 
 198     MiscUtil.PrintInfo("\nProcessing reactant file(s)...")
 199     
 200     ReactantsMolsList = []
 201     ReactantFilesList = OptionsInfo["ReactantFilesList"]
 202     UseReactantNames = OptionsInfo["UseReactantNames"]
 203     ReactantCount = 0
 204     
 205     for FileIndex in range(0, len(ReactantFilesList)):
 206         ReactantCount += 1
 207         ReactantFile = ReactantFilesList[FileIndex]
 208         
 209         MiscUtil.PrintInfo("\nProcessing reactant file: %s..." % ReactantFile)
 210 
 211         Mols  = RDKitUtil.ReadMolecules(ReactantFile, **OptionsInfo["InfileParams"])
 212         
 213         ValidMols = []
 214         MolCount = 0
 215         ValidMolCount = 0
 216         
 217         for Mol in Mols:
 218             MolCount += 1
 219             if Mol is None:
 220                 continue
 221             
 222             if RDKitUtil.IsMolEmpty(Mol):
 223                 MolName = RDKitUtil.GetMolName(Mol, MolCount)
 224                 MiscUtil.PrintWarning("Ignoring empty molecule: %s" % MolName)
 225                 continue
 226             
 227             ValidMolCount += 1
 228 
 229             # Check and set mol name...
 230             if UseReactantNames:
 231                 MolName = RDKitUtil.GetMolName(Mol)
 232                 if not len(MolName):
 233                     MolName = "React%dMol%d" % (ReactantCount, MolCount)
 234                     Mol.SetProp("_Name", MolName)
 235                 
 236             ValidMols.append(Mol)
 237 
 238         ReactantsMolsList.append(ValidMols)
 239         
 240         MiscUtil.PrintInfo("Total number of molecules: %d" % MolCount)
 241         MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount)
 242         MiscUtil.PrintInfo("Number of ignored molecules: %d" % (MolCount - ValidMolCount))
 243     
 244     return ReactantsMolsList
 245     
 246 def ProcessReactionNamesInfo():
 247     """Process reaction names information."""
 248     
 249     if not OptionsInfo["RxnByNameMode"]:
 250         return
 251 
 252     RetrieveReactionNamesInfo()
 253     ProcessSpecifiedReactionName()
 254 
 255 def ProcessSpecifiedReactionName():
 256     """Process and validate specified reaction name."""
 257 
 258     OptionsInfo["RxnNameSMIRKS"] = None
 259     
 260     # Set up a map of valid group rxn names for checking specified rxn names...
 261     CanonicalRxnNameMap = {}
 262     for Name in RxnNamesMap['Names']:
 263         CanonicalRxnNameMap[Name.lower()] = Name
 264     
 265     CanonicalRxnName = OptionsInfo["RxnName"].lower()
 266     if CanonicalRxnName in CanonicalRxnNameMap:
 267         Name = CanonicalRxnNameMap[CanonicalRxnName]
 268         OptionsInfo["RxnNameSMIRKS"] = RxnNamesMap['SMIRKSPattern'][Name]
 269     else:
 270         MiscUtil.PrintError("The rxn name name, %s, specified using \"-r, --rxnName\" option is not a valid name." % (OptionsInfo["RxnName"]))
 271     
 272 def ProcessListReactionNamesOption():
 273     """Process list reaction names information."""
 274 
 275     # Validate and process dataFile option for listing reaction names information...
 276     OptionsInfo["RxnNamesFile"] = None
 277     if not re.match("^auto$", Options["--rxnNamesFile"], re.I):
 278         MiscUtil.ValidateOptionFilePath("--rxnNamesFile", Options["--rxnNamesFile"])
 279         OptionsInfo["RxnNamesFile"] = Options["--rxnNamesFile"]
 280     
 281     RetrieveReactionNamesInfo()
 282     ListReactionNamesInfo()
 283 
 284 def RetrieveReactionNamesInfo():
 285     """Retrieve reaction names information."""
 286 
 287     RxnNamesFilePath = OptionsInfo["RxnNamesFile"]
 288     if RxnNamesFilePath is None:
 289         MayaChemToolsDataDir = MiscUtil.GetMayaChemToolsLibDataPath()
 290         RxnNamesFilePath = os.path.join(MayaChemToolsDataDir, "ReactionNamesAndSMIRKS.csv")
 291         
 292     MiscUtil.PrintInfo("\nRetrieving reaction names and SMIRKS patterns from file %s" % (RxnNamesFilePath))
 293     
 294     if not os.path.exists(RxnNamesFilePath):
 295         MiscUtil.PrintError("The reaction names file, %s, doesn't exist.\n" % (RxnNamesFilePath))
 296 
 297     Delimiter = ','
 298     QuoteChar = '"'
 299     IgnoreHeaderLine = True
 300     RxnLinesWords = MiscUtil.GetTextLinesWords(RxnNamesFilePath, Delimiter, QuoteChar, IgnoreHeaderLine)
 301     
 302     RxnNamesMap['Names'] = []
 303     RxnNamesMap['SMIRKSPattern'] = {}
 304     
 305     for LineWords in RxnLinesWords:
 306         Name = LineWords[0]
 307         SMIRKSPattern = LineWords[1]
 308 
 309         if Name in RxnNamesMap['SMIRKSPattern']:
 310             MiscUtil.PrintWarning("Ignoring duplicate reaction name: %s..." % Name)
 311         else:
 312             RxnNamesMap['Names'].append(Name)
 313             RxnNamesMap['SMIRKSPattern'][Name] = SMIRKSPattern
 314         
 315     if not len(RxnNamesMap['Names']):
 316         MiscUtil.PrintError("Failed to retrieve any reaction names and SMIRKS patterns...")
 317         
 318     MiscUtil.PrintInfo("Total number of reactions present in reaction names and SMIRKS file: %d" % (len(RxnNamesMap['Names'])))
 319 
 320 def ListReactionNamesInfo():
 321     """List reaction names information"""
 322 
 323     MiscUtil.PrintInfo("\nListing available freaction names and SMIRKS patterns...")
 324     MiscUtil.PrintInfo("\nReactionName\tSMIRKSPattern")
 325     
 326     for Name in sorted(RxnNamesMap['Names']):
 327         SMIRKSPattern = RxnNamesMap['SMIRKSPattern'][Name]
 328         MiscUtil.PrintInfo("%s\t%s" % (Name, SMIRKSPattern))
 329 
 330     MiscUtil.PrintInfo("")
 331 
 332 def ProcessOptions():
 333     """Process and validate command line arguments and options"""
 334     
 335     MiscUtil.PrintInfo("Processing options...")
 336     
 337     # Validate options...
 338     ValidateOptions()
 339     
 340     Compute2DCoords = True
 341     if not re.match("^yes$", Options["--compute2DCoords"], re.I):
 342         Compute2DCoords = False
 343     OptionsInfo["Compute2DCoords"]  = Compute2DCoords
 344 
 345     OptionsInfo["Mode"] = Options["--mode"]
 346     RxnByNameMode = True
 347     if not re.match("^RxnByName$", Options["--mode"], re.I):
 348         RxnByNameMode = False
 349     OptionsInfo["RxnByNameMode"] = RxnByNameMode
 350 
 351     OptionsInfo["ProdMolNamesMode"] = Options["--prodMolNames"]
 352     UseReactantNames = False
 353     if re.match("^UseReactants$", Options["--prodMolNames"], re.I):
 354         UseReactantNames = True
 355     OptionsInfo["UseReactantNames"] = UseReactantNames
 356     
 357     OptionsInfo["RxnName"] = Options["--rxnName"]
 358     OptionsInfo["RxnNameSMIRKS"] = None
 359     if OptionsInfo["RxnByNameMode"]:
 360         if not Options["--rxnName"]:
 361             MiscUtil.PrintError("No rxn name specified using \"-r, --rxnName\" option during \"RxnByName\" value of \"-m, --mode\" option")
 362 
 363     OptionsInfo["RxnNamesFile"] = None
 364     if not re.match("^auto$", Options["--rxnNamesFile"], re.I):
 365         OptionsInfo["RxnNamesFile"] = Options["--rxnNamesFile"]
 366 
 367     ReactantFiles = re.sub(" ", "", Options["--infiles"])
 368     ReactantFilesList = []
 369     ReactantFilesList = ReactantFiles.split(",")
 370     OptionsInfo["ReactantFiles"] = ReactantFiles
 371     OptionsInfo["ReactantFilesList"] = ReactantFilesList
 372 
 373     OptionsInfo["SpecifiedSMIRKS"] = Options["--smirksRxn"]
 374     if not OptionsInfo["RxnByNameMode"]:
 375         if not Options["--smirksRxn"]:
 376             MiscUtil.PrintError("No rxn SMIRKS pattern specified using \"-r, --rxnName\" option during \"RxnByName\" value of \"-m, --mode\" option")
 377     
 378     OptionsInfo["Outfile"] = Options["--outfile"]
 379     OptionsInfo["Overwrite"] = Options["--overwrite"]
 380 
 381     # Use first reactant file as input file as all input files have the same format...
 382     OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters("--infileParams", Options["--infileParams"], ReactantFilesList[0])
 383 
 384     # No need to pass any input or output file name due to absence of any auto parameter...
 385     OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters("--outfileParams", Options["--outfileParams"])
 386     
 387     Sanitize = True
 388     if not re.match("^yes$", Options["--sanitize"], re.I):
 389         Sanitize = False
 390     OptionsInfo["Sanitize"]  = Sanitize
 391 
 392 def RetrieveOptions():
 393     """Retrieve command line arguments and options"""
 394     
 395     # Get options...
 396     global Options
 397     Options = docopt(_docoptUsage_)
 398 
 399     # Set current working directory to the specified directory...
 400     WorkingDir = Options["--workingdir"]
 401     if WorkingDir:
 402         os.chdir(WorkingDir)
 403     
 404     # Handle examples option...
 405     if "--examples" in Options and Options["--examples"]:
 406         MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_))
 407         sys.exit(0)
 408     
 409     # Handle listing of functional group information...
 410     if  Options and Options["--list"]:
 411         ProcessListReactionNamesOption()
 412         sys.exit(0)
 413 
 414 def ValidateOptions():
 415     """Validate option values"""
 416     
 417     MiscUtil.ValidateOptionTextValue("--compute2DCoords", Options["--compute2DCoords"], "yes no")
 418     
 419     MiscUtil.ValidateOptionTextValue("-m, --mode", Options["--mode"], "RxnByName RxnBySMIRKS")
 420     MiscUtil.ValidateOptionTextValue("-p, --prodMolNames", Options["--prodMolNames"], "UseReactants Sequential")
 421     
 422     if not re.match("^auto$", Options["--rxnNamesFile"], re.I):
 423         MiscUtil.ValidateOptionFilePath("--rxnNamesFile", Options["--rxnNamesFile"])
 424 
 425     MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "sdf sd smi")
 426     MiscUtil.ValidateOptionsOutputFileOverwrite("-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"])
 427     
 428     ReactantFiles = re.sub(" ", "", Options["--infiles"])
 429     if not ReactantFiles:
 430         MiscUtil.PrintError("No reactant files specified for \"-i, --infiles\" option")
 431 
 432     # Validate file extensions...
 433     for ReactantFile in ReactantFiles.split(","):
 434         MiscUtil.ValidateOptionFilePath("-i, --infiles", ReactantFile)
 435         MiscUtil.ValidateOptionFileExt("-i, --infiles", ReactantFile, "sdf sd smi csv tsv txt")
 436         MiscUtil.ValidateOptionsDistinctFileNames("-i, --infiles", ReactantFile, "-o, --outfile", Options["--outfile"])
 437         
 438     # Match file formats...
 439     FirstFile = True
 440     FirstFileFormat = ""
 441     for ReactantFile in ReactantFiles.split(","):
 442         FileFormat = ""
 443         if MiscUtil.CheckFileExt(ReactantFile, "sdf sd"):
 444             FileFormat = "SD"
 445         elif MiscUtil.CheckFileExt(ReactantFile, "smi csv tsv txt"):
 446             FileFormat = "SMILES"
 447         else:
 448             MiscUtil.PrintError("The file name specified , %s, for option \"-i, --infiles\" is not valid. Supported file formats: sdf sd smi csv tsv txt\n" % ReactantFile)
 449             
 450         if FirstFile:
 451             FirstFile = False
 452             FirstFileFormat = FileFormat
 453             continue
 454         
 455         if not re.match("^%s$" % FirstFileFormat, FileFormat, re.IGNORECASE):
 456             MiscUtil.PrintError("All reactant file names -  %s - specified using option \"-i, --infiles\" must have the same file format.\n" % ReactantFiles)
 457             
 458 
 459     MiscUtil.ValidateOptionTextValue("--sanitize", Options["--sanitize"], "yes no")
 460     
 461 # Setup a usage string for docopt...
 462 _docoptUsage_ = """
 463 RDKitEnumerateCompoundLibrary.py - Enumerate a virtual compound library
 464 
 465 Usage:
 466     RDKitEnumerateCompoundLibrary.py  [--compute2DCoords <yes or no>] [--infileParams <Name,Value,...>]
 467                                       [--mode <RxnByName or RxnBySMIRKS>] [--outfileParams <Name,Value,...>] [--overwrite]
 468                                       [--prodMolNames <UseReactants or Sequential>] [--rxnName <text>]
 469                                       [--rxnNamesFile <FileName or auto>] [--smirksRxn <text>] [--sanitize <yes or no>]
 470                                       [-w <dir>] -i  <ReactantFile1,...> -o <outfile>
 471     RDKitEnumerateCompoundLibrary.py [--rxnNamesFile <FileName or auto>] -l | --list
 472     RDKitEnumerateCompoundLibrary.py -h | --help | -e | --examples
 473 
 474 Description:
 475     Perform a combinatorial enumeration of a virtual library of molecules for a reaction specified
 476     using a reaction name or SMIRKS pattern and reactant input files.
 477 
  478     The SMIRKS patterns for supported reaction names [ Ref 134 ] are retrieved from file,
 479     ReactionNamesAndSMIRKS.csv, available in MayaChemTools data directory. The current
 480     list of supported reaction names is shown below:
 481 
 482     '1,2,4_triazole_acetohydrazide', '1,2,4_triazole_carboxylic_acid_ester', 3_nitrile_pyridine,
 483     Benzimidazole_derivatives_aldehyde, Benzimidazole_derivatives_carboxylic_acid_ester,
 484     Benzofuran, Benzothiazole, Benzothiophene, Benzoxazole_aromatic_aldehyde,
 485     Benzoxazole_carboxylic_acid, Buchwald_Hartwig, Decarboxylative_coupling, Fischer_indole,
 486     Friedlaender_chinoline, Grignard_alcohol, Grignard_carbonyl, Heck_non_terminal_vinyl,
 487     Heck_terminal_vinyl, Heteroaromatic_nuc_sub, Huisgen_Cu_catalyzed_1,4_subst,
 488     Huisgen_disubst_alkyne, Huisgen_Ru_catalyzed_1,5_subst, Imidazole, Indole, Mitsunobu_imide,
 489     Mitsunobu_phenole, Mitsunobu_sulfonamide, Mitsunobu_tetrazole_1, Mitsunobu_tetrazole_2,
 490     Mitsunobu_tetrazole_3, Mitsunobu_tetrazole_4, N_arylation_heterocycles, Negishi,
 491     Niementowski_quinazoline, Nucl_sub_aromatic_ortho_nitro, Nucl_sub_aromatic_para_nitro,
 492     Oxadiazole, Paal_Knorr_pyrrole, Phthalazinone, Pictet_Spengler, Piperidine_indole,
 493     Pyrazole, Reductive_amination, Schotten_Baumann_amide, Sonogashira, Spiro_chromanone,
 494     Stille, Sulfon_amide, Suzuki, Tetrazole_connect_regioisomer_1, Tetrazole_connect_regioisomer_2,
 495     Tetrazole_terminal, Thiazole, Thiourea, Triaryl_imidazole, Urea, Williamson_ether, Wittig 
 496 
 497     The supported input file formats are: SD (.sdf, .sd), SMILES (.smi, .csv, .tsv, .txt)
 498 
 499     The supported output file formats are:  SD (.sdf, .sd), SMILES (.smi)
 500 
 501 Options:
 502     -c, --compute2DCoords <yes or no>  [default: yes]
 503         Compute 2D coordinates of product molecules before writing them out.
 504     -i, --infiles <ReactantFile1, ReactantFile2...>
 505         Comma delimited list of reactant file names for enumerating a compound library
 506         using reaction SMIRKS. The number of reactant files must match number of
 507         reaction components in reaction SMIRKS. All reactant input files must have
 508         the same format.
 509     --infileParams <Name,Value,...>  [default: auto]
 510         A comma delimited list of parameter name and value pairs for reading
 511         molecules from files. The supported parameter names for different file
 512         formats, along with their default values, are shown below:
 513             
 514             SD, MOL: removeHydrogens,yes,sanitize,yes,strictParsing,yes
 515             SMILES: smilesColumn,1,smilesNameColumn,2,smilesDelimiter,space,
 516                 smilesTitleLine,auto,sanitize,yes
 517             
 518         Possible values for smilesDelimiter: space, comma or tab. These parameters apply
 519         to all reactant input files, which must have the same file format.
 520     -e, --examples
 521         Print examples.
 522     -h, --help
 523         Print this help message.
 524     -l, --list
 525         List available reaction names along with corresponding SMIRKS patterns without
 526         performing any enumeration.
 527     -m, --mode <RxnByName or RxnBySMIRKS>  [default: RxnByName]
 528         Indicate whether a reaction is specified by a reaction name or a SMIRKS pattern.
 529         Possible values: RxnByName or RxnBySMIRKS.
 530     -o, --outfile <outfile>
 531         Output file name.
 532     --outfileParams <Name,Value,...>  [default: auto]
 533         A comma delimited list of parameter name and value pairs for writing
 534         molecules to files. The supported parameter names for different file
 535         formats, along with their default values, are shown below:
 536             
 537             SD: kekulize,no
 538             SMILES: kekulize,no,smilesDelimiter,space, smilesIsomeric,yes,
 539                 smilesTitleLine,yes
 540             
 541     -p, --prodMolNames <UseReactants or Sequential>  [default: UseReactants]
 542         Generate names of product molecules using reactant names or assign names in
 543         a sequential order. Possible values: UseReactants or Sequential. Format of
 544         molecule names: UseReactants - <ReactName1>_<ReactName2>..._Prod<Num>;
 545         Sequential - Prod<Num>
 546     --overwrite
 547         Overwrite existing files.
 548     -r, --rxnName <text>
 549         Name of a reaction to use for enumerating a compound library. This option
 550         is only used during 'RxnByName' value of '-m, --mode' option.
 551     --rxnNamesFile <FileName or auto>  [default: auto]
 552         Specify a file name containing data for names of reactions and SMIRKS patterns or
 553         use default file, ReactionNamesAndSMIRKS.csv, available in MayaChemTools data
 554         directory.
 555         
 556         Reactions SMIRKS file format: RxnName,RxnSMIRKS.
 557         
 558         The format of data in local reaction names file must match format of the reaction
 559         SMIRKS file available in MayaChemTools data directory.
 560     -s, --smirksRxn <text>
 561         SMIRKS pattern of a reaction to use for enumerating a compound library. This
 562         option is only used during 'RxnBySMIRKS' value of '-m, --mode' option.
 563     --sanitize <yes or no>  [default: yes]
 564         Sanitize product molecules before writing them out.
 565     -w, --workingdir <dir>
 566         Location of working directory which defaults to the current directory.
 567 
 568 Examples:
 569     To list all available reaction names along with their SMIRKS pattern, type:
 570 
 571          % RDKitEnumerateCompoundLibrary.py -l
 572 
 573     To perform a combinatorial enumeration of a virtual compound library corresponding
 574     to named amide reaction, Schotten_Baumann_amide and write out a SMILES file
 575     type:
 576 
 577         % RDKitEnumerateCompoundLibrary.py -r Schotten_Baumann_amide
 578           -i 'SampleAcids.smi,SampleAmines.smi' -o SampleOutCmpdLibrary.smi
 579 
 580     To perform a combinatorial enumeration of a virtual compound library corresponding
 581     to an amide reaction specified using a SMIRKS pattern and write out a SD file containing
 582     sanitized molecules, computed 2D coordinates, and generation of molecule names from
 583     reactant names, type:
 584 
 585         % RDKitEnumerateCompoundLibrary.py -m RxnBySMIRKS
 586           -s '[O:2]=[C:1][OH].[N:3]>>[O:2]=[C:1][N:3]'
 587           -i 'SampleAcids.smi,SampleAmines.smi' -o SampleOutCmpdLibrary.sdf
 588 
 589     To perform a combinatorial enumeration of a virtual compound library corresponding
 590     to an amide reaction specified using a SMIRKS pattern  and write out a SD file containing
 591     unsanitized molecules, without generating 2D coordinates, and a sequential generation
 592     of molecule names, type:
 593 
  594         % RDKitEnumerateCompoundLibrary.py -m RxnBySMIRKS -c no --sanitize no
 595           -p Sequential -s '[O:2]=[C:1][OH].[N:3]>>[O:2]=[C:1][N:3]'
 596           -i 'SampleAcids.smi,SampleAmines.smi' -o SampleOutCmpdLibrary.sdf
 597 
 598 Author:
 599     Manish Sud(msud@san.rr.com)
 600 
 601 See also:
 602     RDKitConvertFileFormat.py, RDKitFilterPAINS.py, RDKitSearchFunctionalGroups.py,
 603     RDKitSearchSMARTS.py
 604 
 605 Copyright:
 606     Copyright (C) 2019 Manish Sud. All rights reserved.
 607 
 608     The functionality available in this script is implemented using RDKit, an
 609     open source toolkit for cheminformatics developed by Greg Landrum.
 610 
 611     This file is part of MayaChemTools.
 612 
 613     MayaChemTools is free software; you can redistribute it and/or modify it under
 614     the terms of the GNU Lesser General Public License as published by the Free
 615     Software Foundation; either version 3 of the License, or (at your option) any
 616     later version.
 617 
 618 """
 619 
# Run the script only when this file is executed directly and not when it is imported...
if __name__ == "__main__":
    main()