MayaChemTools

   1 #!/bin/env python
   2 #
   3 # File: RDKitEnumerateCompoundLibrary.py
   4 # Author: Manish Sud <msud@san.rr.com>
   5 #
   6 # Copyright (C) 2024 Manish Sud. All rights reserved.
   7 #
   8 # The functionality available in this script is implemented using RDKit, an
   9 # open source toolkit for cheminformatics developed by Greg Landrum.
  10 #
  11 # This file is part of MayaChemTools.
  12 #
  13 # MayaChemTools is free software; you can redistribute it and/or modify it under
  14 # the terms of the GNU Lesser General Public License as published by the Free
  15 # Software Foundation; either version 3 of the License, or (at your option) any
  16 # later version.
  17 #
  18 # MayaChemTools is distributed in the hope that it will be useful, but without
  19 # any warranty; without even the implied warranty of merchantability of fitness
  20 # for a particular purpose.  See the GNU Lesser General Public License for more
  21 # details.
  22 #
  23 # You should have received a copy of the GNU Lesser General Public License
  24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  26 # Boston, MA, 02111-1307, USA.
  27 #
  28 
  29 from __future__ import print_function
  30 
  31 # Add local python path to the global path and import standard library modules...
  32 import os
  33 import sys;  sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python"))
  34 import time
  35 import re
  36 
  37 # RDKit imports...
  38 try:
  39     from rdkit import rdBase
  40     from rdkit import Chem
  41     from rdkit.Chem import AllChem
  42     from rdkit.Chem import FunctionalGroups
  43 except ImportError as ErrMsg:
  44     sys.stderr.write("\nFailed to import RDKit module/package: %s\n" % ErrMsg)
  45     sys.stderr.write("Check/update your RDKit environment and try again.\n\n")
  46     sys.exit(1)
  47 
  48 # MayaChemTools imports...
  49 try:
  50     from docopt import docopt
  51     import MiscUtil
  52     import RDKitUtil
  53 except ImportError as ErrMsg:
  54     sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg)
  55     sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n")
  56     sys.exit(1)
  57 
# Name of this script as invoked on the command line; used in status messages.
ScriptName = os.path.basename(sys.argv[0])
# Raw command line options; rebound to the docopt result in RetrieveOptions().
Options = {}
# Processed option values; filled in by ProcessOptions() and the various
# Process*Option() functions and read by the rest of the script.
OptionsInfo = {}

# Reaction names data filled in by RetrieveReactionNamesInfo(): 'Names' holds the
# list of reaction names and 'SMARTSPattern' maps each name to its reaction SMARTS.
RxnNamesMap = {}
  63 
  64 def main():
  65     """Start execution of the script."""
  66     
  67     MiscUtil.PrintInfo("\n%s (RDKit v%s; MayaChemTools v%s; %s): Starting...\n" % (ScriptName, rdBase.rdkitVersion, MiscUtil.GetMayaChemToolsVersion(), time.asctime()))
  68     
  69     (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime()
  70     
  71     # Retrieve command line arguments and options...
  72     RetrieveOptions()
  73     
  74     # Process and validate command line arguments and options...
  75     ProcessOptions()
  76     
  77     # Perform actions required by the script...
  78     PerformChemicalLibraryEnumeration()
  79     
  80     MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName)
  81     MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime))
  82 
def PerformChemicalLibraryEnumeration():
    """Enumerate the virtual compound library.

    Processes the reaction names information first (ProcessReactionNamesInfo()
    returns right away unless the script runs in reaction-by-name mode) and
    then runs the enumeration itself via PerformEnumeration().
    """

    ProcessReactionNamesInfo()
    PerformEnumeration()
  88 
  89 def PerformEnumeration():
  90     """Enumerate virutal compound library."""
  91 
  92     ReactantFilesList = OptionsInfo["ReactantFilesList"]
  93     Outfile = OptionsInfo["Outfile"]
  94 
  95     RxnByNameMode = OptionsInfo["RxnByNameMode"]
  96     if RxnByNameMode:
  97         RxnSMARTSPattern = OptionsInfo["RxnNameSMARTS"]
  98     else:
  99         RxnSMARTSPattern = OptionsInfo["SpecifiedSMARTS"]
 100 
 101     # Set up a reaction and match number of reactants in rxn SMARTS against number of
 102     # reactant files...
 103     MiscUtil.PrintInfo("\nValidating reaction SMARTS...")
 104     try:
 105         Rxn = AllChem.ReactionFromSmarts(RxnSMARTSPattern)
 106     except Exception as ErrMsg:
 107         MiscUtil.PrintError("Failed to validate reaction SMARTS %s\n%s\n" % (RxnSMARTSPattern, ErrMsg))
 108     
 109     RxnReactantsCount = Rxn.GetNumReactantTemplates()
 110 
 111     ReactantFilesList = OptionsInfo["ReactantFilesList"]
 112     ReactantFilesCount = len(ReactantFilesList)
 113     if  ReactantFilesCount != RxnReactantsCount:
 114         MiscUtil.PrintError("The number of specified reactant files, %d, must match number of reactants, %d, in reaction SMARTS" % (ReactantFilesCount, RxnReactantsCount))
 115         
 116     # Retrieve reactant molecules...
 117     ReactantsMolsList = RetrieveReactantsMolecules()
 118     
 119     # Set up  a molecule writer...
 120     Writer = None
 121     Writer = RDKitUtil.MoleculesWriter(Outfile, **OptionsInfo["OutfileParams"])
 122     if Writer is None:
 123         MiscUtil.PrintError("Failed to setup a writer for output fie %s " % Outfile)
 124 
 125     MiscUtil.PrintInfo("\nGenerating file %s..." % Outfile)
 126 
 127     # Set up reaction...
 128     ReturnReactants = False
 129     if OptionsInfo["UseReactantNames"]:
 130         ReturnReactants = True
 131     RxnProducts = AllChem.EnumerateLibraryFromReaction(Rxn, ReactantsMolsList, ReturnReactants)
 132     
 133     # Generate product molecules and write them out...
 134     
 135     Compute2DCoords = OptionsInfo["Compute2DCoords"]
 136     Sanitize = OptionsInfo["Sanitize"]
 137     
 138     ProdMolCount = 0
 139     ValidProdMolCount = 0
 140     
 141     if ReturnReactants:
 142         for Products, Reactants in list(RxnProducts):
 143             for ProdMol in Products:
 144                 ProdMolCount += 1
 145 
 146                 # Set product name...
 147                 ReactantMolNames = [ReactantMol.GetProp("_Name") for ReactantMol in Reactants]
 148                 Delimiter = "_"
 149                 ProdMolName = Delimiter.join(ReactantMolNames) + "_Prod%d" % ProdMolCount
 150                 ProdMol.SetProp("_Name", ProdMolName)
 151 
 152                 Status = WriteProductMolecule(Writer, ProdMol, Sanitize, Compute2DCoords)
 153                 if Status:
 154                     ValidProdMolCount += 1
 155     else:
 156         for Products in list(RxnProducts):
 157             for ProdMol in Products:
 158                 ProdMolCount += 1
 159 
 160                 # Set product name...
 161                 ProdMolName = "Prod%d" % ProdMolCount
 162                 ProdMol.SetProp("_Name", ProdMolName)
 163                 
 164                 Status = WriteProductMolecule(Writer, ProdMol, Sanitize, Compute2DCoords)
 165                 if Status:
 166                     ValidProdMolCount += 1
 167 
 168     if Writer is not None:
 169         Writer.close()
 170     
 171     if ValidProdMolCount:
 172         MiscUtil.PrintInfo("\nTotal number of product molecules: %d" % ProdMolCount)
 173         MiscUtil.PrintInfo("Number of valid product molecules: %d" % ValidProdMolCount)
 174         MiscUtil.PrintInfo("Number of ignored product molecules: %d" % (ProdMolCount - ValidProdMolCount))
 175     else:
 176         MiscUtil.PrintInfo("\nThe compound library enumeration failed to generate any product molecules.\nCheck to make sure the reactants specified in input files match their corresponding specifications in reaction SMARTS and try again.")
 177 
 178 def WriteProductMolecule(Writer, ProdMol, Sanitize, Compute2DCoords):
 179     """Prepare and write out product  molecule."""
 180 
 181     try:
 182         if Sanitize:
 183             Chem.SanitizeMol(ProdMol)
 184     except (RuntimeError, ValueError):
 185         MiscUtil.PrintWarning("Ignoring product molecule: Failed to sanitize...\n")
 186         return False
 187 
 188     try:
 189         if Compute2DCoords:
 190             AllChem.Compute2DCoords(ProdMol)
 191     except (RuntimeError, ValueError):
 192         MiscUtil.PrintWarning("Ignoring product molecule: Failed to compute 2D coordinates...\n")
 193         return False
 194 
 195     Writer.write(ProdMol)
 196 
 197     return True
 198 
 199 def RetrieveReactantsMolecules():
 200     """Retrieve reactant molecules from each reactant file and return a list containing lists of molecules
 201     for each reactant file."""
 202 
 203     MiscUtil.PrintInfo("\nProcessing reactant file(s)...")
 204     
 205     ReactantsMolsList = []
 206     ReactantFilesList = OptionsInfo["ReactantFilesList"]
 207     UseReactantNames = OptionsInfo["UseReactantNames"]
 208     ReactantCount = 0
 209     
 210     for FileIndex in range(0, len(ReactantFilesList)):
 211         ReactantCount += 1
 212         ReactantFile = ReactantFilesList[FileIndex]
 213         
 214         MiscUtil.PrintInfo("\nProcessing reactant file: %s..." % ReactantFile)
 215 
 216         Mols  = RDKitUtil.ReadMolecules(ReactantFile, **OptionsInfo["InfileParams"])
 217         
 218         ValidMols = []
 219         MolCount = 0
 220         ValidMolCount = 0
 221         
 222         for Mol in Mols:
 223             MolCount += 1
 224             if Mol is None:
 225                 continue
 226             
 227             if RDKitUtil.IsMolEmpty(Mol):
 228                 MolName = RDKitUtil.GetMolName(Mol, MolCount)
 229                 MiscUtil.PrintWarning("Ignoring empty molecule: %s" % MolName)
 230                 continue
 231             
 232             ValidMolCount += 1
 233 
 234             # Check and set mol name...
 235             if UseReactantNames:
 236                 MolName = RDKitUtil.GetMolName(Mol)
 237                 if not len(MolName):
 238                     MolName = "React%dMol%d" % (ReactantCount, MolCount)
 239                     Mol.SetProp("_Name", MolName)
 240                 
 241             ValidMols.append(Mol)
 242 
 243         ReactantsMolsList.append(ValidMols)
 244         
 245         MiscUtil.PrintInfo("Total number of molecules: %d" % MolCount)
 246         MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount)
 247         MiscUtil.PrintInfo("Number of ignored molecules: %d" % (MolCount - ValidMolCount))
 248     
 249     return ReactantsMolsList
 250     
 251 def ProcessReactionNamesInfo():
 252     """Process reaction names information."""
 253     
 254     if not OptionsInfo["RxnByNameMode"]:
 255         return
 256 
 257     RetrieveReactionNamesInfo()
 258     ProcessSpecifiedReactionName()
 259 
 260 def ProcessSpecifiedReactionName():
 261     """Process and validate specified reaction name."""
 262 
 263     OptionsInfo["RxnNameSMARTS"] = None
 264     
 265     # Set up a map of valid group rxn names for checking specified rxn names...
 266     CanonicalRxnNameMap = {}
 267     for Name in RxnNamesMap['Names']:
 268         CanonicalRxnNameMap[Name.lower()] = Name
 269     
 270     CanonicalRxnName = OptionsInfo["RxnName"].lower()
 271     if CanonicalRxnName in CanonicalRxnNameMap:
 272         Name = CanonicalRxnNameMap[CanonicalRxnName]
 273         OptionsInfo["RxnNameSMARTS"] = RxnNamesMap['SMARTSPattern'][Name]
 274     else:
 275         MiscUtil.PrintError("The rxn name name, %s, specified using \"-r, --rxnName\" option is not a valid name." % (OptionsInfo["RxnName"]))
 276     
def ProcessListReactionNamesOption():
    """Handle the request to list reaction names.

    Called by RetrieveOptions() before any other option has been processed, so
    the reaction names file and column options are processed here first; the
    reaction names are then retrieved, listed and validated.
    """

    # RetrieveReactionNamesInfo() reads the file name and column numbers that
    # these two calls store in OptionsInfo...
    ProcessReactionNamesFileOption()
    ProcessColumnOptions()
    
    RetrieveReactionNamesInfo()
    ListAndValidateReactionNamesInfo()
 285 
 286 def RetrieveReactionNamesInfo():
 287     """Retrieve reaction names information."""
 288 
 289     RxnNamesFile = OptionsInfo["RxnNamesFile"]
 290     
 291     MiscUtil.PrintInfo("\nRetrieving reaction names and SMARTS patterns from file %s" % (RxnNamesFile))
 292     
 293     if not os.path.exists(RxnNamesFile):
 294         MiscUtil.PrintError("The reaction names file, %s, doesn't exist.\n" % (RxnNamesFile))
 295 
 296     IgnoreHeaderLine = True
 297     RxnLinesWords = MiscUtil.GetTextLinesWords(RxnNamesFile, OptionsInfo["RxnNamesFileDelimiter"], OptionsInfo["RxnNamesFileQuote"], IgnoreHeaderLine)
 298     
 299     RxnNamesMap['Names'] = []
 300     RxnNamesMap['SMARTSPattern'] = {}
 301 
 302     RxnNameColindex = OptionsInfo["RxnNameColnum"] - 1
 303     RxnSMARTSColindex = OptionsInfo["RxnSMARTSColnum"] - 1
 304     
 305     for LineWords in RxnLinesWords:
 306         Name = LineWords[RxnNameColindex]
 307         SMARTSPattern = LineWords[RxnSMARTSColindex]
 308 
 309         if Name in RxnNamesMap['SMARTSPattern']:
 310             MiscUtil.PrintWarning("Ignoring duplicate reaction name: %s..." % Name)
 311         else:
 312             RxnNamesMap['Names'].append(Name)
 313             RxnNamesMap['SMARTSPattern'][Name] = SMARTSPattern
 314         
 315     if not len(RxnNamesMap['Names']):
 316         MiscUtil.PrintError("Failed to retrieve any reaction names and SMARTS patterns...")
 317         
 318     MiscUtil.PrintInfo("Total number of reactions present in reaction names and SMARTS file: %d" % (len(RxnNamesMap['Names'])))
 319 
def ListAndValidateReactionNamesInfo():
    """List the available reaction names and SMARTS patterns and then validate
    the SMARTS patterns."""

    ListReactionNamesInfo()
    ValidateReactionNamesInfo()
 325 
 326 def ListReactionNamesInfo():
 327     """List reaction names information."""
 328 
 329     MiscUtil.PrintInfo("\nListing available reaction names and SMARTS patterns...")
 330     MiscUtil.PrintInfo("\nReactionName\tSMARTSPattern")
 331 
 332     RxnCount = 0
 333     for Name in sorted(RxnNamesMap['Names']):
 334         RxnCount += 1
 335         SMARTSPattern = RxnNamesMap['SMARTSPattern'][Name]
 336         MiscUtil.PrintInfo("%s\t%s" % (Name, SMARTSPattern))
 337     
 338     MiscUtil.PrintInfo("\nTotal number of reactions: %s" % RxnCount)
 339 
 340 def ValidateReactionNamesInfo():
 341     """Validate reaction names information."""
 342 
 343     MiscUtil.PrintInfo("\nValidating reaction SMARTS patterns...")
 344 
 345     RxnCount = 0
 346     ValidRxnCount = 0
 347     for Name in sorted(RxnNamesMap['Names']):
 348         RxnCount += 1
 349         SMARTSPattern = RxnNamesMap['SMARTSPattern'][Name]
 350         try:
 351             Rxn = AllChem.ReactionFromSmarts(SMARTSPattern)
 352             ValidRxnCount += 1
 353         except Exception as ErrMsg:
 354             MiscUtil.PrintInfo("\nFailed to validate reaction SMARTS. ReactionName: %s;  SMARTSPattern: %s\n%s\n" % (Name, SMARTSPattern, ErrMsg))
 355 
 356     InvalidRxnCount = RxnCount - ValidRxnCount
 357     MiscUtil.PrintInfo("\nTotal number of reactions: %s\nNumber of valid reactions: %s\nNumber of invalid reactions: %s" % (RxnCount, ValidRxnCount, InvalidRxnCount))
 358         
 359     MiscUtil.PrintInfo("")
 360 
 361 def ProcessReactionNamesFileOption():
 362     """Process reaction names file option."""
 363     
 364     RxnNamesFile = None
 365     if not re.match("^auto$", Options["--rxnNamesFile"], re.I):
 366         MiscUtil.ValidateOptionFilePath("--rxnNamesFile", Options["--rxnNamesFile"])
 367         RxnNamesFile = Options["--rxnNamesFile"]
 368 
 369     if RxnNamesFile is None:
 370         MayaChemToolsDataDir = MiscUtil.GetMayaChemToolsLibDataPath()
 371         RxnNamesFile = os.path.join(MayaChemToolsDataDir, "ReactionNamesAndSMARTS.csv")
 372     
 373     OptionsInfo["RxnNamesFile"] = RxnNamesFile
 374     OptionsInfo["RxnNamesFileDelimiter"] = ','
 375     OptionsInfo["RxnNamesFileQuote"] = '"'
 376     
def ProcessColumnOptions():
    """Process the options related to columns in the reaction names file.

    The order of calls matters: the column mode and the column names are
    stored in OptionsInfo first as the reaction name and reaction SMARTS
    column options are resolved against them.
    """

    ProcessColumnModeOption()
    RetrieveColumnNames()
    
    ProcessReactionNameColOption()
    ProcessReactionSMARTSColOption()
 385     
 386 def ProcessColumnModeOption():
 387     """Process column mode option."""
 388 
 389     CollabelMode, ColnumMode = [False, False]
 390     Colmode = Options["--colmode"]
 391     if re.match("^collabel$", Colmode, re.I):
 392         CollabelMode = True
 393     elif re.match("^colnum$", Colmode, re.I):
 394         ColnumMode = True
 395     else:
 396         MiscUtil.PrintError("The value, %s, specified for option \"-c, --colmode\" is not valid. Supported values: collabel or colnum\n" % (Colmode))
 397 
 398     OptionsInfo["Colmode"] = Colmode
 399     OptionsInfo["CollabelMode"] = CollabelMode
 400     OptionsInfo["ColnumMode"] = ColnumMode
 401 
 402 def RetrieveColumnNames():
 403     """Retrieve column names. """
 404     
 405     RxnNamesFile = OptionsInfo["RxnNamesFile"]
 406     IgnoreHeaderLine = False
 407     RxnLinesWords = MiscUtil.GetTextLinesWords(RxnNamesFile, OptionsInfo["RxnNamesFileDelimiter"], OptionsInfo["RxnNamesFileQuote"], IgnoreHeaderLine)
 408     Colnames = RxnLinesWords[0]
 409     
 410     if len(Colnames) == 0:
 411         MiscUtil.PrintError("The first line in reaction names, %s, is empty. It must contain column names.\n" % OptionsInfo["RxnNamesFile"])
 412     
 413     ColnameToColnumMap = {}
 414     ColnumToColnameMap = {}
 415     for ColIndex, Colname in enumerate(Colnames):
 416         Colnum = ColIndex + 1
 417         ColnameToColnumMap[Colname] = Colnum
 418         ColnumToColnameMap[Colnum] = Colname
 419 
 420     OptionsInfo["Colnames"] = Colnames
 421     OptionsInfo["ColCount"] = len(Colnames)
 422     OptionsInfo["ColnameToColnumMap"] = ColnameToColnumMap
 423     OptionsInfo["ColnumToColnameMap"] = ColnumToColnameMap
 424 
 425 def ProcessReactionNameColOption():
 426     """Process reaction name column option. """
 427     
 428     RxnNameCol = Options["--colRxnName"]
 429     if re.match("^auto$", RxnNameCol, re.I):
 430         Colname = "RxnName"
 431         if Colname not in OptionsInfo["ColnameToColnumMap"]:
 432             MiscUtil.PrintError("The reaction name column name, %s, doen't exist in reaction names file. You must specify a valid reaction name column name or number using \"--colRxnName\" option.\n" % Colname)
 433         
 434         Colnum = OptionsInfo["ColnameToColnumMap"][Colname]
 435         RxnNameColSpec = Colnum if OptionsInfo["ColnumMode"] else Colname
 436     else:
 437         RxnNameColSpec = RxnNameCol
 438 
 439     RxnNameColname, RxnNameColnum = ProcessColumnSpecification("--colRxnName", RxnNameColSpec)
 440     
 441     OptionsInfo["RxnNameCol"] = RxnNameCol
 442     OptionsInfo["RxnNameColname"] = RxnNameColname
 443     OptionsInfo["RxnNameColnum"] = RxnNameColnum
 444 
 445 def ProcessReactionSMARTSColOption():
 446     """Process reaction SMARTS column option. """
 447     
 448     RxnSMARTSCol = Options["--colRxnSMARTS"]
 449     if re.match("^auto$", RxnSMARTSCol, re.I):
 450         Colname = "RxnSMARTS"
 451         if Colname not in OptionsInfo["ColnameToColnumMap"]:
 452             MiscUtil.PrintError("The reaction SMARTS column name, %s, doen't exist in reaction names file. You must specify a valid reaction name column name or number using \"--colRxnSMARTS\" option.\n" % Colname)
 453         
 454         Colnum = OptionsInfo["ColnameToColnumMap"][Colname]
 455         RxnSMARTSColSpec = Colnum if OptionsInfo["ColnumMode"] else Colname
 456     else:
 457         RxnSMARTSColSpec = RxnSMARTSCol
 458 
 459     RxnSMARTSColname, RxnSMARTSColnum = ProcessColumnSpecification("--colRxnSMARTS", RxnSMARTSColSpec)
 460     
 461     OptionsInfo["RxnSMARTSCol"] = RxnSMARTSCol
 462     OptionsInfo["RxnSMARTSColname"] = RxnSMARTSColname
 463     OptionsInfo["RxnSMARTSColnum"] = RxnSMARTSColnum
 464 
 465 def ProcessColumnSpecification(OptionName, Colspec):
 466     """Process column specification corresponding to a column name or number."""
 467 
 468     Colname, Colnum = [None, None]
 469     if OptionsInfo["ColnumMode"]:
 470         Colnum = int(Colspec)
 471         if Colnum not in OptionsInfo["ColnumToColnameMap"]:
 472             MiscUtil.PrintError("The column number, %s, specified using \"%s\" option doesn't exist in reaction names file. You must specify a valid column number. Valid values: >= 1 and <= %s\n" % (Colnum, OptionName, OptionsInfo["ColCount"]))
 473         Colname = OptionsInfo["ColnumToColnameMap"][Colnum]
 474     else:
 475         Colname = Colspec
 476         if Colname not in OptionsInfo["ColnameToColnumMap"]:
 477             MiscUtil.PrintError("The column name, %s, specified using \"%s\" option doesn't exist in input file. You must specify a valid column name. Valid values: %s\n" % (Colname, OptionName, " ".join(OptionsInfo["Colnames"])))
 478         Colnum = OptionsInfo["ColnameToColnumMap"][Colname]
 479 
 480     return (Colname, Colnum)
 481 
 482 
 483 def ProcessOptions():
 484     """Process and validate command line arguments and options."""
 485     
 486     MiscUtil.PrintInfo("Processing options...")
 487     
 488     # Validate options...
 489     ValidateOptions()
 490     
 491     Compute2DCoords = True
 492     if not re.match("^yes$", Options["--compute2DCoords"], re.I):
 493         Compute2DCoords = False
 494     OptionsInfo["Compute2DCoords"]  = Compute2DCoords
 495 
 496     OptionsInfo["Mode"] = Options["--mode"]
 497     RxnByNameMode = True
 498     if not re.match("^RxnByName$", Options["--mode"], re.I):
 499         RxnByNameMode = False
 500     OptionsInfo["RxnByNameMode"] = RxnByNameMode
 501 
 502     OptionsInfo["ProdMolNamesMode"] = Options["--prodMolNames"]
 503     UseReactantNames = False
 504     if re.match("^UseReactants$", Options["--prodMolNames"], re.I):
 505         UseReactantNames = True
 506     OptionsInfo["UseReactantNames"] = UseReactantNames
 507     
 508     OptionsInfo["RxnName"] = Options["--rxnName"]
 509     OptionsInfo["RxnNameSMARTS"] = None
 510     if OptionsInfo["RxnByNameMode"]:
 511         if not Options["--rxnName"]:
 512             MiscUtil.PrintError("No rxn name specified using \"-r, --rxnName\" option during \"RxnByName\" value of \"-m, --mode\" option")
 513     
 514     ProcessReactionNamesFileOption()
 515     ProcessColumnOptions()
 516     
 517     ReactantFiles = re.sub(" ", "", Options["--infiles"])
 518     ReactantFilesList = []
 519     ReactantFilesList = ReactantFiles.split(",")
 520     OptionsInfo["ReactantFiles"] = ReactantFiles
 521     OptionsInfo["ReactantFilesList"] = ReactantFilesList
 522 
 523     OptionsInfo["SpecifiedSMARTS"] = Options["--smartsRxn"]
 524     if not OptionsInfo["RxnByNameMode"]:
 525         if not Options["--smartsRxn"]:
 526             MiscUtil.PrintError("No rxn SMARTS pattern specified using \"-r, --rxnName\" option during \"RxnByName\" value of \"-m, --mode\" option")
 527     
 528     OptionsInfo["Outfile"] = Options["--outfile"]
 529     OptionsInfo["Overwrite"] = Options["--overwrite"]
 530 
 531     # Use first reactant file as input file as all input files have the same format...
 532     OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters("--infileParams", Options["--infileParams"], ReactantFilesList[0])
 533 
 534     # No need to pass any input or output file name due to absence of any auto parameter...
 535     OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters("--outfileParams", Options["--outfileParams"])
 536     
 537     Sanitize = True
 538     if not re.match("^yes$", Options["--sanitize"], re.I):
 539         Sanitize = False
 540     OptionsInfo["Sanitize"]  = Sanitize
 541 
 542 def RetrieveOptions():
 543     """Retrieve command line arguments and options."""
 544     
 545     # Get options...
 546     global Options
 547     Options = docopt(_docoptUsage_)
 548     
 549     # Set current working directory to the specified directory...
 550     WorkingDir = Options["--workingdir"]
 551     if WorkingDir:
 552         os.chdir(WorkingDir)
 553     
 554     # Handle examples option...
 555     if "--examples" in Options and Options["--examples"]:
 556         MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_))
 557         sys.exit(0)
 558     
 559     # Handle listing of functional group information...
 560     if  Options and Options["--list"]:
 561         ProcessListReactionNamesOption()
 562         sys.exit(0)
 563 
 564 def ValidateOptions():
 565     """Validate option values."""
 566     
 567     MiscUtil.ValidateOptionTextValue("-c, --colmode", Options["--colmode"], "collabel colnum")
 568     
 569     MiscUtil.ValidateOptionTextValue("--compute2DCoords", Options["--compute2DCoords"], "yes no")
 570     
 571     MiscUtil.ValidateOptionTextValue("-m, --mode", Options["--mode"], "RxnByName RxnBySMARTS")
 572     MiscUtil.ValidateOptionTextValue("-p, --prodMolNames", Options["--prodMolNames"], "UseReactants Sequential")
 573     
 574     if not re.match("^auto$", Options["--rxnNamesFile"], re.I):
 575         MiscUtil.ValidateOptionFilePath("--rxnNamesFile", Options["--rxnNamesFile"])
 576 
 577     MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "sdf sd smi")
 578     MiscUtil.ValidateOptionsOutputFileOverwrite("-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"])
 579     
 580     ReactantFiles = re.sub(" ", "", Options["--infiles"])
 581     if not ReactantFiles:
 582         MiscUtil.PrintError("No reactant files specified for \"-i, --infiles\" option")
 583 
 584     # Validate file extensions...
 585     for ReactantFile in ReactantFiles.split(","):
 586         MiscUtil.ValidateOptionFilePath("-i, --infiles", ReactantFile)
 587         MiscUtil.ValidateOptionFileExt("-i, --infiles", ReactantFile, "sdf sd smi csv tsv txt")
 588         MiscUtil.ValidateOptionsDistinctFileNames("-i, --infiles", ReactantFile, "-o, --outfile", Options["--outfile"])
 589         
 590     # Match file formats...
 591     FirstFile = True
 592     FirstFileFormat = ""
 593     for ReactantFile in ReactantFiles.split(","):
 594         FileFormat = ""
 595         if MiscUtil.CheckFileExt(ReactantFile, "sdf sd"):
 596             FileFormat = "SD"
 597         elif MiscUtil.CheckFileExt(ReactantFile, "smi csv tsv txt"):
 598             FileFormat = "SMILES"
 599         else:
 600             MiscUtil.PrintError("The file name specified , %s, for option \"-i, --infiles\" is not valid. Supported file formats: sdf sd smi csv tsv txt\n" % ReactantFile)
 601             
 602         if FirstFile:
 603             FirstFile = False
 604             FirstFileFormat = FileFormat
 605             continue
 606         
 607         if not re.match("^%s$" % FirstFileFormat, FileFormat, re.IGNORECASE):
 608             MiscUtil.PrintError("All reactant file names -  %s - specified using option \"-i, --infiles\" must have the same file format.\n" % ReactantFiles)
 609             
 610 
 611     MiscUtil.ValidateOptionTextValue("--sanitize", Options["--sanitize"], "yes no")
 612     
 613 # Setup a usage string for docopt...
 614 _docoptUsage_ = """
 615 RDKitEnumerateCompoundLibrary.py - Enumerate a virtual compound library
 616 
 617 Usage:
 618     RDKitEnumerateCompoundLibrary.py  [--colmode <collabel or colnum>] [--colRxnName <text or number>]
 619                                       [--colRxnSMARTS <text or number>] [--compute2DCoords <yes or no>] [--infileParams <Name,Value,...>]
 620                                       [--mode <RxnByName or RxnBySMARTS>] [--outfileParams <Name,Value,...>] [--overwrite]
 621                                       [--prodMolNames <UseReactants or Sequential>] [--rxnName <text>]
 622                                       [--rxnNamesFile <FileName or auto>] [--smartsRxn <text>] [--sanitize <yes or no>]
 623                                       [-w <dir>] -i  <ReactantFile1,...> -o <outfile>
 624     RDKitEnumerateCompoundLibrary.py [--colmode <collabel or colnum>] [--colRxnName <text or number>] [--colRxnSMARTS <text or number>]
 625                                       [--rxnNamesFile <FileName or auto>] -l | --list
 626     RDKitEnumerateCompoundLibrary.py -h | --help | -e | --examples
 627 
 628 Description:
 629     Perform a combinatorial enumeration of a virtual library of molecules for a reaction specified
 630     using a reaction name or SMARTS pattern and reactant input files.
 631 
 632     The SMARTS patterns for supported reactions names [ Ref 134 ] are retrieved from file,
 633     ReactionNamesAndSMARTS.csv, available in MayaChemTools data directory. The current
 634     list of supported reaction names is shown below:
 635 
 636     '1,2,4_triazole_acetohydrazide', '1,2,4_triazole_carboxylic_acid_ester', 3_nitrile_pyridine,
 637     Benzimidazole_derivatives_aldehyde, Benzimidazole_derivatives_carboxylic_acid_ester,
 638     Benzofuran, Benzothiazole, Benzothiophene, Benzoxazole_aromatic_aldehyde,
 639     Benzoxazole_carboxylic_acid, Buchwald_Hartwig, Decarboxylative_coupling, Fischer_indole,
 640     Friedlaender_chinoline, Grignard_alcohol, Grignard_carbonyl, Heck_non_terminal_vinyl,
 641     Heck_terminal_vinyl, Heteroaromatic_nuc_sub, Huisgen_Cu_catalyzed_1,4_subst,
 642     Huisgen_disubst_alkyne, Huisgen_Ru_catalyzed_1,5_subst, Imidazole, Indole, Mitsunobu_imide,
 643     Mitsunobu_phenole, Mitsunobu_sulfonamide, Mitsunobu_tetrazole_1, Mitsunobu_tetrazole_2,
 644     Mitsunobu_tetrazole_3, Mitsunobu_tetrazole_4, N_arylation_heterocycles, Negishi,
 645     Niementowski_quinazoline, Nucl_sub_aromatic_ortho_nitro, Nucl_sub_aromatic_para_nitro,
 646     Oxadiazole, Paal_Knorr_pyrrole, Phthalazinone, Pictet_Spengler, Piperidine_indole,
 647     Pyrazole, Reductive_amination, Schotten_Baumann_amide, Sonogashira, Spiro_chromanone,
 648     Stille, Sulfon_amide, Suzuki, Tetrazole_connect_regioisomer_1, Tetrazole_connect_regioisomer_2,
 649     Tetrazole_terminal, Thiazole, Thiourea, Triaryl_imidazole, Urea, Williamson_ether, Wittig 
 650 
 651     The supported input file formats are: SD (.sdf, .sd), SMILES (.smi, .csv, .tsv, .txt)
 652 
 653     The supported output file formats are:  SD (.sdf, .sd), SMILES (.smi)
 654 
 655 Options:
 656     -c, --colmode <collabel or colnum>  [default: collabel]
 657         Use column number or name for the specification of columns in a CSV
 658         file containing reaction names along with reaction SMARTS. You may
 659         specify a reaction names file using '--rxnNamesFile' option.
 660     --colRxnName <text or number>  [default: auto]
 661         Column name or number corresponding to reaction names. The default value
 662         is automatically set based on the value of '-c, --colmode': 'RxnName'  for
 663         'collabel'; Reaction name column number for 'colnum'.
 664     --colRxnSMARTS <text or number>  [default: auto]
 665         Column name or number corresponding to reaction SMARTS strings. The default
 666         value is automatically set based on the value of '-c, --colmode': 'RxnSMARTS'
 667         for 'collabel'; Reacton SMARTS column number for 'colnum'.
 668     --compute2DCoords <yes or no>  [default: yes]
 669         Compute 2D coordinates of product molecules before writing them out.
 670     -i, --infiles <ReactantFile1, ReactantFile2...>
 671         Comma delimited list of reactant file names for enumerating a compound library
 672         using reaction SMARTS. The number of reactant files must match number of
 673         reaction components in reaction SMARTS. All reactant input files must have
 674         the same format.
 675     --infileParams <Name,Value,...>  [default: auto]
 676         A comma delimited list of parameter name and value pairs for reading
 677         molecules from files. The supported parameter names for different file
 678         formats, along with their default values, are shown below:
 679             
 680             SD, MOL: removeHydrogens,yes,sanitize,yes,strictParsing,yes
 681             SMILES: smilesColumn,1,smilesNameColumn,2,smilesDelimiter,space,
 682                 smilesTitleLine,auto,sanitize,yes
 683             
 684         Possible values for smilesDelimiter: space, comma or tab. These parameters apply
 685         to all reactant input files, which must have the same file format.
 686     -e, --examples
 687         Print examples.
 688     -h, --help
 689         Print this help message.
 690     -l, --list
 691         List available reaction names along with corresponding SMARTS patterns without
 692         performing any enumeration. In addition, reaction SMARTS patterns are validated.
 693     -m, --mode <RxnByName or RxnBySMARTS>  [default: RxnByName]
 694         Indicate whether a reaction is specified by a reaction name or a SMARTS pattern.
 695         Possible values: RxnByName or RxnBySMARTS.
 696     -o, --outfile <outfile>
 697         Output file name.
 698     --outfileParams <Name,Value,...>  [default: auto]
 699         A comma delimited list of parameter name and value pairs for writing
 700         molecules to files. The supported parameter names for different file
 701         formats, along with their default values, are shown below:
 702             
 703             SD: kekulize,yes,forceV3000,no
 704             SMILES: smilesKekulize,no,smilesDelimiter,space,smilesIsomeric,yes,
 705                 smilesTitleLine,yes
 706             
 707     -p, --prodMolNames <UseReactants or Sequential>  [default: UseReactants]
 708         Generate names of product molecules using reactant names or assign names in
 709         a sequential order. Possible values: UseReactants or Sequential. Format of
 710         molecule names: UseReactants - <ReactName1>_<ReactName2>..._Prod<Num>;
 711         Sequential - Prod<Num>
 712     --overwrite
 713         Overwrite existing files.
 714     -r, --rxnName <text>
 715         Name of a reaction to use for enumerating a compound library. This option
 716         is only used during 'RxnByName' value of '-m, --mode' option.
 717     --rxnNamesFile <FileName or auto>  [default: auto]
 718         Specify a file name containing data for names of reactions and SMARTS patterns or
 719         use default file, ReactionNamesAndSMARTS.csv, available in MayaChemTools data
 720         directory.
 721         
 722         Default reactions SMARTS file format: RxnName,RxnSMARTS.
 723         
 724         The local file format is assumed to be same as the default file format. You may
 725         explicitly specify column names or numbers for reaction name and reaction
 726         SMARTS using '--colRxnName' and '--colRxnSMARTS' options.
 727     -s, --smartsRxn <text>
 728         SMARTS pattern of a reaction to use for enumerating a compound library. This
 729         option is only used during 'RxnBySMARTS' value of '-m, --mode' option.
 730     --sanitize <yes or no>  [default: yes]
 731         Sanitize product molecules before writing them out.
 732     -w, --workingdir <dir>
 733         Location of working directory which defaults to the current directory.
 734 
 735 Examples:
 736     To list all available reaction names along with their SMARTS pattern, type:
 737 
 738          % RDKitEnumerateCompoundLibrary.py -l
 739 
 740     To perform a combinatorial enumeration of a virtual compound library corresponding
 741     to named amide reaction, Schotten_Baumann_amide, and write out a SMILES file
 742     type:
 743 
 744         % RDKitEnumerateCompoundLibrary.py -r Schotten_Baumann_amide
 745           -i 'SampleAcids.smi,SampleAmines.smi' -o SampleOutCmpdLibrary.smi
 746 
 747     To run the previous command using a local reaction names file with explicit
 748     specification of column names containing reaction names and SMARTS, and write
 749     out a SMILES file type:
 750 
 751         % RDKitEnumerateCompoundLibrary.py -r Schotten_Baumann_amide
 752           --rxnNamesFile ReactionNamesAndSMARTS.csv
 753           --colmode collabel --colRxnName RxnName --colRxnSMARTS RxnSMARTS
 754           -i 'SampleAcids.smi,SampleAmines.smi' -o SampleOutCmpdLibrary.smi
 755 
 756     To perform a combinatorial enumeration of a virtual compound library corresponding
 757     to an amide reaction specified using a SMARTS pattern and write out a SD file containing
 758     sanitized molecules, computed 2D coordinates, and generation of molecule names from
 759     reactant names, type:
 760 
 761         % RDKitEnumerateCompoundLibrary.py -m RxnBySMARTS
 762           -s '[O:2]=[C:1][OH].[N:3]>>[O:2]=[C:1][N:3]'
 763           -i 'SampleAcids.smi,SampleAmines.smi' -o SampleOutCmpdLibrary.sdf
 764 
 765     To perform a combinatorial enumeration of a virtual compound library corresponding
 766     to an amide reaction specified using a SMARTS pattern  and write out a SD file containing
 767     unsanitized molecules, without generating 2D coordinates, and a sequential generation
 768     of molecule names, type:
 769 
 770         % RDKitEnumerateCompoundLibrary.py -m RxnBySMARTS --compute2DCoords no --sanitize no
 771           -p Sequential -s '[O:2]=[C:1][OH].[N:3]>>[O:2]=[C:1][N:3]'
 772           -i 'SampleAcids.smi,SampleAmines.smi' -o SampleOutCmpdLibrary.sdf
 773 
 774 Author:
 775     Manish Sud(msud@san.rr.com)
 776 
 777 See also:
 778     RDKitConvertFileFormat.py, RDKitFilterPAINS.py, RDKitSearchFunctionalGroups.py,
 779     RDKitSearchSMARTS.py
 780 
 781 Copyright:
 782     Copyright (C) 2024 Manish Sud. All rights reserved.
 783 
 784     The functionality available in this script is implemented using RDKit, an
 785     open source toolkit for cheminformatics developed by Greg Landrum.
 786 
 787     This file is part of MayaChemTools.
 788 
 789     MayaChemTools is free software; you can redistribute it and/or modify it under
 790     the terms of the GNU Lesser General Public License as published by the Free
 791     Software Foundation; either version 3 of the License, or (at your option) any
 792     later version.
 793 
 794 """
 795 
 796 if __name__ == "__main__":  # Run main() only when executed as a script, not on import.
 797     main()