MayaChemTools

    1 #!/bin/env python
    2 #
    3 # File: RDKitPerformSynthonSpaceSearch.py
    4 # Author: Manish Sud <msud@san.rr.com>
    5 #
    6 # Acknowledgments: Dave Cosgrove
    7 #
    8 # Copyright (C) 2025 Manish Sud. All rights reserved.
    9 #
   10 # The functionality available in this script is implemented using RDKit, an
   11 # open source toolkit for cheminformatics developed by Greg Landrum.
   12 #
   13 # This file is part of MayaChemTools.
   14 #
   15 # MayaChemTools is free software; you can redistribute it and/or modify it under
   16 # the terms of the GNU Lesser General Public License as published by the Free
   17 # Software Foundation; either version 3 of the License, or (at your option) any
   18 # later version.
   19 #
   20 # MayaChemTools is distributed in the hope that it will be useful, but without
   21 # any warranty; without even the implied warranty of merchantability of fitness
   22 # for a particular purpose.  See the GNU Lesser General Public License for more
   23 # details.
   24 #
   25 # You should have received a copy of the GNU Lesser General Public License
   26 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
   27 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
   28 # Boston, MA, 02111-1307, USA.
   29 #
   30 
   31 from __future__ import print_function
   32 
   33 # Add local python path to the global path and import standard library modules...
   34 import os
   35 import sys;  sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python"))
   36 import time
   37 import re
   38 import multiprocessing as mp
   39 
   40 # RDKit imports...
   41 try:
   42     from rdkit import rdBase
   43     from rdkit import Chem
   44     from rdkit.Chem import AllChem
   45     from rdkit.Chem import rdSynthonSpaceSearch
   46     from rdkit.Chem import rdFingerprintGenerator
   47     from rdkit.Chem import rdRascalMCES
   48 except ImportError as ErrMsg:
   49     sys.stderr.write("\nFailed to import RDKit module/package: %s\n" % ErrMsg)
   50     sys.stderr.write("Check/update your RDKit environment and try again.\n\n")
   51     sys.exit(1)
   52 
   53 # MayaChemTools imports...
   54 try:
   55     from docopt import docopt
   56     import MiscUtil
   57     import RDKitUtil
   58 except ImportError as ErrMsg:
   59     sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg)
   60     sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n")
   61     sys.exit(1)
   62 
# Base name of the running script, taken from sys.argv[0]; used in the
# start and end status messages printed by main().
ScriptName = os.path.basename(sys.argv[0])
# Command line options keyed by option name (e.g. "--list", "--infile").
# Starts empty; presumably filled in by RetrieveOptions() - TODO confirm.
Options = {}
# Processed option values shared by the functions in this script (e.g.
# "Mode", "Infile", "Outfile").
OptionsInfo = {}
   66 
   67 def main():
   68     """Start execution of the script."""
   69     
   70     MiscUtil.PrintInfo("\n%s (RDKit v%s; MayaChemTools v%s; %s): Starting...\n" % (ScriptName, rdBase.rdkitVersion, MiscUtil.GetMayaChemToolsVersion(), time.asctime()))
   71     
   72     (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime()
   73     
   74     # Retrieve command line arguments and options...
   75     RetrieveOptions()
   76     
   77     if  Options and Options["--list"]:
   78         # Process list option...
   79         ProcessListSynthonSearchSpace()
   80     else:
   81         # Process and validate command line arguments and options...
   82         ProcessOptions()
   83         
   84         # Perform actions required by the script...
   85         PerformSynthonSpaceSearch()
   86     
   87     MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName)
   88     MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime))
   89 
   90 def PerformSynthonSpaceSearch():
   91     """Perform synthon space search."""
   92 
   93     Mode = OptionsInfo["Mode"]
   94     if re.match("^FingerprintsGeneration$", Mode, re.I):
   95         GenerateFingerprints()
   96     elif re.match("^BinaryDBFileGeneration$", Mode, re.I):
   97         GenerateBinaryDatabaseFile()
   98     elif re.match("^LibraryEnumeration$", Mode, re.I):
   99         PerformLibraryEnumeration()
  100     elif re.match("^RascalSimilaritySearch$", Mode, re.I):
  101         PerformRascalSimilaritySearch()
  102     elif re.match("^SimilaritySearch$", Mode, re.I):
  103         PerformSimilaritySearch()
  104     elif re.match("^SubstructureSearch$", Mode, re.I):
  105         PerformSubtructureSearch()
  106     else:
  107         MiscUtil.PrintError("The value specified, %s, for option \"--mode\" is not valid." % Mode)
  108 
  109 def GenerateFingerprints():
  110     """Generate fingerprints for synthons and write out a binary file."""
  111 
  112     MiscUtil.PrintInfo("\nGenerating fingerprints (Mode: %s)..." % OptionsInfo["Mode"])
  113     
  114     SynthonSpace = ReadSynthonSpaceFile(OptionsInfo["Infile"])
  115 
  116     StartTime = time.perf_counter()
  117     
  118     MiscUtil.PrintInfo("\nGenerating fingerprints (Type: %s)..." % OptionsInfo["SpecifiedFingerprints"])
  119     FPGenerator = InitializeFingerprintsGenerator()
  120     SynthonSpace.BuildSynthonFingerprints(FPGenerator)
  121     
  122     TotalTime = time.perf_counter() - StartTime
  123     MiscUtil.PrintInfo("Total time: %.2f secs" % TotalTime)
  124     
  125     WriteSynthonSpaceBinaryFile(SynthonSpace, OptionsInfo["Outfile"])
  126     
  127 def GenerateBinaryDatabaseFile():
  128     """Write out a binary file for synthons."""
  129     
  130     MiscUtil.PrintInfo("\nGenerating binary database file (Mode: %s)..." % OptionsInfo["Mode"])
  131     
  132     SynthonSpace = ReadSynthonSpaceFile(OptionsInfo["Infile"])
  133     WriteSynthonSpaceBinaryFile(SynthonSpace, OptionsInfo["Outfile"])
  134     
  135 def PerformLibraryEnumeration():
  136     """Enumerate library using synthons and write out a SMILES file."""
  137 
  138     MiscUtil.PrintInfo("\nPerforming library enumeration (Mode: %s)..." % OptionsInfo["Mode"])
  139     
  140     SynthonSpace = ReadSynthonSpaceFile(OptionsInfo["Infile"])
  141     
  142     MiscUtil.PrintInfo("\nWriting file %s ..." % OptionsInfo["Outfile"])
  143     SynthonSpace.WriteEnumeratedFile(OptionsInfo["Outfile"])
  144 
  145 def PerformSimilaritySearch():
  146     """Perform similarity search."""
  147     
  148     SingleOutFileMode = OptionsInfo["SingleOutFileMode"]
  149     CountHitsMode = OptionsInfo["CountHitsMode"]
  150     SynthonSearchParams = OptionsInfo["SynthonSearchParams"]
  151     
  152     MiscUtil.PrintInfo("\nPerforming similiarity search (Fingerprints: %s; SimilarityCutoff: %s; MaxHits: %s)..." % (OptionsInfo["SpecifiedFingerprints"], SynthonSearchParams["SimilarityCutoff"], SynthonSearchParams["MaxHits"]))
  153     
  154     # Setup synthon space...
  155     SynthonSpace, FPGenerator = SetupSynthonSpaceForSimilaritySearch()
  156 
  157     # Setup out file writers...
  158     SingleOutFileWriter, HitsInfoWriter = SetupOutfileWriters()
  159     
  160     # Setup a molecule reader...
  161     MiscUtil.PrintInfo("\nProcessing file %s..." % OptionsInfo["QueryFile"])
  162     QueryMols  = RDKitUtil.ReadMolecules(OptionsInfo["QueryFile"], **OptionsInfo["QueryFileParams"])
  163 
  164     # Process query molecules...
  165     (QueryMolCount, ValidQueryMolCount) = [0] * 2
  166     for QueryMol in QueryMols:
  167         QueryMolCount += 1
  168         if QueryMol is None or RDKitUtil.IsMolEmpty(QueryMol):
  169             continue
  170         
  171         ValidQueryMolCount += 1
  172         QueryMolName = RDKitUtil.GetMolName(QueryMol, QueryMolCount)
  173         
  174         HitMols, HitMolsCount, MaxPossibleHits = PerformSynthonSpaceSimilaritySearch(SynthonSpace, FPGenerator, QueryMol)
  175 
  176         if CountHitsMode:
  177             WriteHitsInfo(HitsInfoWriter, [QueryMolName, MaxPossibleHits])
  178         else:
  179             WriteHitsInfo(HitsInfoWriter, [QueryMolName, HitMolsCount, MaxPossibleHits])
  180             
  181             Writer = SingleOutFileWriter if SingleOutFileMode else SetupMoleculeWriter(SingleOutFileMode, QueryMolCount)
  182             WriteMolecules(Writer, QueryMolName, HitMols)
  183             
  184             if not SingleOutFileMode:
  185                 if Writer is not None:
  186                     Writer.close()
  187     
  188     if SingleOutFileWriter is not None:
  189         SingleOutFileWriter.close()
  190     
  191     if HitsInfoWriter is not None:
  192         HitsInfoWriter.close()
  193     
  194     MiscUtil.PrintInfo("\nTotal number of query molecules: %d" % QueryMolCount)
  195     MiscUtil.PrintInfo("Number of valid query  molecules: %d" % ValidQueryMolCount)
  196     MiscUtil.PrintInfo("Number of ignored query molecules: %d" % (QueryMolCount - ValidQueryMolCount))
  197 
  198 def PerformSubtructureSearch():
  199     """Perform substructure search."""
  200     
  201     SingleOutFileMode = OptionsInfo["SingleOutFileMode"]
  202     CountHitsMode = OptionsInfo["CountHitsMode"]
  203     SynthonSearchParams = OptionsInfo["SynthonSearchParams"]
  204     
  205     MiscUtil.PrintInfo("\nPerforming substructue search (MaxHits: %s)..." % (SynthonSearchParams["MaxHits"]))
  206     
  207     # Setup synthon space...
  208     SynthonSpace = ReadSynthonSpaceFile(OptionsInfo["Infile"])
  209     
  210     # Setup out file writers...
  211     SingleOutFileWriter, HitsInfoWriter = SetupOutfileWriters()
  212     
  213     # Process query pattern molecules...
  214     MiscUtil.PrintInfo("\nProcessing query patterns...")
  215     
  216     QueryMolCount = 0
  217     for QueryMol in OptionsInfo["QueryPatternMols"]:
  218         QueryMolCount += 1
  219         QueryMolName = "Pattern%s" % QueryMolCount
  220     
  221         HitMols, HitMolsCount, MaxPossibleHits = PerformSynthonSpaceSubstructureSearch(SynthonSpace, QueryMol)
  222         
  223         if CountHitsMode:
  224             WriteHitsInfo(HitsInfoWriter, [QueryMolName, MaxPossibleHits])
  225         else:
  226             WriteHitsInfo(HitsInfoWriter, [QueryMolName, HitMolsCount, MaxPossibleHits])
  227             
  228             Writer = SingleOutFileWriter if SingleOutFileMode else SetupMoleculeWriter(SingleOutFileMode, QueryMolCount)
  229             WriteMolecules(Writer, QueryMolName, HitMols)
  230             
  231             if not SingleOutFileMode:
  232                 if Writer is not None:
  233                     Writer.close()
  234     
  235     if SingleOutFileWriter is not None:
  236         SingleOutFileWriter.close()
  237     
  238     if HitsInfoWriter is not None:
  239         HitsInfoWriter.close()
  240     
  241     MiscUtil.PrintInfo("\nTotal number of query patterns: %d" % QueryMolCount)
  242 
  243 def PerformRascalSimilaritySearch():
  244     """Perform RASCAL similarity search."""
  245     
  246     SingleOutFileMode = OptionsInfo["SingleOutFileMode"]
  247     CountHitsMode = OptionsInfo["CountHitsMode"]
  248     RascalSearchParams = OptionsInfo["RascalSearchParams"]
  249     SynthonSearchParams = OptionsInfo["SynthonSearchParams"]
  250 
  251     MiscUtil.PrintInfo("\nPerforming RASCAL similiarity search (SimilarityThreshold: %s; MaxHits: %s)..." % (RascalSearchParams["SimilarityThreshold"], SynthonSearchParams["MaxHits"]))
  252     
  253     # Setup synthon space...
  254     SynthonSpace = ReadSynthonSpaceFile(OptionsInfo["Infile"])
  255     
  256     # Setup out file writers...
  257     SingleOutFileWriter, HitsInfoWriter = SetupOutfileWriters()
  258     
  259     # Setup a molecule reader...
  260     MiscUtil.PrintInfo("\nProcessing file %s..." % OptionsInfo["QueryFile"])
  261     QueryMols  = RDKitUtil.ReadMolecules(OptionsInfo["QueryFile"], **OptionsInfo["QueryFileParams"])
  262 
  263     # Process query molecules...
  264     (QueryMolCount, ValidQueryMolCount) = [0] * 2
  265     for QueryMol in QueryMols:
  266         QueryMolCount += 1
  267         if QueryMol is None or RDKitUtil.IsMolEmpty(QueryMol):
  268             continue
  269         
  270         ValidQueryMolCount += 1
  271         QueryMolName = RDKitUtil.GetMolName(QueryMol, QueryMolCount)
  272         
  273         HitMols, HitMolsCount, MaxPossibleHits = PerformSynthonSpaceRascalSimilaritySearch(SynthonSpace, QueryMol)
  274 
  275         if CountHitsMode:
  276             WriteHitsInfo(HitsInfoWriter, [QueryMolName, MaxPossibleHits])
  277         else:
  278             WriteHitsInfo(HitsInfoWriter, [QueryMolName, HitMolsCount, MaxPossibleHits])
  279             
  280             Writer = SingleOutFileWriter if SingleOutFileMode else SetupMoleculeWriter(SingleOutFileMode, QueryMolCount)
  281             WriteMolecules(Writer, QueryMolName, HitMols)
  282             
  283             if not SingleOutFileMode:
  284                 if Writer is not None:
  285                     Writer.close()
  286     
  287     if SingleOutFileWriter is not None:
  288         SingleOutFileWriter.close()
  289     
  290     if HitsInfoWriter is not None:
  291         HitsInfoWriter.close()
  292     
  293     MiscUtil.PrintInfo("\nTotal number of query molecules: %d" % QueryMolCount)
  294     MiscUtil.PrintInfo("Number of valid query  molecules: %d" % ValidQueryMolCount)
  295     MiscUtil.PrintInfo("Number of ignored query molecules: %d" % (QueryMolCount - ValidQueryMolCount))
  296 
  297 def ProcessListSynthonSearchSpace():
  298     """Process list synthon search space information."""
  299 
  300     MiscUtil.PrintInfo("\nListing information...")
  301     
  302     # Validate infile..
  303     MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"])
  304     MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "txt csv spc")
  305     
  306     # Process infile..
  307     OptionsInfo["Infile"] = Options["--infile"]
  308     
  309     SynthonSpace = ReadSynthonSpaceFile(OptionsInfo["Infile"])
  310 
  311     MiscUtil.PrintInfo("\nSummary of synthon space:\n")
  312     SynthonSpace.Summarise()
  313 
  314     ListSynthonSpaceFingerprintsType(SynthonSpace)
  315 
  316 def PerformSynthonSpaceSimilaritySearch(SynthonSpace, FPGenerator, QueryMol):
  317     """Perform synthon space similarity search."""
  318 
  319     try:
  320         Results = SynthonSpace.FingerprintSearch(QueryMol, FPGenerator, params = OptionsInfo["RDKitSynthonSearchParams"])
  321     except Exception as ErrMsg:
  322         MiscUtil.PrintInfo("")
  323         MiscUtil.PrintError("Failed to perform synthon space fingerprints seach:\n%s\n" % (ErrMsg))
  324 
  325     HitMols, HitMolsCount, MaxPossibleHits = GetSynthonSpaceHitMolecules(Results)
  326     
  327     return (HitMols, HitMolsCount, MaxPossibleHits)
  328 
  329 def PerformSynthonSpaceRascalSimilaritySearch(SynthonSpace, QueryMol):
  330     """Perform synthon space RASCAL similarity search."""
  331 
  332     try:
  333         Results = SynthonSpace.RascalSearch(QueryMol, OptionsInfo["RDKitRascalSearchParams"], params = OptionsInfo["RDKitSynthonSearchParams"])
  334     except Exception as ErrMsg:
  335         MiscUtil.PrintInfo("")
  336         MiscUtil.PrintError("Failed to perform synthon space RASCAL similarity seach:\n%s\n" % (ErrMsg))
  337 
  338     HitMols, HitMolsCount, MaxPossibleHits = GetSynthonSpaceHitMolecules(Results)
  339     
  340     return (HitMols, HitMolsCount, MaxPossibleHits)
  341 
  342 def PerformSynthonSpaceSubstructureSearch(SynthonSpace, QueryMol):
  343     """Perform synthon space substructure search."""
  344 
  345     try:
  346         Results = SynthonSpace.SubstructureSearch(QueryMol, substructMatchParams = OptionsInfo["RDKitSubstructureMatchParams"], params = OptionsInfo["RDKitSynthonSearchParams"])
  347     except Exception as ErrMsg:
  348         MiscUtil.PrintInfo("")
  349         MiscUtil.PrintError("Failed to perform synthon space substructure seach:\n%s\n" % (ErrMsg))
  350     
  351     HitMols, HitMolsCount, MaxPossibleHits = GetSynthonSpaceHitMolecules(Results)
  352     
  353     return (HitMols, HitMolsCount, MaxPossibleHits)
  354 
  355 def GetSynthonSpaceHitMolecules(Results):
  356     """Retrieve synthon space hit molecues."""
  357     
  358     HitMols = Results.GetHitMolecules()
  359     
  360     HitMolsCount = len(HitMols)
  361     if HitMolsCount == 0:
  362         HitMols = None
  363         HitMolsCount = None
  364     
  365     MaxPossibleHits = Results.GetMaxNumResults()
  366 
  367     return (HitMols, HitMolsCount, MaxPossibleHits)
  368 
  369 def SetupSynthonSpaceForSimilaritySearch():
  370     """Setup synthon space for similarity search."""
  371 
  372     SynthonSpace = ReadSynthonSpaceFile(OptionsInfo["Infile"])
  373 
  374     FPType, FPInfo = GetSynthonFingerprintsInfo(SynthonSpace)
  375     if FPType is None:
  376         MiscUtil.PrintInfo("")
  377         MiscUtil.PrintError("The synthon space input file, %s, doesn't contain any fingerprints. You must specify a synthon space binary database file containing appropriate fingerprints for similarity search.." % OptionsInfo["Infile"])
  378     
  379     if not re.search("%s" % OptionsInfo["SpecifiedFingerprints"], FPType, re.I):
  380         MiscUtil.PrintInfo("")
  381         MiscUtil.PrintWarning("The fingerprints type, %s, in synthon space input file, %s, doesn't appear to match fingerprints, %s, specified using \"--fingerprints\" option for similarity search." % (FPType, OptionsInfo["Infile"], OptionsInfo["SpecifiedFingerprints"]))
  382     
  383     FPGenerator = InitializeFingerprintsGenerator()
  384 
  385     return (SynthonSpace, FPGenerator)
  386 
  387 def InitializeFingerprintsGenerator():
  388     """Initialize fingerprints generator."""
  389     
  390     FPGenerator = None
  391     SpecifiedFingerprints = OptionsInfo["SpecifiedFingerprints"]
  392     if re.match("^AtomPairs$", SpecifiedFingerprints, re.I):
  393         FPParamsInfo = OptionsInfo["FingerprintsParamsInfo"]["AtomPairs"]
  394         FPGenerator = rdFingerprintGenerator.GetAtomPairGenerator(minDistance = FPParamsInfo["MinLength"], maxDistance = FPParamsInfo["MaxLength"], includeChirality = FPParamsInfo["UseChirality"], use2D = FPParamsInfo["Use2D"], fpSize = FPParamsInfo["FPSize"])
  395     elif re.match("^Morgan$", SpecifiedFingerprints, re.I):
  396         FPParamsInfo = OptionsInfo["FingerprintsParamsInfo"]["Morgan"]
  397         FPGenerator = rdFingerprintGenerator.GetMorganGenerator(radius = FPParamsInfo["Radius"], includeChirality = FPParamsInfo["UseChirality"], useBondTypes = FPParamsInfo["UseBondTypes"], includeRingMembership = FPParamsInfo["UseRingMembership"], fpSize = FPParamsInfo["FPSize"])
  398     elif re.match("^MorganFeatures$", SpecifiedFingerprints, re.I):
  399         FPParamsInfo = OptionsInfo["FingerprintsParamsInfo"]["MorganFeatures"]
  400         FPGenerator = rdFingerprintGenerator.GetMorganGenerator(radius = FPParamsInfo["Radius"], includeChirality = FPParamsInfo["UseChirality"], useBondTypes = FPParamsInfo["UseBondTypes"], includeRingMembership = FPParamsInfo["UseRingMembership"], fpSize = FPParamsInfo["FPSize"], atomInvariantsGenerator = rdFingerprintGenerator.GetMorganAtomInvGen())
  401     elif re.match("^PathLength$", SpecifiedFingerprints, re.I):
  402         FPParamsInfo = OptionsInfo["FingerprintsParamsInfo"]["PathLength"]
  403         FPGenerator = rdFingerprintGenerator.GetRDKitFPGenerator(minPath = FPParamsInfo["MinPath"], maxPath = FPParamsInfo["MaxPath"], useHs = FPParamsInfo["UseExplicitHs"], branchedPaths = FPParamsInfo["UseBranchedPaths"], useBondOrder = FPParamsInfo["UseBondOrder"], fpSize = FPParamsInfo["FPSize"], numBitsPerFeature = FPParamsInfo["BitsPerHash"])
  404     elif re.match("^TopologicalTorsions$", SpecifiedFingerprints, re.I):
  405         FPParamsInfo = OptionsInfo["FingerprintsParamsInfo"]["TopologicalTorsions"]
  406         FPGenerator = rdFingerprintGenerator.GetTopologicalTorsionGenerator(includeChirality = FPParamsInfo["UseChirality"], fpSize = FPParamsInfo["FPSize"])
  407     else:
  408         MiscUtil.PrintError("The value specified, %s, for option \"--fingerprints\" is not valid.")
  409     
  410     return FPGenerator
  411 
  412 def ReadSynthonSpaceFile(Infile):
  413     """Read synthon space file."""
  414 
  415     MiscUtil.PrintInfo("\nReading synthon space file %s..." % Infile)
  416     SynthonSpace = rdSynthonSpaceSearch.SynthonSpace()
  417 
  418     StartTime = time.perf_counter()
  419     
  420     try:
  421         if MiscUtil.CheckFileExt(Infile, "spc"):
  422             SynthonSpace.ReadDBFile(Infile)
  423         else:
  424             SynthonSpace.ReadTextFile(Infile)
  425     except Exception as ErrMsg:
  426         MiscUtil.PrintInfo("")
  427         MiscUtil.PrintError("Failed to read synthon space file:\n%s\n" % (ErrMsg))
  428     
  429     TotalTime = time.perf_counter() - StartTime
  430     MiscUtil.PrintInfo("Total time: %.2f secs" % TotalTime)
  431 
  432     return SynthonSpace
  433 
  434 def WriteSynthonSpaceBinaryFile(SynthonSpace, Outfile):
  435     """Write synthon space binary file."""
  436 
  437     MiscUtil.PrintInfo("\nWriting synthon space file %s..." % Outfile)
  438     StartTime = time.perf_counter()
  439     
  440     try:
  441         SynthonSpace.WriteDBFile(Outfile)
  442     except Exception as ErrMsg:
  443         MiscUtil.PrintInfo("")
  444         MiscUtil.PrintError("Failed to write synthon space file:\n%s\n" % (ErrMsg))
  445     
  446     TotalTime = time.perf_counter() - StartTime
  447     MiscUtil.PrintInfo("Total time: %.2f secs" % TotalTime)
  448 
  449     return SynthonSpace
  450 
  451 def ListSynthonSpaceFingerprintsType(SynthonSpace):
  452     """List synthon space fingerprints type. """
  453 
  454     FPType, FPInfo = GetSynthonFingerprintsInfo(SynthonSpace)
  455 
  456     if FPInfo is None:
  457         MiscUtil.PrintInfo("\nFingerprints type: %s" % (FPInfo))
  458     else:
  459         MiscUtil.PrintInfo("\nFingerprints type: %s\nFingerprints Info: %s" % (FPType, FPInfo))
  460 
  461 def GetSynthonFingerprintsInfo(SynthonSpace):
  462     """Get synthon fingerprints information."""
  463 
  464     FPInfo = SynthonSpace.GetSynthonFingerprintType()
  465     if len(FPInfo) == 0:
  466         return (None, None)
  467     
  468     if re.search("AtomPairArguments", FPInfo, re.I):
  469         FPType = "AtomPairs"
  470     elif re.search("MorganArguments", FPInfo, re.I):
  471         FPType = "Morgan or MorganFeatures"
  472     elif re.search("RDKitFPArguments", FPInfo, re.I):
  473         FPType = "PathLength"
  474     elif re.search("TopologicalTorsionArguments", FPInfo, re.I):
  475         FPType = "TopologicalTorsions"
  476     else:
  477         FPType = "Unknown"
  478     
  479     return (FPType, FPInfo)
  480 
  481 def SetupMoleculeWriter(SIngleOutFile, MolCount = 0):
  482     """Setup molecule writer. """
  483     
  484     TextOutFileMode = OptionsInfo["TextOutFileMode"]
  485     TextOutFileDelim = OptionsInfo["TextOutFileDelim"]
  486     TextOutFileTitleLine = OptionsInfo["TextOutFileTitleLine"]
  487     
  488     if SIngleOutFile:
  489         Outfile = OptionsInfo["Outfile"]
  490     else:
  491         Outfile = "%s_%s%s.%s" % (OptionsInfo["OutFileRoot"], OptionsInfo["OutFileSuffix"], MolCount, OptionsInfo["OutFileExt"])
  492     
  493     if TextOutFileMode:
  494         Writer = open(Outfile, "w")
  495     else:
  496         Writer = RDKitUtil.MoleculesWriter(Outfile, **OptionsInfo["OutfileParams"])
  497     if Writer is None:
  498         MiscUtil.PrintError("Failed to setup a writer for output fie %s " % Outfile)
  499     
  500     if TextOutFileMode:
  501         if TextOutFileTitleLine:
  502             WriteTextFileHeaderLine(Writer, TextOutFileDelim)
  503         
  504     return Writer
  505 
  506 def WriteTextFileHeaderLine(Writer, TextOutFileDelim):
  507     """Write out a header line for text files including SMILES file."""
  508 
  509     Line = ""
  510     if OptionsInfo["SubstructureSearchMode"]:
  511         Line = TextOutFileDelim.join(["SMILES", "Name", "QueryPatternNumber"])
  512     elif OptionsInfo["SimilaritySearchMode"]:
  513         Line = TextOutFileDelim.join(["SMILES", "Name", "Similarity", "QueryMolName"])
  514     elif OptionsInfo["RascalSimilaritySearchMode"]:
  515         Line = TextOutFileDelim.join(["SMILES", "Name", "Similarity", "QueryMolName"])
  516     
  517     Writer.write("%s\n" % Line)
  518 
  519 def WriteMolecules(Writer, QueryMolName, HitMols):
  520     """Write hit molecules for similarity and substructure search."""
  521     
  522     RascalSimilaritySearchMode = OptionsInfo["RascalSimilaritySearchMode"]
  523     SimilaritySearchMode = OptionsInfo["SimilaritySearchMode"]
  524     SubstructureSearchMode = OptionsInfo["SubstructureSearchMode"]
  525     
  526     TextOutFileMode = OptionsInfo["TextOutFileMode"]
  527     TextOutFileDelim = OptionsInfo["TextOutFileDelim"]
  528 
  529     Compute2DCoords = OptionsInfo["OutfileParams"]["Compute2DCoords"]
  530     
  531     SMILESIsomeric = OptionsInfo["OutfileParams"]["SMILESIsomeric"]
  532     SMILESKekulize = OptionsInfo["OutfileParams"]["SMILESKekulize"]
  533 
  534     HitMolCount = 0
  535     for HitMol in HitMols:
  536         HitMolCount += 1
  537 
  538         if TextOutFileMode:
  539             # Write out text file including SMILES file...
  540             LineWords = []
  541             LineWords.append(Chem.MolToSmiles(HitMol, isomericSmiles = SMILESIsomeric, kekuleSmiles = SMILESKekulize))
  542             LineWords.append(RDKitUtil.GetMolName(HitMol, HitMolCount))
  543             
  544             if SimilaritySearchMode or RascalSimilaritySearchMode:
  545                 Similarity = "%.2f" % float(HitMol.GetProp("Similarity"))
  546                 LineWords.append(Similarity)
  547             
  548             LineWords.append(QueryMolName)
  549                 
  550             Line = TextOutFileDelim.join(LineWords)
  551             Writer.write("%s\n" % Line)
  552         else:
  553             # Write out SD file...
  554             if SimilaritySearchMode or RascalSimilaritySearchMode:
  555                 HitMol.SetProp("QueryMolName", QueryMolName)
  556             elif SubstructureSearchMode:
  557                 HitMol.SetProp("QueryPatternNum", QueryMolName)
  558             
  559             if SimilaritySearchMode or RascalSimilaritySearchMode:
  560                 Similarity = "%.2f" % float(HitMol.GetProp("Similarity"))
  561                 HitMol.SetProp("Similarity", Similarity)
  562                 
  563             if Compute2DCoords:
  564                 AllChem.Compute2DCoords(HitMol)
  565             Writer.write(HitMol)
  566 
  567 def SetupOutfileWriters():
  568     """Setup outfile writers."""
  569     
  570     SingleOutFileWriter, HitsInfoWriter = [None] * 2
  571     
  572     if OptionsInfo["CountHitsMode"]:
  573         MiscUtil.PrintInfo("\nSkipping generation of output files containing hit structures and only counting hits (BuildHits: No)...")
  574     else:
  575         if OptionsInfo["SingleOutFileMode"]:
  576             SingleOutFileWriter = SetupMoleculeWriter(OptionsInfo["SingleOutFileMode"])
  577             MiscUtil.PrintInfo("\nGenerating output file %s..." % OptionsInfo["Outfile"])
  578         else:
  579             MiscUtil.PrintInfo("\nGenerating output file(s) %s_%s*.%s..." % (OptionsInfo["OutFileRoot"], OptionsInfo["OutFileSuffix"], OptionsInfo["OutFileExt"]))
  580     
  581     HitsInfoWriter = SetupHitsInfoWriter()
  582 
  583     return (SingleOutFileWriter, HitsInfoWriter)
  584 
  585 def SetupHitsInfoWriter():
  586     """Setup hits info writer."""
  587 
  588     HitsInfoOutFile = OptionsInfo["HitsInfoOutFile"]
  589     HitsInfoOutFileDelim = OptionsInfo["HitsInfoOutFileDelim"]
  590 
  591     MiscUtil.PrintInfo("\nGenerating output file %s..." % HitsInfoOutFile)
  592     
  593     Writer = open(HitsInfoOutFile, "w")
  594 
  595     # Setup and write out header...
  596     MolIDColName = "MolID"
  597     if OptionsInfo["SubstructureSearchMode"]:
  598         MolIDColName = "QueryPatternNumber"
  599     elif OptionsInfo["SimilaritySearchMode"]:
  600         MolIDColName = "QueryMolName"
  601     elif OptionsInfo["RascalSimilaritySearchMode"]:
  602         MolIDColName = "QueryMolName"
  603     
  604     if OptionsInfo["CountHitsMode"]:
  605         Line = HitsInfoOutFileDelim.join([MolIDColName, "MaxPossibleHits"])
  606     else:
  607         Line = HitsInfoOutFileDelim.join([MolIDColName, "HitsCount", "MaxPossibleHits"])
  608     
  609     Writer.write("%s\n" % Line)
  610     
  611     return Writer
  612 
  613 def WriteHitsInfo(Writer, HitsInfo):
  614     """Write hits info."""
  615     
  616     HitsInfoWords = ["%s" % HitInfo for HitInfo in HitsInfo]
  617     
  618     HitsInfoOutFileDelim = OptionsInfo["HitsInfoOutFileDelim"]
  619     Line = HitsInfoOutFileDelim.join(HitsInfoWords)
  620     
  621     Writer.write("%s\n" % Line)
  622 
  623 def ProcessFingerprintsParameters():
  624     """Set up and process fingerprints parameters."""
  625 
  626     SetupFingerprintsNamesAndParameters()
  627     
  628     ProcessSpecifiedFingerprintsName()
  629     ProcessSpecifiedFingerprintsParameters()
  630 
  631 def SetupFingerprintsNamesAndParameters():
  632     """Set up fingerprints parameters."""
  633 
  634     OptionsInfo["FingerprintsNames"] = ["AtomPairs", "Morgan", "MorganFeatures", "PathLength", "TopologicalTorsions"]
  635 
  636     OptionsInfo["FingerprintsParamsInfo"] = {}
  637     OptionsInfo["FingerprintsParamsInfo"]["AtomPairs"] = {"MinLength": 1, "MaxLength": 30, "UseChirality": False, "Use2D": True, "FPSize": 2048}
  638     OptionsInfo["FingerprintsParamsInfo"]["Morgan"] = {"Radius": 2, "UseChirality": False, "UseBondTypes": True, "UseRingMembership": True, "FPSize": 2048}
  639     OptionsInfo["FingerprintsParamsInfo"]["MorganFeatures"] = {"Radius": 2, "UseChirality": False, "UseBondTypes": True, "UseRingMembership": True, "FPSize": 2048}
  640     OptionsInfo["FingerprintsParamsInfo"]["PathLength"] = {"MinPath": 1, "MaxPath": 7, "UseExplicitHs": True, "UseBranchedPaths": True, "UseBondOrder": True, "FPSize": 2048, "BitsPerHash": 2}
  641     OptionsInfo["FingerprintsParamsInfo"]["TopologicalTorsions"] = {"UseChirality": False, "FPSize": 2048}
  642 
  643 def ProcessSpecifiedFingerprintsName():
  644     """Process specified fingerprints name."""
  645 
  646     #  Set up a canonical fingerprints name map...
  647     CanonicalFingerprintsNamesMap = {}
  648     for Name in OptionsInfo["FingerprintsNames"]:
  649         CanonicalName = Name.lower()
  650         CanonicalFingerprintsNamesMap[CanonicalName] = Name
  651 
  652     # Validate specified fingerprints name...
  653     CanonicalFingerprintsName = OptionsInfo["Fingerprints"].lower()
  654     if CanonicalFingerprintsName not in CanonicalFingerprintsNamesMap:
  655         MiscUtil.PrintError("The fingerprints name, %s, specified using \"-f, --fingerprints\" option is not a valid name." % (OptionsInfo["Fingerprints"]))
  656     
  657     OptionsInfo["SpecifiedFingerprints"] = CanonicalFingerprintsNamesMap[CanonicalFingerprintsName]
  658 
  659 def ProcessSpecifiedFingerprintsParameters():
  660     """Process specified fingerprints parameters."""
  661 
  662     if re.match("^auto$", OptionsInfo["FingerprintsParams"], re.I):
  663         # Nothing to process...
  664         return
  665 
  666     SpecifiedFingerprintsName = OptionsInfo["SpecifiedFingerprints"]
  667     
  668     # Parse specified fingerprints parameters...
  669     FingerprintsParams = re.sub(" ", "", OptionsInfo["FingerprintsParams"])
  670     if not FingerprintsParams:
  671         MiscUtil.PrintError("No valid parameter name and value pairs specified using \"--fingerprintsParams\" option corrresponding to fingerprints %s." % (SpecifiedFingerprintsName))
  672 
  673     FingerprintsParamsWords = FingerprintsParams.split(",")
  674     if len(FingerprintsParamsWords) % 2:
  675         MiscUtil.PrintError("The number of comma delimited paramater names and values, %d, specified using \"--fingerprintsParams\" option must be an even number." % (len(FingerprintsParamsWords)))
  676 
  677     # Setup canonical parameter names for specified fingerprints...
  678     ValidParamNames = []
  679     CanonicalParamNamesMap = {}
  680     for ParamName in sorted(OptionsInfo["FingerprintsParamsInfo"][SpecifiedFingerprintsName]):
  681         ValidParamNames.append(ParamName)
  682         CanonicalParamNamesMap[ParamName.lower()] = ParamName
  683 
  684     # Validate and set paramater names and value...
  685     for Index in range(0, len(FingerprintsParamsWords), 2):
  686         Name = FingerprintsParamsWords[Index]
  687         Value = FingerprintsParamsWords[Index + 1]
  688 
  689         CanonicalName = Name.lower()
  690         if  not CanonicalName in CanonicalParamNamesMap:
  691             MiscUtil.PrintError("The parameter name, %s, specified using \"--fingerprintsParams\" option for fingerprints, %s, is not a valid name. Supported parameter names: %s" % (Name, SpecifiedFingerprintsName, " ".join(ValidParamNames)))
  692 
  693         ParamName = CanonicalParamNamesMap[CanonicalName]
  694         if re.match("^(UseChirality|Use2D|UseBondTypes|UseRingMembership|UseExplicitHs|UseBranchedPaths|UseBondOrder)$", ParamName, re.I):
  695             if not re.match("^(Yes|No|True|False)$", Value, re.I):
  696                 MiscUtil.PrintError("The parameter value, %s, specified using \"--fingerprintsParams\" option for fingerprints, %s, is not a valid value. Supported values: Yes No True False" % (Value, SpecifiedFingerprintsName))
  697             ParamValue = False
  698             if re.match("^(Yes|True)$", Value, re.I):
  699                 ParamValue = True
  700         else:
  701             ParamValue = int(Value)
  702             if ParamValue <= 0:
  703                 MiscUtil.PrintError("The parameter value, %s, specified using \"--fingerprintsParams\" option for fingerprints, %s, is not a valid value. Supported values: > 0" % (Value, SpecifiedFingerprintsName))
  704         
  705         # Set value...
  706         OptionsInfo["FingerprintsParamsInfo"][SpecifiedFingerprintsName][ParamName] = ParamValue
  707 
  708 def ProcessOutfileParameters():
  709     """Process outfile related parameters"""
  710 
  711     Mode = OptionsInfo["Mode"]
  712     
  713     OptionsInfo["Outfile"] = Options["--outfile"]
  714     OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters("--outfileParams", Options["--outfileParams"], Options["--infile"], Options["--outfile"])
  715 
  716     # OutfileMode is only used for similarity and substructure search...
  717     OptionsInfo["OutFileMode"] = Options["--outfileMode"]
  718     SingleOutFileMode = True
  719     if not re.match("^SingleFile$", Options["--outfileMode"], re.I):
  720         SingleOutFileMode = False
  721     OptionsInfo["SingleOutFileMode"] = SingleOutFileMode
  722 
  723     FileDir, FileName, FileExt = MiscUtil.ParseFileName(Options["--outfile"])
  724     OptionsInfo["OutFileRoot"] = FileName
  725     OptionsInfo["OutFileExt"] = FileExt
  726 
  727     OutFileSuffix = ""
  728     if re.match("^SubstructureSearch$", Mode, re.I):
  729         OutFileSuffix = "Pattern"
  730     elif re.match("^SimilaritySearch$", Mode, re.I):
  731         OutFileSuffix = "Mol"
  732     OptionsInfo["OutFileSuffix"] = OutFileSuffix
  733 
  734     OptionsInfo["HitsInfoOutFile"] = "%s_HitCount.csv" % OptionsInfo["OutFileRoot"]
  735     OptionsInfo["HitsInfoOutFileDelim"] = ","
  736     
  737     TextOutFileMode, TextOutFileDelim, TextOutFileTitleLine = [None] * 3
  738     if re.match("^(SimilaritySearch|SubstructureSearch)$", Mode, re.I):
  739         TextOutFileMode = False
  740         TextOutFileDelim = ""
  741         TextOutFileTitleLine = True
  742         
  743         if MiscUtil.CheckFileExt(Options["--outfile"], "csv"):
  744             TextOutFileMode = True
  745             TextOutFileDelim = ","
  746         elif MiscUtil.CheckFileExt(Options["--outfile"], "tsv txt"):
  747             TextOutFileMode = True
  748             TextOutFileDelim = "\t"
  749         elif MiscUtil.CheckFileExt(Options["--outfile"], "smi"):
  750             TextOutFileMode = True
  751             TextOutFileDelim = OptionsInfo["OutfileParams"]["SMILESDelimiter"]
  752             TextOutFileTitleLine = OptionsInfo["OutfileParams"]["SMILESTitleLine"]
  753     
  754     OptionsInfo["TextOutFileMode"] = TextOutFileMode
  755     OptionsInfo["TextOutFileDelim"] = TextOutFileDelim
  756     OptionsInfo["TextOutFileTitleLine"] = TextOutFileTitleLine
  757     
  758     if not OptionsInfo["SingleOutFileMode"]:
  759         FilesSpec = "%s_%s*.%s" %  (OptionsInfo["OutFileRoot"], OptionsInfo["OutFileSuffix"], OptionsInfo["OutFileExt"])
  760         FileNames = MiscUtil.ExpandFileNames(FilesSpec)
  761         if len(FileNames):
  762             if not Options["--overwrite"]:
  763                 MiscUtil.PrintError("The output files, %s, corresponding to output file specified, %s, for option \"-o, --outfile\" already exist. Use option \"--ov\" or \"--overwrite\" and try again." % (FilesSpec, OptionsInfo["Outfile"]))
  764     
  765 def ProcessRascalSearchParametersOption():
  766     """Process option for RASCAL similarity search."""
  767     
  768     ParamsOptionName = "--rascalSearchParams"
  769     ParamsOptionValue = Options[ParamsOptionName]
  770     
  771     ParamsDefaultInfo = { "AllBestMCESs": ["bool", False], "CompleteAromaticRings": ["bool", True], "CompleteSmallestRings": ["bool", False], "ExactConnectionsMatch": ["bool", False], "IgnoreAtomAromaticity": ["bool", True], "IgnoreBondOrders": ["bool", False], "MaxBondMatchPairs": ["int", 1000], "MaxFragSeparation": ["int", -1], "MinCliqueSize": ["int", 0], "MinFragSize": ["int", -1], "ReturnEmptyMCES": ["bool", False], "RingMatchesRingOnly": ["bool", False], "SimilarityThreshold": ["float", 0.7], "SingleLargestFrag": ["bool", False], "Timeout": ["int", 60]}
  772     
  773     # Update default values to match RDKit default values...
  774     RDKitRascalSearchParams = rdRascalMCES.RascalOptions()
  775     for ParamName in ParamsDefaultInfo.keys():
  776         RDKitParamName = LowercaseFirstLetter(ParamName)
  777         if hasattr(RDKitRascalSearchParams, RDKitParamName):
  778             RDKitParamValue = getattr(RDKitRascalSearchParams, RDKitParamName)
  779             ParamsDefaultInfo[ParamName][1] = RDKitParamValue
  780         else:
  781             MiscUtil.PrintWarning("The RASCAL search parameter, %s, is not available in RDKit. Ignoring parameter..." % ParamName)
  782     
  783     RascalSearchParams = MiscUtil.ProcessOptionNameValuePairParameters(ParamsOptionName, ParamsOptionValue, ParamsDefaultInfo)
  784 
  785     for ParamName in ["MaxBondMatchPairs"]:
  786         ParamValue = RascalSearchParams[ParamName]
  787         if  ParamValue <= 0:
  788             MiscUtil.PrintError("The parameter value, %s, specified for parameter name, %s, using \"%s\" option is not a valid value. Supported values: > 0\n" % (ParamValue, ParamName, ParamsOptionName))
  789     
  790     for ParamName in ["MinCliqueSize", "SimilarityThreshold"]:
  791         ParamValue = RascalSearchParams[ParamName]
  792         if  ParamValue < 0:
  793             MiscUtil.PrintError("The parameter value, %s, specified for parameter name, %s, using \"%s\" option is not a valid value. Supported values: >= 0\n" % (ParamValue, ParamName, ParamsOptionName))
  794         if re.match("^SimilarityThreshold$", ParamName, re.I):
  795             if  ParamValue > 1:
  796                 MiscUtil.PrintError("The parameter value, %s, specified for parameter name, %s, using \"%s\" option is not a valid value. Supported values: <= 1\n" % (ParamValue, ParamName, ParamsOptionName))
  797     
  798     for ParamName in ["MaxFragSeparation", "MinFragSize", "Timeout"]:
  799         ParamValue = RascalSearchParams[ParamName]
  800         if  not (ParamValue == -1 or ParamValue > 0):
  801             MiscUtil.PrintError("The parameter value, %s, specified for parameter name, %s, using \"%s\" option is not a valid value. Supported values: -1 or > 0\n" % (ParamValue, ParamName, ParamsOptionName))
  802     
  803 
  804     # Setup RDKit object for RASCAL match parameters...
  805     RDKitRascalSearchParams = rdRascalMCES.RascalOptions()
  806     for ParamName in RascalSearchParams.keys():
  807         ParamValue = RascalSearchParams[ParamName]
  808         
  809         # Convert first letter to lower case for RDKit param name and set its value...
  810         RDKitParamName = LowercaseFirstLetter(ParamName)
  811         if hasattr(RDKitRascalSearchParams, RDKitParamName):
  812             setattr(RDKitRascalSearchParams, RDKitParamName, ParamValue)
  813         else:
  814             MiscUtil.PrintWarning("The RASCAL searh parameter, %s, is not available in RDKit. Ignoring parameter..." % ParamName)
  815     
  816     OptionsInfo["RascalSearchParams"] = RascalSearchParams
  817     OptionsInfo["RDKitRascalSearchParams"] = RDKitRascalSearchParams
  818 
  819     RDKitParamInfo = {}
  820     for ParamName in RascalSearchParams.keys():
  821         RDKitParamName = LowercaseFirstLetter(ParamName)
  822         RDKitParamValue = getattr(RDKitRascalSearchParams, RDKitParamName)
  823         RDKitParamInfo[RDKitParamName] = RDKitParamValue
  824 
  825 def ProcessSubstructureMatchParametersOption():
  826     """Process option for substructure match parameters. """
  827     
  828     ParamsOptionName = "--substructureMatchParams"
  829     ParamsOptionValue = Options[ParamsOptionName]
  830     
  831     ParamsDefaultInfo = { "AromaticMatchesConjugated": ["bool", False], "MaxMatches": ["int", 1000], "MaxRecursiveMatches": ["int", 1000], "RecursionPossible": ["bool", True], "SpecifiedStereoQueryMatchesUnspecified": ["bool", False], "Uniquify": ["bool", True], "UseChirality": ["bool", False], "UseEnhancedStereo": ["bool", False], "UseGenericMatchers": ["bool", False]}
  832     
  833     # Update default values to match RDKit default values...
  834     RDKitSubstructureMatchParams = Chem.SubstructMatchParameters()
  835     for ParamName in ParamsDefaultInfo.keys():
  836         RDKitParamName = LowercaseFirstLetter(ParamName)
  837         if hasattr(RDKitSubstructureMatchParams, RDKitParamName):
  838             RDKitParamValue = getattr(RDKitSubstructureMatchParams, RDKitParamName)
  839             ParamsDefaultInfo[ParamName][1] = RDKitParamValue
  840         else:
  841             MiscUtil.PrintWarning("The substructure match parameter, %s, is not available in RDKit. Ignoring parameter..." % ParamName)
  842     
  843     SubstructureMatchParams = MiscUtil.ProcessOptionNameValuePairParameters(ParamsOptionName, ParamsOptionValue, ParamsDefaultInfo)
  844     
  845     for ParamName in ["MaxMatches", "MaxRecursiveMatches"]:
  846         ParamValue = SubstructureMatchParams[ParamName]
  847         if  ParamValue <= 0:
  848             MiscUtil.PrintError("The parameter value, %s, specified for parameter name, %s, using \"%s\" option is not a valid value. Supported values: > 0\n" % (ParamValue, ParamName, ParamsOptionName))
  849 
  850     # Setup RDKit object for substructure match parameters...
  851     RDKitSubstructureMatchParams = Chem.SubstructMatchParameters()
  852     for ParamName in SubstructureMatchParams.keys():
  853         ParamValue = SubstructureMatchParams[ParamName]
  854         
  855         # Convert first letter to lower case for RDKit param name and set its value...
  856         RDKitParamName = LowercaseFirstLetter(ParamName)
  857         if hasattr(RDKitSubstructureMatchParams, RDKitParamName):
  858             setattr(RDKitSubstructureMatchParams, RDKitParamName, ParamValue)
  859         else:
  860             MiscUtil.PrintWarning("The substructure match parameter, %s, is not available in RDKit. Ignoring parameter..." % ParamName)
  861     
  862     OptionsInfo["SubstructureMatchParams"] = SubstructureMatchParams
  863     OptionsInfo["RDKitSubstructureMatchParams"] = RDKitSubstructureMatchParams
  864 
  865 def ProcessSynthonSearchParamatersOption():
  866     """Process option for synthon search parameters. """
  867 
  868     ParamsOptionName = "--synthonSearchParams"
  869     ParamsOptionValue = Options[ParamsOptionName]
  870     
  871     ParamsDefaultInfo = {"ApproxSimilarityAdjuster": ["float", 0.1], "BuildHits": ["bool", True], "FragSimilarityAdjuster": ["float", 0.1], "HitStart": ["int", 0], "MaxHits": ["int", 1000], "MaxNumFrags": ["int", 100000], "NumThreads": ["int", 1], "RandomSample": ["bool", False], "RandomSeed": ["int", -1], "SimilarityCutoff": ["float", 0.5], "TimeOut": ["int", 600]}
  872 
  873     # Update default values to match RDKit default values...
  874     RDKitSynthonSearchParams = rdSynthonSpaceSearch.SynthonSpaceSearchParams()
  875     for ParamName in ParamsDefaultInfo.keys():
  876         RDKitParamName = LowercaseFirstLetter(ParamName)
  877         if hasattr(RDKitSynthonSearchParams, RDKitParamName):
  878             RDKitParamValue = getattr(RDKitSynthonSearchParams, RDKitParamName)
  879             ParamsDefaultInfo[ParamName][1] = RDKitParamValue
  880         else:
  881             MiscUtil.PrintWarning("The synthon space search paramater, %s, is not available in RDKit. Ignoring parameter..." % ParamName)
  882     
  883     SynthonSearchParams = MiscUtil.ProcessOptionNameValuePairParameters(ParamsOptionName, ParamsOptionValue, ParamsDefaultInfo)
  884     
  885     for ParamName in ["ApproxSimilarityAdjuster", "FragSimilarityAdjuster", "SimilarityCutoff", "HitStart"]:
  886         ParamValue = SynthonSearchParams[ParamName]
  887         if  ParamValue < 0:
  888             MiscUtil.PrintError("The parameter value, %s, specified for parameter name, %s, using \"%s\" option is not a valid value. Supported values: >= 0\n" % (ParamValue, ParamName, ParamsOptionName))
  889         if re.match("^SimilarityCutoff$", ParamName, re.I):
  890             if  ParamValue > 1:
  891                 MiscUtil.PrintError("The parameter value, %s, specified for parameter name, %s, using \"%s\" option is not a valid value. Supported values: <= 1\n" % (ParamValue, ParamName, ParamsOptionName))
  892     
  893     for ParamName in ["MaxNumFrags", "TimeOut"]:
  894         ParamValue = SynthonSearchParams[ParamName]
  895         if  ParamValue <= 0:
  896             MiscUtil.PrintError("The parameter value, %s, specified for parameter name, %s, using \"%s\" option is not a valid value. Supported values: > 0\n" % (ParamValue, ParamName, ParamsOptionName))
  897     
  898     for ParamName in ["MaxHits", "RandomSeed"]:
  899         ParamValue = SynthonSearchParams[ParamName]
  900         if  not (ParamValue == -1 or ParamValue > 0):
  901             MiscUtil.PrintError("The parameter value, %s, specified for parameter name, %s, using \"%s\" option is not a valid value. Supported values: -1 or > 0\n" % (ParamValue, ParamName, ParamsOptionName))
  902     
  903     ParamName = "NumThreads"
  904     ParamValue = SynthonSearchParams[ParamName]
  905     if ParamValue > 0:
  906         if ParamValue > mp.cpu_count():
  907             MiscUtil.PrintWarning("The parameter value, %s, specified for parameter name, %s, using \"%s\" option is greater than number of CPUs, %s, returned by mp.cpu_count()." % (ParamValue, ParamName, ParamsOptionName, mp.cpu_count()))
  908     elif ParamValue < 0:
  909         if abs(ParamValue) > mp.cpu_count():
  910             MiscUtil.PrintWarning("The absolute parameter value, %s, specified for parameter name, %s, using \"%s\" option is greater than number of CPUs, %s, returned by mp.cpu_count()." % (abs(ParamValue), ParamName, ParamsOptionName, mp.cpu_count()))
  911 
  912     # Setup RDKit object for synthon space search parameters...
  913     RDKitSynthonSearchParams = rdSynthonSpaceSearch.SynthonSpaceSearchParams()
  914     for ParamName in SynthonSearchParams.keys():
  915         ParamValue = SynthonSearchParams[ParamName]
  916         
  917         # Convert first letter to lower case for RDKit param name and set its value...
  918         RDKitParamName = LowercaseFirstLetter(ParamName)
  919         if hasattr(RDKitSynthonSearchParams, RDKitParamName):
  920             setattr(RDKitSynthonSearchParams, RDKitParamName, ParamValue)
  921         else:
  922             MiscUtil.PrintWarning("The synthon space search paramater, %s, is not available in RDKit. Ignoring parameter..." % ParamName)
  923 
  924     OptionsInfo["CountHitsMode"] = False if SynthonSearchParams["BuildHits"] else True
  925 
  926     OptionsInfo["SynthonSearchParams"] = SynthonSearchParams
  927     OptionsInfo["RDKitSynthonSearchParams"] = RDKitSynthonSearchParams
  928 
  929 def LowercaseFirstLetter(Text):
  930     """Convert first letter of a string to lowercase. """
  931 
  932     if Text is None or len(Text) == 0:
  933         return Text
  934     
  935     return Text[0].lower() + Text[1:]
  936     
  937 def ProcessQueryPatternOption():
  938     """Process query pattern option. """
  939     
  940     QueryPattern = None if re.match("^None$", Options["--queryPattern"], re.I) else Options["--queryPattern"]
  941     QueryPatternMols = None
  942 
  943     if QueryPattern is not None:
  944         QueryPatternMols = []
  945         Patterns = QueryPattern.split()
  946         for Pattern in Patterns:
  947             PatternMol = Chem.MolFromSmarts(Pattern)
  948             if PatternMol is None:
  949                 MiscUtil.PrintError("The value specified, %s, using option \"--queryPattern\" is not a valid SMARTS: Failed to create pattern molecule" % (Pattern))
  950             QueryPatternMols.append(PatternMol)
  951 
  952     OptionsInfo["QueryPattern"] = QueryPattern
  953     OptionsInfo["QueryPatternMols"] = QueryPatternMols
  954 
  955 def ProcessOptions():
  956     """Process and validate command line arguments and options."""
  957 
  958     MiscUtil.PrintInfo("Processing options...")
  959     
  960     # Validate options...
  961     ValidateOptions()
  962     
  963     OptionsInfo["Mode"] = Options["--mode"]
  964     OptionsInfo["RascalSimilaritySearchMode"] = True if re.match("^RASCALSimilaritySearch$", Options["--mode"], re.I) else False
  965     OptionsInfo["SimilaritySearchMode"] = True if re.match("^SimilaritySearch$", Options["--mode"], re.I) else False
  966     OptionsInfo["SubstructureSearchMode"] = True if re.match("^SubstructureSearch$", Options["--mode"], re.I) else False
  967     
  968     OptionsInfo["Fingerprints"] = Options["--fingerprints"]
  969 
  970     OptionsInfo["FingerprintsParams"] = Options["--fingerprintsParams"]
  971     ProcessFingerprintsParameters()
  972 
  973     OptionsInfo["Infile"] = Options["--infile"]
  974     
  975     ProcessOutfileParameters()
  976     
  977     OptionsInfo["Overwrite"] = Options["--overwrite"]
  978     
  979     ProcessQueryPatternOption()
  980     
  981     OptionsInfo["QueryFile"] = None if re.match("^none$", Options["--queryFile"]) else Options["--queryFile"]
  982     if OptionsInfo["QueryFile"] is None:
  983         OptionsInfo["QueryFileParams"] = None
  984     else:
  985         OptionsInfo["QueryFileParams"] = MiscUtil.ProcessOptionInfileParameters("--queryFileParams", Options["--queryFileParams"], Options["--queryFile"])
  986         
  987     ProcessRascalSearchParametersOption()
  988     
  989     ProcessSubstructureMatchParametersOption()
  990     ProcessSynthonSearchParamatersOption()
  991 
  992     OptionsInfo["Overwrite"] = Options["--overwrite"]
  993 
  994 def RetrieveOptions():
  995     """Retrieve command line arguments and options."""
  996     
  997     # Get options...
  998     global Options
  999     Options = docopt(_docoptUsage_)
 1000     
 1001     # Set current working directory to the specified directory...
 1002     WorkingDir = Options["--workingdir"]
 1003     if WorkingDir:
 1004         os.chdir(WorkingDir)
 1005     
 1006     # Handle examples option...
 1007     if "--examples" in Options and Options["--examples"]:
 1008         MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_))
 1009         sys.exit(0)
 1010     
 1011 def ValidateOptions():
 1012     """Validate option values."""
 1013     
 1014     MiscUtil.ValidateOptionTextValue("-m, --mode", Options["--mode"], "FingerprintsGeneration BinaryDBFileGeneration LibraryEnumeration RASCALSimilaritySearch SimilaritySearch SubstructureSearch")
 1015     
 1016     MiscUtil.ValidateOptionTextValue("-f, --fingerprints", Options["--fingerprints"], "AtomPairs Morgan MorganFeatures PathLength TopologicalTorsions")
 1017     
 1018     MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"])
 1019     MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "txt csv spc")
 1020     
 1021     MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "sdf sd smi csv tsv txt spc")
 1022     if re.match("^SingleFile$", Options["--outfileMode"], re.I):
 1023         MiscUtil.ValidateOptionsOutputFileOverwrite("-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"])
 1024     MiscUtil.ValidateOptionsDistinctFileNames("-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"])
 1025     
 1026     if re.match("^(FingerprintsGeneration|BinaryDBFileGeneration)$", Options["--mode"], re.I):
 1027         MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "spc")
 1028         if not MiscUtil.CheckFileExt(Options["--outfile"], "spc"):
 1029             MiscUtil.PrintError("The file name specified , %s, for option \"--outfile\" is not valid during, %s, value of \"--mode\" option. Supported file formats: spc\n" % (Options["--outfile"], Options["--mode"]))
 1030     elif re.match("^LibraryEnumeration$", Options["--mode"], re.I):
 1031         if not MiscUtil.CheckFileExt(Options["--outfile"], "smi"):
 1032             MiscUtil.PrintError("The file name specified , %s, for option \"--outfile\" is not valid during, %s, value of \"--mode\" option. Supported file formats: smi\n" % (Options["--outfile"], Options["--mode"]))
 1033     elif re.match("^(RASCALSimilaritySearch|SimilaritySearch|SubstructureSearch)$", Options["--mode"], re.I):
 1034         if not MiscUtil.CheckFileExt(Options["--outfile"], "sdf sd smi csv tsv txt"):
 1035             MiscUtil.PrintError("The file name specified , %s, for option \"--outfile\" is not valid during, %s, value of \"--mode\" option. Supported file formats: sdf sd smi csv tsv txt\n" % (Options["--outfile"], Options["--mode"]))
 1036     
 1037     MiscUtil.ValidateOptionTextValue("--outfileMode", Options["--outfileMode"], "SingleFile or MultipleFiles")
 1038 
 1039     QueryPattern = Options["--queryPattern"]
 1040     if re.match("^SubstructureSearch$", Options["--mode"], re.I):
 1041         if re.match("^None$", QueryPattern, re.I):
 1042             MiscUtil.PrintError("You must specify a valid SMARTS pattern(s) for option \"--queryPattern\" during, SubstructureSearch, value of \"-m, --mode\" option.")
 1043 
 1044     PatternMols = []
 1045     if not re.match("^None$", QueryPattern, re.I):
 1046         Patterns = QueryPattern.split()
 1047         for Pattern in Patterns:
 1048             PatternMol = Chem.MolFromSmarts(Pattern)
 1049             if PatternMol is None:
 1050                 MiscUtil.PrintError("The value specified, %s, using option \"--queryPattern\" is not a valid SMARTS: Failed to create pattern molecule" % (Pattern))
 1051             PatternMols.append(PatternMol)
 1052     
 1053     if re.match("^SubstructureSearch$", Options["--mode"], re.I):
 1054         if len(PatternMols) == 0:
 1055             MiscUtil.PrintError("You must specify a valid SMARTS pattern(s) for option \"--queryPattern\" during, SubstructureSearch, value of \"-m, --mode\" option.")
 1056     
 1057     if re.match("^(RASCALSimilaritySearch|SimilaritySearch)$", Options["--mode"], re.I):
 1058         if re.match("^None$", Options["--queryFile"], re.I):
 1059             MiscUtil.PrintError("You must specify a valid filename for option \"--queryFile\" during, SimilaritySearch, value of \"-m, --mode\" option.")
 1060         
 1061     if not re.match("^None$", Options["--queryFile"], re.I):
 1062         MiscUtil.ValidateOptionFilePath("--queryFile", Options["--queryFile"])
 1063         MiscUtil.ValidateOptionFileExt("--queryFile", Options["--queryFile"], "sdf sd smi csv tsv")
 1064 
 1065 # Setup a usage string for docopt...
 1066 _docoptUsage_ = """
 1067 RDKitPerformSynthonSpaceSearch.py - Perform a synthon space search
 1068 
 1069 Usage:
 1070     RDKitPerformSynthonSpaceSearch.py [--fingerprints <Morgan, PathLength...>] [--fingerprintsParams <Name,Value,...>]
 1071                                       [--mode <SubstructureSearch...>] [ --outfileParams <Name,Value,...>] [--outfileMode <SingleFile or MultipleFiles>]
 1072                                       [--overwrite] [--queryPattern <SMARTS>] [--queryFileParams <Name,Value,...>] [--queryFile <filename>]
 1073                                       [--rascalSearchParams <Name,Value,...>] [--substructureMatchParams <Name,Value,...>]
 1074                                       [--synthonSearchParams <Name,Value,...>] [-w <dir>] -i <infile> -o <outfile>
 1075     RDKitPerformSynthonSpaceSearch.py -l | --list -i <infile>
 1076     RDKitPerformSynthonSpaceSearch.py -h | --help | -e | --examples
 1077 
 1078 Description:
 1079     Perform a similarity or substructure search, using query molecules or SMARTS
 1080     patterns, against a synthon space [ Ref 174 ] in an input file, and write out the
 1081     hit molecules to output file(s). You may optionally count the hits without
 1082     building and writing them out.
 1083 
 1084     In addition, you may enumerate a combinatorial library corresponding to a
 1085     synthon space, generate fingerprints for a synthon space, or list information
 1086     about a synthon space.
 1087 
 1088     You must provide a valid synthon space text or binary database file supported
 1089     by RDKit module rdSynthonSpaceSearch.
 1090 
 1091     You may perform similarity search using fingerprints or employ RASCAL (RApid
 1092     Similarity CALculations using Maximum Edge Subgrahps) methodology [ Ref 175 ].
 1093 
 1094     A number of fingerprints are available for performing similarity search. The
 1095     similarity metric, however, is calculated using Tanimoto similarity on hashed
 1096     fingerprints. 
 1097 
 1098     The RASCAL similarity between two molecuels is calculated based on MCES
 1099     (Maximum Common Edge Subgraphs) and corresponds to Johnson similarity.
 1100 
 1101     The supported input file formats are: CSV/TXT synthon space (.csv, .txt) or
 1102     binary synthon space (.spc).
 1103 
 1104     The supported outfile formats, for different '--mode' values, are shown
 1105     below:
 1106         
 1107         BinaryDBFileGeneration: Binary database file (.spc)
 1108         FingerprintsGeneration: Binary database file (.spc)
 1109         LibraryEnumeration: SMILES (.smi)
 1110         SimilaritySearch or SubstructureSearch: SD (.sdf, .sd), SMILES (.smi),
 1111             CSV/TSV (.csv or .tsv)
 1112         
 1113     Possible output files:
 1114          
 1115         <OutfileRoot>.<sdf,sd,smi,csv,tsv>
 1116          
 1117         <OutfileRoot>_Mol<Num>.<sdf,sd,smi,csv,tsv>
 1118         <OutfileRoot>_Pattern<Num>.<sdf,sd,smi,csv,tsv>
 1119          
 1120          <OutfileRoot>_HitCount.csv
 1121          
 1122     The <OutfileRoot>_HitCount.csv contains aditional information regarding hit
 1123      counts and is writter out for both similarity and substructure search.
 1124 
 1125 Options:
 1126     -f, --fingerprints <Morgan, PathLength...>  [default: Morgan]
 1127         Fingerprints to use for performing synthon space similarity search.
 1128         Supported values: AtomPairs, Morgan, MorganFeatures, PathLength,
 1129         TopologicalTorsions. The PathLength fingerprints are Daylight like
 1130         fingerprints. The Morgan and MorganFeature fingerprints are circular
 1131         fingerprints, corresponding Scitegic's Extended Connectivity Fingerprints
 1132         (ECFP) and Features Connectivity Fingerprints (FCFP). The values of
 1133         default parameters for generating fingerprints can be modified using
 1134         '--fingerprintsParams' option.
 1135     --fingerprintsParams <Name,Value,...>  [default: auto]
 1136         Parameter values to use for generating fingerprints. The default values
 1137         are dependent on the value of '-f, --fingerprints' option. In general, it is a
 1138         comma delimited list of parameter name and value pairs for the name of
 1139         fingerprints specified using '-f, --fingerprints' option. The supported
 1140         parameter names along with their default values for valid fingerprints
 1141         names are shown below:
 1142             
 1143             AtomPairs: minLength,1 ,maxLength,useChirality,No,
 1144                 use2D, yes, fpSize, 2048
 1145             Morgan: radius,2, useChirality,No, useBondTypes, yes,
 1146                 useRingMembership, yes, fpSize, 2048
 1147             MorganFeatures: radius,2, useChirality,No, useBondTypes, yes,
 1148                 useRingMembership, yes, fpSize, 2048
 1149             PathLength: minPath,1, maxPath,7, useExplicitHs, yes,
 1150                 useBranchedPaths, yes,useBondOrder,yes, fpSize, 2048,
 1151                 bitsPerHash,2
 1152             TopologicalTorsions: useChirality,No, fpSize, 2048
 1153             
 1154         A brief description of parameters, taken from RDKit documentation, is
 1155         provided below:
 1156             
 1157             AtomPairs:
 1158             
 1159             minLength: Minimum distance between atoms.
 1160             maxLength: Maximum distance between atoms.
 1161             useChirality: Use chirality for atom invariants.
 1162             use2D: Use topological distance matrix.
 1163             fpSize: Size of the fingerpints bit vector.
 1164             
 1165             Morgan and MorganFeatures:
 1166             
 1167             radius: Neighborhood radius.
 1168             useChirality: Use chirality to generate fingerprints.
 1169             useBondTypes: Use bond type for the bond invariants.
 1170             useRingMembership: Use ring membership.
 1171             fpSize: Size of the fingerpints bit vector.
 1172             
 1173             PathLength:
 1174             
 1175             minPath: Minimum bond path length.
 1176             maxPath: Maximum bond path length.
 1177             useExplicitHs: Use explicit hydrogens.
 1178             useBranchedPaths: Use branched paths along with linear paths.
 1179             useBondOrder: Us bond order in the path hashes.
 1180             fpSize: Size of the fingerpints bit vector.
 1181             bitsPerHash: Number of bits set per path.
 1182             
 1183             TopologicalTorsions
 1184             
 1185             useChirality: Use chirality to generate fingerprints.
 1186             fpSize: Size of the fingerpints bit vector.
 1187             
 1188     -e, --examples
 1189         Print examples.
 1190     -h, --help
 1191         Print this help message.
 1192     -i, --infile <infile>
 1193         Synthon space input file name.
 1194     -l, --list
 1195         List information about synthon space.
 1196     -m, --mode <SubstructureSearch...>  [default: SimilaritySearch]
 1197         Perform similarity or substructure search, enumerate synthon space,
 1198         or list information about a synthon space. The supported values along
 1199         with a brief explanation of the expected behavior are shown below:
 1200             
 1201             BinaryDBFileGeneration: Write out a binary database file for a
 1202                 synthon space.
 1203             FingerprintsGeneration: Generate fingerprints for a synthon space and
 1204                write out a binary database file along with fingerprints.
 1205             LibraryEnumeration: Enumerate a combinatorial library for a synthon
 1206                 space and write out a SMILES file.
 1207             RASCALSimilaritySearch: Perform a RASCAL (RApid Similarity
 1208                 CALculations using Maximum Edge Subgraphs) similarity search.
 1209             SimilaritySearch: Perform a similarity search using fingerprints.
 1210             SubstructureSearch: Perform a substructure search using specified
 1211                 SMARTS patterns.
 1212             
 1213     -o, --outfile <outfile>
 1214         Output file name. The <OutfileRoot> and <OutfileExt> are used to generate
 1215         file names during 'MultipleFiles' value for '--outfileMode' option.
 1216     --outfileMode <SingleFile or MultipleFiles>  [default: SingleFile]
 1217         Write out a single file containing hit molecules for substructure or
 1218         similarity search or generate an individual file for each query pattern
 1219         or molecule. Possible values: SingleFile or MultipleFiles. The query
 1220         pattern number or molecule name is written to output file(s). The query
 1221         pattern or molecule number is also appended to output file names during
 1222         the generation of multiple output files.
 1223     --outfileParams <Name,Value,...>  [default: auto]
 1224         A comma delimited list of parameter name and value pairs for writing
 1225         molecules to files during similarity and substructure search. The supported
 1226         parameter names for different file formats, along with their default values,
 1227         are shown below:
 1228             
 1229             SD: compute2DCoords,auto,kekulize,yes,forceV3000,no
 1230             SMILES: smilesKekulize,no,smilesDelimiter,space, smilesIsomeric,yes,
 1231                 smilesTitleLine,yes
 1232             
 1233         Default value for compute2DCoords: yes for SMILES input file; no for all other
 1234         file types. The kekulize and smilesIsomeric parameters are also used during
 1235         generation of SMILES strings for CSV/TSV files.
 1236     --queryPattern <SMARTS SMARTS ...>  [default: none]
 1237         A space delimited list of SMARTS patterns for performing substructure
 1238         search. This is required for 'SubstructureSearch' value of '--mode' option.
 1239     --queryFile <filename>  [default: none]
 1240         Input file containing query molecules for performing similarity search. This
 1241         is required for 'SimilaritySearch' value of '--mode' option.
 1242     --queryFileParams <Name,Value,...>  [default: auto]
 1243         A comma delimited list of parameter name and value pairs for reading 
 1244         molecules from query files during similarity search. The supported
 1245         parameter names for different file formats, along with their default
 1246         values, are shown below:
 1247             
 1248             SD, MOL: removeHydrogens,yes,sanitize,yes,strictParsing,yes
 1249             SMILES: smilesColumn,1,smilesNameColumn,2,smilesDelimiter,space,
 1250                 smilesTitleLine,auto,sanitize,yes
 1251             
 1252         Possible values for smilesDelimiter: space, comma or tab.
 1253     --rascalSearchParams <Name,Value,...>  [default: auto]
 1254         Parameter values to use for RASCAL similarity search.
 1255         
 1256         The default values are automatically updated to match RDKit default values.
 1257         The supported parameter names along with their default values are
 1258         shown below:
 1259             
 1260             allBestMCESs, no, completeAromaticRings, yes,
 1261             completeSmallestRings, no, exactConnectionsMatch, no, 
 1262             ignoreAtomAromaticity, yes, ignoreBondOrders, no,
 1263             maxBondMatchPairs, 1000, maxFragSeparation, -1, minCliqueSize, 0,
 1264             minFragSize, -1, returnEmptyMCES, false, ringMatchesRingOnly, false,
 1265             similarityThreshold, 0.7, singleLargestFrag, no,
 1266             timeout, 60
 1267             
 1268         A brief description of parameters, taken from RDKit documentation, is
 1269         provided below:
 1270             
 1271             allBestMCESs: Find all Maximum Common Edge Subgraphs (MCES).
 1272             completeAromaticRings: Use only complete aromatic rings.
 1273             completeSmallestRings: Only complete rings present in both
 1274                 molecules.
 1275             exactConnectionsMatch: Match atoms only when they have the same
 1276                 number of explicit connections.
 1277             ignoreAtomAromaticity: Ignore aromaticity during atom matching.
 1278             ignoreBondOrders: Ignore bond orders during atom matching.
 1279             maxBondMatchPairs: Maximum number of matching bond pairs.
 1280             maxFragSeparation: Maximum bond distance that bonds can match.
 1281                 A value of -1 implies no maximum.
 1282             minCliqueSize: A value of > 0 overrides the similarityThreshold.
 1283                 This refers to the minimum number of bonds in the MCES.
 1284             minFragSize: Minimum number of atoms in a fragment. A value of -1
 1285                 implies no minimum.
 1286             returnEmptyMCES: Return empty MCES results.
 1287             ringMatchesRingOnly: Match ring bonds to only ring bonds.
 1288             similarityThreshold: Similarity threshold for matching and
 1289                 evaluating MCES.
 1290             singleLargestFrag: Find only a single fragment for the MCES. By
 1291                 default, multiple fragments are generated as necessary.
 1292             timeout: Max run time in seconds. A value of -1 implies no max.
 1293             
 1294     --substructureMatchParams <Name,Value,...>  [default: auto]
 1295         Parameter values to use for substructure match during synthon substructure
 1296         search.
 1297         
 1298         The default values are automatically updated to match RDKit default values.
 1299         The supported parameter names along with their default values are
 1300         shown below:
 1301             
 1302             aromaticMatchesConjugated, no, maxMatches, 1000,
 1303             maxRecursiveMatches, 1000, recursionPossible, yes,
 1304             specifiedStereoQueryMatchesUnspecified, no,  uniquify, yes,
 1305             useChirality, no, useEnhancedStereo, no, useGenericMatchers, no,
 1306             
 1307         A brief description of parameters, taken from RDKit documentation, is
 1308         provided below:
 1309             
 1310             aromaticMatchesConjugated: Match aromatic and conjugated bonds.
 1311             maxMatches: Maximum number of matches.
 1312             maxRecursiveMatches: Maximum number of recursive matches.
 1313             recursionPossible: Allow recursive queries.
 1314             specifiedStereoQueryMatchesUnspecified: Match query atoms and bonds
 1315                 with specified stereochemistry to atoms and bonds with unspecified
 1316                 stereochemistry.
 1317             uniquify: Uniquify match results using atom indices.
 1318             useChirality: Use chirality to match atom and bonds.
 1319             useEnhancedStereo: Use enhanced stereochemistry during the use
 1320                 of chirality.
 1321             useGenericMatchers: Use generic groups as a post-filtering step.
 1322             
 1323     --synthonSearchParams <Name,Value,...>  [default: auto]
 1324         Parameter values to use for performing synthon substructure and similarity
 1325         search.
 1326         
 1327         The default values are automatically updated to match RDKit default values.
 1328         The supported parameter names along with their default values are
 1329         shown below:
 1330             
 1331             approxSimilarityAdjuster, 0.1, [ Default value for Morgan FPs ]
 1332             buildHits, yes, fragSimilarityAdjuster, 0.1, hitStart, 0,
 1333             maxHits, 1000, [ A value of -1 retrieves all hits ]
 1334             maxNumFrags, 100000,
 1335             numThreads, 1 [ 0: Use maximum number of threads supported by the
 1336                 hardware; Negative value: Added to the maximum number of
 1337                 threads supported by the hardware ]
 1338             randomSample, no,
 1339             randomSeed, -1 [  Default value implies use random seed ]
 1340             similarityCutoff, 0.5, [ Default for Morgan FPs. Ignored during RASCAL
 1341                 similarity search; instead, RASCAL parameter similarityThreshold is
 1342                 used.  ]
 1343             timeOut, 600 [ Unit: sec. The RASCAL searches take longer and may
 1344                 need a higher value for timeOut. For example: 3600 ]
 1345             
 1346         A brief description of parameters, taken from RDKit documentation, is
 1347         provided below:
 1348             
 1349             approxSimilarityAdjuster: Value used for reducing similarity cutoff
 1350                 during approximate similarity check for fingerprint search. A
 1351                 lower value leads to faster run times at the risk of missing
 1352                 some hits.
 1353             buildHits: A value of no reports only the maximum number of hits a
 1354                 search could generate, without returning any hits.
 1355             fragSimilarityAdjuster: Value used for reducing fragment matching
 1356                 similarity cutoff to accommodate low bit densities for fragments.
 1357             hitStart: Return hits starting from the specified sequence number
 1358                 to support retrieval of hits in batches.
 1359             maxHits: Maximum number of hits to return. A value of -1 implies
 1360                 retrieve all hits.
 1361             maxNumFrags: Maximum number of fragments for breaking a query. 
 1362             numThreads: Number of threads to use for search. A value of 0 
 1363                 implies the use of all available hardware threads. A negative
 1364                 value is added to the number of available hardware threads to
 1365                 calculate number of threads to use.
 1366             randomSample: Return a random sample of hits up to maxHits.
 1367             randomSeed: Random number seed to use during search. A value of -1
 1368                 implies the use of a random seed.
 1369             similarityCutoff: Similarity cutoff for returning hits by fingerprint
 1370                 similarity search. A default value of 0.5 is set for Morgan
 1371                 fingerprints.
 1372             timeOut: Time limit for search, in seconds. A value of 0 implies
 1373                 no timeout.
 1374             
 1375     --overwrite
 1376         Overwrite existing files.
 1377     -w, --workingdir <dir>
 1378         Location of working directory which defaults to the current directory.
 1379 
 1380 Examples:
 1381     To list information about a synthon space in a text file, type:
 1382 
 1383         % RDKitPerformSynthonSpaceSearch.py --list -i SampleSynthonSpace.csv
 1384 
 1385     To generate a binary database file for a synthon space in a text file, type:
 1386 
 1387         % RDKitPerformSynthonSpaceSearch.py -m BinaryDBFileGeneration
 1388           -i SampleSynthonSpace.csv -o SampleSynthonSpace.spc
 1389 
 1390     To enumerate a combinatorial library for a synthon space in a text file and
 1391     write out a SMILES file, type:
 1392 
 1393         % RDKitPerformSynthonSpaceSearch.py -m LibraryEnumeration
 1394           -i SampleSynthonSpace.csv -o SampleSynthonSpace_Library.smi
 1395 
 1396     To generate Morgan fingerprints for a synthon space in a text file, employing
 1397     radius of 2 and bit vector size of 2048, and write out a binary database file,
 1398     type:
 1399 
 1400         % RDKitPerformSynthonSpaceSearch.py -m FingerprintsGeneration
 1401           -i SampleSynthonSpace.csv -o SampleSynthonSpace_MorganFPs.spc
 1402 
 1403     To perform a similarity search using Morgan fingerprints for query molecules
 1404     in an input file, against a binary data base file synthon space containing
 1405     Morgan fingerprints, employing radius 2 and bit vector size of 2048, finding
 1406     a maximum of 1000 hits for each query molecule, and write out a single output
 1407     file containing hit molecules, type:
 1408 
 1409         % RDKitPerformSynthonSpaceSearch.py -m SimilaritySearch
 1410           -i SampleSynthonSpace_MorganFPs.spc
 1411           --queryFile SampleSynthonSpaceQuery.sdf
 1412           -o SampleSynthonSpace_SimilaritySearchResultsMorganFPs.sdf
 1413 
 1414     or only count hits without building hits and writing them to an output
 1415     file:
 1416 
 1417         % RDKitPerformSynthonSpaceSearch.py -m SimilaritySearch
 1418           -i SampleSynthonSpace_MorganFPs.spc
 1419           --queryFile SampleSynthonSpaceQuery.sdf
 1420           -o SampleSynthonSpace_SimilaritySearchResultsMorganFPs.sdf
 1421           --synthonSearchParams "buildHits,No"
 1422 
 1423     To run previous example for writing individual output files for each query
 1424     molecule, type:
 1425 
 1426         % RDKitPerformSynthonSpaceSearch.py -m SimilaritySearch
 1427           -i SampleSynthonSpace_MorganFPs.spc
 1428           --queryFile SampleSynthonSpaceQuery.sdf
 1429           -o SampleSynthonSpace_SimilaritySearchResultsMorganFPs.sdf
 1430           --outfileMode MultipleFiles
 1431 
 1432     To run previous example for retrieving all possible hits for query molecules
 1433     and write out individual output files for each query molecule, type:
 1434 
 1435         % RDKitPerformSynthonSpaceSearch.py -m SimilaritySearch
 1436           -i SampleSynthonSpace_MorganFPs.spc
 1437           --queryFile SampleSynthonSpaceQuery.sdf
 1438           -o SampleSynthonSpace_SimilaritySearchResultsMorganFPs.sdf
 1439           --outfileMode MultipleFiles
 1440           --synthonSearchParams "maxHits,-1"
 1441 
 1442     To run the previous example using multi-threading employing all available
 1443     threads on your machine, retrieve maximum of 1000 hits for each query
 1444     molecule and generate various output files, type:
 1445 
 1446         % RDKitPerformSynthonSpaceSearch.py -m SimilaritySearch
 1447           -i SampleSynthonSpace_MorganFPs.spc
 1448           --queryFile SampleSynthonSpaceQuery.smi
 1449           -o SampleSynthonSpace_SimilaritySearchResultsMorganFPs.smi
 1450           --outfileMode MultipleFiles
 1451           --synthonSearchParams "maxHits, 1000, numThreads, 0"
 1452 
 1453     To run the previous example using multi-threading employing all but one
 1454     available threads on your machine, type:
 1455 
 1456         % RDKitPerformSynthonSpaceSearch.py -m SimilaritySearch
 1457           -i SampleSynthonSpace_MorganFPs.spc
 1458           --queryFile SampleSynthonSpaceQuery.smi
 1459           -o SampleSynthonSpace_SimilaritySearchResultsMorganFPs.smi
 1460           --outfileMode MultipleFiles
 1461           --synthonSearchParams "maxHits, 1000, numThreads, -1"
 1462 
 1463     To perform a substructure search using query pattern SMARTS against a synthon
 1464     space file, finding a maximum of 1000 hits for each query pattern and write out
 1465     a single output file containing hit molecules, type:
 1466 
 1467         % RDKitPerformSynthonSpaceSearch.py -m SubstructureSearch
 1468           -i SampleSynthonSpace.spc
 1469           --queryPattern "c12ccc(C)cc1[nH]nc2C(=O)NCc1cncs1"
 1470           -o SampleSynthonSpace_SubstructureSearchResults.sdf
 1471 
 1472         % RDKitPerformSynthonSpaceSearch.py -m SubstructureSearch
 1473           -i SampleSynthonSpace.csv
 1474           --queryPattern 'c1c[n,s,o][n,s,o,c]c1C(=O)[$(N1CCCCC1),$(N1CCCC1)]'
 1475           -o SampleSynthonSpace_SubstructureSearchResults.sdf
 1476 
 1477     To run previous example for writing out individual output files for each
 1478     query pattern, type:
 1479 
 1480         % RDKitPerformSynthonSpaceSearch.py -m SubstructureSearch
 1481           -i SampleSynthonSpace.spc
 1482           --queryPattern "CCN(C(=O)c1cc2cc(OC)ccc2nc1C)C1CCCN(C(=O)OC(C)(C)C)C1 
 1483           C=CCc1c(N[C@H](C)c2cccc(C)c2)ncnc1N(C)CCCC(=O)OC"
 1484           -o SampleSynthonSpace_SubstructureSearchResults.sdf
 1485           --outfileMode MultipleFiles
 1486 
 1487     To perform RASCAL similarity search for query molecules in an input file,
 1488     against a binary data base file synthon space, finding a maximum of 1000 hits
 1489     for each query molecule, using multi-threading employing all available CPUs,
 1490     timing out after 3600 seconds, and write out a single output file containing
 1491     hit molecules, type:
 1492 
 1493         % RDKitPerformSynthonSpaceSearch.py -m RASCALSimilaritySearch
 1494           -i SampleSynthonSpace.spc
 1495           --queryFile SampleSynthonSpaceQuery.sdf
 1496           -o SampleSynthonSpace_RASCALSimilaritySearchResults.sdf
 1497           --synthonSearchParams "maxHits, 1000, numThreads, 0, timeOut, 3600"
 1498 
 1499 Author:
 1500     Manish Sud(msud@san.rr.com)
 1501 
 1502 Acknowledgments:
 1503     Dave Cosgrove
 1504 
 1505 See also:
 1506     RDKitConvertFileFormat.py, RDKitPickDiverseMolecules.py, RDKitSearchFunctionalGroups.py,
 1507     RDKitSearchSMARTS.py
 1508 
 1509 Copyright:
 1510     Copyright (C) 2025 Manish Sud. All rights reserved.
 1511 
 1512     The functionality available in this script is implemented using RDKit, an
 1513     open source toolkit for cheminformatics developed by Greg Landrum.
 1514 
 1515     This file is part of MayaChemTools.
 1516 
 1517     MayaChemTools is free software; you can redistribute it and/or modify it under
 1518     the terms of the GNU Lesser General Public License as published by the Free
 1519     Software Foundation; either version 3 of the License, or (at your option) any
 1520     later version.
 1521 
 1522 """
 1523 
 1524 if __name__ == "__main__":  # Call main() only when run as a script, not when imported.
 1525     main()