MayaChemTools

   1 #!/bin/env python
   2 #
   3 # File: RDKitSearchSMARTS.py
   4 # Author: Manish Sud <msud@san.rr.com>
   5 #
   6 # Copyright (C) 2024 Manish Sud. All rights reserved.
   7 #
   8 # The functionality available in this script is implemented using RDKit, an
   9 # open source toolkit for cheminformatics developed by Greg Landrum.
  10 #
  11 # This file is part of MayaChemTools.
  12 #
  13 # MayaChemTools is free software; you can redistribute it and/or modify it under
  14 # the terms of the GNU Lesser General Public License as published by the Free
  15 # Software Foundation; either version 3 of the License, or (at your option) any
  16 # later version.
  17 #
  18 # MayaChemTools is distributed in the hope that it will be useful, but without
  19 # any warranty; without even the implied warranty of merchantability or fitness
  20 # for a particular purpose.  See the GNU Lesser General Public License for more
  21 # details.
  22 #
  23 # You should have received a copy of the GNU Lesser General Public License
  24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  26 # Boston, MA, 02111-1307, USA.
  27 #
  28 
  29 from __future__ import print_function
  30 
  31 # Add local python path to the global path and import standard library modules...
  32 import os
  33 import sys;  sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python"))
  34 import time
  35 import re
  36 import multiprocessing as mp
  37 
  38 # RDKit imports...
  39 try:
  40     from rdkit import rdBase
  41     from rdkit import Chem
  42     from rdkit.Chem import AllChem
  43 except ImportError as ErrMsg:
  44     sys.stderr.write("\nFailed to import RDKit module/package: %s\n" % ErrMsg)
  45     sys.stderr.write("Check/update your RDKit environment and try again.\n\n")
  46     sys.exit(1)
  47 
  48 # MayaChemTools imports...
  49 try:
  50     from docopt import docopt
  51     import MiscUtil
  52     import RDKitUtil
  53 except ImportError as ErrMsg:
  54     sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg)
  55     sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n")
  56     sys.exit(1)
  57 
# Base name of the script as it was invoked; used in the start and done messages.
ScriptName = os.path.basename(sys.argv[0])
# Raw command line options. Rebound to the docopt() result by RetrieveOptions()
# and to a decoded copy by InitializeWorkerProcess() in each worker process.
Options = {}
# Processed option values. Filled in by ProcessOptions() and rebound to a decoded
# copy by InitializeWorkerProcess() in each worker process.
OptionsInfo = {}
  61 
  62 def main():
  63     """Start execution of the script."""
  64     
  65     MiscUtil.PrintInfo("\n%s (RDKit v%s; MayaChemTools v%s; %s): Starting...\n" % (ScriptName, rdBase.rdkitVersion, MiscUtil.GetMayaChemToolsVersion(), time.asctime()))
  66     
  67     (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime()
  68     
  69     # Retrieve command line arguments and options...
  70     RetrieveOptions()
  71     
  72     # Process and validate command line arguments and options...
  73     ProcessOptions()
  74     
  75     # Perform actions required by the script...
  76     PerformSearch()
  77     
  78     MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName)
  79     MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime))
  80 
  81 def PerformSearch():
  82     """Perform search using specified SMARTS pattern."""
  83     
  84     # Set up a pattern molecule...
  85     PatternMol = Chem.MolFromSmarts(OptionsInfo["Pattern"])
  86     
  87     # Setup a molecule reader...
  88     MiscUtil.PrintInfo("\nProcessing file %s..." % OptionsInfo["Infile"])
  89     Mols  = RDKitUtil.ReadMolecules(OptionsInfo["Infile"], **OptionsInfo["InfileParams"])
  90     
  91     # Set up molecule writers...
  92     Writer, WriterFiltered = SetupMoleculeWriters()
  93     
  94     MolCount, ValidMolCount, RemainingMolCount = ProcessMolecules(Mols, PatternMol, Writer, WriterFiltered)
  95     
  96     if Writer is not None:
  97         Writer.close()
  98     if WriterFiltered is not None:
  99         WriterFiltered.close()
 100     
 101     MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount)
 102     MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount)
 103     MiscUtil.PrintInfo("Number of ignored molecules: %d" % (MolCount - ValidMolCount))
 104 
 105     MiscUtil.PrintInfo("\nNumber of remaining molecules: %d" % RemainingMolCount)
 106     MiscUtil.PrintInfo("Number of filtered molecules: %d" % (ValidMolCount - RemainingMolCount))
 107 
 108 def ProcessMolecules(Mols, PatternMol, Writer, WriterFiltered):
 109     """Process and filter molecules."""
 110     
 111     if OptionsInfo["MPMode"]:
 112         return ProcessMoleculesUsingMultipleProcesses(Mols, PatternMol, Writer, WriterFiltered)
 113     else:
 114         return ProcessMoleculesUsingSingleProcess(Mols, PatternMol, Writer, WriterFiltered)
 115 
 116 def ProcessMoleculesUsingSingleProcess(Mols, PatternMol, Writer, WriterFiltered):
 117     """Process and filter molecules using a single process."""
 118     
 119     NegateMatch = OptionsInfo["NegateMatch"]
 120     OutfileFilteredMode = OptionsInfo["OutfileFilteredMode"]
 121     Compute2DCoords = OptionsInfo["OutfileParams"]["Compute2DCoords"]
 122     SetSMILESMolProps = OptionsInfo["OutfileParams"]["SetSMILESMolProps"]
 123     
 124     MiscUtil.PrintInfo("\nFiltering molecules...")
 125     
 126     (MolCount, ValidMolCount, RemainingMolCount) = [0] * 3
 127     FirstMol = True
 128     for Mol in Mols:
 129         MolCount += 1
 130         
 131         if Mol is None:
 132             continue
 133         
 134         if RDKitUtil.IsMolEmpty(Mol):
 135             MolName = RDKitUtil.GetMolName(Mol, MolCount)
 136             MiscUtil.PrintWarning("Ignoring empty molecule: %s" % MolName)
 137             continue
 138         
 139         ValidMolCount += 1
 140         if FirstMol:
 141             FirstMol = False
 142             if SetSMILESMolProps:
 143                 if Writer is not None:
 144                     RDKitUtil.SetWriterMolProps(Writer, Mol)
 145                 if WriterFiltered is not None:
 146                     RDKitUtil.SetWriterMolProps(WriterFiltered, Mol)
 147         
 148         MolMatched = DoesMoleculeContainsPattern(Mol, PatternMol)
 149         if MolMatched != NegateMatch:
 150             RemainingMolCount += 1
 151             WriteMolecule(Writer, Mol, Compute2DCoords)
 152         else:
 153             if OutfileFilteredMode:
 154                 WriteMolecule(WriterFiltered, Mol, Compute2DCoords)
 155     
 156     return (MolCount, ValidMolCount, RemainingMolCount)
 157     
 158 def ProcessMoleculesUsingMultipleProcesses(Mols, PatternMol, Writer, WriterFiltered):
 159     """Process and filter molecules using multiprocessing."""
 160     
 161     MiscUtil.PrintInfo("\nFiltering molecules using multiprocessing...")
 162     
 163     MPParams = OptionsInfo["MPParams"]
 164     NegateMatch = OptionsInfo["NegateMatch"]
 165     OutfileFilteredMode = OptionsInfo["OutfileFilteredMode"]
 166     Compute2DCoords = OptionsInfo["OutfileParams"]["Compute2DCoords"]
 167     SetSMILESMolProps = OptionsInfo["OutfileParams"]["SetSMILESMolProps"]
 168     
 169     # Setup data for initializing a worker process...
 170     MiscUtil.PrintInfo("Encoding options info and pattern molecule...")
 171     OptionsInfo["EncodedPatternMol"] = RDKitUtil.MolToBase64EncodedMolString(PatternMol)
 172     InitializeWorkerProcessArgs = (MiscUtil.ObjectToBase64EncodedString(Options), MiscUtil.ObjectToBase64EncodedString(OptionsInfo))
 173 
 174     # Setup a encoded mols data iterable for a worker process...
 175     WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(Mols)
 176 
 177     # Setup process pool along with data initialization for each process...
 178     MiscUtil.PrintInfo("\nConfiguring multiprocessing using %s method..." % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()"))
 179     MiscUtil.PrintInfo("NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n" % (MPParams["NumProcesses"], MPParams["InputDataMode"], ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"])))
 180     
 181     ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeWorkerProcess, InitializeWorkerProcessArgs)
 182     
 183     # Start processing...
 184     if re.match("^Lazy$", MPParams["InputDataMode"], re.I):
 185         Results = ProcessPool.imap(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
 186     elif re.match("^InMemory$", MPParams["InputDataMode"], re.I):
 187         Results = ProcessPool.map(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
 188     else:
 189         MiscUtil.PrintError("The value, %s, specified for \"--inputDataMode\" is not supported." % (MPParams["InputDataMode"]))
 190     
 191     (MolCount, ValidMolCount, RemainingMolCount) = [0] * 3
 192     FirstMol = True
 193     for Result in Results:
 194         MolCount += 1
 195         MolIndex, EncodedMol, MolMatched = Result
 196         
 197         if EncodedMol is None:
 198             continue
 199         ValidMolCount += 1
 200         
 201         Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
 202         
 203         if FirstMol:
 204             FirstMol = False
 205             if SetSMILESMolProps:
 206                 if Writer is not None:
 207                     RDKitUtil.SetWriterMolProps(Writer, Mol)
 208                 if WriterFiltered is not None:
 209                     RDKitUtil.SetWriterMolProps(WriterFiltered, Mol)
 210         
 211         if MolMatched != NegateMatch:
 212             RemainingMolCount += 1
 213             WriteMolecule(Writer, Mol, Compute2DCoords)
 214         else:
 215             if OutfileFilteredMode:
 216                 WriteMolecule(WriterFiltered, Mol, Compute2DCoords)
 217     
 218     return (MolCount, ValidMolCount, RemainingMolCount)
 219 
 220 def InitializeWorkerProcess(*EncodedArgs):
 221     """Initialize data for a worker process."""
 222 
 223     global Options, OptionsInfo
 224     
 225     MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid())
 226 
 227     # Decode Options and OptionInfo...
 228     Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0])
 229     OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1])
 230 
 231     # Decode PatternMol...
 232     OptionsInfo["PatternMol"] = RDKitUtil.MolFromBase64EncodedMolString(OptionsInfo["EncodedPatternMol"])
 233     
 234 def WorkerProcess(EncodedMolInfo):
 235     """Process data for a worker process."""
 236 
 237     MolIndex, EncodedMol = EncodedMolInfo
 238 
 239     if EncodedMol is None:
 240         return [MolIndex, None, False]
 241         
 242     Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
 243     if RDKitUtil.IsMolEmpty(Mol):
 244         MolName = RDKitUtil.GetMolName(Mol, (MolIndex + 1))
 245         MiscUtil.PrintWarning("Ignoring empty molecule: %s" % MolName)
 246         return [MolIndex, None, False]
 247         
 248     MolMatched = DoesMoleculeContainsPattern(Mol, OptionsInfo["PatternMol"])
 249 
 250     return [MolIndex, EncodedMol, MolMatched]
 251 
 252 def WriteMolecule(Writer, Mol, Compute2DCoords):
 253     """Write out molecule."""
 254     
 255     if OptionsInfo["CountMode"]:
 256         return
 257     
 258     if Compute2DCoords:
 259         AllChem.Compute2DCoords(Mol)
 260     
 261     Writer.write(Mol)
 262     
 263 def SetupMoleculeWriters():
 264     """Setup molecule writers."""
 265     
 266     Writer = None
 267     WriterFiltered = None
 268 
 269     if OptionsInfo["CountMode"]:
 270         return (Writer, WriterFiltered)
 271 
 272     Writer = RDKitUtil.MoleculesWriter(OptionsInfo["Outfile"], **OptionsInfo["OutfileParams"])
 273     if Writer is None:
 274         MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OptionsInfo["Outfile"])
 275     MiscUtil.PrintInfo("Generating file %s..." % OptionsInfo["Outfile"])
 276     
 277     if OptionsInfo["OutfileFilteredMode"]:
 278         WriterFiltered = RDKitUtil.MoleculesWriter(OptionsInfo["OutfileFiltered"], **OptionsInfo["OutfileParams"])
 279         if WriterFiltered is None:
 280             MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OptionsInfo["OutfileFiltered"])
 281         MiscUtil.PrintInfo("Generating file %s..." % OptionsInfo["OutfileFiltered"])
 282     
 283     return (Writer, WriterFiltered)
 284 
 285 def DoesMoleculeContainsPattern(Mol, PatternMol):
 286     """Check presence of pattern in the molecule."""
 287 
 288     return True if Mol.HasSubstructMatch(PatternMol, useChirality = OptionsInfo["UseChirality"]) else False
 289 
 290 def ProcessOptions():
 291     """Process and validate command line arguments and options."""
 292     
 293     MiscUtil.PrintInfo("Processing options...")
 294     
 295     # Validate options...
 296     ValidateOptions()
 297     
 298     OptionsInfo["Infile"] = Options["--infile"]
 299     OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters("--infileParams", Options["--infileParams"], Options["--infile"])
 300     
 301     OptionsInfo["Outfile"] = Options["--outfile"]
 302     OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters("--outfileParams", Options["--outfileParams"], Options["--infile"], Options["--outfile"])
 303     
 304     OptionsInfo["OutfileFiltered"] = ""
 305     if Options["--outfile"]:
 306         FileDir, FileName, FileExt = MiscUtil.ParseFileName(Options["--outfile"])
 307         OutfileFiltered = "%s_Filtered.%s" % (FileName, FileExt)
 308         OptionsInfo["OutfileFiltered"] = OutfileFiltered
 309     OptionsInfo["OutfileFilteredMode"] = True if re.match("^yes$", Options["--outfileFiltered"], re.I) else False
 310         
 311     OptionsInfo["Overwrite"] = Options["--overwrite"]
 312 
 313     OptionsInfo["CountMode"] = True if re.match("^count$", Options["--mode"], re.I) else False
 314     OptionsInfo["NegateMatch"] = True if re.match("^yes$", Options["--negate"], re.I) else False
 315     
 316     OptionsInfo["MPMode"] = True if re.match("^yes$", Options["--mp"], re.I) else False
 317     OptionsInfo["MPParams"] = MiscUtil.ProcessOptionMultiprocessingParameters("--mpParams", Options["--mpParams"])
 318     
 319     OptionsInfo["Pattern"] = Options["--pattern"]
 320     OptionsInfo["UseChirality"] = True if re.match("^yes$", Options["--useChirality"], re.I) else False
 321 
 322 def RetrieveOptions():
 323     """Retrieve command line arguments and options."""
 324     
 325     # Get options...
 326     global Options
 327     Options = docopt(_docoptUsage_)
 328     
 329     # Set current working directory to the specified directory...
 330     WorkingDir = Options["--workingdir"]
 331     if WorkingDir:
 332         os.chdir(WorkingDir)
 333     
 334     # Handle examples option...
 335     if "--examples" in Options and Options["--examples"]:
 336         MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_))
 337         sys.exit(0)
 338 
 339 def ValidateOptions():
 340     """Validate option values."""
 341     
 342     MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"])
 343     MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "sdf sd smi txt csv tsv")
 344     if Options["--outfile"]:
 345         MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "sdf sd smi")
 346         MiscUtil.ValidateOptionsOutputFileOverwrite("-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"])
 347         MiscUtil.ValidateOptionsDistinctFileNames("-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"])
 348         
 349     MiscUtil.ValidateOptionTextValue("--outfileFiltered", Options["--outfileFiltered"], "yes no")
 350     
 351     MiscUtil.ValidateOptionTextValue("-m, --mode", Options["--mode"], "retrieve count")
 352     if re.match("^retrieve$", Options["--mode"], re.I):
 353         if not Options["--outfile"]:
 354             MiscUtil.PrintError("The outfile must be specified using \"-o, --outfile\" during \"retrieve\" value of \"-m, --mode\" option")
 355         
 356     MiscUtil.ValidateOptionTextValue("--mp", Options["--mp"], "yes no")
 357     MiscUtil.ValidateOptionTextValue("-n, --negate", Options["--negate"], "yes no")
 358     
 359     PatternMol = Chem.MolFromSmarts(Options["--pattern"])
 360     if PatternMol is None:
 361         MiscUtil.PrintError("The value specified, %s, using option \"-p, --pattern\" is not a valid SMARTS: Failed to create pattern molecule" % Options["--pattern"])
 362     
 363     MiscUtil.ValidateOptionTextValue("--useChirality", Options["--useChirality"], "yes no")
 364 
 365 # Setup a usage string for docopt...
 366 _docoptUsage_ = """
 367 RDKitSearchSMARTS.py - Perform a substructure search using SMARTS pattern
 368 
 369 Usage:
 370     RDKitSearchSMARTS.py  [--infileParams <Name,Value,...>] [--mode <retrieve or count>]
 371                           [--mp <yes or no>] [--mpParams <Name,Value,...>] [--negate <yes or no>]
 372                           [--outfileFiltered <yes or no>] [--outfileParams <Name,Value,...>] [--overwrite]
 373                           [--useChirality <yes or no>] [-w <dir>] [-o <outfile>] -p <SMARTS> -i <infile>
 374     RDKitSearchSMARTS.py -h | --help | -e | --examples
 375 
 376 Description:
 377     Perform a substructure search in an input file using specified SMARTS pattern and
 378     write out the matched molecules to an output file or simply count the number
 379     of matches.
 380 
 381     The supported input file formats are: SD (.sdf, .sd), SMILES (.smi, .csv, .tsv, .txt)
 382 
 383     The supported output file formats are: SD (.sdf, .sd), SMILES (.smi)
 384 
 385 Options:
 386     -e, --examples
 387         Print examples.
 388     -h, --help
 389         Print this help message.
 390     -i, --infile <infile>
 391         Input file name.
 392     --infileParams <Name,Value,...>  [default: auto]
 393         A comma delimited list of parameter name and value pairs for reading
 394         molecules from files. The supported parameter names for different file
 395         formats, along with their default values, are shown below:
 396             
 397             SD, MOL: removeHydrogens,yes,sanitize,yes,strictParsing,yes
 398             SMILES: smilesColumn,1,smilesNameColumn,2,smilesDelimiter,space,
 399                 smilesTitleLine,auto,sanitize,yes
 400             
 401         Possible values for smilesDelimiter: space, comma or tab.
 402     -m, --mode <retrieve or count>  [default: retrieve]
 403         Specify whether to retrieve and write out matched molecules to an output
 404         file or simply count the number of matches.
 405     --mp <yes or no>  [default: no]
 406         Use multiprocessing.
 407          
 408         By default, input data is retrieved in a lazy manner via mp.Pool.imap()
 409         function employing lazy RDKit data iterable. This allows processing of
 410         arbitrarily large data sets without any additional memory requirements.
 411         
 412         All input data may be optionally loaded into memory by mp.Pool.map()
 413         before starting worker processes in a process pool by setting the value
 414         of 'inputDataMode' to 'InMemory' in '--mpParams' option.
 415         
 416         A word to the wise: The default 'chunkSize' value of 1 during 'Lazy' input
 417         data mode may adversely impact the performance. The '--mpParams' section
 418         provides additional information to tune the value of 'chunkSize'.
 419     --mpParams <Name,Value,...>  [default: auto]
 420         A comma delimited list of parameter name and value pairs to configure
 421         multiprocessing.
 422         
 423         The supported parameter names along with their default and possible
 424         values are shown below:
 425         
 426             chunkSize, auto
 427             inputDataMode, Lazy   [ Possible values: InMemory or Lazy ]
 428             numProcesses, auto   [ Default: mp.cpu_count() ]
 429         
 430         These parameters are used by the following functions to configure and
 431         control the behavior of multiprocessing: mp.Pool(), mp.Pool.map(), and
 432         mp.Pool.imap().
 433         
 434         The chunkSize determines chunks of input data passed to each worker
 435         process in a process pool by mp.Pool.map() and mp.Pool.imap() functions.
 436         The default value of chunkSize is dependent on the value of 'inputDataMode'.
 437         
 438         The mp.Pool.map() function, invoked during 'InMemory' input data mode,
 439         automatically converts RDKit data iterable into a list, loads all data into
 440         memory, and calculates the default chunkSize using the following method
 441         as shown in its code:
 442         
 443             chunkSize, extra = divmod(len(dataIterable), len(numProcesses) * 4)
 444             if extra: chunkSize += 1
 445         
 446         For example, the default chunkSize will be 7 for a pool of 4 worker processes
 447         and 100 data items.
 448         
 449         The mp.Pool.imap() function, invoked during 'Lazy' input data mode, employs
 450         'lazy' RDKit data iterable to retrieve data as needed, without loading all the
 451         data into memory. Consequently, the size of input data is not known a priori.
 452         It's not possible to estimate an optimal value for the chunkSize. The default 
 453         chunkSize is set to 1.
 454         
 455         The default value for the chunkSize during 'Lazy' data mode may adversely
 456         impact the performance due to the overhead associated with exchanging
 457         small chunks of data. It is generally a good idea to explicitly set chunkSize to
 458         a larger value during 'Lazy' input data mode, based on the size of your input
 459         data and number of processes in the process pool.
 460         
 461         The mp.Pool.map() function waits for all worker processes to process all
 462         the data and return the results. The mp.Pool.imap() function, however,
 463         returns the results obtained from worker processes as soon as the
 464         results become available for specified chunks of data.
 465         
 466         The order of data in the results returned by both mp.Pool.map() and 
 467         mp.Pool.imap() functions always corresponds to the input data.
 468     -n, --negate <yes or no>  [default: no]
 469         Specify whether to find molecules not matching the specified SMARTS pattern.
 470     -o, --outfile <outfile>
 471         Output file name.
 472     --outfileFiltered <yes or no>  [default: no]
 473         Write out a file containing filtered molecules. Its name is automatically
 474         generated from the specified output file. Default: <OutfileRoot>_
 475         Filtered.<OutfileExt>.
 476     --outfileParams <Name,Value,...>  [default: auto]
 477         A comma delimited list of parameter name and value pairs for writing
 478         molecules to files. The supported parameter names for different file
 479         formats, along with their default values, are shown below:
 480             
 481             SD: compute2DCoords,auto,kekulize,yes,forceV3000,no
 482             SMILES: smilesKekulize,no,smilesDelimiter,space, smilesIsomeric,yes,
 483                 smilesTitleLine,yes,smilesMolName,yes,smilesMolProps,no
 484             
 485         Default value for compute2DCoords: yes for SMILES input file; no for all other
 486         file types.
 487     --overwrite
 488         Overwrite existing files.
 489     -p, --pattern <SMARTS>  [default: none]
 490         SMARTS pattern for performing search.
 491     -u, --useChirality <yes or no>  [default: no]
 492         Use stereochemistry information for SMARTS search.
 493     -w, --workingdir <dir>
 494         Location of working directory which defaults to the current directory.
 495 
 496 Examples:
 497     To retrieve molecules containing the substructure corresponding to a specified
 498     SMARTS pattern and write out a SMILES file, type: 
 499 
 500         % RDKitSearchSMARTS.py -p 'c1ccccc1' -i Sample.smi -o SampleOut.smi
 501 
 502     To retrieve molecules containing the substructure corresponding to a specified
 503     SMARTS pattern,  perform filtering in multiprocessing mode on all available
 504     CPUs without loading all data into memory, and write out a SMILES file, type: 
 505 
 506         % RDKitSearchSMARTS.py --mp yes -p 'c1ccccc1' -i Sample.smi -o SampleOut.smi
 507 
 508     To retrieve molecules containing the substructure corresponding to a specified
 509     SMARTS pattern,  perform filtering in multiprocessing mode on all available
 510     CPUs by loading all data into memory, and write out a SMILES file, type: 
 511 
 512         % RDKitSearchSMARTS.py --mp yes --mpParams "inputDataMode,InMemory"
 513           -p 'c1ccccc1' -i Sample.smi -o SampleOut.smi
 514 
 515     To retrieve molecules containing the substructure corresponding to a specified
 516     SMARTS pattern,  perform filtering in multiprocessing mode on specific number
 517     of CPUs and chunk size without loading all data into memory, and write out
 518     a SMILES file, type: 
 519 
 520         % RDKitSearchSMARTS.py --mp yes --mpParams "inputDataMode,Lazy,
 521           numProcesses,4,chunkSize,8" -p 'c1ccccc1' -i Sample.smi -o SampleOut.smi
 522 
 523     To only count the number of molecules containing the substructure corresponding
 524     to a specified SMARTS pattern without writing out any file, type: 
 525 
 526         % RDKitSearchSMARTS.py -m count -p 'c1ccccc1' -i Sample.smi
 527 
 528     To retrieve molecules from a SD file not containing the substructure
 529     corresponding to a specified SMARTS pattern and write out a SD file, type: 
 530 
 531         % RDKitSearchSMARTS.py -n yes -p 'c1ccccc1' -i Sample.sdf -o SampleOut.sdf
 532 
 533     To retrieve molecules containing the substructure corresponding to a specified
 534     SMARTS pattern from a CSV SMILES file, SMILES strings in column 1, name in
 535     column 2, and write out a SD file, type: 
 536 
 537         % RDKitSearchSMARTS.py -p 'c1ccccc1' --infileParams
 538           "smilesDelimiter,comma,smilesTitleLine,yes,smilesColumn,1,
 539           smilesNameColumn,2" --outfileParams "compute2DCoords,yes"
 540           -i SampleSMILES.csv -o SampleOut.sdf
 541 
 542 Author:
 543     Manish Sud(msud@san.rr.com)
 544 
 545 See also:
 546     RDKitConvertFileFormat.py, RDKitFilterPAINS.py, RDKitSearchFunctionalGroups.py 
 547 
 548 Copyright:
 549     Copyright (C) 2024 Manish Sud. All rights reserved.
 550 
 551     The functionality available in this script is implemented using RDKit, an
 552     open source toolkit for cheminformatics developed by Greg Landrum.
 553 
 554     This file is part of MayaChemTools.
 555 
 556     MayaChemTools is free software; you can redistribute it and/or modify it under
 557     the terms of the GNU Lesser General Public License as published by the Free
 558     Software Foundation; either version 3 of the License, or (at your option) any
 559     later version.
 560 
 561 """
 562 
# Start the script only when this file is executed directly; importing the
# module does not trigger main().
if __name__ == "__main__":
    main()