MayaChemTools

   1 #!/bin/env python
   2 #
   3 # File: RDKitSearchSMARTS.py
   4 # Author: Manish Sud <msud@san.rr.com>
   5 #
   6 # Copyright (C) 2019 Manish Sud. All rights reserved.
   7 #
   8 # The functionality available in this script is implemented using RDKit, an
   9 # open source toolkit for cheminformatics developed by Greg Landrum.
  10 #
  11 # This file is part of MayaChemTools.
  12 #
  13 # MayaChemTools is free software; you can redistribute it and/or modify it under
  14 # the terms of the GNU Lesser General Public License as published by the Free
  15 # Software Foundation; either version 3 of the License, or (at your option) any
  16 # later version.
  17 #
  18 # MayaChemTools is distributed in the hope that it will be useful, but without
# any warranty; without even the implied warranty of merchantability or fitness
  20 # for a particular purpose.  See the GNU Lesser General Public License for more
  21 # details.
  22 #
  23 # You should have received a copy of the GNU Lesser General Public License
  24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  26 # Boston, MA, 02111-1307, USA.
  27 #
  28 
  29 from __future__ import print_function
  30 
  31 # Add local python path to the global path and import standard library modules...
  32 import os
  33 import sys;  sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python"))
  34 import time
  35 import re
  36 import multiprocessing as mp
  37 
  38 # RDKit imports...
  39 try:
  40     from rdkit import rdBase
  41     from rdkit import Chem
  42     from rdkit.Chem import AllChem
  43 except ImportError as ErrMsg:
  44     sys.stderr.write("\nFailed to import RDKit module/package: %s\n" % ErrMsg)
  45     sys.stderr.write("Check/update your RDKit environment and try again.\n\n")
  46     sys.exit(1)
  47 
  48 # MayaChemTools imports...
  49 try:
  50     from docopt import docopt
  51     import MiscUtil
  52     import RDKitUtil
  53 except ImportError as ErrMsg:
  54     sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg)
  55     sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n")
  56     sys.exit(1)
  57 
  58 ScriptName = os.path.basename(sys.argv[0])
  59 Options = {}
  60 OptionsInfo = {}
  61 
  62 def main():
  63     """Start execution of the script"""
  64     
  65     MiscUtil.PrintInfo("\n%s (RDK v%s; %s): Starting...\n" % (ScriptName, rdBase.rdkitVersion, time.asctime()))
  66     
  67     (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime()
  68     
  69     # Retrieve command line arguments and options...
  70     RetrieveOptions()
  71     
  72     # Process and validate command line arguments and options...
  73     ProcessOptions()
  74     
  75     # Perform actions required by the script...
  76     PerformSearch()
  77     
  78     MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName)
  79     MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime))
  80 
  81 def PerformSearch():
  82     """Perform search using specified SMARTS pattern."""
  83     
  84     # Set up a pattern molecule...
  85     PatternMol = Chem.MolFromSmarts(OptionsInfo["Pattern"])
  86     
  87     # Setup a molecule reader...
  88     MiscUtil.PrintInfo("\nProcessing file %s..." % OptionsInfo["Infile"])
  89     Mols  = RDKitUtil.ReadMolecules(OptionsInfo["Infile"], **OptionsInfo["InfileParams"])
  90     
  91     # Set up molecule writers...
  92     Writer, WriterFiltered = SetupMoleculeWriters()
  93     
  94     MolCount, ValidMolCount, RemainingMolCount = ProcessMolecules(Mols, PatternMol, Writer, WriterFiltered)
  95     
  96     if Writer is not None:
  97         Writer.close()
  98     if WriterFiltered is not None:
  99         WriterFiltered.close()
 100     
 101     MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount)
 102     MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount)
 103     MiscUtil.PrintInfo("Number of ignored molecules: %d" % (MolCount - ValidMolCount))
 104 
 105     MiscUtil.PrintInfo("\nNumber of remaining molecules: %d" % RemainingMolCount)
 106     MiscUtil.PrintInfo("Number of filtered molecules: %d" % (ValidMolCount - RemainingMolCount))
 107 
 108 def ProcessMolecules(Mols, PatternMol, Writer, WriterFiltered):
 109     """Process and filter molecules. """
 110     
 111     if OptionsInfo["MPMode"]:
 112         return ProcessMoleculesUsingMultipleProcesses(Mols, PatternMol, Writer, WriterFiltered)
 113     else:
 114         return ProcessMoleculesUsingSingleProcess(Mols, PatternMol, Writer, WriterFiltered)
 115 
 116 def ProcessMoleculesUsingSingleProcess(Mols, PatternMol, Writer, WriterFiltered):
 117     """Process and filter molecules using a single process."""
 118     
 119     NegateMatch = OptionsInfo["NegateMatch"]
 120     OutfileFilteredMode = OptionsInfo["OutfileFilteredMode"]
 121     Compute2DCoords = OptionsInfo["OutfileParams"]["Compute2DCoords"]
 122     
 123     MiscUtil.PrintInfo("\nFiltering molecules...")
 124     
 125     (MolCount, ValidMolCount, RemainingMolCount) = [0] * 3
 126     for Mol in Mols:
 127         MolCount += 1
 128         
 129         if Mol is None:
 130             continue
 131         
 132         if RDKitUtil.IsMolEmpty(Mol):
 133             MolName = RDKitUtil.GetMolName(Mol, MolCount)
 134             MiscUtil.PrintWarning("Ignoring empty molecule: %s" % MolName)
 135             continue
 136         
 137         ValidMolCount += 1
 138         
 139         MolMatched = DoesMoleculeContainsPattern(Mol, PatternMol)
 140         if MolMatched != NegateMatch:
 141             RemainingMolCount += 1
 142             WriteMolecule(Writer, Mol, Compute2DCoords)
 143         else:
 144             if OutfileFilteredMode:
 145                 WriteMolecule(WriterFiltered, Mol, Compute2DCoords)
 146     
 147     return (MolCount, ValidMolCount, RemainingMolCount)
 148     
 149 def ProcessMoleculesUsingMultipleProcesses(Mols, PatternMol, Writer, WriterFiltered):
 150     """Process and filter molecules using multiprocessing."""
 151     
 152     MiscUtil.PrintInfo("\nFiltering molecules using multiprocessing...")
 153     
 154     MPParams = OptionsInfo["MPParams"]
 155     NegateMatch = OptionsInfo["NegateMatch"]
 156     OutfileFilteredMode = OptionsInfo["OutfileFilteredMode"]
 157     Compute2DCoords = OptionsInfo["OutfileParams"]["Compute2DCoords"]
 158     
 159     # Setup data for initializing a worker process...
 160     MiscUtil.PrintInfo("Encoding options info and pattern molecule...")
 161     OptionsInfo["EncodedPatternMol"] = RDKitUtil.MolToBase64EncodedMolString(PatternMol)
 162     InitializeWorkerProcessArgs = (MiscUtil.ObjectToBase64EncodedString(Options), MiscUtil.ObjectToBase64EncodedString(OptionsInfo))
 163 
 164     # Setup a encoded mols data iterable for a worker process...
 165     WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(Mols)
 166 
 167     # Setup process pool along with data initialization for each process...
 168     MiscUtil.PrintInfo("\nConfiguring multiprocessing using %s method..." % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()"))
 169     MiscUtil.PrintInfo("NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n" % (MPParams["NumProcesses"], MPParams["InputDataMode"], ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"])))
 170     
 171     ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeWorkerProcess, InitializeWorkerProcessArgs)
 172     
 173     # Start processing...
 174     if re.match("^Lazy$", MPParams["InputDataMode"], re.I):
 175         Results = ProcessPool.imap(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
 176     elif re.match("^InMemory$", MPParams["InputDataMode"], re.I):
 177         Results = ProcessPool.map(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
 178     else:
 179         MiscUtil.PrintError("The value, %s, specified for \"--inputDataMode\" is not supported." % (MPParams["InputDataMode"]))
 180     
 181     (MolCount, ValidMolCount, RemainingMolCount) = [0] * 3
 182     for Result in Results:
 183         MolCount += 1
 184         MolIndex, EncodedMol, MolMatched = Result
 185         
 186         if EncodedMol is None:
 187             continue
 188         ValidMolCount += 1
 189         
 190         Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
 191         if MolMatched != NegateMatch:
 192             RemainingMolCount += 1
 193             WriteMolecule(Writer, Mol, Compute2DCoords)
 194         else:
 195             if OutfileFilteredMode:
 196                 WriteMolecule(WriterFiltered, Mol, Compute2DCoords)
 197     
 198     return (MolCount, ValidMolCount, RemainingMolCount)
 199 
 200 def InitializeWorkerProcess(*EncodedArgs):
 201     """Initialize data for a worker process."""
 202 
 203     global Options, OptionsInfo
 204     
 205     MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid())
 206 
 207     # Decode Options and OptionInfo...
 208     Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0])
 209     OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1])
 210 
 211     # Decode PatternMols...
 212     OptionsInfo["PatternMol"] = RDKitUtil.MolFromBase64EncodedMolString(OptionsInfo["EncodedPatternMol"])
 213     
 214 def WorkerProcess(EncodedMolInfo):
 215     """Process data for a worker process."""
 216 
 217     MolIndex, EncodedMol = EncodedMolInfo
 218 
 219     if EncodedMol is None:
 220         return [MolIndex, None, False]
 221         
 222     Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
 223     if RDKitUtil.IsMolEmpty(Mol):
 224         MolName = RDKitUtil.GetMolName(Mol, (MolIndex + 1))
 225         MiscUtil.PrintWarning("Ignoring empty molecule: %s" % MolName)
 226         return [MolIndex, None, False]
 227         
 228     MolMatched = DoesMoleculeContainsPattern(Mol, OptionsInfo["PatternMol"])
 229 
 230     return [MolIndex, EncodedMol, MolMatched]
 231 
 232 def WriteMolecule(Writer, Mol, Compute2DCoords):
 233     """Write out molecule."""
 234     
 235     if OptionsInfo["CountMode"]:
 236         return
 237     
 238     if Compute2DCoords:
 239         AllChem.Compute2DCoords(Mol)
 240     
 241     Writer.write(Mol)
 242     
 243 def SetupMoleculeWriters():
 244     """Setup molecule writers."""
 245     
 246     Writer = None
 247     WriterFiltered = None
 248 
 249     if OptionsInfo["CountMode"]:
 250         return (Writer, WriterFiltered)
 251 
 252     Writer = RDKitUtil.MoleculesWriter(OptionsInfo["Outfile"], **OptionsInfo["OutfileParams"])
 253     if Writer is None:
 254         MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OptionsInfo["Outfile"])
 255     MiscUtil.PrintInfo("Generating file %s..." % OptionsInfo["Outfile"])
 256     
 257     if OptionsInfo["OutfileFilteredMode"]:
 258         WriterFiltered = RDKitUtil.MoleculesWriter(OptionsInfo["OutfileFiltered"], **OptionsInfo["OutfileParams"])
 259         if WriterFiltered is None:
 260             MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OptionsInfo["OutfileFiltered"])
 261         MiscUtil.PrintInfo("Generating file %s..." % OptionsInfo["OutfileFiltered"])
 262     
 263     return (Writer, WriterFiltered)
 264 
 265 def DoesMoleculeContainsPattern(Mol, PatternMol):
 266     """Check presence of pattern in the molecule"""
 267 
 268     return True if Mol.HasSubstructMatch(PatternMol, useChirality = OptionsInfo["UseChirality"]) else False
 269 
 270 def ProcessOptions():
 271     """Process and validate command line arguments and options"""
 272     
 273     MiscUtil.PrintInfo("Processing options...")
 274     
 275     # Validate options...
 276     ValidateOptions()
 277     
 278     OptionsInfo["Infile"] = Options["--infile"]
 279     OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters("--infileParams", Options["--infileParams"], Options["--infile"])
 280     
 281     OptionsInfo["Outfile"] = Options["--outfile"]
 282     OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters("--outfileParams", Options["--outfileParams"], Options["--infile"], Options["--outfile"])
 283     
 284     OptionsInfo["OutfileFiltered"] = ""
 285     if Options["--outfile"]:
 286         FileDir, FileName, FileExt = MiscUtil.ParseFileName(Options["--outfile"])
 287         OutfileFiltered = "%s_Filtered.%s" % (FileName, FileExt)
 288         OptionsInfo["OutfileFiltered"] = OutfileFiltered
 289     OptionsInfo["OutfileFilteredMode"] = True if re.match("^yes$", Options["--outfileFiltered"], re.I) else False
 290         
 291     OptionsInfo["Overwrite"] = Options["--overwrite"]
 292 
 293     OptionsInfo["CountMode"] = True if re.match("^count$", Options["--mode"], re.I) else False
 294     OptionsInfo["NegateMatch"] = True if re.match("^yes$", Options["--negate"], re.I) else False
 295     
 296     OptionsInfo["MPMode"] = True if re.match("^yes$", Options["--mp"], re.I) else False
 297     OptionsInfo["MPParams"] = MiscUtil.ProcessOptionMultiprocessingParameters("--mpParams", Options["--mpParams"])
 298     
 299     OptionsInfo["Pattern"] = Options["--pattern"]
 300     OptionsInfo["UseChirality"] = True if re.match("^yes$", Options["--useChirality"], re.I) else False
 301 
 302 def RetrieveOptions():
 303     """Retrieve command line arguments and options"""
 304     
 305     # Get options...
 306     global Options
 307     Options = docopt(_docoptUsage_)
 308     
 309     # Set current working directory to the specified directory...
 310     WorkingDir = Options["--workingdir"]
 311     if WorkingDir:
 312         os.chdir(WorkingDir)
 313     
 314     # Handle examples option...
 315     if "--examples" in Options and Options["--examples"]:
 316         MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_))
 317         sys.exit(0)
 318 
 319 def ValidateOptions():
 320     """Validate option values"""
 321     
 322     MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"])
 323     MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "sdf sd smi txt csv tsv")
 324     if Options["--outfile"]:
 325         MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "sdf sd smi")
 326         MiscUtil.ValidateOptionsOutputFileOverwrite("-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"])
 327         MiscUtil.ValidateOptionsDistinctFileNames("-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"])
 328         
 329     MiscUtil.ValidateOptionTextValue("--outfileFiltered", Options["--outfileFiltered"], "yes no")
 330     
 331     MiscUtil.ValidateOptionTextValue("-m, --mode", Options["--mode"], "retrieve count")
 332     if re.match("^retrieve$", Options["--mode"], re.I):
 333         if not Options["--outfile"]:
 334             MiscUtil.PrintError("The outfile must be specified using \"-o, --outfile\" during \"retrieve\" value of \"-m, --mode\" option")
 335         
 336     MiscUtil.ValidateOptionTextValue("--mp", Options["--mp"], "yes no")
 337     MiscUtil.ValidateOptionTextValue("-n, --negate", Options["--negate"], "yes no")
 338     
 339     PatternMol = Chem.MolFromSmarts(Options["--pattern"])
 340     if PatternMol is None:
 341         MiscUtil.PrintError("The value specified, %s, using option \"-p, --pattern\" is not a valid SMARTS: Failed to create pattern molecule" % Options["--pattern"])
 342     
 343     MiscUtil.ValidateOptionTextValue("--useChirality", Options["--useChirality"], "yes no")
 344 
 345 # Setup a usage string for docopt...
 346 _docoptUsage_ = """
 347 RDKitSearchSMARTS.py - Perform a substructure search using SMARTS pattern
 348 
 349 Usage:
 350     RDKitSearchSMARTS.py  [--infileParams <Name,Value,...>] [--mode <retrieve or count>]
                          [--mp <yes or no>] [--mpParams <Name,Value,...>] [--negate <yes or no>]
 352                           [--outfileFiltered <yes or no>] [--outfileParams <Name,Value,...>] [--overwrite]
 353                           [--useChirality <yes or no>] [-w <dir>] [-o <outfile>] -p <SMARTS> -i <infile>
 354     RDKitSearchSMARTS.py -h | --help | -e | --examples
 355 
 356 Description:
 357     Perform a substructure search in an input file using specified SMARTS pattern and
 358     write out the matched molecules to an output file or simply count the number
 359     of matches.
 360 
    The supported input file formats are: SD (.sdf, .sd), SMILES (.smi, .csv, .tsv, .txt)
 362 
 363     The supported output file formats are: SD (.sdf, .sd), SMILES (.smi)
 364 
 365 Options:
 366     -e, --examples
 367         Print examples.
 368     -h, --help
 369         Print this help message.
 370     -i, --infile <infile>
 371         Input file name.
 372     --infileParams <Name,Value,...>  [default: auto]
 373         A comma delimited list of parameter name and value pairs for reading
 374         molecules from files. The supported parameter names for different file
 375         formats, along with their default values, are shown below:
 376             
 377             SD, MOL: removeHydrogens,yes,sanitize,yes,strictParsing,yes
 378             SMILES: smilesColumn,1,smilesNameColumn,2,smilesDelimiter,space,
 379                 smilesTitleLine,auto,sanitize,yes
 380             
 381         Possible values for smilesDelimiter: space, comma or tab.
 382     -m, --mode <retrieve or count>  [default: retrieve]
 383         Specify whether to retrieve and write out matched molecules to an output
 384         file or simply count the number of matches.
 385     --mp <yes or no>  [default: no]
 386         Use multiprocessing.
 387          
 388         By default, input data is retrieved in a lazy manner via mp.Pool.imap()
 389         function employing lazy RDKit data iterable. This allows processing of
        arbitrarily large data sets without any additional memory requirements.
 391         
 392         All input data may be optionally loaded into memory by mp.Pool.map()
 393         before starting worker processes in a process pool by setting the value
 394         of 'inputDataMode' to 'InMemory' in '--mpParams' option.
 395         
 396         A word to the wise: The default 'chunkSize' value of 1 during 'Lazy' input
 397         data mode may adversely impact the performance. The '--mpParams' section
 398         provides additional information to tune the value of 'chunkSize'.
 399     --mpParams <Name,Value,...>  [default: auto]
        A comma delimited list of parameter name and value pairs to
        configure multiprocessing.
 402         
 403         The supported parameter names along with their default and possible
 404         values are shown below:
 405         
 406             chunkSize, auto
 407             inputDataMode, Lazy   [ Possible values: InMemory or Lazy ]
 408             numProcesses, auto   [ Default: mp.cpu_count() ]
 409         
 410         These parameters are used by the following functions to configure and
 411         control the behavior of multiprocessing: mp.Pool(), mp.Pool.map(), and
 412         mp.Pool.imap().
 413         
 414         The chunkSize determines chunks of input data passed to each worker
 415         process in a process pool by mp.Pool.map() and mp.Pool.imap() functions.
 416         The default value of chunkSize is dependent on the value of 'inputDataMode'.
 417         
 418         The mp.Pool.map() function, invoked during 'InMemory' input data mode,
 419         automatically converts RDKit data iterable into a list, loads all data into
 420         memory, and calculates the default chunkSize using the following method
 421         as shown in its code:
 422         
 423             chunkSize, extra = divmod(len(dataIterable), len(numProcesses) * 4)
 424             if extra: chunkSize += 1
 425         
 426         For example, the default chunkSize will be 7 for a pool of 4 worker processes
 427         and 100 data items.
 428         
 429         The mp.Pool.imap() function, invoked during 'Lazy' input data mode, employs
 430         'lazy' RDKit data iterable to retrieve data as needed, without loading all the
 431         data into memory. Consequently, the size of input data is not known a priori.
 432         It's not possible to estimate an optimal value for the chunkSize. The default 
 433         chunkSize is set to 1.
 434         
 435         The default value for the chunkSize during 'Lazy' data mode may adversely
 436         impact the performance due to the overhead associated with exchanging
 437         small chunks of data. It is generally a good idea to explicitly set chunkSize to
 438         a larger value during 'Lazy' input data mode, based on the size of your input
 439         data and number of processes in the process pool.
 440         
 441         The mp.Pool.map() function waits for all worker processes to process all
 442         the data and return the results. The mp.Pool.imap() function, however,
        returns the results obtained from worker processes as soon as the
 444         results become available for specified chunks of data.
 445         
 446         The order of data in the results returned by both mp.Pool.map() and 
 447         mp.Pool.imap() functions always corresponds to the input data.
 448     -n, --negate <yes or no>  [default: no]
 449         Specify whether to find molecules not matching the specified SMARTS pattern.
 450     -o, --outfile <outfile>
 451         Output file name.
 452     --outfileFiltered <yes or no>  [default: no]
 453         Write out a file containing filtered molecules. Its name is automatically
 454         generated from the specified output file. Default: <OutfileRoot>_
 455         Filtered.<OutfileExt>.
 456     --outfileParams <Name,Value,...>  [default: auto]
 457         A comma delimited list of parameter name and value pairs for writing
 458         molecules to files. The supported parameter names for different file
 459         formats, along with their default values, are shown below:
 460             
 461             SD: compute2DCoords,auto,kekulize,no
 462             SMILES: kekulize,no,smilesDelimiter,space, smilesIsomeric,yes,
 463                 smilesTitleLine,yes
 464             
 465         Default value for compute2DCoords: yes for SMILES input file; no for all other
 466         file types.
 467     --overwrite
 468         Overwrite existing files.
 469     -p, --pattern <SMARTS>  [default: none]
 470         SMARTS pattern for performing search.
 471     -u, --useChirality <yes or no>  [default: no]
 472         Use stereochemistry information for SMARTS search.
 473     -w, --workingdir <dir>
 474         Location of working directory which defaults to the current directory.
 475 
 476 Examples:
 477     To retrieve molecules containing the substructure corresponding to a specified
 478     SMARTS pattern and write out a SMILES file, type: 
 479 
 480         % RDKitSearchSMARTS.py -p 'c1ccccc1' -i Sample.smi -o SampleOut.smi
 481 
 482     To retrieve molecules containing the substructure corresponding to a specified
 483     SMARTS pattern,  perform filtering in multiprocessing mode on all available
 484     CPUs without loading all data into memory, and write out a SMILES file, type: 
 485 
 486         % RDKitSearchSMARTS.py --mp yes -p 'c1ccccc1' -i Sample.smi -o SampleOut.smi
 487 
 488     To retrieve molecules containing the substructure corresponding to a specified
 489     SMARTS pattern,  perform filtering in multiprocessing mode on all available
 490     CPUs by loading all data into memory, and write out a SMILES file, type: 
 491 
 492         % RDKitSearchSMARTS.py --mp yes --mpParams "inputDataMode,InMemory"
 493           -p 'c1ccccc1' -i Sample.smi -o SampleOut.smi
 494 
 495     To retrieve molecules containing the substructure corresponding to a specified
 496     SMARTS pattern,  perform filtering in multiprocessing mode on specific number
 497     of CPUs and chunk size without loading all data into memory, and write out
 498     a SMILES file, type: 
 499 
 500         % RDKitSearchSMARTS.py --mp yes --mpParams "inputDataMode,Lazy,
 501           numProcesses,4,chunkSize,8" -p 'c1ccccc1' -i Sample.smi -o SampleOut.smi
 502 
 503     To only count the number of molecules containing the substructure corresponding
 504     to a specified SMARTS pattern without writing out any file, type: 
 505 
 506         % RDKitSearchSMARTS.py -m count -p 'c1ccccc1' -i Sample.smi
 507 
    To retrieve molecules from a SD file not containing the substructure
    corresponding to a specified SMARTS pattern and write out a SD file, type:
 510 
 511         % RDKitSearchSMARTS.py -n yes -p 'c1ccccc1' -i Sample.sdf -o SampleOut.sdf
 512 
 513     To retrieve molecules containing the substructure corresponding to a specified
    SMARTS pattern from a CSV SMILES file, SMILES strings in column 1, name in
    column 2, and write out a SD file, type:
 516 
 517         % RDKitSearchSMARTS.py -p 'c1ccccc1' --infileParams
 518           "smilesDelimiter,comma,smilesTitleLine,yes,smilesColumn,1,
 519           smilesNameColumn,2" --outfileParams "compute2DCoords,yes"
 520           -i SampleSMILES.csv -o SampleOut.sdf
 521 
 522 Author:
 523     Manish Sud(msud@san.rr.com)
 524 
 525 See also:
 526     RDKitConvertFileFormat.py, RDKitFilterPAINS.py, RDKitSearchFunctionalGroups.py 
 527 
 528 Copyright:
 529     Copyright (C) 2019 Manish Sud. All rights reserved.
 530 
 531     The functionality available in this script is implemented using RDKit, an
 532     open source toolkit for cheminformatics developed by Greg Landrum.
 533 
 534     This file is part of MayaChemTools.
 535 
 536     MayaChemTools is free software; you can redistribute it and/or modify it under
 537     the terms of the GNU Lesser General Public License as published by the Free
 538     Software Foundation; either version 3 of the License, or (at your option) any
 539     later version.
 540 
 541 """
 542 
# Run the script only when executed directly, not when imported as a module; the
# worker processes started during multiprocessing may import this module...
if __name__ == "__main__":
    main()