MayaChemTools

    1 #!/bin/env python
    2 #
    3 # File: RDKitPerformPositionalAnalogueScan.py
    4 # Author: Manish Sud <msud@san.rr.com>
    5 #
    6 # Copyright (C) 2020 Manish Sud. All rights reserved.
    7 #
    8 # The functionality available in this script is implemented using RDKit, an
    9 # open source toolkit for cheminformatics developed by Greg Landrum.
   10 #
   11 # This file is part of MayaChemTools.
   12 #
   13 # MayaChemTools is free software; you can redistribute it and/or modify it under
   14 # the terms of the GNU Lesser General Public License as published by the Free
   15 # Software Foundation; either version 3 of the License, or (at your option) any
   16 # later version.
   17 #
   18 # MayaChemTools is distributed in the hope that it will be useful, but without
   19 # any warranty; without even the implied warranty of merchantability of fitness
   20 # for a particular purpose.  See the GNU Lesser General Public License for more
   21 # details.
   22 #
   23 # You should have received a copy of the GNU Lesser General Public License
   24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
   25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
   26 # Boston, MA, 02111-1307, USA.
   27 #
   28 
   29 from __future__ import print_function
   30 
   31 # Add local python path to the global path and import standard library modules...
   32 import os
   33 import sys;  sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python"))
   34 import time
   35 import re
   36 import multiprocessing as mp
   37 from itertools import combinations
   38 from itertools import permutations
   39 
   40 # RDKit imports...
   41 try:
   42     from rdkit import rdBase
   43     from rdkit import Chem
   44     from rdkit.Chem import AllChem
   45 except ImportError as ErrMsg:
   46     sys.stderr.write("\nFailed to import RDKit module/package: %s\n" % ErrMsg)
   47     sys.stderr.write("Check/update your RDKit environment and try again.\n\n")
   48     sys.exit(1)
   49 
   50 # MayaChemTools imports...
   51 try:
   52     from docopt import docopt
   53     import MiscUtil
   54     import RDKitUtil
   55 except ImportError as ErrMsg:
   56     sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg)
   57     sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n")
   58     sys.exit(1)
   59 
   60 ScriptName = os.path.basename(sys.argv[0])
   61 Options = {}
   62 OptionsInfo = {}
   63 
   64 def main():
   65     """Start execution of the script"""
   66     
   67     MiscUtil.PrintInfo("\n%s (RDK v%s; %s): Starting...\n" % (ScriptName, rdBase.rdkitVersion, time.asctime()))
   68     
   69     (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime()
   70     
   71     # Retrieve command line arguments and options...
   72     RetrieveOptions()
   73     
   74     # Process and validate command line arguments and options...
   75     ProcessOptions()
   76     
   77     # Perform actions required by the script...
   78     PerformPositionalAnalogueScan()
   79     
   80     MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName)
   81     MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime))
   82 
   83 def PerformPositionalAnalogueScan():
   84     """Perform positional analogue scan."""
   85     
   86     # Setup a molecule reader for input file...
   87     MiscUtil.PrintInfo("\nProcessing file %s..." % OptionsInfo["Infile"])
   88     OptionsInfo["InfileParams"]["AllowEmptyMols"] = True
   89     Mols  = RDKitUtil.ReadMolecules(OptionsInfo["Infile"], **OptionsInfo["InfileParams"])
   90 
   91     # Set up a molecule writer...
   92     Writer = RDKitUtil.MoleculesWriter(OptionsInfo["Outfile"], **OptionsInfo["OutfileParams"])
   93     if Writer is None:
   94         MiscUtil.PrintError("Failed to setup a writer for output fie %s " % OptionsInfo["Outfile"])
   95     MiscUtil.PrintInfo("Generating file %s..." % OptionsInfo["Outfile"])
   96 
   97     MolCount, ValidMolCount, SearchPatternMissingCount, AnaloguesGenerationFailedCount, AnaloguesCount = ProcessMolecules(Mols, Writer)
   98 
   99     if Writer is not None:
  100         Writer.close()
  101     
  102     MiscUtil.PrintInfo("\nTotal number of molecules: %d" % MolCount)
  103     MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount)
  104     if not OptionsInfo["RxnSMARTSMode"]:
  105         MiscUtil.PrintInfo("Number of molecules with missing search pattern: %d" % SearchPatternMissingCount)
  106     MiscUtil.PrintInfo("Number of molecules failed during generation of analogues: %d" % AnaloguesGenerationFailedCount)
  107     MiscUtil.PrintInfo("Number of ignored molecules: %d" % (MolCount - ValidMolCount + SearchPatternMissingCount + AnaloguesGenerationFailedCount))
  108     
  109     MiscUtil.PrintInfo("\nTotal number of analogues for valid molecules: %d" % AnaloguesCount)
  110     MiscUtil.PrintInfo("Average number of analogues per valid molecule: %d" % (int(AnaloguesCount/(ValidMolCount))))
  111 
  112 def ProcessMolecules(Mols, Writer):
  113     """Process molecules to generate analogues."""
  114     
  115     if OptionsInfo["MPMode"]:
  116         return ProcessMoleculesUsingMultipleProcesses( Mols, Writer)
  117     else:
  118         return ProcessMoleculesUsingSingleProcess(Mols, Writer)
  119 
  120 def ProcessMoleculesUsingSingleProcess(Mols, Writer):
  121     """Process molecules to generate analogues using a single process."""
  122 
  123     Compute2DCoords = OptionsInfo["OutfileParams"]["Compute2DCoords"]
  124     SetSMILESMolProps = OptionsInfo["OutfileParams"]["SetSMILESMolProps"]
  125 
  126     (MolCount, ValidMolCount, SearchPatternMissingCount, AnaloguesGenerationFailedCount, AnaloguesCount) = [0] * 5
  127 
  128     # Initialize search and target patterns info...
  129     SetupSearchAndTargetPatternsInfo()
  130 
  131     FirstMol = True
  132     for Mol in Mols:
  133         MolCount += 1
  134         
  135         if Mol is None:
  136             continue
  137         
  138         if RDKitUtil.IsMolEmpty(Mol):
  139             if not OptionsInfo["QuietMode"]:
  140                 MolName = RDKitUtil.GetMolName(Mol, MolCount)
  141                 MiscUtil.PrintWarning("Ignoring empty molecule: %s" % MolName)
  142             continue
  143         ValidMolCount += 1
  144 
  145         MolAnalogues, SearchPatternMissingStatus, AnaloguesGenerationStatus = GenerateMolAnalogues(Mol, MolCount)
  146         
  147         if FirstMol:
  148             FirstMol = False
  149             if SetSMILESMolProps:
  150                 if Writer is not None:
  151                     RDKitUtil.SetWriterMolProps(Writer, Mol)
  152         
  153         if not OptionsInfo["RxnSMARTSMode"]:
  154             if SearchPatternMissingStatus:
  155                 SearchPatternMissingCount += 1
  156                 WriteMolecule(Writer, Mol, Compute2DCoords)
  157                 continue
  158         
  159         if not AnaloguesGenerationStatus:
  160             AnaloguesGenerationFailedCount += 1
  161             WriteMolecule(Writer, Mol, Compute2DCoords)
  162             continue
  163         
  164         # Write out molecues and analogues...
  165         WriteMolAnalogues(Writer, Mol, MolCount, MolAnalogues, Compute2DCoords)
  166         
  167         AnaloguesCount += len(MolAnalogues)
  168 
  169     return (MolCount, ValidMolCount, SearchPatternMissingCount, AnaloguesGenerationFailedCount, AnaloguesCount)
  170 
  171 def ProcessMoleculesUsingMultipleProcesses(Mols, Writer):
  172     """Process molecules to generate analogues using multiprocessing."""
  173 
  174     MPParams = OptionsInfo["MPParams"]
  175     
  176     # Setup data for initializing a worker process...
  177     MiscUtil.PrintInfo("Encoding options info...")
  178 
  179     InitializeWorkerProcessArgs = (MiscUtil.ObjectToBase64EncodedString(Options), MiscUtil.ObjectToBase64EncodedString(OptionsInfo))
  180     
  181     # Setup a encoded mols data iterable for a worker process...
  182     WorkerProcessDataIterable = RDKitUtil.GenerateBase64EncodedMolStrings(Mols)
  183 
  184     # Setup process pool along with data initialization for each process...
  185     MiscUtil.PrintInfo("\nConfiguring multiprocessing using %s method..." % ("mp.Pool.imap()" if re.match("^Lazy$", MPParams["InputDataMode"], re.I) else "mp.Pool.map()"))
  186     MiscUtil.PrintInfo("NumProcesses: %s; InputDataMode: %s; ChunkSize: %s\n" % (MPParams["NumProcesses"], MPParams["InputDataMode"], ("automatic" if MPParams["ChunkSize"] is None else MPParams["ChunkSize"])))
  187     
  188     ProcessPool = mp.Pool(MPParams["NumProcesses"], InitializeWorkerProcess, InitializeWorkerProcessArgs)
  189     
  190     # Start processing...
  191     if re.match("^Lazy$", MPParams["InputDataMode"], re.I):
  192         Results = ProcessPool.imap(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
  193     elif re.match("^InMemory$", MPParams["InputDataMode"], re.I):
  194         Results = ProcessPool.map(WorkerProcess, WorkerProcessDataIterable, MPParams["ChunkSize"])
  195     else:
  196         MiscUtil.PrintError("The value, %s, specified for \"--inputDataMode\" is not supported." % (MPParams["InputDataMode"]))
  197     
  198     Compute2DCoords = OptionsInfo["OutfileParams"]["Compute2DCoords"]
  199     SetSMILESMolProps = OptionsInfo["OutfileParams"]["SetSMILESMolProps"]
  200 
  201     (MolCount, ValidMolCount, SearchPatternMissingCount, AnaloguesGenerationFailedCount, AnaloguesCount) = [0] * 5
  202     FirstMol = True
  203     for Result in Results:
  204         MolCount += 1
  205         MolIndex, EncodedMol, EncodedMolAnalogues, SearchPatternMissingStatus, AnaloguesGenerationStatus  = Result
  206         
  207         if EncodedMol is None:
  208             continue
  209         ValidMolCount += 1
  210 
  211         Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
  212         
  213         if FirstMol:
  214             FirstMol = False
  215             if SetSMILESMolProps:
  216                 if Writer is not None:
  217                     RDKitUtil.SetWriterMolProps(Writer, Mol)
  218         
  219         if not OptionsInfo["RxnSMARTSMode"]:
  220             if SearchPatternMissingStatus:
  221                 SearchPatternMissingCount += 1
  222                 WriteMolecule(Writer, Mol, Compute2DCoords)
  223                 continue
  224         
  225         if not AnaloguesGenerationStatus:
  226             AnaloguesGenerationFailedCount += 1
  227             WriteMolecule(Writer, Mol, Compute2DCoords)
  228             continue
  229         
  230         MolAnalogues = [RDKitUtil.MolFromBase64EncodedMolString(EncodedMolAnalogue) for EncodedMolAnalogue in EncodedMolAnalogues]
  231         
  232         # Write out molecues and analogues...
  233         WriteMolAnalogues(Writer, Mol, MolCount, MolAnalogues, Compute2DCoords)
  234         
  235         AnaloguesCount += len(MolAnalogues)
  236     
  237     return (MolCount, ValidMolCount, SearchPatternMissingCount, AnaloguesGenerationFailedCount, AnaloguesCount)
  238     
  239 def InitializeWorkerProcess(*EncodedArgs):
  240     """Initialize data for a worker process."""
  241     
  242     global Options, OptionsInfo
  243 
  244     MiscUtil.PrintInfo("Starting process (PID: %s)..." % os.getpid())
  245 
  246     # Decode Options and OptionInfo...
  247     Options = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[0])
  248     OptionsInfo = MiscUtil.ObjectFromBase64EncodedString(EncodedArgs[1])
  249     
  250     # Initialize search and target  patterns info...
  251     SetupSearchAndTargetPatternsInfo()
  252 
  253 def WorkerProcess(EncodedMolInfo):
  254     """Process data for a worker process."""
  255 
  256     MolIndex, EncodedMol = EncodedMolInfo
  257 
  258     MolAnalogues = None
  259     SearchPatternMissingStatus = False
  260     AnaloguesGenerationStatus = True
  261     
  262     if EncodedMol is None:
  263         return [MolIndex, None, MolAnalogues, SearchPatternMissingStatus, AnaloguesGenerationStatus]
  264 
  265     Mol = RDKitUtil.MolFromBase64EncodedMolString(EncodedMol)
  266     if RDKitUtil.IsMolEmpty(Mol):
  267         if not OptionsInfo["QuietMode"]:
  268             MolName = RDKitUtil.GetMolName(Mol, (MolIndex + 1))
  269             MiscUtil.PrintWarning("Ignoring empty molecule: %s" % MolName)
  270         return [MolIndex, None, MolAnalogues, SearchPatternMissingStatus, AnaloguesGenerationStatus]
  271 
  272     MolAnalogues, SearchPatternMissingStatus, AnaloguesGenerationStatus = GenerateMolAnalogues(Mol, (MolIndex +1))
  273 
  274     EncodedMolAnalogues = None
  275     if MolAnalogues is not None:
  276         EncodedMolAnalogues = [RDKitUtil.MolToBase64EncodedMolString(MolAnalogue, PropertyPickleFlags = Chem.PropertyPickleOptions.MolProps | Chem.PropertyPickleOptions.PrivateProps) for MolAnalogue in MolAnalogues]
  277     
  278     return [MolIndex, EncodedMol, EncodedMolAnalogues, SearchPatternMissingStatus, AnaloguesGenerationStatus]
  279 
  280 def GenerateMolAnalogues(Mol, MolNum = None):
  281     """Generate analogues."""
  282 
  283     if OptionsInfo["AttachAtomsMode"] or OptionsInfo["ReplaceAtomsMode"] or OptionsInfo["AttachSMILESMode"]:
  284         return GenerateMolAnaloguesForMultipleScanModes(Mol, MolNum)
  285     elif OptionsInfo["RxnSMARTSMode"]:
  286         return GenerateMolAnaloguesForRxnSMARTSMode(Mol, MolNum)
  287     elif OptionsInfo["TandemScanMode"]:
  288         return GenerateMolAnaloguesForTandemScanMode(Mol, MolNum)
  289     else:
  290         MiscUtil.PrintError("Failed to generate analogues.  The mode value, %s, specified using \"-m, --mode\" is not supported." % (OptionsInfo["Mode"]))
  291 
  292 def GenerateMolAnaloguesForMultipleScanModes(Mol, MolNum = None):
  293     """Generate analogues for the following scan modes: AttachAtom, ReplaceAtom
  294     or AttachSMILES."""
  295 
  296     MolAnalogues = None
  297     SearchPatternMissingStatus = False
  298     AnaloguesGenerationStatus = True
  299 
  300     # Retrieve matched atoms...
  301     MolMatchedAtomIndices = SetupMolMatchedAtomIndices(Mol, MolNum)
  302     if MolMatchedAtomIndices is None:
  303         SearchPatternMissingStatus = True
  304         return (MolAnalogues, SearchPatternMissingStatus, AnaloguesGenerationStatus)
  305 
  306     MolAnalogues = []
  307     MolAnaloguesSet = set()
  308 
  309     TargetPatternInfo = OptionsInfo["TargetPatternInfo"]
  310     for TargetPatternIndex, TargetPatternObject in enumerate(TargetPatternInfo["PatternObjects"]):
  311         for AtomIndicesCombination in combinations(MolMatchedAtomIndices, OptionsInfo["ComboSearchAtoms"]):
  312             MolAnalogue = Chem.RWMol(Mol)
  313         
  314             for AtomIndex in AtomIndicesCombination:
  315                 if OptionsInfo["AttachAtomsMode"]:
  316                     AtomicNumber = TargetPatternObject
  317                     MolAnalogue = AttachAtom(MolAnalogue, AtomIndex, AtomicNumber, BondOrder = Chem.rdchem.BondType.SINGLE)
  318                 elif OptionsInfo["ReplaceAtomsMode"]:
  319                     AtomicNumber = TargetPatternObject
  320                     MolAnalogue = ReplaceAtom(MolAnalogue, AtomIndex, AtomicNumber)
  321                 elif OptionsInfo["AttachSMILESMode"]:
  322                     AttachAtomIndex = MolAnalogue.GetNumAtoms() - 1 + TargetPatternInfo["AttachSMILESAtomIndexOffsets"][TargetPatternIndex]
  323                     SMILESMol = TargetPatternObject
  324                     MolAnalogue = AttachMol(MolAnalogue, AtomIndex, SMILESMol, AttachAtomIndex, BondOrder = Chem.rdchem.BondType.SINGLE)
  325                 
  326             if  SanitizeMolAnalogue(MolAnalogue, MolNum):
  327                 TrackAnalogues(MolAnalogue, MolAnaloguesSet, MolAnalogues)
  328         
  329     if len(MolAnalogues) == 0:
  330         AnaloguesGenerationStatus = False
  331         
  332     return (MolAnalogues, SearchPatternMissingStatus, AnaloguesGenerationStatus)
  333 
  334 def GenerateMolAnaloguesForRxnSMARTSMode(Mol, MolNum = None):
  335     """Generate analogues for reaction SMARTS mode."""
  336 
  337     SearchPatternMissingStatus = False
  338     AnaloguesGenerationStatus = True
  339 
  340     MolAnalogues = []
  341     MolAnaloguesSet = set()
  342     
  343     TargetPatternInfo = OptionsInfo["TargetPatternInfo"]
  344     for Rxn in TargetPatternInfo["PatternObjects"]:
  345         RxnProducts = Rxn.RunReactants((Mol,))
  346         
  347         if not len(RxnProducts):
  348             continue
  349         
  350         for RxnProduct in RxnProducts:
  351             MolAnalogue = RxnProduct[0]
  352             
  353             if  SanitizeMolAnalogue(MolAnalogue, MolNum):
  354                 TrackAnalogues(MolAnalogue, MolAnaloguesSet, MolAnalogues)
  355     
  356     if len(MolAnalogues) == 0:
  357         AnaloguesGenerationStatus = False
  358         
  359     return (MolAnalogues, SearchPatternMissingStatus, AnaloguesGenerationStatus)
  360 
  361 def GenerateMolAnaloguesForTandemScanMode(Mol, MolNum = None):
  362     """Generate analogues for tandem scan mode."""
  363 
  364     MolAnalogues = None
  365     SearchPatternMissingStatus = False
  366     AnaloguesGenerationStatus = True
  367 
  368     # Retrieve matched atoms...
  369     MolMatchedAtomIndices = SetupMolMatchedAtomIndices(Mol, MolNum)
  370     if MolMatchedAtomIndices is None:
  371         SearchPatternMissingStatus = True
  372         return (MolAnalogues, SearchPatternMissingStatus, AnaloguesGenerationStatus)
  373 
  374     TargetPatternTandemScanInfo = OptionsInfo["TargetPatternTandemScanInfo"]
  375     NumOfTandemOperations = 0
  376     if TargetPatternTandemScanInfo is not None and len(TargetPatternTandemScanInfo):
  377         NumOfTandemOperations = len(TargetPatternTandemScanInfo[0]["Names"])
  378     
  379     if  len(MolMatchedAtomIndices) < NumOfTandemOperations:
  380         if not OptionsInfo["QuietMode"]:
  381             MolName = RDKitUtil.GetMolName(Mol, MolNum)
  382             MiscUtil.PrintWarning("Number of matched search pattern atoms, %s, must be >= number of specified operations, %s, during TandemScan mode for molecule %s..." % (len(MolMatchedAtomIndices), NumOfTandemOperations, MolName))
  383         SearchPatternMissingStatus = True
  384         return (MolAnalogues, SearchPatternMissingStatus, AnaloguesGenerationStatus)
  385     
  386     MolAnalogues = []
  387     MolAnaloguesSet = set()
  388     for AtomIndicesCombination in combinations(MolMatchedAtomIndices, NumOfTandemOperations):
  389         AtomIndicesPermutations = []
  390         if OptionsInfo["PermuteTandemScan"]:
  391             for AtomIndicesPermutation in permutations(AtomIndicesCombination, NumOfTandemOperations):
  392                 AtomIndicesPermutations.append(list(AtomIndicesPermutation))
  393         else:
  394             AtomIndicesPermutations.append(list(AtomIndicesCombination))
  395         
  396         for AtomIndicesPermutation in AtomIndicesPermutations:
  397             for PatternInfo in TargetPatternTandemScanInfo:
  398                 MolAnalogue = Chem.RWMol(Mol)
  399                 
  400                 for NameIndex, Name in enumerate(PatternInfo["Names"]):
  401                     AtomIndex = AtomIndicesPermutation[NameIndex]
  402                     PatternObject = PatternInfo["PatternObjects"][Name]
  403                     
  404                     if re.match("^AttachAtoms$", Name, re.I):
  405                         MolAnalogue = AttachAtom(MolAnalogue, AtomIndex, PatternObject, BondOrder = Chem.rdchem.BondType.SINGLE)
  406                     elif re.match("^ReplaceAtoms$", Name, re.I):
  407                         MolAnalogue = ReplaceAtom(MolAnalogue, AtomIndex, PatternObject)
  408                     elif re.match("^AttachSMILES$", Name, re.I):
  409                         AttachAtomIndex = MolAnalogue.GetNumAtoms() - 1 + PatternInfo["AttachSMILESAtomIndexOffsets"][Name]
  410                         MolAnalogue = AttachMol(MolAnalogue, AtomIndex, PatternObject, AttachAtomIndex, BondOrder = Chem.rdchem.BondType.SINGLE)
  411                         MolAnalogue = Chem.RWMol(MolAnalogue)
  412                     
  413                 if not SanitizeMolAnalogue(MolAnalogue, MolNum):
  414                     continue
  415                 
  416                 TrackAnalogues(MolAnalogue, MolAnaloguesSet, MolAnalogues)
  417             
  418     if len(MolAnalogues) == 0:
  419         AnaloguesGenerationStatus = False
  420         
  421     return (MolAnalogues, SearchPatternMissingStatus, AnaloguesGenerationStatus)
  422 
  423 def TrackAnalogues(MolAnalogue, MolAnaloguesSet, MolAnalogues):
  424     """Track analogues. """
  425     
  426     if OptionsInfo["UniqueAnalogues"]:
  427         AnalogueSMILES = Chem.MolToSmiles(Chem.RemoveHs(MolAnalogue))
  428         if AnalogueSMILES not in MolAnaloguesSet:
  429             MolAnaloguesSet.add(AnalogueSMILES)
  430             MolAnalogues.append(MolAnalogue)
  431     else:
  432         MolAnalogues.append(MolAnalogue)
  433     
  434 def ReplaceAtom(Mol, AtomIndex, AtomicNumber):
  435     """Replace an existing atom with a new atom in a molecule."""
  436     
  437     Atom = Mol.GetAtomWithIdx(AtomIndex)
  438     Atom.SetAtomicNum(AtomicNumber)
  439 
  440     return Mol
  441 
  442 def AttachAtom(Mol, AtomIndex, AtomicNumber, BondOrder = Chem.rdchem.BondType.SINGLE):
  443     """Attach atom to an existing atom in a molecule."""
  444     
  445     NewAtomIndex = Mol.AddAtom(Chem.Atom(AtomicNumber))
  446     Mol.AddBond(AtomIndex, NewAtomIndex, BondOrder)
  447 
  448     return Mol
  449 
  450 def AttachMol(Mol, AtomIndex, AttachMol, AttachAtomIndex, BondOrder = Chem.rdchem.BondType.SINGLE):
  451     """Attach molecule to an exisiting molecule."""
  452     
  453     NewMol = Chem.CombineMols(Mol, AttachMol)
  454     
  455     NewMol = Chem.EditableMol(NewMol)
  456     NewMol.AddBond(AtomIndex, AttachAtomIndex, order = BondOrder)
  457     NewMol = NewMol.GetMol()
  458     
  459     return NewMol
  460 
  461 def SanitizeMolAnalogue(Mol, MolNum):
  462     """Sanitize analogue and catch any errors. """
  463     
  464     Status = True
  465     try:
  466         Chem.SanitizeMol(Mol)
  467     except Exception as ErrMsg:
  468         if not OptionsInfo["QuietMode"]:
  469             MolName = RDKitUtil.GetMolName(Mol, MolNum)
  470             MiscUtil.PrintInfo("\n%s\nFailed to sanitize analogue of molecule %s...\nIgnoring analogue..." % (ErrMsg, MolName))
  471         Status = False
  472 
  473     return Status
  474 
  475 def SetupMolMatchedAtomIndices(Mol, MolNum):
  476     """Setup a list of matched atom indices corresponding to first atom in search
  477     SMARTS pattern for a molecule."""
  478 
  479     MatchedAtoms = []
  480     
  481     # Retrieve first atom match for PAS...
  482     MatchedAtomIndices = [AtomIndices[0] for AtomIndices in Mol.GetSubstructMatches(OptionsInfo["SearchPatternMol"])]
  483     if len(MatchedAtomIndices) == 0:
  484         if not OptionsInfo["QuietMode"]:
  485             MolName = RDKitUtil.GetMolName(Mol, MolNum)
  486             MiscUtil.PrintWarning("No search pattern match for molecule %s..." % MolName)
  487         return None
  488     
  489     return MatchedAtomIndices
  490 
  491 def WriteMolAnalogues(Writer, Mol, MolNum, MolAnalogues, Compute2DCoords):
  492     """Write molecule analogues. """
  493 
  494     # Write out molecule...
  495     WriteMolecule(Writer, Mol, Compute2DCoords)
  496     
  497     if MolAnalogues is None:
  498         return
  499 
  500     # Write out analogues...
  501     for Index, MolAnalogue in enumerate(MolAnalogues):
  502         MolName = RDKitUtil.GetMolName(Mol, MolNum)
  503         SetMolAnalogueName(MolAnalogue, MolName, (Index + 1))
  504 
  505         WriteMolecule(Writer, MolAnalogue, Compute2DCoords)
  506     
  507 def SetMolAnalogueName(MolAnalogue, MolName, AnalogueCount):
  508     """Set analogue name."""
  509     
  510     NameSuffix = OptionsInfo["NameSuffix"]
  511     AnalogueName = "%s_%s%d" % (MolName, NameSuffix, AnalogueCount)
  512     MolAnalogue.SetProp("_Name", AnalogueName)
  513 
  514 def WriteMolecule(Writer, Mol, Compute2DCoords):
  515     """Write out molecule."""
  516     
  517     if Compute2DCoords:
  518         AllChem.Compute2DCoords(Mol)
  519     
  520     Writer.write(Mol)
  521 
  522 def SetupSearchAndTargetPatternsInfo():
  523     """Setup search and target patterns info."""
  524 
  525     # Setup search pattern molecule...
  526     SearchPattern = OptionsInfo["SearchPattern"]
  527     SearchPatternMol = None
  528     if SearchPattern is not None:
  529         SearchPatternMol = Chem.MolFromSmarts(SearchPattern)
  530         if SearchPatternMol is None:
  531             MiscUtil.PrintError("Failed to create search pattern molecule. The torsion SMILES/SMARTS pattern, \"%s\", specified using \"-s, --searchPattern\" option is not valid." % (SearchPattern))
  532     OptionsInfo["SearchPatternMol"] = SearchPatternMol
  533 
  534     # Setup target pattern information...
  535     Mode = OptionsInfo["Mode"]
  536     TargetPattern = OptionsInfo["TargetPattern"]
  537 
  538     TargetPatternInfo = OptionsInfo["TargetPatternInfo"]
  539     if TargetPatternInfo is not None:
  540         TargetPatternInfo["PatternObjects"] = []
  541         TargetPatternInfo["AttachSMILESAtomIndexOffsets"] = []
  542             
  543     if re.match("^(AttachAtoms|ReplaceAtoms)$", Mode, re.I):
  544         for Pattern in TargetPatternInfo["Patterns"]:
  545             TargetPatternInfo["PatternObjects"].append(Chem.GetPeriodicTable().GetAtomicNumber(Pattern))
  546             TargetPatternInfo["AttachSMILESAtomIndexOffsets"].append(None)
  547     elif re.match("^AttachSMILES$", Mode, re.I):
  548         for Pattern in TargetPatternInfo["Patterns"]:
  549             SMILESMol = Chem.MolFromSmiles(Pattern)
  550             AttachAtomIndexOffset = GetAttachSMILESAtomIndexOffset(SMILESMol)
  551             
  552             TargetPatternInfo["PatternObjects"].append(SMILESMol)
  553             TargetPatternInfo["AttachSMILESAtomIndexOffsets"].append(AttachAtomIndexOffset)
  554     elif re.match("^RxnSMARTS$", Mode, re.I):
  555         for Pattern in TargetPatternInfo["Patterns"]:
  556             TargetPatternInfo["PatternObjects"].append(AllChem.ReactionFromSmarts(Pattern))
  557             TargetPatternInfo["AttachSMILESAtomIndexOffsets"].append(None)
  558     elif re.match("^TandemScan$", Mode, re.I):
  559         TargetPatternTandemScanInfo = OptionsInfo["TargetPatternTandemScanInfo"]
  560         for PatternInfo in TargetPatternTandemScanInfo:
  561             PatternInfo["PatternObjects"] = {}
  562             PatternInfo["AttachSMILESAtomIndexOffsets"] = {}
  563             for Name in PatternInfo["Names"]:
  564                 Pattern = PatternInfo["Pattern"][Name]
  565                 if re.match("^(AttachAtoms|ReplaceAtoms)$", Name, re.I):
  566                     PatternInfo["PatternObjects"][Name] = Chem.GetPeriodicTable().GetAtomicNumber(Pattern)
  567                     PatternInfo["AttachSMILESAtomIndexOffsets"][Name] = None
  568                 elif re.match("^AttachSMILES$", Name, re.I):
  569                     SMILESMol = Chem.MolFromSmiles(Pattern)
  570                     AttachAtomIndexOffset = GetAttachSMILESAtomIndexOffset(SMILESMol)
  571                     
  572                     PatternInfo["PatternObjects"][Name] = SMILESMol
  573                     PatternInfo["AttachSMILESAtomIndexOffsets"][Name] = AttachAtomIndexOffset
  574 
  575 def GetAttachSMILESAtomIndexOffset(Mol):
  576     """Get attach atom index offset and optionally clear atom map numbers. """
  577     
  578     AttachAtomIndexOffset = 1
  579     
  580     AtomMapIndices  = RDKitUtil.GetAtomMapIndices(Mol)
  581     if AtomMapIndices is not None and len(AtomMapIndices):
  582         # Use the atom index corresponding to the lowest atom map number
  583         # as the attachement atom. The AtomMapIndices list contains atom indices
  584         # sorted by atom map numbers.
  585         
  586         AttachAtomIndexOffset = AtomMapIndices[0] + 1
  587         
  588         if OptionsInfo["ClearAtomMapNumbers"]:
  589             RDKitUtil.ClearAtomMapNumbers(Mol)
  590 
  591     return AttachAtomIndexOffset
  592     
  593 def ProcessOptions():
  594     """Process and validate command line arguments and options"""
  595     
  596     MiscUtil.PrintInfo("Processing options...")
  597     
  598     # Validate options...
  599     ValidateOptions()
  600 
  601     OptionsInfo["ComboSearchAtoms"] = int(Options["--comboSearchAtoms"])
  602     OptionsInfo["ClearAtomMapNumbers"] = True if re.match("^yes$", Options["--clearAtomMapNumbers"], re.I) else False
  603 
  604     OptionsInfo["Infile"] = Options["--infile"]
  605     OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters("--infileParams", Options["--infileParams"], Options["--infile"])
  606     
  607     OptionsInfo["Outfile"] = Options["--outfile"]
  608     OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters("--outfileParams", Options["--outfileParams"], Options["--infile"], Options["--outfile"])
  609     OptionsInfo["Overwrite"] = Options["--overwrite"]
  610 
  611     OptionsInfo["Mode"] = Options["--mode"]
  612     (AttachAtomsMode, ReplaceAtomsMode, AttachSMILESMode, RxnSMARTSMode, TandemScanMode) = [False] * 5
  613     Mode = OptionsInfo["Mode"]
  614     if re.match("^AttachAtoms$", Mode, re.I):
  615         AttachAtomsMode = True
  616     elif re.match("^ReplaceAtoms$", Mode, re.I):
  617         ReplaceAtomsMode = True
  618     elif re.match("^AttachSMILES$", Mode, re.I):
  619         AttachSMILESMode = True
  620     elif re.match("^RxnSMARTS$", Mode, re.I):
  621         RxnSMARTSMode = True
  622     elif re.match("^TandemScan$", Mode, re.I):
  623         TandemScanMode = True
  624     else:
  625         MiscUtil.PrintError("The mode value , %s, specified using \"-m, --mode\" option is not valid" % (Mode))
  626     OptionsInfo["AttachAtomsMode"] = AttachAtomsMode
  627     OptionsInfo["ReplaceAtomsMode"] = ReplaceAtomsMode
  628     OptionsInfo["AttachSMILESMode"] = AttachSMILESMode
  629     OptionsInfo["RxnSMARTSMode"] = RxnSMARTSMode
  630     OptionsInfo["TandemScanMode"] = TandemScanMode
  631 
  632     OptionsInfo["MPMode"] = True if re.match("^yes$", Options["--mp"], re.I) else False
  633     OptionsInfo["MPParams"] = MiscUtil.ProcessOptionMultiprocessingParameters("--mpParams", Options["--mpParams"])
  634     
  635     OptionsInfo["NameSuffix"] = Options["--nameSuffix"]
  636     OptionsInfo["PermuteTandemScan"] = True if re.match("^yes$", Options["--permuteTandemScan"], re.I) else False
  637     OptionsInfo["QuietMode"] = True if re.match("^yes$", Options["--quiet"], re.I) else False
  638     
  639     OptionsInfo["UniqueAnalogues"] = True if re.match("^yes$", Options["--uniqueAnalogues"], re.I) else False
  640     
  641     # Process and validate search pattern...
  642     SearchPattern = re.sub(" ", "", Options["--searchPattern"])
  643     if MiscUtil.IsEmpty(SearchPattern):
  644         MiscUtil.PrintError("No value specified using \"-s, --searchPattern\" option.")
  645     if re.match("^auto$", SearchPattern, re.I):
  646         if re.match("^(AttachAtoms|ReplaceAtoms|AttachSMILES|TandemScan)$", Mode, re.I):
  647             SearchPattern = "[cH]"
  648         else:
  649             SearchPattern = None
  650     else:
  651         if re.match("^RxnSMARTS$", Mode, re.I):
  652             MiscUtil.PrintError("The specification of SMARTS search pattern, %s,  using \"-s, --searchPattern\" option is not allowed during, %s, value of \"-m, --mode\"." % (SearchPattern, Mode))
  653 
  654     if SearchPattern is not None:
  655         SearchPatternMol = Chem.MolFromSmarts(SearchPattern)
  656         if SearchPatternMol is None:
  657             MiscUtil.PrintError("Failed to create search pattern molecule. The torsion SMILES/SMARTS pattern, \"%s\", specified using \"-s, --searchPattern\" option is not valid." % (SearchPattern))
  658     OptionsInfo["SearchPattern"] = SearchPattern
  659         
  660     # Process and validate target pattern...
  661     TargetPattern = Options["--targetPattern"]
  662     if MiscUtil.IsEmpty(TargetPattern):
  663         MiscUtil.PrintError("No value specified using \"-t, --targetPattern\" option.")
  664     TargetPattern = TargetPattern.strip()
  665         
  666     if re.match("^auto$", TargetPattern, re.I):
  667         # Setup default target pattern...
  668         if re.match("^AttachAtoms$", Mode, re.I):
  669             TargetPattern = "F"
  670         elif re.match("^ReplaceAtoms$", Mode, re.I):
  671             TargetPattern = "N"
  672         elif re.match("^AttachSMILES$", Mode, re.I):
  673             TargetPattern = "C(F)(F)(F)"
  674         elif re.match("^RxnSMARTS$", Mode, re.I):
  675             TargetPattern = "[cH:1]>>[N:1]"
  676         elif re.match("^TandemScan$", Mode, re.I):
  677             TargetPattern = "ReplaceAtoms,N,AttachAtoms,F"
  678         else:
  679             MiscUtil.PrintError("Failed to setup default target pattern for mode, %s, specified using \"-m, --mode\"" % (Mode))
  680     
  681     TargetPatternInfo = None
  682     TargetPatternTandemScanInfo = None
  683     
  684     if re.match("^(AttachAtoms|ReplaceAtoms)$", Mode, re.I):
  685         TargetPatternInfo = InitializeTargetPatternInfo(TargetPattern)
  686         for ElementSymbol in TargetPatternInfo["Patterns"]:
  687             if not RDKitUtil.IsValidElementSymbol(ElementSymbol):
  688                 MiscUtil.PrintError("The target pattern, \"%s\", specified using \"-t, --targetPattern\" must be a valid atomic symbol during, %s, value of \"-m, --mode\" option." % (ElementSymbol, Mode))
  689     elif re.match("^AttachSMILES$", Mode, re.I):
  690         TargetPatternInfo = InitializeTargetPatternInfo(TargetPattern)
  691         for SMILES in TargetPatternInfo["Patterns"]:
  692             ValidateSMILESTargetPattern(SMILES, Mode)
  693     elif re.match("^RxnSMARTS$", Mode, re.I):
  694         TargetPatternInfo = InitializeTargetPatternInfo(TargetPattern)
  695         for RxnPattern in TargetPatternInfo["Patterns"]:
  696             try:
  697                 Rxn = AllChem.ReactionFromSmarts(RxnPattern)
  698             except Exception as ErrMsg:
  699                 MiscUtil.PrintError("The target pattern, \"%s\", specified using \"-t, --targetPattern\" must be a valid RxnSMARTS  during, %s, value of \"-m, --mode\" option.\nErrMsg: %s" % (RxnPattern, Mode, ErrMsg))
  700     elif re.match("^TandemScan$", Mode, re.I):
  701         TargetPatternTandemScanInfo = ProcessTandemScanTargetPattern(TargetPattern)
  702 
  703     # Track target pattern info...
  704     OptionsInfo["TargetPattern"] = TargetPattern
  705     OptionsInfo["TargetPatternInfo"] = TargetPatternInfo
  706     OptionsInfo["TargetPatternTandemScanInfo"] = TargetPatternTandemScanInfo
  707     
  708 def InitializeTargetPatternInfo(TargetPattern):
  709     """Initialize target pattern information. """
  710     
  711     TargetPatternInfo = {}
  712     TargetPatternInfo["Patterns"]  = TargetPattern.split()
  713 
  714     return TargetPatternInfo
  715 
  716 def ProcessTandemScanTargetPattern(TargetPattern):
  717     """Process target pattern for tandem scan. """
  718 
  719     # Validate target pattern...
  720     ValidatedTargetPatternInfo = ValidateTandemScanTargetPattern(TargetPattern)
  721 
  722     # Expand target patterns to cover various combinations...
  723     #
  724     # For example: "ReplaceAtoms,N,Attach Atoms, F Cl" expands to
  725     # "ReplaceAtoms,N,AttachAtoms, F,"ReplaceAtoms,N,AttachAtoms,Cl"
  726     #
  727     ExpandedPatterns = None
  728     for Name in ValidatedTargetPatternInfo["Names"]:
  729         Patterns = ValidatedTargetPatternInfo["Patterns"][Name]
  730         
  731         if ExpandedPatterns is None:
  732             ExpandedPatterns = []
  733             for Pattern in Patterns:
  734                 ExpandedPatterns.append([Name, Pattern])
  735             continue
  736 
  737         NewExpandedPatterns = []
  738         for ExpandedPattern in  ExpandedPatterns:
  739             for Pattern in Patterns:
  740                 NewExpandedPattern = []
  741                 NewExpandedPattern.extend(ExpandedPattern)
  742                 
  743                 NewExpandedPattern.append(Name)
  744                 NewExpandedPattern.append(Pattern)
  745                 
  746                 NewExpandedPatterns.append(NewExpandedPattern)
  747         
  748         ExpandedPatterns = NewExpandedPatterns
  749 
  750     # Track target pattern info...
  751     TargetPatternTandemScanInfo = []
  752     for ExpandedPattern in ExpandedPatterns:
  753         PatternInfo = {}
  754         PatternInfo["Names"] = []
  755         PatternInfo["Pattern"] = {}
  756 
  757         for Index in range(0, len(ExpandedPattern), 2):
  758             Name = ExpandedPattern[Index].strip()
  759             Pattern = ExpandedPattern[Index + 1].strip()
  760             PatternInfo["Names"].append(Name)
  761             PatternInfo["Pattern"][Name] = Pattern
  762 
  763         TargetPatternTandemScanInfo.append(PatternInfo)
  764 
  765     return TargetPatternTandemScanInfo
  766     
  767 def ValidateTandemScanTargetPattern(TargetPattern):
  768     """Validate target pattern for tandem scan. """
  769 
  770     ValidatedTargetPatternInfo = {}
  771     ValidatedTargetPatternInfo["Names"] = []
  772     ValidatedTargetPatternInfo["Patterns"] = {}
  773 
  774     Mode = OptionsInfo["Mode"]
  775     
  776     TargetPatternWords = TargetPattern.split(",")
  777     if len(TargetPatternWords) % 2:
  778         MiscUtil.PrintError("The number of comma delimited paramater names and values, %d, specified using \"-t --targetPattern\" option must be an even number during, %s, value of \"-m, --mode\" option." % (len(TargetPatternWords), Mode))
  779     
  780     ValidOperationNames = ["AttachAtoms", "ReplaceAtoms", "AttachSMILES"]
  781     CanonicalOperationNamesMap = {}
  782     for OperationName in ValidOperationNames:
  783         CanonicalOperationNamesMap[OperationName.lower()] = OperationName
  784     
  785     # Validate PAS operation name and value pairs...
  786     for Index in range(0, len(TargetPatternWords), 2):
  787         OperationName = TargetPatternWords[Index].strip()
  788         OperationPattern = TargetPatternWords[Index + 1].strip()
  789         
  790         CanonicalOperationName = OperationName.lower()
  791         if  not CanonicalOperationName in CanonicalOperationNamesMap:
  792             MiscUtil.PrintError("The operation name, %s, specified using \"-t, --targetPattern\" is not a valid during, %s, value of \"-m, --mode\" option. Supported operation  names: %s" % (OperationName, Mode, " ".join(ValidOperationNames)))
  793             
  794         if not OperationPattern:
  795             MiscUtil.PrintError("Empty target pattern specified using \"-t, --targetPattern\" must be a valid atomic symbol for operation, %s, during, %s, value of \"-m, --mode\" option." % (OperationName, Mode))
  796 
  797         Name = CanonicalOperationNamesMap[CanonicalOperationName]
  798         if Name in ValidatedTargetPatternInfo["Names"]:
  799             MiscUtil.PrintError("Duplicate operation value, %s, specified using \"-t, --targetPattern\" during, %s, value of \"-m, --mode\" option." % (OperationName, Mode))
  800         
  801         OperationPatterns = OperationPattern.split()
  802         if re.match("^(AttachAtoms|ReplaceAtoms)$", OperationName, re.I):
  803             for Pattern in OperationPatterns:
  804                 if not RDKitUtil.IsValidElementSymbol(Pattern):
  805                     MiscUtil.PrintError("The target pattern, \"%s\", specified using \"-t, --targetPattern\" must be a valid atomic symbol for operation, %s, during, %s, value of \"-m, --mode\" option." % (Pattern, OperationName, Mode))
  806         elif re.match("^AttachSMILES$", OperationName, re.I):
  807             for Pattern in OperationPatterns:
  808                 ValidateSMILESTargetPattern(Pattern, Mode, OperationName)
  809 
  810         ValidatedTargetPatternInfo["Names"].append(Name)
  811         ValidatedTargetPatternInfo["Patterns"][Name] = OperationPatterns
  812 
  813     return ValidatedTargetPatternInfo
  814 
  815 def ValidateSMILESTargetPattern(TargetPattern, Mode, OperationName = None):
  816     """Validate SMILES pattern."""
  817 
  818     Mol = Chem.MolFromSmiles(TargetPattern)
  819     if Mol is None:
  820         if OperationName is None:
  821             MiscUtil.PrintError("The target pattern, \"%s\", specified using \"-t, --targetPattern\" must be a valid SMILES during, %s, value of \"-m, --mode\" option." % (TargetPattern, Mode))
  822         else:
  823             MiscUtil.PrintError("The target pattern, \"%s\", specified using \"-t, --targetPattern\" must be a valid SMILES for operation, %s, during, %s, value of \"-m, --mode\" option." % (TargetPattern, OperationName, Mode))
  824 
  825 def RetrieveOptions():
  826     """Retrieve command line arguments and options"""
  827     
  828     # Get options...
  829     global Options
  830     Options = docopt(_docoptUsage_)
  831 
  832     # Set current working directory to the specified directory...
  833     WorkingDir = Options["--workingdir"]
  834     if WorkingDir:
  835         os.chdir(WorkingDir)
  836     
  837     # Handle examples option...
  838     if "--examples" in Options and Options["--examples"]:
  839         MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_))
  840         sys.exit(0)
  841 
  842 def ValidateOptions():
  843     """Validate option values"""
  844     
  845     MiscUtil.ValidateOptionIntegerValue("-c, --comboSearchAtoms", Options["--comboSearchAtoms"], {">": 0})
  846     MiscUtil.ValidateOptionTextValue("--clearAtomMapNumbers", Options["--clearAtomMapNumbers"], "yes no")
  847     
  848     MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"])
  849     MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "sdf sd mol smi txt csv tsv")
  850 
  851     MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "sdf sd smi txt csv tsv")
  852     MiscUtil.ValidateOptionsOutputFileOverwrite("-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"])
  853     MiscUtil.ValidateOptionsDistinctFileNames("-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"])
  854     
  855     MiscUtil.ValidateOptionTextValue("-m, --mode", Options["--mode"], "ReplaceAtoms AttachAtoms AttachSMILES RxnSMARTS TandemScan")
  856     
  857     MiscUtil.ValidateOptionTextValue("--mp", Options["--mp"], "yes no")
  858     MiscUtil.ValidateOptionTextValue("-p, --permuteTandemScan", Options["--permuteTandemScan"], "yes no")
  859     MiscUtil.ValidateOptionTextValue("-q, --quiet", Options["--quiet"], "yes no")
  860     
  861     MiscUtil.ValidateOptionTextValue(" -u, --uniqueAnalogues", Options["--uniqueAnalogues"], "yes no")
  862 
  863 # Setup a usage string for docopt...
  864 _docoptUsage_ = """
  865 RDKitPerformPositionalAnalogueScan.py - Positional analogue scanning.
  866 
  867 Usage:
  868     RDKitPerformPositionalAnalogueScan.py [--comboSearchAtoms <number>] [--clearAtomMapNumbers <yes or no>]
  869                                           [--infileParams <Name,Value,...>] [--mode <ReplaceAtoms, AttachAtoms, AttachSMILES, RxnSMARTS, TandemScan>]
  870                                           [--mp <yes or no>] [--mpParams <Name,Value,...>] [ --nameSuffix <text>] [ --outfileParams <Name,Value,...> ]
  871                                           [--overwrite] [--permuteTandemScan <yes or no>] [--quiet <yes or no>] [--searchPattern <SMARTSPattern>]
  872                                           [--targetPattern <ElementSymbol, SMILES, RxnSMARTS,...>] [--uniqueAnalogues <yes or no> ]
  873                                           [-w <dir>] -i <infile>  -o <outfile> 
  874     RDKitPerformPositionalAnalogueScan.py -h | --help | -e | --examples
  875 
  876 Description:
  877     Perform Positional Analogue Scanning (PAS) to generate analogues of molecules
  878     by applying chemical transformations to molecules [ Ref 147-148 ]. The chemical
  879     transformations are defined using SMARTS, element symbols, SMILES, and 
  880     RxnSMARTS. Four different types of chemical transformations are available for
  881     for generating analogues of molecules: replace atoms, attach atoms, attach SMILES,
  882     and RxnSMARTS. Tandem positional analogue scanning may be performed by the
  883     concurrent application of multiple chemical transformations.
  884     
  885     A SMARTS search pattern identifies atoms in molecules for attachment or
  886     replacement points during positional analogue scanning. It may retrieve multiple
  887     substructure matches in a molecule. The first matched atom in each substructure
  888     match comprises a set of attachment or replacement points.
  889     
  890     A target pattern encompasses information regarding element symbol, SMILES, and
  891     reaction SMARTS for replacing and attaching atoms, attaching SMILES, and applying
  892     reaction SMARTS. In addition, multiple concurrent chemical transformations may
  893     be specified during tandem positional analogue scanning. 
  894     
  895     The supported input file formats are: Mol (.mol), SD (.sdf, .sd), SMILES (.smi,
  896     .csv, .tsv .txt)
  897 
  898     The supported output file formats are: SD (.sdf, .sd), SMILES (.smi, .csv,
  899     .tsv .txt)
  900 
  901 Options:
  902     -c, --comboSearchAtoms <number>  [default: 1]
  903         Number of concurrent search pattern match atoms to use as attachment or
  904         replacement points during positional analogue scanning in 'AttachAtoms', 
  905         'AttachSMILES', and 'ReplaceAtoms' modes. This value is ignored during 
  906         'RxnSMARTS' and 'TandemScan' values of  '-m, --mode' option.
  907     --clearAtomMapNumbers <yes or no>  [default: yes]
  908         Clear any atom map numbers in target SMILES specification for 'AttachSMILES'
  909         before generating analogues.
  910     -e, --examples
  911         Print examples.
  912     -h, --help
  913         Print this help message.
  914     -i, --infile <infile>
  915         Input file name.
  916     --infileParams <Name,Value,...>  [default: auto]
  917         A comma delimited list of parameter name and value pairs for reading
  918         molecules from files. The supported parameter names for different file
  919         formats, along with their default values, are shown below:
  920             
  921             SD, MOL: removeHydrogens,yes,sanitize,yes,strictParsing,yes
  922             SMILES: smilesColumn,1,smilesNameColumn,2,smilesDelimiter,space,
  923                 smilesTitleLine,auto,sanitize,yes
  924             
  925         Possible values for smilesDelimiter: space, comma or tab.
  926     -m, --mode <ReplaceAtoms, AttachAtoms,...>  [default: ReplaceAtoms]
  927         Type of operations to perform during positional analogue scanning. The
  928         supported values, along with a brief description, are shown below:
  929             
  930             Mode           Description
  931             ReplaceAtoms   Replace atoms with new atoms
  932             AttachAtoms    Attach new atoms to atoms
  933             AttachSMILES   Attach SMILES to atoms
  934             RxnSMARTS      Run reaction SMARTS
  935             TandemScan     Perform tandem scan by combining ReplaceAtoms,
  936                 AttachAtoms, and AttachSMILES                    
  937              
  938         The chemical transformations of input molecules is dependent on the
  939         values of '-s, --searchPattern' and  '-t, --targetPattern' options. For
  940         example, nitrogen-walk or nitrogen scan is performed by '[cH]' and 'N'
  941         values for '-s, --searchPattern' and  '-t, --targetPattern' options.
  942     --mp <yes or no>  [default: no]
  943         Use multiprocessing.
  944          
  945         By default, input data is retrieved in a lazy manner via mp.Pool.imap()
  946         function employing lazy RDKit data iterable. This allows processing of
  947         arbitrary large data sets without any additional requirements memory.
  948         
  949         All input data may be optionally loaded into memory by mp.Pool.map()
  950         before starting worker processes in a process pool by setting the value
  951         of 'inputDataMode' to 'InMemory' in '--mpParams' option.
  952         
  953         A word to the wise: The default 'chunkSize' value of 1 during 'Lazy' input
  954         data mode may adversely impact the performance. The '--mpParams' section
  955         provides additional information to tune the value of 'chunkSize'.
  956     --mpParams <Name,Value,...>  [default: auto]
  957         A comma delimited list of parameter name and value pairs for to
  958         configure multiprocessing.
  959         
  960         The supported parameter names along with their default and possible
  961         values are shown below:
  962         
  963             chunkSize, auto
  964             inputDataMode, Lazy   [ Possible values: InMemory or Lazy ]
  965             numProcesses, auto   [ Default: mp.cpu_count() ]
  966         
  967         These parameters are used by the following functions to configure and
  968         control the behavior of multiprocessing: mp.Pool(), mp.Pool.map(), and
  969         mp.Pool.imap().
  970         
  971         The chunkSize determines chunks of input data passed to each worker
  972         process in a process pool by mp.Pool.map() and mp.Pool.imap() functions.
  973         The default value of chunkSize is dependent on the value of 'inputDataMode'.
  974         
  975         The mp.Pool.map() function, invoked during 'InMemory' input data mode,
  976         automatically converts RDKit data iterable into a list, loads all data into
  977         memory, and calculates the default chunkSize using the following method
  978         as shown in its code:
  979         
  980             chunkSize, extra = divmod(len(dataIterable), len(numProcesses) * 4)
  981             if extra: chunkSize += 1
  982         
  983         For example, the default chunkSize will be 7 for a pool of 4 worker processes
  984         and 100 data items.
  985         
  986         The mp.Pool.imap() function, invoked during 'Lazy' input data mode, employs
  987         'lazy' RDKit data iterable to retrieve data as needed, without loading all the
  988         data into memory. Consequently, the size of input data is not known a priori.
  989         It's not possible to estimate an optimal value for the chunkSize. The default 
  990         chunkSize is set to 1.
  991         
  992         The default value for the chunkSize during 'Lazy' data mode may adversely
  993         impact the performance due to the overhead associated with exchanging
  994         small chunks of data. It is generally a good idea to explicitly set chunkSize to
  995         a larger value during 'Lazy' input data mode, based on the size of your input
  996         data and number of processes in the process pool.
  997         
  998         The mp.Pool.map() function waits for all worker processes to process all
  999         the data and return the results. The mp.Pool.imap() function, however,
 1000         returns the the results obtained from worker processes as soon as the
 1001         results become available for specified chunks of data.
 1002         
 1003         The order of data in the results returned by both mp.Pool.map() and 
 1004         mp.Pool.imap() functions always corresponds to the input data.
 1005     -n, --nameSuffix <text>  [default: Analogue]
 1006         Name suffix for generating molecule names of analogues. Format of analogue
 1007         names: <MolName>_<NameSuffix>_<MolNum>
 1008     -o, --outfile <outfile>
 1009         Output file name.
 1010     --outfileParams <Name,Value,...>  [default: auto]
 1011         A comma delimited list of parameter name and value pairs for writing
 1012         molecules to files. The supported parameter names for different file
 1013         formats, along with their default values, are shown below:
 1014             
 1015             SD: compute2DCoords,auto,kekulize,no
 1016             SMILES: kekulize,no,smilesDelimiter,space, smilesIsomeric,yes,
 1017                 smilesTitleLine,yes,smilesMolName,yes,smilesMolProps,no
 1018             
 1019     --overwrite
 1020         Overwrite existing files.
 1021     -p, --permuteTandemScan <yes or no>  [default: yes]
 1022         Permute atom positions matched by SMARTS search pattern in a molecule
 1023         to generate all possible analogues during tandem positional analogue
 1024         scanning.
 1025         
 1026         This option is only valid for 'TandemScan' value of '-m, --mode' option.
 1027     -q, --quiet <yes or no>  [default: no]
 1028         Use quiet mode. The warning and information messages will not be printed.
 1029     -s, --searchPattern <SMARTSPattern>  [default: auto]
 1030         SMARTS search pattern identifying atoms in molecules for attachment or
 1031         replacement points during positional analogue scanning. The SMARTS search
 1032         pattern may retrieve multiple substructure matches in a molecule. The first
 1033         matched atom in each substructure match comprises a set of attachment or
 1034         replacement points.
 1035         
 1036         The default values, dependent on the value of '-m, --mode' option, are
 1037         shown below:
 1038             
 1039             Mode            Default   Description
 1040             ReplaceAtoms    [cH]      Aromatic carbon  
 1041             AttachAtoms     [cH]      Aromatic carbon
 1042             AttachSMILES    [cH]      Aromatic carbon
 1043             RxnSMARTS       None      Not applicable
 1044             TandemScan      [cH]      Aromatic carbon
 1045             
 1046         This option is ignored during 'RxnSMARTS' value of '-m, --mode' option.
 1047     -t, --targetPattern <ElementSymbol, SMILES, RxnSMARTS...>  [default: auto]
 1048         Target pattern for performing chemical transformations during positional
 1049         analogue scanning. These values are used in conjunction with the value of
 1050         '-s, --searchPattern' to generate appropriate analogues.
 1051         
 1052         The default values, dependent on the values of '-m, --mode' and 
 1053         '-s, --searchPattern' options, are shown below:
 1054             
 1055             Mode            Default        Description
 1056             ReplaceAtoms    N              Element symbol for nitrogen
 1057             AttachAtoms     F              Element symbol for fluorine
 1058             AttachSMILES    C(F)(F)(F)     SMILES for CF3
 1059             RxnSMARTS       [cH:1]>>[N:1] Replace aromatic carbon by nitrogen
 1060             TandemScan      ReplaceAtoms,N,AttachAtoms,F  Replace and attach
 1061             
 1062         The target specifications may contain multiple space delimted values. For
 1063         example:
 1064             
 1065             Mode            Example
 1066             ReplaceAtoms    "N P"
 1067             AttachAtoms     "F Cl Br"
 1068             AttachSMILES    "C(F)(F)(F) CO"
 1069             RxnSMARTS       "[cH:1]>>[N:1] [cH:1]>>[P:1]"
 1070             TandemScan      "ReplaceAtoms,N P,AttachAtoms,F Cl"
 1071             
 1072         The target specification for 'AttachSMILES' may contain atom map numbers.
 1073         The attachment point corresponds to the matched atom with the lowest
 1074         atom map number. Otherwise, the first atom in SMILES target specification
 1075         represents the attachment point. For example:
 1076             
 1077             Mode            Example
 1078             AttachSMILES    "C(C)(C)(C) CO"
 1079             AttachSMILES    "C([C:1])(C)(C) C(F)(F)(F)"
 1080             
 1081         Multiple concurrent chemical transformations are allowed during 'TandemScan'. The
 1082         target pattern specification for 'TandemScan' is a comma delimited list of operation
 1083         type and target pattern. Format: OperationType,TargetPattern,...
 1084         
 1085         The supported operation types and target pattern for 'TandemScan' are shown
 1086         below:
 1087             
 1088             ReplaceAtoms,<ElementSymbol>
 1089             AttachAtoms,<ElementSymbol>
 1090             AttachSMILES,<SMILES> 
 1091             
 1092             ReplaceAtoms,"<ElementSymbol> <ElementSymbol>..."
 1093             AttachAtoms,"<ElementSymbol> <ElementSymbol>..."
 1094             AttachSMILES,"<SMILES> <SMIELS>..."
 1095             
 1096         For example:
 1097             
 1098             ReplaceAtoms,N,AttachAtoms,F
 1099             ReplaceAtoms,N,AttachAtoms,F,AttachSMILES,C(F)(F)(F) 
 1100             
 1101             "ReplaceAtoms,N,AttachAtoms,F Cl"
 1102             "ReplaceAtoms,N P,AttachAtoms,F Cl"
 1103             "ReplaceAtoms,N P,AttachAtoms,F Cl,AttachSMILES,C(F)(F)(F) CO" 
 1104                     
 1105         The number of chemical transformations  in 'TandemScan' must be less than or
 1106         equal to the total number atoms matched by SMARTS search pattern in a molecule.
 1107         Otherwise, it is not possible to perform a 'TandemScan'. The matched atom positions
 1108         may be optionally permuted to generate all possible analogues during  positional
 1109         analogue scanning using '-p, --permuteTandemScan' option.
 1110     -u, --uniqueAnalogues <yes or no>  [default: yes]
 1111         Keep only unique analogues of a molecule corresponding to unique SMILES
 1112         strings. The duplicate SMILES string may be generated during PAS due to
 1113         symmetric replacement or attachment points in molecules.
 1114     -w, --workingdir <dir>
 1115         Location of working directory which defaults to the current directory.
 1116 
 1117 Examples:
 1118     To perform a nitrogen-walk or nitrogen scan by replacing aromatic carbons by
 1119     nitrogens in molecules in a SMILES file and write out a SMILES file, type:
 1120 
 1121         % RDKitPerformPositionalAnalogueScan.py  -i Sample.smi -o SampleOut.smi
 1122 
 1123     To run the first example by explicitly specifying search and target patterns
 1124     for replacing aromatic carbons by nitogens in molecules in a SD file and write
 1125     out a SD file, type:
 1126 
 1127         % RDKitPerformPositionalAnalogueScan.py  -m ReplaceAtoms -s "[cH]"
 1128           -t "N" -i Sample.sdf -o SampleOut.sdf
 1129 
 1130     To run the previous example for replacing aromatic carbons by N and P, type:
 1131 
 1132         % RDKitPerformPositionalAnalogueScan.py  -m ReplaceAtoms -s "[cH]"
 1133           -t "N P" -i Sample.sdf -o SampleOut.sdf
 1134 
 1135     To run the first example in multiprocessing mode on all available CPUs
 1136     without loading all data into memory and write out a SD file, type:
 1137 
 1138         % RDKitPerformPositionalAnalogueScan.py  -i Sample.smi -o SampleOut.sdf
 1139           --mp yes
 1140      
 1141     To run the previous example in multiprocessing mode on all available CPUs
 1142     by loading all data into memory and write out a SD file, type:
 1143 
 1144         % RDKitPerformPositionalAnalogueScan.py  -i Sample.smi -o SampleOut.sdf
 1145           --mp yes --mpParams "inputDataMode,InMemory"
 1146      
 1147     To run the previous example in multiprocessing mode on specific number of
 1148     CPUs and chunk size without loading all data into memory and write out a SD file,
 1149     type:
 1150 
 1151         % RDKitPerformPositionalAnalogueScan.py  -i Sample.smi -o SampleOut.sdf
 1152           --mpParams "inputDataMode,Lazy,numProcesses,4,chunkSize,8"
 1153      
 1154     To perform positional analogue scanning by simultaneously attaching fluorines
 1155     to two aromatic carbons in molecules in a SMILES file and write out a SD file,
 1156     type:
 1157 
 1158         % RDKitPerformPositionalAnalogueScan.py  -m AttachAtoms -s "[cH]"
 1159           -t "F" -c 2 -i Sample.smi -o SampleOut.sdf
 1160 
 1161     To perform positional analogue scanning by attaching SMILES to aromatic
 1162     carbons in molecules in a SMILES file and write out a SD file, type:
 1163 
 1164         % RDKitPerformPositionalAnalogueScan.py  -m AttachSMILES -s "[cH]"
 1165           -t "F" -i Sample.smi -o SampleOut.sdf
 1166 
 1167     To run the previous example for attaching F and CF3 to aromatic carbons,
 1168     type:
 1169 
 1170         % RDKitPerformPositionalAnalogueScan.py  -m AttachSMILES -s "[cH]"
 1171           -t "F C(F)(F)(F)" -i Sample.smi -o SampleOut.sdf
 1172 
 1173     To run the previous example for attaching "C(C)(C)(C)" at a specific attachment
 1174     point corresponding to the lowest atom map number,  type:
 1175 
 1176         % RDKitPerformPositionalAnalogueScan.py  -m AttachSMILES -s "[cH]"
 1177           -t "C([C:1])([C:2])(C)" -i Sample.smi -o SampleOut.sdf
 1178 
 1179     To perform a nitrogen-walk or nitrogen scan by using reaction SMARTS to replace
 1180     aromatic carbons by nitrogens in molecules in a SMILES file and write out a  SMILES
 1181     file,  type:
 1182 
 1183         % RDKitPerformPositionalAnalogueScan.py  -m RxnSMARTS
 1184           -t "[cH:1]>>[N:1]" -i Sample.smi -o SampleOut.smi
 1185 
 1186     To perform a tandem positional analogue scan by concurrently applying multiple
 1187     chemical transformations to aromatic carbons, permute all matched search
 1188     atom positions during analogue generation, and write out a SD file, type:
 1189 
 1190         % RDKitPerformPositionalAnalogueScan.py  -m TandemScan -s "[cH]"
 1191           -t "ReplaceAtoms,N,AttachAtoms,F,AttachSMILES,OC"
 1192           -p yes  -i Sample.smi -o SampleOut.smi
 1193 
 1194     To run the previous example for attaching multiple atoms and SMILES to
 1195     aromatic carbons, type:
 1196 
 1197         % RDKitPerformPositionalAnalogueScan.py  -m TandemScan -s "[cH]"
 1198           -t "ReplaceAtoms,N,AttachAtoms,F Cl,AttachSMILES,C(F)(F)(F) OC"
 1199           -p yes  -i Sample.smi -o SampleOut.smi
 1200 
 1201     To perform a nitrogen-walk or nitrogen scan by replacing aromatic carbons by
 1202     nitrogens in molecules in a SMILES CSV fileS, MILES strings in column 1, name
 1203     in column 2, and write out a SD file, type:
 1204 
 1205         % RDKitPerformPositionalAnalogueScan.py  -m ReplaceAtoms -s "[cH]"
 1206           -t "N" --infileParams "smilesDelimiter,comma, smilesTitleLine,yes,
 1207           smilesColumn,1,smilesNameColumn,2"
 1208           -i SampleSMILES.csv -o SampleOut.sdf
 1209 
 1210 Author:
 1211     Manish Sud(msud@san.rr.com)
 1212 
 1213 See also:
 1214     RDKitConvertFileFormat.py, RDKitEnumerateCompoundLibrary.py,
 1215     RDKitPerformTorsionScan.py
 1216 
 1217 Copyright:
 1218     Copyright (C) 2020 Manish Sud. All rights reserved.
 1219 
 1220     The functionality available in this script is implemented using RDKit, an
 1221     open source toolkit for cheminformatics developed by Greg Landrum.
 1222 
 1223     This file is part of MayaChemTools.
 1224 
 1225     MayaChemTools is free software; you can redistribute it and/or modify it under
 1226     the terms of the GNU Lesser General Public License as published by the Free
 1227     Software Foundation; either version 3 of the License, or (at your option) any
 1228     later version.
 1229 
 1230 """
 1231 
 1232 if __name__ == "__main__":
 1233     main()