MayaChemTools

    1 #!/bin/env python
    2 # File: RDKitUtil.py
    3 # Author: Manish Sud <msud@san.rr.com>
    4 #
    5 # Copyright (C) 2024 Manish Sud. All rights reserved.
    6 #
    7 # The functionality available in this file is implemented using RDKit, an
    8 # open source toolkit for cheminformatics developed by Greg Landrum.
    9 #
   10 # This file is part of MayaChemTools.
   11 #
   12 # MayaChemTools is free software; you can redistribute it and/or modify it under
   13 # the terms of the GNU Lesser General Public License as published by the Free
   14 # Software Foundation; either version 3 of the License, or (at your option) any
   15 # later version.
   16 #
   17 # MayaChemTools is distributed in the hope that it will be useful, but without
   18 # any warranty; without even the implied warranty of merchantability of fitness
   19 # for a particular purpose.  See the GNU Lesser General Public License for more
   20 # details.
   21 #
   22 # You should have received a copy of the GNU Lesser General Public License
   23 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
   24 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
   25 # Boston, MA, 02111-1307, USA.
   26 #
   27 
   28 from __future__ import print_function
   29 
   30 import os
   31 import sys
   32 import re
   33 import base64
   34 import pickle
   35 
   36 from rdkit import Chem
   37 from rdkit.Chem import AllChem
   38 from rdkit.Chem import Draw
   39 
   40 import MiscUtil
   41 
   42 __all__ = ["AreAtomIndicesSequentiallyConnected", "AreAtomMapNumbersPresentInMol", "AreHydrogensMissingInMolecule", "CalculateFormalCharge", "CalculateSpinMultiplicity", "ClearAtomMapNumbers", "ConstrainAndEmbed", "FilterSubstructureMatchByAtomMapNumbers", "FilterSubstructureMatchesByAtomMapNumbers", "GetAtomIndices", "GetAtomMapIndices", "GetAtomMapIndicesAndMapNumbers", "GetAtomSymbols", "GetAtomPositions", "GetFormalCharge", "GetHeavyAtomNeighbors", "GetInlineSVGForMolecule", "GetInlineSVGForMolecules", "GetMolName", "GetNumFragments", "GetNumHeavyAtomNeighbors", "GetSpinMultiplicity", "GetSVGForMolecule", "GetSVGForMolecules", "GetPsi4XYZFormatString", "GetTorsionsAroundRotatableBonds", "GenerateBase64EncodedMolStrings", "GenerateBase64EncodedMolStringWithConfIDs", "IsAtomSymbolPresentInMol", "IsMolEmpty", "IsValidElementSymbol", "IsValidAtomIndex", "MolFromBase64EncodedMolString", "GenerateBase64EncodedMolStringsWithIDs", "MolToBase64EncodedMolString", "MolFromSubstructureMatch", "MolsFromSubstructureMatches", "ReadMolecules", "ReadAndValidateMolecules", "ReadMoleculesFromSDFile", "ReadMoleculesFromMolFile", "ReadMoleculesFromMol2File", "ReadMoleculesFromPDBFile", "ReadMoleculesFromSMILESFile", "ReorderAtomIndicesInSequentiallyConnectedManner", "SetAtomPositions", "SetWriterMolProps", "ValidateElementSymbols", "WriteMolecules"]
   43 
   44 def GetMolName(Mol, MolNum = None):
   45     """Get molecule name.
   46     
   47     Arguments:
   48         Mol (object): RDKit molecule object.
   49         MolNum (int or None): Molecule number in input file.
   50 
   51     Returns:
   52         str : Molname corresponding to _Name property of a molecule, generated
   53             from specieid MolNum using the format "Mol%d" % MolNum, or an
   54             empty string.
   55 
   56     """
   57     
   58     MolName = ''
   59     if Mol.HasProp("_Name"):
   60         MolName = Mol.GetProp("_Name")
   61 
   62     if not len(MolName):
   63         if MolNum is not None:
   64             MolName = "Mol%d" % MolNum
   65     
   66     return MolName
   67 
   68 def GetInlineSVGForMolecule(Mol, Width, Height, Legend = None, AtomListToHighlight = None, BondListToHighlight = None, BoldText = True, Base64Encoded = True):
   69     """Get SVG image text for a molecule suitable for inline embedding into a HTML page.
   70     
   71     Arguments:
   72         Mol (object): RDKit molecule object.
   73         Width (int): Width of a molecule image in pixels.
   74         Height (int): Height of a molecule image in pixels.
   75         Legend (str): Text to display under the image.
   76         AtomListToHighlight (list): List of atoms to highlight.
   77         BondListToHighlight (list): List of bonds to highlight.
   78         BoldText (bool): Flag to make text bold in the image of molecule. 
   79         Base64Encoded (bool): Flag to return base64 encoded string. 
   80 
   81     Returns:
   82         str : SVG image text for inline embedding into a HTML page using "img"
   83             tag: <img src="data:image/svg+xml;charset=UTF-8,SVGImageText> or
   84             tag: <img src=">
   85 
   86     """
   87 
   88     SVGText = GetSVGForMolecule(Mol, Width, Height, Legend, AtomListToHighlight, BondListToHighlight, BoldText)
   89     return _ModifySVGForInlineEmbedding(SVGText, Base64Encoded)
   90     
   91 def GetInlineSVGForMolecules(Mols, MolsPerRow, MolWidth, MolHeight, Legends = None, AtomListsToHighlight = None, BondListsToHighLight = None, BoldText = True, Base64Encoded = True):
   92     """Get SVG image text for  molecules suitable for inline embedding into a HTML page.
   93     
   94     Arguments:
   95         Mols (list): List of RDKit molecule objects.
   96         MolsPerRow (int): Number of molecules per row.
   97         Width (int): Width of a molecule image in pixels.
   98         Height (int): Height of a molecule image in pixels.
   99         Legends (list): List containing strings to display under images.
  100         AtomListsToHighlight (list): List of lists containing atoms to highlight
  101             for molecules.
  102         BondListsToHighlight (list): List of lists containing bonds to highlight
  103             for molecules
  104         BoldText (bool): Flag to make text bold in the image of molecules. 
  105         Base64Encoded (bool): Flag to return base64 encoded string. 
  106 
  107     Returns:
  108         str : SVG image text for inline embedding into a HTML page using "img"
  109             tag: <img src="data:image/svg+xml;charset=UTF-8,SVGImageText> or
  110             tag: <img src=">
  111 
  112     """
  113     
  114     SVGText = GetSVGForMolecules(Mols, MolsPerRow, MolWidth, MolHeight, Legends, AtomListsToHighlight, BondListsToHighLight, BoldText)
  115     return _ModifySVGForInlineEmbedding(SVGText, Base64Encoded)
  116 
  117 def _ModifySVGForInlineEmbedding(SVGText, Base64Encoded):
  118     """Modify SVG for inline embedding into a HTML page using "img" tag
  119     along with performing base64 encoding.
  120     """
  121     
  122     # Take out all tags till the start of '<svg' tag...
  123     Pattern = re.compile("^.*<svg", re.I | re.S)
  124     SVGText = Pattern.sub("<svg", SVGText)
  125     
  126     # Add an extra space before the "width=..." tag. Otherwise, inline embedding may
  127     # cause the following XML error on some browsers due to start of the "width=..."
  128     # at the begining of the line in <svg ...> tag:
  129     #
  130     #  XML5607: Whitespace expected.
  131     #
  132     SVGText = re.sub("width='", " width='", SVGText, flags = re.I)
  133     
  134     # Take out trailing new line...
  135     SVGText = SVGText.strip()
  136 
  137     # Perform base64 encoding by turning text into byte stream using string
  138     # encode and transform byte stream returned by b64encode into a string
  139     # by string decode...
  140     #
  141     if Base64Encoded:
  142         SVGText = base64.b64encode(SVGText.encode()).decode()
  143 
  144     return SVGText
  145 
  146 def GetSVGForMolecule(Mol, Width, Height, Legend = None, AtomListToHighlight = None, BondListToHighlight = None, BoldText = True):
  147     """Get SVG image text for a molecule suitable for viewing in a browser.
  148     
  149     Arguments:
  150         Mol (object): RDKit molecule object.
  151         Width (int): Width of a molecule image in pixels.
  152         Height (int): Height of a molecule image in pixels.
  153         Legend (str): Text to display under the image.
  154         AtomListToHighlight (list): List of atoms to highlight.
  155         BondListToHighlight (list): List of bonds to highlight.
  156         BoldText (bool): Flag to make text bold in the image of molecule. 
  157 
  158     Returns:
  159         str : SVG image text for writing to a SVG file for viewing in a browser.
  160 
  161     """
  162     
  163     Mols = [Mol]
  164     
  165     MolsPerRow = 1
  166     MolWidth = Width
  167     MolHeight = Height
  168     
  169     Legends = [Legend] if Legend is not None else None
  170     AtomListsToHighlight = [AtomListToHighlight] if AtomListToHighlight is not None else None
  171     BondListsToHighLight = [BondListsToHighLight] if BondListToHighlight is not None else None
  172     
  173     return GetSVGForMolecules(Mols, MolsPerRow, MolWidth, MolHeight, Legends, AtomListsToHighlight, BondListsToHighLight, BoldText)
  174 
  175 def GetSVGForMolecules(Mols, MolsPerRow, MolWidth, MolHeight, Legends = None, AtomListsToHighlight = None, BondListsToHighlight = None, BoldText = True):
  176     """Get SVG image text for molecules suitable for viewing in a browser.
  177     
  178     Arguments:
  179         Mols (list): List of RDKit molecule objects.
  180         MolsPerRow (int): Number of molecules per row.
  181         Width (int): Width of a molecule image in pixels.
  182         Height (int): Height of a molecule image in pixels.
  183         Legends (list): List containing strings to display under images.
  184         AtomListsToHighlight (list): List of lists containing atoms to highlight
  185             for molecules.
  186         BondListsToHighlight (list): List of lists containing bonds to highlight
  187             for molecules
  188         BoldText (bool): Flag to make text bold in the image of molecules. 
  189 
  190     Returns:
  191         str : SVG image text for writing to a SVG file for viewing in a browser.
  192 
  193     """
  194     
  195     SVGText = Draw.MolsToGridImage(Mols, molsPerRow = MolsPerRow, subImgSize = (MolWidth,MolHeight), legends = Legends, highlightAtomLists = AtomListsToHighlight, highlightBondLists = BondListsToHighlight, useSVG = True)
  196     
  197     return _ModifySVGForBrowserViewing(SVGText, BoldText)
  198 
  199 def _ModifySVGForBrowserViewing(SVGText, BoldText = True):
  200     """Modify SVG for loading into a browser."""
  201     
  202     # It appears that the string 'xmlns:svg' needs to be replaced with 'xmlns' in the
  203     # SVG image string generated by older versions of RDKit. Otherwise, the image
  204     # doesn't load in web browsers.
  205     #
  206     if re.search("xmlns:svg", SVGText, re.I):
  207         SVGText = re.sub("xmlns:svg", "xmlns", SVGText, flags = re.I)
  208     
  209     # Make text bold...
  210     if BoldText:
  211         SVGText = re.sub("font-weight:normal;", "font-weight:bold;", SVGText, flags = re.I)
  212     
  213     return SVGText
  214 
  215 def IsMolEmpty(Mol):
  216     """Check for the presence of atoms in a molecule.
  217     
  218     Arguments:
  219         Mol (object): RDKit molecule object.
  220 
  221     Returns:
  222         bool : True - No atoms in molecule; Otherwise, false. 
  223 
  224     """
  225 
  226     Status = False if Mol.GetNumAtoms() else True
  227     
  228     return Status
  229 
  230 def IsAtomSymbolPresentInMol(Mol, AtomSymbol, IgnoreCase = True):
  231     """ Check for the presence of an atom symbol in a molecule.
  232     
  233     Arguments:
  234         Mol (object): RDKit molecule object.
  235         AtomSymbol (str): Atom symbol.
  236 
  237     Returns:
  238         bool : True - Atom symbol in molecule; Otherwise, false. 
  239 
  240     """
  241     
  242     for Atom in Mol.GetAtoms():
  243         Symbol = Atom.GetSymbol()
  244         if IgnoreCase:
  245             if re.match("^%s$" % AtomSymbol, Symbol, re.I):
  246                 return True
  247         else:
  248             if re.match("^%s$" % AtomSymbol, Symbol):
  249                 return True
  250     
  251     return False
  252 
  253 def ValidateElementSymbols(ElementSymbols):
  254     """Validate element symbols.
  255     
  256     Arguments:
  257         ElementSymbols (list): List of element symbols to validate.
  258 
  259     Returns:
  260         bool : True - All element symbols are valid; Otherwise, false. 
  261 
  262     """
  263     for ElementSymbol in ElementSymbols:
  264         if not IsValidElementSymbol(ElementSymbol):
  265             return False
  266     
  267     return True
  268 
  269 def GetAtomPositions(Mol, ConfID = -1):
  270     """Retrieve a list of lists containing coordinates of all atoms in a
  271     molecule.
  272     
  273     Arguments:
  274         Mol (object): RDKit molecule object.
  275         ConfID (int): Conformer number.
  276 
  277     Returns:
  278         list : List of lists containing atom positions.
  279 
  280     Examples:
  281 
  282         for AtomPosition in RDKitUtil.GetAtomPositions(Mol):
  283             print("X: %s; Y: %s; Z: %s" % (AtomPosition[0], AtomPosition[1], AtomPosition[2]))
  284 
  285     """
  286 
  287     return Mol.GetConformer(id = ConfID).GetPositions().tolist()
  288 
  289 def SetAtomPositions(Mol, AtomPositions, ConfID = -1):
  290     """Set atom positions of all atoms in a molecule.
  291     
  292     Arguments:
  293         Mol (object): RDKit molecule object.
  294         AtomPositions (object): List of lists containing atom positions.
  295         ConfID (int): Conformer number.
  296 
  297     Returns:
  298         object : RDKit molecule object.
  299 
  300     """
  301     
  302     MolConf = Mol.GetConformer(ConfID)
  303 
  304     for Index in range(len(AtomPositions)):
  305             MolConf.SetAtomPosition(Index, tuple(AtomPositions[Index]))
  306     
  307     return Mol
  308 
  309 def GetAtomSymbols(Mol):
  310     """Retrieve a list containing atom symbols of all atoms a molecule.
  311     
  312     Arguments:
  313         Mol (object): RDKit molecule object.
  314 
  315     Returns:
  316         list : List of atom symbols.
  317 
  318     """
  319 
  320     return [Atom.GetSymbol() for Atom in Mol.GetAtoms()]
  321 
  322 def GetAtomIndices(Mol):
  323     """Retrieve a list containing atom indices of all atoms a molecule.
  324     
  325     Arguments:
  326         Mol (object): RDKit molecule object.
  327 
  328     Returns:
  329         list : List of atom indices.
  330 
  331     """
  332 
  333     return [Atom.GetIdx() for Atom in Mol.GetAtoms()]
  334 
  335 def GetFormalCharge(Mol, CheckMolProp = True):
  336     """Get formal charge of a molecule. The formal charge is either retrieved
  337     from 'FormalCharge' molecule property or calculated using RDKit function
  338     Chem.GetFormalCharge(Mol).
  339     
  340     The 'FormalCharge' molecule property may contain multiple space delimited
  341     values. The total formal charge corresponds to the sum of the specified formal
  342     charge values.
  343 
  344     Arguments:
  345         Mol (object): RDKit molecule object.
  346         CheckMolProp (bool): Check 'FormalCharge' molecule property to
  347             retrieve formal charge.
  348 
  349     Returns:
  350         int : Formal charge.
  351 
  352     """
  353     
  354     Name = 'FormalCharge'
  355     if (CheckMolProp and Mol.HasProp(Name)):
  356         FormalCharge = Mol.GetProp(Name)
  357         Values = FormalCharge.split()
  358         if len(Values) > 1:
  359             MiscUtil.PrintWarning("RDKitUtil.GetFormalCharge: Molecule property, %s, contains multiple values, %s. Formal charge corresponds to sum of the specified values..." % (Name, FormalCharge))
  360             FormalCharge = 0.0
  361             for Value in Values:
  362                 FormalCharge += float(Value)
  363             FormalCharge = int(FormalCharge)
  364         else:
  365             FormalCharge = int(float(FormalCharge))
  366     else:
  367         FormalCharge =  CalculateFormalCharge(Mol)
  368 
  369     return int(FormalCharge)
  370 
  371 def CalculateFormalCharge(Mol):
  372     """Calculate formal charge of a molecule. The formal charge is calculated
  373     using RDKit function Chem.GetFormalCharge(Mol).
  374 
  375     Arguments:
  376         Mol (object): RDKit molecule object.
  377             retrieve formal charge.
  378 
  379     Returns:
  380         int : Formal charge.
  381 
  382     """
  383     
  384     return int(Chem.GetFormalCharge(Mol))
  385     
  386 def GetSpinMultiplicity(Mol, CheckMolProp = True):
  387     """Get spin multiplicity of a molecule. The spin multiplicity is either
  388     retrieved from 'SpinMultiplicity' molecule property or calculated
  389     from the number of free radical electrons using Hund's rule of maximum
  390     multiplicity defined as 2S + 1 where S is the total electron spin. The
  391     total spin is 1/2 the number of free radical electrons in a molecule.
  392     
  393     The 'SpinMultiplicity' molecule property may contain multiple space delimited
  394     values. The total spin multiplicity corresponds to the total number of free radical
  395     electrons which are calculated for each specified value.
  396 
  397     Arguments:
  398         Mol (object): RDKit molecule object.
  399         CheckMolProp (bool): Check 'SpinMultiplicity' molecule property to
  400             retrieve spin multiplicity.
  401 
  402     Returns:
  403         int : Spin multiplicity.
  404 
  405     """
  406     
  407     Name = 'SpinMultiplicity'
  408     if (CheckMolProp and Mol.HasProp(Name)):
  409         SpinMultiplicity = Mol.GetProp(Name)
  410         Values = SpinMultiplicity.split()
  411         if len(Values) > 1:
  412             MiscUtil.PrintWarning("RDKitUtil.GetSpinMultiplicity: Molecule property, %s, contains multiple values, %s. Calculating spin multiplicity corresponding to total number of free radical electrons for each specified value..." % (Name, SpinMultiplicity))
  413             NumRadicalElectrons = 0
  414             for Value in Values:
  415                 NumRadicalElectrons += int(float(Value)) - 1
  416             
  417             TotalElectronicSpin = NumRadicalElectrons/2
  418             SpinMultiplicity = 2 * TotalElectronicSpin + 1
  419         else:
  420             SpinMultiplicity = int(float(SpinMultiplicity))
  421     else:
  422         SpinMultiplicity = CalculateSpinMultiplicity(Mol)
  423 
  424     return int(SpinMultiplicity)
  425 
  426 def CalculateSpinMultiplicity(Mol):
  427     """Calculate spin multiplicity of a molecule. The spin multiplicity is calculated
  428     from the number of free radical electrons using Hund's rule of maximum
  429     multiplicity defined as 2S + 1 where S is the total electron spin. The
  430     total spin is 1/2 the number of free radical electrons in a molecule.
  431 
  432     Arguments:
  433         Mol (object): RDKit molecule object.
  434 
  435     Returns:
  436         int : Spin multiplicity.
  437 
  438     """
  439     
  440     # Calculate spin multiplicity using Hund's rule of maximum multiplicity...
  441     NumRadicalElectrons = 0
  442     for Atom in Mol.GetAtoms():
  443         NumRadicalElectrons += Atom.GetNumRadicalElectrons()
  444 
  445     TotalElectronicSpin = NumRadicalElectrons/2
  446     SpinMultiplicity = 2 * TotalElectronicSpin + 1
  447 
  448     return int(SpinMultiplicity)
  449     
  450 def GetPsi4XYZFormatString(Mol, ConfID = -1, FormalCharge = "auto", SpinMultiplicity = "auto", Symmetry = "auto", NoCom = False, NoReorient = False, CheckFragments = False):
  451     """Retrieve geometry string of a molecule in Psi4ish XYZ format to perform
  452     Psi4 quantum chemistry calculations.
  453     
  454     You may explicit specify multiple space delimited values for formal charge
  455     and spin multiplicity. Otherwise, these values are either automatically
  456     retrieved from 'FormalCharge' and 'SpinMultiplicity' molecule properties or
  457     calculated using RDKit. The number of specified values for these properties
  458     must match the number of fragments in the molecule during the processing
  459     of the fragments.
  460 
  461     Arguments:
  462         Mol (object): RDKit molecule object.
  463         ConfID (int): Conformer number.
  464         FormalCharge (str): Specified formal charge or 'auto' to calculate
  465            its value by RDKit.
  466         SpinMultiplicity (str): Specified spin multiplicity or 'auto' to calculate
  467            its value by RDKit.
  468         Symmetry (str): Specified symmetry or 'auto' to calculate its value by
  469            Psi4.
  470         NoCom (bool): Flag to disable recentering of a molecule by Psi4.
  471         NoReorient (bool): Flag to disable reorientation of a molecule by Psi4.
  472         CheckFragments (bool): Check for fragments and setup geometry string
  473            using  -- separator between fragments.
  474 
  475     Returns:
  476         str : Geometry string of a molecule in Psi4ish XYZ format.
  477 
  478     """
  479 
  480     # Check for fragments...
  481     Mols = [Mol]
  482     if CheckFragments:
  483         Fragments = list(Chem.rdmolops.GetMolFrags(Mol, asMols = True))
  484         if len(Fragments) > 1:
  485             Mols = Fragments
  486     
  487     FragMolFormalCharges = _SetFormalChargesForPsi4XYZFormatString(Mol, Mols, FormalCharge, CheckFragments)
  488     FragMolSpinMultiplicities = _SetSpinMultiplicitiesForPsi4XYZFormatString(Mol, Mols, SpinMultiplicity, CheckFragments)
  489     
  490     # Setup geometry string for Ps4...
  491     GeometryList = []
  492     FragMolCount = 0
  493     
  494     for FragMolIndex, FragMol in enumerate(Mols):
  495         FragMolCount += 1
  496         if FragMolCount > 1:
  497             GeometryList.append("--")
  498         
  499         FragMolFormalCharge = FragMolFormalCharges[FragMolIndex]
  500         FragMolSpinMultiplicity = FragMolSpinMultiplicities[FragMolIndex]
  501         if FragMolFormalCharge is None or FragMolSpinMultiplicity is None:
  502             MiscUtil.PrintInfo("")
  503             MiscUtil.PrintWarning("RDKitUtil.GetPsi4XYZFormatString: Failed to set formal charge and spin multiplicity values. Both formal charge, %s, and spin multiplicity, %s, must be valid values. These values are either specified explicitly or automatically calculated..." % (FragMolFormalCharge, FragMolSpinMultiplicity))
  504         else:
  505             GeometryList.append("%s %s" % (FragMolFormalCharge, FragMolSpinMultiplicity))
  506         
  507         AtomSymbols = GetAtomSymbols(FragMol)
  508         AtomPositions = GetAtomPositions(FragMol, ConfID)
  509         
  510         for AtomSymbol, AtomPosition in zip(AtomSymbols, AtomPositions):
  511             GeometryList.append("%s %s %s %s" % (AtomSymbol, AtomPosition[0], AtomPosition[1], AtomPosition[2]))
  512 
  513     GeometryList.append("units angstrom")
  514     
  515     if not re.match("^auto$", Symmetry, re.I):
  516         Name = 'Symmetry'
  517         if (Mol.HasProp(Name)):
  518             Symmetry =  Mol.GetProp(Name)
  519         GeometryList.append("symmetry %s" % Symmetry)
  520     
  521     if NoCom:
  522         GeometryList.append("no_com")
  523         
  524     if NoReorient:
  525         GeometryList.append("no_reorient")
  526     
  527     Geometry = "\n".join(GeometryList)
  528     
  529     return Geometry
  530 
  531 def _SetFormalChargesForPsi4XYZFormatString(Mol, FragMols, FormalCharge, CheckFragments):
  532     """Setup formal charges for Psi4 XYZ format string. """
  533 
  534     if not CheckFragments:
  535         if re.match("^auto$", FormalCharge, re.I):
  536             MolFormalCharge = GetFormalCharge(Mol)
  537         else:
  538             MolFormalCharge = int(FormalCharge)
  539         return [MolFormalCharge]
  540     
  541     FragMolsCount = len(FragMols)
  542     FormalCharges = [None] * FragMolsCount
  543     
  544     if re.match("^auto$", FormalCharge, re.I):
  545         PropName = "FormalCharge"
  546         if Mol.HasProp(PropName):
  547             FormalCharge = Mol.GetProp(PropName)
  548             FormalChargeWords = FormalCharge.split()
  549             if len(FormalChargeWords) == FragMolsCount:
  550                 FormalCharges = [int(float(FormalCharge)) for FormalCharge in FormalChargeWords]
  551             else:
  552                 MiscUtil.PrintWarning("RDKitUtil.GetPsi4XYZFormatString: Ignoring specified value, %s, for FormalCharge molecule property. The number of space delimted specified values, %s,  must match number of fragments, %s, in the molecule..." % (FormalCharge, len(FormalChargeWords), FragMolsCount))
  553         else:
  554             FormalCharges = [CalculateFormalCharge(FragMol) for FragMol in FragMols]
  555     else:
  556         FormalChargeWords = FormalCharge.split()
  557         if len(FormalChargeWords) != FragMolsCount:
  558             MiscUtil.PrintWarning("RDKitUtil.GetPsi4XYZFormatString: Ignoring specified value, %s, for FormalCharge paramater. The number of space delimted specified values, %s,  must match number of fragments, %s, in the molecule..." % (FormalCharge, len(FormalChargeWords), FragMolsCount))
  559         else:
  560             FormalCharges = [int(FormalCharge) for FormalCharge in FormalChargeWords]
  561 
  562     return FormalCharges
  563 
  564 def _SetSpinMultiplicitiesForPsi4XYZFormatString(Mol, FragMols, SpinMultiplicity, CheckFragments):
  565     """Setup spin multiplicites for Psi4 XYZ format string. """
  566     
  567     if not CheckFragments:
  568         if re.match("^auto$", SpinMultiplicity, re.I):
  569             MolSpinMultiplicity = GetSpinMultiplicity(Mol)
  570         else:
  571             MolSpinMultiplicity = int(SpinMultiplicity)
  572         return [MolSpinMultiplicity]
  573         
  574     FragMolsCount = len(FragMols)
  575     SpinMultiplicities = [None] * FragMolsCount
  576     
  577     if re.match("^auto$", SpinMultiplicity, re.I):
  578         PropName = "SpinMultiplicity"
  579         if Mol.HasProp(PropName):
  580             SpinMultiplicity = Mol.GetProp(PropName)
  581             SpinMultiplicityWords = SpinMultiplicity.split()
  582             if len(SpinMultiplicityWords) == FragMolsCount:
  583                 SpinMultiplicities = [int(float(SpinMultiplicity)) for SpinMultiplicity in SpinMultiplicityWords]
  584             else:
  585                 MiscUtil.PrintWarning("RDKitUtil.GetPsi4XYZFormatString: Ignoring specified value, %s, for SpinMultiplicity molecule property. The number of space delimted specified values, %s,  must match number of fragments, %s, in the molecule..." % (SpinMultiplicity, len(SpinMultiplicityWords), FragMolsCount))
  586         else:
  587             SpinMultiplicities = [CalculateSpinMultiplicity(FragMol) for FragMol in FragMols]
  588     else:
  589         SpinMultiplicityWords = SpinMultiplicity.split()
  590         if len(SpinMultiplicityWords) != FragMolsCount:
  591             MiscUtil.PrintWarning("RDKitUtil.GetPsi4XYZFormatString: Ignoring specified value, %s, for SpinMultiplicity paramater. The number of space delimted specified values, %s,  must match number of fragments, %s, in the molecule..." % (SpinMultiplicity, len(SpinMultiplicityWords), FragMolsCount))
  592         else:
  593             SpinMultiplicities = [int(SpinMultiplicity) for SpinMultiplicity in SpinMultiplicityWords]
  594 
  595     return SpinMultiplicities
  596 
  597 def GetTorsionsAroundRotatableBonds(Mol, RotBondsPatternMol, IgnoreHydrogens = True):
  598     """Identify torsions around rotatable bonds and return a list of lists
  599     containing atom indices of torsions.
  600     
  601     Arguments:
  602         Mol (object): RDKit molecule object.
  603         PatternMol (object): RDKit molecule object corresponding to SMARTS
  604             pattern to identify rotatable bonds.
  605         IgnoreHydrogens (bool): Flag to include torsions around rotatable bonds
  606             containing hydrogens.
  607 
  608     Returns:
  609         list : List of lists containing atom indices of torsions around
  610             rotatable bonds.
  611 
  612     """
  613     
  614     # Match rotatable bonds...
  615     RotBondsMatches = FilterSubstructureMatchesByAtomMapNumbers(Mol, RotBondsPatternMol, Mol.GetSubstructMatches(RotBondsPatternMol, useChirality = False))
  616     if not len(RotBondsMatches):
  617         return None
  618     
  619     # Identify torsions...
  620     TorsioAtomIndicesList = []
  621     for RotBondMatch in RotBondsMatches:
  622         if len(RotBondMatch) != 2:
  623             continue
  624         
  625         Atom1Index, Atom2Index = RotBondMatch
  626         Atom1NbrIndices = _GetRotBondAtomNeighbors(Mol, Atom1Index, Atom2Index, IgnoreHydrogens)
  627         Atom2NbrIndices = _GetRotBondAtomNeighbors(Mol, Atom2Index, Atom1Index, IgnoreHydrogens)
  628         
  629         TorsionAtomIndices = []
  630         for Atom1NbrIndex in Atom1NbrIndices:
  631             if Atom1NbrIndex in Atom2NbrIndices:
  632                 continue
  633             for Atom2NbrIndex in Atom2NbrIndices:
  634                 if Atom2NbrIndex in Atom1NbrIndices:
  635                     continue
  636                 TorsionAtomIndices.append([Atom1NbrIndex, Atom1Index, Atom2Index, Atom2NbrIndex])
  637         
  638         TorsioAtomIndicesList.extend(TorsionAtomIndices)
  639     
  640     return TorsioAtomIndicesList
  641 
  642 def _GetRotBondAtomNeighbors(Mol, AtomIndex, BondedAtomIndex, IgnoreHydrogens):
  643     """Get atom neigbors around a rotatable bond."""
  644 
  645     Atom = Mol.GetAtomWithIdx(AtomIndex)
  646     AtomNeighbors = []
  647     
  648     for AtomNbr in Atom.GetNeighbors():
  649         AtomNbrIndex = AtomNbr.GetIdx()
  650         if AtomNbrIndex == BondedAtomIndex:
  651             continue
  652         if IgnoreHydrogens:
  653             if AtomNbr.GetAtomicNum() == 1:
  654                 continue
  655         
  656         AtomNeighbors.append(AtomNbrIndex)
  657     
  658     return AtomNeighbors
  659 
  660 def GetNumFragments(Mol):
  661     """Get number of fragment in a molecule.
  662 
  663     Arguments:
  664         Atom (object): RDKit molecule object.
  665 
  666     Returns:
  667         int : Number of fragments.
  668 
  669     """
  670     
  671     Fragments = Chem.rdmolops.GetMolFrags(Mol, asMols = False)
  672     
  673     return len(Fragments) if Fragments is not None else 0
  674 
  675 def GetNumHeavyAtomNeighbors(Atom):
  676     """Get number of heavy atom neighbors.
  677 
  678     Arguments:
  679         Atom (object): RDKit atom object.
  680 
  681     Returns:
  682         int : Number of neighbors.
  683 
  684     """
  685     
  686     NbrCount = 0
  687     for AtomNbr in Atom.GetNeighbors():
  688         if AtomNbr.GetAtomicNum() > 1:
  689             NbrCount += 1
  690     
  691     return NbrCount
  692 
  693 def GetHeavyAtomNeighbors(Atom):
  694     """Get a list of heavy atom neighbors.
  695 
  696     Arguments:
  697         Atom (object): RDKit atom object.
  698 
  699     Returns:
  700         list : List of heavy atom neighbors.
  701 
  702     """
  703     
  704     AtomNeighbors = []
  705     for AtomNbr in Atom.GetNeighbors():
  706         if AtomNbr.GetAtomicNum() > 1:
  707             AtomNeighbors.append(AtomNbr)
  708     
  709     return AtomNeighbors
  710 
  711 def IsValidElementSymbol(ElementSymbol):
  712     """Validate element symbol.
  713     
  714     Arguments:
  715         ElementSymbol (str): Element symbol
  716 
  717     Returns:
  718         bool : True - Valid element symbol; Otherwise, false. 
  719 
  720     """
  721 
  722     try:
  723         AtomicNumber = Chem.GetPeriodicTable().GetAtomicNumber(ElementSymbol)
  724         Status = True if AtomicNumber > 0  else False
  725     except Exception as ErrMsg:
  726         Status = False
  727     
  728     return Status
  729 
  730 def IsValidAtomIndex(Mol, AtomIndex):
  731     """Validate presence  atom index in a molecule.
  732     
  733     Arguments:
  734         Mol (object): RDKit molecule object.
  735         AtomIndex (int): Atom index.
  736 
  737     Returns:
  738         bool : True - Valid atom index; Otherwise, false. 
  739 
  740     """
  741     for Atom in Mol.GetAtoms():
  742         if AtomIndex == Atom.GetIdx():
  743             return True
  744     
  745     return False
  746 
  747 def AreHydrogensMissingInMolecule(Mol):
  748     """Check for any missing hydrogens in  in a molecue.
  749 
  750     Arguments:
  751         Mol (object): RDKit molecule object.
  752 
  753     Returns:
  754         bool : True - Missing hydrogens; Otherwise, false. 
  755 
  756     """
  757 
  758     for Atom in Mol.GetAtoms():
  759         NumExplicitAndImplicitHs = Atom.GetNumExplicitHs() + Atom.GetNumImplicitHs()
  760         if NumExplicitAndImplicitHs > 0:
  761             return True
  762 
  763     return False
  764 
  765 def AreAtomIndicesSequentiallyConnected(Mol, AtomIndices):
  766     """Check for the presence bonds between sequential pairs of atoms in a
  767     molecule.
  768     
  769     Arguments:
  770         Mol (object): RDKit molecule object.
  771         AtomIndices (list): List of atom indices.
  772 
  773     Returns:
  774         bool : True - Sequentially connected; Otherwise, false. 
  775 
  776     """
  777 
  778     for Index in range(0, (len(AtomIndices) -1)):
  779         Bond = Mol.GetBondBetweenAtoms(AtomIndices[Index], AtomIndices[Index + 1])
  780         if Bond is None:
  781             return False
  782         
  783         if Bond.GetIdx() is None:
  784             return False
  785     
  786     return True
  787     
  788 def ReorderAtomIndicesInSequentiallyConnectedManner(Mol, AtomIndices):
  789     """Check for the presence of sequentially connected list of atoms in an
  790     arbitray list of atoms in molecule.
  791    
  792     Arguments:
  793         Mol (object): RDKit molecule object.
  794         AtomIndices (list): List of atom indices.
  795 
  796     Returns:
  797         bool : True - Sequentially connected list found; Otherwise, false. 
  798         list : List of seqeuntially connected atoms or None.
  799 
  800     """
  801     
  802     # Count the number of neighbors for specified atom indices ensuring
  803     # that the neighbors are also part of atom indices...
  804     AtomNbrsCount = {}
  805     for AtomIndex in AtomIndices:
  806         Atom = Mol.GetAtomWithIdx(AtomIndex)
  807         
  808         AtomNbrsCount[AtomIndex] = 0
  809         for AtomNbr in Atom.GetNeighbors():
  810             AtomNbrIndex = AtomNbr.GetIdx()
  811             if AtomNbrIndex not in AtomIndices:
  812                 continue
  813             AtomNbrsCount[AtomIndex] += 1
  814     
  815     # Number of neighbors for each specified atom indices must be 1 or 2
  816     # for sequentially connected list of atom indices...
  817     AtomsWithOneNbr = []
  818     for AtomIndex, NbrsCount  in AtomNbrsCount.items():
  819         if not (NbrsCount == 1 or NbrsCount ==2):
  820             return (False, None)
  821         
  822         if NbrsCount == 1:
  823             AtomsWithOneNbr.append(AtomIndex)
  824 
  825     # A sequentially connected list of indices must have two atom indices with
  826     # exactly # one neighbor...
  827     if len(AtomsWithOneNbr) != 2:
  828             return (False, None)
  829 
  830     # Setup a reordered list of sequentially connected atoms...
  831     ReorderedAtomIndices = []
  832     
  833     AtomIndex1, AtomIndex2 = AtomsWithOneNbr
  834     AtomIndex = AtomIndex1 if AtomIndex1 < AtomIndex2 else AtomIndex2
  835     ReorderedAtomIndices.append(AtomIndex)
  836 
  837     while (len(ReorderedAtomIndices) < len(AtomIndices)):
  838         Atom = Mol.GetAtomWithIdx(AtomIndex)
  839         
  840         for AtomNbr in Atom.GetNeighbors():
  841             AtomNbrIndex = AtomNbr.GetIdx()
  842             if AtomNbrIndex not in AtomIndices:
  843                 continue
  844             
  845             if AtomNbrIndex in ReorderedAtomIndices:
  846                 continue
  847             
  848             # Treat neighbor as next connected atom...
  849             AtomIndex = AtomNbrIndex
  850             ReorderedAtomIndices.append(AtomIndex)
  851             break
  852 
  853     # Check reorderd list size...
  854     if (len(ReorderedAtomIndices) != len(AtomIndices)):
  855         return (False, None)
  856 
  857     # A final check to validate reorderd list...
  858     if not AreAtomIndicesSequentiallyConnected(Mol, ReorderedAtomIndices):
  859         return (False, None)
  860     
  861     return (True, ReorderedAtomIndices)
  862 
  863 def MolToBase64EncodedMolString(Mol, PropertyPickleFlags = Chem.PropertyPickleOptions.AllProps):
  864     """Encode RDkit molecule object into a base64 encoded string. The properties
  865     can be optionally excluded.
  866     
  867     The molecule is pickled using RDKit Mol.ToBinary() function before
  868     their encoding.
  869    
  870     Arguments:
  871         Mol (object): RDKit molecule object.
  872         PropertyPickleFlags: RDKit property pickle options.
  873 
  874     Returns:
  875         str : Base64 encode molecule string or None.
  876 
  877     Notes:
  878         The following property pickle flags are currently available in RDKit:
  879             
  880             Chem.PropertyPickleOptions.NoProps
  881             Chem.PropertyPickleOptions.MolProps
  882             Chem.PropertyPickleOptions.AtomProps
  883             Chem.PropertyPickleOptions.BondProps
  884             Chem.PropertyPickleOptions.PrivateProps
  885             Chem.PropertyPickleOptions.AllProps
  886 
  887     """
  888 
  889     return None if Mol is None else base64.b64encode(Mol.ToBinary(PropertyPickleFlags)).decode()
  890 
  891 def MolFromBase64EncodedMolString(EncodedMol):
  892     """Generate a RDKit molecule object from a base64 encoded string.
  893     
  894     Arguments:
  895         str: Base64 encoded molecule string.
  896 
  897     Returns:
  898         object : RDKit molecule object or None.
  899 
  900     """
  901 
  902     return None if EncodedMol is None else Chem.Mol(base64.b64decode(EncodedMol))
  903 
  904 def GenerateBase64EncodedMolStrings(Mols, PropertyPickleFlags = Chem.PropertyPickleOptions.AllProps):
  905     """Setup an iterator for generating base64 encoded molecule string
  906     from a RDKit molecule iterator. The iterator returns a list containing
  907     a molecule index and encoded molecule string or None.
  908     
  909     The molecules are pickled using RDKit Mol.ToBinary() function
  910     before their encoding.
  911     
  912     Arguments:
  913         iterator: RDKit molecules iterator.
  914         PropertyFlags: RDKit property pickle options.
  915 
  916     Returns:
  917         object : Base64 endcoded molecules iterator. The iterator returns a
  918             list containing a molecule index and an encoded molecule string
  919             or None.
  920 
  921     Notes:
  922         The following property pickle flags are currently available in RDKit:
  923             
  924             Chem.PropertyPickleOptions.NoProps
  925             Chem.PropertyPickleOptions.MolProps
  926             Chem.PropertyPickleOptions.AtomProps
  927             Chem.PropertyPickleOptions.BondProps
  928             Chem.PropertyPickleOptions.PrivateProps
  929             Chem.PropertyPickleOptions.AllProps
  930 
  931     Examples:
  932 
  933         EncodedMolsInfo = GenerateBase64EncodedMolStrings(Mols)
  934         for MolIndex, EncodedMol in EncodedMolsInfo:
  935             if EncodeMol is not None:
  936                 Mol = MolFromBase64EncodedMolString(EncodedMol)
  937 
  938     """
  939     for MolIndex, Mol in enumerate(Mols):
  940         yield [MolIndex, None] if Mol is None else [MolIndex, MolToBase64EncodedMolString(Mol, PropertyPickleFlags)]
  941 
  942 def GenerateBase64EncodedMolStringsWithIDs(Mols, MolIDs, PropertyPickleFlags = Chem.PropertyPickleOptions.AllProps):
  943     """Setup an iterator for generating base64 encoded molecule string
  944     from a RDKit molecule iterator. The iterator returns a list containing
  945     a molecule ID and encoded molecule string or None.
  946     
  947     The molecules are pickled using RDKit Mol.ToBinary() function
  948     before their encoding.
  949     
  950     Arguments:
  951         iterator: RDKit molecules iterator.
  952         MolIDs (list): Molecule IDs.
  953         PropertyFlags: RDKit property pickle options.
  954 
  955     Returns:
  956         object : Base64 endcoded molecules iterator. The iterator returns a
  957             list containing a molecule ID and an encoded molecule string
  958             or None.
  959 
  960     Notes:
  961         The following property pickle flags are currently available in RDKit:
  962             
  963             Chem.PropertyPickleOptions.NoProps
  964             Chem.PropertyPickleOptions.MolProps
  965             Chem.PropertyPickleOptions.AtomProps
  966             Chem.PropertyPickleOptions.BondProps
  967             Chem.PropertyPickleOptions.PrivateProps
  968             Chem.PropertyPickleOptions.AllProps
  969 
  970     Examples:
  971 
  972         EncodedMolsInfo = GenerateBase64EncodedMolStringsWithIDs(Mols)
  973         for MolID, EncodedMol in EncodedMolsInfo:
  974             if EncodeMol is not None:
  975                 Mol = MolFromBase64EncodedMolString(EncodedMol)
  976 
  977     """
  978     for MolIndex, Mol in enumerate(Mols):
  979         yield [MolIDs[MolIndex], None] if Mol is None else [MolIDs[MolIndex], MolToBase64EncodedMolString(Mol, PropertyPickleFlags)]
  980 
  981 def GenerateBase64EncodedMolStringWithConfIDs(Mol, MolIndex, ConfIDs, PropertyPickleFlags = Chem.PropertyPickleOptions.AllProps):
  982     """Setup an iterator generating base64 encoded molecule string for a 
  983     molecule. The iterator returns a list containing a molecule index, an encoded
  984     molecule string, and conf ID.
  985     
  986     The molecules are pickled using RDKit Mol.ToBinary() function
  987     before their encoding.
  988     
  989     Arguments:
  990         Mol (object): RDKit molecule object.
  991         MolIndex (int): Molecule index.
  992         ConfIDs (list): Conformer IDs.
  993         PropertyFlags: RDKit property pickle options.
  994 
  995     Returns:
  996         object : Base64 endcoded molecules iterator. The iterator returns a
  997             list containing a molecule index, an encoded molecule string, and
  998             conf ID.
  999 
 1000     Notes:
 1001         The following property pickle flags are currently available in RDKit:
 1002             
 1003             Chem.PropertyPickleOptions.NoProps
 1004             Chem.PropertyPickleOptions.MolProps
 1005             Chem.PropertyPickleOptions.AtomProps
 1006             Chem.PropertyPickleOptions.BondProps
 1007             Chem.PropertyPickleOptions.PrivateProps
 1008             Chem.PropertyPickleOptions.AllProps
 1009 
 1010     Examples:
 1011 
 1012         EncodedMolsInfo = GenerateBase64EncodedMolStringWithConfIDs(Mol, MolIndex, ConfIDs)
 1013         for MolIndex, EncodedMol, ConfID in EncodedMolsInfo:
 1014             if EncodeMol is not None:
 1015                 Mol = MolFromBase64EncodedMolString(EncodedMol)
 1016 
 1017     """
 1018     for ConfID in ConfIDs:
 1019         yield [MolIndex, None, ConfID] if Mol is None else [MolIndex, MolToBase64EncodedMolString(Mol, PropertyPickleFlags), ConfID]
 1020 
 1021 def AreAtomMapNumbersPresentInMol(Mol):
 1022     """Check for the presence of atom map numbers in a molecue.
 1023     
 1024     Arguments:
 1025         Mol (object): RDKit molecule object.
 1026 
 1027     Returns:
 1028         bool : True - Atom map numbers present; Otherwise, false. 
 1029 
 1030     """
 1031 
 1032     return False if _GetAtomMapIndices(Mol) is None else True
 1033 
 1034 def ClearAtomMapNumbers(Mol, AllowImplicitValence = True, ClearRadicalElectrons = True):
 1035     """Check and clear atom map numbers in a molecule. In addition, allow implicit
 1036     valence and clear radical electrons for atoms with associated map numbers.
 1037     
 1038     For example, the following atomic properties are assigned by RDKit to atom
 1039     map number 1 in a molecule corresponding to SMILES C[C:1](C)C:
 1040     
 1041     NoImplicit: True; ImplicitValence: 0; ExplicitValence: 3; NumExplicitHs: 0;
 1042     NumImplicitHs: 0; NumRadicalElectrons: 1
 1043     
 1044     This function clears atoms map numbers in the molecule leading to SMILES 
 1045     CC(C)C, along with optionally updating atomic properties as shown below:
 1046     
 1047     NoImplicit: False; ImplicitValence: 1; ExplicitValence: 3; NumExplicitHs: 0;
 1048     NumImplicitHs: 1; NumRadicalElectrons: 0
 1049     
 1050     Arguments:
 1051         Mol (object): RDKit molecule object.
 1052 
 1053     Returns:
 1054         Mol (object): RDKit molecule object.
 1055 
 1056     """
 1057     
 1058     AtomMapIndices = GetAtomMapIndices(Mol)
 1059     
 1060     if AtomMapIndices is None:
 1061         return Mol
 1062     
 1063     for AtomMapIndex in AtomMapIndices:
 1064         Atom = Mol.GetAtomWithIdx(AtomMapIndex)
 1065         
 1066         # Clear map number property 'molAtomMapNumber'...
 1067         Atom.SetAtomMapNum(0)
 1068         
 1069         # Allow implit valence...
 1070         if AllowImplicitValence:
 1071             Atom.SetNoImplicit(False)
 1072         
 1073         # Set number of electrons to 0...
 1074         if ClearRadicalElectrons:
 1075             Atom.SetNumRadicalElectrons(0)
 1076         
 1077         Atom.UpdatePropertyCache()
 1078     
 1079     Mol.UpdatePropertyCache()
 1080 
 1081     return Mol
 1082 
 1083 def GetAtomMapIndices(Mol):
 1084     """Get a list of available atom indices corresponding to atom map numbers
 1085     present in a SMILES/SMARTS pattern used for creating a molecule. The list of
 1086     atom indices is sorted in ascending order by atom map numbers.
 1087     
 1088     Arguments:
 1089         Mol (object): RDKit molecule object.
 1090 
 1091     Returns:
 1092         list : List of atom indices sorted in the ascending order of atom map
 1093             numbers or None.
 1094 
 1095     """
 1096     
 1097     return _GetAtomMapIndices(Mol)
 1098 
 1099 def GetAtomMapIndicesAndMapNumbers(Mol):
 1100     """Get lists of available atom indices and atom map numbers present in a
 1101     SMILES/SMARTS pattern used for creating a molecule. Both lists are sorted
 1102     in ascending order by atom map numbers.
 1103     
 1104     Arguments:
 1105         Mol (object): RDKit molecule object.
 1106 
 1107     Returns:
 1108         list : List of atom indices sorted in the ascending order of atom map
 1109             numbers or None.
 1110         list : List of atom map numbers sorted in the ascending order or None.
 1111 
 1112     """
 1113     
 1114     return (_GetAtomMapIndicesAndMapNumbers(Mol))
 1115 
 1116 def MolFromSubstructureMatch(Mol, PatternMol, AtomIndices, FilterByAtomMapNums = False):
 1117     """Generate a RDKit molecule object for a list of matched atom indices
 1118     present in a pattern molecule. The list of atom indices correspond to a
 1119     list retrieved by RDKit function GetSubstructureMatches using SMILES/SMARTS
 1120     pattern. The atom indices are optionally filtered by mapping atom numbers
 1121     to appropriate atom indices during the generation of the molecule.
 1122     For example: [O:1]=[S:2](=[O])[C:3][C:4].
 1123 
 1124     Arguments:
 1125         Mol (object): RDKit molecule object.
 1126         PatternMol (object): RDKit molecule object for a SMILES/SMARTS pattern.
 1127         AtomIndices (list): Atom indices.
 1128         FilterByAtomMapNums (bool): Filter matches by atom map numbers.
 1129 
 1130     Returns:
 1131         object : RDKit molecule object or None.
 1132 
 1133     """
 1134 
 1135     AtomMapIndices = _GetAtomMapIndices(PatternMol) if FilterByAtomMapNums else None
 1136 
 1137     return (_MolFromSubstructureMatch(Mol, PatternMol, AtomIndices, AtomMapIndices))
 1138 
 1139 def MolsFromSubstructureMatches(Mol, PatternMol, AtomIndicesList, FilterByAtomMapNums = False):
 1140     """Generate  a list of RDKit molecule objects for a list containing lists of
 1141     matched atom indices present in a pattern molecule. The list of atom indices
 1142     correspond to a list retrieved by RDKit function GetSubstructureMatches using
 1143     SMILES/SMARTS pattern. The atom indices are optionally filtered by mapping
 1144     atom numbers to appropriate atom indices during the generation of the
 1145     molecule. For example: [O:1]=[S:2](=[O])[C:3][C:4].
 1146 
 1147     Arguments:
 1148         Mol (object): RDKit molecule object.
 1149         PatternMol (object): RDKit molecule object for a SMILES/SMARTS pattern.
 1150         AtomIndicesList (list): A list of lists containing atom indices.
 1151         FilterByAtomMapNums (bool): Filter matches by atom map numbers.
 1152 
 1153     Returns:
 1154         list : A list of lists containg RDKit molecule objects or None.
 1155 
 1156     """
 1157 
 1158     AtomMapIndices = _GetAtomMapIndices(PatternMol) if FilterByAtomMapNums else None
 1159 
 1160     Mols = []
 1161     for AtomIndices in AtomIndicesList:
 1162         Mols.append(_MolFromSubstructureMatch(Mol, PatternMol, AtomIndices, AtomMapIndices))
 1163     
 1164     return Mols if len(Mols) else None
 1165 
 1166 def FilterSubstructureMatchByAtomMapNumbers(Mol, PatternMol, AtomIndices):
 1167     """Filter a list of matched atom indices by map atom numbers present in a
 1168     pattern molecule. The list of atom indices correspond to a list retrieved by
 1169     RDKit function GetSubstructureMatches using SMILES/SMARTS pattern. The
 1170     atom map numbers are mapped to appropriate atom indices during the generation
 1171     of molecules. For example: [O:1]=[S:2](=[O])[C:3][C:4].
 1172     
 1173     Arguments:
 1174         Mol (object): RDKit molecule object.
 1175         PatternMol (object): RDKit molecule object for a SMILES/SMARTS pattern.
 1176         AtomIndices (list): Atom indices.
 1177 
 1178     Returns:
 1179         list : A list of filtered atom indices.
 1180 
 1181     """
 1182     AtomMapIndices = _GetAtomMapIndices(PatternMol)
 1183 
 1184     return _FilterSubstructureMatchByAtomMapNumbers(Mol, PatternMol, AtomIndices, AtomMapIndices)
 1185 
 1186 def FilterSubstructureMatchesByAtomMapNumbers(Mol, PatternMol, AtomIndicesList):
 1187     """Filter a list of lists containing matched atom indices by map atom numbers
 1188     present in a pattern molecule. The list of atom indices correspond to a list retrieved by
 1189     RDKit function GetSubstructureMatches using SMILES/SMARTS pattern. The
 1190     atom map numbers are mapped to appropriate atom indices during the generation
 1191     of molecules. For example: [O:1]=[S:2](=[O])[C:3][C:4].
 1192      
 1193     Arguments:
 1194         Mol (object): RDKit molecule object.
 1195         PatternMol (object): RDKit molecule object for a SMILES/SMARTS pattern.
 1196         AtomIndicesList (list): A list of lists containing atom indices.
 1197 
 1198     Returns:
 1199         list : A list of lists containing filtered atom indices.
 1200 
 1201     """
 1202     AtomMapIndices = _GetAtomMapIndices(PatternMol)
 1203 
 1204     MatchedAtomIndicesList = []
 1205     for AtomIndices in AtomIndicesList:
 1206         MatchedAtomIndicesList.append(_FilterSubstructureMatchByAtomMapNumbers(Mol, PatternMol, AtomIndices, AtomMapIndices))
 1207     
 1208     return MatchedAtomIndicesList
 1209 
 1210 def _MolFromSubstructureMatch(Mol, PatternMol, AtomIndices, AtomMapIndices):
 1211     """Generate a RDKit molecule object for a list of matched atom indices and available
 1212    atom map indices.
 1213     """
 1214 
 1215     if AtomMapIndices is not None:
 1216         MatchedAtomIndices = [AtomIndices[Index] for Index in AtomMapIndices]
 1217     else:
 1218         MatchedAtomIndices = list(AtomIndices)
 1219 
 1220     return _GetMolFromAtomIndices(Mol, MatchedAtomIndices)
 1221 
 1222 def _GetAtomMapIndices(Mol):
 1223     """Get a list of available atom indices corresponding to sorted atom map
 1224     numbers present in a SMILES/SMARTS pattern used for creating a molecule.
 1225     """
 1226     
 1227     AtomMapIndices, AtomMapNumbers = _GetAtomMapIndicesAndMapNumbers(Mol)
 1228     
 1229     return AtomMapIndices
 1230     
 1231 def _GetAtomMapIndicesAndMapNumbers(Mol):
 1232     """Get a list of available atom indices and atom map numbers present
 1233     in  a SMILES/SMARTS pattern used for creating a molecule. Both lists
 1234     are sorted in ascending order by atom map numbers.
 1235     """
 1236 
 1237     # Setup a atom map number to atom indices map..
 1238     AtomMapNumToIndices = {}
 1239     for Atom in Mol.GetAtoms():
 1240         AtomMapNum = Atom.GetAtomMapNum()
 1241         
 1242         if AtomMapNum:
 1243             AtomMapNumToIndices[AtomMapNum] = Atom.GetIdx()
 1244     
 1245     # Setup atom indices corresponding to sorted atom map numbers...
 1246     AtomMapIndices = None
 1247     AtomMapNumbers = None
 1248     if len(AtomMapNumToIndices):
 1249         AtomMapNumbers = sorted(AtomMapNumToIndices)
 1250         AtomMapIndices = [AtomMapNumToIndices[AtomMapNum] for AtomMapNum in AtomMapNumbers]
 1251 
 1252     return (AtomMapIndices, AtomMapNumbers)
 1253 
 1254 def _FilterSubstructureMatchByAtomMapNumbers(Mol, PatternMol, AtomIndices, AtomMapIndices):
 1255     """Filter substructure match atom indices by atom map indices corresponding to
 1256     atom map numbers.
 1257     """
 1258     
 1259     if AtomMapIndices is None:
 1260         return list(AtomIndices)
 1261                                                
 1262     return [AtomIndices[Index] for Index in AtomMapIndices]
 1263 
 1264 def _GetMolFromAtomIndices(Mol, AtomIndices):
 1265     """Generate a RDKit molecule object from atom indices returned by
 1266    substructure search.
 1267     """
 1268 
 1269     BondIndices = []
 1270     for AtomIndex in AtomIndices:
 1271         Atom = Mol.GetAtomWithIdx(AtomIndex)
 1272         
 1273         for AtomNbr in Atom.GetNeighbors():
 1274             AtomNbrIndex = AtomNbr.GetIdx()
 1275             if AtomNbrIndex not in AtomIndices:
 1276                 continue
 1277             
 1278             BondIndex = Mol.GetBondBetweenAtoms(AtomIndex, AtomNbrIndex).GetIdx()
 1279             if BondIndex in BondIndices:
 1280                 continue
 1281                 
 1282             BondIndices.append(BondIndex)
 1283             
 1284     MatchedMol = Chem.PathToSubmol(Mol, BondIndices) if len(BondIndices) else None
 1285     
 1286     return MatchedMol
 1287 
 1288 def ConstrainAndEmbed(mol, core, coreMatchesMol=None, useTethers=True, coreConfId=-1, randomseed=2342, getForceField=AllChem.UFFGetMoleculeForceField, **kwargs):
 1289     """
 1290     The function is a local copy of RDKit fucntion AllChem.ConstrainedEmbed().
 1291     It has been enhanced to support an explicit list of core matches corresponding
 1292     to the matched atom indices in the molecule. The number of matched atom indices
 1293     must be equal to the number of atoms in core molecule.
 1294 
 1295     Arguments:
 1296         mol (object): RDKit molecule object to embed.
 1297         core (object): RDKit molecule to use as a source of constraints.
 1298         coreMatchesMol (list): A list matches atom indices in mol.
 1299         useTethers: (bool) if True, the final conformation will be optimized
 1300             subject to a series of extra forces that pull the matching atoms to
 1301             the positions of the core atoms. Otherwise simple distance
 1302             constraints based on the core atoms will be used in the
 1303             optimization.
 1304         coreConfId (int): ID of the core conformation to use.
 1305         randomSeed (int): Seed for the random number generator
 1306 
 1307     Returns:
 1308         mol (object): RDKit molecule object.
 1309 
 1310     """
 1311     if coreMatchesMol is None:
 1312         match = mol.GetSubstructMatch(core)
 1313         if not match:
 1314             raise ValueError("Molecule doesn't match the core.")
 1315     else:
 1316         if core.GetNumAtoms() != len(coreMatchesMol):
 1317             raise ValueError("Number of atoms, %s, in core molecule must match number of atom indices, %s, specified in the list coreMatchesMol." % (core.GetNumAtoms(), len(coreMatchesMol)))
 1318         # Check specified matched atom indices in  coreMatchesMol and use the match atom
 1319         # indices returned by GetSubstructMatches() for embedding...
 1320         coreMatch = None
 1321         matches = mol.GetSubstructMatches(core)
 1322         for match in matches:
 1323             if len(match) != len(coreMatchesMol):
 1324                 continue
 1325             matchFound = True
 1326             for atomIndex in match:
 1327                 if atomIndex not in coreMatchesMol:
 1328                     matchFound = False
 1329                     break
 1330             if matchFound:
 1331                 coreMatch = match
 1332                 break
 1333         if coreMatch is None:
 1334             raise ValueError("Molecule doesn't match the atom indices specified in the list coreMatchesMol.")
 1335         match = coreMatch
 1336     
 1337     coordMap = {}
 1338     coreConf = core.GetConformer(coreConfId)
 1339     for i, idxI in enumerate(match):
 1340         corePtI = coreConf.GetAtomPosition(i)
 1341         coordMap[idxI] = corePtI
 1342     
 1343     ci = AllChem.EmbedMolecule(mol, coordMap=coordMap, randomSeed=randomseed, **kwargs)
 1344     if ci < 0:
 1345         raise ValueError('Could not embed molecule.')
 1346     
 1347     algMap = [(j, i) for i, j in enumerate(match)]
 1348     
 1349     if not useTethers:
 1350         # clean up the conformation
 1351         ff = getForceField(mol, confId=0)
 1352         for i, idxI in enumerate(match):
 1353             for j in range(i + 1, len(match)):
 1354                 idxJ = match[j]
 1355                 d = coordMap[idxI].Distance(coordMap[idxJ])
 1356                 ff.AddDistanceConstraint(idxI, idxJ, d, d, 100.)
 1357         ff.Initialize()
 1358         n = 4
 1359         more = ff.Minimize()
 1360         while more and n:
 1361             more = ff.Minimize()
 1362             n -= 1
 1363         # rotate the embedded conformation onto the core:
 1364         rms = AllChem.AlignMol(mol, core, atomMap=algMap)
 1365     else:
 1366         # rotate the embedded conformation onto the core:
 1367         rms = AllChem.AlignMol(mol, core, atomMap=algMap)
 1368         ff = getForceField(mol, confId=0)
 1369         conf = core.GetConformer()
 1370         for i in range(core.GetNumAtoms()):
 1371             p = conf.GetAtomPosition(i)
 1372             pIdx = ff.AddExtraPoint(p.x, p.y, p.z, fixed=True) - 1
 1373             ff.AddDistanceConstraint(pIdx, match[i], 0, 0, 100.)
 1374         ff.Initialize()
 1375         n = 4
 1376         more = ff.Minimize(energyTol=1e-4, forceTol=1e-3)
 1377         while more and n:
 1378             more = ff.Minimize(energyTol=1e-4, forceTol=1e-3)
 1379             n -= 1
 1380         # realign
 1381         rms = AllChem.AlignMol(mol, core, atomMap=algMap)
 1382 
 1383     mol.SetProp('EmbedRMS', str(rms))
 1384     return mol
 1385 
 1386 def ReadAndValidateMolecules(FileName, **KeyWordArgs):
 1387     """Read molecules from an input file, validate all molecule objects, and return
 1388     a list of valid and non-valid molecule objects along with their counts.
 1389     
 1390     Arguments:
 1391         FileName (str): Name of a file with complete path.
 1392         **KeyWordArgs (dictionary) : Parameter name and value pairs for reading and
 1393             processing molecules.
 1394 
 1395     Returns:
 1396         list : List of valid RDKit molecule objects.
 1397         int : Number of total molecules in input file. 
 1398         int : Number of valid molecules in input file. 
 1399 
 1400     Notes:
 1401         The file extension is used to determine type of the file and set up an appropriate
 1402         file reader.
 1403 
 1404     """
 1405 
 1406     AllowEmptyMols = True
 1407     if "AllowEmptyMols" in KeyWordArgs:
 1408         AllowEmptyMols = KeyWordArgs["AllowEmptyMols"]
 1409     
 1410     Mols = ReadMolecules(FileName, **KeyWordArgs)
 1411 
 1412     if AllowEmptyMols:
 1413         ValidMols = [Mol for Mol in Mols if Mol is not None]
 1414     else:
 1415         ValidMols = []
 1416         MolCount = 0
 1417         for Mol in Mols:
 1418             MolCount += 1
 1419             if Mol is None:
 1420                 continue
 1421             
 1422             if IsMolEmpty(Mol):
 1423                 MolName = GetMolName(Mol, MolCount)
 1424                 MiscUtil.PrintWarning("Ignoring empty molecule: %s" % MolName)
 1425                 continue
 1426             
 1427             ValidMols.append(Mol)
 1428             
 1429     MolCount = len(Mols)
 1430     ValidMolCount = len(ValidMols)
 1431 
 1432     return (ValidMols, MolCount, ValidMolCount)
 1433 
 1434 def ReadMolecules(FileName, **KeyWordArgs):
 1435     """Read molecules from an input file without performing any validation
 1436     and creation of molecule objects.
 1437     
 1438     Arguments:
 1439         FileName (str): Name of a file with complete path.
 1440         **KeyWordArgs (dictionary) : Parameter name and value pairs for reading and
 1441             processing molecules.
 1442 
 1443     Returns:
 1444         list : List of RDKit molecule objects.
 1445 
 1446     Notes:
 1447         The file extension is used to determine type of the file and set up an appropriate
 1448         file reader.
 1449 
 1450     """
 1451 
 1452     # Set default values for possible arguments...
 1453     ReaderArgs = {"Sanitize": True, "RemoveHydrogens": True, "StrictParsing": True,  "SMILESDelimiter" : ' ', "SMILESColumn": 1, "SMILESNameColumn": 2, "SMILESTitleLine": True }
 1454 
 1455     # Set specified values for possible arguments...
 1456     for Arg in ReaderArgs:
 1457         if Arg in KeyWordArgs:
 1458             ReaderArgs[Arg] = KeyWordArgs[Arg]
 1459 
 1460     # Modify specific valeus for SMILES...
 1461     if MiscUtil.CheckFileExt(FileName, "smi csv tsv txt"):
 1462         Args = ["Sanitize", "SMILESTitleLine"]
 1463         for Arg in Args:
 1464             if ReaderArgs[Arg] is True:
 1465                 ReaderArgs[Arg] = 1
 1466             else:
 1467                 ReaderArgs[Arg] = 0
 1468     
 1469     Mols = []
 1470     if MiscUtil.CheckFileExt(FileName, "sdf sd"):
 1471         return ReadMoleculesFromSDFile(FileName, ReaderArgs["Sanitize"], ReaderArgs["RemoveHydrogens"], ReaderArgs['StrictParsing'])
 1472     elif MiscUtil.CheckFileExt(FileName, "mol"):
 1473         return ReadMoleculesFromMolFile(FileName, ReaderArgs["Sanitize"], ReaderArgs["RemoveHydrogens"], ReaderArgs['StrictParsing'])
 1474     elif MiscUtil.CheckFileExt(FileName, "mol2"):
 1475         return ReadMoleculesFromMol2File(FileName, ReaderArgs["Sanitize"], ReaderArgs["RemoveHydrogens"])
 1476     elif MiscUtil.CheckFileExt(FileName, "pdb"):
 1477         return ReadMoleculesFromPDBFile(FileName, ReaderArgs["Sanitize"], ReaderArgs["RemoveHydrogens"])
 1478     elif MiscUtil.CheckFileExt(FileName, "smi txt csv tsv"):
 1479         SMILESColumnIndex = ReaderArgs["SMILESColumn"] - 1
 1480         SMILESNameColumnIndex = ReaderArgs["SMILESNameColumn"] - 1
 1481         return ReadMoleculesFromSMILESFile(FileName, ReaderArgs["SMILESDelimiter"], SMILESColumnIndex, SMILESNameColumnIndex, ReaderArgs["SMILESTitleLine"], ReaderArgs["Sanitize"])
 1482     else:
 1483         MiscUtil.PrintWarning("RDKitUtil.ReadMolecules: Non supported file type: %s" % FileName)
 1484     
 1485     return Mols
 1486 
 1487 def ReadMoleculesFromSDFile(FileName, Sanitize = True, RemoveHydrogens = True, StrictParsing = True):
 1488     """Read molecules from a SD file.
 1489     
 1490     Arguments:
 1491         FileName (str): Name of a file with complete path.
 1492         Sanitize (bool): Sanitize molecules.
 1493         RemoveHydrogens (bool): Remove hydrogens from molecules.
 1494         StrictParsing (bool): Perform strict parsing.
 1495 
 1496     Returns:
 1497         list : List of RDKit molecule objects.
 1498 
 1499     """
 1500     return  Chem.SDMolSupplier(FileName, sanitize = Sanitize, removeHs = RemoveHydrogens, strictParsing = StrictParsing)
 1501 
 1502 def ReadMoleculesFromMolFile(FileName, Sanitize = True, RemoveHydrogens = True, StrictParsing = True):
 1503     """Read molecule from a MDL Mol file.
 1504     
 1505     Arguments:
 1506         FileName (str): Name of a file with complete path.
 1507         Sanitize (bool): Sanitize molecules.
 1508         RemoveHydrogens (bool): Remove hydrogens from molecules.
 1509         StrictParsing (bool): Perform strict parsing.
 1510 
 1511     Returns:
 1512         list : List of RDKit molecule objects.
 1513 
 1514     """
 1515     
 1516     Mols = []
 1517     Mols.append(Chem.MolFromMolFile(FileName, sanitize = Sanitize, removeHs = RemoveHydrogens, strictParsing = StrictParsing))
 1518     return Mols
 1519 
 1520 
 1521 def ReadMoleculesFromMol2File(FileName, Sanitize = True, RemoveHydrogens = True):
 1522     """Read molecules from a Tripos Mol2  file. The first call to the function
 1523     creates and returns  a generator object using Python yield statement. The
 1524     molecules are created during the subsequent iteration by the generator object.
 1525 
 1526     Arguments:
 1527         FileName (str): Name of a file with complete path.
 1528         Sanitize (bool): Sanitize molecules.
 1529         RemoveHydrogens (bool): Remove hydrogens from molecules.
 1530 
 1531     Returns:
 1532         list : A Python generator object for iterating over the molecules.
 1533 
 1534     """
 1535 
 1536     return _Mol2MolSupplier(FileName, Sanitize, RemoveHydrogens)
 1537 
 1538 def _Mol2MolSupplier(FileName, Sanitize = True, RemoveHydrogens = True):
 1539     """Read molecules from a Tripos Mol2  file."""
 1540 
 1541     fh = open(FileName, 'r')
 1542     
 1543     FirstMol = True
 1544     ProcessingMol = False
 1545     
 1546     for Line in fh:
 1547         if re.match("^#", Line, re.I):
 1548             continue
 1549         
 1550         if re.match("^@<TRIPOS>MOLECULE", Line, re.I):
 1551             ProcessingMol = True
 1552             
 1553             if FirstMol:
 1554                 FirstMol = False
 1555                 
 1556                 MolLines = []
 1557                 MolLines.append(Line)
 1558                 continue
 1559             
 1560             # Process lines for existing molecule...
 1561             MolBlock = "".join(MolLines)
 1562             
 1563             Mol = Chem.MolFromMol2Block(MolBlock, sanitize = Sanitize, removeHs = RemoveHydrogens)
 1564             yield Mol
 1565             
 1566             # Track lines for next molecule...
 1567             MolLines = []
 1568             MolLines.append(Line)
 1569             continue
 1570         
 1571         if not ProcessingMol:
 1572             continue
 1573         
 1574         MolLines.append(Line)
 1575         
 1576     fh.close
 1577 
 1578     # Process last molecule...
 1579     if len(MolLines):
 1580         MolBlock = "".join(MolLines)
 1581         Mol = Chem.MolFromMol2Block(MolBlock, sanitize = Sanitize, removeHs = RemoveHydrogens)
 1582         yield Mol
 1583     
 1584 def ReadMoleculesFromPDBFile(FileName, Sanitize = True, RemoveHydrogens = True):
 1585     """Read molecule from a PDB  file.
 1586     
 1587     Arguments:
 1588         FileName (str): Name of a file with complete path.
 1589         Sanitize (bool): Sanitize molecules.
 1590         RemoveHydrogens (bool): Remove hydrogens from molecules.
 1591 
 1592     Returns:
 1593         list : List of RDKit molecule objects.
 1594 
 1595     """
 1596     
 1597     Mols = []
 1598     Mols.append(Chem.MolFromPDBFile(FileName,  sanitize = Sanitize, removeHs = RemoveHydrogens))
 1599     return Mols
 1600 
 1601 def ReadMoleculesFromSMILESFile(FileName, SMILESDelimiter = ' ', SMILESColIndex = 0, SMILESNameColIndex = 1, SMILESTitleLine = 1, Sanitize = 1):
 1602     """Read molecules from a SMILES file.
 1603     
 1604     Arguments:
 1605         SMILESDelimiter (str): Delimiter for parsing SMILES line
 1606         SMILESColIndex (int): Column index containing SMILES string.
 1607         SMILESNameColIndex (int): Column index containing molecule name.
 1608         SMILESTitleLine (int): Flag to indicate presence of title line.
 1609         Sanitize (int): Sanitize molecules.
 1610 
 1611     Returns:
 1612         list : List of RDKit molecule objects.
 1613 
 1614     """
 1615     
 1616     return  Chem.SmilesMolSupplier(FileName, delimiter = SMILESDelimiter, smilesColumn = SMILESColIndex, nameColumn = SMILESNameColIndex, titleLine = SMILESTitleLine, sanitize = Sanitize)
 1617 
 1618 def MoleculesWriter(FileName, **KeyWordArgs):
 1619     """Set up a molecule writer.
 1620     
 1621     Arguments:
 1622         FileName (str): Name of a file with complete path.
 1623         **KeyWordArgs (dictionary) : Parameter name and value pairs for writing and
 1624             processing molecules.
 1625 
 1626     Returns:
 1627         RDKit object : Molecule writer.
 1628 
 1629     Notes:
 1630         The file extension is used to determine type of the file and set up an appropriate
 1631         file writer.
 1632 
 1633     """
 1634     
 1635     # Set default values for possible arguments...
 1636     WriterArgs = {"Compute2DCoords" : False, "Kekulize": True, "ForceV3000": False, "SMILESKekulize": False, "SMILESDelimiter" : ' ', "SMILESIsomeric": True, "SMILESTitleLine": True, "SMILESMolName": True}
 1637 
 1638     # Set specified values for possible arguments...
 1639     for Arg in WriterArgs:
 1640         if Arg in KeyWordArgs:
 1641             WriterArgs[Arg] = KeyWordArgs[Arg]
 1642     
 1643     Writer = None
 1644     if MiscUtil.CheckFileExt(FileName, "sdf sd"):
 1645         Writer = Chem.SDWriter(FileName)
 1646         Writer.SetKekulize(WriterArgs["Kekulize"])
 1647         Writer.SetForceV3000(WriterArgs["ForceV3000"])
 1648     elif MiscUtil.CheckFileExt(FileName, "pdb"):
 1649         Writer = Chem.PDBWriter(FileName)
 1650     elif MiscUtil.CheckFileExt(FileName, "smi"):
 1651         # Text for the name column in the title line. Blank indicates not to include name column
 1652         # in the output file...
 1653         NameHeader = 'Name' if WriterArgs["SMILESMolName"] else ''
 1654         Writer = Chem.SmilesWriter(FileName, delimiter = WriterArgs["SMILESDelimiter"], nameHeader = NameHeader, includeHeader = WriterArgs["SMILESTitleLine"],  isomericSmiles = WriterArgs["SMILESIsomeric"], kekuleSmiles = WriterArgs["SMILESKekulize"])
 1655     else:
 1656         MiscUtil.PrintWarning("RDKitUtil.WriteMolecules: Non supported file type: %s" % FileName)
 1657     
 1658     return Writer
 1659     
 1660 def WriteMolecules(FileName, Mols, **KeyWordArgs):
 1661     """Write molecules to an output file.
 1662     
 1663     Arguments:
 1664         FileName (str): Name of a file with complete path.
 1665         Mols (list): List of RDKit molecule objects. 
 1666         **KeyWordArgs (dictionary) : Parameter name and value pairs for writing and
 1667             processing molecules.
 1668 
 1669     Returns:
 1670         int : Number of total molecules.
 1671         int : Number of processed molecules written to output file.
 1672 
 1673     Notes:
 1674         The file extension is used to determine type of the file and set up an appropriate
 1675         file writer.
 1676 
 1677     """
 1678     
 1679     Compute2DCoords = False
 1680     if "Compute2DCoords" in KeyWordArgs:
 1681         Compute2DCoords = KeyWordArgs["Compute2DCoords"]
 1682     
 1683     SetSMILESMolProps = KeyWordArgs["SetSMILESMolProps"] if "SetSMILESMolProps" in KeyWordArgs else False
 1684         
 1685     MolCount = 0
 1686     ProcessedMolCount = 0
 1687     
 1688     Writer = MoleculesWriter(FileName, **KeyWordArgs)
 1689     
 1690     if Writer is None:
 1691         return (MolCount, ProcessedMolCount)
 1692     
 1693     FirstMol = True
 1694     for Mol in Mols:
 1695         MolCount += 1
 1696         if Mol is None:
 1697             continue
 1698 
 1699         if FirstMol:
 1700             FirstMol = False
 1701             if SetSMILESMolProps:
 1702                 SetWriterMolProps(Writer, Mol)
 1703                 
 1704         ProcessedMolCount += 1
 1705         if Compute2DCoords:
 1706             AllChem.Compute2DCoords(Mol)
 1707         
 1708         Writer.write(Mol)
 1709     
 1710     Writer.close()
 1711     
 1712     return (MolCount, ProcessedMolCount)
 1713 
 1714 def SetWriterMolProps(Writer, Mol):
 1715     """Setup molecule properties for a writer to output.
 1716     
 1717     Arguments:
 1718         Writer (object): RDKit writer object.
 1719         Mol (object): RDKit molecule object.
 1720 
 1721     Returns:
 1722         object : Writer object.
 1723 
 1724     """
 1725     PropNames = list(Mol.GetPropNames())
 1726     if len(PropNames):
 1727         Writer.SetProps(PropNames)
 1728         
 1729     return Writer
 1730