1 #!/bin/env python 2 # 3 # File: RDKitClusterMolecules.py 4 # Author: Manish Sud <msud@san.rr.com> 5 # 6 # Copyright (C) 2025 Manish Sud. All rights reserved. 7 # 8 # The functionality available in this script is implemented using RDKit, an 9 # open source toolkit for cheminformatics developed by Greg Landrum. 10 # 11 # This file is part of MayaChemTools. 12 # 13 # MayaChemTools is free software; you can redistribute it and/or modify it under 14 # the terms of the GNU Lesser General Public License as published by the Free 15 # Software Foundation; either version 3 of the License, or (at your option) any 16 # later version. 17 # 18 # MayaChemTools is distributed in the hope that it will be useful, but without 19 # any warranty; without even the implied warranty of merchantability of fitness 20 # for a particular purpose. See the GNU Lesser General Public License for more 21 # details. 22 # 23 # You should have received a copy of the GNU Lesser General Public License 24 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or 25 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, 26 # Boston, MA, 02111-1307, USA. 27 # 28 29 from __future__ import print_function 30 31 # Add local python path to the global path and import standard library modules... 32 import os 33 import sys; sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), "..", "lib", "Python")) 34 import time 35 import re 36 37 # RDKit imports... 
38 try: 39 from rdkit import rdBase 40 from rdkit import Chem 41 from rdkit.Chem import AllChem 42 from rdkit import DataStructs 43 from rdkit.Chem.Fingerprints import FingerprintMols 44 from rdkit.Chem import rdMolDescriptors 45 from rdkit.ML.Cluster import Butina 46 from rdkit.SimDivFilters import rdSimDivPickers 47 from rdkit.SimDivFilters.rdSimDivPickers import HierarchicalClusterPicker 48 except ImportError as ErrMsg: 49 sys.stderr.write("\nFailed to import RDKit module/package: %s\n" % ErrMsg) 50 sys.stderr.write("Check/update your RDKit environment and try again.\n\n") 51 sys.exit(1) 52 53 # MayaChemTools imports... 54 try: 55 from docopt import docopt 56 import MiscUtil 57 import RDKitUtil 58 except ImportError as ErrMsg: 59 sys.stderr.write("\nFailed to import MayaChemTools module/package: %s\n" % ErrMsg) 60 sys.stderr.write("Check/update your MayaChemTools environment and try again.\n\n") 61 sys.exit(1) 62 63 ScriptName = os.path.basename(sys.argv[0]) 64 Options = {} 65 OptionsInfo = {} 66 67 def main(): 68 """Start execution of the script.""" 69 70 MiscUtil.PrintInfo("\n%s (RDKit v%s; MayaChemTools v%s; %s): Starting...\n" % (ScriptName, rdBase.rdkitVersion, MiscUtil.GetMayaChemToolsVersion(), time.asctime())) 71 72 (WallClockTime, ProcessorTime) = MiscUtil.GetWallClockAndProcessorTime() 73 74 # Retrieve command line arguments and options... 75 RetrieveOptions() 76 77 # Process and validate command line arguments and options... 78 ProcessOptions() 79 80 # Perform actions required by the script... 
81 ClusterMolecules() 82 83 MiscUtil.PrintInfo("\n%s: Done...\n" % ScriptName) 84 MiscUtil.PrintInfo("Total time: %s" % MiscUtil.GetFormattedElapsedTime(WallClockTime, ProcessorTime)) 85 86 def ClusterMolecules(): 87 """Cluster molecules.""" 88 89 Mols = RetrieveMolecules() 90 MolsFingerprints = GenerateFingerprints(Mols) 91 MolsClusters = PerformClustering(Mols, MolsFingerprints) 92 93 WriteMolecules(MolsClusters) 94 95 def PerformClustering(Mols, MolsFingerprints): 96 """Perform clustering.""" 97 98 ClusteredMols = [] 99 if re.match("^Butina$", OptionsInfo["ClusteringMethod"], re.I): 100 return PerformButinaClustering(Mols, MolsFingerprints) 101 else: 102 return PerformHierarchicalClustering(Mols, MolsFingerprints) 103 104 return ClusteredMols 105 106 def PerformButinaClustering(Mols, MolsFingerprints): 107 """Perform clustering using Butina methodology.""" 108 109 MiscUtil.PrintInfo("\nClustering molecules using Butina methodology and %s similarity metric..." % OptionsInfo["SimilarityMetric"]) 110 111 FingerprintsCount = len(MolsFingerprints) 112 DistanceCutoff = 1 - OptionsInfo["ButinaSimilarityCutoff"] 113 Reordering = OptionsInfo["ButinaReordering"] 114 115 DistanceMatrix = GenerateLowerTriangularDistanceMatrix(MolsFingerprints) 116 117 ClusteredMolIndices = Butina.ClusterData(DistanceMatrix, FingerprintsCount, DistanceCutoff, reordering = Reordering, isDistData = True) 118 119 MolsClusters = [] 120 for Cluster in ClusteredMolIndices: 121 MolsCluster = [Mols[MolIndex] for MolIndex in Cluster] 122 MolsClusters.append(MolsCluster) 123 124 return MolsClusters 125 126 def PerformHierarchicalClustering(Mols, MolsFingerprints): 127 """Perform hierarchical clustering.""" 128 129 try: 130 import numpy 131 except ImportError: 132 MiscUtil.PrintError("Failed to import numpy python module. 
This is required to cluster molecules using hierarchical clustering methodology.") 133 134 if OptionsInfo["NumClusters"] > len(Mols): 135 MiscUtil.PrintError("The number of clusters, %d, specified using \"-n, --numClusters\" must be less than total number of valid molecules, %d" % (OptionsInfo["NumClusters"], len(Mols))) 136 137 MiscUtil.PrintInfo("\nCluster molecules using %s hierarchical clustering methodology and %s similarity metric..." % (OptionsInfo["SpecifiedHierarchicalClusteringMethod"], OptionsInfo["SimilarityMetric"])) 138 139 NumFingerprints = len(MolsFingerprints) 140 NumClusters = OptionsInfo["NumClusters"] 141 DistanceMatrix = GenerateLowerTriangularDistanceMatrix(MolsFingerprints) 142 143 ClusterPicker = HierarchicalClusterPicker(OptionsInfo["SpecifiedHierarchicalClusteringMethodID"]) 144 ClusteredMolIndices = ClusterPicker.Cluster(numpy.asarray(DistanceMatrix), NumFingerprints, NumClusters) 145 146 MolsClusters = [] 147 for Cluster in ClusteredMolIndices: 148 MolsCluster = [Mols[MolIndex] for MolIndex in Cluster] 149 MolsClusters.append(MolsCluster) 150 151 return MolsClusters 152 153 def WriteMolecules(MolsClusters): 154 """Write out molecules for each cluster along with cluster numbers.""" 155 156 ClustersCount = len(MolsClusters) 157 158 SingleOutFileMode = OptionsInfo["SingleOutFileMode"] 159 TextOutFileMode = OptionsInfo["TextOutFileMode"] 160 TextOutFileDelim = OptionsInfo["TextOutFileDelim"] 161 162 Compute2DCoords = OptionsInfo["OutfileParams"]["Compute2DCoords"] 163 164 SMILESIsomeric = OptionsInfo["OutfileParams"]["SMILESIsomeric"] 165 SMILESKekulize = OptionsInfo["OutfileParams"]["SMILESKekulize"] 166 167 # Setup outfile names and writers... 
168 SetupClustersOutFilesNames(len(MolsClusters)) 169 SingleClusterWriter, ClustersOutfilesWriters = SetupMoleculeWriters(ClustersCount) 170 171 MolCount = 0 172 SingleMolClustersCount = 0 173 174 if SingleOutFileMode: 175 Writer = SingleClusterWriter 176 177 for ClusterIndex in range(0, ClustersCount): 178 MolsCluster = MolsClusters[ClusterIndex] 179 ClusterNum = ClusterIndex + 1 180 181 if len(MolsCluster) == 1: 182 SingleMolClustersCount += 1 183 184 if not SingleOutFileMode: 185 Writer = ClustersOutfilesWriters[ClusterIndex] 186 187 for Mol in MolsCluster: 188 MolCount += 1 189 190 if TextOutFileMode: 191 # Write out text file including SMILES file... 192 SMILES = Chem.MolToSmiles(Mol, isomericSmiles = SMILESIsomeric, kekuleSmiles = SMILESKekulize) 193 MolName = RDKitUtil.GetMolName(Mol, MolCount) 194 Line = TextOutFileDelim.join([SMILES, MolName, "%d" % ClusterNum]) 195 Writer.write("%s\n" % Line) 196 else: 197 # Write out SD file... 198 Mol.SetProp("ClusterNumber", "%s" % ClusterNum) 199 if Compute2DCoords: 200 AllChem.Compute2DCoords(Mol) 201 Writer.write(Mol) 202 203 if SingleClusterWriter is not None: 204 SingleClusterWriter.close() 205 for ClusterOutfileWriter in ClustersOutfilesWriters: 206 ClusterOutfileWriter.close() 207 208 MiscUtil.PrintInfo("\nTotal number of clusters: %d" % ClustersCount) 209 210 if ClustersCount > 0: 211 MiscUtil.PrintInfo("\nNumber of clusters containing only a single molecule: %d" % SingleMolClustersCount) 212 MiscUtil.PrintInfo("Average number of molecules per cluster: %.1f" % (MolCount/ClustersCount)) 213 214 MiscUtil.PrintInfo("\nNumber of molecules in each cluster:") 215 MiscUtil.PrintInfo("ClusterNumber,MolCount") 216 ClusterNum = 0 217 for MolsCluster in MolsClusters: 218 ClusterNum += 1 219 MiscUtil.PrintInfo("%d,%d" % (ClusterNum, len(MolsCluster))) 220 221 def RetrieveMolecules(): 222 """Retrieve molecules.""" 223 224 Infile = OptionsInfo["Infile"] 225 226 # Read molecules... 
227 MiscUtil.PrintInfo("\nReading file %s..." % Infile) 228 OptionsInfo["InfileParams"]["AllowEmptyMols"] = False 229 ValidMols, MolCount, ValidMolCount = RDKitUtil.ReadAndValidateMolecules(Infile, **OptionsInfo["InfileParams"]) 230 231 MiscUtil.PrintInfo("Total number of molecules: %d" % MolCount) 232 MiscUtil.PrintInfo("Number of valid molecules: %d" % ValidMolCount) 233 MiscUtil.PrintInfo("Number of ignored molecules: %d" % (MolCount - ValidMolCount)) 234 235 return ValidMols 236 237 def GenerateFingerprints(Mols): 238 """Generate fingerprints.""" 239 240 FingerprintsName = OptionsInfo["SpecifiedFingerprints"] 241 242 MolsFingerprints = [] 243 if re.match("^AtomPairs$", FingerprintsName, re.I): 244 return GenerateAtomPairsFingerprints(Mols) 245 elif re.match("^MACCS166Keys$", FingerprintsName, re.I): 246 return GenerateMACCS166KeysFingerprints(Mols) 247 elif re.match("^Morgan$", FingerprintsName, re.I): 248 return GenerateMorganFingerprints(Mols) 249 elif re.match("^MorganFeatures$", FingerprintsName, re.I): 250 return GenerateMorganFeaturesFingerprints(Mols) 251 elif re.match("^PathLength$", FingerprintsName, re.I): 252 return GeneratePathLengthFingerprints(Mols) 253 elif re.match("^TopologicalTorsions$", FingerprintsName, re.I): 254 return GenerateTopologicalTorsionsFingerprints(Mols) 255 else: 256 MiscUtil.PrintError("Fingerprints name, %s, is not a valid name" % FingerprintsName) 257 258 return MolsFingerprints 259 260 def GenerateAtomPairsFingerprints(Mols): 261 """Generate AtomPairs fingerprints.""" 262 263 MiscUtil.PrintInfo("\nGenerating AtomPairs %s fingerprints..." 
% OptionsInfo["SpecifiedFingerprintsType"]) 264 265 MinLength = OptionsInfo["FingerprintsParams"]["AtomPairs"]["MinLength"] 266 MaxLength = OptionsInfo["FingerprintsParams"]["AtomPairs"]["MaxLength"] 267 UseChirality = OptionsInfo["FingerprintsParams"]["AtomPairs"]["UseChirality"] 268 FPSize = OptionsInfo["FingerprintsParams"]["AtomPairs"]["FPSize"] 269 BitsPerHash = OptionsInfo["FingerprintsParams"]["AtomPairs"]["BitsPerHash"] 270 271 if re.match("^BitVect$", OptionsInfo["SpecifiedFingerprintsType"], re.I): 272 # Generate ExplicitBitVect fingerprints... 273 MiscUtil.PrintInfo("FPSize: %s; BitsPerHash: %s" % (FPSize, BitsPerHash)) 274 MolsFingerprints = [rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(Mol, minLength = MinLength, maxLength = MaxLength, includeChirality = UseChirality, nBits = FPSize, nBitsPerEntry = BitsPerHash) for Mol in Mols] 275 else: 276 # Generate IntSparseIntVect fingerprints... 277 MolsFingerprints = [rdMolDescriptors.GetAtomPairFingerprint(Mol, minLength = MinLength, maxLength = MaxLength, includeChirality = UseChirality) for Mol in Mols] 278 279 return MolsFingerprints 280 281 def GenerateMACCS166KeysFingerprints(Mols): 282 """Generate MACCS166Keys fingerprints.""" 283 284 MiscUtil.PrintInfo("\nGenerating MACCS166Keys %s fingerprints..." % OptionsInfo["SpecifiedFingerprintsType"]) 285 286 # Generate ExplicitBitVect fingerprints... 287 MolsFingerprints = [rdMolDescriptors.GetMACCSKeysFingerprint(Mol) for Mol in Mols] 288 289 return MolsFingerprints 290 291 def GenerateMorganFingerprints(Mols): 292 """Generate Morgan fingerprints.""" 293 294 MiscUtil.PrintInfo("\nGenerating Morgan %s fingerprints..." 
% OptionsInfo["SpecifiedFingerprintsType"]) 295 296 Radius = OptionsInfo["FingerprintsParams"]["Morgan"]["Radius"] 297 UseChirality = OptionsInfo["FingerprintsParams"]["Morgan"]["UseChirality"] 298 FPSize = OptionsInfo["FingerprintsParams"]["Morgan"]["FPSize"] 299 UseFeatures = False 300 301 if re.match("^BitVect$", OptionsInfo["SpecifiedFingerprintsType"], re.I): 302 # Generate ExplicitBitVect fingerprints... 303 MiscUtil.PrintInfo("FPSize: %s" % (FPSize)) 304 MolsFingerprints = [rdMolDescriptors.GetMorganFingerprintAsBitVect(Mol, Radius, useFeatures = UseFeatures, useChirality = UseChirality, nBits = FPSize) for Mol in Mols] 305 else: 306 # Generate UIntSparseIntVect fingerprints... 307 MolsFingerprints = [rdMolDescriptors.GetMorganFingerprint(Mol, Radius, useFeatures = UseFeatures, useChirality = UseChirality) for Mol in Mols] 308 309 return MolsFingerprints 310 311 def GenerateMorganFeaturesFingerprints(Mols): 312 """Generate MorganFeatures fingerprints.""" 313 314 MiscUtil.PrintInfo("\nGenerating MorganFeatures %s fingerprints..." % OptionsInfo["SpecifiedFingerprintsType"]) 315 316 # Setup fingerprints parameters... 317 Radius = OptionsInfo["FingerprintsParams"]["MorganFeatures"]["Radius"] 318 UseChirality = OptionsInfo["FingerprintsParams"]["MorganFeatures"]["UseChirality"] 319 FPSize = OptionsInfo["FingerprintsParams"]["MorganFeatures"]["FPSize"] 320 UseFeatures = True 321 322 if re.match("^BitVect$", OptionsInfo["SpecifiedFingerprintsType"], re.I): 323 # Generate ExplicitBitVect fingerprints... 324 MiscUtil.PrintInfo("FPSize: %s" % (FPSize)) 325 MolsFingerprints = [rdMolDescriptors.GetMorganFingerprintAsBitVect(Mol, Radius, useFeatures = UseFeatures, useChirality = UseChirality, nBits = FPSize) for Mol in Mols] 326 else: 327 # Generate UIntSparseIntVect fingerprints... 
328 MolsFingerprints = [rdMolDescriptors.GetMorganFingerprint(Mol, Radius, useFeatures = UseFeatures, useChirality = UseChirality) for Mol in Mols] 329 330 return MolsFingerprints 331 332 def GeneratePathLengthFingerprints(Mols): 333 """Generate PathLength fingerprints.""" 334 335 MiscUtil.PrintInfo("\nGenerating PathLength %s fingerprints..." % OptionsInfo["SpecifiedFingerprintsType"]) 336 337 MinPath = OptionsInfo["FingerprintsParams"]["PathLength"]["MinPath"] 338 MaxPath = OptionsInfo["FingerprintsParams"]["PathLength"]["MaxPath"] 339 FPSize = OptionsInfo["FingerprintsParams"]["PathLength"]["FPSize"] 340 BitsPerHash = OptionsInfo["FingerprintsParams"]["PathLength"]["BitsPerHash"] 341 UseHs = False 342 TargetDensity = 0.3 343 MinSize = 54 344 345 # Generate ExplicitBitVect fingerprints... 346 MiscUtil.PrintInfo("FPSize: %s; BitsPerHash: %s" % (FPSize, BitsPerHash)) 347 MolsFingerprints = [FingerprintMols.FingerprintMol(Mol, minPath = MinPath, maxPath = MaxPath, fpSize = FPSize, bitsPerHash = BitsPerHash, useHs = UseHs, tgtDensity = TargetDensity, minSize = MinSize) for Mol in Mols] 348 349 return MolsFingerprints 350 351 def GenerateTopologicalTorsionsFingerprints(Mols): 352 """Generate TopologicalTorsions fingerprints.""" 353 354 MiscUtil.PrintInfo("\nGenerating TopologicalTorsions %s fingerprints..." % OptionsInfo["SpecifiedFingerprintsType"]) 355 356 UseChirality = OptionsInfo["FingerprintsParams"]["TopologicalTorsions"]["UseChirality"] 357 FPSize = OptionsInfo["FingerprintsParams"]["TopologicalTorsions"]["FPSize"] 358 BitsPerHash = OptionsInfo["FingerprintsParams"]["TopologicalTorsions"]["BitsPerHash"] 359 360 if re.match("^BitVect$", OptionsInfo["SpecifiedFingerprintsType"], re.I): 361 # Generate ExplicitBitVect fingerprints... 
362 MiscUtil.PrintInfo("FPSize: %s; BitsPerHash: %s" % (FPSize, BitsPerHash)) 363 MolsFingerprints = [rdMolDescriptors.GetHashedTopologicalTorsionFingerprintAsBitVect(Mol, includeChirality = UseChirality, nBits = FPSize, nBitsPerEntry = BitsPerHash) for Mol in Mols] 364 else: 365 # Generate LongSparseIntVect fingerprint... 366 MolsFingerprints = [rdMolDescriptors.GetTopologicalTorsionFingerprint(Mol, includeChirality = UseChirality) for Mol in Mols] 367 368 return MolsFingerprints 369 370 def GenerateLowerTriangularDistanceMatrix(MolsFingerprints): 371 """Generate a lower triangular distance matrix without the diagonal.""" 372 373 SimilarityFunction = OptionsInfo["SimilarityFunction"] 374 375 DistanceMatrix = [] 376 NumFPs = len(MolsFingerprints) 377 for Index1 in range(0, NumFPs): 378 for Index2 in range(0, Index1): 379 Distance = 1 - SimilarityFunction(MolsFingerprints[Index1], MolsFingerprints[Index2],) 380 DistanceMatrix.append(Distance) 381 382 return DistanceMatrix 383 384 def SetupMoleculeWriters(ClustersCount): 385 """Set up molecule writers for SD and text files.""" 386 387 Writer = None 388 ClustersOutfilesWriters = [] 389 390 TextOutFileMode = OptionsInfo["TextOutFileMode"] 391 TextOutFileDelim = OptionsInfo["TextOutFileDelim"] 392 TextOutFileTitleLine = OptionsInfo["TextOutFileTitleLine"] 393 394 if OptionsInfo["SingleOutFileMode"]: 395 Outfile = OptionsInfo["Outfile"] 396 if TextOutFileMode: 397 Writer = open(Outfile, "w") 398 else: 399 Writer = RDKitUtil.MoleculesWriter(Outfile, **OptionsInfo["OutfileParams"]) 400 if Writer is None: 401 MiscUtil.PrintError("Failed to setup a writer for output fie %s " % Outfile) 402 403 if TextOutFileMode: 404 if TextOutFileTitleLine: 405 WriteTextFileHeaderLine(Writer, TextOutFileDelim) 406 407 MiscUtil.PrintInfo("Generating file %s..." 
% Outfile) 408 else: 409 for ClusterIndex in range(0, ClustersCount): 410 Outfile = OptionsInfo["ClustersOutfiles"][ClusterIndex] 411 if TextOutFileMode: 412 ClusterWriter = open(Outfile, "w") 413 else: 414 ClusterWriter = RDKitUtil.MoleculesWriter(Outfile, **OptionsInfo["OutfileParams"]) 415 if ClusterWriter is None: 416 MiscUtil.PrintError("Failed to setup a writer for output fie %s " % Outfile) 417 418 if TextOutFileMode: 419 if TextOutFileTitleLine: 420 WriteTextFileHeaderLine(ClusterWriter, TextOutFileDelim) 421 422 ClustersOutfilesWriters.append(ClusterWriter) 423 424 if ClustersCount > 4: 425 MiscUtil.PrintInfo("Generating %d output files with the following file name format: %s_Cluster<Num>.%s" % (ClustersCount, OptionsInfo["OutfileBasename"], OptionsInfo["OutfileExt"])) 426 else: 427 Delmiter = ',' 428 OutfileNames = Delmiter.join(OptionsInfo["ClustersOutfiles"]) 429 MiscUtil.PrintInfo("Generating %d output files: %s..." % (ClustersCount, OutfileNames)) 430 431 return (Writer, ClustersOutfilesWriters) 432 433 def WriteTextFileHeaderLine(Writer, TextOutFileDelim): 434 """Write out a header line for text files including SMILEs file.""" 435 436 Line = TextOutFileDelim.join(["SMILES", "Name", "ClusterNumber"]) 437 Writer.write("%s\n" % Line) 438 439 def SetupClustersOutFilesNames(ClustersCount): 440 """Set up out file names for clusters.""" 441 442 OptionsInfo["ClustersOutfiles"] = [] 443 if OptionsInfo["SingleOutFileMode"] or ClustersCount == 0: 444 # Nothing to do... 
445 return 446 447 OutfileBasename = OptionsInfo["OutfileBasename"] 448 OutfileExt = OptionsInfo["OutfileExt"] 449 450 ClusterOutfiles = [] 451 for ClusterIndex in range(0, ClustersCount): 452 ClusterNum = ClusterIndex + 1 453 ClusterOutfile = "%s_Cluster%d.%s" % (OutfileBasename, ClusterNum, OutfileExt) 454 ClusterOutfiles.append(ClusterOutfile) 455 456 OptionsInfo["ClustersOutfiles"] = ClusterOutfiles 457 458 def ProcessFingerprintsParameters(): 459 """Set up and process fingerprints parameters.""" 460 461 SetupFingerprintsNamesAndParameters() 462 463 ProcessSpecifiedFingerprintsName() 464 ProcessSpecifiedFingerprintsType() 465 466 ProcessSpecifiedFingerprintsParameters() 467 468 def SetupFingerprintsNamesAndParameters(): 469 """Set up fingerprints parameters.""" 470 471 OptionsInfo["FingerprintsNames"] = ["AtomPairs", "MACCS166Keys", "Morgan", "MorganFeatures", "PathLength", "TopologicalTorsions"] 472 473 OptionsInfo["FingerprintsParams"] = {} 474 OptionsInfo["FingerprintsParams"]["AtomPairs"] = {"MinLength": 1, "MaxLength": 30, "UseChirality": False, "FPSize": 2048, "BitsPerHash": 2} 475 OptionsInfo["FingerprintsParams"]["MACCS166Keys"] = {} 476 OptionsInfo["FingerprintsParams"]["Morgan"] = {"Radius": 2, "UseChirality": False, "FPSize": 2048} 477 OptionsInfo["FingerprintsParams"]["MorganFeatures"] = {"Radius": 2, "UseChirality": False, "FPSize": 2048} 478 OptionsInfo["FingerprintsParams"]["TopologicalTorsions"] = {"UseChirality": False, "FPSize": 2048, "BitsPerHash": 4} 479 OptionsInfo["FingerprintsParams"]["PathLength"] = {"MinPath": 1, "MaxPath": 7, "FPSize": 2048, "BitsPerHash": 2} 480 481 def ProcessSpecifiedFingerprintsName(): 482 """Process specified fingerprints name.""" 483 484 # Set up a canonical fingerprints name map... 485 CanonicalFingerprintsNamesMap = {} 486 for Name in OptionsInfo["FingerprintsNames"]: 487 CanonicalName = Name.lower() 488 CanonicalFingerprintsNamesMap[CanonicalName] = Name 489 490 # Validate specified fingerprints name... 
491 CanonicalFingerprintsName = OptionsInfo["Fingerprints"].lower() 492 if CanonicalFingerprintsName not in CanonicalFingerprintsNamesMap: 493 MiscUtil.PrintError("The fingerprints name, %s, specified using \"-f, --fingerprints\" option is not a valid name." % (OptionsInfo["Fingerprints"])) 494 495 OptionsInfo["SpecifiedFingerprints"] = CanonicalFingerprintsNamesMap[CanonicalFingerprintsName] 496 497 def ProcessSpecifiedFingerprintsType(): 498 """Process specified fingerprints type.""" 499 500 FingerprintsName = OptionsInfo["SpecifiedFingerprints"] 501 FingerprintsType = OptionsInfo["FingerprintsType"] 502 SimilarityName = OptionsInfo["SimilarityMetric"] 503 504 if re.match("^auto$", FingerprintsType, re.I): 505 if re.match("^(MACCS166Keys|PathLength)$", FingerprintsName, re.I): 506 SpecifiedFingerprintsType = "BitVect" 507 else: 508 if re.match("^(Tanimoto|Dice)$", SimilarityName, re.I): 509 SpecifiedFingerprintsType = "IntVect" 510 else: 511 SpecifiedFingerprintsType = "BitVect" 512 elif re.match("^IntVect$", FingerprintsType, re.I): 513 SpecifiedFingerprintsType = "IntVect" 514 515 if re.match("^(MACCS166Keys|PathLength)$", FingerprintsName, re.I): 516 MiscUtil.PrintError("The fingerprints Type, %s, specified using \"--fingerprintsType\" is not allowed for fingerprints %s." % (FingerprintsType, FingerprintsName)) 517 518 # RDKit similarity functions, besides Dice and Tanimoto, are not able to handle int bit vectors... 519 if not re.match("^(Tanimoto|Dice)$", SimilarityName, re.I): 520 MiscUtil.PrintError("The fingerprints Type, %s, specified using \"--fingerprintsType\" is not allowed for similarity metric %s.\nSupported similarity metrics: Tanimoto or Dice" % (FingerprintsType, SimilarityName)) 521 elif re.match("^BitVect$", FingerprintsType, re.I): 522 SpecifiedFingerprintsType = "BitVect" 523 else: 524 MiscUtil.PrintError("The fingerprints Type, %s, is not supported." 
% (FingerprintsType)) 525 526 OptionsInfo["SpecifiedFingerprintsType"] = SpecifiedFingerprintsType 527 528 def ProcessSpecifiedFingerprintsParameters(): 529 """Process specified fingerprints parameters.""" 530 531 if re.match("^auto$", OptionsInfo["ParamsFingerprints"], re.I): 532 # Nothing to process... 533 return 534 535 SpecifiedFingerprintsName = OptionsInfo["SpecifiedFingerprints"] 536 537 # Parse specified fingerprints parameters... 538 ParamsFingerprints = re.sub(" ", "", OptionsInfo["ParamsFingerprints"]) 539 if not ParamsFingerprints: 540 MiscUtil.PrintError("No valid parameter name and value pairs specified using \"-p, --paramsFingerprints\" option corrresponding to fingerprints %s." % (SpecifiedFingerprintsName)) 541 542 ParamsFingerprintsWords = ParamsFingerprints.split(",") 543 if len(ParamsFingerprintsWords) % 2: 544 MiscUtil.PrintError("The number of comma delimited paramater names and values, %d, specified using \"-p, --paramsFingerprints\" option must be an even number." % (len(ParamsFingerprintsWords))) 545 546 # Setup canonical parameter names for specified fingerprints... 547 ValidParamNames = [] 548 CanonicalParamNamesMap = {} 549 for ParamName in sorted(OptionsInfo["FingerprintsParams"][SpecifiedFingerprintsName]): 550 ValidParamNames.append(ParamName) 551 CanonicalParamNamesMap[ParamName.lower()] = ParamName 552 553 # Validate and set paramater names and value... 554 for Index in range(0, len(ParamsFingerprintsWords), 2): 555 Name = ParamsFingerprintsWords[Index] 556 Value = ParamsFingerprintsWords[Index + 1] 557 558 CanonicalName = Name.lower() 559 if not CanonicalName in CanonicalParamNamesMap: 560 MiscUtil.PrintError("The parameter name, %s, specified using \"-p, --paramsFingerprints\" option for fingerprints, %s, is not a valid name. 
Supported parameter names: %s" % (Name, SpecifiedFingerprintsName, " ".join(ValidParamNames))) 561 562 ParamName = CanonicalParamNamesMap[CanonicalName] 563 if re.match("^UseChirality$", ParamName, re.I): 564 if not re.match("^(Yes|No|True|False)$", Value, re.I): 565 MiscUtil.PrintError("The parameter value, %s, specified using \"-p, --paramsFingerprints\" option for fingerprints, %s, is not a valid value. Supported values: Yes No True False" % (Value, SpecifiedFingerprintsName)) 566 ParamValue = False 567 if re.match("^(Yes|True)$", Value, re.I): 568 ParamValue = True 569 else: 570 ParamValue = int(Value) 571 if ParamValue <= 0: 572 MiscUtil.PrintError("The parameter value, %s, specified using \"-p, --paramsFingerprints\" option for fingerprints, %s, is not a valid value. Supported values: > 0" % (Value, SpecifiedFingerprintsName)) 573 574 # Set value... 575 OptionsInfo["FingerprintsParams"][SpecifiedFingerprintsName][ParamName] = ParamValue 576 577 def ProcessSimilarityMetricParameter(): 578 """Process specified similarity metric value.""" 579 580 SimilarityInfoMap = {} 581 CanonicalNameMap = {} 582 583 for SimilarityFunctionInfo in DataStructs.similarityFunctions: 584 Name = SimilarityFunctionInfo[0] 585 Function = SimilarityFunctionInfo[1] 586 587 SimilarityInfoMap[Name] = Function 588 CanonicalName = Name.lower() 589 CanonicalNameMap[CanonicalName] = Name 590 591 SpecifiedCanonicalName = OptionsInfo["SimilarityMetric"].lower() 592 SimilarityFunction = None 593 if SpecifiedCanonicalName in CanonicalNameMap: 594 SimilarityName = CanonicalNameMap[SpecifiedCanonicalName] 595 SimilarityFunction = SimilarityInfoMap[SimilarityName] 596 else: 597 MiscUtil.PrintError("Similarity metric name, %s, is not a valid name. 
" % OptionsInfo["SimilarityMetric"]) 598 599 OptionsInfo["SimilarityMetric"] = SimilarityName 600 OptionsInfo["SimilarityFunction"] = SimilarityFunction 601 602 def ProcessClusteringMethodParameter(): 603 """Process specified clustering method parameter.""" 604 605 OptionsInfo["SpecifiedHierarchicalClusteringMethod"] = "" 606 OptionsInfo["SpecifiedHierarchicalClusteringMethodID"] = "" 607 608 if re.match("^Butina$", OptionsInfo["ClusteringMethod"], re.I): 609 # Nothing to process... 610 return 611 612 # Setup a canonical cluster method name map.. 613 ClusteringMethodInfoMap = {} 614 CanonicalClusteringMethodNameMap = {} 615 for Name in sorted(rdSimDivPickers.ClusterMethod.names): 616 NameID = rdSimDivPickers.ClusterMethod.names[Name] 617 ClusteringMethodInfoMap[Name] = NameID 618 619 CanonicalName = Name.lower() 620 CanonicalClusteringMethodNameMap[CanonicalName] = Name 621 622 CanonicalName = OptionsInfo["ClusteringMethod"].lower() 623 if not CanonicalName in CanonicalClusteringMethodNameMap: 624 MiscUtil.PrintError("The clustering method, %s, specified using \"-c, --clusteringMethod\" option is not a valid name." % (OptionsInfo["ClusteringMethod"])) 625 626 SpecifiedHierarchicalClusteringMethodName = CanonicalClusteringMethodNameMap[CanonicalName] 627 OptionsInfo["SpecifiedHierarchicalClusteringMethod"] = SpecifiedHierarchicalClusteringMethodName 628 OptionsInfo["SpecifiedHierarchicalClusteringMethodID"] = ClusteringMethodInfoMap[SpecifiedHierarchicalClusteringMethodName] 629 630 def ProcessOptions(): 631 """Process and validate command line arguments and options.""" 632 633 MiscUtil.PrintInfo("Processing options...") 634 635 # Validate options... 
636 ValidateOptions() 637 638 OptionsInfo["ButinaSimilarityCutoff"] = float(Options["--butinaSimilarityCutoff"]) 639 OptionsInfo["ButinaReordering"] = False 640 if re.match("^Yes$", Options["--butinaReordering"], re.I): 641 OptionsInfo["ButinaReordering"] = True 642 643 OptionsInfo["Fingerprints"] = Options["--fingerprints"] 644 OptionsInfo["FingerprintsType"] = Options["--fingerprintsType"] 645 646 OptionsInfo["ClusteringMethod"] = Options["--clusteringMethod"] 647 ProcessClusteringMethodParameter() 648 649 OptionsInfo["NumClusters"] = int(Options["--numClusters"]) 650 651 OptionsInfo["Infile"] = Options["--infile"] 652 OptionsInfo["InfileParams"] = MiscUtil.ProcessOptionInfileParameters("--infileParams", Options["--infileParams"], Options["--infile"]) 653 654 OptionsInfo["Outfile"] = Options["--outfile"] 655 OptionsInfo["OutfileParams"] = MiscUtil.ProcessOptionOutfileParameters("--outfileParams", Options["--outfileParams"], Options["--infile"], Options["--outfile"]) 656 657 OptionsInfo["Overwrite"] = Options["--overwrite"] 658 659 OptionsInfo["OutFileMode"] = Options["--outfileMode"] 660 SingleOutFileMode = True 661 if not re.match("^SingleFile$", Options["--outfileMode"], re.I): 662 SingleOutFileMode = False 663 OptionsInfo["SingleOutFileMode"] = SingleOutFileMode 664 665 FileDir, FileName, FileExt = MiscUtil.ParseFileName(Options["--outfile"]) 666 OptionsInfo["OutfileBasename"] = FileName 667 OptionsInfo["OutfileExt"] = FileExt 668 669 TextOutFileMode = False 670 TextOutFileDelim = "" 671 TextOutFileTitleLine = True 672 673 if MiscUtil.CheckFileExt(Options["--outfile"], "csv"): 674 TextOutFileMode = True 675 TextOutFileDelim = "," 676 elif MiscUtil.CheckFileExt(Options["--outfile"], "tsv txt"): 677 TextOutFileMode = True 678 TextOutFileDelim = "\t" 679 elif MiscUtil.CheckFileExt(Options["--outfile"], "smi"): 680 TextOutFileMode = True 681 TextOutFileDelim = OptionsInfo["OutfileParams"]["SMILESDelimiter"] 682 TextOutFileTitleLine = 
OptionsInfo["OutfileParams"]["SMILESTitleLine"] 683 684 OptionsInfo["TextOutFileMode"] = TextOutFileMode 685 OptionsInfo["TextOutFileDelim"] = TextOutFileDelim 686 OptionsInfo["TextOutFileTitleLine"] = TextOutFileTitleLine 687 688 OptionsInfo["SimilarityMetric"] = Options["--similarityMetric"] 689 ProcessSimilarityMetricParameter() 690 691 OptionsInfo["ParamsFingerprints"] = Options["--paramsFingerprints"] 692 ProcessFingerprintsParameters() 693 694 def RetrieveOptions(): 695 """Retrieve command line arguments and options.""" 696 697 # Get options... 698 global Options 699 Options = docopt(_docoptUsage_) 700 701 # Set current working directory to the specified directory... 702 WorkingDir = Options["--workingdir"] 703 if WorkingDir: 704 os.chdir(WorkingDir) 705 706 # Handle examples option... 707 if "--examples" in Options and Options["--examples"]: 708 MiscUtil.PrintInfo(MiscUtil.GetExamplesTextFromDocOptText(_docoptUsage_)) 709 sys.exit(0) 710 711 def ValidateOptions(): 712 """Validate option values.""" 713 714 MiscUtil.ValidateOptionFloatValue("-b, --butinaSimilarityCutoff", Options["--butinaSimilarityCutoff"], {">": 0.0, "<=" : 1.0}) 715 MiscUtil.ValidateOptionTextValue("--butinaReordering", Options["--butinaReordering"], "yes no") 716 717 MiscUtil.ValidateOptionTextValue("-c, --clusteringMethod", Options["--clusteringMethod"], "Butina Centroid CLink Gower McQuitty SLink UPGMA Ward") 718 MiscUtil.ValidateOptionTextValue("-f, --fingerprints", Options["--fingerprints"], "AtomPairs MACCS166Keys Morgan MorganFeatures PathLength TopologicalTorsions") 719 MiscUtil.ValidateOptionTextValue("--fingerprintsType", Options["--fingerprintsType"], "IntVect BitVect auto") 720 721 MiscUtil.ValidateOptionIntegerValue("-n, --numClusters", Options["--numClusters"], {">": 0}) 722 723 MiscUtil.ValidateOptionFilePath("-i, --infile", Options["--infile"]) 724 MiscUtil.ValidateOptionFileExt("-i, --infile", Options["--infile"], "sdf sd mol smi csv tsv txt") 725 726 
MiscUtil.ValidateOptionFileExt("-o, --outfile", Options["--outfile"], "sdf sd smi csv tsv txt") 727 MiscUtil.ValidateOptionsOutputFileOverwrite("-o, --outfile", Options["--outfile"], "--overwrite", Options["--overwrite"]) 728 MiscUtil.ValidateOptionsDistinctFileNames("-i, --infile", Options["--infile"], "-o, --outfile", Options["--outfile"]) 729 730 MiscUtil.ValidateOptionTextValue("--outfileMode", Options["--outfileMode"], "SingleFile MultipleFiles") 731 732 MiscUtil.ValidateOptionTextValue("-s, --similarityMetric", Options["--similarityMetric"], "BraunBlanquet Cosine Dice Kulczynski RogotGoldberg Russel Sokal Tanimoto") 733 734 # Setup a usage string for docopt... 735 _docoptUsage_ = """ 736 RDKitClusterMolecules.py - Cluster molecules using 2D fingerprints 737 738 Usage: 739 RDKitClusterMolecules.py [--butinaSimilarityCutoff <number>] [--butinaReordering <yes or no>] 740 [--clusteringMethod <Butina, Centroid, CLink...>] [--fingerprints <MACCS166Keys, Morgan, PathLength...> ] 741 [--fingerprintsType <IntVect, BitVect, or Auto>] [--infileParams <Name,Value,...>] 742 [--numClusters <number>] [--outfileMode <SingleFile or MultipleFiles>] 743 [ --outfileParams <Name,Value,...> ] [--overwrite] [--paramsFingerprints <Name,Value,...>] 744 [--similarityMetric <Dice, Tanimoto...>] [-w <dir>] -i <infile> -o <outfile> 745 RDKitClusterMolecules.py -h | --help | -e | --examples 746 747 Description: 748 Cluster molecules based on a variety of 2D fingerprints using Butina [ Ref 136 ] or any 749 other available hierarchical clustering methodology and write them to output file(s). 750 751 The supported input file formats are: Mol (.mol), SD (.sdf, .sd), SMILES (.smi, 752 .txt, .csv, .tsv) 753 754 The supported output file formats are: SD (.sdf, .sd), SMILES (.smi), CSV/TSV 755 (.csv, .tsv, .txt) 756 757 Options: 758 -b, --butinaSimilarityCutoff <number> [default: 0.55] 759 Similarity cutoff to use during Butina clustering. 
The molecule pairs with 760 similarity value greater than specified value or distance less than '1 - specified 761 value' are considered neighbors. This value is only used during 'Butina' value 762 of '-c, --clusteringMethod' option and determines the number of clusters 763 during the clustering of molecules. It is ignored for all other clustering methods. 764 --butinaReordering <yes or no> [default: no] 765 Update number of neighbors for unassigned molecules after creating a new 766 cluster in order to ensure that the molecule with the largest number of 767 unassigned neighbors is selected as the next cluster center. 768 -c, --clusteringMethod <Butina, Centroid, CLink...> [default: Butina] 769 Clustering method to use for clustering molecules. Supported values: 770 Butina, Centroid, CLink, Gower, McQuitty, SLink, UPGMA, Ward. 771 Butina is an unsupervised database clustering method to automatically 772 cluster small and large data sets. All other clustering methods correspond 773 to hierarchical clustering and require a priori specification of number of 774 clusters to be generated. 775 -f, --fingerprints <MACCS166Keys, Morgan, PathLength...> [default: Morgan] 776 Fingerprints to use for calculating similarity/distance between molecules. 777 Supported values: AtomPairs, MACCS166Keys, Morgan, MorganFeatures, PathLength, 778 TopologicalTorsions. The PathLength fingerprints are Daylight-like fingerprints. 779 The Morgan and MorganFeatures fingerprints are circular fingerprints, corresponding 780 to Scitegic's Extended Connectivity Fingerprints (ECFP) and Features Connectivity 781 Fingerprints (FCFP). The values of default parameters for generating fingerprints 782 can be modified using '-p, --paramsFingerprints' option. 783 --fingerprintsType <IntVect, BitVect, or auto> [default: auto] 784 Fingerprints type to generate for calculating similarity. Supported values: 785 IntVect, BitVect, auto.
786 787 The following default fingerprints types are automatically generated for 788 available fingerprints, based on the value of similarity metric: 789 790 AtomPairs Tanimoto|Dice: IntVect All Others: BitVect 791 MACCS166Keys All: BitVect 792 Morgan Tanimoto|Dice: IntVect All Others: BitVect 793 MorganFeatures Tanimoto|Dice: IntVect All Others: BitVect 794 PathLength All: BitVect 795 TopologicalTorsions Tanimoto|Dice: IntVect All Others: BitVect 796 797 The Dice and Tanimoto similarity functions available in RDKit are able to 798 handle fingerprints corresponding to both IntVect and BitVect. All other 799 similarity functions, however, expect BitVect fingerprints to calculate 800 pairwise similarity. Consequently, BitVect fingerprints, instead of 801 default IntVect fingerprints, are generated for AtomPairs, Morgan, 802 MorganFeatures, and TopologicalTorsions during the calculation 803 of similarity using all other similarity functions. 804 805 The IntVect fingerprints type is not available for MACCS166Keys and 806 PathLength fingerprints. In addition, IntVect fingerprints type is only 807 valid for Tanimoto or Dice value of '-s, --similarityMetric' option. The 808 BitVect fingerprints type is valid for all values of '-s, --similarityMetric' 809 option. 810 -e, --examples 811 Print examples. 812 -h, --help 813 Print this help message. 814 -i, --infile <infile> 815 Input file name. 816 --infileParams <Name,Value,...> [default: auto] 817 A comma delimited list of parameter name and value pairs for reading 818 molecules from files. The supported parameter names for different file 819 formats, along with their default values, are shown below: 820 821 SD, MOL: removeHydrogens,yes,sanitize,yes,strictParsing,yes 822 SMILES: smilesColumn,1,smilesNameColumn,2,smilesDelimiter,space, 823 smilesTitleLine,auto,sanitize,yes 824 825 Possible values for smilesDelimiter: space, comma or tab.
826 -n, --numClusters <number> [default: 10] 827 Number of clusters to generate during hierarchical clustering. This option is 828 ignored for 'Butina' value of '-c, --clusteringMethod' option. 829 -o, --outfile <outfile> 830 Output file name. 831 --outfileMode <SingleFile or MultipleFiles> [default: SingleFile] 832 Write out a single file containing molecule clusters or generate an individual file 833 for each cluster. Possible values: SingleFile or MultipleFiles. The molecules are 834 grouped for each cluster before they are written to output file(s) along with 835 appropriate cluster numbers. The cluster number is also appended to output 836 file names during generation of multiple output files. 837 --outfileParams <Name,Value,...> [default: auto] 838 A comma delimited list of parameter name and value pairs for writing 839 molecules to files. The supported parameter names for different file 840 formats, along with their default values, are shown below: 841 842 SD: compute2DCoords,auto,kekulize,yes,forceV3000,no 843 SMILES: smilesKekulize,no,smilesDelimiter,space, smilesIsomeric,yes, 844 smilesTitleLine,yes 845 846 Default value for compute2DCoords: yes for SMILES input file; no for all other 847 file types. The kekulize and smilesIsomeric parameters are also used during 848 generation of SMILES strings for CSV/TSV files. 849 --overwrite 850 Overwrite existing files. 851 -p, --paramsFingerprints <Name,Value,...> [default: auto] 852 Parameter values to use for generating fingerprints. The default values 853 are dependent on the value of '-f, --fingerprints' option. In general, it is a 854 comma delimited list of parameter name and value pairs for the name of 855 fingerprints specified using '-f, --fingerprints' option. 
The supported 856 parameter names along with their default values for valid fingerprints 857 names are shown below: 858 859 AtomPairs: minLength,1 ,maxLength,30, useChirality,No, 860 fpSize, 2048, bitsPerHash,4 861 Morgan: radius,2, useChirality,No, fpSize, 2048 862 MorganFeatures: radius,2, useChirality,No, fpSize, 2048 863 PathLength: minPath,1, maxPath,7, fpSize, 2048, bitsPerHash,2 864 TopologicalTorsions: useChirality,No, fpSize, 2048, bitsPerHash,4 865 866 The fpSize and bitsPerHash are only used for BitVect fingerprints type 867 specified using '--fingerprintsType' option. 868 -s, --similarityMetric <Dice, Tanimoto...> [default: Tanimoto] 869 Similarity metric to use for calculating similarity/distance between molecules. 870 Possible values: BraunBlanquet, Cosine, Dice, Kulczynski, RogotGoldberg, 871 Russel, Sokal, Tanimoto. 872 -w, --workingdir <dir> 873 Location of working directory which defaults to the current directory. 874 875 Examples: 876 To cluster molecules using Butina methodology at a similarity cutoff of 0.55 877 with automatic determination of number of clusters, Tanimoto similarity 878 metric corresponding to Morgan fingerprints with radius of 2, and write out 879 a single SMILES file containing clustered molecules along with cluster number 880 for each molecule, type: 881 882 % RDKitClusterMolecules.py -i Sample.smi -o SampleOut.smi 883 884 To cluster molecules using Butina methodology at a similarity cutoff of 0.55 885 with automatic determination of number of clusters, Tanimoto similarity 886 metric corresponding to Morgan fingerprints with radius of 2 and type 887 BitVect, fingerprint BitVect size of 4096, and write out a single SMILES file 888 containing clustered molecules along with cluster number for each molecule, 889 type: 890 891 % RDKitClusterMolecules.py -f Morgan --fingerprintsType BitVect 892 -p "fpSize,4096" -s Tanimoto -i Sample.smi -o SampleOut.smi 893 894 To cluster molecules using Butina methodology at similarity cutoff of 
0.45 895 with automatic determination of number of clusters, Dice similarity metric 896 corresponding to Morgan fingerprints with radius of 2, and write out multiple 897 SD files containing clustered molecules for each cluster, type: 898 899 % RDKitClusterMolecules.py -b 0.45 -s Dice --outfileMode MultipleFiles 900 -i Sample.smi -o SampleOut.sdf 901 902 To cluster molecules using Ward hierarchical methodology to generate 15 903 clusters, Dice similarity metric corresponding to PathLength fingerprints with 904 path length between 1 and 7, and write out a single TSV file for clustered 905 molecules along with cluster number for each molecule, type: 906 907 % RDKitClusterMolecules.py -c Ward -f PathLength -n 15 -s Dice 908 -p 'minPath,1, maxPath,7' -i Sample.sdf -o SampleOut.tsv 909 910 To cluster molecules using Centroid hierarchical methodology to generate 5 911 clusters, Dice similarity metric corresponding to MACCS166Keys fingerprints 912 for molecules in a SMILES CSV file, SMILES strings in column 1, name in 913 column 2, and write out a single SD file for clustered molecules along with 914 cluster number for each molecule, type: 915 916 % RDKitClusterMolecules.py -c Centroid -f MACCS166Keys -s Dice --infileParams 917 "smilesDelimiter,comma,smilesTitleLine,yes,smilesColumn,1, 918 smilesNameColumn,2" --outfileParams "compute2DCoords,yes" 919 -i SampleSMILES.csv -o SampleOut.sdf 920 921 Author: 922 Manish Sud(msud@san.rr.com) 923 924 See also: 925 RDKitConvertFileFormat.py, RDKitPickDiverseMolecules.py, RDKitSearchFunctionalGroups.py, 926 RDKitSearchSMARTS.py 927 928 Copyright: 929 Copyright (C) 2025 Manish Sud. All rights reserved. 930 931 The functionality available in this script is implemented using RDKit, an 932 open source toolkit for cheminformatics developed by Greg Landrum. 933 934 This file is part of MayaChemTools.

    MayaChemTools is free software; you can redistribute it and/or modify it under
    the terms of the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your option) any
    later version.

"""

# Run main() only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()