#!/usr/bin/perl -w
#
# File: PathLengthFingerprints.pl
# Author: Manish Sud <msud@san.rr.com>
#
# Copyright (C) 2024 Manish Sud. All rights reserved.
#
# This file is part of MayaChemTools.
#
# MayaChemTools is free software; you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation; either version 3 of the License, or (at your option) any
# later version.
#
# MayaChemTools is distributed in the hope that it will be useful, but without
# any warranty; without even the implied warranty of merchantability of fitness
# for a particular purpose. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
# write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
# Boston, MA, 02111-1307, USA.
#

use strict;
use FindBin; use lib "$FindBin::Bin/../lib";
use Getopt::Long;
use File::Basename;
use Text::ParseWords;
use Benchmark;
use FileUtil;
use TextUtil;
use SDFileUtil;
use MoleculeFileIO;
use FileIO::FingerprintsSDFileIO;
use FileIO::FingerprintsTextFileIO;
use FileIO::FingerprintsFPFileIO;
use AtomTypes::AtomicInvariantsAtomTypes;
use AtomTypes::FunctionalClassAtomTypes;
use Fingerprints::PathLengthFingerprints;

# File-scoped state shared by the main script and the subroutines below...
my($ScriptName, %Options, $StartTime, $EndTime, $TotalTime);

# Autoflush STDOUT
$| = 1;

# Announce the start of the run and start the timer...
$ScriptName = basename($0);
print "\n$ScriptName: Starting...\n\n";
$StartTime = Benchmark->new;

# Retrieve command line options; show usage when help is requested or no
# input file is specified...
SetupScriptUsage();
die GetUsageFromPod("$FindBin::Bin/$ScriptName") if ($Options{help} || @ARGV < 1);

# Expand command line arguments into the list of SD files to process...
my @SDFilesList = ExpandFileNames(\@ARGV, "sdf sd");

# Process options...
print "Processing options...\n";
my(%OptionsInfo);
ProcessOptions();

# Collect per-file information (validity, output names, data fields)...
print "Checking input SD file(s)...\n";
my(%SDFilesInfo);
RetrieveSDFilesInfo();

# Generate fingerprints for each input file which passed the checks...
my($FileIndex);
print "\nProcessing SD files...\n" if (@SDFilesList > 1);

for $FileIndex (0 .. $#SDFilesList) {
  next unless $SDFilesInfo{FileOkay}[$FileIndex];

  print "\nProcessing file $SDFilesList[$FileIndex]...\n";
  GeneratePathLengthFingerprints($FileIndex);
}
print "\n$ScriptName:Done...\n\n";

# Report elapsed time...
$EndTime = Benchmark->new;
$TotalTime = timediff($EndTime, $StartTime);
print "Total time: ", timestr($TotalTime), "\n";

###############################################################################

# Generate fingerprints for a SD file...
#
# Argument: index of the SD file in @SDFilesList. Opens the requested output
# files, reads the molecules one by one, writes out fingerprints for the ones
# which are not filtered out or failed, and prints summary statistics.
#
sub GeneratePathLengthFingerprints {
  my($FileIndex) = @_;

  my $SDFile = $SDFilesList[$FileIndex];

  # Output files are opened before the input file is read...
  my($NewFPSDFileIO, $NewFPTextFileIO, $NewFPFileIO) = SetupAndOpenOutputFiles($FileIndex);

  my $MoleculeFileIO = MoleculeFileIO->new('Name' => $SDFile);
  $MoleculeFileIO->Open();

  my($CmpdCount, $IgnoredCmpdCount) = (0, 0);

  COMPOUND: while (my $Molecule = $MoleculeFileIO->ReadMolecule()) {
    $CmpdCount++;

    # Optional filtering of compound data before calculating fingerprints...
    if ($OptionsInfo{Filter} && CheckAndFilterCompound($CmpdCount, $Molecule)) {
      $IgnoredCmpdCount++;
      next COMPOUND;
    }

    my $PathLengthFingerprints = GenerateMoleculeFingerprints($Molecule);
    unless ($PathLengthFingerprints) {
      $IgnoredCmpdCount++;
      ProcessIgnoredCompound('FingerprintsGenerationFailed', $CmpdCount, $Molecule);
      next COMPOUND;
    }

    WriteDataToOutputFiles($FileIndex, $CmpdCount, $Molecule, $PathLengthFingerprints, $NewFPSDFileIO, $NewFPTextFileIO, $NewFPFileIO);
  }
  $MoleculeFileIO->Close();

  # Close whichever output files were opened: SD, text and FP in that order...
  for my $FingerprintsFileIO ($NewFPSDFileIO, $NewFPTextFileIO, $NewFPFileIO) {
    $FingerprintsFileIO->Close() if $FingerprintsFileIO;
  }

  WriteFingerprintsGenerationSummaryStatistics($CmpdCount, $IgnoredCmpdCount);
}

# Process compound being ignored due to problems in fingerprints generation...
#
# Arguments: reason keyword, compound record number and molecule. Emits a
# warning naming the compound; any unrecognized reason keyword gets the same
# text as FingerprintsGenerationFailed.
#
sub ProcessIgnoredCompound {
  my($Mode, $CmpdCount, $Molecule) = @_;

  my $DataFieldLabelAndValuesRef = $Molecule->GetDataFieldLabelAndValues();
  my $CmpdID = SetupCmpdIDForOutputFiles($CmpdCount, $Molecule, $DataFieldLabelAndValuesRef);

  my $Reason;
  if ($Mode =~ /^ContainsNonElementalData$/i) {
    $Reason = "Compound contains atom data corresponding to non-elemental atom symbol(s)...";
  }
  elsif ($Mode =~ /^ContainsNoElementalData$/i) {
    $Reason = "Compound contains no atom data...";
  }
  else {
    # FingerprintsGenerationFailed and everything else...
    $Reason = "Fingerprints generation didn't succeed...";
  }

  warn "\nWarning: Ignoring compound record number $CmpdCount with ID $CmpdID: $Reason\n\n";
}

# Check and filter compounds....
#
# Returns 1, after issuing a warning, for a compound which contains any
# non-elemental atom data or no elemental atom data at all; otherwise, 0.
#
sub CheckAndFilterCompound {
  my($CmpdCount, $Molecule) = @_;

  my($ElementCount, $NonElementCount) = $Molecule->GetNumOfElementsAndNonElements();

  my $IgnoreMode = $NonElementCount ? 'ContainsNonElementalData'
                 : !$ElementCount   ? 'ContainsNoElementalData'
                 :                    '';
  return 0 unless $IgnoreMode;

  ProcessIgnoredCompound($IgnoreMode, $CmpdCount, $Molecule);
  return 1;
}

# Write out compounds fingerprints generation summary statistics...
#
# Arguments: total number of compounds read and number of compounds ignored.
#
sub WriteFingerprintsGenerationSummaryStatistics {
  my($CmpdCount, $IgnoredCmpdCount) = @_;

  my $ProcessedCmpdCount = $CmpdCount - $IgnoredCmpdCount;

  print "\nNumber of compounds: $CmpdCount\n";
  print "Number of compounds processed successfully during fingerprints generation: $ProcessedCmpdCount\n";
  print "Number of compounds ignored during fingerprints generation: $IgnoredCmpdCount\n";
}

# Open output files...
#
# Argument: index of the SD file in @SDFilesList. Returns the list
# (SD file IO, text file IO, FP file IO); an entry is undef when that type of
# output was not requested.
#
sub SetupAndOpenOutputFiles {
  my($FileIndex) = @_;

  # Parameters common to all fingerprints file IO objects; these depend on
  # the type of fingerprints being generated...
  my %FingerprintsFileIOParams = ();
  if ($OptionsInfo{Mode} =~ /^PathLengthBits$/i) {
    %FingerprintsFileIOParams = ('Mode' => 'Write', 'Overwrite' => $OptionsInfo{OverwriteFiles}, 'FingerprintsStringMode' => 'FingerprintsBitVectorString', 'BitStringFormat' => $OptionsInfo{BitStringFormat}, 'BitsOrder' => $OptionsInfo{BitsOrder});
  }
  elsif ($OptionsInfo{Mode} =~ /^PathLengthCount$/i) {
    %FingerprintsFileIOParams = ('Mode' => 'Write', 'Overwrite' => $OptionsInfo{OverwriteFiles}, 'FingerprintsStringMode' => 'FingerprintsVectorString', 'VectorStringFormat' => $OptionsInfo{VectorStringFormat});
  }

  my($NewFPSDFileIO, $NewFPTextFileIO, $NewFPFileIO) = (undef) x 3;

  if ($OptionsInfo{SDOutput}) {
    my $NewFPSDFile = $SDFilesInfo{SDOutFileNames}[$FileIndex];
    print "Generating SD file $NewFPSDFile...\n";
    $NewFPSDFileIO = FileIO::FingerprintsSDFileIO->new('Name' => $NewFPSDFile, %FingerprintsFileIOParams, 'FingerprintsFieldLabel' => $OptionsInfo{FingerprintsLabel});
    $NewFPSDFileIO->Open();
  }

  if ($OptionsInfo{FPOutput}) {
    my $NewFPFile = $SDFilesInfo{FPOutFileNames}[$FileIndex];
    print "Generating FP file $NewFPFile...\n";
    $NewFPFileIO = FileIO::FingerprintsFPFileIO->new('Name' => $NewFPFile, %FingerprintsFileIOParams);
    $NewFPFileIO->Open();
  }

  if ($OptionsInfo{TextOutput}) {
    my $NewFPTextFile = $SDFilesInfo{TextOutFileNames}[$FileIndex];
    my $ColLabelsRef = SetupFPTextFileCoulmnLabels($FileIndex);

    print "Generating text file $NewFPTextFile...\n";
    $NewFPTextFileIO = FileIO::FingerprintsTextFileIO->new('Name' => $NewFPTextFile, %FingerprintsFileIOParams, 'DataColLabels' => $ColLabelsRef, 'OutDelim' => $OptionsInfo{OutDelim}, 'OutQuote' => $OptionsInfo{OutQuote});
    $NewFPTextFileIO->Open();
  }

  return ($NewFPSDFileIO, $NewFPTextFileIO, $NewFPFileIO);
}

# Write fingerprints and other data to appropriate output files...
#
# Arguments: file index, compound record number, molecule, fingerprints
# object and the three output file IO objects; an undefined file IO object
# means that type of output is skipped.
#
sub WriteDataToOutputFiles {
  my($FileIndex, $CmpdCount, $Molecule, $PathLengthFingerprints, $NewFPSDFileIO, $NewFPTextFileIO, $NewFPFileIO) = @_;

  # Data fields are needed only for text and FP output...
  my $DataFieldLabelAndValuesRef = ($NewFPTextFileIO || $NewFPFileIO) ? $Molecule->GetDataFieldLabelAndValues() : undef;

  if ($NewFPSDFileIO) {
    # SD output: input compound string along with fingerprints...
    my $CmpdString = $Molecule->GetInputMoleculeString();
    $NewFPSDFileIO->WriteFingerprints($PathLengthFingerprints, $CmpdString);
  }

  if ($NewFPTextFileIO) {
    # Text output: data column values along with fingerprints...
    my $ColValuesRef = SetupFPTextFileCoulmnValues($FileIndex, $CmpdCount, $Molecule, $DataFieldLabelAndValuesRef);
    $NewFPTextFileIO->WriteFingerprints($PathLengthFingerprints, $ColValuesRef);
  }

  if ($NewFPFileIO) {
    # FP output: compound ID along with fingerprints...
    my $CompoundID = SetupCmpdIDForOutputFiles($CmpdCount, $Molecule, $DataFieldLabelAndValuesRef);
    $NewFPFileIO->WriteFingerprints($PathLengthFingerprints, $CompoundID);
  }
}

# Generate appropriate column labels for FPText output file...
#
# Argument: index of the SD file in @SDFilesList. Returns a reference to an
# array containing data column labels, selected according to the data fields
# mode, followed by the fingerprints label.
#
sub SetupFPTextFileCoulmnLabels {
  my($FileIndex) = @_;

  my $DataFieldsMode = $OptionsInfo{DataFieldsMode};

  my @DataColLabels = ();
  if ($DataFieldsMode =~ /^All$/i) {
    push @DataColLabels, @{$SDFilesInfo{AllDataFieldsRef}[$FileIndex]};
  }
  elsif ($DataFieldsMode =~ /^Common$/i) {
    push @DataColLabels, @{$SDFilesInfo{CommonDataFieldsRef}[$FileIndex]};
  }
  elsif ($DataFieldsMode =~ /^Specify$/i) {
    push @DataColLabels, @{$OptionsInfo{SpecifiedDataFields}};
  }
  elsif ($DataFieldsMode =~ /^CompoundID$/i) {
    push @DataColLabels, $OptionsInfo{CompoundIDLabel};
  }

  # Fingerprints label is always the last column...
  return [@DataColLabels, $OptionsInfo{FingerprintsLabel}];
}

# Generate column values FPText output file..
#
# Arguments: file index, compound record number, molecule and a reference to
# a hash of data field labels and values. Returns a reference to an array of
# column values: compound ID in CompoundID mode; otherwise, the value of
# each selected data field with an empty string for a missing field.
#
sub SetupFPTextFileCoulmnValues {
  my($FileIndex, $CmpdCount, $Molecule, $DataFieldLabelAndValuesRef) = @_;

  # Value of a data field, or empty string when compound doesn't have it...
  my $ValueForLabel = sub {
    my($Label) = @_;
    return exists $DataFieldLabelAndValuesRef->{$Label} ? $DataFieldLabelAndValuesRef->{$Label} : '';
  };

  my $DataFieldsMode = $OptionsInfo{DataFieldsMode};

  my @ColValues = ();
  if ($DataFieldsMode =~ /^CompoundID$/i) {
    push @ColValues, SetupCmpdIDForOutputFiles($CmpdCount, $Molecule, $DataFieldLabelAndValuesRef);
  }
  elsif ($DataFieldsMode =~ /^All$/i) {
    @ColValues = map { $ValueForLabel->($_) } @{$SDFilesInfo{AllDataFieldsRef}[$FileIndex]};
  }
  elsif ($DataFieldsMode =~ /^Common$/i) {
    @ColValues = map { $ValueForLabel->($_) } @{$SDFilesInfo{CommonDataFieldsRef}[$FileIndex]};
  }
  elsif ($DataFieldsMode =~ /^Specify$/i) {
    @ColValues = map { $ValueForLabel->($_) } @{$OptionsInfo{SpecifiedDataFields}};
  }

  return \@ColValues;
}

# Generate compound ID for FP and FPText output files..
#
# Arguments: compound record number, molecule and a reference to a hash of
# data field labels and values. The ID is derived according to compound ID
# mode; it's an empty string for an unrecognized mode or a missing data field.
#
sub SetupCmpdIDForOutputFiles {
  my($CmpdCount, $Molecule, $DataFieldLabelAndValuesRef) = @_;

  my $CompoundIDMode = $OptionsInfo{CompoundIDMode};

  if ($CompoundIDMode =~ /^MolNameOrLabelPrefix$/i) {
    # Fall back to label prefix and record number for a false molecule name...
    my $MolName = $Molecule->GetName();
    return $MolName if $MolName;
    return "$OptionsInfo{CompoundID}${CmpdCount}";
  }

  if ($CompoundIDMode =~ /^LabelPrefix$/i) {
    return "$OptionsInfo{CompoundID}${CmpdCount}";
  }

  if ($CompoundIDMode =~ /^DataField$/i) {
    my $SpecifiedDataField = $OptionsInfo{CompoundID};
    return '' unless exists $DataFieldLabelAndValuesRef->{$SpecifiedDataField};
    return $DataFieldLabelAndValuesRef->{$SpecifiedDataField};
  }

  if ($CompoundIDMode =~ /^MolName$/i) {
    my $CmpdID = $Molecule->GetName();
    return $CmpdID;
  }

  return '';
}

# Generate fingerprints for molecule...
#
# Argument: molecule. Prepares the molecule as requested by the options, and
# returns the fingerprints object or undef when rings detection or
# fingerprints generation fails. Dies on an unsupported mode.
#
sub GenerateMoleculeFingerprints {
  my($Molecule) = @_;

  # Prepare molecule...
  $Molecule->KeepLargestComponent() if $OptionsInfo{KeepLargestComponent};
  $Molecule->DeleteHydrogens() if $OptionsInfo{IgnoreHydrogens};

  if ($OptionsInfo{DetectAromaticity}) {
    # Rings must be detected before aromaticity...
    $Molecule->DetectRings() or return undef;

    $Molecule->SetAromaticityModel($OptionsInfo{AromaticityModel});
    $Molecule->DetectAromaticity();
  }

  if ($OptionsInfo{Mode} =~ /^PathLengthBits$/i) {
    my $PathLengthFingerprints = GeneratePathLengthBitsFingerprints($Molecule);
    return $PathLengthFingerprints;
  }

  if ($OptionsInfo{Mode} =~ /^PathLengthCount$/i) {
    my $PathLengthFingerprints = GeneratePathLengthCountFingerprints($Molecule);
    return $PathLengthFingerprints;
  }

  die "Error: The value specified, $Options{mode}, for option \"-m, --mode\" is not valid. Allowed values: PathLengthBits or PathLengthCount\n";
}

# Generate pathlength bits fingerprints for molecule...
#
# Argument: molecule. Returns the fingerprints object, folded to the folded
# size when folding is requested, or undef when generation doesn't succeed.
#
sub GeneratePathLengthBitsFingerprints {
  my($Molecule) = @_;

  my $PathLengthFingerprints = Fingerprints::PathLengthFingerprints->new(
    'Molecule' => $Molecule,
    'Type' => 'PathLengthBits',
    'AtomIdentifierType' => $OptionsInfo{AtomIdentifierType},
    'NumOfBitsToSetPerPath' => $OptionsInfo{NumOfBitsToSetPerPath},
    'Size' => $OptionsInfo{Size},
    'MinLength' => $OptionsInfo{MinPathLength},
    'MaxLength' => $OptionsInfo{MaxPathLength},
    'AllowRings' => $OptionsInfo{AllowRings},
    'AllowSharedBonds' => $OptionsInfo{AllowSharedBonds},
    'UseBondSymbols' => $OptionsInfo{UseBondSymbols},
    'UseUniquePaths' => $OptionsInfo{UseUniquePaths},
    'UsePerlCoreRandom' => $OptionsInfo{UsePerlCoreRandom},
  );

  # Pass along atom identifier type specific values...
  SetAtomIdentifierTypeValuesToUse($PathLengthFingerprints);

  $PathLengthFingerprints->GenerateFingerprints();

  # Nothing to return for unsuccessful fingerprints generation...
  return undef unless $PathLengthFingerprints->IsFingerprintsGenerationSuccessful();

  if ($OptionsInfo{Fold}) {
    # Second argument is the check size value flag; it's turned off here...
    $PathLengthFingerprints->FoldFingerprintsBySize($OptionsInfo{FoldedSize}, 0);
  }

  return $PathLengthFingerprints;
}

# Generate pathlength count fingerprints for molecule...
#
# Argument: molecule. Returns the fingerprints object or undef when
# generation doesn't succeed.
#
sub GeneratePathLengthCountFingerprints {
  my($Molecule) = @_;

  my $PathLengthFingerprints = Fingerprints::PathLengthFingerprints->new(
    'Molecule' => $Molecule,
    'Type' => 'PathLengthCount',
    'AtomIdentifierType' => $OptionsInfo{AtomIdentifierType},
    'MinLength' => $OptionsInfo{MinPathLength},
    'MaxLength' => $OptionsInfo{MaxPathLength},
    'AllowRings' => $OptionsInfo{AllowRings},
    'AllowSharedBonds' => $OptionsInfo{AllowSharedBonds},
    'UseBondSymbols' => $OptionsInfo{UseBondSymbols},
    'UseUniquePaths' => $OptionsInfo{UseUniquePaths},
  );

  # Pass along atom identifier type specific values...
  SetAtomIdentifierTypeValuesToUse($PathLengthFingerprints);

  $PathLengthFingerprints->GenerateFingerprints();

  # Nothing to return for unsuccessful fingerprints generation...
  return undef unless $PathLengthFingerprints->IsFingerprintsGenerationSuccessful();

  return $PathLengthFingerprints;
}

# Set atom identifier type to use for generating path strings...
#
# Argument: fingerprints object. Atomic invariants or functional classes
# specified through options are passed to the object; no additional values
# are set for the other supported atom identifier types. Dies on an
# unsupported type.
#
sub SetAtomIdentifierTypeValuesToUse {
  my($PathLengthFingerprints) = @_;

  my $AtomIdentifierType = $OptionsInfo{AtomIdentifierType};

  if ($AtomIdentifierType =~ /^AtomicInvariantsAtomTypes$/i) {
    $PathLengthFingerprints->SetAtomicInvariantsToUse(\@{$OptionsInfo{AtomicInvariantsToUse}});
    return;
  }

  if ($AtomIdentifierType =~ /^FunctionalClassAtomTypes$/i) {
    $PathLengthFingerprints->SetFunctionalClassesToUse(\@{$OptionsInfo{FunctionalClassesToUse}});
    return;
  }

  if ($AtomIdentifierType =~ /^(DREIDINGAtomTypes|EStateAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
    # Nothing to do for now...
    return;
  }

  die "Error: The value specified, $Options{atomidentifiertype}, for option \"-a, --AtomIdentifierType\" is not valid. Supported atom identifier types in current release of MayaChemTools: AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes\n";
}

# Retrieve information about SD files...
#
# Fills file-scoped %SDFilesInfo with one entry per file in @SDFilesList:
#
#   FileOkay            - 1 when the file passed all checks; otherwise, 0
#   OutFileRoot         - root used for output file names
#   SDOutFileNames, FPOutFileNames, TextOutFileNames - output file names
#   AllDataFieldsRef, CommonDataFieldsRef - data field labels, collected
#       only for text output in All or Common data fields mode
#
# A file is ignored, with a warning, when it doesn't exist, isn't a SD file,
# lacks the data field specified for compound ID, would be overwritten by
# its own SD output, or when an output file already exists and overwriting
# wasn't requested. Dies when an input file can't be opened.
#
sub RetrieveSDFilesInfo {
  my($SDFile, $Index, $FileDir, $FileExt, $FileName, $OutFileRoot, $TextOutFileExt, $SDOutFileExt, $FPOutFileExt, $NewSDFileName, $NewFPFileName, $NewTextFileName, $CheckDataField, $CollectDataFields, $AllDataFieldsRef, $CommonDataFieldsRef);

  %SDFilesInfo = ();
  @{$SDFilesInfo{FileOkay}} = ();
  @{$SDFilesInfo{OutFileRoot}} = ();
  @{$SDFilesInfo{SDOutFileNames}} = ();
  @{$SDFilesInfo{FPOutFileNames}} = ();
  @{$SDFilesInfo{TextOutFileNames}} = ();
  @{$SDFilesInfo{AllDataFieldsRef}} = ();
  @{$SDFilesInfo{CommonDataFieldsRef}} = ();

  $CheckDataField = ($OptionsInfo{TextOutput} && ($OptionsInfo{DataFieldsMode} =~ /^CompoundID$/i) && ($OptionsInfo{CompoundIDMode} =~ /^DataField$/i)) ? 1 : 0;
  $CollectDataFields = ($OptionsInfo{TextOutput} && ($OptionsInfo{DataFieldsMode} =~ /^(All|Common)$/i)) ? 1 : 0;

  FILELIST: for $Index (0 .. $#SDFilesList) {
    $SDFile = $SDFilesList[$Index];

    # Assume the worst until all the checks have passed...
    $SDFilesInfo{FileOkay}[$Index] = 0;
    $SDFilesInfo{OutFileRoot}[$Index] = '';
    $SDFilesInfo{SDOutFileNames}[$Index] = '';
    $SDFilesInfo{FPOutFileNames}[$Index] = '';
    $SDFilesInfo{TextOutFileNames}[$Index] = '';

    if (!(-e $SDFile)) {
      warn "Warning: Ignoring file $SDFile: It doesn't exist\n";
      next FILELIST;
    }
    if (!CheckFileType($SDFile, "sd sdf")) {
      warn "Warning: Ignoring file $SDFile: It's not a SD file\n";
      next FILELIST;
    }

    if ($CheckDataField) {
      # Make sure data field exists in SD file; only the first compound is checked...
      my($CmpdString, $SpecifiedDataField, $SDFileHandle, @CmpdLines, %DataFieldValues);

      @CmpdLines = ();
      # Three-argument open: file name is never interpreted as a mode or a command...
      open $SDFileHandle, '<', $SDFile or die "Error: Couldn't open $SDFile: $! \n";
      $CmpdString = ReadCmpdString($SDFileHandle);
      close $SDFileHandle;
      @CmpdLines = split "\n", $CmpdString;
      %DataFieldValues = GetCmpdDataHeaderLabelsAndValues(\@CmpdLines);
      $SpecifiedDataField = $OptionsInfo{CompoundID};
      if (!exists $DataFieldValues{$SpecifiedDataField}) {
        warn "Warning: Ignoring file $SDFile: Data field value, $SpecifiedDataField, using \"--CompoundID\" option in \"DataField\" \"--CompoundIDMode\" doesn't exist\n";
        next FILELIST;
      }
    }

    $AllDataFieldsRef = '';
    $CommonDataFieldsRef = '';
    if ($CollectDataFields) {
      my($CmpdCount, $SDFileHandle);
      open $SDFileHandle, '<', $SDFile or die "Error: Couldn't open $SDFile: $! \n";
      ($CmpdCount, $AllDataFieldsRef, $CommonDataFieldsRef) = GetAllAndCommonCmpdDataHeaderLabels($SDFileHandle);
      close $SDFileHandle;
    }

    # Setup output file names...
    $FileDir = ""; $FileName = ""; $FileExt = "";
    ($FileDir, $FileName, $FileExt) = ParseFileName($SDFile);

    $TextOutFileExt = "csv";
    if ($OptionsInfo{OutDelim} =~ /^tab$/i) {
      $TextOutFileExt = "tsv";
    }
    $SDOutFileExt = $FileExt;
    $FPOutFileExt = "fpf";

    # The specified root is used only for a single input file...
    if ($OptionsInfo{OutFileRoot} && (@SDFilesList == 1)) {
      my ($RootFileDir, $RootFileName, $RootFileExt) = ParseFileName($OptionsInfo{OutFileRoot});
      if ($RootFileName && $RootFileExt) {
        $FileName = $RootFileName;
      }
      else {
        $FileName = $OptionsInfo{OutFileRoot};
      }
      $OutFileRoot = $FileName;
    }
    else {
      $OutFileRoot = "${FileName}PathLengthFP";
    }

    $NewSDFileName = "${OutFileRoot}.${SDOutFileExt}";
    $NewFPFileName = "${OutFileRoot}.${FPOutFileExt}";
    $NewTextFileName = "${OutFileRoot}.${TextOutFileExt}";

    if ($OptionsInfo{SDOutput}) {
      # Output name is matched literally (\Q...\E), case-insensitively, anywhere in
      # the input name; regex metacharacters in file names carry no special meaning...
      if ($SDFile =~ /\Q$NewSDFileName\E/i) {
        warn "Warning: Ignoring input file $SDFile: Same output, $NewSDFileName, and input file names.\n";
        print "Specify a different name using \"-r --root\" option or use default name.\n";
        next FILELIST;
      }
    }

    if (!$OptionsInfo{OverwriteFiles}) {
      # Check SD, FP and text output files...
      if ($OptionsInfo{SDOutput}) {
        if (-e $NewSDFileName) {
          warn "Warning: Ignoring file $SDFile: The file $NewSDFileName already exists\n";
          next FILELIST;
        }
      }
      if ($OptionsInfo{FPOutput}) {
        if (-e $NewFPFileName) {
          warn "Warning: Ignoring file $SDFile: The file $NewFPFileName already exists\n";
          next FILELIST;
        }
      }
      if ($OptionsInfo{TextOutput}) {
        if (-e $NewTextFileName) {
          warn "Warning: Ignoring file $SDFile: The file $NewTextFileName already exists\n";
          next FILELIST;
        }
      }
    }

    $SDFilesInfo{FileOkay}[$Index] = 1;

    $SDFilesInfo{OutFileRoot}[$Index] = $OutFileRoot;
    $SDFilesInfo{SDOutFileNames}[$Index] = $NewSDFileName;
    $SDFilesInfo{FPOutFileNames}[$Index] = $NewFPFileName;
    $SDFilesInfo{TextOutFileNames}[$Index] = $NewTextFileName;

    $SDFilesInfo{AllDataFieldsRef}[$Index] = $AllDataFieldsRef;
    $SDFilesInfo{CommonDataFieldsRef}[$Index] = $CommonDataFieldsRef;
  }
}

# Process option values...
#
# Validates values in file-scoped %Options and stores the processed values
# in file-scoped %OptionsInfo. Yes/No options are converted into 1/0 flags.
# Dies with an error message on any invalid value or combination of values.
#
sub ProcessOptions {
  %OptionsInfo = ();

  $OptionsInfo{Mode} = $Options{mode};
  $OptionsInfo{AromaticityModel} = $Options{aromaticitymodel};
  $OptionsInfo{PathMode} = $Options{pathmode};

  ProcessAtomIdentifierTypeOptions();

  $OptionsInfo{BitsOrder} = $Options{bitsorder};
  $OptionsInfo{BitStringFormat} = $Options{bitstringformat};

  $OptionsInfo{CompoundIDMode} = $Options{compoundidmode};
  $OptionsInfo{CompoundIDLabel} = $Options{compoundidlabel};
  $OptionsInfo{DataFieldsMode} = $Options{datafieldsmode};

  my(@SpecifiedDataFields);
  @SpecifiedDataFields = ();

  @{$OptionsInfo{SpecifiedDataFields}} = ();
  $OptionsInfo{CompoundID} = '';

  if ($Options{datafieldsmode} =~ /^CompoundID$/i) {
    if ($Options{compoundidmode} =~ /^DataField$/i) {
      if (!$Options{compoundid}) {
        die "Error: You must specify a value for \"--CompoundID\" option in \"DataField\" \"--CompoundIDMode\". \n";
      }
      $OptionsInfo{CompoundID} = $Options{compoundid};
    }
    elsif ($Options{compoundidmode} =~ /^(LabelPrefix|MolNameOrLabelPrefix)$/i) {
      $OptionsInfo{CompoundID} = $Options{compoundid} ? $Options{compoundid} : 'Cmpd';
    }
  }
  elsif ($Options{datafieldsmode} =~ /^Specify$/i) {
    if (!$Options{datafields}) {
      die "Error: You must specify a value for \"--DataFields\" option in \"Specify\" \"-d, --DataFieldsMode\". \n";
    }
    @SpecifiedDataFields = split /\,/, $Options{datafields};
    push @{$OptionsInfo{SpecifiedDataFields}}, @SpecifiedDataFields;
  }

  # Aromaticity detection may be turned off only for atomic invariants atom types...
  if ($Options{atomidentifiertype} !~ /^AtomicInvariantsAtomTypes$/i) {
    if ($Options{detectaromaticity} =~ /^No$/i) {
      die "Error: The value specified, $Options{detectaromaticity}, for option \"--DetectAromaticity\" is not valid. No value is only allowed during AtomicInvariantsAtomTypes value for \"-a, --AtomIdentifierType\" \n";
    }
  }
  $OptionsInfo{DetectAromaticity} = ($Options{detectaromaticity} =~ /^Yes$/i) ? 1 : 0;

  $OptionsInfo{Filter} = ($Options{filter} =~ /^Yes$/i) ? 1 : 0;

  $OptionsInfo{FingerprintsLabel} = $Options{fingerprintslabel} ? $Options{fingerprintslabel} : 'PathLengthFingerprints';

  # Size must be a power of 2 between minimum and maximum sizes...
  my($Size, $MinSize, $MaxSize);
  $MinSize = 32;
  $MaxSize = 2**32;
  $Size = $Options{size};
  if (!(IsPositiveInteger($Size) && $Size >= $MinSize && $Size <= $MaxSize && IsNumberPowerOfNumber($Size, 2))) {
    die "Error: Invalid size value, $Size, for \"-s, --size\" option. Allowed values: power of 2, >= minimum size of $MinSize, and <= maximum size of $MaxSize.\n";
  }
  $OptionsInfo{Size} = $Size;

  # Folded size is validated only when folding is requested.
  # NOTE(review): the error message mentions a minimum size but the check doesn't
  # enforce it; left as is to keep accepting values which are accepted today.
  $OptionsInfo{Fold} = ($Options{fold} =~ /^Yes$/i) ? 1 : 0;
  my($FoldedSize);
  $FoldedSize = $Options{foldedsize};
  if ($Options{fold} =~ /^Yes$/i) {
    if (!(IsPositiveInteger($FoldedSize) && $FoldedSize < $Size && IsNumberPowerOfNumber($FoldedSize, 2))) {
      die "Error: Invalid folded size value, $FoldedSize, for \"--FoldedSize\" option. Allowed values: power of 2, >= minimum size of $MinSize, and < size value of $Size.\n";
    }
  }
  $OptionsInfo{FoldedSize} = $FoldedSize;

  $OptionsInfo{IgnoreHydrogens} = ($Options{ignorehydrogens} =~ /^Yes$/i) ? 1 : 0;
  $OptionsInfo{KeepLargestComponent} = ($Options{keeplargestcomponent} =~ /^Yes$/i) ? 1 : 0;

  # Path lengths must be positive with minimum smaller than maximum...
  my($MinPathLength, $MaxPathLength);
  $MinPathLength = $Options{minpathlength};
  $MaxPathLength = $Options{maxpathlength};
  if (!IsPositiveInteger($MinPathLength)) {
    die "Error: Invalid path length value, $MinPathLength, for \"--MinPathLength\" option. Allowed values: > 0\n";
  }
  if (!IsPositiveInteger($MaxPathLength)) {
    # Name the option which actually holds the invalid value...
    die "Error: Invalid path length value, $MaxPathLength, for \"--MaxPathLength\" option. Allowed values: > 0\n";
  }
  if ($MinPathLength >= $MaxPathLength) {
    die "Error: Invalid minimum and maximum path length values, $MinPathLength and $MaxPathLength, for \"--MinPathLength\" and \"--MaxPathLength\"options. Allowed values: minimum path length value must be smaller than maximum path length value.\n";
  }
  $OptionsInfo{MinPathLength} = $MinPathLength;
  $OptionsInfo{MaxPathLength} = $MaxPathLength;

  # Number of bits to set per path must be positive and less than size...
  my($NumOfBitsToSetPerPath);
  $NumOfBitsToSetPerPath = $Options{numofbitstosetperpath};
  if (!IsPositiveInteger($NumOfBitsToSetPerPath)) {
    die "Error: Invalid value, $NumOfBitsToSetPerPath, for \"-n, --NumOfBitsToSetPerPath\" option. Allowed values: > 0\n";
  }
  if ($NumOfBitsToSetPerPath >= $Size) {
    die "Error: Invalid value, $NumOfBitsToSetPerPath, for \"-n, --NumOfBitsToSetPerPath\" option. Allowed values: It must be less than the size, $Size, of the fingerprint bit-string.\n";
  }
  $OptionsInfo{NumOfBitsToSetPerPath} = $NumOfBitsToSetPerPath;

  $OptionsInfo{Output} = $Options{output};
  $OptionsInfo{SDOutput} = ($Options{output} =~ /^(SD|All)$/i) ? 1 : 0;
  $OptionsInfo{FPOutput} = ($Options{output} =~ /^(FP|All)$/i) ? 1 : 0;
  $OptionsInfo{TextOutput} = ($Options{output} =~ /^(Text|All)$/i) ? 1 : 0;

  $OptionsInfo{OutDelim} = $Options{outdelim};
  $OptionsInfo{OutQuote} = ($Options{quote} =~ /^Yes$/i) ? 1 : 0;

  $OptionsInfo{OverwriteFiles} = $Options{overwrite} ? 1 : 0;
  $OptionsInfo{OutFileRoot} = $Options{root} ? $Options{root} : 0;

  $OptionsInfo{UseBondSymbols} = ($Options{usebondsymbols} =~ /^Yes$/i) ? 1 : 0;

  $OptionsInfo{UsePerlCoreRandom} = ($Options{useperlcorerandom} =~ /^Yes$/i) ? 1 : 0;

  $OptionsInfo{UseUniquePaths} = ($Options{useuniquepaths} =~ /^Yes$/i) ? 1 : 0;

  $OptionsInfo{VectorStringFormat} = $Options{vectorstringformat};

  # Setup parameters used during generation of fingerprints by PathLengthFingerprints class...
  my($AllowRings, $AllowSharedBonds);
  $AllowRings = 1;
  $AllowSharedBonds = 1;
  MODE: {
    if ($Options{pathmode} =~ /^AtomPathsWithoutRings$/i) { $AllowSharedBonds = 0; $AllowRings = 0; last MODE;}
    if ($Options{pathmode} =~ /^AtomPathsWithRings$/i) { $AllowSharedBonds = 0; $AllowRings = 1; last MODE;}
    if ($Options{pathmode} =~ /^AllAtomPathsWithoutRings$/i) { $AllowSharedBonds = 1; $AllowRings = 0; last MODE;}
    if ($Options{pathmode} =~ /^AllAtomPathsWithRings$/i) { $AllowSharedBonds = 1; $AllowRings = 1; last MODE;}
    die "Error: ProcessOptions: mode value, $Options{pathmode}, is not supported.\n";
  }
  $OptionsInfo{AllowRings} = $AllowRings;
  $OptionsInfo{AllowSharedBonds} = $AllowSharedBonds;
}

# Process atom identifier type and related options...
#
# Stores atom identifier type in %OptionsInfo and processes atomic invariants
# or functional classes options for the types which use them. Dies on an
# unsupported atom identifier type.
#
sub ProcessAtomIdentifierTypeOptions {

  $OptionsInfo{AtomIdentifierType} = $Options{atomidentifiertype};

  if ($Options{atomidentifiertype} =~ /^AtomicInvariantsAtomTypes$/i) {
    ProcessAtomicInvariantsToUseOption();
  }
  elsif ($Options{atomidentifiertype} =~ /^FunctionalClassAtomTypes$/i) {
    ProcessFunctionalClassesToUse();
  }
  elsif ($OptionsInfo{AtomIdentifierType} =~ /^(DREIDINGAtomTypes|EStateAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
    # Nothing to do for now...
  }
  else {
    die "Error: The value specified, $Options{atomidentifiertype}, for option \"-a, --AtomIdentifierType\" is not valid. Supported atom identifier types in current release of MayaChemTools: AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes\n";
  }
}

# Process specified atomic invariants to use...
#
# Splits the comma delimited value of "--AtomicInvariantsToUse" option and
# stores the atomic invariants in @{$OptionsInfo{AtomicInvariantsToUse}}.
# Dies when the value is empty, contains an unavailable atomic invariant, or
# doesn't include AS or AtomSymbol.
#
sub ProcessAtomicInvariantsToUseOption {
  my($AtomicInvariant, $AtomSymbolSpecified, @AtomicInvariantsWords);

  @{$OptionsInfo{AtomicInvariantsToUse}} = ();
  if (IsEmpty($Options{atomicinvariantstouse})) {
    die "Error: Atomic invariants value specified using \"--AtomicInvariantsToUse\" option is empty\n";
  }
  $AtomSymbolSpecified = 0;
  @AtomicInvariantsWords = split /\,/, $Options{atomicinvariantstouse};
  for $AtomicInvariant (@AtomicInvariantsWords) {
    if (!AtomTypes::AtomicInvariantsAtomTypes::IsAtomicInvariantAvailable($AtomicInvariant)) {
      # Message ends with a newline to keep Perl from appending script name and line number...
      die "Error: Atomic invariant specified, $AtomicInvariant, using \"--AtomicInvariantsToUse\" option is not valid...\n";
    }
    if ($AtomicInvariant =~ /^(AS|AtomSymbol)$/i) {
      $AtomSymbolSpecified = 1;
    }
    push @{$OptionsInfo{AtomicInvariantsToUse}}, $AtomicInvariant;
  }
  if (!$AtomSymbolSpecified) {
    die "Error: Atomic invariant, AS or AtomSymbol, must be specified as using \"--AtomicInvariantsToUse\" option...\n";
  }
}

# Process specified functional classes invariants to use...
#
# Splits the comma delimited value of "--FunctionalClassesToUse" option and
# stores the functional classes in @{$OptionsInfo{FunctionalClassesToUse}}.
# Dies when the value is empty or contains an unavailable functional class.
#
sub ProcessFunctionalClassesToUse {
  my($FunctionalClass, @FunctionalClassesToUseWords);

  @{$OptionsInfo{FunctionalClassesToUse}} = ();
  if (IsEmpty($Options{functionalclassestouse})) {
    die "Error: Functional classes value specified using \"--FunctionalClassesToUse\" option is empty\n";
  }
  @FunctionalClassesToUseWords = split /\,/, $Options{functionalclassestouse};
  for $FunctionalClass (@FunctionalClassesToUseWords) {
    if (!AtomTypes::FunctionalClassAtomTypes::IsFunctionalClassAvailable($FunctionalClass)) {
      # Message ends with a newline to keep Perl from appending script name and line number...
      die "Error: Functional class specified, $FunctionalClass, using \"--FunctionalClassesToUse\" option is not valid...\n";
    }
    push @{$OptionsInfo{FunctionalClassesToUse}}, $FunctionalClass;
  }
}

# Setup script usage and retrieve command line arguments specified using various options...
sub SetupScriptUsage {
  # Resets %Options, seeds it with the default value of each option, overlays
  # the values supplied on the command line using GetOptions, changes to the
  # working directory when "-w, --workingdir" is specified, and validates the
  # value of each option.
  #
  # Dies with an explanatory message on the first invalid option value, on a
  # GetOptions failure, or when the working directory is not usable.
  #

  # Retrieve all the options...
  %Options = ();

  $Options{aromaticitymodel} = 'MayaChemToolsAromaticityModel';

  $Options{atomidentifiertype} = 'AtomicInvariantsAtomTypes';
  $Options{atomicinvariantstouse} = 'AS';

  $Options{functionalclassestouse} = 'HBD,HBA,PI,NI,Ar,Hal';

  $Options{bitsorder} = 'Ascending';
  $Options{bitstringformat} = 'HexadecimalString';

  $Options{compoundidmode} = 'LabelPrefix';
  $Options{compoundidlabel} = 'CompoundID';
  $Options{datafieldsmode} = 'CompoundID';
  $Options{detectaromaticity} = 'Yes';

  $Options{filter} = 'Yes';

  $Options{fold} = 'No';
  $Options{foldedsize} = 256;

  $Options{ignorehydrogens} = 'Yes';
  $Options{keeplargestcomponent} = 'Yes';

  $Options{mode} = 'PathLengthBits';
  $Options{pathmode} = 'AllAtomPathsWithRings';

  $Options{minpathlength} = 1;
  $Options{maxpathlength} = 8;

  $Options{numofbitstosetperpath} = 1;

  $Options{output} = 'text';
  $Options{outdelim} = 'comma';
  $Options{quote} = 'yes';

  $Options{size} = 1024;

  $Options{usebondsymbols} = 'yes';
  $Options{useperlcorerandom} = 'yes';
  $Options{useuniquepaths} = 'yes';

  $Options{vectorstringformat} = 'IDsAndValuesString';

  if (!GetOptions(\%Options, "aromaticitymodel=s", "atomidentifiertype|a=s", "atomicinvariantstouse=s", "functionalclassestouse=s", "bitsorder=s", "bitstringformat|b=s", "compoundid=s", "compoundidlabel=s", "compoundidmode=s", "datafields=s", "datafieldsmode|d=s", "detectaromaticity=s", "filter|f=s", "fingerprintslabel=s", "fold=s", "foldedsize=i", "help|h", "ignorehydrogens|i=s", "keeplargestcomponent|k=s", "mode|m=s", "minpathlength=i", "maxpathlength=i", "numofbitstosetperpath|n=i", "outdelim=s", "output=s", "overwrite|o", "pathmode|p=s", "quote|q=s", "root|r=s", "size|s=i", "usebondsymbols|u=s", "useperlcorerandom=s", "useuniquepaths=s", "vectorstringformat|v=s", "workingdir|w=s")) {
    die "\nTo get a list of valid options and their values, use \"$ScriptName -h\" or\n\"perl -S $ScriptName -h\" command and try again...\n";
  }

  # Switch to the working directory before any option value is validated...
  if ($Options{workingdir}) {
    if (! -d $Options{workingdir}) {
      die "Error: The value specified, $Options{workingdir}, for option \"-w --workingdir\" is not a directory name.\n";
    }
    chdir $Options{workingdir} or die "Error: Couldn't chdir $Options{workingdir}: $! \n";
  }

  # Validate option values; the first invalid value terminates the script...
  if (!Molecule::IsSupportedAromaticityModel($Options{aromaticitymodel})) {
    my(@SupportedModels) = Molecule::GetSupportedAromaticityModels();
    die "Error: The value specified, $Options{aromaticitymodel}, for option \"--AromaticityModel\" is not valid. Supported aromaticity models in current release of MayaChemTools: @SupportedModels\n";
  }
  if ($Options{atomidentifiertype} !~ /^(AtomicInvariantsAtomTypes|DREIDINGAtomTypes|EStateAtomTypes|FunctionalClassAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
    die "Error: The value specified, $Options{atomidentifiertype}, for option \"-a, --AtomIdentifierType\" is not valid. Supported atom identifier types in current release of MayaChemTools: AtomicInvariantsAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, FunctionalClassAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes, UFFAtomTypes\n";
  }
  if ($Options{bitsorder} !~ /^(Ascending|Descending)$/i) {
    die "Error: The value specified, $Options{bitsorder}, for option \"--BitsOrder\" is not valid. Allowed values: Ascending or Descending\n";
  }
  if ($Options{bitstringformat} !~ /^(BinaryString|HexadecimalString)$/i) {
    die "Error: The value specified, $Options{bitstringformat}, for option \"-b, --bitstringformat\" is not valid. Allowed values: BinaryString or HexadecimalString\n";
  }
  if ($Options{compoundidmode} !~ /^(DataField|MolName|LabelPrefix|MolNameOrLabelPrefix)$/i) {
    die "Error: The value specified, $Options{compoundidmode}, for option \"--CompoundIDMode\" is not valid. Allowed values: DataField, MolName, LabelPrefix or MolNameOrLabelPrefix\n";
  }
  if ($Options{datafieldsmode} !~ /^(All|Common|Specify|CompoundID)$/i) {
    die "Error: The value specified, $Options{datafieldsmode}, for option \"-d, --DataFieldsMode\" is not valid. Allowed values: All, Common, Specify or CompoundID\n";
  }
  if ($Options{detectaromaticity} !~ /^(Yes|No)$/i) {
    die "Error: The value specified, $Options{detectaromaticity}, for option \"--DetectAromaticity\" is not valid. Allowed values: Yes or No\n";
  }
  if ($Options{filter} !~ /^(Yes|No)$/i) {
    die "Error: The value specified, $Options{filter}, for option \"-f, --Filter\" is not valid. Allowed values: Yes or No\n";
  }
  if ($Options{fold} !~ /^(Yes|No)$/i) {
    die "Error: The value specified, $Options{fold}, for option \"--fold\" is not valid. Allowed values: Yes or No\n";
  }
  if (!IsPositiveInteger($Options{foldedsize})) {
    die "Error: The value specified, $Options{foldedsize}, for option \"--FoldedSize\" is not valid. Allowed values: > 0 \n";
  }
  if ($Options{ignorehydrogens} !~ /^(Yes|No)$/i) {
    die "Error: The value specified, $Options{ignorehydrogens}, for option \"-i, --IgnoreHydrogens\" is not valid. Allowed values: Yes or No\n";
  }
  if ($Options{keeplargestcomponent} !~ /^(Yes|No)$/i) {
    die "Error: The value specified, $Options{keeplargestcomponent}, for option \"-k, --KeepLargestComponent\" is not valid. Allowed values: Yes or No\n";
  }
  if ($Options{mode} !~ /^(PathLengthBits|PathLengthCount)$/i) {
    die "Error: The value specified, $Options{mode}, for option \"-m, --mode\" is not valid. Allowed values: PathLengthBits or PathLengthCount\n";
  }
  if (!IsPositiveInteger($Options{minpathlength})) {
    die "Error: The value specified, $Options{minpathlength}, for option \"--MinPathLength\" is not valid. Allowed values: > 0 \n";
  }
  if (!IsPositiveInteger($Options{numofbitstosetperpath})) {
    # GetOptions stores values under the lowercase option name; report that key.
    die "Error: The value specified, $Options{numofbitstosetperpath}, for option \"--NumOfBitsToSetPerPath\" is not valid. Allowed values: > 0 \n";
  }
  if (!IsPositiveInteger($Options{maxpathlength})) {
    die "Error: The value specified, $Options{maxpathlength}, for option \"--MaxPathLength\" is not valid. Allowed values: > 0 \n";
  }
  if ($Options{output} !~ /^(SD|FP|text|all)$/i) {
    die "Error: The value specified, $Options{output}, for option \"--output\" is not valid. Allowed values: SD, FP, text, or all\n";
  }
  if ($Options{outdelim} !~ /^(comma|semicolon|tab)$/i) {
    die "Error: The value specified, $Options{outdelim}, for option \"--outdelim\" is not valid. Allowed values: comma, tab, or semicolon\n";
  }
  if ($Options{pathmode} !~ /^(AtomPathsWithoutRings|AtomPathsWithRings|AllAtomPathsWithoutRings|AllAtomPathsWithRings)$/i) {
    # Short form of "--PathMode" is "-p"; "-m" belongs to "--mode".
    die "Error: The value specified, $Options{pathmode}, for option \"-p, --PathMode\" is not valid. Allowed values: AtomPathsWithoutRings, AtomPathsWithRings, AllAtomPathsWithoutRings or AllAtomPathsWithRings\n";
  }
  if ($Options{quote} !~ /^(Yes|No)$/i) {
    die "Error: The value specified, $Options{quote}, for option \"-q --quote\" is not valid. Allowed values: Yes or No\n";
  }
  if ($Options{outdelim} =~ /semicolon/i && $Options{quote} =~ /^No$/i) {
    die "Error: The value specified, $Options{quote}, for option \"-q --quote\" is not allowed with, semicolon value of \"--outdelim\" option: Fingerprints string use semicolon as delimiter for various data fields and must be quoted.\n";
  }

  if (!IsPositiveInteger($Options{size})) {
    die "Error: The value specified, $Options{size}, for option \"-s, --size\" is not valid. Allowed values: > 0 \n";
  }
  if ($Options{usebondsymbols} !~ /^(Yes|No)$/i) {
    die "Error: The value specified, $Options{usebondsymbols}, for option \"-u, --UseBondSymbols\" is not valid. Allowed values: Yes or No\n";
  }
  if ($Options{useperlcorerandom} !~ /^(Yes|No)$/i) {
    die "Error: The value specified, $Options{useperlcorerandom}, for option \"--UsePerlCoreRandom\" is not valid. Allowed values: Yes or No\n";
  }
  if ($Options{useuniquepaths} !~ /^(Yes|No)$/i) {
    die "Error: The value specified, $Options{useuniquepaths}, for option \"--UseUniquePaths\" is not valid. Allowed values: Yes or No\n";
  }
  if ($Options{vectorstringformat} !~ /^(IDsAndValuesString|IDsAndValuesPairsString|ValuesAndIDsString|ValuesAndIDsPairsString)$/i) {
    die "Error: The value specified, $Options{vectorstringformat}, for option \"-v, --VectorStringFormat\" is not valid. Allowed values: IDsAndValuesString, IDsAndValuesPairsString, ValuesAndIDsString or ValuesAndIDsPairsString\n";
  }
}
