#!/usr/bin/perl -w
#
# File: SimilarityMatricesFingerprints.pl
# Author: Manish Sud <msud@san.rr.com>
#
# Copyright (C) 2024 Manish Sud. All rights reserved.
#
# This file is part of MayaChemTools.
#
# MayaChemTools is free software; you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation; either version 3 of the License, or (at your option) any
# later version.
#
# MayaChemTools is distributed in the hope that it will be useful, but without
# any warranty; without even the implied warranty of merchantability of fitness
# for a particular purpose. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
# write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
# Boston, MA, 02111-1307, USA.
#

use strict;
use FindBin; use lib "$FindBin::Bin/../lib";
use Getopt::Long;
use File::Basename;
use File::Copy;
use Text::ParseWords;
use Benchmark;
use FileUtil;
use TextUtil;
use Fingerprints::FingerprintsFileUtil;
use Fingerprints::FingerprintsBitVector;
use Fingerprints::FingerprintsVector;

# Script-wide state: script name, raw command line options and the benchmark
# objects used to report the total run time at the end of the script.
my($ScriptName, %Options, $StartTime, $EndTime, $TotalTime);

# Autoflush STDOUT
$| = 1;

# Starting message...
$ScriptName = basename($0);
print "\n$ScriptName: Starting...\n\n";
# Use an explicit class method call instead of indirect object syntax...
$StartTime = Benchmark->new;

# Get the options and setup script...
SetupScriptUsage();
if ($Options{help} || @ARGV < 1) {
  die GetUsageFromPod("$FindBin::Bin/$ScriptName");
}

# Expand command line file names into the list of fingerprints files...
my(@FingerprintsFilesList);
@FingerprintsFilesList = ExpandFileNames(\@ARGV, "sdf sd fpf fp csv tsv");

# Process options...
print "Processing options...\n";
my(%OptionsInfo);
ProcessOptions();

# Setup information about input files...
print "Checking input fingerprints file(s)...\n";
my(%FingerprintsFilesInfo);
RetrieveFingerprintsFilesInfo();

# Process input files..
my($FileIndex);
if (@FingerprintsFilesList > 1) {
  print "\nProcessing fingerprints files...\n";
}
for $FileIndex (0 .. $#FingerprintsFilesList) {
  # Files flagged as not okay have already been reported with a warning...
  if ($FingerprintsFilesInfo{FileOkay}[$FileIndex]) {
    print "\nProcessing file $FingerprintsFilesList[$FileIndex]...\n";
    GenerateSimilarityMatrices($FileIndex);
  }
}
print "\n$ScriptName:Done...\n\n";

# Use an explicit class method call instead of indirect object syntax...
$EndTime = Benchmark->new;
$TotalTime = timediff ($EndTime, $StartTime);
print "Total time: ", timestr($TotalTime), "\n";

###############################################################################

# Generate similarity matrices using fingerprints data in text file...
#
# Prepares fingerprints data for the file at $FileIndex, dispatches to the bit
# vector or vector matrix generator depending on the string mode recorded for
# the file, and cleans up the prepared data afterwards.
#
sub GenerateSimilarityMatrices {
  my($FileIndex) = @_;

  ProcessFingerprintsData($FileIndex);

  if ($FingerprintsFilesInfo{FingerprintsBitVectorStringMode}[$FileIndex]) {
    GenerateSimilarityMatricesForFingerprintsBitVectors($FileIndex);
  }
  elsif ($FingerprintsFilesInfo{FingerprintsVectorStringMode}[$FileIndex]) {
    GenerateSimilarityMatricesForFingerprintsVectors($FileIndex);
  }

  CleanupFingerprintsData($FileIndex);
}

# Generate bit vector similarity matrices...
#
# Writes one output file per specified bit vector comparison measure. The file
# name is: <OutFileRoot><ComparisonMeasure>.<OutFileExt>
#
sub GenerateSimilarityMatricesForFingerprintsBitVectors {
  my($FileIndex) = @_;
  my($SpecifiedComparisonMeasure, $ComparisonMeasure, $NewTextFile, $MethodName, @MethodParameters);

  for $SpecifiedComparisonMeasure (@{$OptionsInfo{SpecifiedBitVectorComparisonsRef}}) {
    # Map user specified name to its canonical name; all option maps are keyed by lower case names...
    $ComparisonMeasure = $OptionsInfo{SpecifiedBitVectorComparisonsNameRef}->{lc($SpecifiedComparisonMeasure)};
    $NewTextFile = $FingerprintsFilesInfo{OutFileRoot}[$FileIndex] . "${ComparisonMeasure}." . $FingerprintsFilesInfo{OutFileExt}[$FileIndex];

    $MethodName = $OptionsInfo{SpecifiedBitVectorComparisonsMethodRef}->{lc($ComparisonMeasure)};

    # Copy parameters list so that the shared options data is never handed out directly...
    @MethodParameters = @{$OptionsInfo{SpecifiedBitVectorComparisonsParameterRef}->{lc($ComparisonMeasure)}};

    GenerateSimilarityMatrix($FileIndex, $NewTextFile, $MethodName, \@MethodParameters);
  }
}

# Generate vector similarity and/or distance matrices...
#
# Writes one output file per specified vector comparison measure and comparison
# mode. The file name is: <OutFileRoot><ComparisonMeasure><ComparisonMode>.<OutFileExt>
#
sub GenerateSimilarityMatricesForFingerprintsVectors {
  my($FileIndex) = @_;
  my($SpecifiedComparisonMeasure, $ComparisonMode, $ComparisonMeasure, $NewTextFile, $MethodName, @MethodParameters);

  for $SpecifiedComparisonMeasure (@{$OptionsInfo{SpecifiedVectorComparisonsRef}}) {
    $ComparisonMeasure = $OptionsInfo{SpecifiedVectorComparisonsNameRef}->{lc($SpecifiedComparisonMeasure)};

    for $ComparisonMode (@{$OptionsInfo{SpecifiedVectorComparisonModesRef}}) {
      $NewTextFile = $FingerprintsFilesInfo{OutFileRoot}[$FileIndex] . "${ComparisonMeasure}${ComparisonMode}." . $FingerprintsFilesInfo{OutFileExt}[$FileIndex];

      $MethodName = $OptionsInfo{SpecifiedVectorComparisonsMethodRef}->{lc($ComparisonMeasure)};

      # Comparison mode is always passed as the first method parameter...
      @MethodParameters = ();
      push @MethodParameters, $ComparisonMode;
      push @MethodParameters, @{$OptionsInfo{SpecifiedVectorComparisonsParameterRef}->{lc($ComparisonMeasure)}};

      GenerateSimilarityMatrix($FileIndex, $NewTextFile, $MethodName, \@MethodParameters);
    }
  }
}

# Calculate similarity matrix and write it out...
#
# Parameters:
#   $FileIndex           - index of the input file in @FingerprintsFilesList
#   $NewTextFile         - name of the output text file to create
#   $MethodName          - name of the comparison method invoked on fingerprints objects
#   $MethodParametersRef - reference to a list of additional method parameters
#
# Dies when the output file can't be opened or closed.
#
sub GenerateSimilarityMatrix {
  my($FileIndex, $NewTextFile, $MethodName, $MethodParametersRef) = @_;
  my($NewTextFileHandle);

  print "\nGenerating $NewTextFile...\n";

  # Open new file and write out column labels. Three-argument open with a
  # lexical file handle keeps unusual file names from being interpreted as
  # part of the open mode...
  open $NewTextFileHandle, '>', $NewTextFile or die "Error: Can't open $NewTextFile: $! \n";
  WriteColumnLabels($FileIndex, $NewTextFileHandle);

  # Calculate and write out similarity matrix values...
  if ($OptionsInfo{InputDataMode} =~ /^LoadInMemory$/i) {
    GenerateSimilarityMatrixUsingMemoryData($FileIndex, $NewTextFileHandle, $MethodName, $MethodParametersRef);
  }
  elsif ($OptionsInfo{InputDataMode} =~ /^ScanFile$/i) {
    GenerateSimilarityMatrixUsingFileData($FileIndex, $NewTextFileHandle, $MethodName, $MethodParametersRef);
  }
  else {
    warn "Warning: Input data mode, $OptionsInfo{InputDataMode}, is not supported.\n";
  }

  # Close new text file. Buffered write errors surface at close time, so
  # check it instead of silently leaving a truncated matrix behind...
  close $NewTextFileHandle or die "Error: Can't close $NewTextFile: $! \n";

}

# Calculate and write out similarity values using fingerprints data already loaded in
# memory...
#
# Parameters are the same as for GenerateSimilarityMatrix except that an open
# output file handle is passed instead of the output file name. Row and column
# indices handed to SkipMatrixData are 0 based.
#
sub GenerateSimilarityMatrixUsingMemoryData {
  my($FileIndex, $NewTextFileRef, $MethodName, $MethodParametersRef) = @_;
  my($RowIndex, $ColIndex, $CmpdID1, $CmpdID2, $FingerprintsObject1, $FingerprintsObject2, $Value, $Line, @LineWords);

  for $RowIndex (0 .. $#{$FingerprintsFilesInfo{FingerprintsObjectsRef}}) {
    $FingerprintsObject1 = $FingerprintsFilesInfo{FingerprintsObjectsRef}->[$RowIndex];
    $CmpdID1 = $FingerprintsFilesInfo{CompundIDsRef}->[$RowIndex];

    # In rows and columns format, each row starts with its compound ID...
    if ($OptionsInfo{WriteRowsAndColumns}) {
      print $NewTextFileRef "$OptionsInfo{OutQuoteValue}${CmpdID1}$OptionsInfo{OutQuoteValue}";
    }

    COLINDEX: for $ColIndex (0 .. $#{$FingerprintsFilesInfo{FingerprintsObjectsRef}}) {
      if (SkipMatrixData($RowIndex, $ColIndex)) {
        next COLINDEX;
      }

      $FingerprintsObject2 = $FingerprintsFilesInfo{FingerprintsObjectsRef}->[$ColIndex];

      $Value = $FingerprintsObject1->$MethodName($FingerprintsObject2, @{$MethodParametersRef});
      # Round to the specified precision; adding 0 numifies the formatted string
      # which drops any trailing zeros. Missing values are written as empty strings...
      $Value = (defined($Value) && length($Value)) ? (sprintf("%.$OptionsInfo{Precision}f", $Value) + 0) : '';

      if ($OptionsInfo{WriteRowsAndColumns}) {
        print $NewTextFileRef "$OptionsInfo{OutDelim}$OptionsInfo{OutQuoteValue}${Value}$OptionsInfo{OutQuoteValue}";
      }
      elsif ($OptionsInfo{WriteIDPairsAndValue}) {
        $CmpdID2 = $FingerprintsFilesInfo{CompundIDsRef}->[$ColIndex];

        @LineWords = ();
        push @LineWords, ($CmpdID1, $CmpdID2, $Value);
        $Line = JoinWords(\@LineWords, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});
        print $NewTextFileRef "$Line\n";
      }
    }
    if ($OptionsInfo{WriteRowsAndColumns}) {
      print $NewTextFileRef "\n";
    }
  }
}

# Calculate and write out similarity values by retrieving and processing data
# from fingerprint file...
#
# The input fingerprints file is read once for matrix rows and, for each valid
# row, the temporary copy of the file is read from the start for matrix columns.
# Only valid fingerprints entries are counted, so row and column indices handed
# to SkipMatrixData are 1 based positions among valid entries.
#
sub GenerateSimilarityMatrixUsingFileData {
  my($FileIndex, $NewTextFileRef, $MethodName, $MethodParametersRef) = @_;
  my($RowIndex, $ColIndex, $FingerprintsFileIO, $TmpFingerprintsFileIO, $FingerprintsObject1, $FingerprintsObject2, $CmpdID1, $CmpdID2, $FingerprintsCount, $IgnoredFingerprintsCount, $Value, $Line, @LineWords);

  print "\nReading and processing fingerprints data...\n";

  $FingerprintsFileIO = Fingerprints::FingerprintsFileUtil::NewFingerprintsFileIO(%{$FingerprintsFilesInfo{FingerprintsFileIOParameters}[$FileIndex]});
  $FingerprintsFileIO->Open();

  $RowIndex = 0; $ColIndex = 0;
  $FingerprintsCount = 0; $IgnoredFingerprintsCount = 0;

  FINGERPRINTSFILEIO: while ($FingerprintsFileIO->Read()) {
    $FingerprintsCount++;

    if (!$FingerprintsFileIO->IsFingerprintsDataValid()) {
      $IgnoredFingerprintsCount++;
      next FINGERPRINTSFILEIO;
    }
    $RowIndex++;
    $FingerprintsObject1 = $FingerprintsFileIO->GetFingerprints();
    $CmpdID1 = $FingerprintsFileIO->GetCompoundID();

    # In rows and columns format, each row starts with its compound ID...
    if ($OptionsInfo{WriteRowsAndColumns}) {
      print $NewTextFileRef "$OptionsInfo{OutQuoteValue}${CmpdID1}$OptionsInfo{OutQuoteValue}";
    }

    # Force detail level of 1 to avoid duplicate printing of diagnostic messages for invalid
    # fingerprints data...
    $TmpFingerprintsFileIO = Fingerprints::FingerprintsFileUtil::NewFingerprintsFileIO(%{$FingerprintsFilesInfo{TmpFingerprintsFileIOParameters}[$FileIndex]}, "DetailLevel" => 1);
    $TmpFingerprintsFileIO->Open();

    $ColIndex = 0;
    TMPFINGERPRINTSFILEIO: while ($TmpFingerprintsFileIO->Read()) {
      if (!$TmpFingerprintsFileIO->IsFingerprintsDataValid()) {
        next TMPFINGERPRINTSFILEIO;
      }
      $ColIndex++;

      if (SkipMatrixData($RowIndex, $ColIndex)) {
        next TMPFINGERPRINTSFILEIO;
      }

      $FingerprintsObject2 = $TmpFingerprintsFileIO->GetFingerprints();

      $Value = $FingerprintsObject1->$MethodName($FingerprintsObject2, @{$MethodParametersRef});
      # Round to the specified precision; adding 0 numifies the formatted string
      # which drops any trailing zeros. Missing values are written as empty strings...
      $Value = (defined($Value) && length($Value)) ? (sprintf("%.$OptionsInfo{Precision}f", $Value) + 0) : '';

      if ($OptionsInfo{WriteRowsAndColumns}) {
        print $NewTextFileRef "$OptionsInfo{OutDelim}$OptionsInfo{OutQuoteValue}${Value}$OptionsInfo{OutQuoteValue}";
      }
      elsif ($OptionsInfo{WriteIDPairsAndValue}) {
        $CmpdID2 = $TmpFingerprintsFileIO->GetCompoundID();

        @LineWords = ();
        push @LineWords, ($CmpdID1, $CmpdID2, $Value);
        $Line = JoinWords(\@LineWords, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});
        print $NewTextFileRef "$Line\n";
      }
    }
    $TmpFingerprintsFileIO->Close();

    if ($OptionsInfo{WriteRowsAndColumns}) {
      print $NewTextFileRef "\n";
    }
  }

  $FingerprintsFileIO->Close();

  print "Number of fingerprints data entries in database fingerprints file: $FingerprintsCount\n";
  print "Number of fingerprints data entries processed successfully: ", ($FingerprintsCount - $IgnoredFingerprintsCount) , "\n";
  print "Number of fingerprints data entries ignored due to missing/invalid data: $IgnoredFingerprintsCount\n\n";
}

# Check whether matrix data need to be skipped...
294 # 295 sub SkipMatrixData { 296 my($RowIndex, $ColIndex) = @_; 297 298 if ($OptionsInfo{WriteFullMatrix}) { 299 return 0; 300 } 301 elsif ($OptionsInfo{WriteUpperTriangularMatrix}) { 302 return ($RowIndex > $ColIndex) ? 1 : 0; 303 } 304 elsif ($OptionsInfo{WriteLowerTriangularMatrix}) { 305 return ($RowIndex < $ColIndex) ? 1 : 0; 306 } 307 308 return 0; 309 } 310 311 # Write out column labels... 312 # 313 sub WriteColumnLabels { 314 my($FileIndex, $NewTextFileRef) = @_; 315 my($Line, @LineWords); 316 317 if ($OptionsInfo{OutMatrixFormat} =~ /^IDPairsAndValue$/i) { 318 @LineWords = (); 319 push @LineWords, ('CmpdID1', 'CmpdID2', 'Coefficient Value'); 320 $Line = JoinWords(\@LineWords, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote}); 321 print $NewTextFileRef "$Line\n"; 322 } 323 elsif ($OptionsInfo{OutMatrixFormat} =~ /^RowsAndColumns$/i) { 324 if ($OptionsInfo{InputDataMode} =~ /^LoadInMemory$/i) { 325 @LineWords = (); 326 push @LineWords, ''; 327 push @LineWords, @{$FingerprintsFilesInfo{CompundIDsRef}}; 328 $Line = JoinWords(\@LineWords, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote}); 329 print $NewTextFileRef "$Line\n"; 330 } 331 elsif ($OptionsInfo{InputDataMode} =~ /^ScanFile$/i) { 332 my( $FingerprintsFileIO, $CmpdID); 333 334 # Scan file to retrieve compound IDs... 335 # 336 print "\nProcessing fingerprints file to generate compound IDs...\n"; 337 338 # Force detail level of 1 to avoid diagnostics messages for invalid fingeprints data during 339 # retrieval of compound IDs as these get printed out during calculation of matrix... 
340 # 341 $FingerprintsFileIO = Fingerprints::FingerprintsFileUtil::NewFingerprintsFileIO(%{$FingerprintsFilesInfo{FingerprintsFileIOParameters}[$FileIndex]}, "DetailLevel" => 1); 342 $FingerprintsFileIO->Open(); 343 344 print $NewTextFileRef "$OptionsInfo{OutQuoteValue}$OptionsInfo{OutQuoteValue}"; 345 346 FINGERPRINTSFILEIO: while ($FingerprintsFileIO->Read()) { 347 if (!$FingerprintsFileIO->IsFingerprintsDataValid()) { 348 next FINGERPRINTSFILEIO; 349 } 350 $CmpdID = $FingerprintsFileIO->GetCompoundID(); 351 print $NewTextFileRef "$OptionsInfo{OutDelim}$OptionsInfo{OutQuoteValue}${CmpdID}$OptionsInfo{OutQuoteValue}"; 352 } 353 $FingerprintsFileIO->Close(); 354 355 print $NewTextFileRef "\n"; 356 357 print "Processing fingerprints file to generate matrix...\n"; 358 } 359 } 360 else { 361 warn "Warning: Output matrix format, $OptionsInfo{OutMatrixFormat}, is not supported.\n"; 362 } 363 } 364 365 # Process fingerprints data... 366 # 367 sub ProcessFingerprintsData { 368 my($FileIndex) = @_; 369 my($FingerprintsFileIO); 370 371 $FingerprintsFilesInfo{CompundIDsRef} = undef; 372 $FingerprintsFilesInfo{FingerprintsObjectsRef} = undef; 373 374 if ($OptionsInfo{InputDataMode} =~ /^LoadInMemory$/i) { 375 my($FingerprintsFileIO); 376 377 $FingerprintsFileIO = Fingerprints::FingerprintsFileUtil::NewFingerprintsFileIO(%{$FingerprintsFilesInfo{FingerprintsFileIOParameters}[$FileIndex]}); 378 ($FingerprintsFilesInfo{CompundIDsRef}, $FingerprintsFilesInfo{FingerprintsObjectsRef}) = Fingerprints::FingerprintsFileUtil::ReadAndProcessFingerpritsData($FingerprintsFileIO); 379 } 380 elsif ($OptionsInfo{InputDataMode} =~ /^ScanFile$/i) { 381 my($FingerprintsFile, $TmpFingerprintsFile); 382 383 $FingerprintsFile = $FingerprintsFilesList[$FileIndex]; 384 $TmpFingerprintsFile = $FingerprintsFilesInfo{TmpFingerprintsFile}[$FileIndex]; 385 386 # Copy fingerprints file to a tmp file for calculating similarity matrix... 
387 print "\nCopying fingerprints file, $FingerprintsFile, to temporary fingperints file, $TmpFingerprintsFile...\n"; 388 copy $FingerprintsFile, $TmpFingerprintsFile or die "Error: Couldn't copy $FingerprintsFile to $TmpFingerprintsFile: $! \n"; 389 } 390 } 391 392 # Clean up fingerprints data... 393 # 394 sub CleanupFingerprintsData { 395 my($FileIndex) = @_; 396 397 if ($OptionsInfo{InputDataMode} =~ /^LoadInMemory$/i) { 398 $FingerprintsFilesInfo{CompundIDsRef} = undef; 399 $FingerprintsFilesInfo{FingerprintsObjectsRef} = undef; 400 } 401 elsif ($OptionsInfo{InputDataMode} =~ /^ScanFile$/i) { 402 my($TmpFingerprintsFile); 403 404 # Delete temporary fingerprints file... 405 $TmpFingerprintsFile = $FingerprintsFilesInfo{TmpFingerprintsFile}[$FileIndex]; 406 407 print "\nDeleting temporary fingerprints file $TmpFingerprintsFile...\n"; 408 unlink $TmpFingerprintsFile or die "Error: Couldn't unlink $TmpFingerprintsFile: $! \n"; 409 } 410 } 411 412 # Retrieve information about fingerprints files... 413 # 414 sub RetrieveFingerprintsFilesInfo { 415 my($FingerprintsFile, $TmpFingerprintsFile, $FingerprintsFileIO, $FingerprintsBitVectorStringMode, $FingerprintsVectorStringMode, $FileType, $Index, $FileDir, $FileExt, $FileName, $InDelim, $OutFileRoot, $OutFileExt, %FingerprintsFileIOParameters); 416 417 %FingerprintsFilesInfo = (); 418 @{$FingerprintsFilesInfo{FileOkay}} = (); 419 @{$FingerprintsFilesInfo{FileType}} = (); 420 @{$FingerprintsFilesInfo{InDelim}} = (); 421 @{$FingerprintsFilesInfo{OutFileRoot}} = (); 422 @{$FingerprintsFilesInfo{OutFileExt}} = (); 423 424 @{$FingerprintsFilesInfo{TmpFingerprintsFile}} = (); 425 426 @{$FingerprintsFilesInfo{FingerprintsFileIOParameters}} = (); 427 @{$FingerprintsFilesInfo{TmpFingerprintsFileIOParameters}} = (); 428 429 @{$FingerprintsFilesInfo{FingerprintsBitVectorStringMode}} = (); 430 @{$FingerprintsFilesInfo{FingerprintsVectorStringMode}} = (); 431 432 FILELIST: for $Index (0 .. 
$#FingerprintsFilesList) { 433 $FingerprintsFilesInfo{FileOkay}[$Index] = 0; 434 $FingerprintsFilesInfo{FileType}[$Index] = ''; 435 $FingerprintsFilesInfo{InDelim}[$Index] = ""; 436 $FingerprintsFilesInfo{OutFileRoot}[$Index] = ''; 437 $FingerprintsFilesInfo{OutFileExt}[$Index] = ''; 438 439 %{$FingerprintsFilesInfo{FingerprintsFileIOParameters}[$Index]} = (); 440 441 $FingerprintsFilesInfo{TmpFingerprintsFile}[$Index] = ""; 442 %{$FingerprintsFilesInfo{TmpFingerprintsFileIOParameters}[$Index]} = (); 443 444 $FingerprintsFilesInfo{FingerprintsBitVectorStringMode}[$Index] = 0; 445 $FingerprintsFilesInfo{FingerprintsVectorStringMode}[$Index] = 0; 446 447 $FingerprintsFile = $FingerprintsFilesList[$Index]; 448 if (!(-e $FingerprintsFile)) { 449 warn "Warning: Ignoring file $FingerprintsFile: It doesn't exist\n"; 450 next FILELIST; 451 } 452 453 $FileType = Fingerprints::FingerprintsFileUtil::GetFingerprintsFileType($FingerprintsFile); 454 if (IsEmpty($FileType)) { 455 warn "Warning: Ignoring file $FingerprintsFile: It's not a fingerprints file\n"; 456 next FILELIST; 457 } 458 459 $FileDir = ""; $FileName = ""; $FileExt = ""; 460 ($FileDir, $FileName, $FileExt) = ParseFileName($FingerprintsFile); 461 462 # Setup temporary fingerprints file name for scan file mode... 463 $TmpFingerprintsFile = "${FileName}Tmp.${FileExt}"; 464 465 $InDelim = ($FileExt =~ /^tsv$/i) ? 'Tab' : $OptionsInfo{InDelim}; 466 467 # Setup output file names... 
468 $OutFileExt = "csv"; 469 if ($Options{outdelim} =~ /^tab$/i) { 470 $OutFileExt = "tsv"; 471 } 472 473 $OutFileRoot = $FileName; 474 if ($OptionsInfo{OutFileRoot} && (@FingerprintsFilesList == 1)) { 475 my ($RootFileDir, $RootFileName, $RootFileExt) = ParseFileName($OptionsInfo{OutFileRoot}); 476 if ($RootFileName && $RootFileExt) { 477 $FileName = $RootFileName; 478 } 479 else { 480 $FileName = $OptionsInfo{OutFileRoot}; 481 } 482 $OutFileRoot = $FileName; 483 } 484 485 if (!$Options{overwrite}) { 486 # Similarity matrices output file names for bit-vector strings... 487 my($SpecifiedComparisonMeasure, $ComparisonMeasure); 488 for $SpecifiedComparisonMeasure (@{$OptionsInfo{SpecifiedBitVectorComparisonsRef}}) { 489 $ComparisonMeasure = $OptionsInfo{SpecifiedBitVectorComparisonsNameRef}->{lc($SpecifiedComparisonMeasure)}; 490 if (-e "${OutFileRoot}${ComparisonMeasure}.${OutFileExt}") { 491 warn "Warning: Ignoring file $FingerprintsFile: The file ${OutFileRoot}${ComparisonMeasure}.${OutFileExt} already exists.\n"; 492 next FILELIST; 493 } 494 } 495 # Similarity matrices output file names for vector strings... 496 my($ComparisonMode); 497 for $SpecifiedComparisonMeasure (@{$OptionsInfo{SpecifiedVectorComparisonsRef}}) { 498 $ComparisonMeasure = $OptionsInfo{SpecifiedVectorComparisonsNameRef}->{lc($SpecifiedComparisonMeasure)}; 499 for $ComparisonMode (@{$OptionsInfo{SpecifiedVectorComparisonModesRef}}) { 500 if (-e "${OutFileRoot}${ComparisonMeasure}${ComparisonMode}.${OutFileExt}") { 501 warn "Warning: Ignoring file $FingerprintsFile: The file ${OutFileRoot}${ComparisonMeasure}${ComparisonMode}.${OutFileExt} already exists.\n"; 502 next FILELIST; 503 } 504 } 505 } 506 } 507 508 # Setup FingerprintsFileIO parameters... 
509 %FingerprintsFileIOParameters = (); 510 FILEIOPARAMETERS: { 511 if ($FileType =~ /^SD$/i) { 512 %FingerprintsFileIOParameters = ('Name' => $FingerprintsFile, 'Mode' => 'Read', 'FingerprintsStringMode' => $OptionsInfo{Mode}, 'ValidateData' => $OptionsInfo{ValidateData}, 'DetailLevel' => $OptionsInfo{Detail}, 'FingerprintsFieldLabel' => $OptionsInfo{FingerprintsField}, 'CompoundIDMode' => $OptionsInfo{CompoundIDMode}, 'CompoundIDFieldLabel' => $OptionsInfo{CompoundIDField}, 'CompoundIDPrefix' => $OptionsInfo{CompoundIDPrefix}); 513 last FILEIOPARAMETERS; 514 } 515 if ($FileType =~ /^FP$/i) { 516 %FingerprintsFileIOParameters = ('Name' => $FingerprintsFile, 'Mode' => 'Read', 'FingerprintsStringMode' => $OptionsInfo{Mode}, 'ValidateData' => $OptionsInfo{ValidateData}, 'DetailLevel' => $OptionsInfo{Detail}); 517 last FILEIOPARAMETERS; 518 } 519 if ($FileType =~ /^Text$/i) { 520 %FingerprintsFileIOParameters = ('Name' => $FingerprintsFile, 'Mode' => 'Read', 'FingerprintsStringMode' => $OptionsInfo{Mode}, 'ValidateData' => $OptionsInfo{ValidateData}, 'DetailLevel' => $OptionsInfo{Detail}, 'FingerprintsCol' => $OptionsInfo{FingerprintsCol}, 'ColMode' => $OptionsInfo{ColMode}, 'CompoundIDCol' => $OptionsInfo{CompoundIDCol}, 'CompoundIDPrefix' => $OptionsInfo{CompoundIDPrefix}, 'InDelim' => $OptionsInfo{InDelim}); 521 last FILEIOPARAMETERS; 522 } 523 warn "Warning: File type for fingerprints file, $FingerprintsFile, is not valid. Supported file types: SD, FP or Text\n"; 524 next FILELIST; 525 } 526 527 # Retrieve fingerints file string mode information... 
528 $FingerprintsFileIO = Fingerprints::FingerprintsFileUtil::NewFingerprintsFileIO(%FingerprintsFileIOParameters); 529 530 if (!$FingerprintsFileIO) { 531 warn "Warning: Ignoring fingerprints file $FingerprintsFile: It contains invalid fingerprints data\n"; 532 next FILELIST; 533 } 534 if (!$FingerprintsFileIO->IsFingerprintsFileDataValid()) { 535 warn "Warning: Ignoring fingerprints file $FingerprintsFile: It contains invalid fingerprints data\n"; 536 next FILELIST; 537 } 538 $FingerprintsBitVectorStringMode = $FingerprintsFileIO->GetFingerprintsBitVectorStringMode(); 539 $FingerprintsVectorStringMode = $FingerprintsFileIO->GetFingerprintsVectorStringMode(); 540 541 542 $FingerprintsFilesInfo{FileOkay}[$Index] = 1; 543 $FingerprintsFilesInfo{FileType}[$Index] = $FileType; 544 545 $FingerprintsFilesInfo{InDelim}[$Index] = $InDelim; 546 547 $FingerprintsFilesInfo{OutFileRoot}[$Index] = $OutFileRoot; 548 $FingerprintsFilesInfo{OutFileExt}[$Index] = $OutFileExt; 549 550 %{$FingerprintsFilesInfo{FingerprintsFileIOParameters}[$Index]} = %FingerprintsFileIOParameters; 551 552 $FingerprintsFilesInfo{TmpFingerprintsFile}[$Index] = $TmpFingerprintsFile; 553 554 $FingerprintsFileIOParameters{Name} = $TmpFingerprintsFile; 555 %{$FingerprintsFilesInfo{TmpFingerprintsFileIOParameters}[$Index]} = %FingerprintsFileIOParameters; 556 557 $FingerprintsFilesInfo{FingerprintsBitVectorStringMode}[$Index] = $FingerprintsBitVectorStringMode; 558 $FingerprintsFilesInfo{FingerprintsVectorStringMode}[$Index] = $FingerprintsVectorStringMode; 559 } 560 } 561 562 # Process option values... 563 sub ProcessOptions { 564 %OptionsInfo = (); 565 566 $OptionsInfo{Mode} = $Options{mode}; 567 568 $OptionsInfo{InputDataMode} = $Options{inputdatamode}; 569 570 ProcessBitVectorComparisonOptions(); 571 ProcessVectorComparisonOptions(); 572 573 $OptionsInfo{CompoundIDPrefix} = $Options{compoundidprefix} ? 
$Options{compoundidprefix} : 'Cmpd'; 574 575 # Compound ID and fingerprints column options for text files... 576 $OptionsInfo{ColMode} = $Options{colmode}; 577 578 if (IsNotEmpty($Options{compoundidcol})) { 579 if ($Options{colmode} =~ /^ColNum$/i) { 580 if (!IsPositiveInteger($Options{compoundidcol})) { 581 die "Error: Column value, $Options{compoundidcol}, specified using \"--CompoundIDCol\" is not valid: Allowed integer values: > 0\n"; 582 } 583 } 584 $OptionsInfo{CompoundIDCol} = $Options{compoundidcol}; 585 } 586 else { 587 $OptionsInfo{CompoundIDCol} = 'AutoDetect'; 588 } 589 590 if (IsNotEmpty($Options{fingerprintscol})) { 591 if ($Options{colmode} =~ /^ColNum$/i) { 592 if (!IsPositiveInteger($Options{fingerprintscol})) { 593 die "Error: Column value, $Options{fingerprintscol}, specified using \"--FingerprintsCol\" is not valid: Allowed integer values: > 0\n"; 594 } 595 } 596 $OptionsInfo{FingerprintsCol} = $Options{fingerprintscol}; 597 } 598 else { 599 $OptionsInfo{FingerprintsCol} = 'AutoDetect'; 600 } 601 602 if (IsNotEmpty($Options{compoundidcol}) && IsNotEmpty($Options{fingerprintscol})) { 603 if (IsPositiveInteger($Options{compoundidcol}) && IsPositiveInteger($Options{fingerprintscol})) { 604 if (($Options{compoundidcol} == $Options{fingerprintscol})) { 605 die "Error: Values specified using \"--CompoundIDCol\" and \"--FingerprintsCol\", $Options{compoundidcol}, must be different.\n"; 606 } 607 } 608 else { 609 if (($Options{compoundidcol} eq $Options{fingerprintscol})) { 610 die "Error: Values specified using \"--CompoundIDCol\" and \"--FingerprintsCol\", $Options{compoundidcol}, must be different.\n"; 611 } 612 } 613 } 614 615 # Compound ID and fingerprints field options for SD files... 
616 $OptionsInfo{CompoundIDMode} = $Options{compoundidmode}; 617 $OptionsInfo{CompoundIDField} = ''; 618 619 if ($Options{compoundidmode} =~ /^DataField$/i) { 620 if (!$Options{compoundidfield}) { 621 die "Error: You must specify a value for \"--CompoundIDField\" option in \"DataField\" \"--CompoundIDMode\". \n"; 622 } 623 $OptionsInfo{CompoundIDField} = $Options{compoundidfield}; 624 } 625 626 627 if (IsNotEmpty($Options{fingerprintsfield})) { 628 $OptionsInfo{FingerprintsField} = $Options{fingerprintsfield}; 629 } 630 else { 631 $OptionsInfo{FingerprintsField} = 'AutoDetect'; 632 } 633 634 if ($Options{compoundidfield} && IsNotEmpty($Options{fingerprintsfield})) { 635 if (($Options{compoundidfield} eq $Options{fingerprintsfield})) { 636 die "Error: Values specified using \"--CompoundIDField\" and \"--Fingerprintsfield\", $Options{compoundidfield}, must be different.\n"; 637 } 638 } 639 640 $OptionsInfo{Detail} = $Options{detail}; 641 642 $OptionsInfo{InDelim} = $Options{indelim}; 643 $OptionsInfo{OutDelim} = ($Options{outdelim} =~ /tab/i ) ? "\t" : (($Options{outdelim} =~ /semicolon/i) ? "\;" : "\,"); 644 $OptionsInfo{OutQuote} = ($Options{quote} =~ /^Yes$/i) ? 1 : 0; 645 $OptionsInfo{OutQuoteValue} = ($Options{quote} =~ /^Yes$/i) ? '"' : ''; 646 647 $OptionsInfo{OutMatrixFormat} = $Options{outmatrixformat}; 648 649 $OptionsInfo{WriteRowsAndColumns} = 0; $OptionsInfo{WriteIDPairsAndValue} = 0; 650 OUTMATRIXFORMAT: { 651 if ($OptionsInfo{OutMatrixFormat} =~ /^RowsAndColumns$/i) { 652 $OptionsInfo{WriteRowsAndColumns} = 1; last OUTMATRIXFORMAT; 653 } 654 if ($OptionsInfo{OutMatrixFormat} =~ /^IDPairsAndValue$/i) { 655 $OptionsInfo{WriteIDPairsAndValue} = 1; last OUTMATRIXFORMAT; 656 } 657 die "Error: The value specified, $Options{outmatrixformat}, for option \"--OutMatrixFormat\" is not valid. 
Allowed values: RowsAndColumns or IDPairsAndValue\n"; 658 } 659 660 $OptionsInfo{OutMatrixType} = $Options{outmatrixtype}; 661 662 $OptionsInfo{WriteFullMatrix} = 0; 663 $OptionsInfo{WriteUpperTriangularMatrix} = 0; $OptionsInfo{WriteLowerTriangularMatrix} = 0; 664 OUTMATRIXTYPE: { 665 if ($OptionsInfo{OutMatrixType} =~ /^FullMatrix$/i) { 666 $OptionsInfo{WriteFullMatrix} = 1; last OUTMATRIXTYPE; 667 } 668 if ($OptionsInfo{OutMatrixType} =~ /^UpperTriangularMatrix$/i) { 669 $OptionsInfo{WriteUpperTriangularMatrix} = 1; last OUTMATRIXTYPE; 670 } 671 if ($OptionsInfo{OutMatrixType} =~ /^LowerTriangularMatrix$/i) { 672 $OptionsInfo{WriteLowerTriangularMatrix} = 1; last OUTMATRIXTYPE; 673 } 674 die "Error: The value specified, $Options{outmatrixtype}, for option \"--OutMatrixType\" is not valid. Allowed values: FullMatrix, UpperTriangularMatrix or LowerTriangularMatrix\n"; 675 } 676 677 $OptionsInfo{OverwriteFiles} = $Options{overwrite} ? 1 : 0; 678 $OptionsInfo{OutFileRoot} = $Options{root} ? $Options{root} : 0; 679 680 $OptionsInfo{Fast} = $Options{fast} ? 1 : 0; 681 $OptionsInfo{ValidateData} = $Options{fast} ? 0 : 1; 682 683 $OptionsInfo{Precision} = $Options{precision}; 684 685 } 686 687 # Process options related to comparion of bit vector strings... 688 # 689 sub ProcessBitVectorComparisonOptions { 690 # Setup supported bit vector similarity coefficients for bit vector strings... 691 my($ComparisonMeasure, $SupportedComparisonMeasure, @SupportedComparisonMeasures, %SupportedComparisonMeasuresNameMap, %SupportedComparisonMeasuresMethodMap); 692 693 @SupportedComparisonMeasures = (); 694 %SupportedComparisonMeasuresNameMap = (); 695 %SupportedComparisonMeasuresMethodMap = (); 696 697 for $SupportedComparisonMeasure (Fingerprints::FingerprintsBitVector::GetSupportedSimilarityCoefficients()) { 698 # Similarity coefficient function/method names contain "Coefficient" in their names. 699 # So take 'em out and setup a map to original function/method name... 
700 $ComparisonMeasure = $SupportedComparisonMeasure; 701 $ComparisonMeasure =~ s/Coefficient$//; 702 703 push @SupportedComparisonMeasures, $ComparisonMeasure; 704 $SupportedComparisonMeasuresNameMap{lc($ComparisonMeasure)} = $ComparisonMeasure; 705 $SupportedComparisonMeasuresMethodMap{lc($ComparisonMeasure)} = $SupportedComparisonMeasure; 706 } 707 708 # Setup a list of similarity coefficients to use for calculating similarity matrices for bit vector strings... 709 my($SpecifiedMeasure, @SpecifiedComparisonMeasures, %SpecifiedComparisonMeasuresNameMap, %SpecifiedComparisonMeasuresMethodMap, %SpecifiedComparisonMeasuresParameterMap); 710 711 @SpecifiedComparisonMeasures = (); 712 %SpecifiedComparisonMeasuresNameMap = (); 713 %SpecifiedComparisonMeasuresMethodMap = (); 714 %SpecifiedComparisonMeasuresParameterMap = (); 715 716 if ($Options{bitvectorcomparisonmode} =~ /^All$/i) { 717 push @SpecifiedComparisonMeasures, @SupportedComparisonMeasures; 718 } 719 else { 720 # Comma delimited list of similarity coefficients... 
721 my($BitVectorComparisonMode, @SpecifiedMeasures, @UnsupportedSpecifiedMeasures); 722 723 $BitVectorComparisonMode = $Options{bitvectorcomparisonmode}; 724 $BitVectorComparisonMode =~ s/ //g; 725 @SpecifiedMeasures = split ",", $BitVectorComparisonMode; 726 @UnsupportedSpecifiedMeasures = (); 727 728 for $SpecifiedMeasure (@SpecifiedMeasures) { 729 if (exists($SupportedComparisonMeasuresMethodMap{lc($SpecifiedMeasure)})) { 730 push @SpecifiedComparisonMeasures, $SpecifiedMeasure; 731 } 732 else { 733 push @UnsupportedSpecifiedMeasures, $SpecifiedMeasure; 734 } 735 } 736 if (@UnsupportedSpecifiedMeasures) { 737 if (@UnsupportedSpecifiedMeasures > 1) { 738 warn "Error: The values specified - ", JoinWords(\@UnsupportedSpecifiedMeasures, ", ", 0)," - for option \"-b --BitVectorComparisonMode\" are not valid.\n"; 739 } 740 else { 741 warn "Error: The value specified, @UnsupportedSpecifiedMeasures, for option \"-b --BitVectorComparisonMode\" is not valid.\n"; 742 } 743 die "Allowed values:", JoinWords(\@SupportedComparisonMeasures, ", ", 0), "\n"; 744 } 745 } 746 for $SpecifiedMeasure (@SpecifiedComparisonMeasures) { 747 $SpecifiedComparisonMeasuresMethodMap{lc($SpecifiedMeasure)} = $SupportedComparisonMeasuresMethodMap{lc($SpecifiedMeasure)}; 748 $SpecifiedComparisonMeasuresNameMap{lc($SpecifiedMeasure)} = $SupportedComparisonMeasuresNameMap{lc($SpecifiedMeasure)}; 749 } 750 751 $OptionsInfo{BitVectorComparisonMode} = $Options{bitvectorcomparisonmode}; 752 $OptionsInfo{SpecifiedBitVectorComparisonsRef} = \@SpecifiedComparisonMeasures; 753 $OptionsInfo{SpecifiedBitVectorComparisonsNameRef} = \%SpecifiedComparisonMeasuresNameMap; 754 $OptionsInfo{SpecifiedBitVectorComparisonsMethodRef} = \%SpecifiedComparisonMeasuresMethodMap; 755 756 # Make sure valid alpha parameter is specified for Tversky calculation... 
757 my($SpecifiedMeasure1, $SpecifiedMeasure2); 758 $OptionsInfo{Alpha} = ''; 759 $SpecifiedMeasure1 = 'TverskySimilarity'; 760 $SpecifiedMeasure2 = 'WeightedTverskySimilarity'; 761 if ($SpecifiedComparisonMeasuresNameMap{lc($SpecifiedMeasure1)} || $SpecifiedComparisonMeasuresNameMap{lc($SpecifiedMeasure2)}) { 762 if (IsEmpty($Options{alpha})) { 763 die "Error: You must specify a value for \"-a, --alpha\" option in \"$SpecifiedMeasure1, $SpecifiedMeasure2, or All\" \"-m --mode\". \n"; 764 } 765 my($Alpha); 766 $Alpha = $Options{alpha}; 767 if (!(IsFloat($Alpha) && $Alpha >=0 && $Alpha <= 1)) { 768 die "Error: The value specified, $Options{alpha}, for option \"-a, --alpha\" is not valid. Allowed values: >= 0 and <= 1\n"; 769 } 770 $OptionsInfo{Alpha} = $Alpha; 771 } 772 773 # Make sure valid beta parameter is specified for WeightedTanimoto and WeightedTversky 774 # calculations... 775 $OptionsInfo{Beta} = ''; 776 $SpecifiedMeasure1 = 'WeightedTverskySimilarity'; 777 $SpecifiedMeasure2 = 'WeightedTanimotoSimilarity'; 778 if ($SpecifiedComparisonMeasuresNameMap{lc($SpecifiedMeasure1)} || $SpecifiedComparisonMeasuresNameMap{lc($SpecifiedMeasure2)}) { 779 if (IsEmpty($Options{beta})) { 780 die "Error: You must specify a value for \"-b, --beta\" option in \"$SpecifiedMeasure1, $SpecifiedMeasure2, or All\" \"-m --mode\". \n"; 781 } 782 my($Beta); 783 $Beta = $Options{beta}; 784 if (!(IsFloat($Beta) && $Beta >=0 && $Beta <= 1)) { 785 die "Error: The value specified, $Options{beta}, for option \"-b, --beta\" is not valid. Allowed values: >= 0 and <= 1\n"; 786 } 787 $OptionsInfo{Beta} = $Beta; 788 } 789 790 # Setup any parameters required for specified comparison menthod... 
for $SpecifiedMeasure (@SpecifiedComparisonMeasures) {
  # TverskySimilarity takes alpha; WeightedTverskySimilarity takes alpha followed by beta;
  # WeightedTanimotoSimilarity takes beta. Any other measure gets an empty parameter list...
  @{$SpecifiedComparisonMeasuresParameterMap{lc($SpecifiedMeasure)}} = ();
  if ($SpecifiedMeasure =~ /^TverskySimilarity$/i) {
    push @{$SpecifiedComparisonMeasuresParameterMap{lc($SpecifiedMeasure)}}, $OptionsInfo{Alpha};
  }
  elsif ($SpecifiedMeasure =~ /^WeightedTverskySimilarity$/i) {
    push @{$SpecifiedComparisonMeasuresParameterMap{lc($SpecifiedMeasure)}}, $OptionsInfo{Alpha};
    push @{$SpecifiedComparisonMeasuresParameterMap{lc($SpecifiedMeasure)}}, $OptionsInfo{Beta};
  }
  elsif ($SpecifiedMeasure =~ /^WeightedTanimotoSimilarity$/i) {
    push @{$SpecifiedComparisonMeasuresParameterMap{lc($SpecifiedMeasure)}}, $OptionsInfo{Beta};
  }
}
$OptionsInfo{SpecifiedBitVectorComparisonsParameterRef} = \%SpecifiedComparisonMeasuresParameterMap;
}

# Process options related to comparison of vector strings...
#
# Reads vectorcomparisonmode, vectorcomparisonformulism and fast from the
# file-level %Options hash and stores the following in the file-level
# %OptionsInfo hash:
#
#   VectorComparisonMode                    Option value as specified
#   SpecifiedVectorComparisonsRef           Ref to list of measure names to calculate
#   SpecifiedVectorComparisonsNameRef       Ref to hash: lc(name) => supported measure name
#   SpecifiedVectorComparisonsMethodRef     Ref to hash: lc(name) => coefficient method name
#   SpecifiedVectorComparisonsParameterRef  Ref to hash: lc(name) => ref to parameter list
#   VectorComparisonFormulism               Option value as specified
#   SpecifiedVectorComparisonModesRef       Ref to list of calculation modes
#
# Dies when an unsupported measure or formulism is specified.
#
sub ProcessVectorComparisonOptions {
  # Setup supported similarity coefficients for vector strings...
  my(@SupportedComparisonMeasures, %SupportedComparisonMeasuresNameMap, %SupportedComparisonMeasuresMethodMap);

  for my $SupportedComparisonMeasure (Fingerprints::FingerprintsVector::GetSupportedDistanceAndSimilarityCoefficients()) {
    # Similarity and distance coefficient function/method names contain "Coefficient" in their names.
    # So take 'em out and setup a map to original function/method name. The substitution
    # is a no-op for names without the suffix...
    my $ComparisonMeasure = $SupportedComparisonMeasure;
    $ComparisonMeasure =~ s/Coefficient$//i;

    push @SupportedComparisonMeasures, $ComparisonMeasure;
    $SupportedComparisonMeasuresNameMap{lc($ComparisonMeasure)} = $ComparisonMeasure;
    $SupportedComparisonMeasuresMethodMap{lc($ComparisonMeasure)} = $SupportedComparisonMeasure;
  }

  # Setup a list of similarity coefficients to use for calculating similarity matrices for vector strings...
  my(@SpecifiedComparisonMeasures, %SpecifiedComparisonMeasuresNameMap, %SpecifiedComparisonMeasuresMethodMap, %SpecifiedComparisonMeasuresParameterMap);

  if ($Options{vectorcomparisonmode} =~ /^All$/i) {
    @SpecifiedComparisonMeasures = @SupportedComparisonMeasures;
  }
  else {
    # Comma delimited list of similarity coefficients; embedded blanks are ignored...
    my($VectorComparisonMode, @UnsupportedSpecifiedMeasures);

    $VectorComparisonMode = $Options{vectorcomparisonmode};
    $VectorComparisonMode =~ s/ //g;

    for my $SpecifiedMeasure (split ",", $VectorComparisonMode) {
      if (exists($SupportedComparisonMeasuresMethodMap{lc($SpecifiedMeasure)})) {
        push @SpecifiedComparisonMeasures, $SpecifiedMeasure;
      }
      else {
        push @UnsupportedSpecifiedMeasures, $SpecifiedMeasure;
      }
    }
    if (@UnsupportedSpecifiedMeasures) {
      # Report every unsupported name before listing the allowed ones...
      if (@UnsupportedSpecifiedMeasures > 1) {
        warn "Error: The values specified - ", JoinWords(\@UnsupportedSpecifiedMeasures, ", ", 0)," - for option \"-v --VectorComparisonMode\" are not valid.\n";
      }
      else {
        warn "Error: The value specified, @UnsupportedSpecifiedMeasures, for option \"-v --VectorComparisonMode\" is not valid.\n";
      }
      die "Allowed values:", JoinWords(\@SupportedComparisonMeasures, ", ", 0), "\n";
    }
  }

  # Map each specified measure, keyed by its lower case name, to the supported name, the
  # method name and the parameter list, which holds a single flag: 1 when the fast option
  # is set and 0 otherwise...
  for my $SpecifiedMeasure (@SpecifiedComparisonMeasures) {
    my $MeasureKey = lc($SpecifiedMeasure);

    $SpecifiedComparisonMeasuresMethodMap{$MeasureKey} = $SupportedComparisonMeasuresMethodMap{$MeasureKey};
    $SpecifiedComparisonMeasuresNameMap{$MeasureKey} = $SupportedComparisonMeasuresNameMap{$MeasureKey};
    $SpecifiedComparisonMeasuresParameterMap{$MeasureKey} = [$Options{fast} ? 1 : 0];
  }

  $OptionsInfo{VectorComparisonMode} = $Options{vectorcomparisonmode};
  $OptionsInfo{SpecifiedVectorComparisonsRef} = \@SpecifiedComparisonMeasures;
  $OptionsInfo{SpecifiedVectorComparisonsNameRef} = \%SpecifiedComparisonMeasuresNameMap;
  $OptionsInfo{SpecifiedVectorComparisonsMethodRef} = \%SpecifiedComparisonMeasuresMethodMap;
  $OptionsInfo{SpecifiedVectorComparisonsParameterRef} = \%SpecifiedComparisonMeasuresParameterMap;

  # Setup specified vector comparison calculation modes...
  my(@SpecifiedVectorComparisonModes);

  if ($Options{vectorcomparisonformulism} =~ /^All$/i) {
    push @SpecifiedVectorComparisonModes, ("AlgebraicForm", "BinaryForm", "SetTheoreticForm");
  }
  else {
    # Comma delimited list of formulisms. Blanks are dropped before splitting, the same
    # way it is done for the comparison measures, so "AlgebraicForm, BinaryForm" is accepted...
    my($VectorComparisonFormulism);

    $VectorComparisonFormulism = $Options{vectorcomparisonformulism};
    $VectorComparisonFormulism =~ s/ //g;

    for my $SpecifiedFormulism (split /,/, $VectorComparisonFormulism) {
      if ($SpecifiedFormulism !~ /^(AlgebraicForm|BinaryForm|SetTheoreticForm)$/i) {
        die "Error: The value specified, $SpecifiedFormulism, for option \"--VectorComparisonFormulism\" is not valid. Allowed values: AlgebraicForm, BinaryForm or SetTheoreticForm\n";
      }
      push @SpecifiedVectorComparisonModes, $SpecifiedFormulism;
    }
  }
  $OptionsInfo{VectorComparisonFormulism} = $Options{vectorcomparisonformulism};
  $OptionsInfo{SpecifiedVectorComparisonModesRef} = \@SpecifiedVectorComparisonModes;
}

# Setup script usage and retrieve command line arguments specified using various options...
#
# Fills the file-level %Options hash with default values, lets GetOptions overwrite
# them with whatever is specified on the command line, changes to the working
# directory when one is specified, and dies on the first option value which is
# not valid.
#
sub SetupScriptUsage {

  # Default values; GetOptions replaces the ones specified on the command line...
  %Options = (
    alpha                     => 0.5,
    beta                      => 1,
    bitvectorcomparisonmode   => "TanimotoSimilarity",
    colmode                   => 'colnum',
    compoundidprefix          => 'Cmpd',
    compoundidmode            => 'LabelPrefix',
    detail                    => 1,
    indelim                   => 'comma',
    outdelim                  => 'comma',
    inputdatamode             => 'LoadInMemory',
    mode                      => 'AutoDetect',
    outmatrixformat           => 'RowsAndColumns',
    outmatrixtype             => 'FullMatrix',
    quote                     => 'yes',
    precision                 => 2,
    vectorcomparisonmode      => "TanimotoSimilarity",
    vectorcomparisonformulism => "AlgebraicForm",
  );

  # Retrieve all the options...
  my(@OptionsSpecification) = (
    "alpha=f", "beta=f", "bitvectorcomparisonmode|b=s", "colmode|c=s",
    "compoundidcol=s", "compoundidprefix=s", "compoundidfield=s", "compoundidmode=s",
    "detail|d=i", "fast|f", "fingerprintscol=s", "fingerprintsfield=s", "help|h",
    "indelim=s", "inputdatamode=s", "mode|m=s", "outdelim=s", "overwrite|o",
    "outmatrixformat=s", "outmatrixtype=s", "precision|p=s", "quote|q=s", "root|r=s",
    "vectorcomparisonmode|v=s", "vectorcomparisonformulism=s", "workingdir|w=s",
  );
  if (!GetOptions(\%Options, @OptionsSpecification)) {
    die "\nTo get a list of valid options and their values, use \"$ScriptName -h\" or\n\"perl -S $ScriptName -h\" command and try again...\n";
  }

  if ($Options{workingdir}) {
    if (! -d $Options{workingdir}) {
      die "Error: The value specified, $Options{workingdir}, for option \"-w --workingdir\" is not a directory name.\n";
    }
    chdir $Options{workingdir} or die "Error: Couldn't chdir $Options{workingdir}: $! \n";
  }

  # Validation table: option key, validator (regular expression or code reference), option
  # name as shown in the error message, and description of allowed values. Entries are
  # checked in the order listed and the first invalid value ends the script...
  my(@OptionValueChecks) = (
    ['colmode',         qr/^(?:ColNum|ColLabel)$/i,                                               "-c, --ColMode",     "ColNum, or ColLabel"],
    ['compoundidmode',  qr/^(?:DataField|MolName|LabelPrefix|MolNameOrLabelPrefix)$/i,            "--CompoundIDMode",  "DataField, MolName, LabelPrefix or MolNameOrLabelPrefix"],
    ['detail',          sub { IsPositiveInteger($_[0]) },                                         "-d, --detail",      "> 0 "],
    ['inputdatamode',   qr/^(?:LoadInMemory|ScanFile)$/i,                                         "--InputDataMode",   "LoadInMemory or ScanFile"],
    ['mode',            qr/^(?:AutoDetect|FingerprintsBitVectorString|FingerprintsVectorString)$/i, "-m, --mode",      "AutoDetect, FingerprintsBitVectorString or FingerprintsVectorString "],
    ['indelim',         qr/^(?:comma|semicolon)$/i,                                               "--InDelim",         "comma, or semicolon"],
    ['outdelim',        qr/^(?:comma|semicolon|tab)$/i,                                           "--OutDelim",        "comma, tab, or semicolon"],
    ['outmatrixformat', qr/^(?:RowsAndColumns|IDPairsAndValue)$/i,                                "--OutMatrixFormat", "RowsAndColumns or IDPairsAndValue"],
    ['outmatrixtype',   qr/^(?:FullMatrix|UpperTriangularMatrix|LowerTriangularMatrix)$/i,        "--OutMatrixType",   "FullMatrix, UpperTriangularMatrix or LowerTriangularMatrix"],
    ['quote',           qr/^(?:Yes|No)$/i,                                                        "-q --quote",        "Yes or No"],
    ['precision',       sub { IsPositiveInteger($_[0]) },                                         "--precision",       "> 0 "],
  );

  for my $OptionValueCheckRef (@OptionValueChecks) {
    my($OptionKey, $Validator, $OptionLabel, $AllowedValues) = @{$OptionValueCheckRef};
    my($Value, $IsValid);

    $Value = $Options{$OptionKey};
    $IsValid = (ref($Validator) eq 'CODE') ? $Validator->($Value) : ($Value =~ $Validator);
    if (!$IsValid) {
      die "Error: The value specified, $Value, for option \"$OptionLabel\" is not valid. Allowed values: $AllowedValues\n";
    }
  }
}