MayaChemTools

   1 #!/usr/bin/perl -w
   2 #
   3 # File: SimilarityMatricesFingerprints.pl
   4 # Author: Manish Sud <msud@san.rr.com>
   5 #
   6 # Copyright (C) 2024 Manish Sud. All rights reserved.
   7 #
   8 # This file is part of MayaChemTools.
   9 #
  10 # MayaChemTools is free software; you can redistribute it and/or modify it under
  11 # the terms of the GNU Lesser General Public License as published by the Free
  12 # Software Foundation; either version 3 of the License, or (at your option) any
  13 # later version.
  14 #
  15 # MayaChemTools is distributed in the hope that it will be useful, but without
  16 # any warranty; without even the implied warranty of merchantability of fitness
  17 # for a particular purpose.  See the GNU Lesser General Public License for more
  18 # details.
  19 #
  20 # You should have received a copy of the GNU Lesser General Public License
  21 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  22 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  23 # Boston, MA, 02111-1307, USA.
  24 #
  25 
  26 use strict;
  27 use FindBin; use lib "$FindBin::Bin/../lib";
  28 use Getopt::Long;
  29 use File::Basename;
  30 use File::Copy;
  31 use Text::ParseWords;
  32 use Benchmark;
  33 use FileUtil;
  34 use TextUtil;
  35 use Fingerprints::FingerprintsFileUtil;
  36 use Fingerprints::FingerprintsBitVector;
  37 use Fingerprints::FingerprintsVector;
  38 
  39 my($ScriptName, %Options, $StartTime, $EndTime, $TotalTime);
  40 
  41 # Autoflush STDOUT
  42 $| = 1;
  43 
  44 # Starting message...
  45 $ScriptName = basename($0);
  46 print "\n$ScriptName: Starting...\n\n";
  47 $StartTime = new Benchmark;
  48 
  49 # Get the options and setup script...
  50 SetupScriptUsage();
  51 if ($Options{help} || @ARGV < 1) {
  52   die GetUsageFromPod("$FindBin::Bin/$ScriptName");
  53 }
  54 
  55 my(@FingerprintsFilesList);
  56 @FingerprintsFilesList = ExpandFileNames(\@ARGV, "sdf sd fpf fp csv tsv");
  57 
  58 # Process options...
  59 print "Processing options...\n";
  60 my(%OptionsInfo);
  61 ProcessOptions();
  62 
  63 # Setup information about input files...
  64 print "Checking input fingerprints file(s)...\n";
  65 my(%FingerprintsFilesInfo);
  66 RetrieveFingerprintsFilesInfo();
  67 
  68 # Process input files..
  69 my($FileIndex);
  70 if (@FingerprintsFilesList > 1) {
  71   print "\nProcessing fingerprints files...\n";
  72 }
  73 for $FileIndex (0 .. $#FingerprintsFilesList) {
  74   if ($FingerprintsFilesInfo{FileOkay}[$FileIndex]) {
  75     print "\nProcessing file $FingerprintsFilesList[$FileIndex]...\n";
  76     GenerateSimilarityMatrices($FileIndex);
  77   }
  78 }
  79 print "\n$ScriptName:Done...\n\n";
  80 
  81 $EndTime = new Benchmark;
  82 $TotalTime = timediff ($EndTime, $StartTime);
  83 print "Total time: ", timestr($TotalTime), "\n";
  84 
  85 ###############################################################################
  86 
  87 # Generate similarity matrices using fingerprints data in text file...
  88 #
  89 sub GenerateSimilarityMatrices {
  90   my($FileIndex) = @_;
  91 
  92   ProcessFingerprintsData($FileIndex);
  93 
  94   if ($FingerprintsFilesInfo{FingerprintsBitVectorStringMode}[$FileIndex]) {
  95     GenerateSimilarityMatricesForFingerprintsBitVectors($FileIndex);
  96   }
  97   elsif ($FingerprintsFilesInfo{FingerprintsVectorStringMode}[$FileIndex]) {
  98     GenerateSimilarityMatricesForFingerprintsVectors($FileIndex);
  99   }
 100 
 101   CleanupFingerprintsData($FileIndex);
 102 }
 103 
 104 # Generate bit vector similarity matrices...
 105 #
 106 sub GenerateSimilarityMatricesForFingerprintsBitVectors {
 107   my($FileIndex) = @_;
 108   my($SpecifiedComparisonMeasure, $ComparisonMeasure, $NewTextFile, $SimilarityMatrixRef, $MethodName, @MethodParameters);
 109 
 110   for $SpecifiedComparisonMeasure (@{$OptionsInfo{SpecifiedBitVectorComparisonsRef}}) {
 111     $ComparisonMeasure = $OptionsInfo{SpecifiedBitVectorComparisonsNameRef}->{lc($SpecifiedComparisonMeasure)};
 112     $NewTextFile = $FingerprintsFilesInfo{OutFileRoot}[$FileIndex] . "${ComparisonMeasure}." . $FingerprintsFilesInfo{OutFileExt}[$FileIndex];
 113 
 114     $MethodName = $OptionsInfo{SpecifiedBitVectorComparisonsMethodRef}->{lc($ComparisonMeasure)};
 115 
 116     @MethodParameters = ();
 117     @MethodParameters = @{$OptionsInfo{SpecifiedBitVectorComparisonsParameterRef}->{lc($ComparisonMeasure)}};
 118 
 119     GenerateSimilarityMatrix($FileIndex, $NewTextFile, $MethodName, \@MethodParameters);
 120   }
 121 }
 122 
 123 # Generate vector similarity and/or distance matrices...
 124 #
 125 sub GenerateSimilarityMatricesForFingerprintsVectors {
 126   my($FileIndex) = @_;
 127   my($SpecifiedComparisonMeasure, $ComparisonMode, $ComparisonMeasure, $NewTextFile, $MethodName, @MethodParameters);
 128 
 129   for $SpecifiedComparisonMeasure (@{$OptionsInfo{SpecifiedVectorComparisonsRef}}) {
 130     $ComparisonMeasure = $OptionsInfo{SpecifiedVectorComparisonsNameRef}->{lc($SpecifiedComparisonMeasure)};
 131 
 132     for $ComparisonMode (@{$OptionsInfo{SpecifiedVectorComparisonModesRef}}) {
 133       $NewTextFile = $FingerprintsFilesInfo{OutFileRoot}[$FileIndex] . "${ComparisonMeasure}${ComparisonMode}." . $FingerprintsFilesInfo{OutFileExt}[$FileIndex];
 134 
 135       $MethodName = $OptionsInfo{SpecifiedVectorComparisonsMethodRef}->{lc($ComparisonMeasure)};
 136 
 137       @MethodParameters = ();
 138       push @MethodParameters, $ComparisonMode;
 139       push @MethodParameters, @{$OptionsInfo{SpecifiedVectorComparisonsParameterRef}->{lc($ComparisonMeasure)}};
 140 
 141       GenerateSimilarityMatrix($FileIndex, $NewTextFile, $MethodName, \@MethodParameters);
 142     }
 143   }
 144 }
 145 
 146 # Calculate similarity matrix and write it out...
 147 #
 148 sub GenerateSimilarityMatrix {
 149   my($FileIndex, $NewTextFile, $MethodName, $MethodParametersRef) = @_;
 150 
 151   print "\nGenerating $NewTextFile...\n";
 152 
 153   # Open new file and write out column labels...
 154   open NEWTEXTFILE, ">$NewTextFile" or die "Error: Can't open $NewTextFile: $! \n";
 155   WriteColumnLabels($FileIndex, \*NEWTEXTFILE);
 156 
 157   # Calculate and write out similarity matrix values...
 158   if ($OptionsInfo{InputDataMode} =~ /^LoadInMemory$/i) {
 159     GenerateSimilarityMatrixUsingMemoryData($FileIndex, \*NEWTEXTFILE, $MethodName, $MethodParametersRef);
 160   }
 161   elsif ($OptionsInfo{InputDataMode} =~ /^ScanFile$/i) {
 162     GenerateSimilarityMatrixUsingFileData($FileIndex, \*NEWTEXTFILE, $MethodName, $MethodParametersRef);
 163   }
 164   else {
 165     warn "Warning: Input data mode, $OptionsInfo{InputDataMode}, is not supported.\n";
 166   }
 167 
 168   # Close new text file...
 169   close NEWTEXTFILE;
 170 
 171 }
 172 
 173 # Calculate and write out similarity values using fingerprints data already loaded in
 174 # memory...
 175 #
 176 sub GenerateSimilarityMatrixUsingMemoryData {
 177   my($FileIndex, $NewTextFileRef, $MethodName, $MethodParametersRef) = @_;
 178   my($RowIndex, $ColIndex, $CmpdID1, $CmpdID2, $FingerprintsObject1, $FingerprintsObject2, $Value, $Line, @LineWords);
 179 
 180   for $RowIndex (0 .. $#{$FingerprintsFilesInfo{FingerprintsObjectsRef}}) {
 181     $FingerprintsObject1 = $FingerprintsFilesInfo{FingerprintsObjectsRef}->[$RowIndex];
 182     $CmpdID1 = $FingerprintsFilesInfo{CompundIDsRef}->[$RowIndex];
 183 
 184     if ($OptionsInfo{WriteRowsAndColumns}) {
 185       print $NewTextFileRef "$OptionsInfo{OutQuoteValue}${CmpdID1}$OptionsInfo{OutQuoteValue}";
 186     }
 187 
 188     COLINDEX: for $ColIndex (0 .. $#{$FingerprintsFilesInfo{FingerprintsObjectsRef}}) {
 189       if (SkipMatrixData($RowIndex, $ColIndex)) {
 190         next COLINDEX;
 191       }
 192 
 193       $FingerprintsObject2 = $FingerprintsFilesInfo{FingerprintsObjectsRef}->[$ColIndex];
 194 
 195       $Value = $FingerprintsObject1->$MethodName($FingerprintsObject2, @{$MethodParametersRef});
 196       $Value = (defined($Value) && length($Value)) ? (sprintf("%.$OptionsInfo{Precision}f", $Value) + 0) : '';
 197 
 198       if ($OptionsInfo{WriteRowsAndColumns}) {
 199         print $NewTextFileRef "$OptionsInfo{OutDelim}$OptionsInfo{OutQuoteValue}${Value}$OptionsInfo{OutQuoteValue}";
 200       }
 201       elsif ($OptionsInfo{WriteIDPairsAndValue}) {
 202         $CmpdID2 = $FingerprintsFilesInfo{CompundIDsRef}->[$ColIndex];
 203 
 204         @LineWords = ();
 205         push @LineWords,  ($CmpdID1, $CmpdID2, $Value);
 206         $Line = JoinWords(\@LineWords, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});
 207         print $NewTextFileRef "$Line\n";
 208       }
 209     }
 210     if ($OptionsInfo{WriteRowsAndColumns}) {
 211       print $NewTextFileRef "\n";
 212     }
 213   }
 214 }
 215 
 216 # Calculate and write out similarity values by retrieving and prcessing data
 217 # from fingerprint file...
 218 #
 219 sub GenerateSimilarityMatrixUsingFileData {
 220   my($FileIndex, $NewTextFileRef, $MethodName, $MethodParametersRef) = @_;
 221   my($RowIndex, $ColIndex, $FingerprintsFileIO, $TmpFingerprintsFileIO, $FingerprintsObject1, $FingerprintsObject2, $CmpdID1, $CmpdID2, $FingerprintsCount, $IgnoredFingerprintsCount, $Value, $Line, @LineWords);
 222 
 223   print "\nReading and processing fingerprints data...\n";
 224 
 225   $FingerprintsFileIO = Fingerprints::FingerprintsFileUtil::NewFingerprintsFileIO(%{$FingerprintsFilesInfo{FingerprintsFileIOParameters}[$FileIndex]});
 226   $FingerprintsFileIO->Open();
 227 
 228   $RowIndex = 0; $ColIndex = 0;
 229   $FingerprintsCount = 0; $IgnoredFingerprintsCount = 0;
 230 
 231   FINGERPRINTSFILEIO: while ($FingerprintsFileIO->Read()) {
 232     $FingerprintsCount++;
 233 
 234     if (!$FingerprintsFileIO->IsFingerprintsDataValid()) {
 235       $IgnoredFingerprintsCount++;
 236       next FINGERPRINTSFILEIO;
 237     }
 238     $RowIndex++;
 239     $FingerprintsObject1 = $FingerprintsFileIO->GetFingerprints();
 240     $CmpdID1 = $FingerprintsFileIO->GetCompoundID();
 241 
 242     if ($OptionsInfo{WriteRowsAndColumns}) {
 243       print $NewTextFileRef "$OptionsInfo{OutQuoteValue}${CmpdID1}$OptionsInfo{OutQuoteValue}";
 244     }
 245 
 246     # Force detail level of 1 to avoid duplicate printing of diagnostic messages for invalid
 247     # fingerprints data...
 248     $TmpFingerprintsFileIO = Fingerprints::FingerprintsFileUtil::NewFingerprintsFileIO(%{$FingerprintsFilesInfo{TmpFingerprintsFileIOParameters}[$FileIndex]}, "DetailLevel" => 1);
 249     $TmpFingerprintsFileIO->Open();
 250 
 251     $ColIndex = 0;
 252     TMPFINGERPRINTSFILEIO: while ($TmpFingerprintsFileIO->Read()) {
 253       if (!$TmpFingerprintsFileIO->IsFingerprintsDataValid()) {
 254         next TMPFINGERPRINTSFILEIO;
 255       }
 256       $ColIndex++;
 257 
 258       if (SkipMatrixData($RowIndex, $ColIndex)) {
 259         next TMPFINGERPRINTSFILEIO;
 260       }
 261 
 262       $FingerprintsObject2 = $TmpFingerprintsFileIO->GetFingerprints();
 263 
 264       $Value = $FingerprintsObject1->$MethodName($FingerprintsObject2, @{$MethodParametersRef});
 265       $Value = (defined($Value) && length($Value)) ? (sprintf("%.$OptionsInfo{Precision}f", $Value) + 0) : '';
 266 
 267       if ($OptionsInfo{WriteRowsAndColumns}) {
 268         print $NewTextFileRef "$OptionsInfo{OutDelim}$OptionsInfo{OutQuoteValue}${Value}$OptionsInfo{OutQuoteValue}";
 269       }
 270       elsif ($OptionsInfo{WriteIDPairsAndValue}) {
 271         $CmpdID2 = $TmpFingerprintsFileIO->GetCompoundID();
 272 
 273         @LineWords = ();
 274         push @LineWords,  ($CmpdID1, $CmpdID2, $Value);
 275         $Line = JoinWords(\@LineWords, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});
 276         print $NewTextFileRef "$Line\n";
 277       }
 278     }
 279     $TmpFingerprintsFileIO->Close();
 280 
 281     if ($OptionsInfo{WriteRowsAndColumns}) {
 282       print $NewTextFileRef "\n";
 283     }
 284   }
 285 
 286   $FingerprintsFileIO->Close();
 287 
 288   print "Number of fingerprints data entries in database fingerprints file: $FingerprintsCount\n";
 289   print "Number of fingerprints date entries processed successfully: ", ($FingerprintsCount - $IgnoredFingerprintsCount)  , "\n";
 290   print "Number of fingerprints data entries ignored due to missing/invalid data: $IgnoredFingerprintsCount\n\n";
 291 }
 292 
 293 # Check whether matrix data need to be skipped...
 294 #
 295 sub SkipMatrixData {
 296   my($RowIndex, $ColIndex) = @_;
 297 
 298   if ($OptionsInfo{WriteFullMatrix}) {
 299     return 0;
 300   }
 301   elsif ($OptionsInfo{WriteUpperTriangularMatrix}) {
 302     return ($RowIndex > $ColIndex) ? 1 : 0;
 303   }
 304   elsif ($OptionsInfo{WriteLowerTriangularMatrix}) {
 305     return ($RowIndex < $ColIndex) ? 1 : 0;
 306   }
 307 
 308   return 0;
 309 }
 310 
 311 # Write out column labels...
 312 #
 313 sub WriteColumnLabels {
 314   my($FileIndex, $NewTextFileRef) = @_;
 315   my($Line, @LineWords);
 316 
 317   if ($OptionsInfo{OutMatrixFormat} =~ /^IDPairsAndValue$/i) {
 318     @LineWords = ();
 319     push @LineWords, ('CmpdID1', 'CmpdID2', 'Coefficient Value');
 320     $Line = JoinWords(\@LineWords, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});
 321     print $NewTextFileRef "$Line\n";
 322   }
 323   elsif ($OptionsInfo{OutMatrixFormat} =~ /^RowsAndColumns$/i) {
 324     if ($OptionsInfo{InputDataMode} =~ /^LoadInMemory$/i) {
 325       @LineWords = ();
 326       push @LineWords, '';
 327       push @LineWords, @{$FingerprintsFilesInfo{CompundIDsRef}};
 328       $Line = JoinWords(\@LineWords, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});
 329       print $NewTextFileRef "$Line\n";
 330     }
 331     elsif ($OptionsInfo{InputDataMode} =~ /^ScanFile$/i) {
 332       my( $FingerprintsFileIO, $CmpdID);
 333 
 334       # Scan file to retrieve compound IDs...
 335       #
 336       print "\nProcessing fingerprints file to generate compound IDs...\n";
 337 
 338       # Force detail level of 1 to avoid diagnostics messages for invalid fingeprints data during
 339       # retrieval of compound IDs as these get printed out during calculation of matrix...
 340       #
 341       $FingerprintsFileIO = Fingerprints::FingerprintsFileUtil::NewFingerprintsFileIO(%{$FingerprintsFilesInfo{FingerprintsFileIOParameters}[$FileIndex]}, "DetailLevel" => 1);
 342       $FingerprintsFileIO->Open();
 343 
 344       print $NewTextFileRef "$OptionsInfo{OutQuoteValue}$OptionsInfo{OutQuoteValue}";
 345 
 346       FINGERPRINTSFILEIO: while ($FingerprintsFileIO->Read()) {
 347         if (!$FingerprintsFileIO->IsFingerprintsDataValid()) {
 348           next FINGERPRINTSFILEIO;
 349         }
 350         $CmpdID = $FingerprintsFileIO->GetCompoundID();
 351         print $NewTextFileRef "$OptionsInfo{OutDelim}$OptionsInfo{OutQuoteValue}${CmpdID}$OptionsInfo{OutQuoteValue}";
 352       }
 353       $FingerprintsFileIO->Close();
 354 
 355       print $NewTextFileRef "\n";
 356 
 357       print "Processing fingerprints file to generate matrix...\n";
 358     }
 359   }
 360   else {
 361     warn "Warning: Output matrix format, $OptionsInfo{OutMatrixFormat}, is not supported.\n";
 362   }
 363 }
 364 
 365 # Process fingerprints data...
 366 #
 367 sub ProcessFingerprintsData {
 368   my($FileIndex) = @_;
 369   my($FingerprintsFileIO);
 370 
 371   $FingerprintsFilesInfo{CompundIDsRef}  = undef;
 372   $FingerprintsFilesInfo{FingerprintsObjectsRef} = undef;
 373 
 374   if ($OptionsInfo{InputDataMode} =~ /^LoadInMemory$/i) {
 375     my($FingerprintsFileIO);
 376 
 377     $FingerprintsFileIO = Fingerprints::FingerprintsFileUtil::NewFingerprintsFileIO(%{$FingerprintsFilesInfo{FingerprintsFileIOParameters}[$FileIndex]});
 378     ($FingerprintsFilesInfo{CompundIDsRef}, $FingerprintsFilesInfo{FingerprintsObjectsRef}) = Fingerprints::FingerprintsFileUtil::ReadAndProcessFingerpritsData($FingerprintsFileIO);
 379   }
 380   elsif ($OptionsInfo{InputDataMode} =~ /^ScanFile$/i) {
 381     my($FingerprintsFile, $TmpFingerprintsFile);
 382 
 383     $FingerprintsFile = $FingerprintsFilesList[$FileIndex];
 384     $TmpFingerprintsFile = $FingerprintsFilesInfo{TmpFingerprintsFile}[$FileIndex];
 385 
 386     # Copy fingerprints file to a tmp file for calculating similarity matrix...
 387     print "\nCopying fingerprints file, $FingerprintsFile, to temporary fingperints file, $TmpFingerprintsFile...\n";
 388     copy $FingerprintsFile, $TmpFingerprintsFile or die "Error: Couldn't copy $FingerprintsFile to $TmpFingerprintsFile: $! \n";
 389   }
 390 }
 391 
 392 # Clean up fingerprints data...
 393 #
 394 sub CleanupFingerprintsData {
 395   my($FileIndex) = @_;
 396 
 397   if ($OptionsInfo{InputDataMode} =~ /^LoadInMemory$/i) {
 398     $FingerprintsFilesInfo{CompundIDsRef}  = undef;
 399     $FingerprintsFilesInfo{FingerprintsObjectsRef} = undef;
 400   }
 401   elsif ($OptionsInfo{InputDataMode} =~ /^ScanFile$/i) {
 402     my($TmpFingerprintsFile);
 403 
 404     # Delete temporary fingerprints file...
 405     $TmpFingerprintsFile = $FingerprintsFilesInfo{TmpFingerprintsFile}[$FileIndex];
 406 
 407     print "\nDeleting temporary fingerprints file $TmpFingerprintsFile...\n";
 408     unlink $TmpFingerprintsFile or die "Error: Couldn't unlink $TmpFingerprintsFile: $! \n";
 409   }
 410 }
 411 
 412 # Retrieve information about fingerprints files...
 413 #
 414 sub RetrieveFingerprintsFilesInfo {
 415   my($FingerprintsFile, $TmpFingerprintsFile, $FingerprintsFileIO, $FingerprintsBitVectorStringMode, $FingerprintsVectorStringMode, $FileType, $Index, $FileDir, $FileExt, $FileName, $InDelim, $OutFileRoot, $OutFileExt, %FingerprintsFileIOParameters);
 416 
 417   %FingerprintsFilesInfo = ();
 418   @{$FingerprintsFilesInfo{FileOkay}} = ();
 419   @{$FingerprintsFilesInfo{FileType}} = ();
 420   @{$FingerprintsFilesInfo{InDelim}} = ();
 421   @{$FingerprintsFilesInfo{OutFileRoot}} = ();
 422   @{$FingerprintsFilesInfo{OutFileExt}} = ();
 423 
 424   @{$FingerprintsFilesInfo{TmpFingerprintsFile}} = ();
 425 
 426   @{$FingerprintsFilesInfo{FingerprintsFileIOParameters}} = ();
 427   @{$FingerprintsFilesInfo{TmpFingerprintsFileIOParameters}} = ();
 428 
 429   @{$FingerprintsFilesInfo{FingerprintsBitVectorStringMode}} = ();
 430   @{$FingerprintsFilesInfo{FingerprintsVectorStringMode}} = ();
 431 
 432   FILELIST: for $Index (0 .. $#FingerprintsFilesList) {
 433     $FingerprintsFilesInfo{FileOkay}[$Index] = 0;
 434     $FingerprintsFilesInfo{FileType}[$Index] = '';
 435     $FingerprintsFilesInfo{InDelim}[$Index] = "";
 436     $FingerprintsFilesInfo{OutFileRoot}[$Index] = '';
 437     $FingerprintsFilesInfo{OutFileExt}[$Index] = '';
 438 
 439     %{$FingerprintsFilesInfo{FingerprintsFileIOParameters}[$Index]} = ();
 440 
 441     $FingerprintsFilesInfo{TmpFingerprintsFile}[$Index] = "";
 442     %{$FingerprintsFilesInfo{TmpFingerprintsFileIOParameters}[$Index]} = ();
 443 
 444     $FingerprintsFilesInfo{FingerprintsBitVectorStringMode}[$Index] = 0;
 445     $FingerprintsFilesInfo{FingerprintsVectorStringMode}[$Index] = 0;
 446 
 447     $FingerprintsFile = $FingerprintsFilesList[$Index];
 448     if (!(-e $FingerprintsFile)) {
 449       warn "Warning: Ignoring file $FingerprintsFile: It doesn't exist\n";
 450       next FILELIST;
 451     }
 452 
 453     $FileType = Fingerprints::FingerprintsFileUtil::GetFingerprintsFileType($FingerprintsFile);
 454     if (IsEmpty($FileType)) {
 455       warn "Warning: Ignoring file $FingerprintsFile: It's not a fingerprints file\n";
 456       next FILELIST;
 457     }
 458 
 459     $FileDir = ""; $FileName = ""; $FileExt = "";
 460     ($FileDir, $FileName, $FileExt) = ParseFileName($FingerprintsFile);
 461 
 462     # Setup temporary fingerprints file name for scan file mode...
 463     $TmpFingerprintsFile = "${FileName}Tmp.${FileExt}";
 464 
 465     $InDelim = ($FileExt =~ /^tsv$/i) ? 'Tab' : $OptionsInfo{InDelim};
 466 
 467     # Setup output file names...
 468     $OutFileExt = "csv";
 469     if ($Options{outdelim} =~ /^tab$/i) {
 470       $OutFileExt = "tsv";
 471     }
 472 
 473     $OutFileRoot = $FileName;
 474     if ($OptionsInfo{OutFileRoot} && (@FingerprintsFilesList == 1)) {
 475       my ($RootFileDir, $RootFileName, $RootFileExt) = ParseFileName($OptionsInfo{OutFileRoot});
 476       if ($RootFileName && $RootFileExt) {
 477         $FileName = $RootFileName;
 478       }
 479       else {
 480         $FileName = $OptionsInfo{OutFileRoot};
 481       }
 482       $OutFileRoot = $FileName;
 483     }
 484 
 485     if (!$Options{overwrite}) {
 486       # Similarity matrices output file names for bit-vector strings...
 487       my($SpecifiedComparisonMeasure, $ComparisonMeasure);
 488       for $SpecifiedComparisonMeasure (@{$OptionsInfo{SpecifiedBitVectorComparisonsRef}}) {
 489         $ComparisonMeasure = $OptionsInfo{SpecifiedBitVectorComparisonsNameRef}->{lc($SpecifiedComparisonMeasure)};
 490         if (-e "${OutFileRoot}${ComparisonMeasure}.${OutFileExt}") {
 491           warn "Warning: Ignoring file $FingerprintsFile: The file ${OutFileRoot}${ComparisonMeasure}.${OutFileExt} already exists.\n";
 492           next FILELIST;
 493         }
 494       }
 495       # Similarity matrices output file names for vector strings...
 496       my($ComparisonMode);
 497       for $SpecifiedComparisonMeasure (@{$OptionsInfo{SpecifiedVectorComparisonsRef}}) {
 498         $ComparisonMeasure = $OptionsInfo{SpecifiedVectorComparisonsNameRef}->{lc($SpecifiedComparisonMeasure)};
 499         for $ComparisonMode (@{$OptionsInfo{SpecifiedVectorComparisonModesRef}}) {
 500           if (-e "${OutFileRoot}${ComparisonMeasure}${ComparisonMode}.${OutFileExt}") {
 501             warn "Warning: Ignoring file $FingerprintsFile: The file ${OutFileRoot}${ComparisonMeasure}${ComparisonMode}.${OutFileExt} already exists.\n";
 502             next FILELIST;
 503           }
 504         }
 505       }
 506     }
 507 
 508     # Setup FingerprintsFileIO parameters...
 509     %FingerprintsFileIOParameters = ();
 510     FILEIOPARAMETERS: {
 511       if ($FileType =~ /^SD$/i) {
 512         %FingerprintsFileIOParameters = ('Name' => $FingerprintsFile, 'Mode' => 'Read', 'FingerprintsStringMode' => $OptionsInfo{Mode}, 'ValidateData' => $OptionsInfo{ValidateData}, 'DetailLevel' =>  $OptionsInfo{Detail}, 'FingerprintsFieldLabel' => $OptionsInfo{FingerprintsField}, 'CompoundIDMode' => $OptionsInfo{CompoundIDMode}, 'CompoundIDFieldLabel' => $OptionsInfo{CompoundIDField}, 'CompoundIDPrefix' => $OptionsInfo{CompoundIDPrefix});
 513         last FILEIOPARAMETERS;
 514       }
 515       if ($FileType =~ /^FP$/i) {
 516         %FingerprintsFileIOParameters = ('Name' => $FingerprintsFile, 'Mode' => 'Read', 'FingerprintsStringMode' => $OptionsInfo{Mode}, 'ValidateData' => $OptionsInfo{ValidateData}, 'DetailLevel' =>  $OptionsInfo{Detail});
 517         last FILEIOPARAMETERS;
 518       }
 519       if ($FileType =~ /^Text$/i) {
 520         %FingerprintsFileIOParameters = ('Name' => $FingerprintsFile, 'Mode' => 'Read', 'FingerprintsStringMode' => $OptionsInfo{Mode}, 'ValidateData' => $OptionsInfo{ValidateData}, 'DetailLevel' =>  $OptionsInfo{Detail}, 'FingerprintsCol' => $OptionsInfo{FingerprintsCol}, 'ColMode' => $OptionsInfo{ColMode}, 'CompoundIDCol' => $OptionsInfo{CompoundIDCol}, 'CompoundIDPrefix' => $OptionsInfo{CompoundIDPrefix}, 'InDelim' => $OptionsInfo{InDelim});
 521         last FILEIOPARAMETERS;
 522       }
 523       warn "Warning: File type for fingerprints file, $FingerprintsFile, is not valid. Supported file types: SD, FP or Text\n";
 524       next FILELIST;
 525     }
 526 
 527     # Retrieve fingerints file string mode information...
 528     $FingerprintsFileIO = Fingerprints::FingerprintsFileUtil::NewFingerprintsFileIO(%FingerprintsFileIOParameters);
 529 
 530     if (!$FingerprintsFileIO) {
 531       warn "Warning: Ignoring fingerprints file $FingerprintsFile: It contains invalid fingerprints data\n";
 532       next FILELIST;
 533     }
 534     if (!$FingerprintsFileIO->IsFingerprintsFileDataValid()) {
 535       warn "Warning: Ignoring fingerprints file $FingerprintsFile: It contains invalid fingerprints data\n";
 536       next FILELIST;
 537     }
 538     $FingerprintsBitVectorStringMode = $FingerprintsFileIO->GetFingerprintsBitVectorStringMode();
 539     $FingerprintsVectorStringMode = $FingerprintsFileIO->GetFingerprintsVectorStringMode();
 540 
 541 
 542     $FingerprintsFilesInfo{FileOkay}[$Index] = 1;
 543     $FingerprintsFilesInfo{FileType}[$Index] = $FileType;
 544 
 545     $FingerprintsFilesInfo{InDelim}[$Index] = $InDelim;
 546 
 547     $FingerprintsFilesInfo{OutFileRoot}[$Index] = $OutFileRoot;
 548     $FingerprintsFilesInfo{OutFileExt}[$Index] = $OutFileExt;
 549 
 550     %{$FingerprintsFilesInfo{FingerprintsFileIOParameters}[$Index]} = %FingerprintsFileIOParameters;
 551 
 552     $FingerprintsFilesInfo{TmpFingerprintsFile}[$Index] = $TmpFingerprintsFile;
 553 
 554     $FingerprintsFileIOParameters{Name} = $TmpFingerprintsFile;
 555     %{$FingerprintsFilesInfo{TmpFingerprintsFileIOParameters}[$Index]} = %FingerprintsFileIOParameters;
 556 
 557     $FingerprintsFilesInfo{FingerprintsBitVectorStringMode}[$Index] = $FingerprintsBitVectorStringMode;
 558     $FingerprintsFilesInfo{FingerprintsVectorStringMode}[$Index] = $FingerprintsVectorStringMode;
 559   }
 560 }
 561 
 562 # Process option values...
 563 sub ProcessOptions {
 564   %OptionsInfo = ();
 565 
 566   $OptionsInfo{Mode} = $Options{mode};
 567 
 568   $OptionsInfo{InputDataMode} = $Options{inputdatamode};
 569 
 570   ProcessBitVectorComparisonOptions();
 571   ProcessVectorComparisonOptions();
 572 
 573   $OptionsInfo{CompoundIDPrefix} = $Options{compoundidprefix} ? $Options{compoundidprefix} : 'Cmpd';
 574 
 575   # Compound ID and fingerprints column options for text files...
 576   $OptionsInfo{ColMode} = $Options{colmode};
 577 
 578   if (IsNotEmpty($Options{compoundidcol})) {
 579     if ($Options{colmode} =~ /^ColNum$/i) {
 580       if (!IsPositiveInteger($Options{compoundidcol})) {
 581         die "Error: Column value, $Options{compoundidcol}, specified using \"--CompoundIDCol\" is not valid: Allowed integer values: > 0\n";
 582       }
 583     }
 584     $OptionsInfo{CompoundIDCol} = $Options{compoundidcol};
 585   }
 586   else {
 587     $OptionsInfo{CompoundIDCol} = 'AutoDetect';
 588   }
 589 
 590   if (IsNotEmpty($Options{fingerprintscol})) {
 591     if ($Options{colmode} =~ /^ColNum$/i) {
 592       if (!IsPositiveInteger($Options{fingerprintscol})) {
 593         die "Error: Column value, $Options{fingerprintscol}, specified using \"--FingerprintsCol\" is not valid: Allowed integer values: > 0\n";
 594       }
 595     }
 596     $OptionsInfo{FingerprintsCol} = $Options{fingerprintscol};
 597   }
 598   else {
 599     $OptionsInfo{FingerprintsCol} = 'AutoDetect';
 600   }
 601 
 602   if (IsNotEmpty($Options{compoundidcol}) && IsNotEmpty($Options{fingerprintscol})) {
 603     if (IsPositiveInteger($Options{compoundidcol}) && IsPositiveInteger($Options{fingerprintscol})) {
 604       if (($Options{compoundidcol} == $Options{fingerprintscol})) {
 605         die "Error: Values specified using \"--CompoundIDCol\" and \"--FingerprintsCol\", $Options{compoundidcol}, must be different.\n";
 606       }
 607     }
 608     else {
 609       if (($Options{compoundidcol} eq $Options{fingerprintscol})) {
 610         die "Error: Values specified using \"--CompoundIDCol\" and \"--FingerprintsCol\", $Options{compoundidcol}, must be different.\n";
 611       }
 612     }
 613   }
 614 
 615   # Compound ID and fingerprints field options for SD files...
 616   $OptionsInfo{CompoundIDMode} = $Options{compoundidmode};
 617   $OptionsInfo{CompoundIDField} = '';
 618 
 619   if ($Options{compoundidmode} =~ /^DataField$/i) {
 620     if (!$Options{compoundidfield}) {
 621       die "Error: You must specify a value for \"--CompoundIDField\" option in \"DataField\" \"--CompoundIDMode\". \n";
 622     }
 623     $OptionsInfo{CompoundIDField} = $Options{compoundidfield};
 624   }
 625 
 626 
 627   if (IsNotEmpty($Options{fingerprintsfield})) {
 628     $OptionsInfo{FingerprintsField} = $Options{fingerprintsfield};
 629   }
 630   else {
 631     $OptionsInfo{FingerprintsField} = 'AutoDetect';
 632   }
 633 
 634   if ($Options{compoundidfield} && IsNotEmpty($Options{fingerprintsfield})) {
 635     if (($Options{compoundidfield} eq $Options{fingerprintsfield})) {
 636       die "Error: Values specified using \"--CompoundIDField\" and \"--Fingerprintsfield\", $Options{compoundidfield}, must be different.\n";
 637     }
 638   }
 639 
 640   $OptionsInfo{Detail} = $Options{detail};
 641 
 642   $OptionsInfo{InDelim} = $Options{indelim};
 643   $OptionsInfo{OutDelim} = ($Options{outdelim} =~ /tab/i ) ? "\t" : (($Options{outdelim} =~ /semicolon/i) ? "\;" : "\,");
 644   $OptionsInfo{OutQuote} = ($Options{quote} =~ /^Yes$/i) ? 1 : 0;
 645   $OptionsInfo{OutQuoteValue} = ($Options{quote} =~ /^Yes$/i) ? '"' : '';
 646 
 647   $OptionsInfo{OutMatrixFormat} = $Options{outmatrixformat};
 648 
 649   $OptionsInfo{WriteRowsAndColumns} = 0; $OptionsInfo{WriteIDPairsAndValue} = 0;
 650   OUTMATRIXFORMAT: {
 651     if ($OptionsInfo{OutMatrixFormat} =~ /^RowsAndColumns$/i) {
 652       $OptionsInfo{WriteRowsAndColumns} = 1; last OUTMATRIXFORMAT;
 653     }
 654     if ($OptionsInfo{OutMatrixFormat} =~ /^IDPairsAndValue$/i) {
 655       $OptionsInfo{WriteIDPairsAndValue} = 1; last OUTMATRIXFORMAT;
 656     }
 657     die "Error: The value specified, $Options{outmatrixformat}, for option \"--OutMatrixFormat\" is not valid. Allowed values: RowsAndColumns or IDPairsAndValue\n";
 658   }
 659 
 660   $OptionsInfo{OutMatrixType} = $Options{outmatrixtype};
 661 
 662   $OptionsInfo{WriteFullMatrix} = 0;
 663   $OptionsInfo{WriteUpperTriangularMatrix} = 0; $OptionsInfo{WriteLowerTriangularMatrix} = 0;
 664   OUTMATRIXTYPE: {
 665     if ($OptionsInfo{OutMatrixType} =~ /^FullMatrix$/i) {
 666       $OptionsInfo{WriteFullMatrix} = 1; last OUTMATRIXTYPE;
 667     }
 668     if ($OptionsInfo{OutMatrixType} =~ /^UpperTriangularMatrix$/i) {
 669       $OptionsInfo{WriteUpperTriangularMatrix} = 1; last OUTMATRIXTYPE;
 670     }
 671     if ($OptionsInfo{OutMatrixType} =~ /^LowerTriangularMatrix$/i) {
 672       $OptionsInfo{WriteLowerTriangularMatrix} = 1; last OUTMATRIXTYPE;
 673     }
 674     die "Error: The value specified, $Options{outmatrixtype}, for option \"--OutMatrixType\" is not valid. Allowed values: FullMatrix, UpperTriangularMatrix or LowerTriangularMatrix\n";
 675   }
 676 
 677   $OptionsInfo{OverwriteFiles} = $Options{overwrite} ? 1 : 0;
 678   $OptionsInfo{OutFileRoot} = $Options{root} ? $Options{root} : 0;
 679 
 680   $OptionsInfo{Fast} = $Options{fast} ? 1 : 0;
 681   $OptionsInfo{ValidateData} = $Options{fast} ? 0 : 1;
 682 
 683   $OptionsInfo{Precision} = $Options{precision};
 684 
 685 }
 686 
  687 # Process options related to comparison of bit vector strings...
 688 #
 689 sub ProcessBitVectorComparisonOptions {
 690   # Setup supported bit vector similarity coefficients for bit vector strings...
 691   my($ComparisonMeasure, $SupportedComparisonMeasure, @SupportedComparisonMeasures, %SupportedComparisonMeasuresNameMap, %SupportedComparisonMeasuresMethodMap);
 692 
 693   @SupportedComparisonMeasures = ();
 694   %SupportedComparisonMeasuresNameMap = ();
 695   %SupportedComparisonMeasuresMethodMap = ();
 696 
 697   for $SupportedComparisonMeasure (Fingerprints::FingerprintsBitVector::GetSupportedSimilarityCoefficients()) {
 698     # Similarity coefficient function/method names contain "Coefficient" in their names.
 699     # So take 'em out and setup a map to original function/method name...
 700     $ComparisonMeasure = $SupportedComparisonMeasure;
 701     $ComparisonMeasure =~ s/Coefficient$//;
 702 
 703     push @SupportedComparisonMeasures, $ComparisonMeasure;
 704     $SupportedComparisonMeasuresNameMap{lc($ComparisonMeasure)} = $ComparisonMeasure;
 705     $SupportedComparisonMeasuresMethodMap{lc($ComparisonMeasure)} = $SupportedComparisonMeasure;
 706   }
 707 
 708   # Setup a list of similarity coefficients to use for calculating similarity matrices for bit vector strings...
 709   my($SpecifiedMeasure, @SpecifiedComparisonMeasures, %SpecifiedComparisonMeasuresNameMap, %SpecifiedComparisonMeasuresMethodMap, %SpecifiedComparisonMeasuresParameterMap);
 710 
 711   @SpecifiedComparisonMeasures = ();
 712   %SpecifiedComparisonMeasuresNameMap = ();
 713   %SpecifiedComparisonMeasuresMethodMap = ();
 714   %SpecifiedComparisonMeasuresParameterMap = ();
 715 
 716   if ($Options{bitvectorcomparisonmode} =~ /^All$/i) {
 717     push @SpecifiedComparisonMeasures, @SupportedComparisonMeasures;
 718   }
 719   else {
 720     # Comma delimited list of similarity coefficients...
 721     my($BitVectorComparisonMode, @SpecifiedMeasures, @UnsupportedSpecifiedMeasures);
 722 
 723     $BitVectorComparisonMode = $Options{bitvectorcomparisonmode};
 724     $BitVectorComparisonMode =~ s/ //g;
 725     @SpecifiedMeasures = split ",", $BitVectorComparisonMode;
 726     @UnsupportedSpecifiedMeasures = ();
 727 
 728     for $SpecifiedMeasure (@SpecifiedMeasures) {
 729       if (exists($SupportedComparisonMeasuresMethodMap{lc($SpecifiedMeasure)})) {
 730         push @SpecifiedComparisonMeasures, $SpecifiedMeasure;
 731       }
 732       else {
 733         push @UnsupportedSpecifiedMeasures, $SpecifiedMeasure;
 734       }
 735     }
 736     if (@UnsupportedSpecifiedMeasures) {
 737       if (@UnsupportedSpecifiedMeasures > 1) {
 738         warn "Error: The values specified - ", JoinWords(\@UnsupportedSpecifiedMeasures, ", ", 0)," - for option \"-b --BitVectorComparisonMode\" are not valid.\n";
 739       }
 740       else {
 741         warn "Error: The value specified, @UnsupportedSpecifiedMeasures, for option \"-b --BitVectorComparisonMode\" is not valid.\n";
 742       }
 743       die "Allowed values:", JoinWords(\@SupportedComparisonMeasures, ", ", 0), "\n";
 744     }
 745   }
 746   for $SpecifiedMeasure (@SpecifiedComparisonMeasures) {
 747     $SpecifiedComparisonMeasuresMethodMap{lc($SpecifiedMeasure)} = $SupportedComparisonMeasuresMethodMap{lc($SpecifiedMeasure)};
 748     $SpecifiedComparisonMeasuresNameMap{lc($SpecifiedMeasure)} = $SupportedComparisonMeasuresNameMap{lc($SpecifiedMeasure)};
 749   }
 750 
 751   $OptionsInfo{BitVectorComparisonMode} = $Options{bitvectorcomparisonmode};
 752   $OptionsInfo{SpecifiedBitVectorComparisonsRef} = \@SpecifiedComparisonMeasures;
 753   $OptionsInfo{SpecifiedBitVectorComparisonsNameRef} = \%SpecifiedComparisonMeasuresNameMap;
 754   $OptionsInfo{SpecifiedBitVectorComparisonsMethodRef} = \%SpecifiedComparisonMeasuresMethodMap;
 755 
 756   # Make sure valid alpha parameter is specified for Tversky calculation...
 757   my($SpecifiedMeasure1, $SpecifiedMeasure2);
 758   $OptionsInfo{Alpha} = '';
 759   $SpecifiedMeasure1 = 'TverskySimilarity';
 760   $SpecifiedMeasure2 = 'WeightedTverskySimilarity';
 761   if ($SpecifiedComparisonMeasuresNameMap{lc($SpecifiedMeasure1)} || $SpecifiedComparisonMeasuresNameMap{lc($SpecifiedMeasure2)}) {
 762     if (IsEmpty($Options{alpha})) {
 763       die "Error: You must specify a value for \"-a, --alpha\" option in \"$SpecifiedMeasure1, $SpecifiedMeasure2, or All\" \"-m --mode\". \n";
 764     }
 765     my($Alpha);
 766     $Alpha = $Options{alpha};
 767     if (!(IsFloat($Alpha) && $Alpha >=0 && $Alpha <= 1)) {
 768       die "Error: The value specified, $Options{alpha}, for option \"-a, --alpha\" is not valid. Allowed values: >= 0 and <= 1\n";
 769     }
 770     $OptionsInfo{Alpha} = $Alpha;
 771   }
 772 
 773   # Make sure valid beta parameter is specified for WeightedTanimoto and WeightedTversky
 774   # calculations...
 775   $OptionsInfo{Beta} = '';
 776   $SpecifiedMeasure1 = 'WeightedTverskySimilarity';
 777   $SpecifiedMeasure2 = 'WeightedTanimotoSimilarity';
 778   if ($SpecifiedComparisonMeasuresNameMap{lc($SpecifiedMeasure1)} || $SpecifiedComparisonMeasuresNameMap{lc($SpecifiedMeasure2)}) {
 779     if (IsEmpty($Options{beta})) {
 780       die "Error: You must specify a value for \"-b, --beta\" option in \"$SpecifiedMeasure1, $SpecifiedMeasure2, or All\" \"-m --mode\". \n";
 781     }
 782     my($Beta);
 783     $Beta = $Options{beta};
 784     if (!(IsFloat($Beta) && $Beta >=0 && $Beta <= 1)) {
 785       die "Error: The value specified, $Options{beta}, for option \"-b, --beta\" is not valid. Allowed values: >= 0 and <= 1\n";
 786     }
 787     $OptionsInfo{Beta} = $Beta;
 788   }
 789 
 790   # Setup any parameters required for specified comparison menthod...
 791   for $SpecifiedMeasure (@SpecifiedComparisonMeasures) {
 792     @{$SpecifiedComparisonMeasuresParameterMap{lc($SpecifiedMeasure)}} = ();
 793     if ($SpecifiedMeasure =~ /^TverskySimilarity$/i) {
 794       push @{$SpecifiedComparisonMeasuresParameterMap{lc($SpecifiedMeasure)}}, $OptionsInfo{Alpha};
 795     }
 796     elsif ($SpecifiedMeasure =~ /^WeightedTverskySimilarity$/i) {
 797       push @{$SpecifiedComparisonMeasuresParameterMap{lc($SpecifiedMeasure)}}, $OptionsInfo{Alpha};
 798       push @{$SpecifiedComparisonMeasuresParameterMap{lc($SpecifiedMeasure)}}, $OptionsInfo{Beta};
 799     }
 800     elsif ($SpecifiedMeasure =~ /^WeightedTanimotoSimilarity$/i) {
 801       push @{$SpecifiedComparisonMeasuresParameterMap{lc($SpecifiedMeasure)}}, $OptionsInfo{Beta};
 802     }
 803   }
 804   $OptionsInfo{SpecifiedBitVectorComparisonsParameterRef} = \%SpecifiedComparisonMeasuresParameterMap;
 805 }
 806 
# Process options related to comparison of vector strings...
 808 #
 809 sub ProcessVectorComparisonOptions {
 810   # Setup specified similarity coefficients for vector strings..
 811   my($ComparisonMeasure, $SupportedComparisonMeasure, @SupportedComparisonMeasures, %SupportedComparisonMeasuresNameMap, %SupportedComparisonMeasuresMethodMap);
 812 
 813   @SupportedComparisonMeasures = ();
 814   %SupportedComparisonMeasuresNameMap = ();
 815   %SupportedComparisonMeasuresMethodMap = ();
 816   for $SupportedComparisonMeasure (Fingerprints::FingerprintsVector::GetSupportedDistanceAndSimilarityCoefficients()) {
 817     # Similarity and distance coefficient function/method names contain "Coefficient" in their names.
 818     # So take 'em out and setup a map to original function/method name...
 819     $ComparisonMeasure = $SupportedComparisonMeasure;
 820     if ($ComparisonMeasure =~ /Coefficient$/i) {
 821       $ComparisonMeasure =~ s/Coefficient$//i;
 822     }
 823     push @SupportedComparisonMeasures, $ComparisonMeasure;
 824     $SupportedComparisonMeasuresNameMap{lc($ComparisonMeasure)} = $ComparisonMeasure;
 825     $SupportedComparisonMeasuresMethodMap{lc($ComparisonMeasure)} = $SupportedComparisonMeasure;
 826   }
 827 
 828   # Setup a list of similarity coefficients to use for calculating similarity matrices for bit vector strings...
 829   my($SpecifiedMeasure, @SpecifiedComparisonMeasures, %SpecifiedComparisonMeasuresNameMap, %SpecifiedComparisonMeasuresMethodMap, %SpecifiedComparisonMeasuresParameterMap);
 830 
 831   @SpecifiedComparisonMeasures = ();
 832   %SpecifiedComparisonMeasuresNameMap = ();
 833   %SpecifiedComparisonMeasuresMethodMap = ();
 834 
 835   if ($Options{vectorcomparisonmode} =~ /^All$/i) {
 836     push @SpecifiedComparisonMeasures, @SupportedComparisonMeasures;
 837   }
 838   else {
 839     # Comma delimited list of similarity coefficients...
 840     my($VectorComparisonMode, @SpecifiedMeasures, @UnsupportedSpecifiedMeasures);
 841 
 842     $VectorComparisonMode = $Options{vectorcomparisonmode};
 843     $VectorComparisonMode =~ s/ //g;
 844     @SpecifiedMeasures = split ",", $VectorComparisonMode;
 845     @UnsupportedSpecifiedMeasures = ();
 846 
 847     for $SpecifiedMeasure (@SpecifiedMeasures) {
 848       if (exists($SupportedComparisonMeasuresMethodMap{lc($SpecifiedMeasure)})) {
 849         push @SpecifiedComparisonMeasures, $SpecifiedMeasure;
 850       }
 851       else {
 852         push @UnsupportedSpecifiedMeasures, $SpecifiedMeasure;
 853       }
 854     }
 855     if (@UnsupportedSpecifiedMeasures) {
 856       if (@UnsupportedSpecifiedMeasures > 1) {
 857         warn "Error: The values specified - ", JoinWords(\@UnsupportedSpecifiedMeasures, ", ", 0)," - for option \"-v --VectorComparisonMode\" are not valid.\n";
 858       }
 859       else {
 860         warn "Error: The value specified, @UnsupportedSpecifiedMeasures, for option \"-v --VectorComparisonMode\" is not valid.\n";
 861       }
 862       die "Allowed values:", JoinWords(\@SupportedComparisonMeasures, ", ", 0), "\n";
 863     }
 864   }
 865   for $SpecifiedMeasure (@SpecifiedComparisonMeasures) {
 866     $SpecifiedComparisonMeasuresMethodMap{lc($SpecifiedMeasure)} = $SupportedComparisonMeasuresMethodMap{lc($SpecifiedMeasure)};
 867     $SpecifiedComparisonMeasuresNameMap{lc($SpecifiedMeasure)} = $SupportedComparisonMeasuresNameMap{lc($SpecifiedMeasure)};
 868   }
 869 
 870   $OptionsInfo{VectorComparisonMode} = $Options{vectorcomparisonmode};
 871   $OptionsInfo{SpecifiedVectorComparisonsRef} = \@SpecifiedComparisonMeasures;
 872   $OptionsInfo{SpecifiedVectorComparisonsNameRef} = \%SpecifiedComparisonMeasuresNameMap;
 873   $OptionsInfo{SpecifiedVectorComparisonsMethodRef} = \%SpecifiedComparisonMeasuresMethodMap;
 874 
 875   # Setup specified vector comparison calculation modes...
 876   my(@SpecifiedVectorComparisonModes);
 877   @SpecifiedVectorComparisonModes = ();
 878   if ($Options{vectorcomparisonformulism} =~ /^All$/i) {
 879     push @SpecifiedVectorComparisonModes, ("AlgebraicForm", "BinaryForm", "SetTheoreticForm");
 880   }
 881   else {
 882     my($SpecifiedFormulism, @SpecifiedFormulismWords);
 883 
 884     @SpecifiedFormulismWords = split /\,/, $Options{vectorcomparisonformulism};
 885     for $SpecifiedFormulism (@SpecifiedFormulismWords) {
 886       if ($SpecifiedFormulism !~ /^(AlgebraicForm|BinaryForm|SetTheoreticForm)$/i) {
 887         die "Error: The value specified, $SpecifiedFormulism, for option \"--VectorComparisonFormulism\" is not valid. Allowed values: AlgebraicForm, BinaryForm or SetTheoreticForm\n";
 888       }
 889       push @SpecifiedVectorComparisonModes, $SpecifiedFormulism;
 890     }
 891   }
 892   $OptionsInfo{VectorComparisonFormulism} = $Options{vectorcomparisonformulism};
 893   $OptionsInfo{SpecifiedVectorComparisonModesRef} = \@SpecifiedVectorComparisonModes;
 894 
 895   # Setup any parameters required for specified comparison menthod...
 896   for $SpecifiedMeasure (@SpecifiedComparisonMeasures) {
 897     @{$SpecifiedComparisonMeasuresParameterMap{lc($SpecifiedMeasure)}} = ();
 898     push @{$SpecifiedComparisonMeasuresParameterMap{lc($SpecifiedMeasure)}}, ($Options{fast} ? 1 : 0);
 899   }
 900   $OptionsInfo{SpecifiedVectorComparisonsParameterRef} = \%SpecifiedComparisonMeasuresParameterMap;
 901 }
 902 
# Setup script usage and retrieve command line arguments specified using various options...
 904 sub SetupScriptUsage {
 905 
 906   # Retrieve all the options...
 907   %Options = ();
 908 
 909   $Options{alpha} = 0.5;
 910   $Options{beta} = 1;
 911 
 912   $Options{bitvectorcomparisonmode} = "TanimotoSimilarity";
 913 
 914   $Options{colmode} = 'colnum';
 915 
 916   $Options{compoundidprefix} = 'Cmpd';
 917   $Options{compoundidmode} = 'LabelPrefix';
 918 
 919   $Options{detail} = 1;
 920 
 921   $Options{indelim} = 'comma';
 922   $Options{outdelim} = 'comma';
 923 
 924   $Options{inputdatamode} = 'LoadInMemory';
 925 
 926   $Options{mode} = 'AutoDetect';
 927 
 928   $Options{outmatrixformat} = 'RowsAndColumns';
 929 
 930   $Options{outmatrixtype} = 'FullMatrix';
 931 
 932   $Options{quote} = 'yes';
 933   $Options{precision} = 2;
 934 
 935   $Options{vectorcomparisonmode} = "TanimotoSimilarity";
 936   $Options{vectorcomparisonformulism} = "AlgebraicForm";
 937 
 938   if (!GetOptions(\%Options, "alpha=f", "beta=f", "bitvectorcomparisonmode|b=s", "colmode|c=s", "compoundidcol=s", "compoundidprefix=s", "compoundidfield=s", "compoundidmode=s", "detail|d=i", "fast|f", "fingerprintscol=s", "fingerprintsfield=s", "help|h", "indelim=s", "inputdatamode=s", "mode|m=s", "outdelim=s", "overwrite|o", "outmatrixformat=s", "outmatrixtype=s", "precision|p=s", "quote|q=s", "root|r=s", "vectorcomparisonmode|v=s", "vectorcomparisonformulism=s", "workingdir|w=s")) {
 939     die "\nTo get a list of valid options and their values, use \"$ScriptName -h\" or\n\"perl -S $ScriptName -h\" command and try again...\n";
 940   }
 941   if ($Options{workingdir}) {
 942     if (! -d $Options{workingdir}) {
 943       die "Error: The value specified, $Options{workingdir}, for option \"-w --workingdir\" is not a directory name.\n";
 944     }
 945     chdir $Options{workingdir} or die "Error: Couldn't chdir $Options{workingdir}: $! \n";
 946   }
 947   if ($Options{colmode} !~ /^(ColNum|ColLabel)$/i) {
 948     die "Error: The value specified, $Options{colmode}, for option \"-c, --ColMode\" is not valid. Allowed values: ColNum, or ColLabel\n";
 949   }
 950   if ($Options{compoundidmode} !~ /^(DataField|MolName|LabelPrefix|MolNameOrLabelPrefix)$/i) {
 951     die "Error: The value specified, $Options{compoundidmode}, for option \"--CompoundIDMode\" is not valid. Allowed values: DataField, MolName, LabelPrefix or MolNameOrLabelPrefix\n";
 952   }
 953   if (!IsPositiveInteger($Options{detail})) {
 954     die "Error: The value specified, $Options{detail}, for option \"-d, --detail\" is not valid. Allowed values: > 0 \n";
 955   }
 956   if ($Options{inputdatamode} !~ /^(LoadInMemory|ScanFile)$/i) {
 957     die "Error: The value specified, $Options{inputdatamode}, for option \"--InputDataMode\" is not valid. Allowed values: LoadInMemory or ScanFile\n";
 958   }
 959   if ($Options{mode} !~ /^(AutoDetect|FingerprintsBitVectorString|FingerprintsVectorString)$/i) {
 960     die "Error: The value specified, $Options{mode}, for option \"-m, --mode\" is not valid. Allowed values: AutoDetect, FingerprintsBitVectorString or FingerprintsVectorString \n";
 961   }
 962   if ($Options{indelim} !~ /^(comma|semicolon)$/i) {
 963     die "Error: The value specified, $Options{indelim}, for option \"--InDelim\" is not valid. Allowed values: comma, or semicolon\n";
 964   }
 965   if ($Options{outdelim} !~ /^(comma|semicolon|tab)$/i) {
 966     die "Error: The value specified, $Options{outdelim}, for option \"--OutDelim\" is not valid. Allowed values: comma, tab, or semicolon\n";
 967   }
 968   if ($Options{outmatrixformat} !~ /^(RowsAndColumns|IDPairsAndValue)$/i) {
 969     die "Error: The value specified, $Options{outmatrixformat}, for option \"--OutMatrixFormat\" is not valid. Allowed values: RowsAndColumns or IDPairsAndValue\n";
 970   }
 971   if ($Options{outmatrixtype} !~ /^(FullMatrix|UpperTriangularMatrix|LowerTriangularMatrix)$/i) {
 972     die "Error: The value specified, $Options{outmatrixtype}, for option \"--OutMatrixType\" is not valid. Allowed values: FullMatrix, UpperTriangularMatrix or LowerTriangularMatrix\n";
 973   }
 974   if ($Options{quote} !~ /^(Yes|No)$/i) {
 975     die "Error: The value specified, $Options{quote}, for option \"-q --quote\" is not valid. Allowed values: Yes or No\n";
 976   }
 977   if (!IsPositiveInteger($Options{precision})) {
 978     die "Error: The value specified, $Options{precision}, for option \"--precision\" is not valid. Allowed values: > 0 \n";
 979   }
 980 }
 981