MayaChemTools

   1 #!/usr/bin/perl -w
   2 #
   3 # File: InfoSDFiles.pl
   4 # Author: Manish Sud <msud@san.rr.com>
   5 #
   6 # Copyright (C) 2024 Manish Sud. All rights reserved.
   7 #
   8 # This file is part of MayaChemTools.
   9 #
  10 # MayaChemTools is free software; you can redistribute it and/or modify it under
  11 # the terms of the GNU Lesser General Public License as published by the Free
  12 # Software Foundation; either version 3 of the License, or (at your option) any
  13 # later version.
  14 #
  15 # MayaChemTools is distributed in the hope that it will be useful, but without
  16 # any warranty; without even the implied warranty of merchantability of fitness
  17 # for a particular purpose.  See the GNU Lesser General Public License for more
  18 # details.
  19 #
  20 # You should have received a copy of the GNU Lesser General Public License
  21 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  22 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  23 # Boston, MA, 02111-1307, USA.
  24 #
  25 
  26 use strict;
  27 use FindBin; use lib "$FindBin::Bin/../lib";
  28 use Getopt::Long;
  29 use File::Basename;
  30 use Benchmark;
  31 use SDFileUtil;
  32 use TextUtil;
  33 use FileUtil;
  34 
  35 my($ScriptName, %Options, $StartTime, $EndTime, $TotalTime);
  36 
  37 # Autoflush STDOUT
  38 $| = 1;
  39 
  40 # Starting message...
  41 $ScriptName = basename $0;
  42 print "\n$ScriptName:Starting...\n\n";
  43 $StartTime = new Benchmark;
  44 
  45 # Get the options and setup script...
  46 SetupScriptUsage();
  47 if ($Options{help} || @ARGV < 1) {
  48   die GetUsageFromPod("$FindBin::Bin/$ScriptName");
  49 }
  50 
  51 my(@SDFilesList);
  52 @SDFilesList = ExpandFileNames(\@ARGV, "sdf sd");
  53 
  54 # Process options...
  55 print "Processing options...\n";
  56 my(%OptionsInfo);
  57 ProcessOptions();
  58 
  59 # Setup information about input files...
  60 print "Checking input SD file(s)...\n";
  61 my(%SDFilesInfo, %SDCmpdsInfo);
  62 RetrieveSDFilesInfo();
  63 InitializeSDCmpdsInfo();
  64 
  65 # Process input files..
  66 my($FileIndex);
  67 if (@SDFilesList > 1) {
  68   print "\nProcessing SD files...\n";
  69 }
  70 for $FileIndex (0 .. $#SDFilesList) {
  71   if ($SDFilesInfo{FileOkay}[$FileIndex]) {
  72     print "\nProcessing file $SDFilesList[$FileIndex]...\n";
  73     ListSDFileInfo($FileIndex);
  74   }
  75 }
  76 ListTotalSizeOfFiles();
  77 
  78 print "\n$ScriptName:Done...\n\n";
  79 
  80 $EndTime = new Benchmark;
  81 $TotalTime = timediff ($EndTime, $StartTime);
  82 print "Total time: ", timestr($TotalTime), "\n";
  83 
  84 ###############################################################################
  85 
  86 # List appropriate information...
  87 sub ListSDFileInfo {
  88   my($Index) = @_;
  89   my($SDFile);
  90 
  91   $SDFile = $SDFilesList[$Index];
  92 
  93   if ($OptionsInfo{ProcessCmpdInfo}) {
  94     ListCompoundDetailsInfo($Index);
  95   }
  96   else {
  97     ListCompoundCountInfo($Index);
  98   }
  99 
 100   # File size and modification information...
 101   print "\nFile size: ", FormatFileSize($SDFilesInfo{FileSize}[$Index]), " \n";
 102   print "Last modified: ", $SDFilesInfo{FileLastModified}[$Index], " \n";
 103 }
 104 
 105 # List number of compounds in SD file...
 106 sub ListCompoundCountInfo {
 107   my($Index) = @_;
 108   my($SDFile, $CmpdCount);
 109 
 110   $SDFile = $SDFilesList[$Index];
 111 
 112   $CmpdCount = 0;
 113 
 114   open SDFILE, "$SDFile" or die "Couldn't open $SDFile: $! \n";
 115   while (<SDFILE>) {
 116     if (/^\$\$\$\$/) {
 117       $CmpdCount++;
 118     }
 119   }
 120   close SDFILE;
 121 
 122   $SDCmpdsInfo{TotalCmpdCount} += $CmpdCount;
 123 
 124   print "\nNumber of compounds: $CmpdCount\n";
 125 }
 126 
 127 # List detailed compound information...
 128 sub ListCompoundDetailsInfo {
 129   my($Index) = @_;
 130   my($SDFile, $CmpdCount, $EmptyCtabBlocksCount, $MismatchCtabBlockCount, $ChiralCtabBlockCount, $UnknownAtomsCtabBlockCount, $InvalidAtomNumbersCtabBlockCount, $SaltsCtabBlockCount, $CtabLinesCount, $PrintCmpdCounterHeader, $ProblematicCmpdData, $CmpdString, @CmpdLines);
 131 
 132   $SDFile = $SDFilesList[$Index];
 133 
 134   ($CmpdCount, $EmptyCtabBlocksCount, $MismatchCtabBlockCount, $ChiralCtabBlockCount, $UnknownAtomsCtabBlockCount, $InvalidAtomNumbersCtabBlockCount, $SaltsCtabBlockCount) = (0) x 7;
 135 
 136   InitializeSDCmpdsInfo();
 137 
 138   $PrintCmpdCounterHeader = 1;
 139 
 140   open SDFILE, "$SDFile" or die "Couldn't open $SDFile: $! \n";
 141   while ($CmpdString = ReadCmpdString(\*SDFILE)) {
 142     $CmpdCount++;
 143     $ProblematicCmpdData = 0;
 144     if ($OptionsInfo{Detail} <= 1) {
 145       if (($CmpdCount % 5000) == 0) {
 146         if ($PrintCmpdCounterHeader) {
 147           $PrintCmpdCounterHeader = 0;
 148           print "Processing compounds:";
 149         }
 150         print "$CmpdCount...";
 151       }
 152     }
 153     @CmpdLines = split "\n", $CmpdString;
 154     $CtabLinesCount = GetCtabLinesCount(\@CmpdLines);
 155     if ($OptionsInfo{All} || $OptionsInfo{Empty}) {
 156       if ($CtabLinesCount <= 0) {
 157         $EmptyCtabBlocksCount++;
 158         $ProblematicCmpdData = 1;
 159       }
 160     }
 161     if ($CtabLinesCount > 0) {
 162       my ($AtomCount, $BondCount, $ChiralFlag) = ParseCmpdCountsLine($CmpdLines[3]);
 163       if ($OptionsInfo{All} || $OptionsInfo{Mismatch}) {
 164         if ($CtabLinesCount != ($AtomCount + $BondCount)) {
 165           $MismatchCtabBlockCount++;
 166           $ProblematicCmpdData = 1;
 167           if ($OptionsInfo{Detail} >= 2) {
 168             print "\nMismatch found: Ctab lines count: $CtabLinesCount;  Atoms count: $AtomCount; Bond count: $BondCount\n";
 169           }
 170         }
 171       }
 172       if ($OptionsInfo{All} || $OptionsInfo{Chiral}) {
 173         if ($ChiralFlag == 1) {
 174           $ChiralCtabBlockCount++;
 175         }
 176       }
 177       if ($CtabLinesCount == ($AtomCount + $BondCount)) {
 178         if ($OptionsInfo{All} || $OptionsInfo{UnknownAtoms}) {
 179           my($UnknownAtomCount, $UnknownAtoms, $UnknownAtomLines) = GetUnknownAtoms(\@CmpdLines);
 180           if ($UnknownAtomCount) {
 181             $UnknownAtomsCtabBlockCount++;
 182             $ProblematicCmpdData = 1;
 183             if ($OptionsInfo{Detail} >= 2) {
 184               print "\nUnknown atom(s) found: $UnknownAtomCount\nUnknown atom(s) symbols:$UnknownAtoms\nUnknown atom(s) data lines:\n$UnknownAtomLines\n";
 185             }
 186           }
 187         }
 188         if ($OptionsInfo{All} || $OptionsInfo{InvalidAtomNumbers}) {
 189           my($InvalidAtomNumbersCount, $InvalidAtomNumbers, $InvalidAtomNumberLines) = GetInvalidAtomNumbers(\@CmpdLines);
 190           if ($InvalidAtomNumbersCount) {
 191             $InvalidAtomNumbersCtabBlockCount++;
 192             $ProblematicCmpdData = 1;
 193             if ($OptionsInfo{Detail} >= 2) {
 194               print "\nInvalid atom number(s) found: $InvalidAtomNumbersCount\nInvalid atom number(s):$InvalidAtomNumbers\nInvalid atom number(s) data lines:\n$InvalidAtomNumberLines\n";
 195             }
 196           }
 197         }
 198         if ($OptionsInfo{All} || $OptionsInfo{Salts}) {
 199           my($FragmentsCount, $Fragments) = GetCmpdFragments(\@CmpdLines);
 200           if ($FragmentsCount > 1) {
 201             $SaltsCtabBlockCount++;
 202             $ProblematicCmpdData = 1;
 203             if ($OptionsInfo{Detail} >= 2) {
 204               print "\nSalts found: $FragmentsCount\nSalts atom numbers:\n$Fragments\n";
 205             }
 206           }
 207         }
 208       }
 209     }
 210     if ($OptionsInfo{ProcessCmpdData}) {
 211       ProcessCmpdInfo(\@CmpdLines, $CmpdCount);
 212     }
 213     if ($OptionsInfo{Detail} >= 3) {
 214       if ($ProblematicCmpdData) {
 215         print "\nCompound data:\n$CmpdString\n\n";
 216       }
 217     }
 218   }
 219   if ($OptionsInfo{Detail} <= 1) {
 220     if (!$PrintCmpdCounterHeader) {
 221       print "\n";
 222     }
 223   }
 224   close SDFILE;
 225 
 226   $SDCmpdsInfo{TotalCmpdCount} += $CmpdCount;
 227 
 228   print "\nNumber of compounds: $CmpdCount\n";
 229 
 230   if ($OptionsInfo{All} || $OptionsInfo{Empty}) {
 231     print "Number of empty atom/bond blocks: $EmptyCtabBlocksCount\n";
 232   }
 233   if ($OptionsInfo{All} || $OptionsInfo{Mismatch}) {
 234     print "Number of mismatched atom/bond blocks: $MismatchCtabBlockCount\n";
 235   }
 236   if ($OptionsInfo{All} || $OptionsInfo{UnknownAtoms}) {
 237     print "Number of atom blocks with unknown atom labels: $UnknownAtomsCtabBlockCount\n";
 238   }
 239   if ($OptionsInfo{All} || $OptionsInfo{InvalidAtomNumbers}) {
 240     print "Number of bond blocks and atom property blocks with invalid atom numbers: $InvalidAtomNumbersCtabBlockCount\n";
 241   }
 242   if ($OptionsInfo{All} || $OptionsInfo{Salts}) {
 243     print "Number of atom blocks containing salts: $SaltsCtabBlockCount\n";
 244   }
 245   if ($OptionsInfo{All} || $OptionsInfo{Chiral}) {
 246     print "Number of chiral atom/bond blocks: $ChiralCtabBlockCount\n";
 247   }
 248   if ($OptionsInfo{ProcessCmpdData}) {
 249     PrintCmpdInfoSummary();
 250   }
 251 
 252 }
 253 
 254 # Initialize compound data information for a SD file...
 255 sub InitializeSDCmpdsInfo {
 256 
 257   if (!exists $SDCmpdsInfo{TotalCmpdCount}) {
 258     $SDCmpdsInfo{TotalCmpdCount} = 0;
 259   }
 260 
 261   @{$SDCmpdsInfo{FieldLabels}} = ();
 262   %{$SDCmpdsInfo{FieldLabelsMap}} = ();
 263   %{$SDCmpdsInfo{NonEmptyFieldValuesCountMap}} = ();
 264   %{$SDCmpdsInfo{EmptyFieldValuesCountMap}} = ();
 265   %{$SDCmpdsInfo{NonNumericalFieldValuesCountMap}} = ();
 266   %{$SDCmpdsInfo{NumericalFieldValuesCountMap}} = ();
 267 }
 268 
 269 # Process compound data header labels and figure out which ones are present for
 270 # all the compounds...
 271 sub ProcessCmpdInfo {
 272   my($CmpdLinesRef, $CmpdCount) = @_;
 273   my($Label);
 274 
 275   if (@{$SDCmpdsInfo{FieldLabels}}) {
 276     my (@CmpdFieldLabels) = GetCmpdDataHeaderLabels($CmpdLinesRef);
 277     my(%CmpdFieldLabelsMap) = ();
 278     # Setup a map for the current labels...
 279     for $Label (@CmpdFieldLabels) {
 280       $CmpdFieldLabelsMap{$Label} = "PresentInSome";
 281     }
 282     # Check the presence old labels for this compound; otherwise, mark 'em new...
 283     for $Label (@{$SDCmpdsInfo{FieldLabels}}) {
 284       if (!$CmpdFieldLabelsMap{$Label}) {
 285         $SDCmpdsInfo{FieldLabelsMap}{$Label} = "PresentInSome";
 286       }
 287     }
 288     # Check the presence this compound in the old labels; otherwise, add 'em...
 289     for $Label (@CmpdFieldLabels ) {
 290       if (!$SDCmpdsInfo{FieldLabelsMap}{$Label}) {
 291         # It's a new label...
 292         push @{$SDCmpdsInfo{FieldLabels}}, $Label;
 293         $SDCmpdsInfo{FieldLabelsMap}{$Label} = "PresentInSome";
 294       }
 295     }
 296   }
 297   else {
 298     # Get the initial label set and set up a map...
 299     @{$SDCmpdsInfo{FieldLabels}} = GetCmpdDataHeaderLabels($CmpdLinesRef);
 300     for $Label (@{$SDCmpdsInfo{FieldLabels}}) {
 301       $SDCmpdsInfo{FieldLabelsMap}{$Label} = "PresentInAll";
 302     }
 303   }
 304   if ($OptionsInfo{CountEmptyData} || $OptionsInfo{CheckData}) {
 305     # Count empty data field values...
 306     my(%DataFieldAndValues, $Label, $Value);
 307 
 308     %DataFieldAndValues = GetCmpdDataHeaderLabelsAndValues($CmpdLinesRef);
 309     for $Label (keys %DataFieldAndValues) {
 310       $Value = $DataFieldAndValues{$Label};
 311       if ($OptionsInfo{CountEmptyData}) {
 312         if (IsNotEmpty($Value)) {
 313           if (exists($SDCmpdsInfo{NonEmptyFieldValuesCountMap}{$Label})) {
 314             $SDCmpdsInfo{NonEmptyFieldValuesCountMap}{$Label} += 1;
 315           }
 316           else {
 317             $SDCmpdsInfo{NonEmptyFieldValuesCountMap}{$Label} = 1;
 318           }
 319         }
 320         else {
 321           if ($Options{detail} >= 2) {
 322             print "Compound record $CmpdCount: Empty data field <$Label>\n";
 323           }
 324           if (exists($SDCmpdsInfo{EmptyFieldValuesCountMap}{$Label})) {
 325             $SDCmpdsInfo{EmptyFieldValuesCountMap}{$Label} += 1;
 326           }
 327           else {
 328             $SDCmpdsInfo{EmptyFieldValuesCountMap}{$Label} = 1;
 329           }
 330         }
 331       }
 332       if ($OptionsInfo{CheckData}) {
 333         if (IsNumerical($Value)) {
 334           if (exists($SDCmpdsInfo{NumericalFieldValuesCountMap}{$Label})) {
 335             $SDCmpdsInfo{NumericalFieldValuesCountMap}{$Label} += 1;
 336           }
 337           else {
 338             $SDCmpdsInfo{NumericalFieldValuesCountMap}{$Label} = 1;
 339           }
 340         }
 341         else {
 342           if (exists($SDCmpdsInfo{NonNumericalFieldValuesCountMap}{$Label})) {
 343             $SDCmpdsInfo{NonNumericalFieldValuesCountMap}{$Label} += 1;
 344           }
 345           else {
 346             $SDCmpdsInfo{NonNumericalFieldValuesCountMap}{$Label} = 1;
 347           }
 348         }
 349       }
 350     }
 351   }
 352 }
 353 
 354 # Print compound summary...
 355 sub PrintCmpdInfoSummary {
 356   if (@{$SDCmpdsInfo{FieldLabels}}) {
 357     my($PresentInAllCount, $Label, @FieldLabelsPresentInSome, @FieldLabelsPresentInAll);
 358 
 359     @FieldLabelsPresentInSome = ();
 360     @FieldLabelsPresentInAll = ();
 361 
 362     $PresentInAllCount = 0;
 363     print "\nNumber of data fields: ", scalar(@{$SDCmpdsInfo{FieldLabels}}), "\n";
 364     print "All data field labels: ";
 365     for $Label (sort keys %{$SDCmpdsInfo{FieldLabelsMap}}) {
 366       print "<$Label> ";
 367     }
 368     print "\n";
 369     for $Label (sort keys %{$SDCmpdsInfo{FieldLabelsMap}}) {
 370       if ($SDCmpdsInfo{FieldLabelsMap}{$Label} eq "PresentInAll") {
 371         $PresentInAllCount++;
 372         push @FieldLabelsPresentInAll, $Label;
 373       }
 374     }
 375     if ($PresentInAllCount != @{$SDCmpdsInfo{FieldLabels}}) {
 376       print "Data field labels present in all compounds: ";
 377       for $Label (sort keys %{$SDCmpdsInfo{FieldLabelsMap}}) {
 378         if ($SDCmpdsInfo{FieldLabelsMap}{$Label} eq "PresentInAll") {
 379           print "<$Label> ";
 380         }
 381       }
 382       print "\n";
 383       print "Data field labels present in some compounds: ";
 384       for $Label (sort keys %{$SDCmpdsInfo{FieldLabelsMap}}) {
 385         if ($SDCmpdsInfo{FieldLabelsMap}{$Label} eq "PresentInSome") {
 386           print "<$Label> ";
 387           push @FieldLabelsPresentInSome, $Label;
 388         }
 389       }
 390       print "\n";
 391     }
 392     # List empty data field values count...
 393     if ($OptionsInfo{CountEmptyData}) {
 394       print "\n";
 395       if ($PresentInAllCount == @{$SDCmpdsInfo{FieldLabels}}) {
 396         PrintDataInformation("Number of non-empty values for data field(s)", \@{$SDCmpdsInfo{FieldLabels}}, \%{$SDCmpdsInfo{NonEmptyFieldValuesCountMap}});
 397         PrintDataInformation("Number of empty values for data field(s)", \@{$SDCmpdsInfo{FieldLabels}}, \%{$SDCmpdsInfo{EmptyFieldValuesCountMap}});
 398       }
 399       else {
 400         PrintDataInformation("Number of non-empty values for data field(s) present in all compounds", \@FieldLabelsPresentInAll, \%{$SDCmpdsInfo{NonEmptyFieldValuesCountMap}});
 401         PrintDataInformation("Number of empty values for data field(s) present in all compounds", \@FieldLabelsPresentInAll, \%{$SDCmpdsInfo{EmptyFieldValuesCountMap}});
 402         PrintDataInformation("Number of non-empty values for data field(s) present in some compounds", \@FieldLabelsPresentInSome, \%{$SDCmpdsInfo{NonEmptyFieldValuesCountMap}});
 403         PrintDataInformation("Number of empty values for data field(s) present in some compounds", \@FieldLabelsPresentInSome, \%{$SDCmpdsInfo{EmptyFieldValuesCountMap}});
 404       }
 405       print "\n";
 406     }
 407     # List numerical data values count...
 408     if ($OptionsInfo{CheckData}) {
 409       print "\n";
 410       if ($PresentInAllCount == @{$SDCmpdsInfo{FieldLabels}}) {
 411         PrintDataInformation("Number of non-numerical values for data field(s)", \@{$SDCmpdsInfo{FieldLabels}}, \%{$SDCmpdsInfo{NonNumericalFieldValuesCountMap}});
 412         PrintDataInformation("Number of numerical values for data field(s)", \@{$SDCmpdsInfo{FieldLabels}}, \%{$SDCmpdsInfo{NumericalFieldValuesCountMap}});
 413       }
 414       else {
 415         PrintDataInformation("Number of non-numerical values for data field(s) present in all compounds", \@FieldLabelsPresentInAll, \%{$SDCmpdsInfo{NonNumericalFieldValuesCountMap}});
 416         PrintDataInformation("Number of numerical values for data field(s) present in all compounds", \@FieldLabelsPresentInAll, \%{$SDCmpdsInfo{NumericalFieldValuesCountMap}});
 417         PrintDataInformation("Number of non-numerical values for data field(s) present in some compounds", \@FieldLabelsPresentInSome, \%{$SDCmpdsInfo{NonNumericalFieldValuesCountMap}});
 418         PrintDataInformation("Number of numerical values for data field(s) present in some compounds", \@FieldLabelsPresentInSome, \%{$SDCmpdsInfo{NumericalFieldValuesCountMap}});
 419       }
 420       print "\n";
 421     }
 422   }
 423   else {
 424     print "\nNumber of data fields: 0\n";
 425   }
 426 }
 427 # List data information...
 428 sub PrintDataInformation {
 429   my($InfoLabel, $DataLabelRef, $DataLabelToValueMapRef) = @_;
 430   my($Line, $Label);
 431 
 432   $Line = "";
 433   for $Label (@{$DataLabelRef}) {
 434     $Line .= " <$Label> - " . (exists($DataLabelToValueMapRef->{$Label}) ? $DataLabelToValueMapRef->{$Label} : 0) . ",";
 435   }
 436   $Line =~ s/\,$//g;
 437   print "$InfoLabel: $Line\n";
 438 }
 439 
 440 # Total size of all the files...
 441 sub ListTotalSizeOfFiles {
 442   my($FileOkayCount, $TotalSize, $Index);
 443 
 444   $FileOkayCount = 0;
 445   $TotalSize = 0;
 446 
 447   for $Index (0 .. $#SDFilesList) {
 448     if ($SDFilesInfo{FileOkay}[$Index]) {
 449       $FileOkayCount++;
 450       $TotalSize += $SDFilesInfo{FileSize}[$Index];
 451     }
 452   }
 453   if ($FileOkayCount > 1) {
 454     print "\nTotal number of compounds in  $FileOkayCount SD files: $SDCmpdsInfo{TotalCmpdCount}\n";
 455     print "\nTotal size of $FileOkayCount SD files: ", FormatFileSize($TotalSize), "\n";
 456   }
 457 
 458 }
 459 
 460 # Retrieve information about SD files...
 461 sub RetrieveSDFilesInfo {
 462   my($Index, $SDFile, $ModifiedTimeString, $ModifiedDateString);
 463 
 464   %SDCmpdsInfo = ();
 465 
 466   %SDFilesInfo = ();
 467   @{$SDFilesInfo{FileOkay}} = ();
 468   @{$SDFilesInfo{FileSize}} = ();
 469   @{$SDFilesInfo{FileLastModified}} = ();
 470 
 471   FILELIST: for $Index (0 .. $#SDFilesList) {
 472     $SDFilesInfo{FileOkay}[$Index] = 0;
 473     $SDFilesInfo{FileSize}[$Index] = 0;
 474     $SDFilesInfo{FileLastModified}[$Index] = '';
 475 
 476     $SDFile = $SDFilesList[$Index];
 477     if (!(-e $SDFile)) {
 478       warn "Warning: Ignoring file $SDFile: It doesn't exist\n";
 479       next FILELIST;
 480     }
 481     if (!CheckFileType($SDFile, "sdf sd")) {
 482       warn "Warning: Ignoring file $SDFile: It's not a SD file\n";
 483       next FILELIST;
 484     }
 485     if (! open SDFILE, "$SDFile") {
 486       warn "Warning: Ignoring file $SDFile: Couldn't open it: $! \n";
 487       next FILELIST;
 488     }
 489     close SDFILE;
 490 
 491     $SDFilesInfo{FileOkay}[$Index] = 1;
 492     $SDFilesInfo{FileSize}[$Index] = FileSize($SDFile);
 493     ($ModifiedTimeString, $ModifiedDateString) = FormattedFileModificationTimeAndDate($SDFile);
 494     $SDFilesInfo{FileLastModified}[$Index] = "$ModifiedTimeString; $ModifiedDateString";
 495   }
 496 }
 497 
 498 # Process option values...
 499 sub ProcessOptions {
 500   %OptionsInfo = ();
 501 
 502   $OptionsInfo{All} = $Options{all} ? $Options{all} : 0;
 503   $OptionsInfo{Chiral} = $Options{chiral} ? $Options{chiral} : 0;
 504   $OptionsInfo{Count} = $Options{count} ? $Options{count} : 0;
 505   $OptionsInfo{DataCheck} = $Options{datacheck} ? $Options{datacheck} : 0;
 506   $OptionsInfo{Empty} = $Options{empty} ? $Options{empty} : 0;
 507   $OptionsInfo{Fields} = $Options{fields} ? $Options{fields} : 0;
 508   $OptionsInfo{InvalidAtomNumbers} = $Options{invalidatomnumbers} ? $Options{invalidatomnumbers} : 0;
 509   $OptionsInfo{Mismatch} = $Options{mismatch} ? $Options{mismatch} : 0;
 510   $OptionsInfo{Salts} = $Options{salts} ? $Options{salts} : 0;
 511   $OptionsInfo{UnknownAtoms} = $Options{unknownatoms} ? $Options{unknownatoms} : 0;
 512 
 513   $OptionsInfo{Detail} = $Options{detail};
 514 
 515   $OptionsInfo{ProcessCmpdInfo} = ($Options{all} ||  $Options{chiral} || $Options{empty} || $Options{fields} || $Options{invalidatomnumbers}  || $Options{mismatch} || $Options{salts} || $Options{unknownatoms} || $Options{datacheck}) ? 1 : 0;
 516 
 517   $OptionsInfo{ProcessCmpdData} = ($Options{all} || $Options{fields} || $Options{empty} || $Options{datacheck}) ? 1 : 0;
 518 
 519   $OptionsInfo{CountEmptyData} = ($Options{all} || $Options{empty}) ? 1 : 0;
 520   $OptionsInfo{CheckData} = ($Options{all} || $Options{datacheck}) ? 1 : 0;
 521 }
 522 
 523 # Setup script usage  and retrieve command line arguments specified using various options...
 524 sub SetupScriptUsage {
 525 
 526   # Setup default and retrieve all the options...
 527   %Options = ();
 528   $Options{detail} = 1;
 529   if (!GetOptions(\%Options, "all|a", "count|c", "chiral", "datacheck", "detail|d:i", "empty|e", "fields|f", "help|h", "invalidatomnumbers|i", "mismatch|m", "salts|s", "unknownatoms|u", "workingdir|w=s")) {
 530     die "\nTo get a list of valid options and their values, use \"$ScriptName -h\" or\n\"perl -S $ScriptName -h\" command and try again...\n";
 531   }
 532   if ($Options{workingdir}) {
 533     if (! -d $Options{workingdir}) {
 534       die "Error: The value specified, $Options{workingdir}, for option \"-w --workingdir\" is not a directory name.\n";
 535     }
 536     chdir $Options{workingdir} or die "Error: Couldn't chdir $Options{workingdir}: $! \n";
 537   }
 538   if ($Options{detail} <= 0 || $Options{detail} > 3) {
 539     die "Error: The value specified, $Options{detail}, for option \"-d --detail\" is not valid. Possible values: 1 to 3\n";
 540   }
 541 }
 542