#!/usr/bin/perl -w
#
# File: InfoSDFiles.pl
# Author: Manish Sud <msud@san.rr.com>
#
# Copyright (C) 2024 Manish Sud. All rights reserved.
#
# This file is part of MayaChemTools.
#
# MayaChemTools is free software; you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation; either version 3 of the License, or (at your option) any
# later version.
#
# MayaChemTools is distributed in the hope that it will be useful, but without
# any warranty; without even the implied warranty of merchantability of fitness
# for a particular purpose. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
# write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
# Boston, MA, 02111-1307, USA.
#

use strict;
use FindBin; use lib "$FindBin::Bin/../lib";
use Getopt::Long;
use File::Basename;
use Benchmark;
use SDFileUtil;
use TextUtil;
use FileUtil;

my($ScriptName, %Options, $StartTime, $EndTime, $TotalTime);

# Autoflush STDOUT
$| = 1;

# Starting message...
$ScriptName = basename $0;
print "\n$ScriptName:Starting...\n\n";
$StartTime = Benchmark->new;

# Get the options and setup script...
SetupScriptUsage();
if ($Options{help} || @ARGV < 1) {
  die GetUsageFromPod("$FindBin::Bin/$ScriptName");
}

my(@SDFilesList);
@SDFilesList = ExpandFileNames(\@ARGV, "sdf sd");

# Process options...
print "Processing options...\n";
my(%OptionsInfo);
ProcessOptions();

# Setup information about input files...
print "Checking input SD file(s)...\n";
my(%SDFilesInfo, %SDCmpdsInfo);
RetrieveSDFilesInfo();
InitializeSDCmpdsInfo();

# Process input files; the ones which failed the checks in RetrieveSDFilesInfo
# are skipped...
if (@SDFilesList > 1) {
  print "\nProcessing SD files...\n";
}
for my $FileIndex (0 .. $#SDFilesList) {
  if ($SDFilesInfo{FileOkay}[$FileIndex]) {
    print "\nProcessing file $SDFilesList[$FileIndex]...\n";
    ListSDFileInfo($FileIndex);
  }
}
ListTotalSizeOfFiles();

print "\n$ScriptName:Done...\n\n";

$EndTime = Benchmark->new;
$TotalTime = timediff ($EndTime, $StartTime);
print "Total time: ", timestr($TotalTime), "\n";

###############################################################################

# List appropriate information for the SD file at position $Index in
# @SDFilesList: detailed compound information when any per compound option has
# been specified, otherwise just the compound count, followed by the file size
# and last modification time collected by RetrieveSDFilesInfo...
sub ListSDFileInfo {
  my($Index) = @_;

  if ($OptionsInfo{ProcessCmpdInfo}) {
    ListCompoundDetailsInfo($Index);
  }
  else {
    ListCompoundCountInfo($Index);
  }

  # File size and modification information...
  print "\nFile size: ", FormatFileSize($SDFilesInfo{FileSize}[$Index]), " \n";
  print "Last modified: ", $SDFilesInfo{FileLastModified}[$Index], " \n";
}

# List number of compounds in the SD file at position $Index in @SDFilesList.
# Compounds are counted by counting the lines starting with the $$$$ record
# delimiter; the count is also added to $SDCmpdsInfo{TotalCmpdCount}...
sub ListCompoundCountInfo {
  my($Index) = @_;
  my($SDFile, $SDFileHandle, $CmpdCount, $Line);

  $SDFile = $SDFilesList[$Index];

  $CmpdCount = 0;

  # Three argument open: the file name is never interpreted as an open mode...
  open $SDFileHandle, "<", $SDFile or die "Couldn't open $SDFile: $! \n";
  while ($Line = <$SDFileHandle>) {
    if ($Line =~ /^\$\$\$\$/) {
      $CmpdCount++;
    }
  }
  close $SDFileHandle;

  $SDCmpdsInfo{TotalCmpdCount} += $CmpdCount;

  print "\nNumber of compounds: $CmpdCount\n";
}

# List detailed compound information...
#
# Reads the SD file at position $Index in @SDFilesList one compound record at a
# time and runs the checks enabled in %OptionsInfo: empty and mismatched
# atom/bond blocks, chiral flag, unknown atoms, invalid atom numbers and salts.
# Prints the per file counts and adds the number of compounds to
# $SDCmpdsInfo{TotalCmpdCount}. The amount of output produced for each
# problematic compound is controlled by $OptionsInfo{Detail}...
sub ListCompoundDetailsInfo {
  my($Index) = @_;
  my($SDFile, $SDFileHandle, $CmpdCount, $EmptyCtabBlocksCount, $MismatchCtabBlockCount, $ChiralCtabBlockCount, $UnknownAtomsCtabBlockCount, $InvalidAtomNumbersCtabBlockCount, $SaltsCtabBlockCount, $CtabLinesCount, $PrintCmpdCounterHeader, $ProblematicCmpdData, $CmpdString, @CmpdLines);

  $SDFile = $SDFilesList[$Index];

  ($CmpdCount, $EmptyCtabBlocksCount, $MismatchCtabBlockCount, $ChiralCtabBlockCount, $UnknownAtomsCtabBlockCount, $InvalidAtomNumbersCtabBlockCount, $SaltsCtabBlockCount) = (0) x 7;

  # Data field information is collected per file...
  InitializeSDCmpdsInfo();

  $PrintCmpdCounterHeader = 1;

  # Three argument open: the file name is never interpreted as an open mode...
  open $SDFileHandle, "<", $SDFile or die "Couldn't open $SDFile: $! \n";
  while ($CmpdString = ReadCmpdString($SDFileHandle)) {
    $CmpdCount++;
    $ProblematicCmpdData = 0;

    # Progress indicator after every 5000 compounds at low detail levels...
    if ($OptionsInfo{Detail} <= 1) {
      if (($CmpdCount % 5000) == 0) {
        if ($PrintCmpdCounterHeader) {
          $PrintCmpdCounterHeader = 0;
          print "Processing compounds:";
        }
        print "$CmpdCount...";
      }
    }

    @CmpdLines = split "\n", $CmpdString;
    $CtabLinesCount = GetCtabLinesCount(\@CmpdLines);

    if ($OptionsInfo{All} || $OptionsInfo{Empty}) {
      if ($CtabLinesCount <= 0) {
        $EmptyCtabBlocksCount++;
        $ProblematicCmpdData = 1;
      }
    }

    if ($CtabLinesCount > 0) {
      # The fourth line of a compound record is the counts line...
      my ($AtomCount, $BondCount, $ChiralFlag) = ParseCmpdCountsLine($CmpdLines[3]);

      if ($OptionsInfo{All} || $OptionsInfo{Mismatch}) {
        if ($CtabLinesCount != ($AtomCount + $BondCount)) {
          $MismatchCtabBlockCount++;
          $ProblematicCmpdData = 1;
          if ($OptionsInfo{Detail} >= 2) {
            print "\nMismatch found: Ctab lines count: $CtabLinesCount; Atoms count: $AtomCount; Bond count: $BondCount\n";
          }
        }
      }

      if ($OptionsInfo{All} || $OptionsInfo{Chiral}) {
        if ($ChiralFlag == 1) {
          $ChiralCtabBlockCount++;
        }
      }

      # Atom level checks are performed only for consistent atom/bond blocks...
      if ($CtabLinesCount == ($AtomCount + $BondCount)) {
        if ($OptionsInfo{All} || $OptionsInfo{UnknownAtoms}) {
          my($UnknownAtomCount, $UnknownAtoms, $UnknownAtomLines) = GetUnknownAtoms(\@CmpdLines);
          if ($UnknownAtomCount) {
            $UnknownAtomsCtabBlockCount++;
            $ProblematicCmpdData = 1;
            if ($OptionsInfo{Detail} >= 2) {
              print "\nUnknown atom(s) found: $UnknownAtomCount\nUnknown atom(s) symbols:$UnknownAtoms\nUnknown atom(s) data lines:\n$UnknownAtomLines\n";
            }
          }
        }

        if ($OptionsInfo{All} || $OptionsInfo{InvalidAtomNumbers}) {
          my($InvalidAtomNumbersCount, $InvalidAtomNumbers, $InvalidAtomNumberLines) = GetInvalidAtomNumbers(\@CmpdLines);
          if ($InvalidAtomNumbersCount) {
            $InvalidAtomNumbersCtabBlockCount++;
            $ProblematicCmpdData = 1;
            if ($OptionsInfo{Detail} >= 2) {
              print "\nInvalid atom number(s) found: $InvalidAtomNumbersCount\nInvalid atom number(s):$InvalidAtomNumbers\nInvalid atom number(s) data lines:\n$InvalidAtomNumberLines\n";
            }
          }
        }

        if ($OptionsInfo{All} || $OptionsInfo{Salts}) {
          # More than one fragment is reported as a salt...
          my($FragmentsCount, $Fragments) = GetCmpdFragments(\@CmpdLines);
          if ($FragmentsCount > 1) {
            $SaltsCtabBlockCount++;
            $ProblematicCmpdData = 1;
            if ($OptionsInfo{Detail} >= 2) {
              print "\nSalts found: $FragmentsCount\nSalts atom numbers:\n$Fragments\n";
            }
          }
        }
      }
    }

    if ($OptionsInfo{ProcessCmpdData}) {
      ProcessCmpdInfo(\@CmpdLines, $CmpdCount);
    }

    if ($OptionsInfo{Detail} >= 3) {
      if ($ProblematicCmpdData) {
        print "\nCompound data:\n$CmpdString\n\n";
      }
    }
  }

  # Terminate the progress indicator line, if one was started...
  if ($OptionsInfo{Detail} <= 1) {
    if (!$PrintCmpdCounterHeader) {
      print "\n";
    }
  }
  close $SDFileHandle;

  $SDCmpdsInfo{TotalCmpdCount} += $CmpdCount;

  print "\nNumber of compounds: $CmpdCount\n";

  if ($OptionsInfo{All} || $OptionsInfo{Empty}) {
    print "Number of empty atom/bond blocks: $EmptyCtabBlocksCount\n";
  }
  if ($OptionsInfo{All} || $OptionsInfo{Mismatch}) {
    print "Number of mismatched atom/bond blocks: $MismatchCtabBlockCount\n";
  }
  if ($OptionsInfo{All} || $OptionsInfo{UnknownAtoms}) {
    print "Number of atom blocks with unknown atom labels: $UnknownAtomsCtabBlockCount\n";
  }
  if ($OptionsInfo{All} || $OptionsInfo{InvalidAtomNumbers}) {
    print "Number of bond blocks and atom property blocks with invalid atom numbers: $InvalidAtomNumbersCtabBlockCount\n";
  }
  if ($OptionsInfo{All} || $OptionsInfo{Salts}) {
    print "Number of atom blocks containing salts: $SaltsCtabBlockCount\n";
  }
  if ($OptionsInfo{All} || $OptionsInfo{Chiral}) {
    print "Number of chiral atom/bond blocks: $ChiralCtabBlockCount\n";
  }
  if ($OptionsInfo{ProcessCmpdData}) {
    PrintCmpdInfoSummary();
  }

}

# Initialize compound data information for a SD file. The running total of
# compounds across files, $SDCmpdsInfo{TotalCmpdCount}, is set to zero only
# when it doesn't exist; everything else is reset on every call...
sub InitializeSDCmpdsInfo {

  if (!exists $SDCmpdsInfo{TotalCmpdCount}) {
    $SDCmpdsInfo{TotalCmpdCount} = 0;
  }

  @{$SDCmpdsInfo{FieldLabels}} = ();
  %{$SDCmpdsInfo{FieldLabelsMap}} = ();
  %{$SDCmpdsInfo{NonEmptyFieldValuesCountMap}} = ();
  %{$SDCmpdsInfo{EmptyFieldValuesCountMap}} = ();
  %{$SDCmpdsInfo{NonNumericalFieldValuesCountMap}} = ();
  %{$SDCmpdsInfo{NumericalFieldValuesCountMap}} = ();
}

# Process compound data header labels and figure out which ones are present for
# all the compounds...
#
# $CmpdLinesRef is a reference to the lines of the current compound record and
# $CmpdCount is its 1-based position in the file. Each label in
# $SDCmpdsInfo{FieldLabelsMap} is marked as "PresentInAll" or "PresentInSome";
# empty/non-empty and numerical/non-numerical value counts are updated as
# requested by $OptionsInfo{CountEmptyData} and $OptionsInfo{CheckData}...
sub ProcessCmpdInfo {
  my($CmpdLinesRef, $CmpdCount) = @_;
  my($Label);

  # Only the first compound may seed the "PresentInAll" label set. Relying just
  # on an empty label list would treat the first compound with data fields as
  # the first compound, even when earlier compounds had no data fields at all...
  if ((defined($CmpdCount) && $CmpdCount > 1) || @{$SDCmpdsInfo{FieldLabels}}) {
    my (@CmpdFieldLabels) = GetCmpdDataHeaderLabels($CmpdLinesRef);
    my(%CmpdFieldLabelsMap) = ();

    # Setup a map for the current labels...
    for $Label (@CmpdFieldLabels) {
      $CmpdFieldLabelsMap{$Label} = "PresentInSome";
    }
    # Old labels missing from this compound are no longer present in all...
    for $Label (@{$SDCmpdsInfo{FieldLabels}}) {
      if (!$CmpdFieldLabelsMap{$Label}) {
        $SDCmpdsInfo{FieldLabelsMap}{$Label} = "PresentInSome";
      }
    }
    # Labels of this compound missing from the old labels are new...
    for $Label (@CmpdFieldLabels) {
      if (!$SDCmpdsInfo{FieldLabelsMap}{$Label}) {
        push @{$SDCmpdsInfo{FieldLabels}}, $Label;
        $SDCmpdsInfo{FieldLabelsMap}{$Label} = "PresentInSome";
      }
    }
  }
  else {
    # Get the initial label set and set up a map...
    @{$SDCmpdsInfo{FieldLabels}} = GetCmpdDataHeaderLabels($CmpdLinesRef);
    for $Label (@{$SDCmpdsInfo{FieldLabels}}) {
      $SDCmpdsInfo{FieldLabelsMap}{$Label} = "PresentInAll";
    }
  }

  if ($OptionsInfo{CountEmptyData} || $OptionsInfo{CheckData}) {
    my(%DataFieldAndValues, $Value);

    %DataFieldAndValues = GetCmpdDataHeaderLabelsAndValues($CmpdLinesRef);
    for $Label (keys %DataFieldAndValues) {
      $Value = $DataFieldAndValues{$Label};

      # Count empty data field values...
      if ($OptionsInfo{CountEmptyData}) {
        if (IsNotEmpty($Value)) {
          $SDCmpdsInfo{NonEmptyFieldValuesCountMap}{$Label}++;
        }
        else {
          if ($OptionsInfo{Detail} >= 2) {
            print "Compound record $CmpdCount: Empty data field <$Label>\n";
          }
          $SDCmpdsInfo{EmptyFieldValuesCountMap}{$Label}++;
        }
      }

      # Count numerical data field values...
      if ($OptionsInfo{CheckData}) {
        if (IsNumerical($Value)) {
          $SDCmpdsInfo{NumericalFieldValuesCountMap}{$Label}++;
        }
        else {
          $SDCmpdsInfo{NonNumericalFieldValuesCountMap}{$Label}++;
        }
      }
    }
  }
}

# Print
# compound summary...
#
# Lists the data field labels collected in %SDCmpdsInfo for the current SD file,
# split into the ones present in all compounds and the ones present in some
# compounds, followed by the empty/non-empty and numerical/non-numerical value
# counts requested by $OptionsInfo{CountEmptyData} and $OptionsInfo{CheckData}...
sub PrintCmpdInfoSummary {
  my($FieldLabelsRef, $FieldLabelsMapRef, $Label, $AllFieldsPresentInAll, @SortedFieldLabels, @FieldLabelsPresentInSome, @FieldLabelsPresentInAll);

  $FieldLabelsRef = $SDCmpdsInfo{FieldLabels};
  if (!@{$FieldLabelsRef}) {
    print "\nNumber of data fields: 0\n";
    return;
  }

  $FieldLabelsMapRef = $SDCmpdsInfo{FieldLabelsMap};
  @SortedFieldLabels = sort keys %{$FieldLabelsMapRef};

  print "\nNumber of data fields: ", scalar(@{$FieldLabelsRef}), "\n";
  print "All data field labels: ";
  for $Label (@SortedFieldLabels) {
    print "<$Label> ";
  }
  print "\n";

  # Partition the labels once instead of rescanning the map for each list...
  @FieldLabelsPresentInAll = grep { $FieldLabelsMapRef->{$_} eq "PresentInAll" } @SortedFieldLabels;
  @FieldLabelsPresentInSome = grep { $FieldLabelsMapRef->{$_} eq "PresentInSome" } @SortedFieldLabels;
  $AllFieldsPresentInAll = (@FieldLabelsPresentInAll == @{$FieldLabelsRef}) ? 1 : 0;

  if (!$AllFieldsPresentInAll) {
    print "Data field labels present in all compounds: ";
    for $Label (@FieldLabelsPresentInAll) {
      print "<$Label> ";
    }
    print "\n";
    print "Data field labels present in some compounds: ";
    for $Label (@FieldLabelsPresentInSome) {
      print "<$Label> ";
    }
    print "\n";
  }

  # List empty data field values count...
  if ($OptionsInfo{CountEmptyData}) {
    my($NonEmptyCountMapRef, $EmptyCountMapRef);

    $NonEmptyCountMapRef = $SDCmpdsInfo{NonEmptyFieldValuesCountMap};
    $EmptyCountMapRef = $SDCmpdsInfo{EmptyFieldValuesCountMap};

    print "\n";
    if ($AllFieldsPresentInAll) {
      PrintDataInformation("Number of non-empty values for data field(s)", $FieldLabelsRef, $NonEmptyCountMapRef);
      PrintDataInformation("Number of empty values for data field(s)", $FieldLabelsRef, $EmptyCountMapRef);
    }
    else {
      PrintDataInformation("Number of non-empty values for data field(s) present in all compounds", \@FieldLabelsPresentInAll, $NonEmptyCountMapRef);
      PrintDataInformation("Number of empty values for data field(s) present in all compounds", \@FieldLabelsPresentInAll, $EmptyCountMapRef);
      PrintDataInformation("Number of non-empty values for data field(s) present in some compounds", \@FieldLabelsPresentInSome, $NonEmptyCountMapRef);
      PrintDataInformation("Number of empty values for data field(s) present in some compounds", \@FieldLabelsPresentInSome, $EmptyCountMapRef);
    }
    print "\n";
  }

  # List numerical data values count...
  if ($OptionsInfo{CheckData}) {
    my($NonNumericalCountMapRef, $NumericalCountMapRef);

    $NonNumericalCountMapRef = $SDCmpdsInfo{NonNumericalFieldValuesCountMap};
    $NumericalCountMapRef = $SDCmpdsInfo{NumericalFieldValuesCountMap};

    print "\n";
    if ($AllFieldsPresentInAll) {
      PrintDataInformation("Number of non-numerical values for data field(s)", $FieldLabelsRef, $NonNumericalCountMapRef);
      PrintDataInformation("Number of numerical values for data field(s)", $FieldLabelsRef, $NumericalCountMapRef);
    }
    else {
      PrintDataInformation("Number of non-numerical values for data field(s) present in all compounds", \@FieldLabelsPresentInAll, $NonNumericalCountMapRef);
      PrintDataInformation("Number of numerical values for data field(s) present in all compounds", \@FieldLabelsPresentInAll, $NumericalCountMapRef);
      PrintDataInformation("Number of non-numerical values for data field(s) present in some compounds", \@FieldLabelsPresentInSome, $NonNumericalCountMapRef);
      PrintDataInformation("Number of numerical values for data field(s) present in some compounds", \@FieldLabelsPresentInSome, $NumericalCountMapRef);
    }
    print "\n";
  }
}

# List data information: prints $InfoLabel followed by a comma delimited list
# of " <Label> - Count" entries, one for each label in @{$DataLabelRef}. Labels
# missing from %{$DataLabelToValueMapRef} are listed with a count of 0...
sub PrintDataInformation {
  my($InfoLabel, $DataLabelRef, $DataLabelToValueMapRef) = @_;
  my($Line);

  $Line = join ",", map { " <$_> - " . (exists($DataLabelToValueMapRef->{$_}) ? $DataLabelToValueMapRef->{$_} : 0) } @{$DataLabelRef};

  print "$InfoLabel: $Line\n";
}

# Total size of all the files: printed, along with the total number of
# compounds, only when more than one input file passed the checks...
sub ListTotalSizeOfFiles {
  my($FileOkayCount, $TotalSize);

  $FileOkayCount = 0;
  $TotalSize = 0;

  for my $Index (0 .. $#SDFilesList) {
    if ($SDFilesInfo{FileOkay}[$Index]) {
      $FileOkayCount++;
      $TotalSize += $SDFilesInfo{FileSize}[$Index];
    }
  }
  if ($FileOkayCount > 1) {
    print "\nTotal number of compounds in $FileOkayCount SD files: $SDCmpdsInfo{TotalCmpdCount}\n";
    print "\nTotal size of $FileOkayCount SD files: ", FormatFileSize($TotalSize), "\n";
  }

}

# Retrieve information about SD files: for each file in @SDFilesList, record in
# %SDFilesInfo whether it exists, is a SD file and can be opened, along with its
# size and formatted last modification time. Files failing a check generate a
# warning and are left marked as not okay...
sub RetrieveSDFilesInfo {
  my($Index, $SDFile, $SDFileHandle, $ModifiedTimeString, $ModifiedDateString);

  %SDCmpdsInfo = ();

  %SDFilesInfo = ();
  @{$SDFilesInfo{FileOkay}} = ();
  @{$SDFilesInfo{FileSize}} = ();
  @{$SDFilesInfo{FileLastModified}} = ();

  FILELIST: for $Index (0 .. $#SDFilesList) {
    $SDFilesInfo{FileOkay}[$Index] = 0;
    $SDFilesInfo{FileSize}[$Index] = 0;
    $SDFilesInfo{FileLastModified}[$Index] = '';

    $SDFile = $SDFilesList[$Index];
    if (!(-e $SDFile)) {
      warn "Warning: Ignoring file $SDFile: It doesn't exist\n";
      next FILELIST;
    }
    if (!CheckFileType($SDFile, "sdf sd")) {
      warn "Warning: Ignoring file $SDFile: It's not a SD file\n";
      next FILELIST;
    }
    # Three argument open: the file name is never interpreted as an open mode...
    if (!open($SDFileHandle, "<", $SDFile)) {
      warn "Warning: Ignoring file $SDFile: Couldn't open it: $! \n";
      next FILELIST;
    }
    close $SDFileHandle;

    $SDFilesInfo{FileOkay}[$Index] = 1;
    $SDFilesInfo{FileSize}[$Index] = FileSize($SDFile);
    ($ModifiedTimeString, $ModifiedDateString) = FormattedFileModificationTimeAndDate($SDFile);
    $SDFilesInfo{FileLastModified}[$Index] = "$ModifiedTimeString; $ModifiedDateString";
  }
}

# Process option values: copy the command line options from %Options into
# %OptionsInfo, using 0 for the flags which were not specified, and derive the
# flags indicating which kind of per compound processing is needed...
sub ProcessOptions {
  my($OptionName, %OptionNameToInfoKeyMap);

  %OptionsInfo = ();

  %OptionNameToInfoKeyMap = (
    all => 'All',
    chiral => 'Chiral',
    count => 'Count',
    datacheck => 'DataCheck',
    empty => 'Empty',
    fields => 'Fields',
    invalidatomnumbers => 'InvalidAtomNumbers',
    mismatch => 'Mismatch',
    salts => 'Salts',
    unknownatoms => 'UnknownAtoms',
  );
  for $OptionName (keys %OptionNameToInfoKeyMap) {
    $OptionsInfo{$OptionNameToInfoKeyMap{$OptionName}} = $Options{$OptionName} ? $Options{$OptionName} : 0;
  }

  $OptionsInfo{Detail} = $Options{detail};

  # Any option other than count requires reading the compound records...
  $OptionsInfo{ProcessCmpdInfo} = ($Options{all} || $Options{chiral} || $Options{empty} || $Options{fields} || $Options{invalidatomnumbers} || $Options{mismatch} || $Options{salts} || $Options{unknownatoms} || $Options{datacheck}) ? 1 : 0;

  $OptionsInfo{ProcessCmpdData} = ($Options{all} || $Options{fields} || $Options{empty} || $Options{datacheck}) ? 1 : 0;

  $OptionsInfo{CountEmptyData} = ($Options{all} || $Options{empty}) ? 1 : 0;
  $OptionsInfo{CheckData} = ($Options{all} || $Options{datacheck}) ? 1 : 0;
}

# Setup script usage and retrieve command line arguments specified using various options...
#
# Dies with an error message for invalid options, a working directory which
# doesn't exist or can't be entered, and a detail level outside 1 to 3...
sub SetupScriptUsage {

  # Setup default and retrieve all the options...
  %Options = ();
  $Options{detail} = 1;
  if (!GetOptions(\%Options, "all|a", "count|c", "chiral", "datacheck", "detail|d:i", "empty|e", "fields|f", "help|h", "invalidatomnumbers|i", "mismatch|m", "salts|s", "unknownatoms|u", "workingdir|w=s")) {
    die "\nTo get a list of valid options and their values, use \"$ScriptName -h\" or\n\"perl -S $ScriptName -h\" command and try again...\n";
  }
  if ($Options{workingdir}) {
    if (! -d $Options{workingdir}) {
      die "Error: The value specified, $Options{workingdir}, for option \"-w --workingdir\" is not a directory name.\n";
    }
    chdir $Options{workingdir} or die "Error: Couldn't chdir $Options{workingdir}: $! \n";
  }
  if ($Options{detail} <= 0 || $Options{detail} > 3) {
    die "Error: The value specified, $Options{detail}, for option \"-d --detail\" is not valid. Possible values: 1 to 3\n";
  }
}
