MayaChemTools

   1 #!/usr/bin/perl -w
   2 #
   3 # File: CalculatePhysicochemicalProperties.pl
   4 # Author: Manish Sud <msud@san.rr.com>
   5 #
   6 # Copyright (C) 2024 Manish Sud. All rights reserved.
   7 #
   8 # This file is part of MayaChemTools.
   9 #
  10 # MayaChemTools is free software; you can redistribute it and/or modify it under
  11 # the terms of the GNU Lesser General Public License as published by the Free
  12 # Software Foundation; either version 3 of the License, or (at your option) any
  13 # later version.
  14 #
  15 # MayaChemTools is distributed in the hope that it will be useful, but without
  16 # any warranty; without even the implied warranty of merchantability of fitness
  17 # for a particular purpose.  See the GNU Lesser General Public License for more
  18 # details.
  19 #
  20 # You should have received a copy of the GNU Lesser General Public License
  21 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  22 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  23 # Boston, MA, 02111-1307, USA.
  24 #
  25 
  26 use strict;
  27 use FindBin; use lib "$FindBin::Bin/../lib";
  28 use Getopt::Long;
  29 use File::Basename;
  30 use Text::ParseWords;
  31 use Benchmark;
  32 use FileUtil;
  33 use TextUtil;
  34 use SDFileUtil;
  35 use MoleculeFileIO;
  36 use Molecule;
  37 use AtomTypes::AtomicInvariantsAtomTypes;
  38 use AtomTypes::FunctionalClassAtomTypes;
  39 use MolecularDescriptors::MolecularDescriptorsGenerator;
  40 
  41 my($ScriptName, %Options, $StartTime, $EndTime, $TotalTime);
  42 
  43 # Autoflush STDOUT
  44 $| = 1;
  45 
  46 # Starting message...
  47 $ScriptName = basename($0);
  48 print "\n$ScriptName: Starting...\n\n";
  49 $StartTime = new Benchmark;
  50 
  51 # Get the options and setup script...
  52 SetupScriptUsage();
  53 if ($Options{help} || @ARGV < 1) {
  54   die GetUsageFromPod("$FindBin::Bin/$ScriptName");
  55 }
  56 
  57 my(@SDFilesList);
  58 @SDFilesList = ExpandFileNames(\@ARGV, "sdf sd");
  59 
  60 # Process options...
  61 print "Processing options...\n";
  62 my(%OptionsInfo);
  63 ProcessOptions();
  64 
  65 # Setup information about input files...
  66 print "Checking input SD file(s)...\n";
  67 my(%SDFilesInfo);
  68 RetrieveSDFilesInfo();
  69 
  70 # Process input files..
  71 my($FileIndex);
  72 if (@SDFilesList > 1) {
  73   print "\nProcessing SD files...\n";
  74 }
  75 for $FileIndex (0 .. $#SDFilesList) {
  76   if ($SDFilesInfo{FileOkay}[$FileIndex]) {
  77     print "\nProcessing file $SDFilesList[$FileIndex]...\n";
  78     CalculatePhysicochemicalProperties($FileIndex);
  79   }
  80 }
  81 print "\n$ScriptName:Done...\n\n";
  82 
  83 $EndTime = new Benchmark;
  84 $TotalTime = timediff ($EndTime, $StartTime);
  85 print "Total time: ", timestr($TotalTime), "\n";
  86 
  87 ###############################################################################
  88 
  89 # Calculate physicochemical properties for a SD file...
  90 #
  91 sub CalculatePhysicochemicalProperties {
  92   my($FileIndex) = @_;
  93   my($CmpdCount, $IgnoredCmpdCount, $RuleOf5ViolationsCount, $RuleOf3ViolationsCount, $SDFile, $MoleculeFileIO, $Molecule, $MolecularDescriptorsGenerator, $PhysicochemicalPropertiesDataRef, $NewSDFileRef, $NewTextFileRef);
  94 
  95   $SDFile = $SDFilesList[$FileIndex];
  96 
  97   # Setup output files...
  98   $NewSDFileRef = ''; $NewTextFileRef = '';
  99   ($NewSDFileRef, $NewTextFileRef) = SetupAndOpenOutputFiles($FileIndex);
 100 
 101   # Setup molecular descriptor generator to calculate property values for specifed
 102   # property names...
 103   $MolecularDescriptorsGenerator = SetupMolecularDescriptorsGenerator();
 104 
 105   ($CmpdCount, $IgnoredCmpdCount, $RuleOf5ViolationsCount, $RuleOf3ViolationsCount) = ('0') x 4;
 106 
 107   $MoleculeFileIO = new MoleculeFileIO('Name' => $SDFile);
 108   $MoleculeFileIO->Open();
 109 
 110   COMPOUND: while ($Molecule = $MoleculeFileIO->ReadMolecule()) {
 111     $CmpdCount++;
 112 
 113     # Filter compound data before calculating physiochemical properties...
 114     if ($OptionsInfo{Filter}) {
 115       if (CheckAndFilterCompound($CmpdCount, $Molecule)) {
 116         $IgnoredCmpdCount++;
 117         next COMPOUND;
 118       }
 119     }
 120 
 121     # Calculate properties...
 122     $PhysicochemicalPropertiesDataRef = CalculateMoleculeProperties($MolecularDescriptorsGenerator, $Molecule);
 123 
 124     if (!defined($PhysicochemicalPropertiesDataRef)) {
 125       $IgnoredCmpdCount++;
 126       ProcessIgnoredCompound('PropertiesCalculationFailed', $CmpdCount, $Molecule);
 127       next COMPOUND;
 128     }
 129 
 130     # Calculate any rule violations...
 131     if ($OptionsInfo{RuleOf5Violations} && $PhysicochemicalPropertiesDataRef->{RuleOf5Violations}) {
 132       $RuleOf5ViolationsCount++;
 133     }
 134 
 135     if ($OptionsInfo{RuleOf3Violations} && $PhysicochemicalPropertiesDataRef->{RuleOf3Violations}) {
 136       $RuleOf3ViolationsCount++;
 137     }
 138 
 139     # Write out calculate properties...
 140     WriteDataToOutputFiles($FileIndex, $CmpdCount, $Molecule, $PhysicochemicalPropertiesDataRef, $NewSDFileRef, $NewTextFileRef);
 141   }
 142   $MoleculeFileIO->Close();
 143 
 144   if ($OptionsInfo{SDOutput} && $NewSDFileRef) {
 145     close $NewSDFileRef;
 146   }
 147   if ($OptionsInfo{TextOutput} && $NewTextFileRef) {
 148     close $NewTextFileRef;
 149   }
 150 
 151   WriteCalculationSummaryStatistics($CmpdCount, $IgnoredCmpdCount, $RuleOf5ViolationsCount, $RuleOf3ViolationsCount);
 152 }
 153 
 154 # Process compound being ignored due to problems in physicochemical properties calculation...
 155 #
 156 sub ProcessIgnoredCompound {
 157   my($Mode, $CmpdCount, $Molecule) = @_;
 158   my($CmpdID, $DataFieldLabelAndValuesRef);
 159 
 160   $DataFieldLabelAndValuesRef = $Molecule->GetDataFieldLabelAndValues();
 161   $CmpdID = SetupCmpdIDForTextFiles($CmpdCount, $Molecule, $DataFieldLabelAndValuesRef);
 162 
 163   MODE: {
 164     if ($Mode =~ /^ContainsNonElementalData$/i) {
 165       warn "\nWarning: Ignoring compound record number $CmpdCount with ID $CmpdID: Compound contains atom data corresponding to non-elemental atom symbol(s)...\n\n";
 166       next MODE;
 167     }
 168 
 169     if ($Mode =~ /^ContainsNoElementalData$/i) {
 170       warn "\nWarning: Ignoring compound record number $CmpdCount with ID $CmpdID: Compound contains no atom data...\n\n";
 171       next MODE;
 172     }
 173 
 174     if ($Mode =~ /^PropertiesCalculationFailed$/i) {
 175       warn "\nWarning: Ignoring compound record number $CmpdCount with ID $CmpdID: Physicochemical properties calculation didn't succeed...\n\n";
 176       next MODE;
 177     }
 178     warn "\nWarning: Ignoring compound record number $CmpdCount with ID $CmpdID: Physicochemical properties calculation didn't succeed...\n\n";
 179   }
 180 }
 181 
 182 # Check and filter compounds....
 183 #
 184 sub CheckAndFilterCompound {
 185   my($CmpdCount, $Molecule) = @_;
 186   my($ElementCount, $NonElementCount);
 187 
 188   ($ElementCount, $NonElementCount) = $Molecule->GetNumOfElementsAndNonElements();
 189 
 190   if ($NonElementCount) {
 191     ProcessIgnoredCompound('ContainsNonElementalData', $CmpdCount, $Molecule);
 192     return 1;
 193   }
 194 
 195   if (!$ElementCount) {
 196     ProcessIgnoredCompound('ContainsNoElementalData', $CmpdCount, $Molecule);
 197     return 1;
 198   }
 199 
 200   return 0;
 201 }
 202 
 203 # Write out compounds physicochemical properties calculation summary statistics...
 204 #
 205 sub WriteCalculationSummaryStatistics {
 206   my($CmpdCount, $IgnoredCmpdCount, $RuleOf5ViolationsCount, $RuleOf3ViolationsCount) = @_;
 207   my($ProcessedCmpdCount);
 208 
 209   $ProcessedCmpdCount = $CmpdCount - $IgnoredCmpdCount;
 210 
 211   print "\nNumber of compounds: $CmpdCount\n";
 212   print "Number of compounds processed successfully during physicochemical properties calculation: $ProcessedCmpdCount\n";
 213   print "Number of compounds ignored during physicochemical properties calculation: $IgnoredCmpdCount\n";
 214 
 215   if ($OptionsInfo{RuleOf5Violations}) {
 216     print "Number of compounds with one or more RuleOf5 violations: $RuleOf5ViolationsCount\n";
 217   }
 218 
 219   if ($OptionsInfo{RuleOf3Violations}) {
 220     print "Number of compounds with one or more RuleOf3 violations: $RuleOf3ViolationsCount\n";
 221   }
 222 
 223 }
 224 
 225 # Open output files...
 226 #
 227 sub SetupAndOpenOutputFiles {
 228   my($FileIndex) = @_;
 229   my($NewSDFile, $NewTextFile, $NewSDFileRef, $NewTextFileRef);
 230 
 231   $NewSDFileRef = '';
 232   $NewTextFileRef = '';
 233 
 234   if ($OptionsInfo{SDOutput}) {
 235     $NewSDFile = $SDFilesInfo{SDOutFileNames}[$FileIndex];
 236     print "Generating SD file $NewSDFile...\n";
 237     open NEWSDFILE, ">$NewSDFile" or die "Error: Couldn't open $NewSDFile: $! \n";
 238     $NewSDFileRef = \*NEWSDFILE;
 239   }
 240   if ($OptionsInfo{TextOutput}) {
 241     $NewTextFile = $SDFilesInfo{TextOutFileNames}[$FileIndex];
 242     print "Generating text file $NewTextFile...\n";
 243     open NEWTEXTFILE, ">$NewTextFile" or die "Error: Couldn't open $NewTextFile: $! \n";
 244     WriteTextFileCoulmnLabels($FileIndex, \*NEWTEXTFILE);
 245     $NewTextFileRef = \*NEWTEXTFILE;
 246   }
 247   return ($NewSDFileRef, $NewTextFileRef);
 248 }
 249 
 250 # Write calculated physicochemical properties and other data to appropriate output files...
 251 #
 252 sub WriteDataToOutputFiles {
 253   my($FileIndex, $CmpdCount, $Molecule, $PhysicochemicalPropertiesDataRef, $NewSDFileRef, $NewTextFileRef) = @_;
 254   my($PropertyName, $PropertyValue);
 255 
 256   if ($OptionsInfo{SDOutput}) {
 257     # Retrieve input compound string used to create molecule and write it out
 258     # without last line containing a delimiter...
 259     my($CmpdString);
 260     $CmpdString = $Molecule->GetInputMoleculeString();
 261     $CmpdString =~ s/\$\$\$\$$//;
 262     print $NewSDFileRef "$CmpdString";
 263 
 264     # Write out calculated physicochemical properties data...
 265     for $PropertyName (@{$OptionsInfo{SpecifiedPropertyNames}}) {
 266       $PropertyValue = $PhysicochemicalPropertiesDataRef->{$PropertyName};
 267       print $NewSDFileRef  ">  <$PropertyName>\n$PropertyValue\n\n";
 268     }
 269 
 270     # Write out RuleOf5 violations for molecule....
 271     if ($OptionsInfo{RuleOf5Violations}) {
 272       $PropertyValue = $PhysicochemicalPropertiesDataRef->{RuleOf5Violations};
 273       print $NewSDFileRef  ">  <RuleOf5Violations>\n$PropertyValue\n\n";
 274     }
 275 
 276     # Write out RuleOf3 violations for molecule....
 277     if ($OptionsInfo{RuleOf3Violations}) {
 278       $PropertyValue = $PhysicochemicalPropertiesDataRef->{RuleOf3Violations};
 279       print $NewSDFileRef  ">  <RuleOf3Violations>\n$PropertyValue\n\n";
 280     }
 281 
 282     # Write out delimiter...
 283     print $NewSDFileRef "\$\$\$\$\n";
 284   }
 285 
 286   if ($OptionsInfo{TextOutput}) {
 287     my($Line, $DataFieldLabelAndValuesRef, $DataFieldLabel, $DataFieldValue, @LineWords,);
 288 
 289     $DataFieldLabelAndValuesRef = $Molecule->GetDataFieldLabelAndValues();
 290     @LineWords = ();
 291     if ($OptionsInfo{DataFieldsMode} =~ /^CompoundID$/i) {
 292       push @LineWords, SetupCmpdIDForTextFiles($CmpdCount, $Molecule, $DataFieldLabelAndValuesRef);
 293     }
 294     elsif ($OptionsInfo{DataFieldsMode} =~ /^All$/i) {
 295       @LineWords = map { exists $DataFieldLabelAndValuesRef->{$_} ? $DataFieldLabelAndValuesRef->{$_} : ''} @{$SDFilesInfo{AllDataFieldsRef}[$FileIndex]};
 296     }
 297     elsif ($OptionsInfo{DataFieldsMode} =~ /^Common$/i) {
 298       @LineWords = map { exists $DataFieldLabelAndValuesRef->{$_} ? $DataFieldLabelAndValuesRef->{$_} : ''} @{$SDFilesInfo{CommonDataFieldsRef}[$FileIndex]};
 299     }
 300     elsif ($OptionsInfo{DataFieldsMode} =~ /^Specify$/i) {
 301       @LineWords = map { exists $DataFieldLabelAndValuesRef->{$_} ? $DataFieldLabelAndValuesRef->{$_} : ''} @{$OptionsInfo{SpecifiedDataFields}};
 302     }
 303 
 304     # Append calculated physicochemical properties data...
 305     for $PropertyName (@{$OptionsInfo{SpecifiedPropertyNames}}) {
 306       $PropertyValue = $PhysicochemicalPropertiesDataRef->{$PropertyName};
 307       push @LineWords, $PropertyValue;
 308     }
 309 
 310     # Write out RuleOf5 violations for molecule....
 311     if ($OptionsInfo{RuleOf5Violations}) {
 312       $PropertyValue = $PhysicochemicalPropertiesDataRef->{RuleOf5Violations};
 313       push @LineWords, $PropertyValue;
 314     }
 315 
 316     # Write out RuleOf3 violations for molecule....
 317     if ($OptionsInfo{RuleOf3Violations}) {
 318       $PropertyValue = $PhysicochemicalPropertiesDataRef->{RuleOf3Violations};
 319       push @LineWords, $PropertyValue;
 320     }
 321 
 322     $Line = JoinWords(\@LineWords, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});
 323     print $NewTextFileRef "$Line\n";
 324   }
 325 }
 326 
 327 # Write out approriate column labels to text file...
 328 sub WriteTextFileCoulmnLabels {
 329   my($FileIndex, $NewTextFileRef) = @_;
 330   my($Line, @LineWords);
 331 
 332   @LineWords = ();
 333   if ($OptionsInfo{DataFieldsMode} =~ /^All$/i) {
 334     push @LineWords, @{$SDFilesInfo{AllDataFieldsRef}[$FileIndex]};
 335   }
 336   elsif ($OptionsInfo{DataFieldsMode} =~ /^Common$/i) {
 337     push @LineWords, @{$SDFilesInfo{CommonDataFieldsRef}[$FileIndex]};
 338   }
 339   elsif ($OptionsInfo{DataFieldsMode} =~ /^Specify$/i) {
 340     push @LineWords, @{$OptionsInfo{SpecifiedDataFields}};
 341   }
 342   elsif ($OptionsInfo{DataFieldsMode} =~ /^CompoundID$/i) {
 343     push @LineWords, $OptionsInfo{CompoundIDLabel};
 344   }
 345   my($SpecifiedPropertyName);
 346 
 347   # Append physicochemical properties column labels...
 348   push @LineWords,  @{$OptionsInfo{SpecifiedPropertyNames}};
 349 
 350   # Write out RuleOf5 violations label...
 351   if ($OptionsInfo{RuleOf5Violations}) {
 352     push @LineWords, 'RuleOf5Violations';
 353   }
 354 
 355   # Write out RuleOf3 violations label...
 356   if ($OptionsInfo{RuleOf3Violations}) {
 357     push @LineWords, 'RuleOf3Violations';
 358   }
 359 
 360   $Line = JoinWords(\@LineWords, $OptionsInfo{OutDelim}, $OptionsInfo{OutQuote});
 361   print $NewTextFileRef "$Line\n";
 362 }
 363 
 364 # Generate compound ID for text files..
 365 #
 366 sub SetupCmpdIDForTextFiles {
 367   my($CmpdCount, $Molecule, $DataFieldLabelAndValuesRef) = @_;
 368   my($CmpdID);
 369 
 370   $CmpdID = '';
 371   if ($OptionsInfo{CompoundIDMode} =~ /^MolNameOrLabelPrefix$/i) {
 372     my($MolName);
 373     $MolName = $Molecule->GetName();
 374     $CmpdID = $MolName ? $MolName : "$OptionsInfo{CompoundID}${CmpdCount}";
 375   }
 376   elsif ($OptionsInfo{CompoundIDMode} =~ /^LabelPrefix$/i) {
 377     $CmpdID = "$OptionsInfo{CompoundID}${CmpdCount}";
 378   }
 379   elsif ($OptionsInfo{CompoundIDMode} =~ /^DataField$/i) {
 380     my($SpecifiedDataField);
 381     $SpecifiedDataField = $OptionsInfo{CompoundID};
 382     $CmpdID = exists $DataFieldLabelAndValuesRef->{$SpecifiedDataField} ? $DataFieldLabelAndValuesRef->{$SpecifiedDataField} : '';
 383   }
 384   elsif ($OptionsInfo{CompoundIDMode} =~ /^MolName$/i) {
 385     $CmpdID = $Molecule->GetName();
 386   }
 387   return $CmpdID;
 388 }
 389 
 390 # Calculate physicochemical properties for molecule...
 391 #
 392 sub CalculateMoleculeProperties {
 393   my($MolecularDescriptorsGenerator, $Molecule) = @_;
 394   my($PropertyName, $PropertyValue, $MolecularDescriptorsObject, %CalculatedPhysicochemicalProperties);
 395 
 396   %CalculatedPhysicochemicalProperties = ();
 397 
 398   if ($OptionsInfo{KeepLargestComponent}) {
 399     $Molecule->KeepLargestComponent();
 400   }
 401 
 402   if (!$Molecule->DetectRings()) {
 403     return undef;
 404   }
 405   $Molecule->SetAromaticityModel($OptionsInfo{AromaticityModel});
 406   $Molecule->DetectAromaticity();
 407 
 408   if ($OptionsInfo{AddHydrogens}) {
 409     $Molecule->AddHydrogens();
 410   }
 411 
 412   # Calculate physicochemical properties...
 413   $MolecularDescriptorsGenerator->SetMolecule($Molecule);
 414   $MolecularDescriptorsGenerator->GenerateDescriptors();
 415 
 416   if (!$MolecularDescriptorsGenerator->IsDescriptorsGenerationSuccessful()) {
 417     return undef;
 418   }
 419 
 420   %CalculatedPhysicochemicalProperties = $MolecularDescriptorsGenerator->GetDescriptorNamesAndValues();
 421 
 422   # Count RuleOf3 violations...
 423   if ($OptionsInfo{RuleOf3Violations}) {
 424     CalculateRuleViolationsCount('RuleOf3Violations', \%CalculatedPhysicochemicalProperties);
 425   }
 426 
 427   # Count RuleOf5 violations...
 428   if ($OptionsInfo{RuleOf5Violations}) {
 429     CalculateRuleViolationsCount('RuleOf5Violations', \%CalculatedPhysicochemicalProperties);
 430   }
 431 
 432   return \%CalculatedPhysicochemicalProperties;
 433 }
 434 
 435 # Setup molecular descriptor generator to calculate property values for specifed
 436 # property names...
 437 #
 438 sub SetupMolecularDescriptorsGenerator {
 439   my($PropertyName, $MolecularDescriptorsGenerator);
 440 
 441   $MolecularDescriptorsGenerator = new MolecularDescriptors::MolecularDescriptorsGenerator('Mode' => 'Specify', 'DescriptorNames' => \@{$OptionsInfo{SpecifiedPropertyNames}});
 442 
 443   # Setup molecular desciptor calculation parameters...
 444   if (exists($OptionsInfo{SpecifiedPropertyNamesMap}{lc('MolecularWeight')}) || exists($OptionsInfo{SpecifiedPropertyNamesMap}{lc('ExactMass')}) ) {
 445     $MolecularDescriptorsGenerator->SetDescriptorClassParameters('DescriptorClassName' => 'WeightAndMassDescriptors', %{$OptionsInfo{PrecisionParametersMap}});
 446   }
 447 
 448   if (exists($OptionsInfo{SpecifiedPropertyNamesMap}{lc('RotatableBonds')})) {
 449     $MolecularDescriptorsGenerator->SetDescriptorClassParameters('DescriptorClassName' => 'RotatableBondsDescriptors', %{$OptionsInfo{RotatableBondsParametersMap}});
 450   }
 451 
 452   if (exists($OptionsInfo{SpecifiedPropertyNamesMap}{lc('HydrogenBondDonors')}) || exists($OptionsInfo{SpecifiedPropertyNamesMap}{lc('HydrogenBondAcceptors')}) ) {
 453     $MolecularDescriptorsGenerator->SetDescriptorClassParameters('DescriptorClassName' => 'HydrogenBondsDescriptors', 'HydrogenBondsType' => $OptionsInfo{HydrogenBonds});
 454   }
 455 
 456   if (exists($OptionsInfo{SpecifiedPropertyNamesMap}{lc('TPSA')})) {
 457     $MolecularDescriptorsGenerator->SetDescriptorClassParameters('DescriptorClassName' => 'TPSADescriptors', %{$OptionsInfo{TPSAParametersMap}});
 458   }
 459 
 460   if (exists($OptionsInfo{SpecifiedPropertyNamesMap}{lc('MolecularComplexity')})) {
 461     $MolecularDescriptorsGenerator->SetDescriptorClassParameters('DescriptorClassName' => 'MolecularComplexityDescriptors', %{$OptionsInfo{MolecularComplexityParametersMap}});
 462   }
 463 
 464   return $MolecularDescriptorsGenerator;
 465 }
 466 
 467 # Calculate RuleOf3 or RuleOf5 violations count...
 468 #
 469 sub CalculateRuleViolationsCount {
 470   my($RuleViolationsType, $CalculatedPropertiesMapRef) = @_;
 471   my($RuleViolationsCount, $PropertyName);
 472 
 473   $RuleViolationsCount = 0;
 474 
 475   RULEVIOLATIONSTYPE: {
 476     if ($RuleViolationsType =~ /^RuleOf3Violations$/i) {
 477       for $PropertyName (@{$OptionsInfo{RuleOf3PropertyNames}}) {
 478         if ($CalculatedPropertiesMapRef->{$PropertyName} > $OptionsInfo{RuleOf3MaxPropertyValuesMap}{$PropertyName}) {
 479           $RuleViolationsCount++;
 480         }
 481       }
 482       last RULEVIOLATIONSTYPE;
 483     }
 484 
 485     if ($RuleViolationsType =~ /^RuleOf5Violations$/i) {
 486       for $PropertyName (@{$OptionsInfo{RuleOf5PropertyNames}}) {
 487         if ($CalculatedPropertiesMapRef->{$PropertyName} > $OptionsInfo{RuleOf5MaxPropertyValuesMap}{$PropertyName}) {
 488           $RuleViolationsCount++;
 489         }
 490       }
 491       last RULEVIOLATIONSTYPE;
 492     }
 493 
 494     die "Warning: Unknown rule violation type: $RuleViolationsType...";
 495   }
 496 
 497   # Set rule violation count...
 498   $CalculatedPropertiesMapRef->{$RuleViolationsType} = $RuleViolationsCount;
 499 
 500 }
 501 
 502 # Retrieve information about SD files...
 503 #
 504 sub RetrieveSDFilesInfo {
 505   my($SDFile, $Index, $FileDir, $FileExt, $FileName, $OutFileRoot, $TextOutFileExt, $SDOutFileExt, $NewSDFileName, $NewTextFileName, $CheckDataField, $CollectDataFields, $AllDataFieldsRef, $CommonDataFieldsRef);
 506 
 507   %SDFilesInfo = ();
 508   @{$SDFilesInfo{FileOkay}} = ();
 509   @{$SDFilesInfo{OutFileRoot}} = ();
 510   @{$SDFilesInfo{SDOutFileNames}} = ();
 511   @{$SDFilesInfo{TextOutFileNames}} = ();
 512   @{$SDFilesInfo{AllDataFieldsRef}} = ();
 513   @{$SDFilesInfo{CommonDataFieldsRef}} = ();
 514 
 515   $CheckDataField = ($OptionsInfo{TextOutput} && ($OptionsInfo{DataFieldsMode} =~ /^CompoundID$/i) && ($OptionsInfo{CompoundIDMode} =~ /^DataField$/i)) ? 1 : 0;
 516   $CollectDataFields = ($OptionsInfo{TextOutput} && ($OptionsInfo{DataFieldsMode} =~ /^(All|Common)$/i)) ? 1 : 0;
 517 
 518   FILELIST: for $Index (0 .. $#SDFilesList) {
 519     $SDFile = $SDFilesList[$Index];
 520 
 521     $SDFilesInfo{FileOkay}[$Index] = 0;
 522     $SDFilesInfo{OutFileRoot}[$Index] = '';
 523     $SDFilesInfo{SDOutFileNames}[$Index] = '';
 524     $SDFilesInfo{TextOutFileNames}[$Index] = '';
 525 
 526     $SDFile = $SDFilesList[$Index];
 527     if (!(-e $SDFile)) {
 528       warn "Warning: Ignoring file $SDFile: It doesn't exist\n";
 529       next FILELIST;
 530     }
 531     if (!CheckFileType($SDFile, "sd sdf")) {
 532       warn "Warning: Ignoring file $SDFile: It's not a SD file\n";
 533       next FILELIST;
 534     }
 535 
 536     if ($CheckDataField) {
 537       # Make sure data field exists in SD file..
 538       my($CmpdString, $SpecifiedDataField, @CmpdLines, %DataFieldValues);
 539 
 540       @CmpdLines = ();
 541       open SDFILE, "$SDFile" or die "Error: Couldn't open $SDFile: $! \n";
 542       $CmpdString = ReadCmpdString(\*SDFILE);
 543       close SDFILE;
 544       @CmpdLines = split "\n", $CmpdString;
 545       %DataFieldValues = GetCmpdDataHeaderLabelsAndValues(\@CmpdLines);
 546       $SpecifiedDataField = $OptionsInfo{CompoundID};
 547       if (!exists $DataFieldValues{$SpecifiedDataField}) {
 548         warn "Warning: Ignoring file $SDFile: Data field value, $SpecifiedDataField, using  \"--CompoundID\" option in \"DataField\" \"--CompoundIDMode\" doesn't exist\n";
 549         next FILELIST;
 550       }
 551     }
 552 
 553     $AllDataFieldsRef = '';
 554     $CommonDataFieldsRef = '';
 555     if ($CollectDataFields) {
 556       my($CmpdCount);
 557       open SDFILE, "$SDFile" or die "Error: Couldn't open $SDFile: $! \n";
 558       ($CmpdCount, $AllDataFieldsRef, $CommonDataFieldsRef) = GetAllAndCommonCmpdDataHeaderLabels(\*SDFILE);
 559       close SDFILE;
 560     }
 561 
 562     # Setup output file names...
 563     $FileDir = ""; $FileName = ""; $FileExt = "";
 564     ($FileDir, $FileName, $FileExt) = ParseFileName($SDFile);
 565 
 566     $TextOutFileExt = "csv";
 567     if ($Options{outdelim} =~ /^tab$/i) {
 568       $TextOutFileExt = "tsv";
 569     }
 570     $SDOutFileExt = $FileExt;
 571 
 572     if ($OptionsInfo{OutFileRoot} && (@SDFilesList == 1)) {
 573       my ($RootFileDir, $RootFileName, $RootFileExt) = ParseFileName($OptionsInfo{OutFileRoot});
 574       if ($RootFileName && $RootFileExt) {
 575         $FileName = $RootFileName;
 576       }
 577       else {
 578         $FileName = $OptionsInfo{OutFileRoot};
 579       }
 580       $OutFileRoot = $FileName;
 581     }
 582     else {
 583       $OutFileRoot = "${FileName}PhysicochemicalProperties";
 584     }
 585 
 586     $NewSDFileName = "${OutFileRoot}.${SDOutFileExt}";
 587     $NewTextFileName = "${OutFileRoot}.${TextOutFileExt}";
 588 
 589     if ($OptionsInfo{SDOutput}) {
 590       if ($SDFile =~ /$NewSDFileName/i) {
 591         warn "Warning: Ignoring input file $SDFile: Same output, $NewSDFileName, and input file names.\n";
 592         print "Specify a different name using \"-r --root\" option or use default name.\n";
 593         next FILELIST;
 594       }
 595     }
 596 
 597     if (!$OptionsInfo{OverwriteFiles}) {
 598       # Check SD and text outout files...
 599       if ($OptionsInfo{SDOutput}) {
 600         if (-e $NewSDFileName) {
 601           warn "Warning: Ignoring file $SDFile: The file $NewSDFileName already exists\n";
 602           next FILELIST;
 603         }
 604       }
 605       if ($OptionsInfo{TextOutput}) {
 606         if (-e $NewTextFileName) {
 607           warn "Warning: Ignoring file $SDFile: The file $NewTextFileName already exists\n";
 608           next FILELIST;
 609         }
 610       }
 611     }
 612 
 613     $SDFilesInfo{FileOkay}[$Index] = 1;
 614 
 615     $SDFilesInfo{OutFileRoot}[$Index] = $OutFileRoot;
 616     $SDFilesInfo{SDOutFileNames}[$Index] = $NewSDFileName;
 617     $SDFilesInfo{TextOutFileNames}[$Index] = $NewTextFileName;
 618 
 619     $SDFilesInfo{AllDataFieldsRef}[$Index] = $AllDataFieldsRef;
 620     $SDFilesInfo{CommonDataFieldsRef}[$Index] = $CommonDataFieldsRef;
 621   }
 622 }
 623 
 624 # Process option values...
 625 sub ProcessOptions {
 626   %OptionsInfo = ();
 627 
 628   $OptionsInfo{AromaticityModel} = $Options{aromaticitymodel};
 629 
 630   # Process property name related options...
 631   ProcessPropertyNamesOption();
 632 
 633   # Setup RuleOf3 and RuleOf5 violation calculations...
 634   $OptionsInfo{RuleOf3Violations} = ($Options{ruleof3violations} =~ /^Yes$/i) ? 1 : 0;
 635   $OptionsInfo{RuleOf5Violations} = ($Options{ruleof5violations} =~ /^Yes$/i) ? 1 : 0;
 636 
 637   $OptionsInfo{CompoundIDMode} = $Options{compoundidmode};
 638   $OptionsInfo{CompoundIDLabel} = $Options{compoundidlabel};
 639   $OptionsInfo{DataFieldsMode} = $Options{datafieldsmode};
 640 
 641   my(@SpecifiedDataFields);
 642   @SpecifiedDataFields = ();
 643 
 644   @{$OptionsInfo{SpecifiedDataFields}} = ();
 645   $OptionsInfo{CompoundID} = '';
 646 
 647   if ($Options{datafieldsmode} =~ /^CompoundID$/i) {
 648     if ($Options{compoundidmode} =~ /^DataField$/i) {
 649       if (!$Options{compoundid}) {
 650         die "Error: You must specify a value for \"--CompoundID\" option in \"DataField\" \"--CompoundIDMode\". \n";
 651       }
 652       $OptionsInfo{CompoundID} = $Options{compoundid};
 653     }
 654     elsif ($Options{compoundidmode} =~ /^(LabelPrefix|MolNameOrLabelPrefix)$/i) {
 655       $OptionsInfo{CompoundID} = $Options{compoundid} ? $Options{compoundid} : 'Cmpd';
 656     }
 657   }
 658   elsif ($Options{datafieldsmode} =~ /^Specify$/i) {
 659     if (!$Options{datafields}) {
 660       die "Error: You must specify a value for \"--DataFields\" option in \"Specify\" \"-d, --DataFieldsMode\". \n";
 661     }
 662     @SpecifiedDataFields = split /\,/, $Options{datafields};
 663     push @{$OptionsInfo{SpecifiedDataFields}}, @SpecifiedDataFields;
 664   }
 665 
 666   # Types of hydrogen bonds...
 667   $OptionsInfo{HydrogenBonds} = $Options{hydrogenbonds};
 668 
 669   # Process precision value parameters...
 670   ProcessPrecisionOption();
 671 
 672   # Process rotatable bonds parameters...
 673   ProcessRotatableBondsOption();
 674 
 675   # Process TPSA parameters...
 676   ProcessTPSAOption();
 677 
 678   # Process molecular complexity parameters...
 679   ProcessMolecularComplexityOption();
 680 
 681   $OptionsInfo{Filter} = ($Options{filter} =~ /^Yes$/i) ? 1 : 0;
 682 
 683   $OptionsInfo{KeepLargestComponent} = ($Options{keeplargestcomponent} =~ /^Yes$/i) ? 1 : 0;
 684 
 685   $OptionsInfo{Output} = $Options{output};
 686   $OptionsInfo{SDOutput} = ($Options{output} =~ /^(SD|Both)$/i) ? 1 : 0;
 687   $OptionsInfo{TextOutput} = ($Options{output} =~ /^(Text|Both)$/i) ? 1 : 0;
 688 
 689   $OptionsInfo{OutDelim} = ($Options{outdelim} =~ /tab/i ) ? "\t" : (($Options{outdelim} =~ /semicolon/i) ? "\;" : "\,");
 690   $OptionsInfo{OutQuote} = ($Options{quote} =~ /^Yes$/i) ? 1 : 0;
 691 
 692   $OptionsInfo{OverwriteFiles} = $Options{overwrite} ? 1 : 0;
 693   $OptionsInfo{OutFileRoot} = $Options{root} ? $Options{root} : 0;
 694 }
 695 
 696 # Process property name related options...
 697 #
 698 sub ProcessPropertyNamesOption {
 699 
 700   # Setup supported physicochemical properties...
 701   my($SupportedProperty);
 702 
 703   @{$OptionsInfo{SupportedPropertyNames}} = ();
 704   %{$OptionsInfo{SupportedPropertyNamesMap}} = ();
 705 
 706   @{$OptionsInfo{RuleOf5PropertyNames}} = ();
 707   %{$OptionsInfo{RuleOf5MaxPropertyValuesMap}} = ();
 708 
 709   @{$OptionsInfo{RuleOf3PropertyNames}} = ();
 710   %{$OptionsInfo{RuleOf3MaxPropertyValuesMap}} = ();
 711 
 712   @{$OptionsInfo{DefaultPropertyNames}} = ();
 713 
 714   @{$OptionsInfo{SupportedPropertyNames}} = qw(MolecularWeight ExactMass HeavyAtoms Rings AromaticRings MolecularVolume RotatableBonds HydrogenBondDonors HydrogenBondAcceptors SLogP SMR TPSA Fsp3Carbons Sp3Carbons MolecularComplexity);
 715 
 716   @{$OptionsInfo{RuleOf5PropertyNames}} = qw(MolecularWeight HydrogenBondDonors HydrogenBondAcceptors SLogP);
 717   %{$OptionsInfo{RuleOf5MaxPropertyValuesMap}} = ('MolecularWeight' => 500, 'HydrogenBondDonors' => 5, 'HydrogenBondAcceptors' => 10,  'SLogP' => 5);
 718 
 719   @{$OptionsInfo{RuleOf3PropertyNames}} = qw(MolecularWeight RotatableBonds HydrogenBondDonors HydrogenBondAcceptors SLogP TPSA);
 720   %{$OptionsInfo{RuleOf3MaxPropertyValuesMap}} = ('MolecularWeight' => 300, 'RotatableBonds' => 3, 'HydrogenBondDonors' => 3, 'HydrogenBondAcceptors' => 3, 'SLogP' => 3, 'TPSA' => 60);
 721 
 722   @{$OptionsInfo{DefaultPropertyNames}} = qw(MolecularWeight HeavyAtoms MolecularVolume RotatableBonds HydrogenBondDonors HydrogenBondAcceptors SLogP TPSA);
 723 
 724   for $SupportedProperty (@{$OptionsInfo{SupportedPropertyNames}}) {
 725     $OptionsInfo{SupportedPropertyNamesMap}{lc($SupportedProperty)} = $SupportedProperty;
 726   }
 727 
 728   # Process specified properties....
 729   my($SpecifiedPropertyName, @SpecifiedPropertyNames, %SpecifiedPropertyNamesMap);
 730 
 731   @SpecifiedPropertyNames = ();
 732   %SpecifiedPropertyNamesMap = ();
 733 
 734   @{$OptionsInfo{SpecifiedPropertyNames}} = ();
 735   %{$OptionsInfo{SpecifiedPropertyNamesMap}} = ();
 736 
 737   if ($Options{mode} =~ /^All$/i) {
 738     @SpecifiedPropertyNames = @{$OptionsInfo{SupportedPropertyNames}};
 739   }
 740   elsif ($Options{mode} =~ /^RuleOf5$/i) {
 741     @SpecifiedPropertyNames = @{$OptionsInfo{RuleOf5PropertyNames}};
 742   }
 743   elsif ($Options{mode} =~ /^RuleOf3$/i) {
 744     @SpecifiedPropertyNames = @{$OptionsInfo{RuleOf3PropertyNames}};
 745   }
 746   elsif (IsEmpty($Options{mode})) {
 747     @SpecifiedPropertyNames = @{$OptionsInfo{DefaultPropertyNames}};
 748   }
 749   else {
 750     # Comma delimited lisr of specified property names...
 751     my($Mode, $PropertyName, @PropertyNames, @UnsupportedPropertyNames);
 752 
 753     $Mode = $Options{mode};
 754     $Mode =~ s/ //g;
 755 
 756     @PropertyNames = split ",", $Mode;
 757     @UnsupportedPropertyNames = ();
 758 
 759     for $PropertyName (@PropertyNames) {
 760       if (exists($OptionsInfo{SupportedPropertyNamesMap}{lc($PropertyName)})) {
 761         push @SpecifiedPropertyNames, $PropertyName;
 762       }
 763       else {
 764         push @UnsupportedPropertyNames, $PropertyName;
 765       }
 766     }
 767     if (@UnsupportedPropertyNames) {
 768       if (@UnsupportedPropertyNames > 1) {
 769         warn "Error: The physicochemical property names specified - ", JoinWords(\@UnsupportedPropertyNames, ", ", 0)," - for option \"-m --mode\" are not valid.\n";
 770       }
 771       else {
 772         warn "Error: The physicochemical property name specified, @UnsupportedPropertyNames , for option \"-m --mode\" is not valid.\n";
 773       }
 774       die "Allowed values:", JoinWords(\@{$OptionsInfo{SupportedPropertyNames}}, ", ", 0), "\n";
 775     }
 776     if (!@SpecifiedPropertyNames) {
 777       die "Error: No valid physicochemical property names specified for option \"-m --mode\".\n";
 778     }
 779   }
 780 
 781   # Set up specified property names map...
 782   PROPERTY: for $SpecifiedPropertyName (@SpecifiedPropertyNames) {
 783     if (exists $SpecifiedPropertyNamesMap{lc($SpecifiedPropertyName)}) {
 784       warn "Warning: The physicochemical property name, $SpecifiedPropertyName, is specified multiple times as value of option \"-m --mode\" .\n";
 785       next PROPERTY;
 786     }
 787     # Canonical specified property name...
 788     $SpecifiedPropertyNamesMap{lc($SpecifiedPropertyName)} = $OptionsInfo{SupportedPropertyNamesMap}{lc($SpecifiedPropertyName)};
 789   }
 790 
 791   # Make sure for calculation of  RuleOf3Violations, all appropriate property names are specified...
 792   if ($Options{ruleof3violations} =~ /^Yes$/i && $Options{mode} =~ /^RuleOf5$/i) {
 793     die "Error: The value specified, $Options{ruleof3violations}, for  \"--RuleOf3Violations\" option in \"RuleOf5\" \"-m --Mode\" is not valid. You must specify RuleOf3 value for \"-m --Mode\" to calculate RuleOf3 violations.\n";
 794   }
 795 
 796   if ($Options{ruleof3violations} =~ /^Yes$/i) {
 797     my($RuleOf3PropertyName, @MissingRuleOf3Names);
 798 
 799     @MissingRuleOf3Names = ();
 800     PROPERTY: for $RuleOf3PropertyName (@{$OptionsInfo{RuleOf3PropertyNames}}) {
 801       if (exists $SpecifiedPropertyNamesMap{lc($RuleOf3PropertyName)}) {
 802         next PROPERTY;
 803       }
 804       push @MissingRuleOf3Names, $RuleOf3PropertyName;
 805 
 806       # Add property name to specified properties names list and map...
 807       push @SpecifiedPropertyNames, $RuleOf3PropertyName;
 808       $SpecifiedPropertyNamesMap{lc($RuleOf3PropertyName)} = $OptionsInfo{SupportedPropertyNamesMap}{lc($RuleOf3PropertyName)};
 809     }
 810     if (@MissingRuleOf3Names) {
 811       warn "Warning: The following physicochemical property names not specified in \"-m --Mode\" option are required for calculating RuleOf3Violations and have been added to the list of property names: @MissingRuleOf3Names\n";
 812     }
 813   }
 814 
 815   # Make sure for calculation of  RuleOf5Violations, all appropriate property names are specified...
 816   if ($Options{ruleof5violations} =~ /^Yes$/i && $Options{mode} =~ /^RuleOf3$/i) {
 817     die "Error: The value specified, $Options{ruleof5violations}, for  \"--RuleOf5Violations\" option in \"RuleOf3\" \"-m --Mode\" is not valid. You must specify RuleOf5 value for \"-m --Mode\" to calculate RuleOf5 violations.\n";
 818   }
 819 
 820   if ($Options{ruleof5violations} =~ /^Yes$/i) {
 821     my($RuleOf5PropertyName, @MissingRuleOf5Names);
 822 
 823     @MissingRuleOf5Names = ();
 824     PROPERTY: for $RuleOf5PropertyName (@{$OptionsInfo{RuleOf5PropertyNames}}) {
 825       if (exists $SpecifiedPropertyNamesMap{lc($RuleOf5PropertyName)}) {
 826         next PROPERTY;
 827       }
 828       push @MissingRuleOf5Names, $RuleOf5PropertyName;
 829 
 830       # Add property name to specified properties names list and map...
 831       push @SpecifiedPropertyNames, $RuleOf5PropertyName;
 832       $SpecifiedPropertyNamesMap{lc($RuleOf5PropertyName)} = $OptionsInfo{SupportedPropertyNamesMap}{lc($RuleOf5PropertyName)};
 833     }
 834     if (@MissingRuleOf5Names) {
 835       warn "Warning: The following physicochemical property names not specified in \"-m --Mode\" option are required for calculating RuleOf5Violations and have been added to the list of property names: @MissingRuleOf5Names\n";
 836     }
 837   }
 838   $OptionsInfo{Mode} = $Options{mode};
 839 
 840   # Setup canonical specified property names corresponding to supported names in mixed case...
 841   my(@SpecifiedCanonicalPropertyNames);
 842 
 843   @SpecifiedCanonicalPropertyNames = ();
 844   for $SpecifiedPropertyName (@SpecifiedPropertyNames) {
 845     push @SpecifiedCanonicalPropertyNames, $SpecifiedPropertyNamesMap{lc($SpecifiedPropertyName)};
 846   }
 847   @{$OptionsInfo{SpecifiedPropertyNames}} = @SpecifiedCanonicalPropertyNames;
 848   %{$OptionsInfo{SpecifiedPropertyNamesMap}} = %SpecifiedPropertyNamesMap;
 849 
 850   # Based on specified property names, figure out whether hydrogens need to be added before
 851   # calculation of properties...
 852   #
 853   $OptionsInfo{AddHydrogens} = 0;
 854   if (exists($SpecifiedPropertyNamesMap{lc('MolecularVolume')}) || exists($SpecifiedPropertyNamesMap{lc('SLogP')}) || exists($SpecifiedPropertyNamesMap{lc('SMR')})) {
 855     $OptionsInfo{AddHydrogens} = 1;
 856   }
 857 }
 858 
 859 # Process precision option...
 860 #
 861 sub ProcessPrecisionOption {
 862   my($ParameterName, $ParameterValue, %PrecisionParametersMap, %PrecisionParameterNamesMap);
 863 
 864   %{$OptionsInfo{PrecisionParametersMap}} = ();
 865 
 866   %PrecisionParametersMap = ('WeightPrecision' => 2, 'MassPrecision' => 4);
 867   %PrecisionParameterNamesMap = ('molecularweight' => 'WeightPrecision', 'exactmass' => 'MassPrecision');
 868 
 869   if ($Options{precision}) {
 870     # Process specified values...
 871     my($Index, $SpecifiedPrecision, @SpecifiedPrecisionValuePairs);
 872 
 873     $SpecifiedPrecision = $Options{precision};
 874     $SpecifiedPrecision =~ s/ //g;
 875     @SpecifiedPrecisionValuePairs = split ",", $SpecifiedPrecision;
 876     if (@SpecifiedPrecisionValuePairs % 2) {
 877       die "Error: Invalid number of values specified using \"--Precision\" option: It must contain even number of values.\n";
 878     }
 879     for ($Index = 0; (($Index + 1) < @SpecifiedPrecisionValuePairs); $Index += 2 ) {
 880       $ParameterName = $SpecifiedPrecisionValuePairs[$Index];
 881       $ParameterValue = $SpecifiedPrecisionValuePairs[$Index + 1];
 882       if (!exists $PrecisionParameterNamesMap{lc($ParameterName)}) {
 883         die "Error: The precision parameter name specified, $ParameterName, for option \"--Precision\" is not valid.\n";
 884       }
 885       if (!IsPositiveInteger($ParameterValue)) {
 886         die "Error: The parameter value specified, $ParameterValue, for parameter name, $ParameterName in option \"--Precision\" is not valid. Allowed values: positive integer. \n";
 887       }
 888       $ParameterName = $PrecisionParameterNamesMap{lc($ParameterName)};
 889       $PrecisionParametersMap{$ParameterName} = $ParameterValue;
 890     }
 891   }
 892   $OptionsInfo{Precision} = $Options{precision};
 893   %{$OptionsInfo{PrecisionParametersMap}} = %PrecisionParametersMap;
 894 }
 895 
 896 # Process rotatable bonds option...
 897 sub ProcessRotatableBondsOption {
 898   my($ParameterName, $ParameterValue, %RotatableBondsParametersMap, %RotatableBondsParameterNamesMap);
 899 
 900   %{$OptionsInfo{RotatableBondsParametersMap}} = ();
 901   %RotatableBondsParametersMap = ('IgnoreTerminalBonds' => 1, 'IgnoreBondsToTripleBonds' => 1, 'IgnoreAmideBonds' => 1, 'IgnoreThioamideBonds' => 1, 'IgnoreSulfonamideBonds' => 1);
 902 
 903   for $ParameterName (keys %RotatableBondsParametersMap) {
 904     $RotatableBondsParameterNamesMap{lc($ParameterName)} = $ParameterName;
 905   }
 906 
 907   if ($Options{rotatablebonds}) {
 908     # Process specified values...
 909     my($Index, $SpecifiedRotatableBonds, @SpecifiedRotatableBondsValuePairs);
 910 
 911     $SpecifiedRotatableBonds = $Options{rotatablebonds};
 912     $SpecifiedRotatableBonds =~ s/ //g;
 913     @SpecifiedRotatableBondsValuePairs = split ",", $SpecifiedRotatableBonds;
 914     if (@SpecifiedRotatableBondsValuePairs % 2) {
 915       die "Error: Invalid number of values specified using \"--RotatableBonds\" option: It must contain even number of values.\n";
 916     }
 917     for ($Index = 0; (($Index + 1) < @SpecifiedRotatableBondsValuePairs); $Index += 2 ) {
 918       $ParameterName = $SpecifiedRotatableBondsValuePairs[$Index];
 919       $ParameterValue = $SpecifiedRotatableBondsValuePairs[$Index + 1];
 920       if (!exists $RotatableBondsParameterNamesMap{lc($ParameterName)}) {
 921         die "Error: The rotatable bonds parameter name specified, $ParameterName, for option \"--RotatableBonds\" is not valid.\n";
 922       }
 923       if ($ParameterValue !~ /^(Yes|No)$/i) {
 924         die "Error: The parameter value specified, $ParameterValue, for parameter name, $ParameterName in option \"--RotatableBonds\" is not valid. Allowed values: Yes or No. \n";
 925       }
 926       $ParameterName = $RotatableBondsParameterNamesMap{lc($ParameterName)};
 927       $ParameterValue = ($ParameterValue =~ /^Yes$/i) ? 1 : 0;
 928       $RotatableBondsParametersMap{$ParameterName} = $ParameterValue;
 929     }
 930   }
 931   $OptionsInfo{RotatableBonds} = $Options{rotatablebonds};
 932   %{$OptionsInfo{RotatableBondsParametersMap}} = %RotatableBondsParametersMap;
 933 }
 934 
 935 # Process TPSA option...
 936 #
 937 sub ProcessTPSAOption {
 938   my($ParameterName, $ParameterValue, %TPSAParametersMap, %TPSAParameterNamesMap);
 939 
 940   %{$OptionsInfo{TPSAParametersMap}} = ();
 941 
 942   %TPSAParametersMap = ('IgnorePhosphorus' => 1, 'IgnoreSulfur' => 1);
 943   for $ParameterName (keys %TPSAParametersMap) {
 944     $TPSAParameterNamesMap{lc($ParameterName)} = $ParameterName;
 945   }
 946 
 947   if ($Options{tpsa}) {
 948     # Process specified values...
 949     my($Index, $SpecifiedTPSA, @SpecifiedTPSAValuePairs);
 950 
 951     $SpecifiedTPSA = $Options{tpsa};
 952     $SpecifiedTPSA =~ s/ //g;
 953     @SpecifiedTPSAValuePairs = split ",", $SpecifiedTPSA;
 954     if (@SpecifiedTPSAValuePairs % 2) {
 955       die "Error: Invalid number of values specified using \"--TPSA\" option: It must contain even number of values.\n";
 956     }
 957     for ($Index = 0; (($Index + 1) < @SpecifiedTPSAValuePairs); $Index += 2 ) {
 958       $ParameterName = $SpecifiedTPSAValuePairs[$Index];
 959       $ParameterValue = $SpecifiedTPSAValuePairs[$Index + 1];
 960       if (!exists $TPSAParameterNamesMap{lc($ParameterName)}) {
 961         die "Error: The TPSA parameter name specified, $ParameterName, for option \"--TPSA\" is not valid.\n";
 962       }
 963       if ($ParameterValue !~ /^(Yes|No)$/i) {
 964         die "Error: The parameter value specified, $ParameterValue, for parameter name, $ParameterName in option \"--TPSA\" is not valid. Allowed values: Yes or No. \n";
 965       }
 966       $ParameterName = $TPSAParameterNamesMap{lc($ParameterName)};
 967       $ParameterValue = ($ParameterValue =~ /^Yes$/i) ? 1 : 0;
 968       $TPSAParametersMap{$ParameterName} = $ParameterValue;
 969     }
 970   }
 971   $OptionsInfo{TPSA} = $Options{tpsa};
 972   %{$OptionsInfo{TPSAParametersMap}} = %TPSAParametersMap;
 973 }
 974 
 975 # Process molecular complexity parameters...
 976 #
 977 sub ProcessMolecularComplexityOption {
 978   my($MolecularComplexityType, $ParameterName, $ParameterValue, @ParameterNames, @ParameterValues, @AtomIdentifierTypeParameters, %ComplexityParametersMap, %ComplexityParameterNamesMap);
 979 
 980   %{$OptionsInfo{MolecularComplexityParametersMap}} = ();
 981 
 982   %ComplexityParametersMap = ('MolecularComplexityType' => '', 'AtomIdentifierType' => '',
 983                               'AtomicInvariantsToUse' => '', 'FunctionalClassesToUse' => '',
 984                               'MACCSKeysSize' => '166', 'NeighborhoodRadius' => '2',
 985                               'MinPathLength' => '1', 'MaxPathLength' => '8', 'UseBondSymbols' => '1',
 986                               'MinDistance' => '1', 'MaxDistance' => '10', 'UseTriangleInequality' => '',
 987                               'DistanceBinSize' => '2', 'NormalizationMethodology' => 'None');
 988 
 989   %ComplexityParameterNamesMap = ();
 990   for $ParameterName (keys %ComplexityParametersMap) {
 991     $ComplexityParameterNamesMap{lc($ParameterName)} = $ParameterName;
 992   }
 993 
 994   if ($Options{molecularcomplexity}) {
 995     # Process specified values...
 996     my($Index, $SpecifiedComplexity, @SpecifiedComplexityValuePairs);
 997 
 998     $SpecifiedComplexity = $Options{molecularcomplexity};
 999 
1000     @SpecifiedComplexityValuePairs = split ",", $SpecifiedComplexity;
1001     if (@SpecifiedComplexityValuePairs % 2) {
1002       die "Error: Invalid number of values specified using \"--MolecularComplexity\" option: It must contain even number of values.\n";
1003     }
1004 
1005     for ($Index = 0; (($Index + 1) < @SpecifiedComplexityValuePairs); $Index += 2 ) {
1006       $ParameterName = $SpecifiedComplexityValuePairs[$Index];
1007       $ParameterValue = $SpecifiedComplexityValuePairs[$Index + 1];
1008 
1009       $ParameterName = RemoveLeadingAndTrailingWhiteSpaces($ParameterName);
1010       $ParameterValue = RemoveLeadingAndTrailingWhiteSpaces($ParameterValue);
1011 
1012       if (!exists $ComplexityParameterNamesMap{lc($ParameterName)}) {
1013         die "Error: The molecular complexity parameter name specified, $ParameterName, for option \"--MolecularComplexity\" is not valid.\n";
1014       }
1015       $ParameterName = $ComplexityParameterNamesMap{lc($ParameterName)};
1016 
1017       if ($ParameterName =~ /^AtomicInvariantsToUse$/i) {
1018         my($AtomSymbolFound);
1019 
1020         $AtomSymbolFound = 0;
1021         @ParameterValues = split(' ', $ParameterValue);
1022         for $ParameterValue (@ParameterValues) {
1023           if (!AtomTypes::AtomicInvariantsAtomTypes::IsAtomicInvariantAvailable($ParameterValue)) {
1024             die "Error: The atomic invariant specified, $ParameterValue, for  AtomicInvariantsToUse in option \"--MolecularComplexity\" is not valid.\n";
1025           }
1026           if ($ParameterValue =~ /^(AS|AtomSymbol)$/i) {
1027             $AtomSymbolFound = 1;
1028           }
1029         }
1030         if (!$AtomSymbolFound) {
1031           die "Error: The atomic invariants specified using AtomicInvariantsToUse in option \"--MolecularComplexity\" is not valid: AtomicInvariant atom symbol, AS or AtomSymbol, must be specified.\n";
1032         }
1033         $ParameterValue = JoinWords(\@ParameterValues, ",", 0);
1034       }
1035       elsif ($ParameterName =~ /^FunctionalClassesToUse$/i) {
1036         @ParameterValues = split(' ', $ParameterValue);
1037         for $ParameterValue (@ParameterValues) {
1038           if (!AtomTypes::FunctionalClassAtomTypes::IsFunctionalClassAvailable($ParameterValue)) {
1039             die "Error: The functional class specified, $ParameterValue, for  FunctionalClassesToUse in option \"--MolecularComplexity\" is not valid.\n";
1040           }
1041         }
1042         $ParameterValue = JoinWords(\@ParameterValues, ",", 0);
1043       }
1044       else {
1045         if ($ParameterValue =~ / /) {
1046           $ParameterValue =~ s/ //g;
1047         }
1048         if ($ParameterValue =~ /^(Yes|No)$/i) {
1049           $ParameterValue = ($ParameterValue =~ /^Yes$/i) ? 1 : 0;
1050         }
1051       }
1052 
1053       if ($ParameterName =~ /^MolecularComplexityType$/i) {
1054         if ($ParameterValue !~ /^(AtomTypesFingerprints|ExtendedConnectivityFingerprints|MACCSKeys|PathLengthFingerprints|TopologicalAtomPairsFingerprints|TopologicalAtomTripletsFingerprints|TopologicalAtomTorsionsFingerprints|TopologicalPharmacophoreAtomPairsFingerprints|TopologicalPharmacophoreAtomTripletsFingerprints)$/i) {
1055           die "Error: The parameter value specified, $ParameterValue, for parameter name, $ParameterName in option \"--MolecularComplexity\" is not valid. Allowed values: AtomTypesFingerprints, ExtendedConnectivityFingerprints, MACCSKeys, PathLengthFingerprints, TopologicalAtomPairsFingerprints, TopologicalAtomTripletsFingerprints, TopologicalAtomTorsionsFingerprints, TopologicalPharmacophoreAtomPairsFingerprints, or TopologicalPharmacophoreAtomTripletsFingerprints..\n";
1056         }
1057       }
1058       elsif ($ParameterName =~ /^AtomIdentifierType$/i) {
1059         if ($ParameterValue !~ /^(AtomicInvariantsAtomTypes|FunctionalClassAtomTypes|DREIDINGAtomTypes|EStateAtomTypes|MMFF94AtomTypes|SLogPAtomTypes|SYBYLAtomTypes|TPSAAtomTypes|UFFAtomTypes)$/i) {
1060           die "Error: The parameter value specified, $ParameterValue, for parameter name, $ParameterName in option \"--MolecularComplexity\" is not valid. Supported atom identifier types in current release of MayaChemTools: AtomicInvariantsAtomTypes, FunctionalClassAtomTypes, DREIDINGAtomTypes, EStateAtomTypes, MMFF94AtomTypes, SLogPAtomTypes, SYBYLAtomTypes, TPSAAtomTypes and UFFAtomTypes.\n";
1061         }
1062       }
1063       elsif ($ParameterName =~ /^(MACCSKeysSize|MinPathLength|MaxPathLength|MinDistance|MaxDistance|DistanceBinSize)$/i) {
1064         if (!IsPositiveInteger($ParameterValue)) {
1065           die "Error: The parameter value specified, $ParameterValue, for parameter name, $ParameterName in option \"--MolecularComplexity\" is not valid. Allowed values: positive integer. \n";
1066         }
1067       }
1068       elsif ($ParameterName =~ /^NeighborhoodRadius$/i) {
1069         if (!(IsInteger($ParameterValue) && $ParameterValue >=0)) {
1070           die "Error: The parameter value specified, $ParameterValue, for parameter name, $ParameterName in option \"--MolecularComplexity\" is not valid. Allowed values: 0 or positive integer. \n";
1071         }
1072       }
1073       elsif ($ParameterName =~ /^NormalizationMethodology$/i) {
1074         if ($ParameterValue !~ /^(None|ByHeavyAtomsCount|ByPossibleKeysCount)$/i) {
1075           die "Error: The parameter value specified, $ParameterValue, for parameter name, $ParameterName in option \"--MolecularComplexity\" is not valid. Allowed values: None, ByHeavyAtomsCount, or ByPossibleKeysCount\n";
1076         }
1077       }
1078       $ComplexityParametersMap{$ParameterName} = $ParameterValue;
1079     }
1080 
1081     if ($ComplexityParametersMap{MACCSKeysSize} !~ /^(166|322)$/i) {
1082       die "Error: The parameter value specified, $ComplexityParametersMap{MACCSKeysSize}, for parameter name, MACCSKeysSize in option \"--MolecularComplexity\" is not valid. Allowed values: 166 or 322\n";
1083     }
1084     if ($ComplexityParametersMap{MinPathLength} > $ComplexityParametersMap{MaxPathLength}) {
1085       die "Error: The parameter value specified for MinPathLength, $ComplexityParametersMap{MinPathLength}, must be <= MaxPathLength, $ComplexityParametersMap{MaxPathLength} ...\n";
1086     }
1087     if ($ComplexityParametersMap{MinDistance} > $ComplexityParametersMap{MaxDistance}) {
1088       die "Error: The parameter value specified for MinDistance, $ComplexityParametersMap{MinDistance}, must be <= MaxDistance, $ComplexityParametersMap{MaxDistance} ...\n";
1089     }
1090   }
1091 
1092   # Set default parameter values...
1093 
1094   if (IsEmpty($ComplexityParametersMap{MolecularComplexityType})) {
1095     $ComplexityParametersMap{MolecularComplexityType} = 'MACCSKeys';
1096   }
1097   $MolecularComplexityType = $ComplexityParametersMap{MolecularComplexityType};
1098 
1099 
1100   if (IsEmpty($ComplexityParametersMap{AtomIdentifierType})) {
1101     $ComplexityParametersMap{AtomIdentifierType} = ($MolecularComplexityType =~ /^(TopologicalPharmacophoreAtomPairsFingerprints|TopologicalPharmacophoreAtomTripletsFingerprints)$/i) ? "FunctionalClassAtomTypes" : "AtomicInvariantsAtomTypes";
1102   }
1103 
1104   if (IsEmpty($ComplexityParametersMap{AtomicInvariantsToUse})) {
1105     my($AtomicInvariantsToUse);
1106 
1107     if ($MolecularComplexityType =~ /^(AtomTypesFingerprints|TopologicalAtomPairsFingerprints|TopologicalAtomTripletsFingerprints|TopologicalAtomTorsionsFingerprints)$/i) {
1108       $AtomicInvariantsToUse = "AS,X,BO,H,FC";
1109     }
1110     elsif ($MolecularComplexityType =~ /^ExtendedConnectivityFingerprints$/i) {
1111       $AtomicInvariantsToUse = "AS,X,BO,H,FC,MN";
1112     }
1113     else {
1114       $AtomicInvariantsToUse = "AS";
1115     }
1116     $ComplexityParametersMap{AtomicInvariantsToUse} = $AtomicInvariantsToUse;
1117   }
1118 
1119   if (IsEmpty($ComplexityParametersMap{FunctionalClassesToUse})) {
1120     my($FunctionalClassesToUse);
1121 
1122     if ($MolecularComplexityType =~ /^TopologicalPharmacophoreAtomPairsFingerprints$/i) {
1123       $FunctionalClassesToUse = "HBD,HBA,PI,NI,H";
1124     }
1125     elsif ($MolecularComplexityType =~ /^TopologicalPharmacophoreAtomTripletsFingerprints$/i) {
1126       $FunctionalClassesToUse = "HBD,HBA,PI,NI,H,Ar";
1127     }
1128     else {
1129       $FunctionalClassesToUse = "HBD,HBA,PI,NI,H,Ar,Hal";
1130     }
1131     $ComplexityParametersMap{FunctionalClassesToUse} = $FunctionalClassesToUse;
1132   }
1133 
1134   my(@AtomicInvariantsToUse);
1135   @AtomicInvariantsToUse = split ',', $ComplexityParametersMap{AtomicInvariantsToUse};
1136   $ComplexityParametersMap{AtomicInvariantsToUse} = \@AtomicInvariantsToUse;
1137 
1138   my(@FunctionalClassesToUse);
1139   @FunctionalClassesToUse = split ',', $ComplexityParametersMap{FunctionalClassesToUse};
1140   $ComplexityParametersMap{FunctionalClassesToUse} = \@FunctionalClassesToUse;
1141 
1142   if (IsEmpty($ComplexityParametersMap{UseTriangleInequality})) {
1143     $ComplexityParametersMap{UseTriangleInequality} = 0;
1144     if ($MolecularComplexityType =~ /^TopologicalPharmacophoreAtomTripletsFingerprints$/i) {
1145       $ComplexityParametersMap{UseTriangleInequality} = 1;
1146     }
1147   }
1148 
1149   if ($MolecularComplexityType =~ /^(TopologicalPharmacophoreAtomPairsFingerprints|TopologicalPharmacophoreAtomTripletsFingerprints)$/i) {
1150     if ($ComplexityParametersMap{AtomIdentifierType} !~ /^FunctionalClassAtomTypes$/i) {
1151       die "Error: The parameter value specified for AtomIdentifierType, $ComplexityParametersMap{AtomIdentifierType}, in option \"--MolecularComplexity\" is not valid for MolecularComplexityType, $MolecularComplexityType: Allowed value: FunctionalClassAtomTypes...\n";
1152     }
1153   }
1154 
1155   # Set up approprate paremeter names for specified molecular complexity...
1156 
1157   @ParameterNames = ();
1158   push @ParameterNames, 'MolecularComplexityType';
1159 
1160   @AtomIdentifierTypeParameters = ();
1161   push @AtomIdentifierTypeParameters, 'AtomIdentifierType';
1162   if ($ComplexityParametersMap{AtomIdentifierType} =~ /^AtomicInvariantsAtomTypes$/i) {
1163     push @AtomIdentifierTypeParameters, 'AtomicInvariantsToUse';
1164   }
1165   elsif ($ComplexityParametersMap{AtomIdentifierType} =~ /^FunctionalClassAtomTypes$/i) {
1166     push @AtomIdentifierTypeParameters, 'FunctionalClassesToUse';
1167   }
1168 
1169   COMPLEXITYTYPE: {
1170     if ($MolecularComplexityType =~ /^AtomTypesFingerprints$/i) {
1171       push @ParameterNames, @AtomIdentifierTypeParameters;
1172       last COMPLEXITYTYPE;
1173     }
1174     if ($MolecularComplexityType =~ /^ExtendedConnectivityFingerprints$/i) {
1175       push @ParameterNames, @AtomIdentifierTypeParameters;
1176       push @ParameterNames, ('NeighborhoodRadius', 'NormalizationMethodology');
1177       last COMPLEXITYTYPE;
1178     }
1179     if ($MolecularComplexityType =~ /^MACCSKeys$/i) {
1180       push @ParameterNames, 'MACCSKeysSize';
1181       last COMPLEXITYTYPE;
1182     }
1183     if ($MolecularComplexityType =~ /^PathLengthFingerprints$/i) {
1184       push @ParameterNames, @AtomIdentifierTypeParameters;
1185       push @ParameterNames, ('MinPathLength', 'MaxPathLength', 'UseBondSymbols');
1186       last COMPLEXITYTYPE;
1187     }
1188     if ($MolecularComplexityType =~ /^TopologicalAtomPairsFingerprints$/i) {
1189       push @ParameterNames, @AtomIdentifierTypeParameters;
1190       push @ParameterNames, ('MinDistance', 'MaxDistance');
1191       last COMPLEXITYTYPE;
1192     }
1193     if ($MolecularComplexityType =~ /^TopologicalAtomTripletsFingerprints$/i) {
1194       push @ParameterNames, @AtomIdentifierTypeParameters;
1195       push @ParameterNames, ('MinDistance', 'MaxDistance', 'UseTriangleInequality');
1196       last COMPLEXITYTYPE;
1197     }
1198     if ($MolecularComplexityType =~ /^TopologicalAtomTorsionsFingerprints$/i) {
1199       push @ParameterNames, @AtomIdentifierTypeParameters;
1200       last COMPLEXITYTYPE;
1201     }
1202     if ($MolecularComplexityType =~ /^TopologicalPharmacophoreAtomPairsFingerprints$/i) {
1203       push @ParameterNames, ('AtomIdentifierType', 'FunctionalClassesToUse', 'MinDistance', 'MaxDistance', 'NormalizationMethodology');
1204       last COMPLEXITYTYPE;
1205     }
1206     if ($MolecularComplexityType =~ /^TopologicalPharmacophoreAtomTripletsFingerprints$/i) {
1207       push @ParameterNames, ('AtomIdentifierType', 'FunctionalClassesToUse', 'MinDistance', 'MaxDistance', 'UseTriangleInequality', 'NormalizationMethodology', 'DistanceBinSize');
1208       last COMPLEXITYTYPE;
1209     }
1210     die "Error: The parameter value specified, $ParameterValue, for parameter name MolecularComplexityType using \"--MolecularComplexity\" is not valid.\n";
1211   }
1212 
1213   $OptionsInfo{MolecularComplexity} = $Options{molecularcomplexity};
1214 
1215   %{$OptionsInfo{MolecularComplexityParametersMap}} = ();
1216   for $ParameterName (@ParameterNames) {
1217     $ParameterValue = $ComplexityParametersMap{$ParameterName};
1218     $OptionsInfo{MolecularComplexityParametersMap}{$ParameterName} = $ParameterValue;
1219   }
1220 }
1221 
# Setup script usage and retrieve command line arguments specified using various options...
#
# Initializes %Options with default values, retrieves command line options using
# GetOptions, changes to the working directory when one is specified, and validates
# values of options which only accept a fixed set of values. The script dies with
# an error message on the first invalid value.
#
sub SetupScriptUsage {
  my(@OptionSpecifications);

  # Default values for mode, molecularcomplexity, precision, rotatablebonds and tpsa
  # are set later; they start out empty...
  %Options = (
    aromaticitymodel => 'MayaChemToolsAromaticityModel',
    compoundidmode => 'LabelPrefix',
    compoundidlabel => 'CompoundID',
    datafieldsmode => 'CompoundID',
    filter => 'Yes',
    hydrogenbonds => 'HBondsType2',
    keeplargestcomponent => 'Yes',
    mode => '',
    molecularcomplexity => '',
    precision => '',
    output => 'text',
    outdelim => 'comma',
    quote => 'yes',
    rotatablebonds => '',
    ruleof3violations => 'No',
    ruleof5violations => 'No',
    tpsa => '',
  );

  # Retrieve all the options...
  @OptionSpecifications = (
    "aromaticitymodel=s", "compoundid=s", "compoundidlabel=s", "compoundidmode=s",
    "datafields=s", "datafieldsmode|d=s", "filter|f=s", "help|h", "hydrogenbonds=s",
    "keeplargestcomponent|k=s", "mode|m=s", "molecularcomplexity=s", "outdelim=s",
    "output=s", "overwrite|o", "precision=s", "rotatablebonds=s", "ruleof3violations=s",
    "ruleof5violations=s", "quote|q=s", "root|r=s", "tpsa=s", "workingdir|w=s",
  );
  GetOptions(\%Options, @OptionSpecifications)
    or die "\nTo get a list of valid options and their values, use \"$ScriptName -h\" or\n\"perl -S $ScriptName -h\" command and try again...\n";

  # Switch to the working directory before any further processing...
  if ($Options{workingdir}) {
    die "Error: The value specified, $Options{workingdir}, for option \"-w --workingdir\" is not a directory name.\n"
      if ! -d $Options{workingdir};
    chdir $Options{workingdir} or die "Error: Couldn't chdir $Options{workingdir}: $! \n";
  }

  if (!Molecule::IsSupportedAromaticityModel($Options{aromaticitymodel})) {
    my(@SupportedModels);

    @SupportedModels = Molecule::GetSupportedAromaticityModels();
    die "Error: The value specified, $Options{aromaticitymodel}, for option \"--AromaticityModel\" is not valid. Supported aromaticity models in current release of MayaChemTools: @SupportedModels\n";
  }

  # Validate options which only accept a fixed set of values...
  die "Error: The value specified, $Options{compoundidmode}, for option \"--CompoundIDMode\" is not valid. Allowed values: DataField, MolName, LabelPrefix or MolNameOrLabelPrefix\n"
    if $Options{compoundidmode} !~ /^(DataField|MolName|LabelPrefix|MolNameOrLabelPrefix)$/i;

  die "Error: The value specified, $Options{datafieldsmode}, for option \"-d, --DataFieldsMode\" is not valid. Allowed values: All, Common, Specify or CompoundID\n"
    if $Options{datafieldsmode} !~ /^(All|Common|Specify|CompoundID)$/i;

  die "Error: The value specified, $Options{filter}, for option \"-f, --Filter\" is not valid. Allowed values: Yes or No\n"
    if $Options{filter} !~ /^(Yes|No)$/i;

  die "Error: The value specified, $Options{hydrogenbonds}, for option \"--HydrogenBonds\" is not valid. Allowed values: HBondsType1, HydrogenBondsType1, HBondsType2, HydrogenBondsType2\n"
    if $Options{hydrogenbonds} !~ /^(HBondsType1|HydrogenBondsType1|HBondsType2|HydrogenBondsType2)$/i;

  die "Error: The value specified, $Options{keeplargestcomponent}, for option \"-k, --KeepLargestComponent\" is not valid. Allowed values: Yes or No\n"
    if $Options{keeplargestcomponent} !~ /^(Yes|No)$/i;

  die "Error: The value specified, $Options{output}, for option \"--output\" is not valid. Allowed values: SD, text, or both\n"
    if $Options{output} !~ /^(SD|text|both)$/i;

  die "Error: The value specified, $Options{outdelim}, for option \"--outdelim\" is not valid. Allowed values: comma, tab, or semicolon\n"
    if $Options{outdelim} !~ /^(comma|semicolon|tab)$/i;

  die "Error: The value specified, $Options{quote}, for option \"-q --quote\" is not valid. Allowed values: Yes or No\n"
    if $Options{quote} !~ /^(Yes|No)$/i;

  die "Error: The value specified, $Options{ruleof3violations}, for option \"--RuleOf3Violations\" is not valid. Allowed values: Yes or No\n"
    if $Options{ruleof3violations} !~ /^(Yes|No)$/i;

  die "Error: The value specified, $Options{ruleof5violations}, for option \"--RuleOf5Violations\" is not valid. Allowed values: Yes or No\n"
    if $Options{ruleof5violations} !~ /^(Yes|No)$/i;
}
1306