MayaChemTools

   1 #!/usr/bin/perl -w
   2 #
   3 # File: ExtractFromPDBFiles.pl
   4 # Author: Manish Sud <msud@san.rr.com>
   5 #
   6 # Copyright (C) 2024 Manish Sud. All rights reserved.
   7 #
   8 # This file is part of MayaChemTools.
   9 #
  10 # MayaChemTools is free software; you can redistribute it and/or modify it under
  11 # the terms of the GNU Lesser General Public License as published by the Free
  12 # Software Foundation; either version 3 of the License, or (at your option) any
  13 # later version.
  14 #
  15 # MayaChemTools is distributed in the hope that it will be useful, but without
  16 # any warranty; without even the implied warranty of merchantability of fitness
  17 # for a particular purpose.  See the GNU Lesser General Public License for more
  18 # details.
  19 #
  20 # You should have received a copy of the GNU Lesser General Public License
  21 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  22 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  23 # Boston, MA, 02111-1307, USA.
  24 #
  25 
  26 use strict;
  27 use FindBin; use lib "$FindBin::Bin/../lib";
  28 use Getopt::Long;
  29 use File::Basename;
  30 use Text::ParseWords;
  31 use Benchmark;
  32 use FileUtil;
  33 use TextUtil;
  34 use PDBFileUtil;
  35 use AminoAcids;
  36 use SequenceFileUtil;
  37 
  38 my($ScriptName, %Options, $StartTime, $EndTime, $TotalTime);
  39 
  40 # Autoflush STDOUT
  41 $| = 1;
  42 
  43 # Starting message...
  44 $ScriptName = basename($0);
  45 print "\n$ScriptName: Starting...\n\n";
  46 $StartTime = new Benchmark;
  47 
  48 # Get the options and setup script...
  49 SetupScriptUsage();
  50 if ($Options{help} || @ARGV < 1) {
  51   die GetUsageFromPod("$FindBin::Bin/$ScriptName");
  52 }
  53 
  54 my(@PDBFilesList);
  55 @PDBFilesList = ExpandFileNames(\@ARGV, "pdb");
  56 
  57 # Process options...
  58 print "Processing options...\n";
  59 my(%OptionsInfo);
  60 ProcessOptions();
  61 
  62 # Setup information about input files...
  63 print "Checking input PDB file(s)...\n";
  64 my(%PDBFilesInfo);
  65 RetrievePDBFilesInfo();
  66 
  67 # Process input files..
  68 my($FileIndex);
  69 if (@PDBFilesList > 1) {
  70   print "\nProcessing PDB files...\n";
  71 }
  72 for $FileIndex (0 .. $#PDBFilesList) {
  73   if ($PDBFilesInfo{FileOkay}[$FileIndex]) {
  74     print "\nProcessing file $PDBFilesList[$FileIndex]...\n";
  75     ExtractFromPDBFiles($FileIndex);
  76   }
  77 }
  78 print "\n$ScriptName:Done...\n\n";
  79 
  80 $EndTime = new Benchmark;
  81 $TotalTime = timediff ($EndTime, $StartTime);
  82 print "Total time: ", timestr($TotalTime), "\n";
  83 
  84 ###############################################################################
  85 
  86 # Extract appropriate information...
  87 sub ExtractFromPDBFiles {
  88   my($FileIndex) = @_;
  89   my($PDBFile, $PDBRecordLinesRef);
  90 
  91   # Get PDB data...
  92   $PDBFile = $PDBFilesList[$FileIndex];
  93   $PDBRecordLinesRef = ReadPDBFile($PDBFile);
  94 
  95   if ($OptionsInfo{Mode} =~ /Chains/i) {
  96     ExtractChains($FileIndex, $PDBRecordLinesRef);
  97   }
  98   elsif ($OptionsInfo{Mode} =~ /Sequences/i) {
  99     ExtractSequences($FileIndex, $PDBRecordLinesRef);
 100   }
 101   elsif ($OptionsInfo{Mode} =~ /^(Atoms|CAlphas|AtomNums|AtomsRange|AtomNames)$/i) {
 102     ExtractByAtoms($FileIndex, $PDBRecordLinesRef);
 103   }
 104   elsif ($OptionsInfo{Mode} =~ /^(ResidueNums|ResiduesRange|ResidueNames)$/i) {
 105     ExtractByResidues($FileIndex, $PDBRecordLinesRef);
 106   }
 107   elsif ($OptionsInfo{Mode} =~ /Distance/i) {
 108     ExtractByDistance($FileIndex, $PDBRecordLinesRef);
 109   }
 110   elsif ($OptionsInfo{Mode} =~ /NonWater/i) {
 111     ExtractNonWaterRecords($FileIndex, $PDBRecordLinesRef);
 112   }
 113   elsif ($OptionsInfo{Mode} =~ /NonHydrogens/i) {
 114     ExtractNonHydrogenRecords($FileIndex, $PDBRecordLinesRef);
 115   }
 116 }
 117 
 118 # Extract chains and generate new PDB files...
 119 #
 120 sub ExtractChains {
 121   my($FileIndex, $PDBRecordLinesRef) = @_;
 122   my($ChainIndex, $ChainID, $ChainLabel, $PDBFileName, $RecordLine, $ChainsAndResiduesInfoRef, $AtomNumber, $AtomName, $ResidueName, $AtomChainID, $ResidueNumber, $AlternateLocation, $InsertionCode, $ConectRecordLinesRef, $CollectChainResiduesBeyondTER, %ChainAtomNumbersMap);
 123 
 124   $CollectChainResiduesBeyondTER = ($OptionsInfo{ChainsRecordMode} =~ /^AcrossTER$/i) ? 1 : 0;
 125 
 126   # Get chains and residues data...
 127   $ChainsAndResiduesInfoRef = GetChainsAndResidues($PDBRecordLinesRef, 'AtomAndHetatm', $CollectChainResiduesBeyondTER, 1);
 128 
 129   if ($OptionsInfo{CombineChains}) {
 130     $PDBFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][0];
 131     print "Generating PDBFileName file $PDBFileName...\n";
 132 
 133     open OUTFILE, ">$PDBFileName" or die "Error: Can't open $PDBFileName: $! \n";
 134 
 135     # Write out header and other older records...
 136     WriteHeaderAndOlderRecords(\*OUTFILE, $PDBRecordLinesRef);
 137   }
 138 
 139   for $ChainIndex (0 .. $#{$PDBFilesInfo{SpecifiedChains}[$FileIndex]}) {
 140     $ChainID = $PDBFilesInfo{SpecifiedChains}[$FileIndex][$ChainIndex];
 141     $ChainLabel = $PDBFilesInfo{ChainLabels}[$FileIndex][$ChainIndex];
 142 
 143     if (!$OptionsInfo{CombineChains}) {
 144       $PDBFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][$ChainIndex];
 145       print "Generating PDBFileName file $PDBFileName...\n";
 146 
 147       open OUTFILE, ">$PDBFileName" or die "Error: Can't open $PDBFileName: $! \n";
 148 
 149       # Write out header and other older recors...
 150       WriteHeaderAndOlderRecords(\*OUTFILE, $PDBRecordLinesRef);
 151     }
 152 
 153     # Write out ATOM/HETATM line for chain and collect all ATOM/HETATM serial numbers
 154     # for writing out appropriate CONECT records...
 155     %ChainAtomNumbersMap = ();
 156     for $RecordLine (@{$ChainsAndResiduesInfoRef->{Lines}{$ChainID}}) {
 157       if (CheckRecordType($RecordLine)) {
 158         print OUTFILE "$RecordLine\n";
 159         ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $AtomChainID, $ResidueNumber, $InsertionCode) = ParseAtomRecordLine($RecordLine);
 160         $AtomNumber = int $AtomNumber;
 161         $ChainAtomNumbersMap{$AtomNumber} = $AtomName;
 162       }
 163     }
 164     # Write out TER record using information from last chain record...
 165     $AtomNumber += 1;
 166     print OUTFILE GenerateTerRecordLine($AtomNumber, $ResidueName, $AtomChainID, $ResidueNumber, $InsertionCode), "\n";
 167 
 168     # Write out CONECT records...
 169     $ConectRecordLinesRef = GetConectRecordLines($PDBRecordLinesRef, \%ChainAtomNumbersMap);
 170 
 171     for $RecordLine (@{$ConectRecordLinesRef}) {
 172       print OUTFILE "$RecordLine\n";
 173     }
 174 
 175     if (!$OptionsInfo{CombineChains}) {
 176       # Write out END record...
 177       print OUTFILE GenerateEndRecordLine(), "\n";
 178 
 179       close OUTFILE;
 180     }
 181   }
 182 
 183   if ($OptionsInfo{CombineChains}) {
 184     # Write out END record...
 185     print OUTFILE GenerateEndRecordLine(), "\n";
 186 
 187     close OUTFILE;
 188   }
 189 
 190 }
 191 
 192 # Extract sequences for individual chains or combine all the chains...
 193 sub ExtractSequences {
 194   my($FileIndex, $PDBRecordLinesRef) = @_;
 195   my($ChainIndex, $ChainID, $ChainLabel, $SequenceFileName, $Residue, $ResidueCode, $StandardResidue, $ChainSequence, $WrappedChainSequence, $ChainSequenceID, $ChainsAndResiduesInfoRef, $ChainResiduesRef, %ChainSequencesDataMap);
 196 
 197   if ($OptionsInfo{SequenceRecordSource} =~ /^SeqRes$/i) {
 198     $ChainsAndResiduesInfoRef = GetChainsAndResidues($PDBRecordLinesRef, 'SeqRes');
 199   }
 200   else {
 201     $ChainsAndResiduesInfoRef = GetChainsAndResidues($PDBRecordLinesRef);
 202   }
 203 
 204   # Generate sequence data for all the chains...
 205   %ChainSequencesDataMap = ();
 206   @{$ChainSequencesDataMap{IDs}} = ();
 207   %{$ChainSequencesDataMap{Sequence}} = ();
 208   %{$ChainSequencesDataMap{Description}} = ();
 209 
 210   for $ChainIndex (0 .. $#{$PDBFilesInfo{SpecifiedChains}[$FileIndex]}) {
 211     $ChainID = $PDBFilesInfo{SpecifiedChains}[$FileIndex][$ChainIndex];
 212     $ChainLabel = $PDBFilesInfo{ChainLabels}[$FileIndex][$ChainIndex];
 213 
 214     # Setup sequence ID...
 215     $ChainSequenceID = $PDBFilesInfo{ChainSequenceIDs}[$FileIndex][$ChainIndex];
 216     push @{$ChainSequencesDataMap{IDs}}, $ChainSequenceID;
 217     $ChainSequencesDataMap{Description}{$ChainID} = $ChainSequenceID;
 218 
 219     # Collect sequence data for the chain...
 220     if ($OptionsInfo{SequenceRecordSource} =~ /^SeqRes/i) {
 221       $ChainResiduesRef = \@{$ChainsAndResiduesInfoRef->{Residues}{$ChainID}};
 222     }
 223     else {
 224       $ChainResiduesRef = \@{$ChainsAndResiduesInfoRef->{Residues}{$ChainID}};
 225     }
 226     # Setup sequence data...
 227     $ChainSequence = '';
 228     RESIDUE: for $Residue (@{$ChainResiduesRef}) {
 229       ($ResidueCode, $StandardResidue) = GetResidueCode($Residue);
 230       if (!$StandardResidue) {
 231         if ($OptionsInfo{KeepNonStandardSequences}) {
 232           $ResidueCode = $OptionsInfo{NonStandardSequenceCode};
 233           warn "Warning: Keeping nonstandard residue $Residue in $ChainLabel...\n";
 234         }
 235         else {
 236           warn "Warning: Ignoring nonstandard residue $Residue in $ChainLabel...\n";
 237           next RESIDUE;
 238         }
 239       }
 240       $ChainSequence .= $ResidueCode;
 241     }
 242     $ChainSequencesDataMap{Sequence}{$ChainID} = $ChainSequence;
 243 
 244   }
 245 
 246   # Write out the sequence files...
 247   my($SequenceID, $SequenceDescription, $Sequence, %SequencesDataMap );
 248   if ($OptionsInfo{CombineChainSequences}) {
 249     # Combine all the chain sequences...
 250     $Sequence = '';
 251     for $ChainIndex (0 .. $#{$PDBFilesInfo{SpecifiedChains}[$FileIndex]}) {
 252       $ChainID = $PDBFilesInfo{SpecifiedChains}[$FileIndex][$ChainIndex];
 253 
 254       $Sequence .= $ChainSequencesDataMap{Sequence}{$ChainID};
 255     }
 256     $SequenceID = $PDBFilesInfo{ChainSequenceIDsPrefix}[$FileIndex][0] . "_CombinedChains|PDB";;
 257     $SequenceDescription = $SequenceID;
 258     $SequenceFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][0];
 259 
 260     print "Generating sequence file $SequenceFileName...\n";
 261     %SequencesDataMap = ();
 262     @{$SequencesDataMap{IDs}} = ();
 263     %{$SequencesDataMap{Sequence}} = ();
 264     %{$SequencesDataMap{Description}} = ();
 265 
 266     push @{$SequencesDataMap{IDs}}, $SequenceID;
 267     $SequencesDataMap{Description}{$SequenceID} = $SequenceDescription;
 268     $SequencesDataMap{Sequence}{$SequenceID} = $Sequence;
 269 
 270     WritePearsonFastaSequenceFile($SequenceFileName, \%SequencesDataMap, $OptionsInfo{MaxSequenceLength});
 271   }
 272   else {
 273     # For each specifed chain, write out the sequences...
 274     for $ChainIndex (0 .. $#{$PDBFilesInfo{SpecifiedChains}[$FileIndex]}) {
 275       $ChainID = $PDBFilesInfo{SpecifiedChains}[$FileIndex][$ChainIndex];
 276 
 277       $SequenceFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][$ChainIndex];
 278 
 279       $SequenceID = $ChainSequencesDataMap{IDs}[$ChainIndex];
 280       $SequenceDescription = $ChainSequencesDataMap{Description}{$ChainID};
 281       $Sequence = $ChainSequencesDataMap{Sequence}{$ChainID};
 282 
 283       print "Generating sequence file $SequenceFileName...\n";
 284       %SequencesDataMap = ();
 285       @{$SequencesDataMap{IDs}} = ();
 286       %{$SequencesDataMap{Sequence}} = ();
 287       %{$SequencesDataMap{Description}} = ();
 288 
 289       push @{$SequencesDataMap{IDs}}, $SequenceID;
 290       $SequencesDataMap{Description}{$SequenceID} = $SequenceDescription;
 291       $SequencesDataMap{Sequence}{$SequenceID} = $Sequence;
 292 
 293       WritePearsonFastaSequenceFile($SequenceFileName, \%SequencesDataMap, $OptionsInfo{MaxSequenceLength});
 294     }
 295   }
 296 }
 297 
 298 # Extract atoms...
 299 sub ExtractByAtoms {
 300   my($FileIndex, $PDBRecordLinesRef) = @_;
 301   my($PDBFileName,  $RecordLine, $ChainRecordCount, $AtomNumber, $AtomName, $IgnoreRecord, $ConectRecordLinesRef, %AtomNumbersMap);
 302 
 303   $PDBFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][0];
 304   print "Generating PDBFileName file $PDBFileName...\n";
 305   open OUTFILE, ">$PDBFileName" or die "Error: Can't open $PDBFileName: $! \n";
 306 
 307   # Write out header and other older recors...
 308   WriteHeaderAndOlderRecords(\*OUTFILE, $PDBRecordLinesRef);
 309 
 310   # Write out all ATOM records along with TER and model records to indicate
 311   # chains and multiple models..
 312   %AtomNumbersMap = ();
 313   $ChainRecordCount = 0;
 314   for $RecordLine (@{$PDBRecordLinesRef}) {
 315     if (CheckRecordType($RecordLine)) {
 316       ($AtomNumber, $AtomName) = ParseAtomRecordLine($RecordLine);
 317 
 318       # Check atoms...
 319       $IgnoreRecord = 1;
 320       if ($OptionsInfo{Mode} =~ /^Atoms$/i) {
 321         $IgnoreRecord = 0;
 322       }
 323       elsif ($OptionsInfo{Mode} =~ /^(CAlphas|AtomNames)$/i) {
 324         if (exists $OptionsInfo{SpecifiedAtomNamesMap}{lc $AtomName}) {
 325           $IgnoreRecord = 0;
 326         }
 327       }
 328       elsif ($OptionsInfo{Mode} =~ /^AtomNums$/i) {
 329         if (exists $OptionsInfo{SpecifiedAtomNumsMap}{$AtomNumber}) {
 330           $IgnoreRecord = 0;
 331         }
 332       }
 333       elsif ($OptionsInfo{Mode} =~ /^AtomsRange$/i) {
 334         if ($AtomNumber >= $OptionsInfo{SpecifiedStartAtomNum} && $AtomNumber <= $OptionsInfo{SpecifiedEndAtomNum}) {
 335           $IgnoreRecord = 0;
 336         }
 337       }
 338 
 339       if (!$IgnoreRecord) {
 340         $ChainRecordCount++;
 341         print OUTFILE "$RecordLine\n";
 342 
 343         $AtomNumber = int $AtomNumber;
 344         $AtomNumbersMap{$AtomNumber} = $AtomName;
 345       }
 346     }
 347     elsif (IsTerRecordType($RecordLine)) {
 348       if ($ChainRecordCount) {
 349         print OUTFILE GenerateTerRecordLine(), "\n";
 350       }
 351       $ChainRecordCount = 0;
 352     }
 353     elsif (IsModelRecordType($RecordLine) || IsEndmdlRecordType($RecordLine)) {
 354       print OUTFILE "$RecordLine\n";
 355     }
 356   }
 357 
 358   # Write out appropriate CONECT records...
 359   $ConectRecordLinesRef = GetConectRecordLines($PDBRecordLinesRef, \%AtomNumbersMap);
 360   for $RecordLine (@{$ConectRecordLinesRef}) {
 361     print OUTFILE "$RecordLine\n";
 362   }
 363 
 364   # Write out END record...
 365   print OUTFILE GenerateEndRecordLine(), "\n";
 366 
 367   close OUTFILE;
 368 }
 369 
 370 # Extract residues...
 371 sub ExtractByResidues {
 372   my($FileIndex, $PDBRecordLinesRef) = @_;
 373   my($PDBFileName,  $RecordLine, $ChainRecordCount, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $ConectRecordLinesRef, $IgnoreRecord, %AtomNumbersMap);
 374 
 375   $PDBFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][0];
 376   print "Generating PDBFileName file $PDBFileName...\n";
 377   open OUTFILE, ">$PDBFileName" or die "Error: Can't open $PDBFileName: $! \n";
 378 
 379   # Write out header and other older recors...
 380   WriteHeaderAndOlderRecords(\*OUTFILE, $PDBRecordLinesRef);
 381 
 382   # Write out all ATOM records for specified residues with TER and model records to indicate
 383   # chains and multiple models...
 384   %AtomNumbersMap = ();
 385   $ChainRecordCount = 0;
 386   for $RecordLine (@{$PDBRecordLinesRef}) {
 387     if (CheckRecordType($RecordLine)) {
 388       ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber) = ParseAtomRecordLine($RecordLine);
 389 
 390       # Check residues...
 391       $IgnoreRecord = 1;
 392       if ($OptionsInfo{Mode} =~ /^ResidueNums$/i) {
 393         if (exists $OptionsInfo{SpecifiedResidueNumsMap}{$ResidueNumber}) {
 394           $IgnoreRecord = 0;
 395         }
 396       }
 397       elsif ($OptionsInfo{Mode} =~ /^ResiduesRange$/i) {
 398         if ($ResidueNumber >= $OptionsInfo{SpecifiedStartResidueNum} && $ResidueNumber <= $OptionsInfo{SpecifiedEndResidueNum}) {
 399           $IgnoreRecord = 0;
 400         }
 401       }
 402       elsif ($OptionsInfo{Mode} =~ /^ResidueNames$/i) {
 403         if (exists $OptionsInfo{SpecifiedResidueNamesMap}{lc $ResidueName}) {
 404           $IgnoreRecord = 0;
 405         }
 406       }
 407       if (!$IgnoreRecord) {
 408         $ChainRecordCount++;
 409         print OUTFILE "$RecordLine\n";
 410         $AtomNumber = int $AtomNumber;
 411         $AtomNumbersMap{$AtomNumber} = $AtomName;
 412       }
 413     }
 414     elsif (IsTerRecordType($RecordLine)) {
 415       if ($ChainRecordCount) {
 416         print OUTFILE GenerateTerRecordLine(), "\n";
 417       }
 418       $ChainRecordCount = 0;
 419     }
 420     elsif (IsModelRecordType($RecordLine) || IsEndmdlRecordType($RecordLine)) {
 421       print OUTFILE "$RecordLine\n";
 422     }
 423   }
 424 
 425   # Write out appropriate CONECT records...
 426   $ConectRecordLinesRef = GetConectRecordLines($PDBRecordLinesRef, \%AtomNumbersMap);
 427   for $RecordLine (@{$ConectRecordLinesRef}) {
 428     print OUTFILE "$RecordLine\n";
 429   }
 430   # Write out END record...
 431   print OUTFILE GenerateEndRecordLine(), "\n";
 432 
 433   close OUTFILE;
 434 }
 435 
 436 # Extract non water records...
 437 sub ExtractNonWaterRecords {
 438   my($FileIndex, $PDBRecordLinesRef) = @_;
 439   my($PDBFileName,  $RecordLine, $ChainRecordCount, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ConectRecordLinesRef, %AtomNumbersMap);
 440 
 441   $PDBFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][0];
 442   print "Generating PDBFileName file $PDBFileName...\n";
 443   open OUTFILE, ">$PDBFileName" or die "Error: Can't open $PDBFileName: $! \n";
 444 
 445   # Write out header and other older recors...
 446   WriteHeaderAndOlderRecords(\*OUTFILE, $PDBRecordLinesRef);
 447 
 448   # Write out all ATOM/HETATM non water records along with TER and model records to indicate
 449   # chains and multiple models..
 450   %AtomNumbersMap = ();
 451   $ChainRecordCount = 0;
 452   for $RecordLine (@{$PDBRecordLinesRef}) {
 453     if (CheckRecordType($RecordLine)) {
 454       ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName) = ParseAtomRecordLine($RecordLine);
 455       if (! exists $OptionsInfo{SpecifiedWaterResiduesMap}{$ResidueName} ) {
 456         $ChainRecordCount++;
 457         print OUTFILE "$RecordLine\n";
 458         $AtomNumber = int $AtomNumber;
 459         $AtomNumbersMap{$AtomNumber} = $AtomName;
 460       }
 461     }
 462     elsif (IsTerRecordType($RecordLine)) {
 463       if ($ChainRecordCount) {
 464         print OUTFILE GenerateTerRecordLine(), "\n";
 465       }
 466       $ChainRecordCount = 0;
 467     }
 468     elsif (IsModelRecordType($RecordLine) || IsEndmdlRecordType($RecordLine)) {
 469       print OUTFILE "$RecordLine\n";
 470     }
 471   }
 472 
 473   # Write out appropriate CONECT records...
 474   $ConectRecordLinesRef = GetConectRecordLines($PDBRecordLinesRef, \%AtomNumbersMap);
 475   for $RecordLine (@{$ConectRecordLinesRef}) {
 476     print OUTFILE "$RecordLine\n";
 477   }
 478   # Write out END record...
 479   print OUTFILE GenerateEndRecordLine(), "\n";
 480 
 481   close OUTFILE;
 482 }
 483 
 484 # Extract non hydrogen records...
 485 sub ExtractNonHydrogenRecords {
 486   my($FileIndex, $PDBRecordLinesRef) = @_;
 487   my($PDBFileName,  $RecordLine, $ChainRecordCount, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge, $ConectRecordLinesRef, %AtomNumbersMap);
 488 
 489   $PDBFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][0];
 490   print "Generating PDBFileName file $PDBFileName...\n";
 491   open OUTFILE, ">$PDBFileName" or die "Error: Can't open $PDBFileName: $! \n";
 492 
 493   # Write out header and other older recors...
 494   WriteHeaderAndOlderRecords(\*OUTFILE, $PDBRecordLinesRef);
 495 
 496   # Write out all ATOM/HETATM non hydrogen records along with TER and model records to indicate
 497   # chains and multiple models..
 498   %AtomNumbersMap = ();
 499   $ChainRecordCount = 0;
 500   for $RecordLine (@{$PDBRecordLinesRef}) {
 501     if (CheckRecordType($RecordLine)) {
 502       ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseAtomRecordLine($RecordLine);
 503       if ($ElementSymbol !~ /^H$/i) {
 504         $ChainRecordCount++;
 505         print OUTFILE "$RecordLine\n";
 506         $AtomNumber = int $AtomNumber;
 507         $AtomNumbersMap{$AtomNumber} = $AtomName;
 508       }
 509     }
 510     elsif (IsTerRecordType($RecordLine)) {
 511       if ($ChainRecordCount) {
 512         print OUTFILE GenerateTerRecordLine(), "\n";
 513       }
 514       $ChainRecordCount = 0;
 515     }
 516     elsif (IsModelRecordType($RecordLine) || IsEndmdlRecordType($RecordLine)) {
 517       print OUTFILE "$RecordLine\n";
 518     }
 519   }
 520 
 521   # Write out appropriate CONECT records...
 522   $ConectRecordLinesRef = GetConectRecordLines($PDBRecordLinesRef, \%AtomNumbersMap);
 523   for $RecordLine (@{$ConectRecordLinesRef}) {
 524     print OUTFILE "$RecordLine\n";
 525   }
 526   # Write out END record...
 527   print OUTFILE GenerateEndRecordLine(), "\n";
 528 
 529   close OUTFILE;
 530 }
 531 
 532 # Extract ATOM/HETATM records by distance...
 533 sub ExtractByDistance {
 534   my($FileIndex, $PDBRecordLinesRef) = @_;
 535   my($PDBFileName,  $RecordLine, $RecordLineNum, $ChainRecordCount, $ConectRecordLinesRef, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $IgnoreRecord, $ResidueID, @OriginCoords, @Coords, %AtomNumbersMap, %ResiduesDataMap);
 536 
 537   $PDBFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][0];
 538   print "Generating PDBFileName file $PDBFileName...\n";
 539   open OUTFILE, ">$PDBFileName" or die "Error: Can't open $PDBFileName: $! \n";
 540 
 541   # Write out header and other older recors...
 542   WriteHeaderAndOlderRecords(\*OUTFILE, $PDBRecordLinesRef);
 543 
 544   # Setup coordinates of origin to calculate distance...
 545   @OriginCoords = ();
 546   push @OriginCoords, @{$PDBFilesInfo{DistanceOrigin}[$FileIndex]};
 547 
 548   # Write out all ATOM records for which meet specified criteria along with TER and model records to indicate
 549   # chains and multiple models...
 550   %AtomNumbersMap = ();
 551 
 552   %ResiduesDataMap = ();
 553   %{$ResiduesDataMap{ID}} = ();
 554   %{$ResiduesDataMap{Status}} = ();
 555 
 556   $ChainRecordCount = 0;
 557   $RecordLineNum = 0;
 558 
 559   for $RecordLine (@{$PDBRecordLinesRef}) {
 560     $RecordLineNum++;
 561     if (CheckRecordType($RecordLine)) {
 562       ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z) = ParseAtomRecordLine($RecordLine);
 563       @Coords = (); push @Coords, ($X, $Y, $Z);
 564 
 565       $IgnoreRecord = 1;
 566       if ($OptionsInfo{DistanceSelectionMode} =~ /^ByResidue$/i) {
 567         $ResidueID = "${ResidueName}_${ResidueNumber}_${ChainID}";
 568         if (exists $ResiduesDataMap{ID}{$ResidueID}) {
 569           # Residue data has been processed; check its selection status...
 570           if ($ResiduesDataMap{Status}{$ResidueID}) {
 571             $IgnoreRecord = 0;
 572           }
 573         }
 574         else {
 575           # Residue hasn't been processed...
 576           $ResiduesDataMap{ID}{$ResidueID} = $ResidueID;
 577           $ResiduesDataMap{Status}{$ResidueID} = 0;
 578           if (CheckResidueDistance($ResidueID, $RecordLineNum, $PDBRecordLinesRef, \@OriginCoords)) {
 579             $IgnoreRecord = 0;
 580             $ResiduesDataMap{Status}{$ResidueID} = 1;
 581           }
 582         }
 583       }
 584       elsif ($OptionsInfo{DistanceSelectionMode} =~ /^ByAtom$/i) {
 585         if (CheckDistance(\@Coords, \@OriginCoords)) {
 586           $IgnoreRecord = 0;
 587         }
 588       }
 589 
 590       if (!$IgnoreRecord) {
 591         $ChainRecordCount++;
 592         print OUTFILE "$RecordLine\n";
 593         $AtomNumber = int $AtomNumber;
 594         $AtomNumbersMap{$AtomNumber} = $AtomName;
 595       }
 596     }
 597     elsif (IsTerRecordType($RecordLine)) {
 598       if ($ChainRecordCount) {
 599         print OUTFILE GenerateTerRecordLine(), "\n";
 600       }
 601       $ChainRecordCount = 0;
 602     }
 603     elsif (IsModelRecordType($RecordLine) || IsEndmdlRecordType($RecordLine)) {
 604       print OUTFILE "$RecordLine\n";
 605     }
 606   }
 607 
 608   # Write out appropriate CONECT records...
 609   $ConectRecordLinesRef = GetConectRecordLines($PDBRecordLinesRef, \%AtomNumbersMap);
 610   for $RecordLine (@{$ConectRecordLinesRef}) {
 611     print OUTFILE "$RecordLine\n";
 612   }
 613 
 614   # Write out END record...
 615   print OUTFILE GenerateEndRecordLine(), "\n";
 616 
 617   close OUTFILE;
 618 }
 619 
 620 # Does record type correspond to the specified record type?
 621 sub CheckRecordType {
 622   my($RecordLine) = @_;
 623   my($Status);
 624 
 625   $Status = 0;
 626   if ($OptionsInfo{RecordMode} =~ /^Atom$/i) {
 627     $Status = IsAtomRecordType($RecordLine) ? 1 : 0;
 628   }
 629   elsif ($OptionsInfo{RecordMode} =~ /^Hetatm$/i) {
 630     $Status = IsHetatmRecordType($RecordLine) ? 1 : 0;
 631   }
 632   elsif ($OptionsInfo{RecordMode} =~ /^AtomAndHetatm$/i) {
 633     $Status = (IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine)) ? 1 : 0;
 634   }
 635 
 636   return $Status;
 637 }
 638 
 639 # Does record meets distance citerion specified by the user?
 640 sub CheckResidueDistance {
 641   my($SpecifiedResidueID, $StartingLineNum, $PDBRecordLinesRef, $OriginCoordsRef) = @_;
 642   my($Status, $RecordLine, $RecordLineIndex, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $ResidueID, @Coords);
 643 
 644   $Status = 0;
 645 
 646   RECORDLINE: for $RecordLineIndex (($StartingLineNum - 1) .. $#{$PDBRecordLinesRef}) {
 647     $RecordLine = $PDBRecordLinesRef->[$RecordLineIndex];
 648     if (!CheckRecordType($RecordLine)) {
 649       next RECORDLINE;
 650     }
 651     ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z) = ParseAtomRecordLine($RecordLine);
 652     $ResidueID = "${ResidueName}_${ResidueNumber}_${ChainID}";
 653 
 654     if ($ResidueID !~ /^$SpecifiedResidueID$/i) {
 655       # It's a new residue line...
 656       last RECORDLINE;
 657     }
 658 
 659     # Check distance...
 660     @Coords = (); push @Coords, ($X, $Y, $Z);
 661     if (CheckDistance(\@Coords, $OriginCoordsRef)) {
 662       # Distance criterion is met for at least one record in the residue...
 663       $Status = 1;
 664       last RECORDLINE;
 665     }
 666   }
 667   return $Status;
 668 }
 669 
 670 # Does record meets distance citerion specified by the user?
 671 sub CheckDistance {
 672   my($CoordsRef, $OriginCoordsRef) = @_;
 673   my($Status, $Index, $Distance, $DistanceSquare);
 674 
 675   $Status = 0;
 676 
 677   if ($OptionsInfo{ExtractionDistanceMode} =~ /^Residue$/i) {
 678     # Go over coordinates of all the atoms in the residue...
 679     my($ResidueCoordsCount) = scalar @{$OriginCoordsRef};
 680     INDEX: for ($Index = 0; $Index < $ResidueCoordsCount; $Index += 3) {
 681       $DistanceSquare = ($CoordsRef->[0] - $OriginCoordsRef->[$Index])**2 + ($CoordsRef->[1] - $OriginCoordsRef->[$Index + 1])**2 + ($CoordsRef->[2] - $OriginCoordsRef->[$Index + 2])**2;
 682       $Distance = sqrt $DistanceSquare;
 683       if ($Distance <= $OptionsInfo{MaxExtractionDistance}) {
 684         $Status = 1;
 685         last INDEX;
 686       }
 687     }
 688   }
 689   else {
 690     $DistanceSquare = 0;
 691     for $Index (0 .. 2) {
 692       $DistanceSquare += ($CoordsRef->[$Index] - $OriginCoordsRef->[$Index])**2;
 693     }
 694     $Distance = sqrt $DistanceSquare;
 695     $Status = ($Distance <= $OptionsInfo{MaxExtractionDistance}) ? 1 : 0;
 696   }
 697 
 698   return $Status;
 699 }
 700 
 701 # Write out modifed header and other older records...
 702 sub WriteHeaderAndOlderRecords {
 703   my($OutFileRef, $PDBRecordLinesRef) = @_;
 704 
 705   if ($OptionsInfo{ModifyHeaderRecord}) {
 706     # Write out modified HEADER record...
 707     my($Classification, $DepositionDate, $IDCode) = GetHeaderRecordInformation($PDBRecordLinesRef);
 708     $Classification = 'Data extracted using MayaChemTools';
 709     print $OutFileRef GenerateHeaderRecordLine($IDCode, $Classification), "\n";
 710   }
 711   else {
 712     print $OutFileRef $PDBRecordLinesRef->[0], "\n";
 713   }
 714 
 715   # Write out any old records...
 716   if ($OptionsInfo{KeepOldRecords}) {
 717     my($RecordLineIndex, $RecordLine);
 718     # Skip HEADER record and write out older records all the way upto first MODEL/ATOM/HETATM records from input file...
 719     RECORDLINE: for $RecordLineIndex (1 .. $#{$PDBRecordLinesRef}) {
 720       $RecordLine = $PDBRecordLinesRef->[$RecordLineIndex];
 721       if (IsModelRecordType($RecordLine) || IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine)) {
 722         last RECORDLINE;
 723       }
 724       print $OutFileRef "$RecordLine\n";
 725     }
 726   }
 727 }
 728 
 729 # Get header record information assuming it's the first record...
 730 sub GetHeaderRecordInformation {
 731   my($PDBRecordLinesRef) = @_;
 732   my($Classification, $DepositionDate, $IDCode, $HeaderRecordLine);
 733 
 734   ($Classification, $DepositionDate, $IDCode) = ('') x 3;
 735   $HeaderRecordLine = $PDBRecordLinesRef->[0];
 736   if (IsHeaderRecordType($HeaderRecordLine)) {
 737     ($Classification, $DepositionDate, $IDCode) = ParseHeaderRecordLine($HeaderRecordLine);
 738   }
 739   return ($Classification, $DepositionDate, $IDCode);
 740 }
 741 
 742 # Get one letter residue code...
 743 sub GetResidueCode {
 744   my($ResidueName) = @_;
 745   my($ResidueCode, $StandardResidue);
 746 
 747   $ResidueCode = $OptionsInfo{NonStandardSequenceCode};
 748   $StandardResidue = 0;
 749 
 750   if (length($ResidueName) == 3) {
 751     # Assume it's an amino acid...
 752     if (AminoAcids::IsAminoAcid($ResidueName)) {
 753       # Standard amino acid...
 754       $ResidueCode = AminoAcids::GetAminoAcidOneLetterCode($ResidueName);
 755       $StandardResidue = 1;
 756     }
 757   }
 758   elsif (length($ResidueName) == 1) {
 759     # Assume it's a nucleic acid...
 760     if ($ResidueName =~ /^(A|G|T|U|C)$/i) {
 761       $ResidueCode = $ResidueName;
 762       $StandardResidue = 1;
 763     }
 764   }
 765 
 766   return ($ResidueCode, $StandardResidue);
 767 }
 768 
 769 # Process option values...
 770 sub ProcessOptions {
 771   %OptionsInfo = ();
 772   $OptionsInfo{Mode} = $Options{mode};
 773 
 774   my(@SpecifiedChains) = ();
 775   if ($Options{chains} =~ /^(First|All)$/i) {
 776     $OptionsInfo{ChainsToExtract} = $Options{chains};
 777   }
 778   else {
 779     @SpecifiedChains = split /\,/, $Options{chains};
 780     $OptionsInfo{ChainsToExtract} = 'Specified';
 781   }
 782   @{$OptionsInfo{SpecifiedChains}} = ();
 783   push @{$OptionsInfo{SpecifiedChains}}, @SpecifiedChains;
 784 
 785   $OptionsInfo{ChainsRecordMode} = $Options{chainsrecordmode};
 786 
 787   $OptionsInfo{CombineChains} = ($Options{combinechains} =~ /^Yes$/i) ? 1 : 0;
 788 
 789   $OptionsInfo{CombineChainSequences} = ($Options{combinechains} =~ /^Yes$/i) ? 1 : 0;
 790 
 791   ProcessResiduesOptions();
 792   ProcessAtomsOptions();
 793   ProcessDistanceOptions();
 794 
 795   $OptionsInfo{WaterResidueNames} = $Options{waterresiduenames};
 796   @{$OptionsInfo{SpecifiedWaterResiduesList}} = ();
 797   %{$OptionsInfo{SpecifiedWaterResiduesMap}} = ();
 798 
 799   my(@SpecifiedWaterResiduesList);
 800   @SpecifiedWaterResiduesList = ();
 801 
 802   if ($OptionsInfo{Mode} =~ /^NonWater$/i) {
 803     my($WaterResidueName);
 804     if ($OptionsInfo{WaterResidueNames} =~ /Automatic/i) {
 805       push @SpecifiedWaterResiduesList, ('HOH', 'WAT', 'H2O');
 806     }
 807     else {
 808       @SpecifiedWaterResiduesList = split /\,/, $Options{waterresiduenames};
 809     }
 810     for $WaterResidueName (@SpecifiedWaterResiduesList) {
 811       $OptionsInfo{SpecifiedWaterResiduesMap}{$WaterResidueName} = $WaterResidueName;
 812     }
 813   }
 814   push @{$OptionsInfo{SpecifiedWaterResiduesList}}, @SpecifiedWaterResiduesList;
 815 
 816   $OptionsInfo{RecordMode} = $Options{recordmode} ? $Options{recordmode} : ($Options{mode} =~ /^(Atoms|CAlphas|AtomNums|AtomsRange|AtomNames)$/i ? "Atom" : "AtomAndHetatm");
 817 
 818   $OptionsInfo{KeepOldRecords} = ($Options{keepoldrecords} =~ /^Yes$/i) ? 1 : 0;
 819 
 820   $OptionsInfo{ModifyHeaderRecord} = ($Options{modifyheader} =~ /^Yes$/i) ? 1 : 0;
 821 
 822   $OptionsInfo{KeepNonStandardSequences} = ($Options{nonstandardkeep} =~ /^Yes$/i) ? 1 : 0;
 823   $OptionsInfo{NonStandardSequenceCode} = $Options{nonstandardcode};
 824   $OptionsInfo{MaxSequenceLength} = $Options{sequencelength};
 825   $OptionsInfo{SequenceRecordSource} = $Options{sequencerecords};
 826   $OptionsInfo{SequenceIDPrefixSource} = $Options{sequenceidprefix};
 827 
 828   $OptionsInfo{OverwriteFiles} = $Options{overwrite} ? 1 : 0;
 829   $OptionsInfo{OutFileRoot} = $Options{root} ? $Options{root} : 0;
 830 }
 831 
 832 # Process specified residue options...
 833 sub ProcessResiduesOptions {
 834   my($ResidueNum, $StartResidueNum, $EndResNum, $ResidueName, @SpecifiedResidueNumsList, @SpecifiedResidueNamesList);
 835 
 836   @SpecifiedResidueNumsList = ();
 837   ($StartResidueNum, $EndResNum) = (0, 0);
 838 
 839   @SpecifiedResidueNamesList = ();
 840 
 841   if ($OptionsInfo{Mode} =~ /^(ResidueNums|ResiduesRange|ResidueNames)$/i) {
 842     if (!$Options{residues}) {
 843       die "Error: You must specify a value for \"--Residues\" option in \"ResidueNums, ResiduesRange, or ResidueNames\" \"-m, --mode\". \n";
 844     }
 845     $OptionsInfo{Residues} = $Options{residues};
 846     $OptionsInfo{Residues} =~ s/ //g;
 847 
 848     if ($OptionsInfo{Mode} =~ /^ResidueNames$/i) {
 849       @SpecifiedResidueNamesList = split /\,/, $OptionsInfo{Residues};
 850     }
 851     else {
 852       @SpecifiedResidueNumsList = split /\,/, $OptionsInfo{Residues};
 853       for $ResidueNum (@SpecifiedResidueNumsList) {
 854         if (!IsPositiveInteger($ResidueNum)) {
 855           die "Error: Invalid residue number value, $ResidueNum, for \"--Residues\" option during \"ResidueNumes\" or \"ResiduesRange\"value of \"-m --mode\" option: Residue number must be a positive integer.\n";
 856         }
 857       }
 858       if ($OptionsInfo{Mode} =~ /^ResiduesRange$/i) {
 859         if (@SpecifiedResidueNumsList != 2) {
 860           die "Error: Invalid number of residue number values, ", scalar(@SpecifiedResidueNumsList), ", for \"--Residues\" option during \"ResiduesRange\" value of \"-m --mode\" option: The number of values must be 2 corresponding to start and end residue numbers.\n";
 861         }
 862         if ($SpecifiedResidueNumsList[0] > $SpecifiedResidueNumsList[1]) {
 863           die "Error: Invalid residue number values, @SpecifiedResidueNumsList, for \"--Residues\" option during \"ResiduesRange\" value of \"-m --mode\" option: The start residue number must be less than end residue number.\n";
 864         }
 865         ($StartResidueNum, $EndResNum) = @SpecifiedResidueNumsList;
 866       }
 867     }
 868   }
 869 
 870   @{$OptionsInfo{SpecifiedResidueNumsList}} = ();
 871   push @{$OptionsInfo{SpecifiedResidueNumsList}}, @SpecifiedResidueNumsList;
 872 
 873   $OptionsInfo{SpecifiedStartResidueNum} = $StartResidueNum;
 874   $OptionsInfo{SpecifiedEndResidueNum} = $EndResNum;
 875 
 876   @{$OptionsInfo{SpecifiedResidueNamesList}} = ();
 877   push @{$OptionsInfo{SpecifiedResidueNamesList}}, @SpecifiedResidueNamesList;
 878 
 879   # Set up a specified residue numbers map...
 880   %{$OptionsInfo{SpecifiedResidueNumsMap}} = ();
 881   for $ResidueNum (@{$OptionsInfo{SpecifiedResidueNumsList}}) {
 882     $OptionsInfo{SpecifiedResidueNumsMap}{$ResidueNum} = $ResidueNum;
 883   }
 884 
 885   # Set up a specified residue names map...
 886   %{$OptionsInfo{SpecifiedResidueNamesMap}} = ();
 887   for $ResidueName (@{$OptionsInfo{SpecifiedResidueNamesList}}) {
 888     $OptionsInfo{SpecifiedResidueNamesMap}{lc $ResidueName} = lc $ResidueName;
 889   }
 890 
 891 }
 892 
 893 # Process specified atom options...
 894 sub ProcessAtomsOptions {
 895   my($AtomNum, $StartAtomNum, $EndAtomNum, $AtomName, @SpecifiedAtomNumsList, @SpecifiedAtomNamesList);
 896 
 897   @SpecifiedAtomNumsList = ();
 898   ($StartAtomNum, $EndAtomNum) = (0, 0);
 899 
 900   @SpecifiedAtomNamesList = ();
 901 
 902   if ($OptionsInfo{Mode} =~ /^(AtomNums|AtomsRange|AtomNames)$/i) {
 903     if (!$Options{atoms}) {
 904       die "Error: You must specify a value for \"--Atoms\" option in \"AtomNums, AtomsRange, or AtomNames\" \"-m, --mode\". \n";
 905     }
 906     $OptionsInfo{Atoms} = $Options{atoms};
 907     $OptionsInfo{Atoms} =~ s/ //g;
 908 
 909     if ($OptionsInfo{Mode} =~ /^AtomNames$/i) {
 910       @SpecifiedAtomNamesList = split /\,/, $OptionsInfo{Atoms};
 911     }
 912     else {
 913       @SpecifiedAtomNumsList = split /\,/, $OptionsInfo{Atoms};
 914       for $AtomNum (@SpecifiedAtomNumsList) {
 915         if (!IsPositiveInteger($AtomNum)) {
 916           die "Error: Invalid atom number value, $AtomNum, for \"--Atoms\" option during \"AtomNums\" or \"AtomsRange\"value of \"-m --mode\" option: Atom number must be a positive integer.\n";
 917         }
 918       }
 919       if ($OptionsInfo{Mode} =~ /^AtomsRange$/i) {
 920         if (@SpecifiedAtomNumsList != 2) {
 921           die "Error: Invalid number of atom number values, ", scalar(@SpecifiedAtomNumsList), ", for \"--Atoms\" option during \"AtomsRange\" value of \"-m --mode\" option: The number of values must be 2 corresponding to start and end atom numbers.\n";
 922         }
 923         if ($SpecifiedAtomNumsList[0] > $SpecifiedAtomNumsList[1]) {
 924           die "Error: Invalid atom number values, @SpecifiedAtomNumsList, for \"--Atoms\" option during \"AtomsRange\" value of \"-m --mode\" option: The start atom number must be less than end atom number.\n";
 925         }
 926         ($StartAtomNum, $EndAtomNum) = @SpecifiedAtomNumsList;
 927       }
 928     }
 929   }
 930   elsif ($OptionsInfo{Mode} =~ /^CAlphas$/i) {
 931     @SpecifiedAtomNamesList = ("CA");
 932   }
 933 
 934   @{$OptionsInfo{SpecifiedAtomNumsList}} = ();
 935   push @{$OptionsInfo{SpecifiedAtomNumsList}}, @SpecifiedAtomNumsList;
 936 
 937   $OptionsInfo{SpecifiedStartAtomNum} = $StartAtomNum;
 938   $OptionsInfo{SpecifiedEndAtomNum} = $EndAtomNum;
 939 
 940   @{$OptionsInfo{SpecifiedAtomNamesList}} = ();
 941   push @{$OptionsInfo{SpecifiedAtomNamesList}}, @SpecifiedAtomNamesList;
 942 
 943   # Set up a specified residue numbers map...
 944   %{$OptionsInfo{SpecifiedAtomNumsMap}} = ();
 945   for $AtomNum (@{$OptionsInfo{SpecifiedAtomNumsList}}) {
 946     $OptionsInfo{SpecifiedAtomNumsMap}{$AtomNum} = $AtomNum;
 947   }
 948 
 949   # Set up a specified residue names map...
 950   %{$OptionsInfo{SpecifiedAtomNamesMap}} = ();
 951   for $AtomName (@{$OptionsInfo{SpecifiedAtomNamesList}}) {
 952     $OptionsInfo{SpecifiedAtomNamesMap}{lc $AtomName} = lc $AtomName;
 953   }
 954 
 955 }
 956 
 957 # Process specified distance options...
 958 sub ProcessDistanceOptions {
 959   my(@SpecifiedDistanceOrigin) = ();
 960 
 961   $OptionsInfo{MaxExtractionDistance} = $Options{distance};
 962   $OptionsInfo{ExtractionDistanceMode} = $Options{distancemode};
 963   $OptionsInfo{ExtractionDistanceOrigin} = $Options{distanceorigin} ? $Options{distanceorigin} : '';
 964   $OptionsInfo{DistanceSelectionMode} = $Options{distanceselectionmode};
 965 
 966   if ($OptionsInfo{Mode} =~ /^Distance$/i) {
 967     if (!$Options{distanceorigin}) {
 968       die "Error: You must specify a value for \"--distanceorigin\" option in \"Distance\" \"-m, --mode\". \n";
 969     }
 970     @SpecifiedDistanceOrigin = split /\,/, $Options{distanceorigin};
 971     if ($OptionsInfo{ExtractionDistanceMode} =~ /^Atom$/i) {
 972       if (@SpecifiedDistanceOrigin != 2) {
 973         die "Error: Invalid number of values, ", scalar(@SpecifiedDistanceOrigin), " for option \"distanceorigin\" option during \"Atom\" value of \"--distancemode\" : The number of values must be 2.\n";
 974       }
 975       if (!IsPositiveInteger($SpecifiedDistanceOrigin[0])) {
 976         die "Error: Invalid atom number value, ", $SpecifiedDistanceOrigin[0], ", for option \"distanceorigin\" option during \"Atom\" value of \"--distancemode\". Allowed values: > 0\n";
 977       }
 978     }
 979     elsif ($OptionsInfo{ExtractionDistanceMode} =~ /^Hetatm$/i) {
 980       if (@SpecifiedDistanceOrigin != 2) {
 981         die "Error: Invalid number of values, ", scalar(@SpecifiedDistanceOrigin), " for option \"distanceorigin\" option during \"Hetatm\" value of \"--distancemode\" : The number of values must be 2.\n";
 982       }
 983       if (!IsPositiveInteger($SpecifiedDistanceOrigin[0])) {
 984         die "Error: Invalid hetatm number value, ", $SpecifiedDistanceOrigin[0], ", for option \"distanceorigin\" option during \"Hetatm\" value of \"--distancemode\". Allowed values: > 0\n";
 985       }
 986     }
 987     elsif ($OptionsInfo{ExtractionDistanceMode} =~ /^Residue$/i) {
 988       if (!(@SpecifiedDistanceOrigin == 2 || @SpecifiedDistanceOrigin == 3)) {
 989         die "Error: Invalid number of values, ", scalar(@SpecifiedDistanceOrigin), " for option \"distanceorigin\" option during \"Residue\" value of \"--distancemode\" : The number of values must be either 2 or 3.\n";
 990       }
 991       if (!IsPositiveInteger($SpecifiedDistanceOrigin[0])) {
 992         die "Error: Invalid residue number value, ", $SpecifiedDistanceOrigin[0], ", for option \"distanceorigin\" option during \"Residue\" value of \"--distancemode\". Allowed values: > 0\n";
 993       }
 994     }
 995     elsif ($OptionsInfo{ExtractionDistanceMode} =~ /^XYZ$/i) {
 996       if (@SpecifiedDistanceOrigin != 3) {
 997         die "Error: Invalid number of values, ", scalar(@SpecifiedDistanceOrigin), " for option \"distanceorigin\" option during \"XYZ\" value of \"--distancemode\" : The number of values must be 3.\n";
 998       }
 999       my($Value);
1000       for $Value (@SpecifiedDistanceOrigin) {
1001         if (!IsNumerical($Value)) {
1002           die "Error: Invalid coordinate value, ", $SpecifiedDistanceOrigin[0], ", for option \"distanceorigin\" option during \"XYZ\" value of \"--distancemode\". Allowed values: numerical\n";
1003         }
1004       }
1005     }
1006   }
1007   @{$OptionsInfo{SpecifiedExtractionDistanceOrigin}} = ();
1008   push @{$OptionsInfo{SpecifiedExtractionDistanceOrigin}}, @SpecifiedDistanceOrigin;
1009 
1010 }
1011 
1012 # Retrieve information about PDB files...
1013 sub RetrievePDBFilesInfo {
1014   my($Index, $PDBFile, $PDBRecordLinesRef, $ChainID, $ChainLabel, $ChainsAndResiduesInfoRef, $Mode, $FileDir, $FileName, $FileExt, $OutFileName, $OutFileRoot, @SpecifiedChains, @DistanceOrigin, @OutFileNames, @ChainLabels, @ChainSequenceIDs, @ChainSequenceIDsPrefix);
1015 
1016   %PDBFilesInfo = ();
1017   @{$PDBFilesInfo{FileOkay}} = ();
1018   @{$PDBFilesInfo{OutFileRoot}} = ();
1019   @{$PDBFilesInfo{OutFileNames}} = ();
1020   @{$PDBFilesInfo{ChainLabels}} = ();
1021   @{$PDBFilesInfo{ChainSequenceIDs}} = ();
1022   @{$PDBFilesInfo{ChainSequenceIDsPrefix}} = ();
1023   @{$PDBFilesInfo{SpecifiedChains}} = ();
1024   @{$PDBFilesInfo{DistanceOrigin}} = ();
1025 
1026   FILELIST: for $Index (0 .. $#PDBFilesList) {
1027     $PDBFilesInfo{FileOkay}[$Index] = 0;
1028 
1029     $PDBFilesInfo{OutFileRoot}[$Index] = '';
1030     @{$PDBFilesInfo{OutFileNames}[$Index]} = ();
1031     @{$PDBFilesInfo{OutFileNames}[$Index]} = ();
1032     @{$PDBFilesInfo{ChainLabels}[$Index]} = ();
1033     @{$PDBFilesInfo{ChainSequenceIDs}[$Index]} = ();
1034     @{$PDBFilesInfo{ChainSequenceIDsPrefix}[$Index]} = ();
1035     @{$PDBFilesInfo{SpecifiedChains}[$Index]} = ();
1036     @{$PDBFilesInfo{DistanceOrigin}[$Index]} = ();
1037 
1038     $PDBFile = $PDBFilesList[$Index];
1039     if (!(-e $PDBFile)) {
1040       warn "Warning: Ignoring file $PDBFile: It doesn't exist\n";
1041       next FILELIST;
1042     }
1043     if (!CheckFileType($PDBFile, "pdb")) {
1044       warn "Warning: Ignoring file $PDBFile: It's not a PDB file\n";
1045       next FILELIST;
1046     }
1047     if (! open PDBFILE, "$PDBFile") {
1048       warn "Warning: Ignoring file $PDBFile: Couldn't open it: $! \n";
1049       next FILELIST;
1050     }
1051     close PDBFILE;
1052 
1053     # Get PDB data...
1054     $PDBRecordLinesRef = ReadPDBFile($PDBFile);
1055     if ($OptionsInfo{Mode} =~ /^Sequences$/i && $OptionsInfo{SequenceRecordSource} =~ /^SeqRes$/i) {
1056       $ChainsAndResiduesInfoRef = GetChainsAndResidues($PDBRecordLinesRef, 'SeqRes');
1057     }
1058     else {
1059       $ChainsAndResiduesInfoRef = GetChainsAndResidues($PDBRecordLinesRef);
1060     }
1061     if (!scalar @{$ChainsAndResiduesInfoRef->{ChainIDs}}) {
1062       warn "Warning: Ignoring file $PDBFile: No chains found \n";
1063       next FILELIST;
1064     }
1065 
1066     # Make sure specified chains exist in PDB file...
1067     @SpecifiedChains = ();
1068     if ($OptionsInfo{ChainsToExtract} =~ /^Specified$/i) {
1069       for $ChainID (@{$OptionsInfo{SpecifiedChains}}) {
1070         if (exists $ChainsAndResiduesInfoRef->{Residues}{$ChainID}) {
1071           push @SpecifiedChains, $ChainID;
1072         }
1073         else {
1074           warn "Warning: Ignoring file $PDBFile: Specified chain, $ChainID, in \"-c, --chains\" option doesn't exist.\n";
1075           next FILELIST;
1076         }
1077       }
1078     }
1079     elsif ($OptionsInfo{ChainsToExtract} =~ /^First$/i) {
1080       push @SpecifiedChains, $ChainsAndResiduesInfoRef->{ChainIDs}[0];
1081     }
1082     elsif ($OptionsInfo{ChainsToExtract} =~ /^All$/i) {
1083       push @SpecifiedChains, @{$ChainsAndResiduesInfoRef->{ChainIDs}};
1084     }
1085     # Setup chain labels to use for sequence IDs and generating output files...
1086     @ChainLabels = ();
1087     for $ChainID (@SpecifiedChains) {
1088       $ChainLabel = $ChainID; $ChainLabel =~ s/^None//ig;
1089       $ChainLabel = "Chain${ChainLabel}";
1090       push @ChainLabels, $ChainLabel;
1091     }
1092 
1093     # Make sure specified distance origin is valid...
1094     @DistanceOrigin = ();
1095     if ($OptionsInfo{Mode} =~ /^Distance$/i) {
1096       if ($OptionsInfo{ExtractionDistanceMode} =~ /^(Atom|Hetatm)$/i) {
1097         my($RecordType, $SpecifiedAtomName, $SpecifiedAtomNumber, $RecordFound, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $RecordLine);
1098         $RecordType = $OptionsInfo{ExtractionDistanceMode};
1099         ($SpecifiedAtomNumber, $SpecifiedAtomName) = @{$OptionsInfo{SpecifiedExtractionDistanceOrigin}};
1100         $RecordFound = 0;
1101         LINE: for $RecordLine (@{$PDBRecordLinesRef}) {
1102           if (!(IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine))) {
1103             next LINE;
1104           }
1105           ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z) = ParseAtomRecordLine($RecordLine);
1106           $AtomName = RemoveLeadingAndTrailingWhiteSpaces($AtomName);
1107           if (($RecordType =~ /^Atom$/i && IsAtomRecordType($RecordLine)) || ($RecordType =~ /^Hetatm$/i && IsHetatmRecordType($RecordLine))) {
1108             if ($AtomNumber == $SpecifiedAtomNumber && $AtomName eq $SpecifiedAtomName) {
1109               $RecordFound = 1;
1110               last LINE;
1111             }
1112           }
1113         }
1114         if (!$RecordFound) {
1115           warn "Warning: Ignoring file $PDBFile: ", uc($RecordType), " record corresponding to \"--distanceorigin\" option value, $OptionsInfo{ExtractionDistanceOrigin}, doesn't exist.\n";
1116           next FILELIST;
1117         }
1118         push @DistanceOrigin, ($X, $Y, $Z);
1119       }
1120       elsif ($OptionsInfo{ExtractionDistanceMode} =~ /^Residue$/i) {
1121         my($SpecifiedResidueNumber, $SpecifiedResidueName, $SpecifiedChainID, $RecordFound, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $RecordLine);
1122         $SpecifiedChainID = '';
1123         if (@{$OptionsInfo{SpecifiedExtractionDistanceOrigin}} == 3) {
1124           ($SpecifiedResidueNumber, $SpecifiedResidueName, $SpecifiedChainID) = @{$OptionsInfo{SpecifiedExtractionDistanceOrigin}};
1125         }
1126         else {
1127           ($SpecifiedResidueNumber, $SpecifiedResidueName) = @{$OptionsInfo{SpecifiedExtractionDistanceOrigin}};
1128         }
1129         $RecordFound = 0;
1130         LINE: for $RecordLine (@{$PDBRecordLinesRef}) {
1131           if (!(IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine))) {
1132             next LINE;
1133           }
1134           ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z) = ParseAtomRecordLine($RecordLine);
1135           $ResidueName = RemoveLeadingAndTrailingWhiteSpaces($ResidueName);
1136           $ChainID = RemoveLeadingAndTrailingWhiteSpaces($ChainID);
1137           if ($SpecifiedChainID && ($SpecifiedChainID ne $ChainID)) {
1138             next LINE;
1139           }
1140           if ($ResidueNumber == $SpecifiedResidueNumber && $ResidueName eq $SpecifiedResidueName) {
1141             # Store coordinates for all the atoms...
1142             $RecordFound = 1;
1143             push @DistanceOrigin, ($X, $Y, $Z);
1144             next LINE;
1145           }
1146         }
1147         if (!$RecordFound) {
1148           warn "Warning: Ignoring file $PDBFile: ATOM/HETATM record corresponding to \"--distanceorigin\" option value, $OptionsInfo{ExtractionDistanceOrigin}, doesn't exist.\n";
1149           next FILELIST;
1150         }
1151       }
1152       elsif ($OptionsInfo{ExtractionDistanceMode} =~ /^XYZ$/i) {
1153         push @DistanceOrigin, @{$OptionsInfo{SpecifiedExtractionDistanceOrigin}};
1154       }
1155     }
1156     # Setup output file names...
1157     @OutFileNames = ();
1158     $FileDir = ""; $FileName = ""; $FileExt = "";
1159     ($FileDir, $FileName, $FileExt) = ParseFileName($PDBFile);
1160     if ($OptionsInfo{OutFileRoot} && (@PDBFilesList == 1)) {
1161       my ($RootFileDir, $RootFileName, $RootFileExt) = ParseFileName($OptionsInfo{OutFileRoot});
1162       if ($RootFileName && $RootFileExt) {
1163         $FileName = $RootFileName;
1164       }
1165       else {
1166         $FileName = $OptionsInfo{OutFileRoot};
1167       }
1168       $OutFileRoot = $FileName;
1169     }
1170     else {
1171       $OutFileRoot = $FileName;
1172     }
1173     $Mode = $OptionsInfo{Mode};
1174     if ($Mode =~ /^(Atoms|CAlphas|AtomNums|AtomsRange|AtomNames|ResidueNums|ResiduesRange|ResidueNames|Distance|NonWater|NonHydrogens)$/i) {
1175       $OutFileName = '';
1176       if ($Mode =~ /^CAlphas$/i) {
1177         $OutFileName = "${OutFileRoot}CAlphas.pdb";
1178       }
1179       elsif ($Mode =~ /^Atoms$/i) {
1180         $OutFileName = "${OutFileRoot}Atoms.pdb";
1181       }
1182       elsif ($Mode =~ /^AtomNums$/i) {
1183         $OutFileName = "${OutFileRoot}AtomNums.pdb";
1184       }
1185       elsif ($Mode =~ /^AtomsRange$/i) {
1186         $OutFileName = "${OutFileRoot}AtomsRange.pdb";
1187       }
1188       elsif ($Mode =~ /^AtomNames$/i) {
1189         $OutFileName = "${OutFileRoot}AtomNames.pdb";
1190       }
1191       elsif ($Mode =~ /^ResidueNums$/i) {
1192         $OutFileName = "${OutFileRoot}ResidueNums.pdb";
1193       }
1194       elsif ($Mode =~ /^ResiduesRange$/i) {
1195         $OutFileName = "${OutFileRoot}ResiduesRange.pdb";
1196       }
1197       elsif ($Mode =~ /^ResidueNames$/i) {
1198         $OutFileName = "${OutFileRoot}ResidueNames.pdb";
1199       }
1200       elsif ($Mode =~ /^NonWater$/i) {
1201         $OutFileName = "${OutFileRoot}NonWater.pdb";
1202       }
1203       elsif ($Mode =~ /^NonHydrogens$/i) {
1204         $OutFileName = "${OutFileRoot}NonHydrogens.pdb";
1205       }
1206       elsif ($Mode =~ /^Distance$/i) {
1207         my($DistanceMode) = '';
1208         if ($OptionsInfo{ExtractionDistanceMode} =~ /^Atom$/i) {
1209           $DistanceMode = 'Atom';
1210         }
1211         elsif ($OptionsInfo{ExtractionDistanceMode} =~ /^Hetatm$/i) {
1212           $DistanceMode = 'Hetatm';
1213         }
1214         elsif ($OptionsInfo{ExtractionDistanceMode} =~ /^Residue$/i) {
1215           $DistanceMode = 'Residue';
1216         }
1217         elsif ($OptionsInfo{ExtractionDistanceMode} =~ /^XYZ$/i) {
1218           $DistanceMode = 'XYZ';
1219         }
1220         $OutFileName = "${OutFileRoot}DistanceBy${DistanceMode}.pdb";
1221       }
1222       push @OutFileNames, $OutFileName;
1223       if (!$OptionsInfo{OverwriteFiles} && (-e $OutFileName)) {
1224         warn "Warning: Ignoring file $PDBFile: The file $OutFileName already exists\n";
1225         next FILELIST;
1226       }
1227     }
1228     elsif ($Mode =~ /^(Chains|Sequences)$/i) {
1229       if ($OptionsInfo{CombineChainSequences}) {
1230         $OutFileName = ($Mode =~ /^Chains$/i) ? "${OutFileRoot}ExtractedChains.pdb" : "${OutFileRoot}SequencesChainsCombined.fasta";
1231         push @OutFileNames, $OutFileName;
1232         if (!$OptionsInfo{OverwriteFiles} && (-e $OutFileName)) {
1233           warn "Warning: Ignoring file $PDBFile: The file $OutFileName already exists\n";
1234           next FILELIST;
1235         }
1236       }
1237       else {
1238         for $ChainLabel (@ChainLabels) {
1239           $OutFileName = ($Mode =~ /^Chains$/i) ? "${OutFileRoot}${ChainLabel}.pdb" : "${OutFileRoot}Sequences${ChainLabel}.fasta";
1240           push @OutFileNames, $OutFileName;
1241           if (!$OptionsInfo{OverwriteFiles} && (-e $OutFileName)) {
1242             warn "Warning: Ignoring file $PDBFile: The file $OutFileName already exists\n";
1243             next FILELIST;
1244           }
1245         }
1246       }
1247     }
1248     @ChainSequenceIDs = ();
1249     @ChainSequenceIDsPrefix = ();
1250     if ($Mode =~ /^Sequences$/i) {
1251       my($HeaderRecordLine, $Classification, $DepositionDate, $IDCode, $IDPrefix);
1252       ($Classification, $DepositionDate, $IDCode) = GetHeaderRecordInformation($PDBRecordLinesRef);
1253 
1254       if ($OptionsInfo{SequenceIDPrefixSource} =~ /^FileName$/i) {
1255         $IDPrefix = $FileName;
1256       }
1257       elsif ($OptionsInfo{SequenceIDPrefixSource} =~ /^HeaderRecord$/i) {
1258         $IDPrefix = IsNotEmpty($IDCode) ? $IDCode : '';
1259       }
1260       else {
1261         $IDPrefix = IsNotEmpty($IDCode) ? $IDCode : $FileName;
1262       }
1263 
1264       for $ChainLabel (@ChainLabels) {
1265         push @ChainSequenceIDsPrefix, $IDPrefix;
1266         push @ChainSequenceIDs, "${IDPrefix}_${ChainLabel}|PDB";
1267       }
1268     }
1269 
1270     $PDBFilesInfo{FileOkay}[$Index] = 1;
1271     $PDBFilesInfo{OutFileRoot}[$Index] = $OutFileRoot;
1272 
1273     push @{$PDBFilesInfo{OutFileNames}[$Index]}, @OutFileNames;
1274     push @{$PDBFilesInfo{ChainLabels}[$Index]}, @ChainLabels;
1275     push @{$PDBFilesInfo{ChainSequenceIDsPrefix}[$Index]}, @ChainSequenceIDsPrefix;
1276     push @{$PDBFilesInfo{ChainSequenceIDs}[$Index]}, @ChainSequenceIDs;
1277     push @{$PDBFilesInfo{SpecifiedChains}[$Index]}, @SpecifiedChains;
1278     push @{$PDBFilesInfo{DistanceOrigin}[$Index]}, @DistanceOrigin;
1279   }
1280 }
1281 
1282 
1283 # Setup script usage  and retrieve command line arguments specified using various options...
1284 sub SetupScriptUsage {
1285 
1286   # Retrieve all the options...
1287   %Options = ();
1288   $Options{chains} = 'First';
1289   $Options{chainsrecordmode} = 'NotAcrossTER';
1290   $Options{combinechains} = 'no';
1291   $Options{distance} = 10.0;
1292   $Options{distancemode} = 'XYZ';
1293   $Options{distanceselectionmode} = 'ByAtom';
1294   $Options{keepoldrecords} = 'no';
1295   $Options{mode} = 'NonWater';
1296   $Options{modifyheader} = 'yes';
1297   $Options{nonstandardkeep} = 'yes';
1298   $Options{nonstandardcode} = 'X';
1299   $Options{sequencelength} = 80;
1300   $Options{sequenceidprefix} = 'Automatic';
1301   $Options{sequencerecords} = 'Atom';
1302   $Options{waterresiduenames} = 'Automatic';
1303 
1304   if (!GetOptions(\%Options, "atoms|a=s", "chains|c=s", "chainsrecordmode=s", "combinechains=s", "distance|d=f", "distancemode=s", "distanceorigin=s", "distanceselectionmode=s", "help|h", "keepoldrecords|k=s", "mode|m=s", "modifyheader=s", "nonstandardkeep=s", "nonstandardcode=s", "overwrite|o", "root|r=s", "recordmode=s", "residues=s", "sequencelength=i", "sequenceidprefix=s", "sequencerecords=s", "waterresiduenames=s", "workingdir|w=s")) {
1305     die "\nTo get a list of valid options and their values, use \"$ScriptName -h\" or\n\"perl -S $ScriptName -h\" command and try again...\n";
1306   }
1307   if ($Options{workingdir}) {
1308     if (! -d $Options{workingdir}) {
1309       die "Error: The value specified, $Options{workingdir}, for option \"-w --workingdir\" is not a directory name.\n";
1310     }
1311     chdir $Options{workingdir} or die "Error: Couldn't chdir $Options{workingdir}: $! \n";
1312   }
1313   if ($Options{combinechains} !~ /^(yes|no)$/i) {
1314     die "Error: The value specified, $Options{combinechains}, for option \"--CombineChains\" is not valid. Allowed values: yes or no\n";
1315   }
1316   if ($Options{chainsrecordmode} !~ /^(AcrossTER|NotAcrossTER)$/i) {
1317     die "Error: The value specified, $Options{chainsrecordmode}, for option \"--ChainsRecordMode\" is not valid. Allowed values: AcrossTER or NotAcrossTER\n";
1318   }
1319   if ($Options{distancemode} !~ /^(Atom|Hetatm|Residue|XYZ)$/i) {
1320     die "Error: The value specified, $Options{distancemode}, for option \"--DistanceMode\" is not valid. Allowed values: Atom, Hetatm, Residue, or XYZ\n";
1321   }
1322   if ($Options{distanceselectionmode} !~ /^(ByAtom|ByResidue)$/i) {
1323     die "Error: The value specified, $Options{distanceselectionmode}, for option \"--DistanceSelectionMode\" is not valid. Allowed values: ByAtom or ByResidue\n";
1324   }
1325   if ($Options{keepoldrecords} !~ /^(yes|no)$/i) {
1326     die "Error: The value specified, $Options{keepoldrecords}, for option \"--KeepOldRecords\" is not valid. Allowed values: yes or no\n";
1327   }
1328   if ($Options{mode} !~ /^(Chains|Sequences|Atoms|CAlphas|AtomNums|AtomsRange|AtomNames|ResidueNums|ResidueNames|ResiduesRange|Distance|NonWater|NonHydrogens)$/i) {
1329     die "Error: The value specified, $Options{mode}, for option \"m, --mode\" is not valid. Allowed values: Chains, Sequences, Atoms, CAlphas, AtomNums, AtomsRange, AtomNames, ResidueNums, ResiduesRange, ResidueNames, Distance, NonWater, NonHydrogens\n";
1330   }
1331   if ($Options{modifyheader} !~ /^(yes|no)$/i) {
1332     die "Error: The value specified, $Options{modifyheader}, for option \"--ModifyHeader\" is not valid. Allowed values: yes or no\n";
1333   }
1334   if ($Options{nonstandardkeep} !~ /^(yes|no)$/i) {
1335     die "Error: The value specified, $Options{nonstandardkeep}, for option \"--NonStandardKeep\" is not valid. Allowed values: yes or no\n";
1336   }
1337   if ($Options{nonstandardcode} !~ /^(\?|\-|X)$/i) {
1338     die "Error: The value specified, $Options{nonstandardcode}, for option \"--NonStandardCode\" is not valid. Allowed values: ?, -, or X\n";
1339   }
1340   if ($Options{recordmode} && $Options{recordmode} !~ /^(Atom|Hetatm|AtomAndHetatm)$/i) {
1341     die "Error: The value specified, $Options{recordmode}, for option \"--RecordMode\" is not valid. Allowed values: Atom, Hetatm, AtomAndHetatm\n";
1342   }
1343   if (!IsPositiveInteger($Options{sequencelength})) {
1344     die "Error: The value specified, $Options{sequencelength}, for option \"--SequenceLength\" is not valid. Allowed values: >0\n";
1345   }
1346   if ($Options{sequencerecords} !~ /^(Atom|SeqRes)$/i) {
1347     die "Error: The value specified, $Options{sequencerecords}, for option \"--SequenceRecords\" is not valid. Allowed values: Atom or SeqRes\n";
1348   }
1349   if ($Options{sequenceidprefix} !~ /^(FileName|HeaderRecord|Automatic)$/i) {
1350     die "Error: The value specified, $Options{sequenceidprefix}, for option \"--SequenceIDPrefix\" is not valid. Allowed values: FileName, HeaderRecord, or AutomaticAtom\n";
1351   }
1352 }
1353