MayaChemTools

   1 #!/usr/bin/perl -w
   2 #
   3 # File: ModifyPDBFiles.pl
   4 # Author: Manish Sud <msud@san.rr.com>
   5 #
   6 # Copyright (C) 2024 Manish Sud. All rights reserved.
   7 #
   8 # This file is part of MayaChemTools.
   9 #
  10 # MayaChemTools is free software; you can redistribute it and/or modify it under
  11 # the terms of the GNU Lesser General Public License as published by the Free
  12 # Software Foundation; either version 3 of the License, or (at your option) any
  13 # later version.
  14 #
  15 # MayaChemTools is distributed in the hope that it will be useful, but without
  16 # any warranty; without even the implied warranty of merchantability of fitness
  17 # for a particular purpose.  See the GNU Lesser General Public License for more
  18 # details.
  19 #
  20 # You should have received a copy of the GNU Lesser General Public License
  21 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  22 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  23 # Boston, MA, 02111-1307, USA.
  24 #
  25 
  26 use strict;
  27 use FindBin; use lib "$FindBin::Bin/../lib";
  28 use Getopt::Long;
  29 use File::Basename;
  30 use Text::ParseWords;
  31 use Benchmark;
  32 use FileUtil;
  33 use TextUtil;
  34 use PDBFileUtil;
  35 
  36 my($ScriptName, %Options, $StartTime, $EndTime, $TotalTime);
  37 
  38 # Autoflush STDOUT
  39 $| = 1;
  40 
  41 # Starting message...
  42 $ScriptName = basename($0);
  43 print "\n$ScriptName: Starting...\n\n";
  44 $StartTime = new Benchmark;
  45 
  46 # Get the options and setup script...
  47 SetupScriptUsage();
  48 if ($Options{help} || @ARGV < 1) {
  49   die GetUsageFromPod("$FindBin::Bin/$ScriptName");
  50 }
  51 
  52 my(@PDBFilesList);
  53 @PDBFilesList = ExpandFileNames(\@ARGV, "pdb");
  54 
  55 # Process options...
  56 print "Processing options...\n";
  57 my(%OptionsInfo);
  58 ProcessOptions();
  59 
  60 # Setup information about input files...
  61 print "Checking input PDB file(s)...\n";
  62 my(%PDBFilesInfo);
  63 RetrievePDBFilesInfo();
  64 
  65 # Process input files..
  66 my($FileIndex);
  67 if (@PDBFilesList > 1) {
  68   print "\nProcessing PDB files...\n";
  69 }
  70 for $FileIndex (0 .. $#PDBFilesList) {
  71   if ($PDBFilesInfo{FileOkay}[$FileIndex]) {
  72     print "\nProcessing file $PDBFilesList[$FileIndex]...\n";
  73     ModifyPDBFiles($FileIndex);
  74   }
  75 }
  76 print "\n$ScriptName:Done...\n\n";
  77 
  78 $EndTime = new Benchmark;
  79 $TotalTime = timediff ($EndTime, $StartTime);
  80 print "Total time: ", timestr($TotalTime), "\n";
  81 
  82 ###############################################################################
  83 
  84 # Modify appropriate information...
  85 sub ModifyPDBFiles {
  86   my($FileIndex) = @_;
  87   my($PDBFile, $PDBRecordLinesRef);
  88 
  89   # Get PDB data...
  90   $PDBFile = $PDBFilesList[$FileIndex];
  91   $PDBRecordLinesRef = ReadPDBFile($PDBFile);
  92 
  93   if ($OptionsInfo{Mode} =~ /^RenumberAtoms$/i) {
  94     RenumberAtoms($FileIndex, $PDBRecordLinesRef);
  95   }
  96   elsif ($OptionsInfo{Mode} =~ /^RenumberResidues$/i) {
  97     RenumberResidues($FileIndex, $PDBRecordLinesRef);
  98   }
  99   elsif ($OptionsInfo{Mode} =~ /^RenumberWaters$/i) {
 100     RenumberWaters($FileIndex, $PDBRecordLinesRef);
 101   }
 102   elsif ($OptionsInfo{Mode} =~ /^RenameChainIDs$/i) {
 103     RenameChainsIDs($FileIndex, $PDBRecordLinesRef);
 104   }
 105 }
 106 
 107 # Renumber atom and hetro atom numbers...
 108 sub RenumberAtoms {
 109   my($FileIndex, $PDBRecordLinesRef) = @_;
 110   my($PDBFileName,  $RecordLine, $ConectRecordLinesRef, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge, $NewAtomNumber, $RecordType, %OldToNewAtomNumbersMap);
 111 
 112   $PDBFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][0];
 113   print "Generating PDBFileName file $PDBFileName...\n";
 114   open OUTFILE, ">$PDBFileName" or die "Error: Can't open $PDBFileName: $! \n";
 115 
 116   # Write out header and other older recors...
 117   WriteHeaderAndOlderRecords(\*OUTFILE, $PDBRecordLinesRef);
 118 
 119   # Write out all ATOM records along with TER and model records to indicate
 120   # chains and multiple models..
 121   %OldToNewAtomNumbersMap = ();
 122   $NewAtomNumber = $OptionsInfo{StartingAtomNumber};
 123   for $RecordLine (@{$PDBRecordLinesRef}) {
 124     if (IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine)) {
 125       $RecordType = GetPDBRecordType($RecordLine);
 126 
 127       ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseAtomOrHetatmRecordLine($RecordLine);
 128 
 129       print OUTFILE GenerateAtomOrHetatmRecordLine($RecordType, $NewAtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge), "\n";
 130 
 131       $OldToNewAtomNumbersMap{$AtomNumber} = $NewAtomNumber;
 132       $NewAtomNumber++;
 133     }
 134     elsif (IsTerRecordType($RecordLine)) {
 135       $NewAtomNumber++;
 136       print OUTFILE GenerateTerRecordLine($NewAtomNumber, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode), "\n";
 137     }
 138     elsif (IsModelRecordType($RecordLine)) {
 139       print OUTFILE "$RecordLine\n";
 140     }
 141     elsif (IsEndmdlRecordType($RecordLine)) {
 142       print OUTFILE "$RecordLine\n";
 143       # Restart numbering...
 144       $NewAtomNumber = $OptionsInfo{StartingAtomNumber};
 145     }
 146   }
 147 
 148   # Write out modified CONECT records...
 149   my($ModifiedConectAtomNum, $ConectAtomNum, @ConectAtomNums, @ModifiedConectAtomNums);
 150   LINE: for $RecordLine (@{$PDBRecordLinesRef}) {
 151     if (!IsConectRecordType($RecordLine)) {
 152       next LINE;
 153     }
 154     @ConectAtomNums = ();
 155     @ModifiedConectAtomNums = ();
 156     push @ConectAtomNums, ParseConectRecordLine($RecordLine);
 157     ATOMNUMBER: for $ConectAtomNum (@ConectAtomNums) {
 158       $ModifiedConectAtomNum = $ConectAtomNum;
 159       if (defined($ConectAtomNum)) {
 160         $AtomNumber = $ConectAtomNum;
 161         if ($AtomNumber) {
 162           if (exists $OldToNewAtomNumbersMap{$AtomNumber}) {
 163             $ModifiedConectAtomNum = $OldToNewAtomNumbersMap{$AtomNumber};
 164           }
 165         }
 166       }
 167       push @ModifiedConectAtomNums, $ModifiedConectAtomNum;
 168     }
 169     # Write out the record...
 170     print OUTFILE GenerateConectRecordLine(@ModifiedConectAtomNums), "\n";
 171   }
 172 
 173   # Write out END record...
 174   print OUTFILE GenerateEndRecordLine(), "\n";
 175 
 176   close OUTFILE;
 177 }
 178 
 179 # Renumber residues...
 180 sub RenumberResidues {
 181   my($FileIndex, $PDBRecordLinesRef) = @_;
 182   my($PDBFileName,  $RecordLine, $ConectRecordLinesRef, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge, $NewResidueNumber, $NewHetatmResidueNumber, $TERCount, $TotalTERCount, $PreviousResidueNumber, $PreviousHetatmResidueNumber, $RecordType);
 183 
 184   $PDBFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][0];
 185   print "Generating PDBFileName file $PDBFileName...\n";
 186   open OUTFILE, ">$PDBFileName" or die "Error: Can't open $PDBFileName: $! \n";
 187 
 188   # Write out header and other older recors...
 189   WriteHeaderAndOlderRecords(\*OUTFILE, $PDBRecordLinesRef);
 190 
 191   # Do a quick count of all TER records...
 192   $TotalTERCount = 0;
 193   for $RecordLine (@{$PDBRecordLinesRef}) {
 194     if (IsTerRecordType($RecordLine)) {
 195       $TotalTERCount++;
 196     }
 197   }
 198 
 199   # Write out all ATOM records along with TER and model records to indicate
 200   # chains and multiple models..
 201   $NewResidueNumber = $OptionsInfo{StartingResidueNumber};
 202   $NewHetatmResidueNumber = $OptionsInfo{StartingHetatmResidueNumber};
 203 
 204   $TERCount = 0;
 205   $PreviousResidueNumber = 0;
 206   $PreviousHetatmResidueNumber = 0;
 207 
 208   for $RecordLine (@{$PDBRecordLinesRef}) {
 209     if (IsAtomRecordType($RecordLine) || (IsHetatmRecordType($RecordLine) && ($TERCount < $TotalTERCount || $OptionsInfo{HetatmResidueNumberMode} =~ /^Automatic$/i))) {
 210       $RecordType = GetPDBRecordType($RecordLine);
 211       ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseAtomOrHetatmRecordLine($RecordLine);
 212 
 213       if ($PreviousResidueNumber && $PreviousResidueNumber != $ResidueNumber) {
 214         $PreviousResidueNumber = $ResidueNumber;
 215         $NewResidueNumber++;
 216       }
 217       else {
 218         # First residue in a chain...
 219         $PreviousResidueNumber = $ResidueNumber;
 220       }
 221       print OUTFILE GenerateAtomOrHetatmRecordLine($RecordType, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $NewResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge), "\n";
 222 
 223     }
 224     elsif (IsHetatmRecordType($RecordLine)) {
 225       ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseHetatmRecordLine($RecordLine);
 226 
 227       # User HETATM residue numbers...
 228       if ($PreviousHetatmResidueNumber && $PreviousHetatmResidueNumber != $ResidueNumber) {
 229         $PreviousHetatmResidueNumber = $ResidueNumber;
 230         $NewHetatmResidueNumber++;
 231       }
 232       else {
 233         # First HETATM residue outside a chain...
 234         $PreviousHetatmResidueNumber = $ResidueNumber;
 235       }
 236 
 237       print OUTFILE GenerateHetatmRecordLine($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $NewHetatmResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge), "\n";
 238     }
 239     elsif (IsTerRecordType($RecordLine)) {
 240       $TERCount++;
 241       $AtomNumber++;
 242       print OUTFILE GenerateTerRecordLine($AtomNumber, $ResidueName, $ChainID, $NewResidueNumber, $InsertionCode), "\n";
 243       # For per chain numbering, start over again...
 244       if ($OptionsInfo{ResidueNumberMode} =~ /^PerChain$/i) {
 245         if ($TERCount < $TotalTERCount ) {
 246           $NewResidueNumber = $OptionsInfo{StartingResidueNumber};
 247         }
 248         $PreviousResidueNumber = 0;
 249       }
 250     }
 251     elsif (IsModelRecordType($RecordLine)) {
 252       print OUTFILE "$RecordLine\n";
 253     }
 254     elsif (IsEndmdlRecordType($RecordLine)) {
 255       print OUTFILE "$RecordLine\n";
 256     }
 257   }
 258 
 259   # Write out CONECT records...
 260   for $RecordLine (@{$PDBRecordLinesRef}) {
 261     if (IsConectRecordType($RecordLine)) {
 262       print OUTFILE "$RecordLine\n";
 263     }
 264   }
 265 
 266   # Write out END record...
 267   print OUTFILE GenerateEndRecordLine(), "\n";
 268 
 269   close OUTFILE;
 270 }
 271 
 272 # Renumber water residues...
 273 sub RenumberWaters {
 274   my($FileIndex, $PDBRecordLinesRef) = @_;
 275   my($PDBFileName,  $RecordLine, $ConectRecordLinesRef, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge, $NewResidueNumber, $RecordType);
 276 
 277   $PDBFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][0];
 278   print "Generating PDBFileName file $PDBFileName...\n";
 279   open OUTFILE, ">$PDBFileName" or die "Error: Can't open $PDBFileName: $! \n";
 280 
 281   # Write out header and other older recors...
 282   WriteHeaderAndOlderRecords(\*OUTFILE, $PDBRecordLinesRef);
 283 
 284   # Write out all ATOM records along with TER and model records to indicate
 285   # chains and multiple models..
 286   $NewResidueNumber = $OptionsInfo{StartingWaterResidueNumber};
 287   for $RecordLine (@{$PDBRecordLinesRef}) {
 288     if (IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine)) {
 289       $RecordType = GetPDBRecordType($RecordLine);
 290 
 291       ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseAtomOrHetatmRecordLine($RecordLine);
 292 
 293       if (exists $OptionsInfo{SpecifiedWaterResiduesMap}{$ResidueName}) {
 294         $ResidueNumber = $NewResidueNumber;
 295         print OUTFILE GenerateAtomOrHetatmRecordLine($RecordType, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge), "\n";
 296         $NewResidueNumber++;
 297       }
 298       else {
 299         print OUTFILE "$RecordLine\n";
 300       }
 301     }
 302     elsif (IsTerRecordType($RecordLine)) {
 303       print OUTFILE "$RecordLine\n";
 304     }
 305     elsif (IsModelRecordType($RecordLine)) {
 306       print OUTFILE "$RecordLine\n";
 307     }
 308     elsif (IsEndmdlRecordType($RecordLine)) {
 309       print OUTFILE "$RecordLine\n";
 310     }
 311   }
 312 
 313   # Write out CONECT records...
 314   for $RecordLine (@{$PDBRecordLinesRef}) {
 315     if (IsConectRecordType($RecordLine)) {
 316       print OUTFILE "$RecordLine\n";
 317     }
 318   }
 319 
 320   # Write out END record...
 321   print OUTFILE GenerateEndRecordLine(), "\n";
 322 
 323   close OUTFILE;
 324 }
 325 
 326 # Rename chain IDs...
 327 sub RenameChainsIDs {
 328   my($FileIndex, $PDBRecordLinesRef) = @_;
 329   my($PDBFileName,  $RecordLine, $ConectRecordLinesRef, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge, $RecordType, $PreviousChainID, $FirstChainID, $NewChainID, $NewChainIDCounter, %OldToNewChainIDsMap);
 330 
 331   $PDBFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][0];
 332   print "Generating PDBFileName file $PDBFileName...\n";
 333   open OUTFILE, ">$PDBFileName" or die "Error: Can't open $PDBFileName: $! \n";
 334 
 335   # Write out header and other older recors...
 336   WriteHeaderAndOlderRecords(\*OUTFILE, $PDBRecordLinesRef);
 337 
 338   # Write out all ATOM records along with TER and model records to indicate
 339   # chains and multiple models..
 340   %OldToNewChainIDsMap = ();
 341   $NewChainIDCounter = $OptionsInfo{StartingChainID};
 342   $FirstChainID = 1;
 343   $PreviousChainID = '';
 344   LINE: for $RecordLine (@{$PDBRecordLinesRef}) {
 345     if (IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine)) {
 346       $RecordType = GetPDBRecordType($RecordLine);
 347 
 348       ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseAtomOrHetatmRecordLine($RecordLine);
 349 
 350       if (exists $OptionsInfo{SpecifiedWaterResiduesMap}{$ResidueName}) {
 351         # Chain IDs are not assigned to water residues...
 352         print OUTFILE "$RecordLine\n";
 353         next LINE;
 354       }
 355 
 356       if ($FirstChainID) {
 357         $FirstChainID = 0;
 358         $PreviousChainID = $ChainID;
 359         if ($ChainID || (!$ChainID && $OptionsInfo{RenameEmptyChainIDs})) {
 360           $NewChainID = $NewChainIDCounter;
 361           $OldToNewChainIDsMap{$ChainID} = $NewChainID;
 362         }
 363         else {
 364           $NewChainID = '';
 365         }
 366       }
 367       elsif ($PreviousChainID ne $ChainID) {
 368         if ($ChainID || (!$ChainID && $OptionsInfo{RenameEmptyChainIDs})) {
 369           $PreviousChainID = $ChainID;
 370           if (exists $OldToNewChainIDsMap{$ChainID}) {
 371             $NewChainID = $OldToNewChainIDsMap{$ChainID};
 372           }
 373           else {
 374             $NewChainIDCounter++;
 375             $NewChainID = $NewChainIDCounter;
 376             $OldToNewChainIDsMap{$ChainID} = $NewChainID;
 377           }
 378         }
 379         else {
 380           $NewChainID = '';
 381         }
 382       }
 383 
 384       print OUTFILE GenerateAtomOrHetatmRecordLine($RecordType, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $NewChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge), "\n";
 385     }
 386     elsif (IsTerRecordType($RecordLine)) {
 387       $AtomNumber++;
 388       print OUTFILE GenerateTerRecordLine($AtomNumber, $ResidueName, $NewChainID, $ResidueNumber, $InsertionCode), "\n";
 389     }
 390     elsif (IsModelRecordType($RecordLine)) {
 391       print OUTFILE "$RecordLine\n";
 392     }
 393     elsif (IsEndmdlRecordType($RecordLine)) {
 394       print OUTFILE "$RecordLine\n";
 395     }
 396   }
 397 
 398   # Write out CONECT records...
 399   for $RecordLine (@{$PDBRecordLinesRef}) {
 400     if (IsConectRecordType($RecordLine)) {
 401       print OUTFILE "$RecordLine\n";
 402     }
 403   }
 404 
 405   # Write out END record...
 406   print OUTFILE GenerateEndRecordLine(), "\n";
 407 
 408   close OUTFILE;
 409 }
 410 
 411 
 412 # Write out modifed header and other older records...
 413 sub WriteHeaderAndOlderRecords {
 414   my($OutFileRef, $PDBRecordLinesRef) = @_;
 415 
 416   if ($OptionsInfo{ModifyHeaderRecord}) {
 417     # Write out modified HEADER record...
 418     my($Classification, $DepositionDate, $IDCode) = GetHeaderRecordInformation($PDBRecordLinesRef);
 419     $Classification = 'Data modified using MayaChemTools';
 420     print $OutFileRef GenerateHeaderRecordLine($IDCode, $Classification), "\n";
 421   }
 422   else {
 423     print $OutFileRef $PDBRecordLinesRef->[0], "\n";
 424   }
 425 
 426   # Write out any old records...
 427   if ($OptionsInfo{KeepOldRecords}) {
 428     my($RecordLineIndex, $RecordLine);
 429     # Skip HEADER record and write out older records all the way upto first MODEL/ATOM/HETATM records from input file...
 430     RECORDLINE: for $RecordLineIndex (1 .. $#{$PDBRecordLinesRef}) {
 431       $RecordLine = $PDBRecordLinesRef->[$RecordLineIndex];
 432       if (IsModelRecordType($RecordLine) || IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine)) {
 433         last RECORDLINE;
 434       }
 435       print $OutFileRef "$RecordLine\n";
 436     }
 437   }
 438 }
 439 
 440 # Get header record information assuming it's the first record...
 441 sub GetHeaderRecordInformation {
 442   my($PDBRecordLinesRef) = @_;
 443   my($Classification, $DepositionDate, $IDCode, $HeaderRecordLine);
 444 
 445   ($Classification, $DepositionDate, $IDCode) = ('') x 3;
 446   $HeaderRecordLine = $PDBRecordLinesRef->[0];
 447   if (IsHeaderRecordType($HeaderRecordLine)) {
 448     ($Classification, $DepositionDate, $IDCode) = ParseHeaderRecordLine($HeaderRecordLine);
 449   }
 450   return ($Classification, $DepositionDate, $IDCode);
 451 }
 452 
 453 
 454 # Process option values...
 455 sub ProcessOptions {
 456   %OptionsInfo = ();
 457   $OptionsInfo{Mode} = $Options{mode};
 458 
 459   $OptionsInfo{StartingAtomNumber} = $Options{atomnumberstart};
 460   $OptionsInfo{StartingChainID} = $Options{chainidstart};
 461   $OptionsInfo{RenameEmptyChainIDs} = ($Options{chainidrenameempty} =~ /^Yes$/i) ? 1 : 0;
 462 
 463   $OptionsInfo{KeepOldRecords} = ($Options{keepoldrecords} =~ /^Yes$/i) ? 1 : 0;
 464   $OptionsInfo{ModifyHeaderRecord} = ($Options{modifyheader} =~ /^Yes$/i) ? 1 : 0;
 465 
 466   $OptionsInfo{ResidueNumberMode} = $Options{residuenumbermode};
 467   $OptionsInfo{StartingResidueNumber} = $Options{residuenumberstart};
 468 
 469   $OptionsInfo{HetatmResidueNumberMode} = $Options{residuenumberhetatmmode};
 470   $OptionsInfo{StartingHetatmResidueNumber} = $Options{residuenumberstarthetatm};
 471 
 472   $OptionsInfo{OverwriteFiles} = $Options{overwrite} ? 1 : 0;
 473   $OptionsInfo{OutFileRoot} = $Options{root} ? $Options{root} : 0;
 474 
 475   $OptionsInfo{WaterResidueNames} = $Options{waterresiduenames};
 476   $OptionsInfo{StartingWaterResidueNumber} = $Options{waterresiduestart};
 477   @{$OptionsInfo{SpecifiedWaterResiduesList}} = ();
 478   %{$OptionsInfo{SpecifiedWaterResiduesMap}} = ();
 479 
 480   my(@SpecifiedWaterResiduesList);
 481   @SpecifiedWaterResiduesList = ();
 482   my($WaterResidueName);
 483   if ($OptionsInfo{WaterResidueNames} =~ /Automatic/i) {
 484     push @SpecifiedWaterResiduesList, ('HOH', 'WAT', 'H2O');
 485   }
 486   else {
 487     @SpecifiedWaterResiduesList = split /\,/, $Options{waterresiduenames};
 488   }
 489   for $WaterResidueName (@SpecifiedWaterResiduesList) {
 490     $OptionsInfo{SpecifiedWaterResiduesMap}{$WaterResidueName} = $WaterResidueName;
 491   }
 492   push @{$OptionsInfo{SpecifiedWaterResiduesList}}, @SpecifiedWaterResiduesList;
 493 }
 494 
 495 # Retrieve information about PDB files...
 496 sub RetrievePDBFilesInfo {
 497   my($Index, $PDBFile, $PDBRecordLinesRef, $ChainsAndResiduesInfoRef, $FileDir, $FileName, $FileExt, $OutFileName, $OutFileRoot,  $Mode, $OutFileMode, @OutFileNames);
 498 
 499   %PDBFilesInfo = ();
 500   @{$PDBFilesInfo{FileOkay}} = ();
 501   @{$PDBFilesInfo{OutFileRoot}} = ();
 502   @{$PDBFilesInfo{OutFileNames}} = ();
 503 
 504   FILELIST: for $Index (0 .. $#PDBFilesList) {
 505     $PDBFilesInfo{FileOkay}[$Index] = 0;
 506 
 507     $PDBFilesInfo{OutFileRoot}[$Index] = '';
 508     @{$PDBFilesInfo{OutFileNames}[$Index]} = ();
 509     @{$PDBFilesInfo{OutFileNames}[$Index]} = ();
 510 
 511     $PDBFile = $PDBFilesList[$Index];
 512     if (!(-e $PDBFile)) {
 513       warn "Warning: Ignoring file $PDBFile: It doesn't exist\n";
 514       next FILELIST;
 515     }
 516     if (!CheckFileType($PDBFile, "pdb")) {
 517       warn "Warning: Ignoring file $PDBFile: It's not a PDB file\n";
 518       next FILELIST;
 519     }
 520     if (! open PDBFILE, "$PDBFile") {
 521       warn "Warning: Ignoring file $PDBFile: Couldn't open it: $! \n";
 522       next FILELIST;
 523     }
 524     close PDBFILE;
 525 
 526     # Get PDB data...
 527     $PDBRecordLinesRef = ReadPDBFile($PDBFile);
 528     $ChainsAndResiduesInfoRef = GetChainsAndResidues($PDBRecordLinesRef);
 529     if (!scalar @{$ChainsAndResiduesInfoRef->{ChainIDs}}) {
 530       warn "Warning: Ignoring file $PDBFile: No chains found \n";
 531       next FILELIST;
 532     }
 533 
 534     # Setup output file names...
 535     @OutFileNames = ();
 536     $FileDir = ""; $FileName = ""; $FileExt = "";
 537     ($FileDir, $FileName, $FileExt) = ParseFileName($PDBFile);
 538     if ($OptionsInfo{OutFileRoot} && (@PDBFilesList == 1)) {
 539       my ($RootFileDir, $RootFileName, $RootFileExt) = ParseFileName($OptionsInfo{OutFileRoot});
 540       if ($RootFileName && $RootFileExt) {
 541         $FileName = $RootFileName;
 542       }
 543       else {
 544         $FileName = $OptionsInfo{OutFileRoot};
 545       }
 546       $OutFileRoot = $FileName;
 547     }
 548     else {
 549       $OutFileRoot = $FileName;
 550     }
 551     $Mode = $OptionsInfo{Mode};
 552     MODE: {
 553         if ($Mode =~ /^RenumberAtoms$/i) { $OutFileMode = 'RenumberAtoms'; last MODE;}
 554         if ($Mode =~ /^RenumberResidues$/i) { $OutFileMode = 'RenumberResidues'; last MODE;}
 555         if ($Mode =~ /^RenumberWaters$/i) { $OutFileMode = 'RenumberWaters'; last MODE;}
 556         if ($Mode =~ /^RenameChainIDs$/i) { $OutFileMode = 'RenameChainIDs'; last MODE;}
 557         $OutFileMode = '';
 558     }
 559     $OutFileName = "${OutFileRoot}${OutFileMode}.pdb";
 560     push @OutFileNames, $OutFileName;
 561 
 562     $PDBFilesInfo{FileOkay}[$Index] = 1;
 563     $PDBFilesInfo{OutFileRoot}[$Index] = $OutFileRoot;
 564 
 565     push @{$PDBFilesInfo{OutFileNames}[$Index]}, @OutFileNames;
 566   }
 567 }
 568 
 569 # Setup script usage  and retrieve command line arguments specified using various options...
 570 sub SetupScriptUsage {
 571 
 572   # Retrieve all the options...
 573   %Options = ();
 574   $Options{atomnumberstart} = 1;
 575   $Options{chainidstart} = 'A';
 576   $Options{chainidrenameempty} = 'No';
 577   $Options{keepoldrecords} = 'no';
 578   $Options{mode} = 'RenumberResidues';
 579   $Options{modifyheader} = 'yes';
 580   $Options{residuenumbermode} = 'PerChain';
 581   $Options{residuenumberstart} = 1;
 582   $Options{residuenumberhetatmmode} = 'Automatic';
 583   $Options{residuenumberstarthetatm} = 6000;
 584   $Options{waterresiduenames} = 'Automatic';
 585   $Options{waterresiduestart} = 8000;
 586 
 587   if (!GetOptions(\%Options, "help|h", "atomnumberstart|a=i", "chainidstart|c=s", "chainidrenameempty=s", "keepoldrecords|k=s", "mode|m=s", "modifyheader=s", "overwrite|o", "residuenumbermode=s", "residuenumberstart=i", "residuenumberhetatmmode=s", "residuenumberstarthetatm=i", "root|r=s", "sequencelength=i", "waterresiduenames=s", "waterresiduestart=i", "workingdir|w=s")) {
 588     die "\nTo get a list of valid options and their values, use \"$ScriptName -h\" or\n\"perl -S $ScriptName -h\" command and try again...\n";
 589   }
 590   if ($Options{workingdir}) {
 591     if (! -d $Options{workingdir}) {
 592       die "Error: The value specified, $Options{workingdir}, for option \"-w --workingdir\" is not a directory name.\n";
 593     }
 594     chdir $Options{workingdir} or die "Error: Couldn't chdir $Options{workingdir}: $! \n";
 595   }
 596   if (!IsPositiveInteger($Options{atomnumberstart})) {
 597     die "Error: The value specified, $Options{atomnumberstart}, for option \"-a, --AtomNumberStart\" is not valid. Allowed values: >0\n";
 598   }
 599   if ((length($Options{chainidstart}) > 1) || ($Options{chainidstart} !~ /[A-Z]/i)) {
 600     die "Error: The value specified, $Options{chainidstart}, for option \"-c, --ChainIDStart\" is not valid. Allowed values: a single character from A to Z\n";
 601   }
 602   if ($Options{chainidrenameempty} !~ /^(yes|no)$/i) {
 603     die "Error: The value specified, $Options{chainidrenameempty}, for option \"--chainidrenameempty\" is not valid. Allowed values: yes or no\n";
 604   }
 605   if ($Options{keepoldrecords} !~ /^(yes|no)$/i) {
 606     die "Error: The value specified, $Options{keepoldrecords}, for option \"--KeepOldRecords\" is not valid. Allowed values: yes or no\n";
 607   }
 608   if ($Options{mode} !~ /^(RenumberAtoms|RenumberResidues|RenumberWaters|RenameChainIDs)$/i) {
 609     die "Error: The value specified, $Options{mode}, for option \"-m, --mode\" is not valid. Allowed values: RenumberAtoms, RenumberResidues, RenumberWaters or RenameChainIDs\n";
 610   }
 611   if ($Options{modifyheader} !~ /^(yes|no)$/i) {
 612     die "Error: The value specified, $Options{modifyheader}, for option \"--ModifyHeader\" is not valid. Allowed values: yes or no\n";
 613   }
 614   if ($Options{residuenumbermode} !~ /^(Sequential|PerChain)$/i) {
 615     die "Error: The value specified, $Options{residuenumbermode}, for option \"--ResidueNumberMode\" is not valid. Allowed values: Sequential or PerChain\n";
 616   }
 617   if (!IsPositiveInteger($Options{residuenumberstart})) {
 618     die "Error: The value specified, $Options{residuenumberstart}, for option \"--ResidueNumberStart\" is not valid. Allowed values: >0\n";
 619   }
 620   if ($Options{residuenumberhetatmmode} !~ /^(automatic|specify)$/i) {
 621     die "Error: The value specified, $Options{residuenumberhetatmmode}, for option \"--residuenumbermode\" is not valid. Allowed values: automatic or specify\n";
 622   }
 623   if (!IsPositiveInteger($Options{residuenumberstarthetatm})) {
 624     die "Error: The value specified, $Options{residuenumberstarthetatm}, for option \"--residuenumberstartHetatm\" is not valid. Allowed values: >0\n";
 625   }
 626   if (!IsPositiveInteger $Options{waterresiduestart}) {
 627     die "Error: The value specified, $Options{waterresiduestart}, for option \"--waterresiduestart\" is not valid. Allowed values: >0\n";
 628   }
 629 }
 630