#!/usr/bin/perl -w
#
# File: ModifyPDBFiles.pl
# Author: Manish Sud <msud@san.rr.com>
#
# Copyright (C) 2024 Manish Sud. All rights reserved.
#
# This file is part of MayaChemTools.
#
# MayaChemTools is free software; you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation; either version 3 of the License, or (at your option) any
# later version.
#
# MayaChemTools is distributed in the hope that it will be useful, but without
# any warranty; without even the implied warranty of merchantability of fitness
# for a particular purpose. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
# write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
# Boston, MA, 02111-1307, USA.
#

use strict;
use FindBin; use lib "$FindBin::Bin/../lib";
use Getopt::Long;
use File::Basename;
use Text::ParseWords;
use Benchmark;
use FileUtil;
use TextUtil;
use PDBFileUtil;

my($ScriptName, %Options, $StartTime, $EndTime, $TotalTime);

# Autoflush STDOUT
$| = 1;

# Starting message...
$ScriptName = basename($0);
print "\n$ScriptName: Starting...\n\n";
$StartTime = Benchmark->new;

# Get the options and setup script...
SetupScriptUsage();
if ($Options{help} || @ARGV < 1) {
  die GetUsageFromPod("$FindBin::Bin/$ScriptName");
}

my(@PDBFilesList);
@PDBFilesList = ExpandFileNames(\@ARGV, "pdb");

# Process options...
print "Processing options...\n";
my(%OptionsInfo);
ProcessOptions();

# Setup information about input files...
print "Checking input PDB file(s)...\n";
my(%PDBFilesInfo);
RetrievePDBFilesInfo();

# Process input files; only those flagged okay by RetrievePDBFilesInfo are modified...
if (@PDBFilesList > 1) {
  print "\nProcessing PDB files...\n";
}
for my $FileIndex (0 .. $#PDBFilesList) {
  if ($PDBFilesInfo{FileOkay}[$FileIndex]) {
    print "\nProcessing file $PDBFilesList[$FileIndex]...\n";
    ModifyPDBFiles($FileIndex);
  }
}
print "\n$ScriptName:Done...\n\n";

$EndTime = Benchmark->new;
$TotalTime = timediff ($EndTime, $StartTime);
print "Total time: ", timestr($TotalTime), "\n";

###############################################################################

# Read the PDB file at position $FileIndex in @PDBFilesList and hand its record
# lines to the handler for the mode selected by $OptionsInfo{Mode}.
#
# The mode is matched case-insensitively; an unrecognized mode is silently
# ignored here (SetupScriptUsage is the place that rejects invalid modes).
sub ModifyPDBFiles {
  my($FileIndex) = @_;
  my($PDBFile, $PDBRecordLinesRef, $ModeHandlerRef, %ModeHandlersMap);

  %ModeHandlersMap = (
    renumberatoms    => \&RenumberAtoms,
    renumberresidues => \&RenumberResidues,
    renumberwaters   => \&RenumberWaters,
    renamechainids   => \&RenameChainsIDs,
  );

  # Get PDB data...
  $PDBFile = $PDBFilesList[$FileIndex];
  $PDBRecordLinesRef = ReadPDBFile($PDBFile);

  $ModeHandlerRef = $ModeHandlersMap{lc $OptionsInfo{Mode}};
  if (defined $ModeHandlerRef) {
    $ModeHandlerRef->($FileIndex, $PDBRecordLinesRef);
  }
  return;
}

# Renumber atom and hetero atom serial numbers...
#
# Writes the output file named in $PDBFilesInfo{OutFileNames}[$FileIndex][0]:
# header/older records, then ATOM/HETATM/TER/MODEL/ENDMDL records with serial
# numbers reassigned starting from $OptionsInfo{StartingAtomNumber}, then CONECT
# records rewritten through the old-to-new serial number map, then an END record.
# Numbering restarts after each ENDMDL record.
sub RenumberAtoms {
  my($FileIndex, $PDBRecordLinesRef) = @_;
  my($PDBFileName, $RecordLine, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge, $NewAtomNumber, $RecordType, $ConectAtomNum, @ModifiedConectAtomNums, %OldToNewAtomNumbersMap);

  $PDBFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][0];
  print "Generating PDBFileName file $PDBFileName...\n";
  open my $OutFH, '>', $PDBFileName or die "Error: Can't open $PDBFileName: $! \n";

  # Write out header and other older records...
  WriteHeaderAndOlderRecords($OutFH, $PDBRecordLinesRef);

  # Write out all ATOM records along with TER and model records to indicate
  # chains and multiple models..
  %OldToNewAtomNumbersMap = ();
  $NewAtomNumber = $OptionsInfo{StartingAtomNumber};
  for $RecordLine (@{$PDBRecordLinesRef}) {
    if (IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine)) {
      $RecordType = GetPDBRecordType($RecordLine);

      ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseAtomOrHetatmRecordLine($RecordLine);

      print {$OutFH} GenerateAtomOrHetatmRecordLine($RecordType, $NewAtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge), "\n";

      $OldToNewAtomNumbersMap{$AtomNumber} = $NewAtomNumber;
      $NewAtomNumber++;
    }
    elsif (IsTerRecordType($RecordLine)) {
      # $NewAtomNumber already holds the next unused serial number. Use it for
      # the TER record and only then advance; incrementing first would skip a
      # number and give the TER record and the following atom the same serial.
      # The residue fields are those of the most recently parsed atom record.
      print {$OutFH} GenerateTerRecordLine($NewAtomNumber, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode), "\n";
      $NewAtomNumber++;
    }
    elsif (IsModelRecordType($RecordLine)) {
      print {$OutFH} "$RecordLine\n";
    }
    elsif (IsEndmdlRecordType($RecordLine)) {
      print {$OutFH} "$RecordLine\n";
      # Restart numbering...
      $NewAtomNumber = $OptionsInfo{StartingAtomNumber};
    }
  }

  # Write out modified CONECT records; atom numbers without an entry in the
  # map (including undefined/empty fields) are passed through unchanged...
  for $RecordLine (@{$PDBRecordLinesRef}) {
    if (!IsConectRecordType($RecordLine)) {
      next;
    }
    @ModifiedConectAtomNums = ();
    for $ConectAtomNum (ParseConectRecordLine($RecordLine)) {
      if ($ConectAtomNum && exists $OldToNewAtomNumbersMap{$ConectAtomNum}) {
        push @ModifiedConectAtomNums, $OldToNewAtomNumbersMap{$ConectAtomNum};
      }
      else {
        push @ModifiedConectAtomNums, $ConectAtomNum;
      }
    }
    print {$OutFH} GenerateConectRecordLine(@ModifiedConectAtomNums), "\n";
  }

  # Write out END record...
  print {$OutFH} GenerateEndRecordLine(), "\n";

  # Buffered write errors surface at close...
  close $OutFH or die "Error: Can't close $PDBFileName: $! \n";
  return;
}

# Renumber residues...
#
# Writes the output file named in $PDBFilesInfo{OutFileNames}[$FileIndex][0].
# ATOM records (and HETATM records, see below) get residue numbers starting at
# $OptionsInfo{StartingResidueNumber}; a new number is used whenever the
# original residue number changes. TER records delimit chains:
#
#   . PerChain mode: numbering restarts after every TER except the last one.
#   . Sequential mode: numbering continues across chains.
#
# HETATM records appearing after the last TER record are numbered along with
# ATOM records in Automatic mode; otherwise they use a separate counter that
# starts at $OptionsInfo{StartingHetatmResidueNumber}.
#
# An undefined "previous residue number" marks the start of a chain; using 0
# as the marker would misfire for files containing a residue numbered 0.
sub RenumberResidues {
  my($FileIndex, $PDBRecordLinesRef) = @_;
  my($PDBFileName, $RecordLine, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge, $NewResidueNumber, $NewHetatmResidueNumber, $TERCount, $TotalTERCount, $PreviousResidueNumber, $PreviousHetatmResidueNumber, $RecordType, $AutomaticHetatmMode, $PerChainMode);

  $PDBFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][0];
  print "Generating PDBFileName file $PDBFileName...\n";
  open my $OutFH, '>', $PDBFileName or die "Error: Can't open $PDBFileName: $! \n";

  # Write out header and other older records...
  WriteHeaderAndOlderRecords($OutFH, $PDBRecordLinesRef);

  # Do a quick count of all TER records...
  $TotalTERCount = 0;
  for $RecordLine (@{$PDBRecordLinesRef}) {
    if (IsTerRecordType($RecordLine)) {
      $TotalTERCount++;
    }
  }

  $AutomaticHetatmMode = ($OptionsInfo{HetatmResidueNumberMode} =~ /^Automatic$/i) ? 1 : 0;
  $PerChainMode = ($OptionsInfo{ResidueNumberMode} =~ /^PerChain$/i) ? 1 : 0;

  # Write out all ATOM records along with TER and model records to indicate
  # chains and multiple models..
  $NewResidueNumber = $OptionsInfo{StartingResidueNumber};
  $NewHetatmResidueNumber = $OptionsInfo{StartingHetatmResidueNumber};

  $TERCount = 0;
  $PreviousResidueNumber = undef;
  $PreviousHetatmResidueNumber = undef;

  for $RecordLine (@{$PDBRecordLinesRef}) {
    if (IsAtomRecordType($RecordLine) || (IsHetatmRecordType($RecordLine) && ($TERCount < $TotalTERCount || $AutomaticHetatmMode))) {
      $RecordType = GetPDBRecordType($RecordLine);
      ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseAtomOrHetatmRecordLine($RecordLine);

      # Advance only when moving on to a different residue within a chain; the
      # first residue after a chain boundary uses the current number as is...
      if (defined($PreviousResidueNumber) && $PreviousResidueNumber != $ResidueNumber) {
        $NewResidueNumber++;
      }
      $PreviousResidueNumber = $ResidueNumber;

      print {$OutFH} GenerateAtomOrHetatmRecordLine($RecordType, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $NewResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge), "\n";
    }
    elsif (IsHetatmRecordType($RecordLine)) {
      # HETATM record after the last TER record with user specified numbering...
      ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseHetatmRecordLine($RecordLine);

      if (defined($PreviousHetatmResidueNumber) && $PreviousHetatmResidueNumber != $ResidueNumber) {
        $NewHetatmResidueNumber++;
      }
      $PreviousHetatmResidueNumber = $ResidueNumber;

      print {$OutFH} GenerateHetatmRecordLine($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $NewHetatmResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge), "\n";
    }
    elsif (IsTerRecordType($RecordLine)) {
      $TERCount++;
      # TER serial number is one past the last atom written; the residue
      # fields are those of the most recently parsed atom record...
      $AtomNumber++;
      print {$OutFH} GenerateTerRecordLine($AtomNumber, $ResidueName, $ChainID, $NewResidueNumber, $InsertionCode), "\n";

      if ($PerChainMode && $TERCount < $TotalTERCount) {
        # For per chain numbering, start over again...
        $NewResidueNumber = $OptionsInfo{StartingResidueNumber};
      }
      else {
        # Sequential numbering, or anything following the last chain: move
        # past the number just used so that the next residue can't share it
        # with the last residue of this chain, even when both happen to carry
        # the same residue number in the input file...
        $NewResidueNumber++;
      }
      $PreviousResidueNumber = undef;
    }
    elsif (IsModelRecordType($RecordLine)) {
      print {$OutFH} "$RecordLine\n";
    }
    elsif (IsEndmdlRecordType($RecordLine)) {
      print {$OutFH} "$RecordLine\n";
    }
  }

  # Write out CONECT records; atom numbers are untouched in this mode...
  for $RecordLine (@{$PDBRecordLinesRef}) {
    if (IsConectRecordType($RecordLine)) {
      print {$OutFH} "$RecordLine\n";
    }
  }

  # Write out END record...
  print {$OutFH} GenerateEndRecordLine(), "\n";

  # Buffered write errors surface at close...
  close $OutFH or die "Error: Can't close $PDBFileName: $! \n";
  return;
}

# Renumber water residues...
sub RenumberWaters {
  # Writes the output file named in $PDBFilesInfo{OutFileNames}[$FileIndex][0].
  # ATOM/HETATM records whose residue name is a key of
  # $OptionsInfo{SpecifiedWaterResiduesMap} get residue numbers starting at
  # $OptionsInfo{StartingWaterResidueNumber}; all other ATOM/HETATM, TER,
  # MODEL, ENDMDL and CONECT records are copied unchanged.
  #
  # The number advances once per water residue, not once per atom record, so
  # that waters carrying explicit hydrogens keep all their atoms together. A
  # new water residue is assumed whenever chain ID, residue number or insertion
  # code changes, or when an atom name/alternate location pair repeats (which
  # covers input files that give every water the same residue number).
  my($FileIndex, $PDBRecordLinesRef) = @_;
  my($PDBFileName, $RecordLine, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge, $NewResidueNumber, $RecordType, $WaterResidueID, $PreviousWaterResidueID, $WaterAtomID, %WaterAtomIDsSeen);

  $PDBFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][0];
  print "Generating PDBFileName file $PDBFileName...\n";
  open my $OutFH, '>', $PDBFileName or die "Error: Can't open $PDBFileName: $! \n";

  # Write out header and other older records...
  WriteHeaderAndOlderRecords($OutFH, $PDBRecordLinesRef);

  # Write out all ATOM records along with TER and model records to indicate
  # chains and multiple models..
  $NewResidueNumber = $OptionsInfo{StartingWaterResidueNumber};
  $PreviousWaterResidueID = undef;
  %WaterAtomIDsSeen = ();
  for $RecordLine (@{$PDBRecordLinesRef}) {
    if (IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine)) {
      $RecordType = GetPDBRecordType($RecordLine);

      ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseAtomOrHetatmRecordLine($RecordLine);

      if (exists $OptionsInfo{SpecifiedWaterResiduesMap}{$ResidueName}) {
        $WaterResidueID = join(':', map { defined($_) ? $_ : '' } ($ChainID, $ResidueNumber, $InsertionCode));
        $WaterAtomID = join(':', map { defined($_) ? $_ : '' } ($AtomName, $AlternateLocation));

        # The first water uses the starting number as is...
        if (defined($PreviousWaterResidueID) && ($WaterResidueID ne $PreviousWaterResidueID || exists $WaterAtomIDsSeen{$WaterAtomID})) {
          $NewResidueNumber++;
          %WaterAtomIDsSeen = ();
        }
        $PreviousWaterResidueID = $WaterResidueID;
        $WaterAtomIDsSeen{$WaterAtomID} = 1;

        print {$OutFH} GenerateAtomOrHetatmRecordLine($RecordType, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $NewResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge), "\n";
      }
      else {
        print {$OutFH} "$RecordLine\n";
      }
    }
    elsif (IsTerRecordType($RecordLine) || IsModelRecordType($RecordLine) || IsEndmdlRecordType($RecordLine)) {
      print {$OutFH} "$RecordLine\n";
    }
  }

  # Write out CONECT records...
  for $RecordLine (@{$PDBRecordLinesRef}) {
    if (IsConectRecordType($RecordLine)) {
      print {$OutFH} "$RecordLine\n";
    }
  }

  # Write out END record...
  print {$OutFH} GenerateEndRecordLine(), "\n";

  # Buffered write errors surface at close...
  close $OutFH or die "Error: Can't close $PDBFileName: $! \n";
  return;
}

# Rename chain IDs...
#
# Writes the output file named in $PDBFilesInfo{OutFileNames}[$FileIndex][0].
# Each distinct chain ID found on non-water ATOM/HETATM records is mapped to a
# new ID: the first one gets $OptionsInfo{StartingChainID} and each further new
# chain gets the next value produced by Perl's string increment. A chain ID
# seen again later reuses its earlier mapping. Records with an empty chain ID
# are left empty unless $OptionsInfo{RenameEmptyChainIDs} is set. Water records
# (residue name in $OptionsInfo{SpecifiedWaterResiduesMap}) are copied as is.
sub RenameChainsIDs {
  my($FileIndex, $PDBRecordLinesRef) = @_;
  my($PDBFileName, $RecordLine, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge, $RecordType, $NewChainID, $NewChainIDCounter, %OldToNewChainIDsMap);

  $PDBFileName = $PDBFilesInfo{OutFileNames}[$FileIndex][0];
  print "Generating PDBFileName file $PDBFileName...\n";
  open my $OutFH, '>', $PDBFileName or die "Error: Can't open $PDBFileName: $! \n";

  # Write out header and other older records...
  WriteHeaderAndOlderRecords($OutFH, $PDBRecordLinesRef);

  # Write out all ATOM records along with TER and model records to indicate
  # chains and multiple models..
  %OldToNewChainIDsMap = ();
  $NewChainIDCounter = $OptionsInfo{StartingChainID};
  $NewChainID = '';
  LINE: for $RecordLine (@{$PDBRecordLinesRef}) {
    if (IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine)) {
      $RecordType = GetPDBRecordType($RecordLine);

      ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseAtomOrHetatmRecordLine($RecordLine);

      if (exists $OptionsInfo{SpecifiedWaterResiduesMap}{$ResidueName}) {
        # Chain IDs are not assigned to water residues...
        print {$OutFH} "$RecordLine\n";
        next LINE;
      }

      if (!defined $ChainID) {
        $ChainID = '';
      }

      # Test for emptiness by length: a chain ID of "0" is a real chain ID
      # even though it is false in boolean context...
      if (length($ChainID) || $OptionsInfo{RenameEmptyChainIDs}) {
        if (!exists $OldToNewChainIDsMap{$ChainID}) {
          # The very first renamed chain gets the starting chain ID itself...
          if (%OldToNewChainIDsMap) {
            $NewChainIDCounter++;
            if (length($NewChainIDCounter) > 1) {
              # String increment turns Z into AA, which no longer fits the
              # single character chain ID column of the PDB format...
              warn "Warning: New chain ID $NewChainIDCounter is longer than one character: Too many chains to rename\n";
            }
          }
          $OldToNewChainIDsMap{$ChainID} = $NewChainIDCounter;
        }
        # Look up the mapping for every record instead of tracking the
        # previous chain ID, so that going back to an earlier chain after
        # records without a chain ID still picks up the right new ID...
        $NewChainID = $OldToNewChainIDsMap{$ChainID};
      }
      else {
        $NewChainID = '';
      }

      print {$OutFH} GenerateAtomOrHetatmRecordLine($RecordType, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $NewChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge), "\n";
    }
    elsif (IsTerRecordType($RecordLine)) {
      # TER serial number is one past the most recently parsed atom record,
      # whose residue fields are reused along with the current new chain ID...
      $AtomNumber++;
      print {$OutFH} GenerateTerRecordLine($AtomNumber, $ResidueName, $NewChainID, $ResidueNumber, $InsertionCode), "\n";
    }
    elsif (IsModelRecordType($RecordLine) || IsEndmdlRecordType($RecordLine)) {
      print {$OutFH} "$RecordLine\n";
    }
  }

  # Write out CONECT records...
  for $RecordLine (@{$PDBRecordLinesRef}) {
    if (IsConectRecordType($RecordLine)) {
      print {$OutFH} "$RecordLine\n";
    }
  }

  # Write out END record...
  print {$OutFH} GenerateEndRecordLine(), "\n";

  # Buffered write errors surface at close...
  close $OutFH or die "Error: Can't close $PDBFileName: $! \n";
  return;
}


# Write out modified header and other older records...
#
# $OutFileRef is an open output file handle (glob reference or lexical handle)
# and $PDBRecordLinesRef the reference to all record lines of the input file.
#
#   . $OptionsInfo{ModifyHeaderRecord} set: a new HEADER record is generated
#     using the ID code of the input HEADER record, if any.
#   . Otherwise the first input record is copied, unless it is a MODEL, ATOM or
#     HETATM record: callers write those themselves, so copying it here would
#     duplicate it in the output file.
#   . $OptionsInfo{KeepOldRecords} set: the records up to the first MODEL, ATOM
#     or HETATM record are copied as well.
sub WriteHeaderAndOlderRecords {
  my($OutFileRef, $PDBRecordLinesRef) = @_;
  my($FirstRecordLine, $FirstRecordIsHeader, $FirstOldRecordIndex);

  $FirstRecordLine = $PDBRecordLinesRef->[0];
  $FirstRecordIsHeader = (defined($FirstRecordLine) && IsHeaderRecordType($FirstRecordLine)) ? 1 : 0;

  # Index of the first record not yet taken care of by the header handling...
  $FirstOldRecordIndex = 1;

  if ($OptionsInfo{ModifyHeaderRecord}) {
    # Write out modified HEADER record...
    my($Classification, $DepositionDate, $IDCode) = GetHeaderRecordInformation($PDBRecordLinesRef);
    $Classification = 'Data modified using MayaChemTools';
    print {$OutFileRef} GenerateHeaderRecordLine($IDCode, $Classification), "\n";
    if (!$FirstRecordIsHeader) {
      # Nothing was replaced; the first record is still an old record...
      $FirstOldRecordIndex = 0;
    }
  }
  elsif (defined($FirstRecordLine) && !(IsModelRecordType($FirstRecordLine) || IsAtomRecordType($FirstRecordLine) || IsHetatmRecordType($FirstRecordLine))) {
    print {$OutFileRef} "$FirstRecordLine\n";
  }

  # Write out any old records...
  if ($OptionsInfo{KeepOldRecords}) {
    my($RecordLineIndex, $RecordLine);
    # Write out older records all the way up to the first MODEL/ATOM/HETATM record from input file...
    RECORDLINE: for $RecordLineIndex ($FirstOldRecordIndex .. $#{$PDBRecordLinesRef}) {
      $RecordLine = $PDBRecordLinesRef->[$RecordLineIndex];
      if (IsModelRecordType($RecordLine) || IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine)) {
        last RECORDLINE;
      }
      print {$OutFileRef} "$RecordLine\n";
    }
  }
  return;
}

# Get header record information assuming it's the first record...
#
# Returns (Classification, DepositionDate, IDCode) parsed from the first record
# line; all three are empty strings when that line is missing or is not a
# HEADER record.
sub GetHeaderRecordInformation {
  my($PDBRecordLinesRef) = @_;
  my($HeaderRecordLine);

  $HeaderRecordLine = $PDBRecordLinesRef->[0];
  if (defined($HeaderRecordLine) && IsHeaderRecordType($HeaderRecordLine)) {
    return ParseHeaderRecordLine($HeaderRecordLine);
  }
  return ('', '', '');
}


# Process option values...
#
# Copies the command line option values from %Options into %OptionsInfo,
# turning the yes/no options into 1/0 flags, and sets up the list and lookup
# map of residue names treated as water. Automatic water residue names are
# HOH, WAT and H2O; otherwise the comma delimited names supplied by the user
# are used, with surrounding white space removed and empty names dropped.
sub ProcessOptions {
  my($WaterResidueName, @SpecifiedWaterResiduesList);

  %OptionsInfo = ();
  $OptionsInfo{Mode} = $Options{mode};

  $OptionsInfo{StartingAtomNumber} = $Options{atomnumberstart};
  $OptionsInfo{StartingChainID} = $Options{chainidstart};
  $OptionsInfo{RenameEmptyChainIDs} = ($Options{chainidrenameempty} =~ /^Yes$/i) ? 1 : 0;

  $OptionsInfo{KeepOldRecords} = ($Options{keepoldrecords} =~ /^Yes$/i) ? 1 : 0;
  $OptionsInfo{ModifyHeaderRecord} = ($Options{modifyheader} =~ /^Yes$/i) ? 1 : 0;

  $OptionsInfo{ResidueNumberMode} = $Options{residuenumbermode};
  $OptionsInfo{StartingResidueNumber} = $Options{residuenumberstart};

  $OptionsInfo{HetatmResidueNumberMode} = $Options{residuenumberhetatmmode};
  $OptionsInfo{StartingHetatmResidueNumber} = $Options{residuenumberstarthetatm};

  $OptionsInfo{OverwriteFiles} = $Options{overwrite} ? 1 : 0;
  $OptionsInfo{OutFileRoot} = $Options{root} ? $Options{root} : 0;

  $OptionsInfo{WaterResidueNames} = $Options{waterresiduenames};
  $OptionsInfo{StartingWaterResidueNumber} = $Options{waterresiduestart};
  @{$OptionsInfo{SpecifiedWaterResiduesList}} = ();
  %{$OptionsInfo{SpecifiedWaterResiduesMap}} = ();

  @SpecifiedWaterResiduesList = ();
  if ($OptionsInfo{WaterResidueNames} =~ /Automatic/i) {
    push @SpecifiedWaterResiduesList, ('HOH', 'WAT', 'H2O');
  }
  else {
    for $WaterResidueName (split /\,/, $Options{waterresiduenames}) {
      # A name such as " WAT" from "HOH, WAT" would never match a residue name...
      $WaterResidueName =~ s/^\s+//;
      $WaterResidueName =~ s/\s+$//;
      if (length $WaterResidueName) {
        push @SpecifiedWaterResiduesList, $WaterResidueName;
      }
    }
  }
  for $WaterResidueName (@SpecifiedWaterResiduesList) {
    $OptionsInfo{SpecifiedWaterResiduesMap}{$WaterResidueName} = $WaterResidueName;
  }
  push @{$OptionsInfo{SpecifiedWaterResiduesList}}, @SpecifiedWaterResiduesList;
  return;
}

# Retrieve information about PDB files...
#
# Fills %PDBFilesInfo with three lists indexed like @PDBFilesList:
#
#   . FileOkay: 1 when the file is to be processed, 0 otherwise
#   . OutFileRoot: root used for the output file name
#   . OutFileNames: reference to the list of output file names
#
# A file is skipped with a warning when it doesn't exist, is not a PDB file,
# can't be opened, contains no chains, or when its output file already exists
# and the overwrite option has not been specified. The output file name is
# <Root><Mode>.pdb where the root comes from the root option, for a single
# input file only, or from the input file name.
sub RetrievePDBFilesInfo {
  my($Index, $PDBFile, $PDBFH, $PDBRecordLinesRef, $ChainsAndResiduesInfoRef, $FileDir, $FileName, $FileExt, $OutFileName, $OutFileRoot, $OutFileMode, %OutFileModesMap);

  %OutFileModesMap = (
    renumberatoms    => 'RenumberAtoms',
    renumberresidues => 'RenumberResidues',
    renumberwaters   => 'RenumberWaters',
    renamechainids   => 'RenameChainIDs',
  );

  %PDBFilesInfo = ();
  @{$PDBFilesInfo{FileOkay}} = ();
  @{$PDBFilesInfo{OutFileRoot}} = ();
  @{$PDBFilesInfo{OutFileNames}} = ();

  FILELIST: for $Index (0 .. $#PDBFilesList) {
    $PDBFilesInfo{FileOkay}[$Index] = 0;

    $PDBFilesInfo{OutFileRoot}[$Index] = '';
    @{$PDBFilesInfo{OutFileNames}[$Index]} = ();

    $PDBFile = $PDBFilesList[$Index];
    if (!(-e $PDBFile)) {
      warn "Warning: Ignoring file $PDBFile: It doesn't exist\n";
      next FILELIST;
    }
    if (!CheckFileType($PDBFile, "pdb")) {
      warn "Warning: Ignoring file $PDBFile: It's not a PDB file\n";
      next FILELIST;
    }
    if (!open $PDBFH, '<', $PDBFile) {
      warn "Warning: Ignoring file $PDBFile: Couldn't open it: $! \n";
      next FILELIST;
    }
    close $PDBFH;

    # Get PDB data...
    $PDBRecordLinesRef = ReadPDBFile($PDBFile);
    $ChainsAndResiduesInfoRef = GetChainsAndResidues($PDBRecordLinesRef);
    if (!scalar @{$ChainsAndResiduesInfoRef->{ChainIDs}}) {
      warn "Warning: Ignoring file $PDBFile: No chains found \n";
      next FILELIST;
    }

    # Setup output file names...
    ($FileDir, $FileName, $FileExt) = ParseFileName($PDBFile);
    $OutFileRoot = $FileName;
    if ($OptionsInfo{OutFileRoot} && (@PDBFilesList == 1)) {
      # Drop the extension from the specified root, if it has one...
      my ($RootFileDir, $RootFileName, $RootFileExt) = ParseFileName($OptionsInfo{OutFileRoot});
      $OutFileRoot = ($RootFileName && $RootFileExt) ? $RootFileName : $OptionsInfo{OutFileRoot};
    }

    $OutFileMode = exists $OutFileModesMap{lc $OptionsInfo{Mode}} ? $OutFileModesMap{lc $OptionsInfo{Mode}} : '';
    $OutFileName = "${OutFileRoot}${OutFileMode}.pdb";

    # Honor the overwrite option instead of silently clobbering existing files...
    if (!$OptionsInfo{OverwriteFiles} && -e $OutFileName) {
      warn "Warning: Ignoring file $PDBFile: The file $OutFileName already exists\n";
      next FILELIST;
    }

    $PDBFilesInfo{FileOkay}[$Index] = 1;
    $PDBFilesInfo{OutFileRoot}[$Index] = $OutFileRoot;

    push @{$PDBFilesInfo{OutFileNames}[$Index]}, $OutFileName;
  }
  return;
}

# Setup script usage and retrieve command line arguments specified using various options...
#
# Sets the default option values in %Options, lets GetOptions override them
# from the command line, changes to the working directory when one is
# specified, and dies with an error message on the first invalid option value.
sub SetupScriptUsage {

  # Retrieve all the options...
  %Options = ();
  $Options{atomnumberstart} = 1;
  $Options{chainidstart} = 'A';
  $Options{chainidrenameempty} = 'No';
  $Options{keepoldrecords} = 'no';
  $Options{mode} = 'RenumberResidues';
  $Options{modifyheader} = 'yes';
  $Options{residuenumbermode} = 'PerChain';
  $Options{residuenumberstart} = 1;
  $Options{residuenumberhetatmmode} = 'Automatic';
  $Options{residuenumberstarthetatm} = 6000;
  $Options{waterresiduenames} = 'Automatic';
  $Options{waterresiduestart} = 8000;

  if (!GetOptions(\%Options, "help|h", "atomnumberstart|a=i", "chainidstart|c=s", "chainidrenameempty=s", "keepoldrecords|k=s", "mode|m=s", "modifyheader=s", "overwrite|o", "residuenumbermode=s", "residuenumberstart=i", "residuenumberhetatmmode=s", "residuenumberstarthetatm=i", "root|r=s", "sequencelength=i", "waterresiduenames=s", "waterresiduestart=i", "workingdir|w=s")) {
    die "\nTo get a list of valid options and their values, use \"$ScriptName -h\" or\n\"perl -S $ScriptName -h\" command and try again...\n";
  }
  if ($Options{workingdir}) {
    if (! -d $Options{workingdir}) {
      die "Error: The value specified, $Options{workingdir}, for option \"-w --workingdir\" is not a directory name.\n";
    }
    chdir $Options{workingdir} or die "Error: Couldn't chdir $Options{workingdir}: $! \n";
  }
  if (!IsPositiveInteger($Options{atomnumberstart})) {
    die "Error: The value specified, $Options{atomnumberstart}, for option \"-a, --AtomNumberStart\" is not valid. Allowed values: >0\n";
  }
  # Exactly one letter...
  if ($Options{chainidstart} !~ /^[A-Z]$/i) {
    die "Error: The value specified, $Options{chainidstart}, for option \"-c, --ChainIDStart\" is not valid. Allowed values: a single character from A to Z\n";
  }
  if ($Options{chainidrenameempty} !~ /^(yes|no)$/i) {
    die "Error: The value specified, $Options{chainidrenameempty}, for option \"--chainidrenameempty\" is not valid. Allowed values: yes or no\n";
  }
  if ($Options{keepoldrecords} !~ /^(yes|no)$/i) {
    die "Error: The value specified, $Options{keepoldrecords}, for option \"--KeepOldRecords\" is not valid. Allowed values: yes or no\n";
  }
  if ($Options{mode} !~ /^(RenumberAtoms|RenumberResidues|RenumberWaters|RenameChainIDs)$/i) {
    die "Error: The value specified, $Options{mode}, for option \"-m, --mode\" is not valid. Allowed values: RenumberAtoms, RenumberResidues, RenumberWaters or RenameChainIDs\n";
  }
  if ($Options{modifyheader} !~ /^(yes|no)$/i) {
    die "Error: The value specified, $Options{modifyheader}, for option \"--ModifyHeader\" is not valid. Allowed values: yes or no\n";
  }
  if ($Options{residuenumbermode} !~ /^(Sequential|PerChain)$/i) {
    die "Error: The value specified, $Options{residuenumbermode}, for option \"--ResidueNumberMode\" is not valid. Allowed values: Sequential or PerChain\n";
  }
  if (!IsPositiveInteger($Options{residuenumberstart})) {
    die "Error: The value specified, $Options{residuenumberstart}, for option \"--ResidueNumberStart\" is not valid. Allowed values: >0\n";
  }
  if ($Options{residuenumberhetatmmode} !~ /^(automatic|specify)$/i) {
    die "Error: The value specified, $Options{residuenumberhetatmmode}, for option \"--ResidueNumberHetatmMode\" is not valid. Allowed values: automatic or specify\n";
  }
  if (!IsPositiveInteger($Options{residuenumberstarthetatm})) {
    die "Error: The value specified, $Options{residuenumberstarthetatm}, for option \"--residuenumberstartHetatm\" is not valid. Allowed values: >0\n";
  }
  if (!IsPositiveInteger($Options{waterresiduestart})) {
    die "Error: The value specified, $Options{waterresiduestart}, for option \"--waterresiduestart\" is not valid. Allowed values: >0\n";
  }
  return;
}