MayaChemTools

   1 package PDBFileUtil;
   2 #
   3 # File: PDBFileUtil.pm
   4 # Author: Manish Sud <msud@san.rr.com>
   5 #
   6 # Copyright (C) 2024 Manish Sud. All rights reserved.
   7 #
   8 # This file is part of MayaChemTools.
   9 #
  10 # MayaChemTools is free software; you can redistribute it and/or modify it under
  11 # the terms of the GNU Lesser General Public License as published by the Free
  12 # Software Foundation; either version 3 of the License, or (at your option) any
  13 # later version.
  14 #
  15 # MayaChemTools is distributed in the hope that it will be useful, but without
  16 # any warranty; without even the implied warranty of merchantability of fitness
  17 # for a particular purpose.  See the GNU Lesser General Public License for more
  18 # details.
  19 #
  20 # You should have received a copy of the GNU Lesser General Public License
  21 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  22 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  23 # Boston, MA, 02111-1307, USA.
  24 #
  25 
  26 use strict;
  27 use Exporter;
  28 use Text::ParseWords;
  29 use TextUtil;
  30 use FileUtil;
  31 use TimeUtil ();
  32 
  33 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
  34 
  35 @ISA = qw(Exporter);
  36 @EXPORT = qw(GetPDBRecordType GetRecordTypesCount GetAllResidues GetConectRecordLines GetChainsAndResidues GetExperimentalTechnique GetExperimentalTechniqueResolution GetMinMaxCoords IsPDBFile IsAtomRecordType IsConectRecordType IsHeaderRecordType IsHetatmRecordType IsSeqresRecordType IsModelRecordType IsEndmdlRecordType IsTerRecordType IsMasterRecordType ReadPDBFile ParseHeaderRecordLine GenerateHeaderRecordLine GenerateHeaderRecordTimeStamp ParseAtomRecordLine GenerateAtomRecordLine ParseAtomOrHetatmRecordLine GenerateAtomOrHetatmRecordLine GenerateHetatmRecordLine ParseHetatmRecordLine ParseConectRecordLine GenerateConectRecordLine ParseExpdtaRecordLine ParseRemark2ResolutionRecordLine ParseSeqresRecordLine ParseTerRecordLine GenerateTerRecordLine ParseMasterRecordLine GenerateEndRecordLine);
  37 @EXPORT_OK = qw();
  38 
  39 %EXPORT_TAGS = (all  => [@EXPORT, @EXPORT_OK]);
  40 
  41 # Get PDB record type...
  42 sub GetPDBRecordType {
  43   my($Line) = @_;
  44 
  45   return _GetRecordType($Line);
  46 }
  47 
  48 # Is it a PDB file?
  49 sub IsPDBFile {
  50   my($PDBFile) = @_;
  51   my($Line, $Status);
  52 
  53   $Status = 0;
  54   open PDBFILE, "$PDBFile" or die "Can't open $PDBFile: $!\n";
  55   $Line = GetTextLine(\*PDBFILE);
  56   $Status = ($Line =~ /^HEADER/i) ? 1 : 0;
  57   close PDBFILE;
  58 
  59   return $Status;
  60 }
  61 
  62 # Is it a atom record type?
  63 sub IsAtomRecordType {
  64   my($Line) = @_;
  65 
  66   return _IsRecordType($Line, 'ATOM');
  67 }
  68 
  69 # Is it a connect record type?
  70 sub IsConectRecordType {
  71   my($Line) = @_;
  72 
  73   return _IsRecordType($Line, 'CONECT');
  74 }
  75 
  76 # Is it a header atom record type?
  77 sub IsHeaderRecordType {
  78   my($Line) = @_;
  79 
  80   return _IsRecordType($Line, 'HEADER');
  81 }
  82 
  83 # Is it a hetro atom record type?
  84 sub IsHetatmRecordType {
  85   my($Line) = @_;
  86 
  87   return _IsRecordType($Line, 'HETATM');
  88 }
  89 
  90 # Is it a seqres record type?
  91 sub IsSeqresRecordType {
  92   my($Line) = @_;
  93 
  94   return _IsRecordType($Line, 'SEQRES');
  95 }
  96 
  97 # Is it a MODEL record type?
  98 sub IsModelRecordType {
  99   my($Line) = @_;
 100 
 101   return _IsRecordType($Line, 'MODEL');
 102 }
 103 
 104 # Is it a ENDMDL record type?
 105 sub IsEndmdlRecordType {
 106   my($Line) = @_;
 107 
 108   return _IsRecordType($Line, 'ENDMDL');
 109 }
 110 
 111 # Is it a TER record type?
 112 sub IsTerRecordType {
 113   my($Line) = @_;
 114 
 115   return _IsRecordType($Line, 'TER');
 116 }
 117 
 118 # Is it a MASTER record type?
 119 sub IsMasterRecordType {
 120   my($Line) = @_;
 121 
 122   return _IsRecordType($Line, 'MASTER');
 123 }
 124 
 125 # Count the number of each record type and a reference to data type with these key/value pairs:
 126 # {RecordTypes} - An array of unique record types in order of their presence in the file
 127 # {Count}{$RecordType} - Count of each record type
 128 # {Lines}{$RecordType} - Optional lines data for a specific record type.
 129 #
 130 sub GetRecordTypesCount {
 131   my($PDBRecordLinesRef, $SpecifiedRecordType, $GetRecordLinesFlag, $RecordType, $RecordLine, %RecordTypeDataMap);
 132 
 133   %RecordTypeDataMap = ();
 134   @{$RecordTypeDataMap{RecordTypes}} = ();
 135   %{$RecordTypeDataMap{Count}} = ();
 136   %{$RecordTypeDataMap{Lines}} = ();
 137 
 138   $SpecifiedRecordType = '';
 139   $GetRecordLinesFlag = 0;
 140   if (@_ == 3) {
 141     ($PDBRecordLinesRef, $SpecifiedRecordType, $GetRecordLinesFlag) = @_;
 142     $SpecifiedRecordType = uc $SpecifiedRecordType;
 143   }
 144   elsif (@_ == 2) {
 145     ($PDBRecordLinesRef, $SpecifiedRecordType) = @_;
 146     $SpecifiedRecordType = uc $SpecifiedRecordType;
 147   }
 148   else {
 149     ($PDBRecordLinesRef) = @_;
 150   }
 151   LINE: for $RecordLine (@{$PDBRecordLinesRef}) {
 152     $RecordType = _GetRecordType($RecordLine);
 153     if ($SpecifiedRecordType && ($SpecifiedRecordType ne $RecordType)) {
 154       next LINE;
 155     }
 156     if (exists $RecordTypeDataMap{Count}{$RecordType}) {
 157       # Update count...
 158       $RecordTypeDataMap{Count}{$RecordType} += 1;
 159 
 160       if ($GetRecordLinesFlag) {
 161         push @{$RecordTypeDataMap{Lines}{$RecordType}}, $RecordLine;
 162       }
 163     }
 164     else {
 165       # New record type...
 166       push @{$RecordTypeDataMap{RecordTypes}}, $RecordType;
 167       $RecordTypeDataMap{Count}{$RecordType} = 1;
 168 
 169       if ($GetRecordLinesFlag) {
 170         @{$RecordTypeDataMap{Lines}{$RecordType}} = ();
 171         push @{$RecordTypeDataMap{Lines}{$RecordType}}, $RecordLine;
 172       }
 173     }
 174   }
 175   return (\%RecordTypeDataMap);
 176 }
 177 
 178 # Collect CONECT record lines for specific atom number, modified specified data to exclude any atom
 179 # number not present in the list of specified atom numbers and return a reference to list of
 180 # CONECT record lines.
 181 #
 182 sub GetConectRecordLines {
 183   my($PDBRecordLinesRef, $AtomNumbersMapRef) = @_;
 184   my($AtomNumber, $ConectAtomNumber, $RecordLine, @ConectRecordAtomNums, @ConectRecordLines);
 185 
 186   @ConectRecordLines = ();
 187   LINE: for $RecordLine (@{$PDBRecordLinesRef}) {
 188     if (!IsConectRecordType($RecordLine)) {
 189       next LINE;
 190     }
 191     @ConectRecordAtomNums = ();
 192     push @ConectRecordAtomNums, ParseConectRecordLine($RecordLine);
 193     ATOMNUMBER: for $ConectAtomNumber (@ConectRecordAtomNums) {
 194       if (defined $ConectAtomNumber) {
 195         $AtomNumber = $ConectAtomNumber;
 196         if ($AtomNumber) {
 197           if (! exists $AtomNumbersMapRef->{$AtomNumber}) {
 198             next LINE;
 199           }
 200         }
 201       }
 202     }
 203     push @ConectRecordLines, $RecordLine;
 204   }
 205   return \@ConectRecordLines;
 206 }
 207 
 208 # Get chains and residue information using ATOM/HETATM or SEQRES records. And return a reference to a
 209 #  hash with these keys:
 210 #
 211 # @{$ChainsDataMap{ChainIDs}} - List of chain IDs with 'None' for no chain identification
 212 # @{$ChainsDataMap{Residues}{$ChainID}} - List of residues in order of their appearance in a chain
 213 # @{$ChainsDataMap{ResidueNumbers}{$ChainID}} - List of residue numbers in order of their appearance in a chain
 214 # %{$ChainsDataMap{ResidueCount}{$ChainID}{$ResidueName}} - Count of specific residues in a chain
 215 #
 216 # Notes:
 217 #  . Chains and residue data can be extacted using either ATOM/HETATM records or SEQRES records.
 218 #  . In addition to a different chain ID in ATOM/HETATM a TER record also indicates end of an existing chain
 219 #    and start of a new one: ChainID in ATOM/HETATM records might still be emtpy.
 220 #   . ATOM/HETATM records after the first ENDMDL records are simply ingnored.
 221 #
 222 sub GetChainsAndResidues {
 223   my($PDBRecordLinesRef, $RecordsSource, $GetChainResiduesBeyondTERFlag, $GetRecordLinesFlag);
 224 
 225   $RecordsSource = 'AtomAndHetatm';
 226   $GetChainResiduesBeyondTERFlag = 0;
 227   $GetRecordLinesFlag = 0;
 228 
 229   if (@_ == 4) {
 230     ($PDBRecordLinesRef, $RecordsSource, $GetChainResiduesBeyondTERFlag, $GetRecordLinesFlag) = @_;
 231   }
 232   elsif (@_ == 3) {
 233     ($PDBRecordLinesRef, $RecordsSource, $GetChainResiduesBeyondTERFlag) = @_;
 234   }
 235   elsif (@_ == 2) {
 236     ($PDBRecordLinesRef, $RecordsSource) = @_;
 237   }
 238   else {
 239     ($PDBRecordLinesRef) = @_;
 240   }
 241 
 242   if ($RecordsSource =~ /^AtomAndHetatm$/i) {
 243     return _GetChainsAndResiduesFromAtomHetatmRecords($PDBRecordLinesRef, $GetChainResiduesBeyondTERFlag, $GetRecordLinesFlag);
 244   }
 245   elsif ($RecordsSource =~ /^Seqres$/i) {
 246     return _GetChainsAndResiduesFromSeqresRecords($PDBRecordLinesRef);
 247   }
 248   else {
 249     my(%ChainsDataMap);
 250     %ChainsDataMap = ();
 251     @{$ChainsDataMap{ChainIDs}} = ();
 252     %{$ChainsDataMap{Residues}} = ();
 253     %{$ChainsDataMap{ResidueNumbers}} = ();
 254     %{$ChainsDataMap{ResidueCount}} = ();
 255 
 256     return \%ChainsDataMap;
 257   }
 258 }
 259 
 260 
 261 # Get residue information using ATOM/HETATM records and return a reference to a hash with
 262 # these keys:
 263 #
 264 # @{$ResiduesDataMap{ResidueNames}} - List of all the residues
 265 # %{$ResiduesDataMap{ResidueCount}{$ResidueName}} - Count of residues
 266 # @{$ResiduesDataMap{AtomResidueNames}} - List of all the residues
 267 # %{$ResiduesDataMap{AtomResidueCount}{$ResidueName}} - Count of residues in ATOM records
 268 # @{$ResiduesDataMap{HetatomResidueNames}} - List of all the residues
 269 # %{$ResiduesDataMap{HetatmResidueCount}{$ResidueName}} - Count of residues HETATM records
 270 #
 271 # Notes:
 272 #  . ATOM/HETATM records after the first ENDMDL records are simply ingnored.
 273 #
 274 sub GetAllResidues {
 275   my($PDBRecordLinesRef) = @_;
 276 
 277   my($PreviousChainID, $PreviousResidueNumber, $RecordLine, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge, %ResiduesDataMap);
 278 
 279   %ResiduesDataMap = ();
 280   @{$ResiduesDataMap{ResidueNames}} = ();
 281   %{$ResiduesDataMap{ResidueCount}} = ();
 282   @{$ResiduesDataMap{AtomResidueNames}} = ();
 283   %{$ResiduesDataMap{AtomResidueCount}} = ();
 284   @{$ResiduesDataMap{HetatmResidueNames}} = ();
 285   %{$ResiduesDataMap{HetatmResidueCount}} = ();
 286 
 287   $PreviousChainID = '';
 288   $PreviousResidueNumber = 0;
 289 
 290   LINE: for $RecordLine (@{$PDBRecordLinesRef}) {
 291     if (IsEndmdlRecordType($RecordLine)) {
 292       last LINE;
 293     }
 294     if (!(IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine))) {
 295       next LINE;
 296     }
 297     ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseAtomRecordLine($RecordLine);
 298 
 299     if ($PreviousChainID eq $ChainID) {
 300       if ($ResidueNumber == $PreviousResidueNumber) {
 301         next LINE;
 302       }
 303       $PreviousResidueNumber = $ResidueNumber;
 304     }
 305     else {
 306       # New chain...
 307       $PreviousChainID = $ChainID;
 308       $PreviousResidueNumber = $ResidueNumber;
 309     }
 310 
 311     # Store the residue and update its count...
 312     push @{$ResiduesDataMap{ResidueNames}}, $ResidueName;
 313     if (exists $ResiduesDataMap{ResidueCount}{$ResidueName}) {
 314       $ResiduesDataMap{ResidueCount}{$ResidueName} += 1;
 315     }
 316     else {
 317       $ResiduesDataMap{ResidueCount}{$ResidueName} = 1;
 318     }
 319     # Update ATOM residue data...
 320     if (IsAtomRecordType($RecordLine)) {
 321       push @{$ResiduesDataMap{AtomResidueNames}}, $ResidueName;
 322       if (exists $ResiduesDataMap{AtomResidueCount}{$ResidueName}) {
 323         $ResiduesDataMap{AtomResidueCount}{$ResidueName} += 1;
 324       }
 325       else {
 326         $ResiduesDataMap{AtomResidueCount}{$ResidueName} = 1;
 327       }
 328     }
 329     # Update HETATM residue data...
 330     if (IsHetatmRecordType($RecordLine)) {
 331       push @{$ResiduesDataMap{HetatmResidueNames}}, $ResidueName;
 332       if (exists $ResiduesDataMap{HetatmResidueCount}{$ResidueName}) {
 333         $ResiduesDataMap{HetatmResidueCount}{$ResidueName} += 1;
 334       }
 335       else {
 336         $ResiduesDataMap{HetatmResidueCount}{$ResidueName} = 1;
 337       }
 338     }
 339   }
 340 
 341   return \%ResiduesDataMap;
 342 }
 343 
 344 # Return min/max XYZ coordinates for ATOM/HETATM records...
 345 sub GetMinMaxCoords {
 346   my($PDBRecordLinesRef) = @_;
 347 
 348   my($XMin, $YMin, $ZMin, $XMax, $YMax, $ZMax, $RecordLine, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge);
 349 
 350   ($XMin, $YMin, $ZMin) = (99999) x 3;
 351   ($XMax, $YMax, $ZMax) = (-99999) x 3;
 352 
 353   LINE: for $RecordLine (@{$PDBRecordLinesRef}) {
 354     if (!(IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine))) {
 355       next LINE;
 356     }
 357     ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseAtomRecordLine($RecordLine);
 358 
 359     $XMin = ($X < $XMin) ? $X : $XMin;
 360     $YMin = ($Y < $YMin) ? $Y : $YMin;
 361     $ZMin = ($Z < $ZMin) ? $Z : $ZMin;
 362 
 363     $XMax = ($X > $XMax) ? $X : $XMax;
 364     $YMax = ($Y > $YMax) ? $Y : $YMax;
 365     $ZMax = ($Z > $ZMax) ? $Z : $ZMax;
 366   }
 367 
 368   if ($XMin == 99999) { $XMin = undef; }
 369   if ($YMin == 99999) { $YMin = undef; }
 370   if ($ZMin == 99999) { $ZMin = undef; }
 371   if ($XMax == -99999) { $XMax = undef; }
 372   if ($YMax == -99999) { $YMax = undef; }
 373   if ($ZMax == -99999) { $ZMax = undef; }
 374 
 375   return ($XMin, $YMin, $ZMin, $XMax, $YMax, $ZMax);
 376 }
 377 
 378 # Read PDB file and return reference to record lines..
 379 sub ReadPDBFile {
 380   my($PDBFile) = @_;
 381 
 382   my($Line, @PDBRecordLines);
 383 
 384   @PDBRecordLines = ();
 385   open PDBFILE, "$PDBFile" or die "Can't open $PDBFile: $!\n";
 386   while ($Line = GetTextLine(\*PDBFILE)) {
 387     push @PDBRecordLines, $Line;
 388   }
 389 
 390   close PDBFILE;
 391 
 392   return (\@PDBRecordLines);
 393 }
 394 
 395 #
 396 # Get experimental technique information...
 397 #
 398 sub GetExperimentalTechnique {
 399   my($PDBRecordLinesRef) = @_;
 400   my($RecordLine, $ContinuationNum, $ExperimentalTechnique);
 401 
 402   $ExperimentalTechnique = undef;
 403 
 404   LINE: for $RecordLine (@{$PDBRecordLinesRef}) {
 405       if (_IsRecordType($RecordLine, 'EXPDTA')) {
 406         ($ContinuationNum, $ExperimentalTechnique) = ParseExpdtaRecordLine($RecordLine);
 407         last LINE;
 408       }
 409   }
 410 
 411   return $ExperimentalTechnique;
 412 }
 413 
 414 #
 415 # Get experimental technique resolution information...
 416 #
 417 sub GetExperimentalTechniqueResolution {
 418   my($PDBRecordLinesRef) = @_;
 419   my($RecordLine, $Resolution, $ResolutionUnits);
 420 
 421   ($Resolution, $ResolutionUnits) = ((undef) x 2);
 422 
 423   LINE: for $RecordLine (@{$PDBRecordLinesRef}) {
 424     if ($RecordLine =~ /^REMARK   2 RESOLUTION./i) {
 425       ($Resolution, $ResolutionUnits) = ParseRemark2ResolutionRecordLine($RecordLine);
 426       last LINE;
 427     }
 428   }
 429 
 430   return ($Resolution, $ResolutionUnits);
 431 }
 432 
 433 #
 434 # Parse HEADER record line...
 435 sub ParseHeaderRecordLine {
 436   my($Line) = @_;
 437   my($Classification, $DepositionDate, $IDCode) = (undef, undef, undef);
 438 
 439   if ($Line !~ /^HEADER/i) {
 440     return ($Classification, $DepositionDate, $IDCode);
 441   }
 442   my($Length);
 443 
 444   ($Classification, $DepositionDate, $IDCode) = ('') x 3;
 445 
 446   $Length = length $Line;
 447 
 448   if ($Length <= 62) {
 449     ($Classification, $DepositionDate) = unpack("x10A40A9", $Line);
 450   }
 451   else {
 452     ($Classification, $DepositionDate, $IDCode) = unpack("x10A40A9x3A4", $Line);
 453   }
 454 
 455   $Classification = RemoveLeadingAndTrailingWhiteSpaces($Classification);
 456   $DepositionDate =~ s/ //g;
 457   $IDCode =~ s/ //g;
 458 
 459   return ($Classification, $DepositionDate, $IDCode);
 460 }
 461 
 462 #
 463 # Generate HEADER record line...
 464 sub GenerateHeaderRecordLine {
 465   my($Classification, $Date, $IDCode, $Line);
 466 
 467   $Classification = "Created using MayaChemTools";
 468   $Date = GenerateHeaderRecordTimeStamp();
 469   if (@_ == 3) {
 470     ($IDCode, $Classification, $Date) = @_;
 471   }
 472   elsif (@_ == 2) {
 473     ($IDCode, $Classification) = @_;
 474   }
 475   elsif (@_ == 1) {
 476     ($IDCode) = @_;
 477   }
 478 
 479   $Line = sprintf "HEADER    %-40.40s%9.9s   %4.4s", $Classification, $Date, $IDCode;
 480   return $Line;
 481 }
 482 
 483 # Generate PDB header time stamp...
 484 sub GenerateHeaderRecordTimeStamp {
 485   return TimeUtil::PDBFileTimeStamp();
 486 }
 487 
 488 #
 489 # Parse ATOM record line.
 490 #
 491 sub ParseAtomRecordLine {
 492   my($Line) = @_;
 493 
 494   return _ParseAtomOrHetatmRecordLine($Line);
 495 }
 496 
 497 # Generate ATOM record line...
 498 sub GenerateAtomRecordLine {
 499   my($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = @_;
 500 
 501   return _GenerateAtomOrHetatmRecordLine('ATOM', $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge);
 502 }
 503 
 504 #
 505 # Parse ATOM/HETATm record line.
 506 #
 507 sub ParseAtomOrHetatmRecordLine {
 508   my($Line) = @_;
 509 
 510   return _ParseAtomOrHetatmRecordLine($Line);
 511 }
 512 
 513 # Generate ATOM/HETATM record line...
 514 sub GenerateAtomOrHetatmRecordLine {
 515   my($RecordType, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = @_;
 516 
 517   return _GenerateAtomOrHetatmRecordLine($RecordType, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge);
 518 }
 519 #
 520 # Parse HETATM record line...
 521 #
 522 sub ParseHetatmRecordLine {
 523   my($Line) = @_;
 524 
 525   return _ParseAtomOrHetatmRecordLine($Line);
 526 }
 527 
 528 # Generate HETATM record line...
 529 sub GenerateHetatmRecordLine {
 530   my($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = @_;
 531 
 532   return _GenerateAtomOrHetatmRecordLine('HETATM', $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge);
 533 }
 534 
 535 # Parse EXPDTA record line...
 536 #
 537 # EXPDTA format:
 538 #
 539 #1 - 6 Record name "EXPDTA"
 540 # 9 - 10 Continuation continuation Allows concatenation of multiple records.
 541 # 11 - 70 SList technique The experimental technique(s) with optional comment describing the sample or experiment.
 542 #
 543 # The EXPDTA record identifies the experimental technique used. This may refer to the type of radiation and
 544 # sample, or include the spectroscopic or modeling technique. Permitted values include:
 545 #
 546 # ELECTRON DIFFRACTION
 547 # FIBER DIFFRACTION
 548 # FLUORESCENCE TRANSFER
 549 # NEUTRON DIFFRACTION
 550 # NMR
 551 # THEORETICAL MODEL
 552 # X-RAY DIFFRACTION
 553 #
 554 sub ParseExpdtaRecordLine {
 555   my($Line) = @_;
 556 
 557   if ($Line !~ /^EXPDTA/i) {
 558     return ((undef) x 2);
 559   }
 560 
 561   my($ContinuationNum, $ExperimentalTechnique) = unpack("x8A2A60" , $Line);
 562 
 563   $ContinuationNum =~ s/ //g;
 564   $ExperimentalTechnique = RemoveLeadingAndTrailingWhiteSpaces($ExperimentalTechnique);
 565 
 566   return ($ContinuationNum, $ExperimentalTechnique);
 567 }
 568 
 569 # Parse REMARK 2 record line...
 570 #
 571 # REMARK 2 format:
 572 #
 573 # The second REMARK 2 record has one of two formats. The first is used for diffraction studies, the second
 574 # for other types of experiments in which resolution is not relevant, e.g., NMR and theoretical modeling.
 575 #
 576 #For diffraction experiments:
 577 #
 578 # 1 - 6 Record name "REMARK"
 579 # 10 LString(1) "2"
 580 # 12 - 22 LString(11) "RESOLUTION."
 581 # 23 - 27 Real(5.2) resolution Resolution.
 582 # 29 - 38 LString(10) "ANGSTROMS."
 583 #
 584 # REMARK 2 when not a diffraction experiment:
 585 #
 586 # 1 - 6 Record name "REMARK"
 587 # 10 LString(1) "2"
 588 # 12 - 38 LString(28) "RESOLUTION. NOT APPLICABLE."
 589 # 41 - 70 String comment Comment.
 590 #
 591 sub ParseRemark2ResolutionRecordLine {
 592   my($Line) = @_;
 593 
 594   if ($Line !~ /^REMARK   2 RESOLUTION./i) {
 595     return ((undef) x 2);
 596   }
 597 
 598   my($Resolution, $ResolutionUnits);
 599 
 600   if ($Line =~ /NOT APPLICABLE/i) {
 601     ($Resolution, $ResolutionUnits) = ("NOT APPLICABLE", "");
 602   }
 603   else {
 604     ($Resolution, $ResolutionUnits) = unpack("x22A5x1A10" , $Line);
 605   }
 606 
 607   $Resolution = RemoveLeadingAndTrailingWhiteSpaces($Resolution);
 608 
 609   $ResolutionUnits = RemoveLeadingAndTrailingWhiteSpaces($ResolutionUnits);
 610   $ResolutionUnits =~ s/\.$//;
 611 
 612   return ($Resolution, $ResolutionUnits);
 613 }
 614 
 615 #
 616 # Parse SEQRES record line...
 617 #
 618 # SEQRES format:
 619 #
 620 # 1 - 6 Record name "SEQRES"
 621 # 9 - 10 Serial number of the SEQRES record for the current chain. Starts at 1 and increments by one each line. Reset to 1 for each chain.
 622 # 12 - Chain identifier
 623 # 14 - 17 Integer numRes Number of residues in the chain
 624 # 20 - 22 24 -26 ... ... 68 - 70 Residue name resName Residue name.
 625 #
 626 sub ParseSeqresRecordLine {
 627   my($Line) = @_;
 628 
 629   if ($Line !~ /^SEQRES/i) {
 630     return ((undef) x 5);
 631   }
 632   my($RecordSerialNumber, $ChainID, $NumOfResidues, $ResidueNames) = unpack("x8A2x1A1x1A4x2A51" , $Line);
 633   $RecordSerialNumber =~ s/ //g;
 634   $ChainID =~ s/ //g;
 635   $NumOfResidues =~ s/ //g;
 636   $ResidueNames = RemoveLeadingAndTrailingWhiteSpaces($ResidueNames);
 637 
 638   return ($RecordSerialNumber, $ChainID, $NumOfResidues, $ResidueNames);
 639 }
 640 
 641 #
 642 # Parse CONECT record line...
 643 #
 644 # CONECT format:
 645 #
 646 # 1 - 6 Record name "CONECT"
 647 # 7 - 11 Atom number
 648 # 12 - 16, 17 - 21, 22 - 26, 27 - 31 Atom number of bonded atom
 649 #
 650 # 32 - 36, 37 - 41 Atom number of hydrogen bonded atom
 651 # 42 - 46 Atom number of salt bridged atom
 652 # 47 - 51, 52 -56 Atom number of hydrogen bonded atom
 653 # 57 - 61 Atom number of salt bridged atom
 654 #
 655 sub ParseConectRecordLine {
 656   my($Line) = @_;
 657 
 658   if ($Line !~ /^CONECT/i) {
 659     return ((undef) x 11);
 660   }
 661   my($AtomNum, $BondedAtomNum1, $BondedAtomNum2, $BondedAtomNum3, $BondedAtomNum4, $HBondedAtomNum1, $HBondedAtomNum2, $SaltBridgedAtomNum1, $HBondedAtomNum3, $HBondedAtomNum4, $SaltBridgedAtomNum2) = map {s/ //g; $_} unpack("x6A5A5A5A5A5A5A5A5A5A5A5", $Line);
 662 
 663   return ($AtomNum, $BondedAtomNum1, $BondedAtomNum2, $BondedAtomNum3, $BondedAtomNum4, $HBondedAtomNum1, $HBondedAtomNum2, $SaltBridgedAtomNum1, $HBondedAtomNum3, $HBondedAtomNum4, $SaltBridgedAtomNum2);
 664 }
 665 
 666 # Generate CONECT record line...
 667 sub GenerateConectRecordLine {
 668   my($AtomNum, $BondedAtomNum1, $BondedAtomNum2, $BondedAtomNum3, $BondedAtomNum4, $HBondedAtomNum1, $HBondedAtomNum2, $SaltBridgedAtomNum1, $HBondedAtomNum3, $HBondedAtomNum4, $SaltBridgedAtomNum2) = @_;
 669   my($Line);
 670 
 671   $Line = sprintf "CONECT%5.5s%5.5s%5.5s%5.5s%5.5s%5.5s%5.5s%5.5s%5.5s%5.5s%5.5s", $AtomNum, $BondedAtomNum1, $BondedAtomNum2, $BondedAtomNum3, $BondedAtomNum4, $HBondedAtomNum1, $HBondedAtomNum2, $SaltBridgedAtomNum1, $HBondedAtomNum3, $HBondedAtomNum4, $SaltBridgedAtomNum2;
 672 
 673   return $Line;
 674 }
 675 
 676 #
 677 # Parse TER record line...
 678 #
 679 # TER format:
 680 #
 681 #1 - 6 Record name "TER "
 682 # 7 - 11 Serial number
 683 # 18 - 20 Residue name
 684 # 22 Chain identifier
 685 # 23 - 26 Residue sequence number
 686 # 27 Insertion code
 687 #
 688 sub ParseTerRecordLine {
 689   my($Line) = @_;
 690 
 691   if ($Line !~ /^TER/i) {
 692     return ((undef) x 5);
 693   }
 694   my($SerialNumber, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $Length);
 695 
 696   ($SerialNumber, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode) = ('') x 5;
 697 
 698   $Length = length $Line;
 699 
 700   if ($Length <= 17) {
 701     ($SerialNumber, $ResidueName) = map {s/ //g; $_} unpack("x6A5", $Line);
 702   }
 703   elsif ($Length <= 21) {
 704     ($SerialNumber, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode) = map {s/ //g; $_} unpack("x6A5x6A3", $Line);
 705   }
 706   else {
 707     ($SerialNumber, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode) = map {s/ //g; $_} unpack("x6A5x6A3xA1A4A1", $Line);
 708   }
 709 
 710   return ($SerialNumber, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode);
 711 }
 712 
 713 # Generate TER record line...
 714 sub GenerateTerRecordLine {
 715   my($SerialNumber, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $Line) = ('') x 6;
 716 
 717   if (@_ == 5) {
 718     ($SerialNumber, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode) = @_;
 719   }
 720   elsif (@_ == 4) {
 721     ($SerialNumber, $ResidueName, $ChainID, $ResidueNumber) = @_;
 722   }
 723   elsif (@_ == 3) {
 724     ($SerialNumber, $ResidueName) = @_;
 725   }
 726   elsif (@_ == 2) {
 727     ($SerialNumber, $ResidueName) = @_;
 728   }
 729   elsif (@_ == 1) {
 730     ($SerialNumber) = @_;
 731   }
 732   $Line = sprintf "TER   %5.5s      %-3.3s %1.1s%4.4s%1.1s", $SerialNumber, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode;
 733 
 734   return $Line;
 735 }
 736 
 737 #
 738 # Parse MASTER record line...
 739 #
 740 # MASTER record format:
 741 #
 742 #1 - 6 Record name "MASTER"
 743 # 11 - 15 Number of REMARK records
 744 # 16 - 20 "0"
 745 # 21 - 25 Number of HET records
 746 # 26 - 30 Number of HELIX records
 747 # 31 - 35 Number of SHEET records
 748 # 36 - 40 Number of TURN records
 749 # 41 - 45 Number of SITE records
 750 # 46 - 50 Number of coordinate transformation records (ORIGXn+SCALEn+MTRIXn)
 751 # 51 - 55 Number of atomic coordinate records (ATOM+HETATM)
 752 # 56 - 60 Number of TER records
 753 # 61 - 65 Number of CONECT records
 754 # 66 - 70 Number of SEQRES records
 755 #
 756 sub ParseMasterRecordLine {
 757   my($Line) = @_;
 758 
 759   if ($Line !~ /^MASTER/i) {
 760     return ((undef) x 11);
 761   }
 762   my($NumOfRemarkRecords, $NumOfHetRecords, $NumOfHelixRecords, $NumOfSheetRecords, $NumOfTurnRecords, $NumOfSiteRecords, $NumOfTransformationsRecords, $NumOfAtomAndHetatmRecords, $NumOfTerRecords, $NumOfConectRecords, $NumOfSeqresRecords) = map {s/ //g; $_} unpack("x6x4A5x5A5A5A5A5A5A5A5A5A5A5", $Line);
 763 
 764   return ($NumOfRemarkRecords, $NumOfHetRecords, $NumOfHelixRecords, $NumOfSheetRecords, $NumOfTurnRecords, $NumOfSiteRecords, $NumOfTransformationsRecords, $NumOfAtomAndHetatmRecords, $NumOfTerRecords, $NumOfConectRecords, $NumOfSeqresRecords);
 765 }
 766 
 767 # End record...
 768 sub GenerateEndRecordLine {
 769   my($Line);
 770   $Line = 'END   ';
 771   return $Line;
 772 }
 773 
 774 # ATOM/HETATM record format:
 775 #
 776 # 1 - 6 Record name
 777 # 7 - 11  Atom serial number - right justified
 778 # 13 - 16 Atom name
 779 # 17 Alternate location indicator.
 780 # 18 - 20 Residue name - right justified
 781 # 22 Chain identifier.
 782 # 23 - 26 Residue sequence number - right justified
 783 # 27 Code for insertion of residues.
 784 # 31 - 38 Real(8.3), Orthogonal coordinates for X in Angstroms.
 785 # 39 - 46 Real(8.3), Orthogonal coordinates for Y in Angstroms.
 786 # 47 - 54 Real(8.3), Orthogonal coordinates for Z in Angstroms.
 787 # 55 - 60 Real(6.2), Occupancy
 788 # 61 - 66 Real(6.2), Temperature factor
 789 # 73 - 76 LString(4), Segment identifier, left-justified.
 790 # 77 - 78 LString(2), Element symbol, right-justified.
 791 #79 - 80 LString(2), Charge on the atom.
 792 #
 793 # Notes:
 794 #  . Atom names starting with C, N, O and S are left justified starting with column 14
 795 #    and others are left justified starting with column 13.
 796 #
 797 #  . Six characters (columns) are reserved for atom names, assigned as follows:
 798 #
 799 #   13 - 14 Chemical symbol - right justified, except for hydrogen atoms
 800 #
 801 #   And for amino acids:
 802 #
 803 #   15 Remoteness indicator (alphabetic) (A, B, G, D, E, Z and so on)
 804 #   16 Branch designator (numeric)
 805 #
 806 sub _ParseAtomOrHetatmRecordLine {
 807   my($Line) = @_;
 808 
 809   if ($Line !~ /^(ATOM|HETATM)/i) {
 810     return ((undef) x 15);
 811   }
 812   my($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge, $Length);
 813 
 814   ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ('') x 15;
 815 
 816   $Length = length $Line;
 817 
 818   if ($Length <= 72) {
 819     ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor) = map {s/ //g; $_} unpack("x6A5xA4A1A3xA1A4A1x3A8A8A8A6A6", $Line);
 820   }
 821   else {
 822     ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = map {s/ //g; $_} unpack("x6A5xA4A1A3xA1A4A1x3A8A8A8A6A6x6A4A2A2", $Line);
 823   }
 824   return($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge);
 825 }
 826 
 827 # Generate ATOM/HETATM record line...
 828 sub _GenerateAtomOrHetatmRecordLine {
 829   my($RecordType, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = @_;
 830   my($Line, $AtomNameFormat);
 831 
 832   if (length($AtomName) >= 4) {
 833     # Left justified starting at column 13 for all atom names of length 4...
 834     $AtomNameFormat = "%-4.4s";
 835   }
 836   elsif (IsEmpty($ElementSymbol)) {
 837     # No element symbol specified; just guess from atom name to cover most likely cases...
 838     $AtomNameFormat = ($AtomName =~ /^(C|N|O|S)/i) ? " %-3.3s" : "%-4.4s";
 839   }
 840   else {
 841     # Element symbol specified...
 842     if ($ElementSymbol =~ /^H$/i) {
 843       # Hydrogen atom name with <=3 characters is left justified starting at column 14;
 844       # Otherwise, left justified starting at column 13.
 845       $AtomNameFormat = (length($AtomName) <= 3) ? " %-3.3s" : "%-4.4s";
 846     }
 847     else {
 848       # Non-hydrogen atom name...
 849       $AtomNameFormat = (length($ElementSymbol) == 1) ? " %-3.3s" : "%-4.4s";
 850     }
 851   }
 852 
 853   $Line = sprintf "%-6.6s%5.5s ${AtomNameFormat}%1.1s%3.3s %1.1s%4.4s%1.1s   %8.8s%8.8s%8.8s%6.6s%6.6s      %-4.4s%2.2s%2.2s", $RecordType, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge;
 854 
 855   return $Line;
 856 }
 857 
 858 # Check record type...
 859 sub _IsRecordType {
 860   my($Line, $SpecifiedType) = @_;
 861   my($Type, $Status);
 862 
 863   ($Type) = map {s/ //g; $_} unpack("A6", $Line);
 864 
 865   $Status = ($SpecifiedType eq $Type) ? 1 : 0;
 866 
 867   return $Status;
 868 }
 869 
 870 # Get record type...
 871 sub _GetRecordType {
 872   my($Line) = @_;
 873   my($Type);
 874 
 875   ($Type) = map {s/ //g; $_} unpack("A6", $Line);
 876 
 877   return $Type;
 878 }
 879 
 880 # Get chains and residues data using ATOM/HETATM records...
 881 #
 882 sub _GetChainsAndResiduesFromAtomHetatmRecords {
 883   my($PDBRecordLinesRef, $GetChainResiduesBeyondTERFlag, $GetRecordLinesFlag) = @_;
 884 
 885   my($LineCount, $TotalChainCount, $PreviousResidueNumber, $ChainCount, $DefaultChainID, $DefaultChainLabel, $RecordLine, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge, %ChainsDataMap);
 886 
 887   # Do a quick chain count using TER record...
 888   $TotalChainCount = 0;
 889   LINE: for $RecordLine (@{$PDBRecordLinesRef}) {
 890     if (IsEndmdlRecordType($RecordLine)) {
 891       last LINE;
 892     }
 893     if (IsTerRecordType($RecordLine)) {
 894       $TotalChainCount++;
 895     }
 896   }
 897 
 898   %ChainsDataMap = ();
 899   @{$ChainsDataMap{ChainIDs}} = ();
 900   %{$ChainsDataMap{Residues}} = ();
 901   %{$ChainsDataMap{ResidueNumbers}} = ();
 902   %{$ChainsDataMap{Lines}} = ();
 903   %{$ChainsDataMap{ResidueCount}} = ();
 904 
 905   $LineCount = 0;
 906   $ChainCount = 0;
 907   $DefaultChainLabel = 'None';
 908   $DefaultChainID = $DefaultChainLabel . ($ChainCount + 1);
 909   $PreviousResidueNumber = 0;
 910 
 911   LINE: for $RecordLine (@{$PDBRecordLinesRef}) {
 912       $LineCount++;
 913       if (IsTerRecordType($RecordLine)) {
 914         $DefaultChainID = $DefaultChainLabel . ($ChainCount + 1);
 915         $ChainCount++;
 916         if ($ChainCount == $TotalChainCount) {
 917           last LINE;
 918         }
 919         else {
 920           next LINE;
 921         }
 922       }
 923       elsif (!(IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine))) {
 924         next LINE;
 925       }
 926       ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseAtomRecordLine($RecordLine);
 927 
 928       if (IsEmpty($ChainID)) {
 929         $ChainID = $DefaultChainID;
 930       }
 931       if (exists $ChainsDataMap{Residues}{$ChainID}) {
 932         # Data for existing chain...
 933         if ($GetRecordLinesFlag) {
 934           push @{$ChainsDataMap{Lines}{$ChainID}}, $RecordLine;
 935         }
 936 
 937         if ($ResidueNumber != $PreviousResidueNumber) {
 938           # Next residue with in the chain...
 939           push @{$ChainsDataMap{Residues}{$ChainID}}, $ResidueName;
 940           push @{$ChainsDataMap{ResidueNumbers}{$ChainID}}, $ResidueNumber;
 941 
 942           if (exists $ChainsDataMap{ResidueCount}{$ChainID}{$ResidueName}) {
 943             $ChainsDataMap{ResidueCount}{$ChainID}{$ResidueName} += 1;
 944           }
 945           else {
 946             $ChainsDataMap{ResidueCount}{$ChainID}{$ResidueName} = 1;
 947           }
 948           $PreviousResidueNumber = $ResidueNumber;
 949         }
 950       }
 951       else {
 952         # Data for new chain...
 953         push @{$ChainsDataMap{ChainIDs}}, $ChainID;
 954 
 955         @{$ChainsDataMap{Residues}{$ChainID}} = ();
 956         push @{$ChainsDataMap{Residues}{$ChainID}}, $ResidueName;
 957 
 958         @{$ChainsDataMap{ResidueNumbers}{$ChainID}} = ();
 959         push @{$ChainsDataMap{ResidueNumbers}{$ChainID}}, $ResidueNumber;
 960 
 961         @{$ChainsDataMap{Lines}{$ChainID}} = ();
 962         if ($GetRecordLinesFlag) {
 963           push @{$ChainsDataMap{Lines}{$ChainID}}, $RecordLine;
 964         }
 965 
 966         %{$ChainsDataMap{ResidueCount}{$ChainID}} = ();
 967         $ChainsDataMap{ResidueCount}{$ChainID}{$ResidueName} = 1;
 968         $PreviousResidueNumber = $ResidueNumber;
 969       }
 970   }
 971   if (!$GetChainResiduesBeyondTERFlag) {
 972     return \%ChainsDataMap;
 973   }
 974   # Look for any HETATM residues specified outside TER records which could belong to an existing chain...
 975   my($LineIndex, $PreviousChainID);
 976   $PreviousChainID = '';
 977   $PreviousResidueNumber = 0;
 978   LINE: for $LineIndex (($LineCount - 1) .. $#{$PDBRecordLinesRef}) {
 979     $RecordLine = $PDBRecordLinesRef->[$LineIndex];
 980     if (IsEndmdlRecordType($RecordLine)) {
 981       last LINE;
 982     }
 983     if (!(IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine))) {
 984       next LINE;
 985     }
 986     ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseAtomRecordLine($RecordLine);
 987     if (IsEmpty($ChainID)) {
 988       # Ignore the chains with no ids...
 989       next LINE;
 990     }
 991     if (! exists($ChainsDataMap{Residues}{$ChainID})) {
 992       # Don't collect any new chains after TER record...
 993       next LINE;
 994     }
 995     if ($GetRecordLinesFlag) {
 996       push @{$ChainsDataMap{Lines}{$ChainID}}, $RecordLine;
 997     }
 998     if ($ResidueNumber != $PreviousResidueNumber || $ChainID ne $PreviousChainID) {
 999 
1000       push @{$ChainsDataMap{Residues}{$ChainID}}, $ResidueName;
1001       push @{$ChainsDataMap{ResidueNumbers}{$ChainID}}, $ResidueNumber;
1002 
1003       if (exists $ChainsDataMap{ResidueCount}{$ChainID}{$ResidueName}) {
1004         $ChainsDataMap{ResidueCount}{$ChainID}{$ResidueName} += 1;
1005       }
1006       else {
1007         $ChainsDataMap{ResidueCount}{$ChainID}{$ResidueName} = 1;
1008       }
1009       $PreviousChainID = $ChainID;
1010       $PreviousResidueNumber = $ResidueNumber;
1011     }
1012   }
1013   return \%ChainsDataMap;
1014 }
1015 
1016 # Get chains and residues data using SEQRES records...
1017 #
1018 sub _GetChainsAndResiduesFromSeqresRecords {
1019   my($PDBRecordLinesRef) = @_;
1020 
1021   my($ChainCount, $DefaultChainLabel, $DefaultChainID, $RecordLine, $RecordSerialNumber, $ChainID, $NumOfResidues, $ResidueName, $ResidueNamesString, @ResidueNamesList, %ChainsDataMap);
1022 
1023   %ChainsDataMap = ();
1024   @{$ChainsDataMap{ChainIDs}} = ();
1025   %{$ChainsDataMap{Residues}} = ();
1026   %{$ChainsDataMap{ResidueNumbers}} = ();
1027   %{$ChainsDataMap{ResidueCount}} = ();
1028 
1029   $ChainCount = 0;
1030   $DefaultChainLabel = 'None';
1031   $DefaultChainID = $DefaultChainLabel . ($ChainCount + 1);
1032 
1033   LINE: for $RecordLine (@{$PDBRecordLinesRef}) {
1034       if (!IsSeqresRecordType($RecordLine)) {
1035         next LINE;
1036       }
1037       ($RecordSerialNumber, $ChainID, $NumOfResidues, $ResidueNamesString) = ParseSeqresRecordLine($RecordLine);
1038       if ($RecordSerialNumber == 1) {
1039         # Indicates start of a new chain...
1040         $DefaultChainID = $DefaultChainLabel . ($ChainCount + 1);
1041         $ChainCount++;
1042       }
1043       if (IsEmpty($ChainID)) {
1044         $ChainID = $DefaultChainID;
1045       }
1046       # Process the residues...
1047       @ResidueNamesList = split /[ ]+/, $ResidueNamesString;
1048 
1049       if (exists $ChainsDataMap{Residues}{$ChainID}) {
1050         # Data for existing chain...
1051         push @{$ChainsDataMap{Residues}{$ChainID}}, @ResidueNamesList;
1052       }
1053       else {
1054         # Data for new chain...
1055         push @{$ChainsDataMap{ChainIDs}}, $ChainID;
1056         @{$ChainsDataMap{Residues}{$ChainID}} = ();
1057         push @{$ChainsDataMap{Residues}{$ChainID}}, @ResidueNamesList;
1058       }
1059 
1060       # Setup residue count...
1061       for $ResidueName (@ResidueNamesList) {
1062         if (exists $ChainsDataMap{ResidueCount}{$ChainID}{$ResidueName}) {
1063           $ChainsDataMap{ResidueCount}{$ChainID}{$ResidueName} += 1;
1064         }
1065         else {
1066           $ChainsDataMap{ResidueCount}{$ChainID}{$ResidueName} = 1;
1067         }
1068       }
1069   }
1070   return \%ChainsDataMap;
1071 }
1072