1 package PDBFileUtil; 2 # 3 # File: PDBFileUtil.pm 4 # Author: Manish Sud <msud@san.rr.com> 5 # 6 # Copyright (C) 2025 Manish Sud. All rights reserved. 7 # 8 # This file is part of MayaChemTools. 9 # 10 # MayaChemTools is free software; you can redistribute it and/or modify it under 11 # the terms of the GNU Lesser General Public License as published by the Free 12 # Software Foundation; either version 3 of the License, or (at your option) any 13 # later version. 14 # 15 # MayaChemTools is distributed in the hope that it will be useful, but without 16 # any warranty; without even the implied warranty of merchantability of fitness 17 # for a particular purpose. See the GNU Lesser General Public License for more 18 # details. 19 # 20 # You should have received a copy of the GNU Lesser General Public License 21 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or 22 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330, 23 # Boston, MA, 02111-1307, USA. 24 # 25 26 use strict; 27 use Exporter; 28 use Text::ParseWords; 29 use TextUtil; 30 use FileUtil; 31 use TimeUtil (); 32 33 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); 34 35 @ISA = qw(Exporter); 36 @EXPORT = qw(GetPDBRecordType GetRecordTypesCount GetAllResidues GetConectRecordLines GetChainsAndResidues GetExperimentalTechnique GetExperimentalTechniqueResolution GetMinMaxCoords IsPDBFile IsAtomRecordType IsConectRecordType IsHeaderRecordType IsHetatmRecordType IsSeqresRecordType IsModelRecordType IsEndmdlRecordType IsTerRecordType IsMasterRecordType ReadPDBFile ParseHeaderRecordLine GenerateHeaderRecordLine GenerateHeaderRecordTimeStamp ParseAtomRecordLine GenerateAtomRecordLine ParseAtomOrHetatmRecordLine GenerateAtomOrHetatmRecordLine GenerateHetatmRecordLine ParseHetatmRecordLine ParseConectRecordLine GenerateConectRecordLine ParseExpdtaRecordLine ParseRemark2ResolutionRecordLine ParseSeqresRecordLine ParseTerRecordLine GenerateTerRecordLine ParseMasterRecordLine GenerateEndRecordLine); 37 @EXPORT_OK = qw(); 38 39 %EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]); 40 41 # Get PDB record type... 42 sub GetPDBRecordType { 43 my($Line) = @_; 44 45 return _GetRecordType($Line); 46 } 47 48 # Is it a PDB file? 49 sub IsPDBFile { 50 my($PDBFile) = @_; 51 my($Line, $Status); 52 53 $Status = 0; 54 open PDBFILE, "$PDBFile" or die "Can't open $PDBFile: $!\n"; 55 $Line = GetTextLine(\*PDBFILE); 56 $Status = ($Line =~ /^HEADER/i) ? 1 : 0; 57 close PDBFILE; 58 59 return $Status; 60 } 61 62 # Is it a atom record type? 63 sub IsAtomRecordType { 64 my($Line) = @_; 65 66 return _IsRecordType($Line, 'ATOM'); 67 } 68 69 # Is it a connect record type? 70 sub IsConectRecordType { 71 my($Line) = @_; 72 73 return _IsRecordType($Line, 'CONECT'); 74 } 75 76 # Is it a header atom record type? 77 sub IsHeaderRecordType { 78 my($Line) = @_; 79 80 return _IsRecordType($Line, 'HEADER'); 81 } 82 83 # Is it a hetro atom record type? 84 sub IsHetatmRecordType { 85 my($Line) = @_; 86 87 return _IsRecordType($Line, 'HETATM'); 88 } 89 90 # Is it a seqres record type? 91 sub IsSeqresRecordType { 92 my($Line) = @_; 93 94 return _IsRecordType($Line, 'SEQRES'); 95 } 96 97 # Is it a MODEL record type? 98 sub IsModelRecordType { 99 my($Line) = @_; 100 101 return _IsRecordType($Line, 'MODEL'); 102 } 103 104 # Is it a ENDMDL record type? 105 sub IsEndmdlRecordType { 106 my($Line) = @_; 107 108 return _IsRecordType($Line, 'ENDMDL'); 109 } 110 111 # Is it a TER record type? 112 sub IsTerRecordType { 113 my($Line) = @_; 114 115 return _IsRecordType($Line, 'TER'); 116 } 117 118 # Is it a MASTER record type? 119 sub IsMasterRecordType { 120 my($Line) = @_; 121 122 return _IsRecordType($Line, 'MASTER'); 123 } 124 125 # Count the number of each record type and a reference to data type with these key/value pairs: 126 # {RecordTypes} - An array of unique record types in order of their presence in the file 127 # {Count}{$RecordType} - Count of each record type 128 # {Lines}{$RecordType} - Optional lines data for a specific record type. 129 # 130 sub GetRecordTypesCount { 131 my($PDBRecordLinesRef, $SpecifiedRecordType, $GetRecordLinesFlag, $RecordType, $RecordLine, %RecordTypeDataMap); 132 133 %RecordTypeDataMap = (); 134 @{$RecordTypeDataMap{RecordTypes}} = (); 135 %{$RecordTypeDataMap{Count}} = (); 136 %{$RecordTypeDataMap{Lines}} = (); 137 138 $SpecifiedRecordType = ''; 139 $GetRecordLinesFlag = 0; 140 if (@_ == 3) { 141 ($PDBRecordLinesRef, $SpecifiedRecordType, $GetRecordLinesFlag) = @_; 142 $SpecifiedRecordType = uc $SpecifiedRecordType; 143 } 144 elsif (@_ == 2) { 145 ($PDBRecordLinesRef, $SpecifiedRecordType) = @_; 146 $SpecifiedRecordType = uc $SpecifiedRecordType; 147 } 148 else { 149 ($PDBRecordLinesRef) = @_; 150 } 151 LINE: for $RecordLine (@{$PDBRecordLinesRef}) { 152 $RecordType = _GetRecordType($RecordLine); 153 if ($SpecifiedRecordType && ($SpecifiedRecordType ne $RecordType)) { 154 next LINE; 155 } 156 if (exists $RecordTypeDataMap{Count}{$RecordType}) { 157 # Update count... 158 $RecordTypeDataMap{Count}{$RecordType} += 1; 159 160 if ($GetRecordLinesFlag) { 161 push @{$RecordTypeDataMap{Lines}{$RecordType}}, $RecordLine; 162 } 163 } 164 else { 165 # New record type... 166 push @{$RecordTypeDataMap{RecordTypes}}, $RecordType; 167 $RecordTypeDataMap{Count}{$RecordType} = 1; 168 169 if ($GetRecordLinesFlag) { 170 @{$RecordTypeDataMap{Lines}{$RecordType}} = (); 171 push @{$RecordTypeDataMap{Lines}{$RecordType}}, $RecordLine; 172 } 173 } 174 } 175 return (\%RecordTypeDataMap); 176 } 177 178 # Collect CONECT record lines for specific atom number, modified specified data to exclude any atom 179 # number not present in the list of specified atom numbers and return a reference to list of 180 # CONECT record lines. 181 # 182 sub GetConectRecordLines { 183 my($PDBRecordLinesRef, $AtomNumbersMapRef) = @_; 184 my($AtomNumber, $ConectAtomNumber, $RecordLine, @ConectRecordAtomNums, @ConectRecordLines); 185 186 @ConectRecordLines = (); 187 LINE: for $RecordLine (@{$PDBRecordLinesRef}) { 188 if (!IsConectRecordType($RecordLine)) { 189 next LINE; 190 } 191 @ConectRecordAtomNums = (); 192 push @ConectRecordAtomNums, ParseConectRecordLine($RecordLine); 193 ATOMNUMBER: for $ConectAtomNumber (@ConectRecordAtomNums) { 194 if (defined $ConectAtomNumber) { 195 $AtomNumber = $ConectAtomNumber; 196 if ($AtomNumber) { 197 if (! exists $AtomNumbersMapRef->{$AtomNumber}) { 198 next LINE; 199 } 200 } 201 } 202 } 203 push @ConectRecordLines, $RecordLine; 204 } 205 return \@ConectRecordLines; 206 } 207 208 # Get chains and residue information using ATOM/HETATM or SEQRES records. And return a reference to a 209 # hash with these keys: 210 # 211 # @{$ChainsDataMap{ChainIDs}} - List of chain IDs with 'None' for no chain identification 212 # @{$ChainsDataMap{Residues}{$ChainID}} - List of residues in order of their appearance in a chain 213 # @{$ChainsDataMap{ResidueNumbers}{$ChainID}} - List of residue numbers in order of their appearance in a chain 214 # %{$ChainsDataMap{ResidueCount}{$ChainID}{$ResidueName}} - Count of specific residues in a chain 215 # 216 # Notes: 217 # . Chains and residue data can be extacted using either ATOM/HETATM records or SEQRES records. 218 # . In addition to a different chain ID in ATOM/HETATM a TER record also indicates end of an existing chain 219 # and start of a new one: ChainID in ATOM/HETATM records might still be emtpy. 220 # . ATOM/HETATM records after the first ENDMDL records are simply ingnored. 221 # 222 sub GetChainsAndResidues { 223 my($PDBRecordLinesRef, $RecordsSource, $GetChainResiduesBeyondTERFlag, $GetRecordLinesFlag); 224 225 $RecordsSource = 'AtomAndHetatm'; 226 $GetChainResiduesBeyondTERFlag = 0; 227 $GetRecordLinesFlag = 0; 228 229 if (@_ == 4) { 230 ($PDBRecordLinesRef, $RecordsSource, $GetChainResiduesBeyondTERFlag, $GetRecordLinesFlag) = @_; 231 } 232 elsif (@_ == 3) { 233 ($PDBRecordLinesRef, $RecordsSource, $GetChainResiduesBeyondTERFlag) = @_; 234 } 235 elsif (@_ == 2) { 236 ($PDBRecordLinesRef, $RecordsSource) = @_; 237 } 238 else { 239 ($PDBRecordLinesRef) = @_; 240 } 241 242 if ($RecordsSource =~ /^AtomAndHetatm$/i) { 243 return _GetChainsAndResiduesFromAtomHetatmRecords($PDBRecordLinesRef, $GetChainResiduesBeyondTERFlag, $GetRecordLinesFlag); 244 } 245 elsif ($RecordsSource =~ /^Seqres$/i) { 246 return _GetChainsAndResiduesFromSeqresRecords($PDBRecordLinesRef); 247 } 248 else { 249 my(%ChainsDataMap); 250 %ChainsDataMap = (); 251 @{$ChainsDataMap{ChainIDs}} = (); 252 %{$ChainsDataMap{Residues}} = (); 253 %{$ChainsDataMap{ResidueNumbers}} = (); 254 %{$ChainsDataMap{ResidueCount}} = (); 255 256 return \%ChainsDataMap; 257 } 258 } 259 260 261 # Get residue information using ATOM/HETATM records and return a reference to a hash with 262 # these keys: 263 # 264 # @{$ResiduesDataMap{ResidueNames}} - List of all the residues 265 # %{$ResiduesDataMap{ResidueCount}{$ResidueName}} - Count of residues 266 # @{$ResiduesDataMap{AtomResidueNames}} - List of all the residues 267 # %{$ResiduesDataMap{AtomResidueCount}{$ResidueName}} - Count of residues in ATOM records 268 # @{$ResiduesDataMap{HetatomResidueNames}} - List of all the residues 269 # %{$ResiduesDataMap{HetatmResidueCount}{$ResidueName}} - Count of residues HETATM records 270 # 271 # Notes: 272 # . ATOM/HETATM records after the first ENDMDL records are simply ingnored. 273 # 274 sub GetAllResidues { 275 my($PDBRecordLinesRef) = @_; 276 277 my($PreviousChainID, $PreviousResidueNumber, $RecordLine, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge, %ResiduesDataMap); 278 279 %ResiduesDataMap = (); 280 @{$ResiduesDataMap{ResidueNames}} = (); 281 %{$ResiduesDataMap{ResidueCount}} = (); 282 @{$ResiduesDataMap{AtomResidueNames}} = (); 283 %{$ResiduesDataMap{AtomResidueCount}} = (); 284 @{$ResiduesDataMap{HetatmResidueNames}} = (); 285 %{$ResiduesDataMap{HetatmResidueCount}} = (); 286 287 $PreviousChainID = ''; 288 $PreviousResidueNumber = 0; 289 290 LINE: for $RecordLine (@{$PDBRecordLinesRef}) { 291 if (IsEndmdlRecordType($RecordLine)) { 292 last LINE; 293 } 294 if (!(IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine))) { 295 next LINE; 296 } 297 ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseAtomRecordLine($RecordLine); 298 299 if ($PreviousChainID eq $ChainID) { 300 if ($ResidueNumber == $PreviousResidueNumber) { 301 next LINE; 302 } 303 $PreviousResidueNumber = $ResidueNumber; 304 } 305 else { 306 # New chain... 307 $PreviousChainID = $ChainID; 308 $PreviousResidueNumber = $ResidueNumber; 309 } 310 311 # Store the residue and update its count... 312 push @{$ResiduesDataMap{ResidueNames}}, $ResidueName; 313 if (exists $ResiduesDataMap{ResidueCount}{$ResidueName}) { 314 $ResiduesDataMap{ResidueCount}{$ResidueName} += 1; 315 } 316 else { 317 $ResiduesDataMap{ResidueCount}{$ResidueName} = 1; 318 } 319 # Update ATOM residue data... 320 if (IsAtomRecordType($RecordLine)) { 321 push @{$ResiduesDataMap{AtomResidueNames}}, $ResidueName; 322 if (exists $ResiduesDataMap{AtomResidueCount}{$ResidueName}) { 323 $ResiduesDataMap{AtomResidueCount}{$ResidueName} += 1; 324 } 325 else { 326 $ResiduesDataMap{AtomResidueCount}{$ResidueName} = 1; 327 } 328 } 329 # Update HETATM residue data... 330 if (IsHetatmRecordType($RecordLine)) { 331 push @{$ResiduesDataMap{HetatmResidueNames}}, $ResidueName; 332 if (exists $ResiduesDataMap{HetatmResidueCount}{$ResidueName}) { 333 $ResiduesDataMap{HetatmResidueCount}{$ResidueName} += 1; 334 } 335 else { 336 $ResiduesDataMap{HetatmResidueCount}{$ResidueName} = 1; 337 } 338 } 339 } 340 341 return \%ResiduesDataMap; 342 } 343 344 # Return min/max XYZ coordinates for ATOM/HETATM records... 345 sub GetMinMaxCoords { 346 my($PDBRecordLinesRef) = @_; 347 348 my($XMin, $YMin, $ZMin, $XMax, $YMax, $ZMax, $RecordLine, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge); 349 350 ($XMin, $YMin, $ZMin) = (99999) x 3; 351 ($XMax, $YMax, $ZMax) = (-99999) x 3; 352 353 LINE: for $RecordLine (@{$PDBRecordLinesRef}) { 354 if (!(IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine))) { 355 next LINE; 356 } 357 ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseAtomRecordLine($RecordLine); 358 359 $XMin = ($X < $XMin) ? $X : $XMin; 360 $YMin = ($Y < $YMin) ? $Y : $YMin; 361 $ZMin = ($Z < $ZMin) ? $Z : $ZMin; 362 363 $XMax = ($X > $XMax) ? $X : $XMax; 364 $YMax = ($Y > $YMax) ? $Y : $YMax; 365 $ZMax = ($Z > $ZMax) ? $Z : $ZMax; 366 } 367 368 if ($XMin == 99999) { $XMin = undef; } 369 if ($YMin == 99999) { $YMin = undef; } 370 if ($ZMin == 99999) { $ZMin = undef; } 371 if ($XMax == -99999) { $XMax = undef; } 372 if ($YMax == -99999) { $YMax = undef; } 373 if ($ZMax == -99999) { $ZMax = undef; } 374 375 return ($XMin, $YMin, $ZMin, $XMax, $YMax, $ZMax); 376 } 377 378 # Read PDB file and return reference to record lines.. 379 sub ReadPDBFile { 380 my($PDBFile) = @_; 381 382 my($Line, @PDBRecordLines); 383 384 @PDBRecordLines = (); 385 open PDBFILE, "$PDBFile" or die "Can't open $PDBFile: $!\n"; 386 while ($Line = GetTextLine(\*PDBFILE)) { 387 push @PDBRecordLines, $Line; 388 } 389 390 close PDBFILE; 391 392 return (\@PDBRecordLines); 393 } 394 395 # 396 # Get experimental technique information... 397 # 398 sub GetExperimentalTechnique { 399 my($PDBRecordLinesRef) = @_; 400 my($RecordLine, $ContinuationNum, $ExperimentalTechnique); 401 402 $ExperimentalTechnique = undef; 403 404 LINE: for $RecordLine (@{$PDBRecordLinesRef}) { 405 if (_IsRecordType($RecordLine, 'EXPDTA')) { 406 ($ContinuationNum, $ExperimentalTechnique) = ParseExpdtaRecordLine($RecordLine); 407 last LINE; 408 } 409 } 410 411 return $ExperimentalTechnique; 412 } 413 414 # 415 # Get experimental technique resolution information... 416 # 417 sub GetExperimentalTechniqueResolution { 418 my($PDBRecordLinesRef) = @_; 419 my($RecordLine, $Resolution, $ResolutionUnits); 420 421 ($Resolution, $ResolutionUnits) = ((undef) x 2); 422 423 LINE: for $RecordLine (@{$PDBRecordLinesRef}) { 424 if ($RecordLine =~ /^REMARK 2 RESOLUTION./i) { 425 ($Resolution, $ResolutionUnits) = ParseRemark2ResolutionRecordLine($RecordLine); 426 last LINE; 427 } 428 } 429 430 return ($Resolution, $ResolutionUnits); 431 } 432 433 # 434 # Parse HEADER record line... 435 sub ParseHeaderRecordLine { 436 my($Line) = @_; 437 my($Classification, $DepositionDate, $IDCode) = (undef, undef, undef); 438 439 if ($Line !~ /^HEADER/i) { 440 return ($Classification, $DepositionDate, $IDCode); 441 } 442 my($Length); 443 444 ($Classification, $DepositionDate, $IDCode) = ('') x 3; 445 446 $Length = length $Line; 447 448 if ($Length <= 62) { 449 ($Classification, $DepositionDate) = unpack("x10A40A9", $Line); 450 } 451 else { 452 ($Classification, $DepositionDate, $IDCode) = unpack("x10A40A9x3A4", $Line); 453 } 454 455 $Classification = RemoveLeadingAndTrailingWhiteSpaces($Classification); 456 $DepositionDate =~ s/ //g; 457 $IDCode =~ s/ //g; 458 459 return ($Classification, $DepositionDate, $IDCode); 460 } 461 462 # 463 # Generate HEADER record line... 464 sub GenerateHeaderRecordLine { 465 my($Classification, $Date, $IDCode, $Line); 466 467 $Classification = "Created using MayaChemTools"; 468 $Date = GenerateHeaderRecordTimeStamp(); 469 if (@_ == 3) { 470 ($IDCode, $Classification, $Date) = @_; 471 } 472 elsif (@_ == 2) { 473 ($IDCode, $Classification) = @_; 474 } 475 elsif (@_ == 1) { 476 ($IDCode) = @_; 477 } 478 479 $Line = sprintf "HEADER %-40.40s%9.9s %4.4s", $Classification, $Date, $IDCode; 480 return $Line; 481 } 482 483 # Generate PDB header time stamp... 484 sub GenerateHeaderRecordTimeStamp { 485 return TimeUtil::PDBFileTimeStamp(); 486 } 487 488 # 489 # Parse ATOM record line. 490 # 491 sub ParseAtomRecordLine { 492 my($Line) = @_; 493 494 return _ParseAtomOrHetatmRecordLine($Line); 495 } 496 497 # Generate ATOM record line... 498 sub GenerateAtomRecordLine { 499 my($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = @_; 500 501 return _GenerateAtomOrHetatmRecordLine('ATOM', $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge); 502 } 503 504 # 505 # Parse ATOM/HETATm record line. 506 # 507 sub ParseAtomOrHetatmRecordLine { 508 my($Line) = @_; 509 510 return _ParseAtomOrHetatmRecordLine($Line); 511 } 512 513 # Generate ATOM/HETATM record line... 514 sub GenerateAtomOrHetatmRecordLine { 515 my($RecordType, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = @_; 516 517 return _GenerateAtomOrHetatmRecordLine($RecordType, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge); 518 } 519 # 520 # Parse HETATM record line... 521 # 522 sub ParseHetatmRecordLine { 523 my($Line) = @_; 524 525 return _ParseAtomOrHetatmRecordLine($Line); 526 } 527 528 # Generate HETATM record line... 529 sub GenerateHetatmRecordLine { 530 my($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = @_; 531 532 return _GenerateAtomOrHetatmRecordLine('HETATM', $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge); 533 } 534 535 # Parse EXPDTA record line... 536 # 537 # EXPDTA format: 538 # 539 #1 - 6 Record name "EXPDTA" 540 # 9 - 10 Continuation continuation Allows concatenation of multiple records. 541 # 11 - 70 SList technique The experimental technique(s) with optional comment describing the sample or experiment. 542 # 543 # The EXPDTA record identifies the experimental technique used. This may refer to the type of radiation and 544 # sample, or include the spectroscopic or modeling technique. Permitted values include: 545 # 546 # ELECTRON DIFFRACTION 547 # FIBER DIFFRACTION 548 # FLUORESCENCE TRANSFER 549 # NEUTRON DIFFRACTION 550 # NMR 551 # THEORETICAL MODEL 552 # X-RAY DIFFRACTION 553 # 554 sub ParseExpdtaRecordLine { 555 my($Line) = @_; 556 557 if ($Line !~ /^EXPDTA/i) { 558 return ((undef) x 2); 559 } 560 561 my($ContinuationNum, $ExperimentalTechnique) = unpack("x8A2A60" , $Line); 562 563 $ContinuationNum =~ s/ //g; 564 $ExperimentalTechnique = RemoveLeadingAndTrailingWhiteSpaces($ExperimentalTechnique); 565 566 return ($ContinuationNum, $ExperimentalTechnique); 567 } 568 569 # Parse REMARK 2 record line... 570 # 571 # REMARK 2 format: 572 # 573 # The second REMARK 2 record has one of two formats. The first is used for diffraction studies, the second 574 # for other types of experiments in which resolution is not relevant, e.g., NMR and theoretical modeling. 575 # 576 #For diffraction experiments: 577 # 578 # 1 - 6 Record name "REMARK" 579 # 10 LString(1) "2" 580 # 12 - 22 LString(11) "RESOLUTION." 581 # 23 - 27 Real(5.2) resolution Resolution. 582 # 29 - 38 LString(10) "ANGSTROMS." 583 # 584 # REMARK 2 when not a diffraction experiment: 585 # 586 # 1 - 6 Record name "REMARK" 587 # 10 LString(1) "2" 588 # 12 - 38 LString(28) "RESOLUTION. NOT APPLICABLE." 589 # 41 - 70 String comment Comment. 590 # 591 sub ParseRemark2ResolutionRecordLine { 592 my($Line) = @_; 593 594 if ($Line !~ /^REMARK 2 RESOLUTION./i) { 595 return ((undef) x 2); 596 } 597 598 my($Resolution, $ResolutionUnits); 599 600 if ($Line =~ /NOT APPLICABLE/i) { 601 ($Resolution, $ResolutionUnits) = ("NOT APPLICABLE", ""); 602 } 603 else { 604 ($Resolution, $ResolutionUnits) = unpack("x22A5x1A10" , $Line); 605 } 606 607 $Resolution = RemoveLeadingAndTrailingWhiteSpaces($Resolution); 608 609 $ResolutionUnits = RemoveLeadingAndTrailingWhiteSpaces($ResolutionUnits); 610 $ResolutionUnits =~ s/\.$//; 611 612 return ($Resolution, $ResolutionUnits); 613 } 614 615 # 616 # Parse SEQRES record line... 617 # 618 # SEQRES format: 619 # 620 # 1 - 6 Record name "SEQRES" 621 # 9 - 10 Serial number of the SEQRES record for the current chain. Starts at 1 and increments by one each line. Reset to 1 for each chain. 622 # 12 - Chain identifier 623 # 14 - 17 Integer numRes Number of residues in the chain 624 # 20 - 22 24 -26 ... ... 68 - 70 Residue name resName Residue name. 625 # 626 sub ParseSeqresRecordLine { 627 my($Line) = @_; 628 629 if ($Line !~ /^SEQRES/i) { 630 return ((undef) x 5); 631 } 632 my($RecordSerialNumber, $ChainID, $NumOfResidues, $ResidueNames) = unpack("x8A2x1A1x1A4x2A51" , $Line); 633 $RecordSerialNumber =~ s/ //g; 634 $ChainID =~ s/ //g; 635 $NumOfResidues =~ s/ //g; 636 $ResidueNames = RemoveLeadingAndTrailingWhiteSpaces($ResidueNames); 637 638 return ($RecordSerialNumber, $ChainID, $NumOfResidues, $ResidueNames); 639 } 640 641 # 642 # Parse CONECT record line... 643 # 644 # CONECT format: 645 # 646 # 1 - 6 Record name "CONECT" 647 # 7 - 11 Atom number 648 # 12 - 16, 17 - 21, 22 - 26, 27 - 31 Atom number of bonded atom 649 # 650 # 32 - 36, 37 - 41 Atom number of hydrogen bonded atom 651 # 42 - 46 Atom number of salt bridged atom 652 # 47 - 51, 52 -56 Atom number of hydrogen bonded atom 653 # 57 - 61 Atom number of salt bridged atom 654 # 655 sub ParseConectRecordLine { 656 my($Line) = @_; 657 658 if ($Line !~ /^CONECT/i) { 659 return ((undef) x 11); 660 } 661 my($AtomNum, $BondedAtomNum1, $BondedAtomNum2, $BondedAtomNum3, $BondedAtomNum4, $HBondedAtomNum1, $HBondedAtomNum2, $SaltBridgedAtomNum1, $HBondedAtomNum3, $HBondedAtomNum4, $SaltBridgedAtomNum2) = map {s/ //g; $_} unpack("x6A5A5A5A5A5A5A5A5A5A5A5", $Line); 662 663 return ($AtomNum, $BondedAtomNum1, $BondedAtomNum2, $BondedAtomNum3, $BondedAtomNum4, $HBondedAtomNum1, $HBondedAtomNum2, $SaltBridgedAtomNum1, $HBondedAtomNum3, $HBondedAtomNum4, $SaltBridgedAtomNum2); 664 } 665 666 # Generate CONECT record line... 667 sub GenerateConectRecordLine { 668 my($AtomNum, $BondedAtomNum1, $BondedAtomNum2, $BondedAtomNum3, $BondedAtomNum4, $HBondedAtomNum1, $HBondedAtomNum2, $SaltBridgedAtomNum1, $HBondedAtomNum3, $HBondedAtomNum4, $SaltBridgedAtomNum2) = @_; 669 my($Line); 670 671 $Line = sprintf "CONECT%5.5s%5.5s%5.5s%5.5s%5.5s%5.5s%5.5s%5.5s%5.5s%5.5s%5.5s", $AtomNum, $BondedAtomNum1, $BondedAtomNum2, $BondedAtomNum3, $BondedAtomNum4, $HBondedAtomNum1, $HBondedAtomNum2, $SaltBridgedAtomNum1, $HBondedAtomNum3, $HBondedAtomNum4, $SaltBridgedAtomNum2; 672 673 return $Line; 674 } 675 676 # 677 # Parse TER record line... 678 # 679 # TER format: 680 # 681 #1 - 6 Record name "TER " 682 # 7 - 11 Serial number 683 # 18 - 20 Residue name 684 # 22 Chain identifier 685 # 23 - 26 Residue sequence number 686 # 27 Insertion code 687 # 688 sub ParseTerRecordLine { 689 my($Line) = @_; 690 691 if ($Line !~ /^TER/i) { 692 return ((undef) x 5); 693 } 694 my($SerialNumber, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $Length); 695 696 ($SerialNumber, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode) = ('') x 5; 697 698 $Length = length $Line; 699 700 if ($Length <= 17) { 701 ($SerialNumber, $ResidueName) = map {s/ //g; $_} unpack("x6A5", $Line); 702 } 703 elsif ($Length <= 21) { 704 ($SerialNumber, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode) = map {s/ //g; $_} unpack("x6A5x6A3", $Line); 705 } 706 else { 707 ($SerialNumber, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode) = map {s/ //g; $_} unpack("x6A5x6A3xA1A4A1", $Line); 708 } 709 710 return ($SerialNumber, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode); 711 } 712 713 # Generate TER record line... 714 sub GenerateTerRecordLine { 715 my($SerialNumber, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $Line) = ('') x 6; 716 717 if (@_ == 5) { 718 ($SerialNumber, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode) = @_; 719 } 720 elsif (@_ == 4) { 721 ($SerialNumber, $ResidueName, $ChainID, $ResidueNumber) = @_; 722 } 723 elsif (@_ == 3) { 724 ($SerialNumber, $ResidueName) = @_; 725 } 726 elsif (@_ == 2) { 727 ($SerialNumber, $ResidueName) = @_; 728 } 729 elsif (@_ == 1) { 730 ($SerialNumber) = @_; 731 } 732 $Line = sprintf "TER %5.5s %-3.3s %1.1s%4.4s%1.1s", $SerialNumber, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode; 733 734 return $Line; 735 } 736 737 # 738 # Parse MASTER record line... 739 # 740 # MASTER record format: 741 # 742 #1 - 6 Record name "MASTER" 743 # 11 - 15 Number of REMARK records 744 # 16 - 20 "0" 745 # 21 - 25 Number of HET records 746 # 26 - 30 Number of HELIX records 747 # 31 - 35 Number of SHEET records 748 # 36 - 40 Number of TURN records 749 # 41 - 45 Number of SITE records 750 # 46 - 50 Number of coordinate transformation records (ORIGXn+SCALEn+MTRIXn) 751 # 51 - 55 Number of atomic coordinate records (ATOM+HETATM) 752 # 56 - 60 Number of TER records 753 # 61 - 65 Number of CONECT records 754 # 66 - 70 Number of SEQRES records 755 # 756 sub ParseMasterRecordLine { 757 my($Line) = @_; 758 759 if ($Line !~ /^MASTER/i) { 760 return ((undef) x 11); 761 } 762 my($NumOfRemarkRecords, $NumOfHetRecords, $NumOfHelixRecords, $NumOfSheetRecords, $NumOfTurnRecords, $NumOfSiteRecords, $NumOfTransformationsRecords, $NumOfAtomAndHetatmRecords, $NumOfTerRecords, $NumOfConectRecords, $NumOfSeqresRecords) = map {s/ //g; $_} unpack("x6x4A5x5A5A5A5A5A5A5A5A5A5A5", $Line); 763 764 return ($NumOfRemarkRecords, $NumOfHetRecords, $NumOfHelixRecords, $NumOfSheetRecords, $NumOfTurnRecords, $NumOfSiteRecords, $NumOfTransformationsRecords, $NumOfAtomAndHetatmRecords, $NumOfTerRecords, $NumOfConectRecords, $NumOfSeqresRecords); 765 } 766 767 # End record... 768 sub GenerateEndRecordLine { 769 my($Line); 770 $Line = 'END '; 771 return $Line; 772 } 773 774 # ATOM/HETATM record format: 775 # 776 # 1 - 6 Record name 777 # 7 - 11 Atom serial number - right justified 778 # 13 - 16 Atom name 779 # 17 Alternate location indicator. 780 # 18 - 20 Residue name - right justified 781 # 22 Chain identifier. 782 # 23 - 26 Residue sequence number - right justified 783 # 27 Code for insertion of residues. 784 # 31 - 38 Real(8.3), Orthogonal coordinates for X in Angstroms. 785 # 39 - 46 Real(8.3), Orthogonal coordinates for Y in Angstroms. 786 # 47 - 54 Real(8.3), Orthogonal coordinates for Z in Angstroms. 787 # 55 - 60 Real(6.2), Occupancy 788 # 61 - 66 Real(6.2), Temperature factor 789 # 73 - 76 LString(4), Segment identifier, left-justified. 790 # 77 - 78 LString(2), Element symbol, right-justified. 791 #79 - 80 LString(2), Charge on the atom. 792 # 793 # Notes: 794 # . Atom names starting with C, N, O and S are left justified starting with column 14 795 # and others are left justified starting with column 13. 796 # 797 # . Six characters (columns) are reserved for atom names, assigned as follows: 798 # 799 # 13 - 14 Chemical symbol - right justified, except for hydrogen atoms 800 # 801 # And for amino acids: 802 # 803 # 15 Remoteness indicator (alphabetic) (A, B, G, D, E, Z and so on) 804 # 16 Branch designator (numeric) 805 # 806 sub _ParseAtomOrHetatmRecordLine { 807 my($Line) = @_; 808 809 if ($Line !~ /^(ATOM|HETATM)/i) { 810 return ((undef) x 15); 811 } 812 my($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge, $Length); 813 814 ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ('') x 15; 815 816 $Length = length $Line; 817 818 if ($Length <= 72) { 819 ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor) = map {s/ //g; $_} unpack("x6A5xA4A1A3xA1A4A1x3A8A8A8A6A6", $Line); 820 } 821 else { 822 ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = map {s/ //g; $_} unpack("x6A5xA4A1A3xA1A4A1x3A8A8A8A6A6x6A4A2A2", $Line); 823 } 824 return($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge); 825 } 826 827 # Generate ATOM/HETATM record line... 828 sub _GenerateAtomOrHetatmRecordLine { 829 my($RecordType, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = @_; 830 my($Line, $AtomNameFormat); 831 832 if (length($AtomName) >= 4) { 833 # Left justified starting at column 13 for all atom names of length 4... 834 $AtomNameFormat = "%-4.4s"; 835 } 836 elsif (IsEmpty($ElementSymbol)) { 837 # No element symbol specified; just guess from atom name to cover most likely cases... 838 $AtomNameFormat = ($AtomName =~ /^(C|N|O|S)/i) ? " %-3.3s" : "%-4.4s"; 839 } 840 else { 841 # Element symbol specified... 842 if ($ElementSymbol =~ /^H$/i) { 843 # Hydrogen atom name with <=3 characters is left justified starting at column 14; 844 # Otherwise, left justified starting at column 13. 845 $AtomNameFormat = (length($AtomName) <= 3) ? " %-3.3s" : "%-4.4s"; 846 } 847 else { 848 # Non-hydrogen atom name... 849 $AtomNameFormat = (length($ElementSymbol) == 1) ? " %-3.3s" : "%-4.4s"; 850 } 851 } 852 853 $Line = sprintf "%-6.6s%5.5s ${AtomNameFormat}%1.1s%3.3s %1.1s%4.4s%1.1s %8.8s%8.8s%8.8s%6.6s%6.6s %-4.4s%2.2s%2.2s", $RecordType, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge; 854 855 return $Line; 856 } 857 858 # Check record type... 859 sub _IsRecordType { 860 my($Line, $SpecifiedType) = @_; 861 my($Type, $Status); 862 863 ($Type) = map {s/ //g; $_} unpack("A6", $Line); 864 865 $Status = ($SpecifiedType eq $Type) ? 1 : 0; 866 867 return $Status; 868 } 869 870 # Get record type... 871 sub _GetRecordType { 872 my($Line) = @_; 873 my($Type); 874 875 ($Type) = map {s/ //g; $_} unpack("A6", $Line); 876 877 return $Type; 878 } 879 880 # Get chains and residues data using ATOM/HETATM records... 881 # 882 sub _GetChainsAndResiduesFromAtomHetatmRecords { 883 my($PDBRecordLinesRef, $GetChainResiduesBeyondTERFlag, $GetRecordLinesFlag) = @_; 884 885 my($LineCount, $TotalChainCount, $PreviousResidueNumber, $ChainCount, $DefaultChainID, $DefaultChainLabel, $RecordLine, $AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge, %ChainsDataMap); 886 887 # Do a quick chain count using TER record... 888 $TotalChainCount = 0; 889 LINE: for $RecordLine (@{$PDBRecordLinesRef}) { 890 if (IsEndmdlRecordType($RecordLine)) { 891 last LINE; 892 } 893 if (IsTerRecordType($RecordLine)) { 894 $TotalChainCount++; 895 } 896 } 897 898 %ChainsDataMap = (); 899 @{$ChainsDataMap{ChainIDs}} = (); 900 %{$ChainsDataMap{Residues}} = (); 901 %{$ChainsDataMap{ResidueNumbers}} = (); 902 %{$ChainsDataMap{Lines}} = (); 903 %{$ChainsDataMap{ResidueCount}} = (); 904 905 $LineCount = 0; 906 $ChainCount = 0; 907 $DefaultChainLabel = 'None'; 908 $DefaultChainID = $DefaultChainLabel . ($ChainCount + 1); 909 $PreviousResidueNumber = 0; 910 911 LINE: for $RecordLine (@{$PDBRecordLinesRef}) { 912 $LineCount++; 913 if (IsTerRecordType($RecordLine)) { 914 $DefaultChainID = $DefaultChainLabel . ($ChainCount + 1); 915 $ChainCount++; 916 if ($ChainCount == $TotalChainCount) { 917 last LINE; 918 } 919 else { 920 next LINE; 921 } 922 } 923 elsif (!(IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine))) { 924 next LINE; 925 } 926 ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseAtomRecordLine($RecordLine); 927 928 if (IsEmpty($ChainID)) { 929 $ChainID = $DefaultChainID; 930 } 931 if (exists $ChainsDataMap{Residues}{$ChainID}) { 932 # Data for existing chain... 933 if ($GetRecordLinesFlag) { 934 push @{$ChainsDataMap{Lines}{$ChainID}}, $RecordLine; 935 } 936 937 if ($ResidueNumber != $PreviousResidueNumber) { 938 # Next residue with in the chain... 939 push @{$ChainsDataMap{Residues}{$ChainID}}, $ResidueName; 940 push @{$ChainsDataMap{ResidueNumbers}{$ChainID}}, $ResidueNumber; 941 942 if (exists $ChainsDataMap{ResidueCount}{$ChainID}{$ResidueName}) { 943 $ChainsDataMap{ResidueCount}{$ChainID}{$ResidueName} += 1; 944 } 945 else { 946 $ChainsDataMap{ResidueCount}{$ChainID}{$ResidueName} = 1; 947 } 948 $PreviousResidueNumber = $ResidueNumber; 949 } 950 } 951 else { 952 # Data for new chain... 953 push @{$ChainsDataMap{ChainIDs}}, $ChainID; 954 955 @{$ChainsDataMap{Residues}{$ChainID}} = (); 956 push @{$ChainsDataMap{Residues}{$ChainID}}, $ResidueName; 957 958 @{$ChainsDataMap{ResidueNumbers}{$ChainID}} = (); 959 push @{$ChainsDataMap{ResidueNumbers}{$ChainID}}, $ResidueNumber; 960 961 @{$ChainsDataMap{Lines}{$ChainID}} = (); 962 if ($GetRecordLinesFlag) { 963 push @{$ChainsDataMap{Lines}{$ChainID}}, $RecordLine; 964 } 965 966 %{$ChainsDataMap{ResidueCount}{$ChainID}} = (); 967 $ChainsDataMap{ResidueCount}{$ChainID}{$ResidueName} = 1; 968 $PreviousResidueNumber = $ResidueNumber; 969 } 970 } 971 if (!$GetChainResiduesBeyondTERFlag) { 972 return \%ChainsDataMap; 973 } 974 # Look for any HETATM residues specified outside TER records which could belong to an existing chain... 975 my($LineIndex, $PreviousChainID); 976 $PreviousChainID = ''; 977 $PreviousResidueNumber = 0; 978 LINE: for $LineIndex (($LineCount - 1) .. $#{$PDBRecordLinesRef}) { 979 $RecordLine = $PDBRecordLinesRef->[$LineIndex]; 980 if (IsEndmdlRecordType($RecordLine)) { 981 last LINE; 982 } 983 if (!(IsAtomRecordType($RecordLine) || IsHetatmRecordType($RecordLine))) { 984 next LINE; 985 } 986 ($AtomNumber, $AtomName, $AlternateLocation, $ResidueName, $ChainID, $ResidueNumber, $InsertionCode, $X, $Y, $Z, $Occupancy, $TemperatureFactor, $SegmentID, $ElementSymbol, $AtomCharge) = ParseAtomRecordLine($RecordLine); 987 if (IsEmpty($ChainID)) { 988 # Ignore the chains with no ids... 989 next LINE; 990 } 991 if (! exists($ChainsDataMap{Residues}{$ChainID})) { 992 # Don't collect any new chains after TER record... 993 next LINE; 994 } 995 if ($GetRecordLinesFlag) { 996 push @{$ChainsDataMap{Lines}{$ChainID}}, $RecordLine; 997 } 998 if ($ResidueNumber != $PreviousResidueNumber || $ChainID ne $PreviousChainID) { 999 1000 push @{$ChainsDataMap{Residues}{$ChainID}}, $ResidueName; 1001 push @{$ChainsDataMap{ResidueNumbers}{$ChainID}}, $ResidueNumber; 1002 1003 if (exists $ChainsDataMap{ResidueCount}{$ChainID}{$ResidueName}) { 1004 $ChainsDataMap{ResidueCount}{$ChainID}{$ResidueName} += 1; 1005 } 1006 else { 1007 $ChainsDataMap{ResidueCount}{$ChainID}{$ResidueName} = 1; 1008 } 1009 $PreviousChainID = $ChainID; 1010 $PreviousResidueNumber = $ResidueNumber; 1011 } 1012 } 1013 return \%ChainsDataMap; 1014 } 1015 1016 # Get chains and residues data using SEQRES records... 1017 # 1018 sub _GetChainsAndResiduesFromSeqresRecords { 1019 my($PDBRecordLinesRef) = @_; 1020 1021 my($ChainCount, $DefaultChainLabel, $DefaultChainID, $RecordLine, $RecordSerialNumber, $ChainID, $NumOfResidues, $ResidueName, $ResidueNamesString, @ResidueNamesList, %ChainsDataMap); 1022 1023 %ChainsDataMap = (); 1024 @{$ChainsDataMap{ChainIDs}} = (); 1025 %{$ChainsDataMap{Residues}} = (); 1026 %{$ChainsDataMap{ResidueNumbers}} = (); 1027 %{$ChainsDataMap{ResidueCount}} = (); 1028 1029 $ChainCount = 0; 1030 $DefaultChainLabel = 'None'; 1031 $DefaultChainID = $DefaultChainLabel . ($ChainCount + 1); 1032 1033 LINE: for $RecordLine (@{$PDBRecordLinesRef}) { 1034 if (!IsSeqresRecordType($RecordLine)) { 1035 next LINE; 1036 } 1037 ($RecordSerialNumber, $ChainID, $NumOfResidues, $ResidueNamesString) = ParseSeqresRecordLine($RecordLine); 1038 if ($RecordSerialNumber == 1) { 1039 # Indicates start of a new chain... 1040 $DefaultChainID = $DefaultChainLabel . ($ChainCount + 1); 1041 $ChainCount++; 1042 } 1043 if (IsEmpty($ChainID)) { 1044 $ChainID = $DefaultChainID; 1045 } 1046 # Process the residues... 1047 @ResidueNamesList = split /[ ]+/, $ResidueNamesString; 1048 1049 if (exists $ChainsDataMap{Residues}{$ChainID}) { 1050 # Data for existing chain... 1051 push @{$ChainsDataMap{Residues}{$ChainID}}, @ResidueNamesList; 1052 } 1053 else { 1054 # Data for new chain... 1055 push @{$ChainsDataMap{ChainIDs}}, $ChainID; 1056 @{$ChainsDataMap{Residues}{$ChainID}} = (); 1057 push @{$ChainsDataMap{Residues}{$ChainID}}, @ResidueNamesList; 1058 } 1059 1060 # Setup residue count... 1061 for $ResidueName (@ResidueNamesList) { 1062 if (exists $ChainsDataMap{ResidueCount}{$ChainID}{$ResidueName}) { 1063 $ChainsDataMap{ResidueCount}{$ChainID}{$ResidueName} += 1; 1064 } 1065 else { 1066 $ChainsDataMap{ResidueCount}{$ChainID}{$ResidueName} = 1; 1067 } 1068 } 1069 } 1070 return \%ChainsDataMap; 1071 } 1072