package AminoAcids;
#
# File: AminoAcids.pm
# Author: Manish Sud <msud@san.rr.com>
#
# Copyright (C) 2024 Manish Sud. All rights reserved.
#
# This file is part of MayaChemTools.
#
# MayaChemTools is free software; you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation; either version 3 of the License, or (at your option) any
# later version.
#
# MayaChemTools is distributed in the hope that it will be useful, but without
# any warranty; without even the implied warranty of merchantability of fitness
# for a particular purpose. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
# write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
# Boston, MA, 02111-1307, USA.
#

use strict;
use warnings;
use Carp;
# Exporter is listed in @ISA below, so load it explicitly. Without this, an
# import() call would fall through to AUTOLOAD (and croak) unless some other
# module had already loaded Exporter.
use Exporter;
use Text::ParseWords;
use TextUtil;
use FileUtil;

our(@ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);

# Set by Perl to the fully qualified name of the function being autoloaded.
our $AUTOLOAD;

@ISA = qw(Exporter);
@EXPORT = qw();
@EXPORT_OK = qw(GetAminoAcids GetAminoAcidPropertiesData GetAminoAcidPropertiesNames IsAminoAcid IsAminoAcidProperty);

%EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]);

#
# Load amino acids data...
#
# %AminoAcidDataMap:            three letter code => { property name => value }
# %AminoAcidThreeLetterCodeMap: lowercase three letter code => three letter code
# %AminoAcidOneLetterCodeMap:   lowercase one letter code => three letter code
# %AminoAcidNameMap:            lowercase amino acid name => three letter code
# @AminoAcidPropertyNames:      property names in data file column order
# %AminoAcidPropertyNamesMap:   property name => property name
#
my(%AminoAcidDataMap, %AminoAcidThreeLetterCodeMap, %AminoAcidOneLetterCodeMap, %AminoAcidNameMap, @AminoAcidPropertyNames, %AminoAcidPropertyNamesMap);
_LoadAminoAcidsData();

#
# Get a list of all known amino acids as one of these values:
# one letter code, three letter code, or amino acid name...
#
# The optional first argument selects the kind of value: OneLetterCode,
# AminoAcid, or ThreeLetterCode (case insensitive). Any other value,
# including no argument at all, selects three letter codes.
#
# A sorted list without duplicates is returned in list context; a reference
# to that list is returned in scalar context.
#
sub GetAminoAcids {
    my($NameType) = @_;

    if (!defined $NameType) {
        $NameType = 'ThreeLetterCode';
    }

    # Decide once which property to collect; an undefined property name
    # means the three letter code itself...
    my $NameProperty;
    if ($NameType =~ /^OneLetterCode$/i) {
        $NameProperty = 'OneLetterCode';
    }
    elsif ($NameType =~ /^AminoAcid$/i) {
        $NameProperty = 'AminoAcid';
    }

    # Collect names; the hash drops any duplicates...
    my %AminoAcidNamesMap;
    for my $ThreeLetterCode (keys %AminoAcidDataMap) {
        my $Name = defined($NameProperty) ? $AminoAcidDataMap{$ThreeLetterCode}{$NameProperty} : $ThreeLetterCode;
        $AminoAcidNamesMap{$Name} = $Name;
    }

    # Sort 'em out
    my @AminoAcidNames = sort keys %AminoAcidNamesMap;

    return (wantarray ? @AminoAcidNames : \@AminoAcidNames);
}


#
# Get all available properties data for an amino acid using any of these symbols:
# three letter code; one letter code; name.
#
# A reference to a hash array is returned with keys and values representing property
# name and its values respectively. The reference points at this module's own
# data for the amino acid and not at a copy. undef is returned for an unknown
# amino acid ID.
#
sub GetAminoAcidPropertiesData {
    my($AminoAcidID) = @_;

    my $ThreeLetterCode = _ValidateAminoAcidID($AminoAcidID);
    if (!$ThreeLetterCode) {
        # An explicit undef is kept (instead of a bare return) so that
        # callers using list context still receive exactly one value...
        return undef;
    }
    return $AminoAcidDataMap{$ThreeLetterCode};
}

#
# Get names of all available amino acid properties.
#
# When exactly one argument is passed and it is Alphabetical (case
# insensitive), ThreeLetterCode, OneLetterCode, and AminoAcid are listed first
# followed by the remaining property names in sorted order. Otherwise, the
# names are listed in the order of the data file columns.
#
# A list is returned in list context; a reference to an array is returned
# in scalar context.
#
sub GetAminoAcidPropertiesNames {
    my $Mode = 'ByGroup';
    if (@_ == 1 && defined $_[0]) {
        ($Mode) = @_;
    }

    my @PropertyNames;
    if ($Mode =~ /^Alphabetical$/i) {
        # ThreeLetterCode, OneLetterCode, and AminoAcid are always listed first...
        push @PropertyNames, qw(ThreeLetterCode OneLetterCode AminoAcid);
        push @PropertyNames, grep { !/^(?:ThreeLetterCode|OneLetterCode|AminoAcid)$/ } sort keys %AminoAcidPropertyNamesMap;
    }
    else {
        push @PropertyNames, @AminoAcidPropertyNames;
    }
    return (wantarray ? @PropertyNames : \@PropertyNames);
}

#
# Is it a known amino acid? Input is either a one/three letter code or a name.
#
# Returns 1 or 0.
#
sub IsAminoAcid {
    my($AminoAcidID) = @_;

    return _ValidateAminoAcidID($AminoAcidID) ? 1 : 0;
}


#
# Is it an available amino acid property? The name is compared case
# sensitively against the column labels of the data file.
#
# Returns 1 or 0.
#
sub IsAminoAcidProperty {
    my($PropertyName) = @_;

    if (!defined $PropertyName) {
        return 0;
    }
    return exists($AminoAcidPropertyNamesMap{$PropertyName}) ? 1 : 0;
}

#
# Implements GetAminoAcid<PropertyName> for a valid property name.
#
# The first argument is an amino acid ID: three letter code, one letter code,
# or name. The value of the property is returned; undef is returned for an
# unknown amino acid ID. An unsupported function name or an unknown property
# name is a fatal error.
#
sub AUTOLOAD {
    my($AminoAcidID) = @_;

    my $FunctionName = $AUTOLOAD;
    $FunctionName =~ s/.*:://;

    # Never treat object destruction as a property lookup...
    return if $FunctionName eq 'DESTROY';

    # Only Get<PropertyName> functions are supported...
    if ($FunctionName !~ /^Get/) {
        croak "Error: Function, AminoAcids::$FunctionName, is not supported by AUTOLOAD in AminoAcids module: Only Get<PropertyName> functions are implemented...";
    }

    my $PropertyName = $FunctionName;
    $PropertyName =~ s/^GetAminoAcid//;
    if (!exists $AminoAcidPropertyNamesMap{$PropertyName}) {
        croak "Error: Function, AminoAcids::$FunctionName, is not supported by AUTOLOAD in AminoAcids module: Unknown amino acid property name, $PropertyName, specified...";
    }

    my $ThreeLetterCode = _ValidateAminoAcidID($AminoAcidID);
    if (!$ThreeLetterCode) {
        return undef;
    }
    return $AminoAcidDataMap{$ThreeLetterCode}{$PropertyName};
}


#
# Load AminoAcidsData.csv file from <MayaChemTools>/lib/data directory...
#
# A missing data file is a fatal error.
#
sub _LoadAminoAcidsData {
    my $MayaChemToolsLibDir = GetMayaChemToolsLibDirName();
    my $AminoAcidsDataFile = "$MayaChemToolsLibDir" . "/data/AminoAcidsData.csv";

    if (! -e "$AminoAcidsDataFile") {
        croak "Error: MayaChemTools package file, $AminoAcidsDataFile, is missing: Possible installation problems...";
    }

    _LoadData($AminoAcidsDataFile);
}

#
# Load the specified amino acids data file and rebuild all the maps...
#
# The first line not starting with # supplies the column labels, which become
# the property names. Each following line not starting with # supplies the
# property values for one amino acid; its first field is the three letter
# code. A repeated three letter code is ignored with a warning. A line whose
# number of fields differs from the number of column labels is a fatal error.
#
sub _LoadData {
    my($AminoAcidsDataFile) = @_;

    %AminoAcidDataMap = ();
    @AminoAcidPropertyNames = ();
    %AminoAcidPropertyNamesMap = ();
    %AminoAcidThreeLetterCodeMap = ();
    %AminoAcidOneLetterCodeMap = ();
    %AminoAcidNameMap = ();

    # Load property data for all amino acids...
    #
    # File Format:
    #"ThreeLetterCode","OneLetterCode","AminoAcid","AcidicBasic","PolarNonpolar","Charged","Aromatic","HydrophobicHydophilic","IsoelectricPoint","pKCOOH","pKNH3+","MolecularWeight","MolecularWeightMinusH2O(18.01524)","ExactMass","ExactMassMinusH2O(18.01056)","vanderWaalsVolume","%AccessibleResidues","%BuriedResidues","AlphaHelixChouAndFasman","AlphaHelixDeleageAndRoux","AlphaHelixLevitt","AminoAcidsComposition","AminoAcidsCompositionInSwissProt","AntiparallelBetaStrand","AverageAreaBuried","AverageFlexibility","BetaSheetChouAndFasman","BetaSheetDeleageAndRoux","BetaSheetLevitt","BetaTurnChouAndFasman","BetaTurnDeleageAndRoux","BetaTurnLevitt","Bulkiness","CoilDeleageAndRoux","HPLCHFBARetention","HPLCRetentionAtpH2.1","HPLCRetentionAtpH7.4","HPLCTFARetention","HydrophobicityAbrahamAndLeo","HydrophobicityBlack","HydrophobicityBullAndBreese","HydrophobicityChothia","HydrophobicityEisenbergAndOthers","HydrophobicityFauchereAndOthers","HydrophobicityGuy","HydrophobicityHPLCAtpH3.4Cowan","HydrophobicityHPLCAtpH7.5Cowan","HydrophobicityHPLCParkerAndOthers","HydrophobicityHPLCWilsonAndOthers","HydrophobicityHoppAndWoods","HydrophobicityJanin","HydrophobicityKyteAndDoolittle","HydrophobicityManavalanAndOthers","HydrophobicityMiyazawaAndOthers","HydrophobicityOMHSweetAndOthers","HydrophobicityRaoAndArgos","HydrophobicityRfMobility","HydrophobicityRoseAndOthers","HydrophobicityRoseman","HydrophobicityWellingAndOthers","HydrophobicityWolfendenAndOthers","MolecularWeight","NumberOfCodons","ParallelBetaStrand","PolarityGrantham","PolarityZimmerman","RatioHeteroEndToSide","RecognitionFactors","Refractivity","RelativeMutability","TotalBetaStrand","LinearStructure","LinearStructureAtpH7.4"
    #
    #
    my $InDelim = "\,";

    # Three argument open with a lexical handle: the file name can't be
    # misread as an open mode and the handle is closed on any exit path...
    open my $AminoAcidsDataFH, '<', $AminoAcidsDataFile or croak "Couldn't open $AminoAcidsDataFile: $! ...";

    # Skip lines up to column labels...
    # NOTE(review): the loops below rely on GetTextLine (not defined in this
    # file) returning a false value at the end of the file - confirm.
    my $Line;
    LINE: while ($Line = GetTextLine($AminoAcidsDataFH)) {
        if ($Line !~ /^#/) {
            last LINE;
        }
    }
    if (!$Line) {
        # End of file reached without finding any column labels...
        close $AminoAcidsDataFH;
        croak "Error: No column labels found in $AminoAcidsDataFile: Possible installation problems...";
    }
    my @ColLabels = quotewords($InDelim, 0, $Line);
    my $NumOfCols = @ColLabels;

    # Extract property names from column labels...
    for my $Name (@ColLabels) {
        push @AminoAcidPropertyNames, $Name;

        # Store property names...
        $AminoAcidPropertyNamesMap{$Name} = $Name;
    }

    # Process amino acid data...
    LINE: while ($Line = GetTextLine($AminoAcidsDataFH)) {
        if ($Line =~ /^#/) {
            next LINE;
        }
        my @LineWords = quotewords($InDelim, 0, $Line);
        my $NumOfFields = @LineWords;
        if ($NumOfFields != $NumOfCols) {
            # Report the count of the fields and not the fields themselves...
            croak "Error: The number of data fields, $NumOfFields, in $AminoAcidsDataFile must be $NumOfCols.\nLine: $Line...";
        }
        my $ThreeLetterCode = $LineWords[0];
        if (exists $AminoAcidDataMap{$ThreeLetterCode}) {
            carp "Warning: Ignoring data for amino acid $ThreeLetterCode: It has already been loaded.\nLine: $Line....";
            next LINE;
        }

        # Store all the values; a repeated column label keeps the value of
        # its last column...
        %{$AminoAcidDataMap{$ThreeLetterCode}} = ();
        for my $Index (0 .. $#LineWords) {
            my $Name = $AminoAcidPropertyNames[$Index];
            $AminoAcidDataMap{$ThreeLetterCode}{$Name} = $LineWords[$Index];
        }
    }
    close $AminoAcidsDataFH;

    # Setup one letter and amino acid name maps...
    _SetupAminoAcidIDMap();
}


#
# Setup lowercase three/one letter code and name maps pointing
# to three letter code as shown in data file.
#
sub _SetupAminoAcidIDMap {
    %AminoAcidThreeLetterCodeMap = ();
    %AminoAcidOneLetterCodeMap = ();
    %AminoAcidNameMap = ();

    for my $ThreeLetterCode (keys %AminoAcidDataMap) {
        my $OneLetterCode = $AminoAcidDataMap{$ThreeLetterCode}{OneLetterCode};
        my $AminoAcidName = $AminoAcidDataMap{$ThreeLetterCode}{AminoAcid};

        $AminoAcidThreeLetterCodeMap{lc($ThreeLetterCode)} = $ThreeLetterCode;
        $AminoAcidOneLetterCodeMap{lc($OneLetterCode)} = $ThreeLetterCode;
        $AminoAcidNameMap{lc($AminoAcidName)} = $ThreeLetterCode;
    }
}

#
# Validate amino acid ID...
#
# The length of the ID picks the map to search: three characters for a three
# letter code, one character for a one letter code, and any other length for
# an amino acid name. The comparison is case insensitive.
#
# The three letter code as shown in the data file is returned for a known
# ID; otherwise, undef is returned.
#
sub _ValidateAminoAcidID {
    my($AminoAcidID) = @_;

    if (!defined $AminoAcidID) {
        return undef;
    }

    my $IDMapRef;
    if (length($AminoAcidID) == 3) {
        $IDMapRef = \%AminoAcidThreeLetterCodeMap;
    }
    elsif (length($AminoAcidID) == 1) {
        $IDMapRef = \%AminoAcidOneLetterCodeMap;
    }
    else {
        $IDMapRef = \%AminoAcidNameMap;
    }

    my $IDKey = lc($AminoAcidID);
    if (!exists $IDMapRef->{$IDKey}) {
        return undef;
    }
    return $IDMapRef->{$IDKey};
}
