MayaChemTools

   1 package AminoAcids;
   2 #
   3 # File: AminoAcids.pm
   4 # Author: Manish Sud <msud@san.rr.com>
   5 #
   6 # Copyright (C) 2024 Manish Sud. All rights reserved.
   7 #
   8 # This file is part of MayaChemTools.
   9 #
  10 # MayaChemTools is free software; you can redistribute it and/or modify it under
  11 # the terms of the GNU Lesser General Public License as published by the Free
  12 # Software Foundation; either version 3 of the License, or (at your option) any
  13 # later version.
  14 #
  15 # MayaChemTools is distributed in the hope that it will be useful, but without
  16 # any warranty; without even the implied warranty of merchantability or fitness
  17 # for a particular purpose.  See the GNU Lesser General Public License for more
  18 # details.
  19 #
  20 # You should have received a copy of the GNU Lesser General Public License
  21 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  22 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  23 # Boston, MA, 02111-1307, USA.
  24 #
  25 
  26 use strict;
  27 use Carp;
  28 use Text::ParseWords;
  29 use TextUtil;
  30 use FileUtil;
  31 
  # Package globals read by Exporter (see @ISA below). Nothing is exported by
  # default; every public function must be requested by name or via the 'all' tag.
  our(@ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);

  @ISA = qw(Exporter);
  @EXPORT = qw();
  @EXPORT_OK = qw(
    GetAminoAcids
    GetAminoAcidPropertiesData
    GetAminoAcidPropertiesNames
    IsAminoAcid
    IsAminoAcidProperty
  );

  # The 'all' tag is the union of the default and optional export lists.
  %EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]);

  #
  # Load amino acids data...
  #
  # File-scoped lookup tables shared by the subs in this file. They are filled
  # by the call to _LoadAminoAcidsData() made right after their declaration.
  my(
    %AminoAcidDataMap,
    %AminoAcidThreeLetterCodeMap,
    %AminoAcidOneLetterCodeMap,
    %AminoAcidNameMap,
    @AminoAcidPropertyNames,
    %AminoAcidPropertyNamesMap,
  );
  _LoadAminoAcidsData();
  45 
  #
  # Return the sorted, de-duplicated identifiers of all loaded amino acids.
  #
  # Parameters:
  #   $NameType - optional. 'OneLetterCode' or 'AminoAcid' (matched
  #               case-insensitively) selects that column of the loaded data.
  #               Any other value, or no argument, selects the three letter code.
  #
  # Returns:
  #   The identifiers in Perl's default string sort order: a list in list
  #   context, a reference to an array in scalar context.
  #
  sub GetAminoAcids {
    my $NameType = 'ThreeLetterCode';
    ($NameType) = @_ if @_ >= 1;

    # Pick the data column once; undef means "use the three letter code key itself".
    my $Column = ($NameType =~ /^OneLetterCode$/i) ? 'OneLetterCode'
               : ($NameType =~ /^AminoAcid$/i)     ? 'AminoAcid'
               :                                     undef;

    # A hash keyed by name collapses any duplicates.
    my %UniqueNames;
    for my $Code (keys %AminoAcidDataMap) {
      my $Label = defined($Column) ? $AminoAcidDataMap{$Code}{$Column} : $Code;
      $UniqueNames{$Label} = $Label;
    }

    my @SortedNames = sort keys %UniqueNames;

    return (wantarray ? @SortedNames : \@SortedNames);
  }
  77 
  78 
  #
  # Look up all property data for one amino acid.
  #
  # Parameters:
  #   $AminoAcidID - three letter code, one letter code, or amino acid name;
  #                  resolved through _ValidateAminoAcidID().
  #
  # Returns:
  #   A reference to the hash stored for that amino acid in the loaded data
  #   (keys are property names, values are property values), or undef when the
  #   ID is not recognized. The reference points at the module's own table, not
  #   at a copy.
  #
  sub GetAminoAcidPropertiesData {
    my($AminoAcidID) = @_;

    my $Code = _ValidateAminoAcidID($AminoAcidID);

    # Explicit undef (not a bare return) so list-context callers still get one element.
    return undef unless $Code;

    return \%{ $AminoAcidDataMap{$Code} };
  }
  97 
  #
  # Get the names of all available amino acid properties.
  #
  # Parameters:
  #   $Mode - optional, honored only when exactly one argument is passed.
  #           'Alphabetical' (case-insensitive) lists ThreeLetterCode,
  #           OneLetterCode and AminoAcid first, followed by the remaining
  #           names in Perl's default string sort order. Any other value, or
  #           the default 'ByGroup', returns the names in the order they were
  #           loaded from the data file.
  #
  # Returns:
  #   The property names: a list in list context, a reference to an array in
  #   scalar context.
  #
  sub GetAminoAcidPropertiesNames {
    my $Mode = (@_ == 1) ? $_[0] : 'ByGroup';

    my @Names;
    if ($Mode =~ /^Alphabetical$/i) {
      # The three identifier columns always lead; everything else follows sorted.
      my @Remaining = grep { !/^(ThreeLetterCode|OneLetterCode|AminoAcid)$/ }
                      sort keys %AminoAcidPropertyNamesMap;
      @Names = (qw(ThreeLetterCode OneLetterCode AminoAcid), @Remaining);
    }
    else {
      @Names = @AminoAcidPropertyNames;
    }

    return (wantarray ? @Names : \@Names);
  }
 127 
 #
 # Is it a known amino acid?
 #
 # Parameters:
 #   $AminoAcidID - a one letter code, a three letter code, or a name.
 #
 # Returns:
 #   1 when _ValidateAminoAcidID() resolves the ID to a true value, 0 otherwise.
 #
 sub IsAminoAcid {
   my($AminoAcidID) = @_;

   return _ValidateAminoAcidID($AminoAcidID) ? 1 : 0;
 }
 139 
 140 
 #
 # Is it an available amino acid property?
 #
 # Parameters:
 #   $PropertyName - property name; compared exactly (case-sensitively) against
 #                   the keys of the loaded property names table.
 #
 # Returns:
 #   1 when the name is a key of that table, 0 otherwise.
 #
 sub IsAminoAcidProperty {
   my($PropertyName) = @_;

   return (exists $AminoAcidPropertyNamesMap{$PropertyName}) ? 1 : 0;
 }
 152 
 #
 # Implements GetAminoAcid<PropertyName> for a valid property name.
 #
 # Perl calls this sub for any undefined function invoked in this package. The
 # unqualified function name is taken from $AUTOLOAD, the leading 'GetAminoAcid'
 # is removed, and the remainder must be a key of the loaded property names table.
 #
 # Parameters:
 #   $AminoAcidID - three letter code, one letter code, or amino acid name.
 #
 # Returns:
 #   The value stored for that property and amino acid, or undef when the
 #   amino acid ID is not recognized.
 #
 # Croaks:
 #   When the function name does not start with 'Get', or when the derived
 #   property name is unknown.
 #
 sub AUTOLOAD {
   my($AminoAcidID) = @_;

   our $AUTOLOAD;

   # Drop the package qualifier: everything up to and including the last '::'.
   my $FunctionName = $AUTOLOAD;
   $FunctionName =~ s/.*:://;

   # Only Get<PropertyName> functions are supported...
   if ($FunctionName !~ /^Get/) {
     croak "Error: Function, AminoAcids::$FunctionName, is not supported by AUTOLOAD in AminoAcids module: Only Get<PropertyName> functions are implemented...";
   }

   my $PropertyName = $FunctionName;
   $PropertyName =~ s/^GetAminoAcid//;
   if (!exists $AminoAcidPropertyNamesMap{$PropertyName}) {
     croak "Error: Function, AminoAcids::$FunctionName, is not supported by AUTOLOAD in AminoAcids module: Unknown amino acid property name, $PropertyName, specified...";
   }

   my $ThreeLetterCode = _ValidateAminoAcidID($AminoAcidID);

   # Explicit undef (not a bare return) keeps the list-context result unchanged for callers.
   return undef unless $ThreeLetterCode;

   return $AminoAcidDataMap{$ThreeLetterCode}{$PropertyName};
 }
 183 
 184 
 #
 # Locate AminoAcidsData.csv in the data subdirectory of the directory returned
 # by GetMayaChemToolsLibDirName() and hand it to _LoadData().
 #
 # Croaks:
 #   When the data file does not exist.
 #
 sub _LoadAminoAcidsData {
   my $AminoAcidsDataFile = GetMayaChemToolsLibDirName() . "/data/AminoAcidsData.csv";

   unless (-e $AminoAcidsDataFile) {
     croak "Error: MayaChemTools package file, $AminoAcidsDataFile, is missing: Possible installation problems...";
   }

   _LoadData($AminoAcidsDataFile);
 }
 201 
 202 #
 203 # Load AminoAcidsData.csv file from <MayaChemTools>/lib directory...
 204 #
 205 sub _LoadData {
 206   my($AminoAcidsDataFile) = @_;
 207 
 208   %AminoAcidDataMap = ();
 209   @AminoAcidPropertyNames = ();
 210   %AminoAcidPropertyNamesMap = ();
 211   %AminoAcidThreeLetterCodeMap = ();
 212   %AminoAcidOneLetterCodeMap = ();
 213   %AminoAcidNameMap = ();
 214 
 215   # Load property data for all amino acids...
 216   #
 217   # File Format:
 218   #"ThreeLetterCode","OneLetterCode","AminoAcid","AcidicBasic","PolarNonpolar","Charged","Aromatic","HydrophobicHydophilic","IsoelectricPoint","pKCOOH","pKNH3+","MolecularWeight","MolecularWeightMinusH2O(18.01524)","ExactMass","ExactMassMinusH2O(18.01056)","vanderWaalsVolume","%AccessibleResidues","%BuriedResidues","AlphaHelixChouAndFasman","AlphaHelixDeleageAndRoux","AlphaHelixLevitt","AminoAcidsComposition","AminoAcidsCompositionInSwissProt","AntiparallelBetaStrand","AverageAreaBuried","AverageFlexibility","BetaSheetChouAndFasman","BetaSheetDeleageAndRoux","BetaSheetLevitt","BetaTurnChouAndFasman","BetaTurnDeleageAndRoux","BetaTurnLevitt","Bulkiness","CoilDeleageAndRoux","HPLCHFBARetention","HPLCRetentionAtpH2.1","HPLCRetentionAtpH7.4","HPLCTFARetention","HydrophobicityAbrahamAndLeo","HydrophobicityBlack","HydrophobicityBullAndBreese","HydrophobicityChothia","HydrophobicityEisenbergAndOthers","HydrophobicityFauchereAndOthers","HydrophobicityGuy","HydrophobicityHPLCAtpH3.4Cowan","HydrophobicityHPLCAtpH7.5Cowan","HydrophobicityHPLCParkerAndOthers","HydrophobicityHPLCWilsonAndOthers","HydrophobicityHoppAndWoods","HydrophobicityJanin","HydrophobicityKyteAndDoolittle","HydrophobicityManavalanAndOthers","HydrophobicityMiyazawaAndOthers","HydrophobicityOMHSweetAndOthers","HydrophobicityRaoAndArgos","HydrophobicityRfMobility","HydrophobicityRoseAndOthers","HydrophobicityRoseman","HydrophobicityWellingAndOthers","HydrophobicityWolfendenAndOthers","MolecularWeight","NumberOfCodons","ParallelBetaStrand","PolarityGrantham","PolarityZimmerman","RatioHeteroEndToSide","RecognitionFactors","Refractivity","RelativeMutability","TotalBetaStrand","LinearStructure","LinearStructureAtpH7.4"
 219   #
 220   #
 221   my($ThreeLetterCode, $OneLetterCode, $AminoAcidName, $Line, $NumOfCols, $InDelim, $Index, $Name, $Value, $Units, @LineWords, @ColLabels);
 222 
 223   $InDelim = "\,";
 224   open AMINOACIDSDATAFILE, "$AminoAcidsDataFile" or croak "Couldn't open $AminoAcidsDataFile: $! ...";
 225 
 226   # Skip lines up to column labels...
 227   LINE: while ($Line = GetTextLine(\*AMINOACIDSDATAFILE)) {
 228     if ($Line !~ /^#/) {
 229       last LINE;
 230     }
 231   }
 232   @ColLabels= quotewords($InDelim, 0, $Line);
 233   $NumOfCols = @ColLabels;
 234 
 235   # Extract property names from column labels...
 236   @AminoAcidPropertyNames = ();
 237   for $Index (0 .. $#ColLabels) {
 238     $Name = $ColLabels[$Index];
 239     push @AminoAcidPropertyNames, $Name;
 240 
 241     # Store property names...
 242     $AminoAcidPropertyNamesMap{$Name} = $Name;
 243   }
 244 
 245   # Process amino acid data...
 246   LINE: while ($Line = GetTextLine(\*AMINOACIDSDATAFILE)) {
 247     if ($Line =~ /^#/) {
 248       next LINE;
 249     }
 250     @LineWords = ();
 251     @LineWords = quotewords($InDelim, 0, $Line);
 252     if (@LineWords != $NumOfCols) {
 253       croak "Error: The number of data fields, @LineWords, in $AminoAcidsDataFile must be $NumOfCols.\nLine: $Line...";
 254     }
 255     $ThreeLetterCode = $LineWords[0]; $OneLetterCode = $LineWords[1]; $AminoAcidName = $LineWords[3];
 256     if (exists $AminoAcidDataMap{$ThreeLetterCode}) {
 257       carp "Warning: Ignoring data for amino acid $ThreeLetterCode: It has already been loaded.\nLine: $Line....";
 258       next LINE;
 259     }
 260 
 261     # Store all the values...
 262     %{$AminoAcidDataMap{$ThreeLetterCode}} = ();
 263     for $Index (0 .. $#LineWords) {
 264       $Name = $AminoAcidPropertyNames[$Index];
 265       $Value = $LineWords[$Index];
 266       $AminoAcidDataMap{$ThreeLetterCode}{$Name} = $Value;
 267     }
 268   }
 269   close AMINOACIDSDATAFILE;
 270 
 271   # Setup one letter and amino acid name maps...
 272   _SetupAminoAcidIDMap();
 273 }
 274 
 275 
 #
 # Rebuild the three ID lookup maps from the loaded amino acid data.
 #
 # Each map is keyed by a lowercased identifier (three letter code, one letter
 # code, or amino acid name) and points to the three letter code exactly as it
 # is spelled in the data file.
 #
 sub _SetupAminoAcidIDMap {
   %AminoAcidThreeLetterCodeMap = ();
   %AminoAcidOneLetterCodeMap = ();
   %AminoAcidNameMap = ();

   for my $Code (keys %AminoAcidDataMap) {
     my $Record = $AminoAcidDataMap{$Code};

     $AminoAcidThreeLetterCodeMap{lc($Code)} = $Code;
     $AminoAcidOneLetterCodeMap{lc($Record->{OneLetterCode})} = $Code;
     $AminoAcidNameMap{lc($Record->{AminoAcid})} = $Code;
   }
 }
 296 
 #
 # Validate amino acid ID...
 #
 # The length of the ID decides which lookup map is consulted: 3 characters
 # means the three letter code map, 1 character means the one letter code map,
 # and any other length means the amino acid name map. The lookup uses the
 # lowercased ID, so matching is case-insensitive.
 #
 # Parameters:
 #   $AminoAcidID - three letter code, one letter code, or amino acid name.
 #
 # Returns:
 #   The three letter code stored in the selected map, or undef when the ID is
 #   not a key of that map.
 #
 sub _ValidateAminoAcidID {
   my($AminoAcidID) = @_;

   my $IDLength = length($AminoAcidID);
   my $LookupMap = ($IDLength == 3) ? \%AminoAcidThreeLetterCodeMap
                 : ($IDLength == 1) ? \%AminoAcidOneLetterCodeMap
                 :                    \%AminoAcidNameMap;

   my $Key = lc($AminoAcidID);

   # Explicit undef (not a bare return) so list-context callers still get one element.
   return undef unless exists $LookupMap->{$Key};

   return $LookupMap->{$Key};
 }
 323 
 324