MayaChemTools

   1 package Parsers::YYLexer;
   2 #
   3 # File: YYLexer.pm
   4 # Author: Manish Sud <msud@san.rr.com>
   5 #
   6 # Copyright (C) 2024 Manish Sud. All rights reserved.
   7 #
   8 # This file is part of MayaChemTools.
   9 #
  10 # MayaChemTools is free software; you can redistribute it and/or modify it under
  11 # the terms of the GNU Lesser General Public License as published by the Free
  12 # Software Foundation; either version 3 of the License, or (at your option) any
  13 # later version.
  14 #
  15 # MayaChemTools is distributed in the hope that it will be useful, but without
  16 # any warranty; without even the implied warranty of merchantability of fitness
  17 # for a particular purpose.  See the GNU Lesser General Public License for more
  18 # details.
  19 #
  20 # You should have received a copy of the GNU Lesser General Public License
  21 # along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
  22 # write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
  23 # Boston, MA, 02111-1307, USA.
  24 #
  25 
  26 use strict;
  27 use Carp;
  28 use Exporter;
  29 use Scalar::Util ();
  30 use Parsers::Lexer;
  31 
  32 use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
  33 
  34 @ISA = qw(Parsers::Lexer Exporter);
  35 @EXPORT = qw();
  36 @EXPORT_OK = qw();
  37 
  38 %EXPORT_TAGS = (all  => [@EXPORT, @EXPORT_OK]);
  39 
  40 # Setup class variables...
  41 my($ClassName);
  42 _InitializeClass();
  43 
  44 # Overload Perl functions...
  45 use overload '""' => 'StringifyYYLexer';
  46 
  47 # Class constructor...
  48 sub new {
  49   my($Class, $Input,  @TokensSpec) = @_;
  50 
  51   # Initialize object...
  52   my $This = $Class->SUPER::new($Input,  @TokensSpec);
  53   bless $This, ref($Class) || $Class;
  54   $This->_InitializeYYLexer();
  55 
  56   return $This;
  57 }
  58 
  59 # Initialize object data...
  60 #
  61 sub _InitializeYYLexer {
  62   my($This) = @_;
  63 
  64   # File generated containing mapping of token labels to token numbers by
  65   # running command byacc with -d option on a parser definition file.
  66   #
  67   # For example, "byacc -l -P -d -b Parser Parser.yy" would generate file
  68   # Parser.tab.ph, which might contain the following tokem name and values
  69   # for a parser for a simple calculator:
  70   #
  71   #  $NUMBER=257;
  72   #  $LETTER=258;
  73   #
  74   #
  75   $This->{YYTabFile} = undef;
  76   $This->{YYTabFilePath} = undef;
  77 
  78   # Mapping of token lables to token numbers...
  79   %{$This->{YYTabDataMap}} = ();
  80 
  81   return $This;
  82 }
  83 
  84 # Initialize class ...
  85 sub _InitializeClass {
  86   #Class name...
  87 
  88   $ClassName = __PACKAGE__;
  89 }
  90 
  91 # Process tokens in YYTab file and load mapping of token labels to integers
  92 # for return during YYLex method invocation...
  93 #
  94 # Notes:
  95 #   . YYTabFile must be a complete path or available through @INC path in the
  96 #     same directory where this package is located.
  97 #   . Name of YYTabFile might start with any valid sub directory name in @INC
  98 #     For example, "Parsers/<YYTablFile>" implies the tab file in parsers sub directory
  99 #     under MayaChemTools lib directory as it would be already in @INC path.
 100 #   . YYTabFile must be explicitly set by the caller. The default YYTabFile name,
 101 #     y.tab.ph, generated by byacc is not used implicitly to avoid confusion among
 102 #     multiple distinct instances of YYLexer.
 103 #   . YYTabFile is generated by byacc during its usage with -d options and contains
 104 #     mapping of token codes to token names/labels. YYLexer used this file to map
 105 #     token lables to token codes before retuning token code and value pair back
 106 #     to yyparse function used by byacc.
 107 #   . User defined token numbers start from 257
 108 #
 109 #     The format of YYTabFile generted by byacc during generation of parser code in
 110 #     Perl code is:
 111 #
 112 #     ... ...
 113 #     $NUMBER=257;
 114 #     $ADDOP=258;
 115 #     $SUBOP=259;
 116 #     ... ..
 117 #
 118 sub SetupYYTabFile {
 119   my($This, $YYTabFile) = @_;
 120   my($YYTabFilePath, $Line, $TokenLabel, $TokenNumber);
 121 
 122   $This->{YYTabFile} = undef;
 123   $This->{YYTabFilePath} = undef;
 124   %{$This->{YYTabDataMap}} = ();
 125 
 126   if (!defined $YYTabFile) {
 127     croak "Error: ${ClassName}->SetupYYTabFile: YYTabFile must be specified...";
 128   }
 129   $This->{YYTabFile} = $YYTabFile;
 130 
 131   if (-e $YYTabFile) {
 132     $YYTabFilePath = $YYTabFile;
 133   }
 134   else {
 135     ($YYTabFilePath) = grep {-f "$_/$YYTabFile"}  @INC;
 136     if (!$YYTabFilePath) {
 137       carp "Warning: ${ClassName}->SetupYYTabFile: YYTabFile, $YYTabFile,  can't be located in \@INC path: @INC...";
 138       return $This;
 139     }
 140     $YYTabFilePath = "${YYTabFilePath}/$YYTabFile";
 141   }
 142 
 143   $This->{YYTabFilePath} = $YYTabFilePath;
 144 
 145   open YYTABFILE, "$YYTabFilePath" or die "Couldn't open $YYTabFilePath: $_\n";
 146   while ($Line = <YYTABFILE>) {
 147     ($TokenLabel, $TokenNumber) = ($Line =~ /^\$(.*?)=(.*?);$/);
 148     if (!(defined($TokenLabel) && defined($TokenNumber))) {
 149       croak "Error: ${ClassName}->SetupYYTabFile: Couldn't extract token label and number from YYTabFile $YYTabFile at line: $Line...";
 150     }
 151     if (exists $This->{YYTabDataMap}{$TokenLabel}) {
 152       carp "Warning: ${ClassName}->SetupYYTabFile: Token lable, $TokenLabel, already defined in YYTabFile $YYTabFile...";
 153     }
 154     $This->{YYTabDataMap}{$TokenLabel} = $TokenNumber;
 155   }
 156   close YYTABFILE;
 157 
 158   return $This;
 159 }
 160 
 161 # Get next available token number and any matched text from input stream
 162 # by either removing it from the input stream or simply peeking ahead.
 163 #
 164 # Supported mode values: Peek, Next. Default: Next
 165 #
 166 # Notes:
 167 #   . Token label and value pairs returned by lexer, which can't be mapped to token
 168 #     labels specified in YYTabFile are ignored.
 169 #   . Token text of length 1 returned by lexer without a corresponding explicit token label,
 170 #     which can't be mapped to a token number using Perl ord function, is ignored.
 171 #
 172 sub YYLex {
 173   my($This, $Mode) = @_;
 174   my($LexerToken, $TokenLabel, $TokenNumber, $TokenText);
 175 
 176   ($TokenLabel, $TokenNumber, $TokenText) = (undef) x 3;
 177 
 178   TOKEN: while (defined($LexerToken = $This->Lex($Mode))) {
 179     if (ref $LexerToken) {
 180       ($TokenLabel, $TokenText) = @{$LexerToken};
 181       if (exists $This->{YYTabDataMap}{$TokenLabel}) {
 182         $TokenNumber = $This->{YYTabDataMap}{$TokenLabel};
 183       }
 184       elsif ($TokenLabel =~ /^EOI$/i) {
 185         $TokenNumber = 0;
 186       }
 187     }
 188     else {
 189       $TokenText = $LexerToken;
 190     }
 191 
 192     # Check for any literals (+, - , = etc.) to generte token numbers...
 193     #
 194     if (!defined $TokenNumber) {
 195       if (length($TokenText) == 1 && ord $TokenText) {
 196         $TokenNumber = ord $TokenText;
 197       }
 198     }
 199 
 200     # Generate error message for no mapping to token numbers...
 201     if (defined $TokenNumber) {
 202       last TOKEN;
 203     }
 204     else {
 205       if (defined $TokenLabel) {
 206         carp "Warning: ${ClassName}->YYLex: Igorning token label, $TokenLabel, with matched text, $TokenText, returned by lexer and retrieving next available token or text. Token label couldn't be mapped to token numbers specified in YYTabFile generated from a parser defintion file using byacc. After updating parser definition file, a new YYTabFile containing entry for token label must be generated...";
 207       }
 208       else {
 209         carp "Warning: ${ClassName}->YYLex: Igorning token text, $TokenText, returned by lexer and retrieving next available token or text. Token text returned by lexer couldn't be mapped to token number using Perl ord function. After updating lexer token specifications and parser definition file, a new YYTabFile containing entry for a new token label to match unrecognized text must be generated...  ";
 210       }
 211       next TOKEN;
 212     }
 213   }
 214 
 215   if (!defined $LexerToken) {
 216     # Chained lexer returns undefined at end of input. So it's equivalent to EOI
 217     # token.
 218     if (exists $This->{YYTabDataMap}{EOI}) {
 219       $TokenLabel = "EOI";
 220       $TokenNumber = $This->{YYTabDataMap}{$TokenLabel};
 221       $TokenText = "0";
 222     }
 223     else {
 224       ($TokenLabel, $TokenNumber, $TokenText) = ("EOI", 0, "0");
 225     }
 226   }
 227 
 228   return ($TokenNumber, $TokenText);
 229 }
 230 
 231 # Get next available token number and text pair from input stream by removing it
 232 # from the input stream...
 233 #
 234 sub Next {
 235   my($This) = @_;
 236 
 237   return $This->YYLex();
 238 }
 239 
 240 # Get next available token number and text pair from input stream by by simply
 241 # peeking ahead and without removing it from the input stream...
 242 #
 243 sub Peek {
 244   my($This) = @_;
 245 
 246   return $This->YYLex('Peek')
 247 }
 248 
 249 # Return a curried verson of lexer: yyparse in parser generated by byacc expects it
 250 # to call without passing any argument for the YYLexer object...
 251 #
 252 sub GetYYLex {
 253   my($This) = @_;
 254 
 255   return sub { my($Mode) = @_; $This->YYLex($Mode); };
 256 }
 257 
 258 # Is it a lexer object?
 259 sub _IsYYLexer {
 260   my($Object) = @_;
 261 
 262   return (Scalar::Util::blessed($Object) && $Object->isa($ClassName)) ? 1 : 0;
 263 }
 264 
 265 # Return a string containing information about lexer...
 266 sub StringifyYYLexer {
 267   my($This) = @_;
 268   my($YYLexerString);
 269 
 270   $YYLexerString = "YYLexer: PackageName: $ClassName; " . $This->_GetYYLexerInfoString();
 271 
 272   return $YYLexerString;
 273 }
 274 
 275 # Stringigy YYTabFile token name and value information...
 276 #
 277 sub _GetYYLexerInfoString {
 278   my($This) = @_;
 279   my($YYLexerInfoString, $TokenValue, $YYTabFile, $YYTabFilePath, $YYTabDataMapString);
 280 
 281   $YYTabFile = defined $This->{YYTabFile} ? $This->{YYTabFile} : 'None';
 282   $YYTabFilePath = defined $This->{YYTabFilePath} ? $This->{YYTabFilePath} : 'None';
 283 
 284   $YYLexerInfoString = "YYTabFile: $YYTabFile; YYTabFilePath: $YYTabFilePath";
 285 
 286   $YYTabDataMapString = "YYTabDataMap: None";
 287   if (keys %{$This->{YYTabDataMap}}) {
 288     my($TokenLabel, $TokenNumber);
 289 
 290     $YYTabDataMapString = "YYTabDataMap:";
 291     for $TokenLabel (sort keys %{$This->{YYTabDataMap}}) {
 292       $TokenValue = $This->{YYTabDataMap}{$TokenLabel};
 293       $YYTabDataMapString .= " ${TokenLabel}=${TokenValue}";
 294     }
 295   }
 296 
 297   $YYLexerInfoString .= "; $YYTabDataMapString; " . $This->_GetLexerInfoString();
 298 
 299   return $YYLexerInfoString;
 300 }
 301