package Parsers::YYLexer;
#
# File: YYLexer.pm
# Author: Manish Sud <msud@san.rr.com>
#
# Copyright (C) 2024 Manish Sud. All rights reserved.
#
# This file is part of MayaChemTools.
#
# MayaChemTools is free software; you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation; either version 3 of the License, or (at your option) any
# later version.
#
# MayaChemTools is distributed in the hope that it will be useful, but without
# any warranty; without even the implied warranty of merchantability of fitness
# for a particular purpose. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with MayaChemTools; if not, see <http://www.gnu.org/licenses/> or
# write to the Free Software Foundation Inc., 59 Temple Place, Suite 330,
# Boston, MA, 02111-1307, USA.
#

use strict;
use Carp;
use Exporter;
use Scalar::Util ();
use Parsers::Lexer;

use vars qw(@ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);

@ISA = qw(Parsers::Lexer Exporter);
@EXPORT = qw();
@EXPORT_OK = qw();

%EXPORT_TAGS = (all => [@EXPORT, @EXPORT_OK]);

# Setup class variables...
my($ClassName);
_InitializeClass();

# Overload Perl functions: stringification is delegated to StringifyYYLexer...
use overload '""' => 'StringifyYYLexer';

# Class constructor...
#
# The object is created by the parent class constructor using the specified input
# and tokens specification, blessed into this class, and then set up with empty
# YYTab file information.
#
sub new {
  my $Class = shift;
  my($Input, @TokensSpec) = @_;

  # Let the parent class build the object and bless it into this class...
  my $This = $Class->SUPER::new($Input, @TokensSpec);
  bless $This, ref($Class) || $Class;

  $This->_InitializeYYLexer();

  return $This;
}

# Initialize object data...
#
# Returns the object itself.
#
sub _InitializeYYLexer {
  my $This = shift;

  # File generated containing mapping of token labels to token numbers by
  # running command byacc with -d option on a parser definition file.
  #
  # For example, "byacc -l -P -d -b Parser Parser.yy" would generate file
  # Parser.tab.ph, which might contain the following token name and values
  # for a parser for a simple calculator:
  #
  #  $NUMBER=257;
  #  $LETTER=258;
  #
  #
  @{$This}{qw(YYTabFile YYTabFilePath)} = (undef, undef);

  # Mapping of token labels to token numbers...
  %{$This->{YYTabDataMap}} = ();

  return $This;
}

# Initialize class...
sub _InitializeClass {
  # Class name used in error and warning messages...
  $ClassName = __PACKAGE__;
}

# Process tokens in YYTab file and load mapping of token labels to integers
# for return during YYLex method invocation...
#
# Notes:
#   . YYTabFile must be a complete path or available through @INC path in the
#     same directory where this package is located.
#   . Name of YYTabFile might start with any valid sub directory name in @INC
#     For example, "Parsers/<YYTabFile>" implies the tab file in parsers sub directory
#     under MayaChemTools lib directory as it would be already in @INC path.
#   . YYTabFile must be explicitly set by the caller. The default YYTabFile name,
#     y.tab.ph, generated by byacc is not used implicitly to avoid confusion among
#     multiple distinct instances of YYLexer.
#   . YYTabFile is generated by byacc during its usage with -d option and contains
#     mapping of token codes to token names/labels. YYLexer uses this file to map
#     token labels to token codes before returning token code and value pair back
#     to yyparse function used by byacc.
#   . User defined token numbers start from 257
#
# The format of YYTabFile generated by byacc during generation of parser code in
# Perl code is:
#
# ... ...
# $NUMBER=257;
# $ADDOP=258;
# $SUBOP=259;
# ... ..
#
# Parameters:
#   . $YYTabFile - Name or path of the tab file to load.
#
# Behavior:
#   . Any previously loaded tab file name, path and token mapping are cleared first.
#   . Croaks when YYTabFile is not defined or a line can't be parsed as
#     "$LABEL=NUMBER;". Trailing whitespace, including a carriage return from CRLF
#     line endings, is tolerated.
#   . Warns and returns without loading anything when the file can't be located.
#   . Dies when the located file can't be opened.
#   . Warns about a token label defined more than once; the last value wins.
#
# Returns the object itself.
#
sub SetupYYTabFile {
  my($This, $YYTabFile) = @_;

  # Forget any previously loaded tab file information...
  $This->{YYTabFile} = undef;
  $This->{YYTabFilePath} = undef;
  %{$This->{YYTabDataMap}} = ();

  if (!defined $YYTabFile) {
    croak "Error: ${ClassName}->SetupYYTabFile: YYTabFile must be specified...";
  }
  $This->{YYTabFile} = $YYTabFile;

  # Use the specified name as is when it exists; otherwise, look for it under @INC...
  my $YYTabFilePath;
  if (-e $YYTabFile) {
    $YYTabFilePath = $YYTabFile;
  }
  else {
    my($IncDir) = grep {-f "$_/$YYTabFile"} @INC;
    if (!$IncDir) {
      carp "Warning: ${ClassName}->SetupYYTabFile: YYTabFile, $YYTabFile, can't be located in \@INC path: @INC...";
      return $This;
    }
    $YYTabFilePath = "${IncDir}/$YYTabFile";
  }

  $This->{YYTabFilePath} = $YYTabFilePath;

  # Three argument open with a lexical file handle: the file name is never interpreted
  # as an open mode and the handle is closed automatically should croak fire below.
  # The system error, $!, is reported on failure.
  open(my $YYTabFH, '<', $YYTabFilePath) or die "Couldn't open $YYTabFilePath: $!\n";

  while (my $Line = <$YYTabFH>) {
    my($TokenLabel, $TokenNumber) = ($Line =~ /^\$(.*?)=(.*?);\s*$/);
    if (!(defined($TokenLabel) && defined($TokenNumber))) {
      croak "Error: ${ClassName}->SetupYYTabFile: Couldn't extract token label and number from YYTabFile $YYTabFile at line: $Line...";
    }
    if (exists $This->{YYTabDataMap}{$TokenLabel}) {
      carp "Warning: ${ClassName}->SetupYYTabFile: Token lable, $TokenLabel, already defined in YYTabFile $YYTabFile...";
    }
    $This->{YYTabDataMap}{$TokenLabel} = $TokenNumber;
  }
  close $YYTabFH;

  return $This;
}

# Get next available token number and any matched text from input stream
# by either removing it from the input stream or simply peeking ahead.
#
# Supported mode values: Peek, Next. Default: Next
#
# Returns a list containing token number and token text. At end of input, the
# token number is the value mapped to EOI in YYTabFile, or 0 without any such
# mapping, and the token text is "0".
#
# Notes:
#   . Token label and value pairs returned by lexer, which can't be mapped to token
#     labels specified in YYTabFile are ignored.
#   . Token text of length 1 returned by lexer without a corresponding explicit token label,
#     which can't be mapped to a token number using Perl ord function, is ignored.
#
sub YYLex {
  my($This, $Mode) = @_;
  my($LexerToken, $TokenLabel, $TokenNumber, $TokenText);

  TOKEN: while (defined($LexerToken = $This->Lex($Mode))) {
    # Start with a clean slate for each token so that label or text of a previously
    # ignored token is never reported along with the current one...
    ($TokenLabel, $TokenNumber, $TokenText) = (undef) x 3;

    if (ref $LexerToken) {
      # Token label and matched text pair...
      ($TokenLabel, $TokenText) = @{$LexerToken};
      if (exists $This->{YYTabDataMap}{$TokenLabel}) {
        $TokenNumber = $This->{YYTabDataMap}{$TokenLabel};
      }
      elsif ($TokenLabel =~ /^EOI$/i) {
        $TokenNumber = 0;
      }
    }
    else {
      # Plain text without any token label...
      $TokenText = $LexerToken;
    }

    # Check for any literals (+, - , = etc.) to generate token numbers...
    #
    if (!defined $TokenNumber) {
      if (length($TokenText) == 1 && ord $TokenText) {
        $TokenNumber = ord $TokenText;
      }
    }

    last TOKEN if defined $TokenNumber;

    # Generate warning message for no mapping to token numbers and move on to the
    # next token...
    #
    # NOTE(review): in Peek mode, presumably Lex leaves the unmapped token in the
    # input stream, in which case the same token would be retrieved again here -
    # confirm against Parsers::Lexer.
    #
    if (defined $TokenLabel) {
      carp "Warning: ${ClassName}->YYLex: Igorning token label, $TokenLabel, with matched text, $TokenText, returned by lexer and retrieving next available token or text. Token label couldn't be mapped to token numbers specified in YYTabFile generated from a parser defintion file using byacc. After updating parser definition file, a new YYTabFile containing entry for token label must be generated...";
    }
    else {
      carp "Warning: ${ClassName}->YYLex: Igorning token text, $TokenText, returned by lexer and retrieving next available token or text. Token text returned by lexer couldn't be mapped to token number using Perl ord function. After updating lexer token specifications and parser definition file, a new YYTabFile containing entry for a new token label to match unrecognized text must be generated... ";
    }
  }

  if (!defined $LexerToken) {
    # Chained lexer returns undefined at end of input. So it's equivalent to EOI
    # token.
    $TokenNumber = exists $This->{YYTabDataMap}{EOI} ? $This->{YYTabDataMap}{EOI} : 0;
    $TokenText = "0";
  }

  return ($TokenNumber, $TokenText);
}

# Get next available token number and text pair from input stream by removing it
# from the input stream...
#
sub Next {
  my($This) = @_;

  return $This->YYLex();
}

# Get next available token number and text pair from input stream by simply
# peeking ahead and without removing it from the input stream...
#
sub Peek {
  my($This) = @_;

  return $This->YYLex('Peek');
}

# Return a curried version of lexer: yyparse in parser generated by byacc expects it
# to call without passing any argument for the YYLexer object...
#
# The returned code reference accepts an optional mode and passes it to YYLex.
#
sub GetYYLex {
  my($This) = @_;

  return sub { my($Mode) = @_; $This->YYLex($Mode); };
}

# Is it a lexer object?
#
# Returns 1 for a blessed object derived from this class; otherwise, returns 0.
#
sub _IsYYLexer {
  my($Object) = @_;

  return (Scalar::Util::blessed($Object) && $Object->isa($ClassName)) ? 1 : 0;
}

# Return a string containing information about lexer...
sub StringifyYYLexer {
  my($This) = @_;
  my($YYLexerString);

  $YYLexerString = "YYLexer: PackageName: $ClassName; " . $This->_GetYYLexerInfoString();

  return $YYLexerString;
}

# Stringify YYTabFile token name and value information...
#
# The returned string contains YYTabFile name and path, or None for any value not
# set, token label and number pairs sorted by token label, followed by the string
# returned by _GetLexerInfoString.
#
sub _GetYYLexerInfoString {
  my($This) = @_;
  my($YYLexerInfoString, $TokenValue, $YYTabFile, $YYTabFilePath, $YYTabDataMapString);

  $YYTabFile = defined $This->{YYTabFile} ? $This->{YYTabFile} : 'None';
  $YYTabFilePath = defined $This->{YYTabFilePath} ? $This->{YYTabFilePath} : 'None';

  $YYLexerInfoString = "YYTabFile: $YYTabFile; YYTabFilePath: $YYTabFilePath";

  $YYTabDataMapString = "YYTabDataMap: None";
  if (keys %{$This->{YYTabDataMap}}) {
    my($TokenLabel);

    $YYTabDataMapString = "YYTabDataMap:";
    for $TokenLabel (sort keys %{$This->{YYTabDataMap}}) {
      $TokenValue = $This->{YYTabDataMap}{$TokenLabel};
      $YYTabDataMapString .= " ${TokenLabel}=${TokenValue}";
    }
  }

  $YYLexerInfoString .= "; $YYTabDataMapString; " . $This->_GetLexerInfoString();

  return $YYLexerInfoString;
}
