/*******************************************************************/ Write a C+
ID: 3678129 • Letter: #
Question
/*******************************************************************/
Write a C++ program that simulates a lexical/syntax analyzer (parser) system for the following simple if..else.. statement.
The BNF is:
<ifstmt> -> if (<boolexpr>) <assign> | if (<boolexpr>) <assign> else <assign>
<expr> -> <expr> + <term> | <expr> - <term> | <term>
The EBNF is:
<assign> -> id = <expr>; | id = <expr>; <assign>
NOTE:
Operator/keyword: +, -, *, /, %, (, ), ;, TRUE, FALSE, OR, AND, NOT
id: (a+b+...+z+A+B+...+Z)(a+b+...+z+A+B+...+Z+0+1+2+...+9)*
int_constant: (0+1+2+...+9)(0+1+2+...+9)*
/********************************************************************/
Sample syntax #1:
if (TRUE AND FALSE)
A=sum / (total + 47); B=(4+A)%3; C=B-15;
Sample syntax #2:
if (FALSE OR (NOT TRUE))
A=sum * (total - 47); B=(4+A)%3; C=B-15;
else
A = (sum + total) * 4; B=B+1;
C=B/2;
________________________________________________________
Lexical Analyzer
Explanation / Answer
#include <iostream>
#include <stdio.h>
#include <ctype.h>
using namespace std;
/* Character classes stored in charClass by getChar().
   EOF (from <stdio.h>) serves as an additional class at end of input. */
#define LETTER 0
#define DIGIT 1
#define PUNTO 2    /* the decimal point '.' */
#define UNKNOWN 99 /* any other character; lex() also uses this value as the
                      token code of a malformed numeric literal */
/* Token codes stored in nextToken and returned by lex() */
#define INT_LIT 10
#define FLO_LIT 13
#define IDENT 11
#define ASSIGN_OP 20 /* defined, but lookup() has no case that produces it */
#define ADD_OP 21
#define SUB_OP 22
#define MULT_OP 23
#define DIV_OP 24
#define LEFT_PAREN 25
#define RIGHT_PAREN 26
/* Scanner state shared by the lexer and the parser */
int charClass;     /* class of the character held in nextChar */
char lexeme [100]; /* NUL-terminated text of the token being assembled */
char nextChar;     /* most recently read input character */
int lexLen;        /* number of characters currently stored in lexeme */
int token;         /* not referenced by any function visible in this file */
int nextToken;     /* code of the most recently scanned token */
/* Input stream opened by main(). fopen() itself is declared by <stdio.h>,
   so it is not redeclared here. */
FILE *in_fp;
/* Recursive-descent parsing routines (mutually recursive, hence declared
   before their definitions) */
void factor();
void term();
void expr();
/*****************************************************/
/* addChar - append nextChar to lexeme and keep the buffer NUL-terminated.
   If the buffer has no room left for the character plus the terminator,
   the character is dropped and an error message is printed instead. */
void addChar()
{
    /* One slot is always reserved for the terminating NUL. */
    if (lexLen + 1 < static_cast<int>(sizeof lexeme))
    {
        lexeme[lexLen++] = nextChar;
        lexeme[lexLen] = '\0';
    }
    else
        printf("Error - lexeme is too long\n");
}
/* lookup - classify a single operator, parenthesis or '.' character.
   The character held in nextChar is appended to the lexeme, then the token
   code matching ch is stored in nextToken and returned. A character with no
   entry in the table below is reported as EOF. */
int lookup(char ch)
{
    int code;

    /* The lexeme is extended regardless of which character was seen. */
    addChar();

    switch (ch)
    {
    case '(': code = LEFT_PAREN;  break;
    case ')': code = RIGHT_PAREN; break;
    case '+': code = ADD_OP;      break;
    case '-': code = SUB_OP;      break;
    case '*': code = MULT_OP;     break;
    case '/': code = DIV_OP;      break;
    case '.': code = PUNTO;       break;
    default:  code = EOF;         break;
    }

    nextToken = code;
    return nextToken;
}
/*****************************************************/
/* getChar - read the next character of input from in_fp into nextChar and
   store its character class (LETTER, DIGIT, PUNTO, UNKNOWN or EOF) in
   charClass. At end of input nextChar is set to '\0'. */
void getChar()
{
    /* getc() returns an int so that EOF is distinguishable from every valid
       byte; the comparison must happen before narrowing to char. */
    const int ch = getc(in_fp);

    if (ch == EOF)
    {
        nextChar = '\0';
        charClass = EOF;
        return;
    }

    nextChar = static_cast<char>(ch);

    /* ch is in the unsigned char range here, which is what the <ctype.h>
       classification functions require. */
    if (isalpha(ch))
        charClass = LETTER;
    else if (isdigit(ch))
        charClass = DIGIT;
    else if (ch == '.')
        charClass = PUNTO;
    else
        charClass = UNKNOWN;
}
/*****************************************************/
/* getNonBlank - a function to call getChar until it
returns a non-whitespace character */
void getNonBlank()
{
while (isspace(nextChar))
getChar();
}
/* Classification state of the numeric literal being scanned by lex():
   0 = integer, 1 = at least one digit after the decimal point,
   2 = malformed. lex() resets it at the start of every call. */
int tmp = 0;
/*****************************************************/
/* consumeUntilSpace - append input characters to the lexeme until the next
   whitespace character or the end of input; used to swallow the remainder
   of a malformed numeric literal so it is reported as one lexeme */
static void consumeUntilSpace()
{
    while (charClass != EOF && !isspace(static_cast<unsigned char>(nextChar)))
    {
        addChar();
        getChar();
    }
}
/*****************************************************/
/* lex - a simple lexical analyzer for arithmetic expressions.
   Skips leading whitespace, assembles the next lexeme in `lexeme`, stores
   its token code in nextToken, prints both, and returns the token code.
   Returns EOF once the input is exhausted. */
int lex()
{
    lexLen = 0;
    tmp = 0; /* start every token with a clean numeric-literal state */
    getNonBlank();
    switch (charClass)
    {
    /* Parse identifiers: a letter followed by letters and digits */
    case LETTER:
        do
        {
            addChar();
            getChar();
        } while (charClass == LETTER || charClass == DIGIT);
        nextToken = IDENT;
        break;

    /* Parse integer and floating-point literals that start with a digit */
    case DIGIT:
        do
        {
            addChar();
            getChar();
        } while (charClass == DIGIT);

        /* Optional fraction: '.' followed by digits. A '.' with no digits
           after it leaves the literal classified as an integer. */
        if (charClass == PUNTO)
        {
            addChar();
            getChar();
            while (charClass == DIGIT)
            {
                addChar();
                getChar();
                tmp = 1;
            }
        }

        /* A letter or a further '.' glued to the number makes it malformed;
           the rest of the run is absorbed into the same lexeme. */
        if (charClass == LETTER || charClass == PUNTO)
        {
            consumeUntilSpace();
            tmp = 2;
        }

        /* Classify from this token's own state only, never from the code
           left in nextToken by the previous token. */
        if (tmp == 2)
            nextToken = UNKNOWN;
        else if (tmp == 1)
            nextToken = FLO_LIT;
        else
            nextToken = INT_LIT;
        break;

    /* Parse literals that start with a decimal point */
    case PUNTO:
        addChar();
        getChar();
        while (charClass == DIGIT)
        {
            addChar();
            getChar();
        }
        if (charClass == LETTER)
        {
            consumeUntilSpace();
            nextToken = UNKNOWN;
        }
        else
            nextToken = FLO_LIT;
        break;

    /* Parentheses and operators */
    case UNKNOWN:
        lookup(nextChar);
        getChar();
        break;

    /* EOF */
    case EOF:
        nextToken = EOF;
        lexeme[0] = 'E';
        lexeme[1] = 'O';
        lexeme[2] = 'F';
        lexeme[3] = 0;
        break;
    } /* End of switch */
    printf("Next token is: %d, Next lexeme is %s\n", nextToken, lexeme);
    return nextToken;
} /* End of function lex */
/* error - report a syntax error on standard output, one message per line */
void error()
{
    printf("Error\n");
}
/* factor - parse strings in the language generated by the rule
   <factor> -> id | int_constant | float_constant | ( <expr> )
   Prints a trace line on entry and exit, and calls error() when the current
   token cannot start a factor or a closing parenthesis is missing. */
void factor()
{
    printf("Enter <factor>\n");
    if (nextToken == IDENT || nextToken == INT_LIT || nextToken == FLO_LIT)
    {
        /* A single-token factor: just move on to the next token. */
        lex();
    }
    else if (nextToken == LEFT_PAREN)
    {
        /* Parenthesised sub-expression: skip '(', parse it, expect ')'. */
        lex();
        expr();
        if (nextToken == RIGHT_PAREN)
            lex();
        else
            error();
    }
    else
    {
        /* Neither an operand nor '(' - the token is left unconsumed. */
        error();
    }
    printf("Exit <factor>\n");
}
/* term - parse strings in the language generated by the rule
   <term> -> <factor> {(* | /) <factor>}
   Prints a trace line on entry and exit. */
void term()
{
    printf("Enter <term>\n");
    /* Parse the first factor */
    factor();
    /* As long as the next token is * or /, consume it and parse the
       factor that follows */
    while (nextToken == MULT_OP || nextToken == DIV_OP)
    {
        lex();
        factor();
    }
    printf("Exit <term>\n");
}
/* expr - parse strings in the language generated by the rule
   <expr> -> <term> {(+ | -) <term>}
   Prints a trace line on entry and exit. */
void expr()
{
    printf("Enter <expr>\n");
    /* Parse the first term */
    term();
    /* As long as the next token is + or -, consume it and parse the
       term that follows */
    while (nextToken == ADD_OP || nextToken == SUB_OP)
    {
        lex();
        term();
    }
    printf("Exit <expr>\n");
}
/* main - open front.in, then repeatedly scan a token and parse an
   expression until the lexer reports EOF.
   Returns 0 after processing the file, 1 if the file cannot be opened. */
int main()
{
    in_fp = fopen("front.in", "r");
    if (in_fp == NULL)
    {
        printf("ERROR - cannot open front.in\n");
        return 1; /* signal the failure to the caller */
    }

    /* Prime the scanner with the first character before the first lex(). */
    getChar();
    do
    {
        lex();
        expr();
    } while (nextToken != EOF);

    /* Release the input stream now that scanning is finished. */
    fclose(in_fp);
    in_fp = NULL;
    return 0;
}
Contents of front.in:
(sum + 47) / total
Sample output (excerpt):
Next token is: 10, Next lexeme is 47
Enter <term>
Enter <factor>
Next token is: 26, Next lexeme is )
Exit <factor>
Exit <term>
Exit <expr>
Next token is: 24, Next lexeme is /
Exit <factor>
Next token is: 11, Next lexeme is total
Enter <factor>
Next token is: -1, Next lexeme is EOF
Exit <factor>
Exit <term>
Exit <expr>
Related Questions
Navigate
Integrity-first tutoring: explanations and feedback only — we do not complete graded work. Learn more.