Academic Integrity: tutoring, explanations, and feedback — we don’t complete graded work or submit on a student’s behalf.

/*******************************************************************/ Write a C++ program that simulates a lexical/syntax analyzer

ID: 3678129 • Letter: #

Question

/*******************************************************************/

Write a C++ program that simulates a lexical/syntax analyzer (parser) system for the following simple if..else.. statement.

The BNF is:

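<if_stmt> -> if (<bool_expr>) <assign_list> | if (<bool_expr>) <assign_list> else <assign_list>

<expr> -> <expr> + <term> | <expr> - <term> | <term>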

The EBNF is:

<assign_list> -> id = <expr>; | id = <expr>; <assign_list>

NOTE:

Operator/keyword: +, -, *, /, %, (, ), ;, TRUE, FALSE, OR, AND, NOT

id: (a+b+...+z+A+B+...+Z)(a+b+...+z+A+B+...+Z+0+1+2+...+9)*

int_constant: (0+1+2+...+9)(0+1+2+...+9)*

/********************************************************************/

Sample syntax #1:
if (TRUE AND FALSE)

A=sum / (total + 47); B=(4+A)%3; C=B-15;

Sample syntax #2:
if (FALSE OR (NOT TRUE))

A=sum * (total - 47); B=(4+A)%3; C=B-15;

else
A = (sum + total) * 4; B=B+1;
C=B/2;

________________________________________________________

Lexical Analyzer

Explanation / Answer

#include <iostream>
#include <stdio.h>
#include <ctype.h>

using namespace std;


/* Character classes stored in charClass by getChar().
   Plain (unscoped) enumerators are used instead of macros so the names
   obey scope rules and are visible to a debugger, while still converting
   implicitly to the int variables that hold them. */
enum
{
    LETTER  = 0,   /* alphabetic character */
    DIGIT   = 1,   /* decimal digit */
    PUNTO   = 2,   /* '.' ("punto" is Spanish for dot) */
    UNKNOWN = 99   /* any other character; lex() also stores this value in
                      nextToken to flag a malformed number */
};

/* Token codes stored in nextToken by the lexer. */
enum
{
    INT_LIT     = 10,   /* integer literal */
    FLO_LIT     = 13,   /* floating-point literal */
    IDENT       = 11,   /* identifier */
    ASSIGN_OP   = 20,
    ADD_OP      = 21,   /* '+' */
    SUB_OP      = 22,   /* '-' */
    MULT_OP     = 23,   /* '*' */
    DIV_OP      = 24,   /* '/' */
    LEFT_PAREN  = 25,   /* '(' */
    RIGHT_PAREN = 26    /* ')' */
};

/* Lexer state shared by the routines in this file. */
int charClass;      /* class of nextChar: LETTER, DIGIT, PUNTO, UNKNOWN or EOF */
char lexeme [100];  /* NUL-terminated text of the token being assembled */
char nextChar;      /* most recently read input character */
int lexLen;         /* number of characters currently held in lexeme */
int token;          /* not referenced by any code visible in this file */
int nextToken;      /* code of the most recently scanned token */

/* Input stream opened by main().  fopen() itself is declared by <stdio.h>;
   redeclaring it here with an empty parameter list would only introduce a
   second, never-defined overload in C++. */
FILE *in_fp;

/* Recursive-descent parser routines; declared up front because they call
   one another. */
void factor();
void term();
void expr();

/*****************************************************/
/* addChar - a function to add nextChar to lexeme */
void addChar()
{
    if (lexLen <= 98)
    {
        lexeme[lexLen++] = nextChar;
        lexeme[lexLen] = 0;
    }
    else
        printf("Error - lexeme is too long ");
}

/* lookup - a function to lookup operators and parentheses
            and return the token */
int lookup(char ch)
{
    switch (ch)
    {
        case '(':
              addChar();
              nextToken = LEFT_PAREN;
              break;
        case ')':
              addChar();
              nextToken = RIGHT_PAREN;
              break;
        case '+':
              addChar();
              nextToken = ADD_OP;
              break;
        case '-':
              addChar();
              nextToken = SUB_OP;
              break;
        case '*':
              addChar();
              nextToken = MULT_OP;
              break;
        case '/':
              addChar();
              nextToken = DIV_OP;
              break;
        case '.':
              addChar();
              nextToken = PUNTO;
              break;
        default:
              addChar();
              nextToken = EOF;
              break;
    }
    return nextToken;
}

/*****************************************************/
/* getChar - a function to get the next character of
             input and determine its character class */
void getChar()
{
    if ((nextChar = getc(in_fp)) != EOF)
    {
        if (isalpha(nextChar))
            charClass = LETTER;
        else
        {
            if (isdigit(nextChar))
                charClass = DIGIT;
            else
            {
                if (nextChar == '.')
                    charClass = PUNTO;
                else
                    charClass = UNKNOWN;
            }
        }
    }
    else
        charClass = EOF;
}

/*****************************************************/
/* getNonBlank - a function to call getChar until it
                 returns a non-whitespace character */
void getNonBlank()
{
    while (isspace(nextChar))
        getChar();
}

int tmp = 0;

/*****************************************************/
/* lex - a simple lexical analyzer for arithmetic
         expressions */
int lex()
{
    lexLen = 0;
    getNonBlank();
    switch (charClass)
    {
        /* Parse identifiers */
        case LETTER:
            addChar();
            getChar();
            while (charClass == LETTER || charClass == DIGIT)
            {
                addChar();
                getChar();
            }
            nextToken = IDENT;
            break;
        /* Parse integer literals */
        case DIGIT:
            addChar();
            getChar();
            while (charClass == DIGIT)
            {
                addChar();
                getChar();
            }
            if(charClass == PUNTO)
            {
                addChar();
                getChar();
                if (charClass == PUNTO || charClass == LETTER)
                {
                    nextToken = UNKNOWN;
                }
                while (charClass == DIGIT)
                {
                    addChar();
                    getChar();
                    tmp = 1;
                }
            }

            if(charClass == LETTER || charClass == PUNTO)
            {
                addChar();
                getChar();
                while (!isspace(nextChar))
                {
                    addChar();
                    getChar();
                }
                tmp = 2;
            }

            if (tmp == 2)
                nextToken = UNKNOWN;

            if(nextToken != UNKNOWN)
            {
                if (tmp == 1)
                    nextToken = FLO_LIT;
                else
                    nextToken = INT_LIT;
            }
            break;

        case PUNTO:
            addChar();
            getChar();
            while(charClass == DIGIT)
            {
                addChar();
                getChar();
            }

            if(charClass == LETTER)
            {
                addChar();
                getChar();
                while (!isspace(nextChar)) {
                    addChar();
                    getChar();
                }
                nextToken = UNKNOWN;
            }
            else
                nextToken = FLO_LIT;
            break;
        /* Parentheses and operators */
        case UNKNOWN:
            lookup(nextChar);
            getChar();
            break;
        /* EOF */
        case EOF:
            nextToken = EOF;
            lexeme[0] = 'E';
            lexeme[1] = 'O';
            lexeme[2] = 'F';
            lexeme[3] = 0;
            break;
    } /* End of switch */

      printf("Next token is: %d, Next lexeme is %s ", nextToken, lexeme);
    return nextToken;
} /* End of function lex */

/* error - report a syntax error on standard output */
void error()
{
    fputs("Error ", stdout);
}

void factor()
{
    printf("Enter <factor> ");
    if (nextToken == IDENT || nextToken == INT_LIT || nextToken == FLO_LIT)
        lex();
    else
    {
        if (nextToken == LEFT_PAREN)
        {
            lex();
            expr();
            if (nextToken == RIGHT_PAREN)
                lex();
            else
                error();
        }
        else
            error();
    }
    printf("Exit <factor> ");
}

void term()
{
    printf("Enter <term> ");
    factor();
    while (nextToken == MULT_OP || nextToken == DIV_OP)
    {
        lex();
        factor();
    }
    printf("Exit <term> ");
}

void expr()
{
    printf("Enter <expr> ");
    term();
    while (nextToken == ADD_OP || nextToken == SUB_OP)
    {
        lex();
        term();
    }
    printf("Exit <expr> ");
}

int main()
{
    if ((in_fp = fopen("front.in", "r")) == NULL)
        printf("ERROR - cannot open front.in ");
    else
    {
        getChar();
        do {
             lex();
             expr();
           }
        while (nextToken != EOF);
    }
    return 0;
}


front.in
(sum + 47) / total

sample output

Next token is: 10, Next lexeme is 47                                                                                                                        
Enter <term>                                                                                                                                                
Enter <factor>                                                                                                                                              
Next token is: 26, Next lexeme is )                                                                                                                         
Exit <factor>                                                                                                                                               
Exit <term>                                                                                                                                                 
Exit <expr>                                                                                                                                                 
Next token is: 24, Next lexeme is /                                                                                                                         
Exit <factor>                                                                                                                                               
Next token is: 11, Next lexeme is total                                                                                                                     
Enter <factor>                                                                                                                                              
Next token is: -1, Next lexeme is EOF                                                                                                                       
Exit <factor>                                                                                                                                               
Exit <term>                                                                                                                                                 
Exit <expr>