Academic Integrity: tutoring, explanations, and feedback — we don’t complete graded work or submit on a student’s behalf.

C++ Lexical Analyzer: My program needs to be able to catch any syntax error if the input doesn't follow the grammar rules.

ID: 3679574 • Letter: C

Question

C++ Lexical Analyzer

My program needs to be able to catch any syntax error if it doesn't follow the grammar rules.

For example, if the syntax is "if (TRUE) A = (x + 4) / 3", your program should output: "ERROR - missing a semicolon".

Another example, if the syntax is "if (TRUE) A (x + 4) / 3;", your program should output: "ERROR - missing an assignment operator".

I just need to add syntax-error detection to my code so that it produces the output stated above.

My code:

#include <cctype>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iostream>

using namespace std;

/* Global declarations */

/* Variables */

// Class of nextChar as set by getChar(): LETTER, DIGIT, UNKNOWN or EOF.
int charClass;

// Text of the lexeme currently being assembled; addChar() keeps it 0-terminated.
char lexeme[100];

// Most recently read input character (set to EOF by getChar() at end of file).
char nextChar;

// Number of characters currently stored in lexeme; lex() resets it to 0.
int lexLen;

// Not referenced by any function visible in this file.
int token;

// Token code of the lexeme most recently scanned by lex().
int nextToken;

// Input stream the lexer reads from; the file name is fixed at "syntax.txt".
ifstream in_fp("syntax.txt");

/* Function declarations */

// Reads the next character into nextChar and classifies it into charClass.
void getChar();

// Appends nextChar to lexeme.
void addChar();

// Advances nextChar past whitespace.
void getNonBlank();

// Function to get the nextToken

int lex();

/* Character classes (values stored in charClass by getChar) */
#define LETTER 0
#define DIGIT 1
#define UNKNOWN 99

/* Token codes (values stored in nextToken).
   Every code must be distinct: lex() reports tokens by number only, so two
   tokens sharing a value cannot be told apart by a caller. */
#define INT_LIT 10
#define IDENT 11
#define ADD_OP 21
#define SUB_OP 22
#define MULT_OP 23
#define DIV_OP 24
#define LEFT_PAREN 25
#define RIGHT_PAREN 26
#define EQUAL_OP 27
/* Was 26, which collided with RIGHT_PAREN and made ';' look like ')'. */
#define SEMICOLON_OP 28

/******************************************************/

/* Main function */

int main()
{
    /* in_fp was opened at namespace scope; here we only check that it worked
       and then drive the lexer over the whole file. */
    if (!in_fp.fail())
    {
        // Prime nextChar/charClass before the first call to lex().
        getChar();

        // Scan token by token; lex() leaves the latest code in nextToken.
        for (;;)
        {
            lex();
            if (nextToken == EOF)
                break;
        }
    }
    else
    {
        cout << "File could not be opened ";
        // Wait for a key press so the message stays visible, then give up.
        cin.get();
        exit(1);
    }

    // Close the file
    in_fp.close();
}

/*****************************************************/

/* lookup - a function to lookup operators and parentheses

and return the token */

int lookup(char ch)
{
    /* Map the single character to its token code first; every path then
       records the character in lexeme and publishes the code in nextToken.
       Note that '%' is reported with the DIV_OP code, and any character not
       listed here is reported as EOF. */
    int code;

    if (ch == '(')
        code = LEFT_PAREN;
    else if (ch == ')')
        code = RIGHT_PAREN;
    else if (ch == '+')
        code = ADD_OP;
    else if (ch == '-')
        code = SUB_OP;
    else if (ch == '*')
        code = MULT_OP;
    else if (ch == '/' || ch == '%')
        code = DIV_OP;
    else if (ch == '=')
        code = EQUAL_OP;
    else if (ch == ';')
        code = SEMICOLON_OP;
    else
        code = EOF;

    addChar();
    nextToken = code;

    return nextToken;
}

/*****************************************************/

/* addChar - a function to add nextChar to lexeme */

void addChar()
{
    /* Appends nextChar to lexeme and keeps the buffer 0-terminated.
       The limit is derived from the buffer itself: one slot is needed for the
       new character and one for the terminator. The previous hard-coded limit
       (398) was larger than the 100-byte buffer and allowed writes past its
       end. */
    const int capacity = static_cast<int>(sizeof(lexeme));

    if (lexLen + 1 < capacity)
    {
        lexeme[lexLen++] = nextChar;

        lexeme[lexLen] = 0;
    }
    else
        cout << " Error - lexeme is too long ";
}

/*****************************************************/

/* getChar - a function to get the next character of

input and determine its character class */

void getChar()
{
    /* Reads one character from in_fp into nextChar and stores its class in
       charClass (LETTER, DIGIT, UNKNOWN, or EOF when nothing could be read).

       The stream state decides whether a character was read. Relying on the
       value stored in the char meant that a read failure other than
       end-of-file was never noticed, and that the end-of-input test depended
       on whether plain char is signed. */
    if (in_fp.get(nextChar))
    {
        // <cctype> classifiers require a value representable as unsigned char.
        const unsigned char uc = static_cast<unsigned char>(nextChar);

        if (isalpha(uc))
            charClass = LETTER;
        else if (isdigit(uc))
            charClass = DIGIT;
        else
            charClass = UNKNOWN;
    }
    else
    {
        // Keep the sentinel in nextChar as well, as callers did before.
        nextChar = EOF;
        charClass = EOF;
    }
}

/*****************************************************/

/* getNonBlank - a function to call getChar until it

returns a non-whitespace character */

void getNonBlank()
{
    /* Calls getChar() until nextChar is no longer whitespace.
       isspace() is only defined for EOF and values representable as
       unsigned char, so the (possibly negative) char is converted first. */
    while (isspace(static_cast<unsigned char>(nextChar)))
        getChar();
}

/*****************************************************/

/* lex - a simple lexical analyzer for arithmetic

expressions */

int lex()
{
    /* Scans one lexeme: skips whitespace, builds the text in lexeme, stores
       the token code in nextToken, echoes both to cout and returns the code. */
    lexLen = 0;
    getNonBlank();

    if (charClass == LETTER)
    {
        // Identifier: a letter followed by any mix of letters and digits.
        do
        {
            addChar();
            getChar();
        } while (charClass == LETTER || charClass == DIGIT);

        nextToken = IDENT;
    }
    else if (charClass == DIGIT)
    {
        // Integer literal: a run of digits.
        do
        {
            addChar();
            getChar();
        } while (charClass == DIGIT);

        nextToken = INT_LIT;
    }
    else if (charClass == UNKNOWN)
    {
        // Operators and parentheses: lookup() sets nextToken and lexeme.
        lookup(nextChar);
        getChar();
    }
    else if (charClass == EOF)
    {
        // End of input is reported with the literal lexeme "EOF".
        const char eofText[] = "EOF";

        for (int i = 0; i < 4; ++i)
            lexeme[i] = eofText[i];

        nextToken = EOF;
    }

    cout << "Next token is: " << nextToken << " Next lexeme is " << lexeme << " ";

    return nextToken;
} /* End of function lex */

Explanation / Answer

#include <stdio.h>
#include <ctype.h>
#include <string.h>

/* Keywords recognised by lex(); compared case-insensitively with the lexeme. */
const char *keywordList[] = {"ASSIGN","BACKSPACE","BLOCK DATA","CALL","CLOSE","COMMON","CONTINUE"};
/* Number of entries in keywordList. Derived from the array so that a scan
   bounded by it cannot run past the table (it was hard-coded to 40 while the
   table holds 7 entries). */
#define KEYWORD_LIST_SIZE ((int)(sizeof(keywordList) / sizeof(keywordList[0])))

int charClass;          /* class of nextChar, set by getChar() */
char lexeme [100];      /* text of the lexeme being assembled */
char nextChar=' ';      /* most recently read input character */
int lexLen;             /* number of characters stored in lexeme */
int token;              /* not referenced by the functions visible in this file */
int nextToken;          /* token code of the lexeme most recently scanned */
FILE *in_fp;            /* source file being scanned */
FILE *tkn_fp;           /* output file receiving "(token,lexeme)" pairs */
char previousChar=' ';  /* value nextChar had before the latest getChar() */

int commentFlag=0;      /* set to 1 by getChar() when it detects a comment start */
int line=1;             /* line counter used in error messages */
int exception=0;        /* set to 1 by exceptionHandler(); suppresses token output */
int errors=0;           /* total number of errors reported */
int endFlag=0;          /* set to 1 by lex() when the identifier read is "END" */
int positionFileEnd;    /* file offset recorded right after "END" was read */
int keywordFlag=0;      /* set to 1 by lex() when the lexeme is in keywordList */

void addChar();
void getChar();
void getNonBlank();
int lex();
void exceptionHandler(int);

/* Character classes */
#define LETTER 0
#define DIGIT 1
#define LOGICAL_EXP 2
#define UNKNOWN 99
/* Token codes */

#define INT_LIT 10
#define IDENT 11
#define KEYWORD 12
#define ASSIGN_OP 20
#define ADD_OP 21
#define SUB_OP 22
#define ASTERISK_OP 23
#define SLASH_OP 24
#define LEFT_PAREN 25
#define RIGHT_PAREN 26

#define STRING_LIT 27
#define COMMA_OP 28

#define GT_OP 30
#define LT_OP 31
#define GE_OP 32
#define LE_OP 33
#define EQ_OP 34
#define NE_OP 35

#define POW_OP 36
#define REAL_LIT 37

#define NOT_OP 38
#define AND_OP 39
#define OR_OP 40
#define EQV_OP 41
#define NEQV_OP 42

#define EXC_STRING_UNCLOSED 1000
#define EXC_UNDEFINED_CHAR 1001
#define EXC_LONG_ID 1002
#define EXC_IMPROPER_ID 1003
#define EXC_IMPROPER_REAL_TYPE 1004
#define EXC_UNDEFINED_COMP_OP 1005
int main()
{
    char fileName[50];
    printf("Write the name of the file (.for) ");
    gets(fileName);
    if ((in_fp = fopen(fileName, "r")) == NULL)
        printf("ERROR - cannot open %s ",fileName);
    else
    {

        char *x = strrchr(fileName,'.');
        strcpy(x,".lex");
        tkn_fp=fopen(fileName,"w+");
        commentFlag=0;
        exception=0;
        getChar();
        do
        {
            lex();
            commentFlag=0;
            exception=0;
        }
        while (nextToken != EOF);
        if(errors>0)
            printf("There are %d errors in source code, .lex file not created. ",errors);
    }
    getchar();
    return 0;
}

int Look(char ch)
{
    switch (ch)
    {
    case '(':
        addChar();
        nextToken = LEFT_PAREN;
        break;
    case ')':
        addChar();
        nextToken = RIGHT_PAREN;
        break;
    case '+':
        addChar();
        nextToken = ADD_OP;
        break;
    case '-':
        addChar();
        nextToken = SUB_OP;
        break;
    case '*':
        addChar();
        nextToken = ASTERISK_OP;
        break;
    case '/':
        addChar();
        nextToken = SLASH_OP;
        break;
    case ',':
        addChar();
        nextToken = COMMA_OP;
        break;
    case '=':
        addChar();
        nextToken=ASSIGN_OP;
        break;
    case EOF:
        addChar();
        nextToken=EOF;
        break;
    default:
        exceptionHandler(EXC_UNDEFINED_CHAR);
        break;
    }
    return nextToken;
}

/*
 * Appends nextChar to lexeme and re-terminates it. Once lexLen exceeds 31 the
 * call does nothing, so lexLen never grows beyond 32.
 */
void addChar()
{
    if (lexLen > 31)
        return;

    lexeme[lexLen] = nextChar;
    lexLen += 1;
    lexeme[lexLen] = 0;
}

void getChar()
{

    if((previousChar==' '&& nextChar=='C')&& commentFlag==0)
        commentFlag=1;
    previousChar=nextChar;

    if ((nextChar = getc(in_fp)) != EOF)
    {
        if (isalpha(nextChar))
            charClass = LETTER;
        else if (isdigit(nextChar))
            charClass = DIGIT;
        else if(nextChar=='.')
            charClass=LOGICAL_EXP;
        else charClass = UNKNOWN;
    }
    else
        charClass = EOF;
    if(nextChar==' ')
        line++;
}

/*
 * Calls getChar() until nextChar is not whitespace.
 * isspace() is only defined for EOF and values representable as unsigned
 * char, so the (possibly negative) char is converted before the call.
 */
void getNonBlank()
{
    while (isspace((unsigned char)nextChar))
        getChar();
}

int lex()
{
    lexLen = 0;
    getNonBlank();
    switch (charClass)
    {
    case LETTER:
        addChar();
        getChar();
        while ((charClass == LETTER || charClass == DIGIT)|| nextChar=='_')
        {
            addChar();
            getChar();
        }
        endFlag=0;
        keywordFlag=0;
        char afterEndIdent [31];
        if(strcasecmp(lexeme,"END")==0)
        {
            positionFileEnd=ftell(in_fp);
            endFlag=1;
            getNonBlank();
            if(charClass==LETTER)
            {
                lexeme[lexLen++]=' ';
                lexeme[lexLen]=0;

                while(charClass==LETTER)
                {
                    addChar();
                    getChar();
                }
            }
            strcpy(afterEndIdent,lexeme);
        }
        int i;
        for(i=0; i<KEYWORD_LIST_SIZE; i++)
            if(strcasecmp(keywordList[i],lexeme)==0)
            {
                keywordFlag=1;
                break;
            }
        if(keywordFlag==1)
            nextToken = KEYWORD;
        else
            nextToken = IDENT;
        break;
    case DIGIT:
        addChar();
        getChar();
        nextToken = INT_LIT;
        int dotFlag=0;
        while (charClass==DIGIT)
        {
            addChar();
            getChar();
        }
        if(charClass==LETTER)
        {
            exceptionHandler(EXC_IMPROPER_ID);
            getChar();
            break;
        }
        if(nextChar=='.')
        {
            nextToken=REAL_LIT;
            addChar();
            getChar();
        }
        while ((charClass == DIGIT ||nextChar=='D')||( nextChar=='E' ||(nextChar=='F' || nextChar=='G')))
        {
            addChar();
            getChar();
        }
        break;
    case UNKNOWN:
        Look(nextChar);
        getChar();
        if(previousChar=='*' && nextChar=='*')
        {
            addChar();
            nextToken=POW_OP;
            getChar();
        }
        break;
    case LOGICAL_EXP:
        addChar();
        getChar();
        while (charClass == LETTER )
        {
            addChar();
            getChar();
        }
        if(nextChar=='.')
        {
            addChar();
            getChar();
            if(strcasecmp(lexeme,".gt.")==0)
                nextToken=GT_OP;
            else if(strcasecmp(lexeme,".lt.")==0)
                nextToken=LT_OP;
            else if(strcasecmp(lexeme,".ge.")==0)
                nextToken=GE_OP;
            else if(strcasecmp(lexeme,".le.")==0)
                nextToken=LE_OP;
            else if(strcasecmp(lexeme,".eq.")==0)
                nextToken=EQ_OP;
            else if(strcasecmp(lexeme,".ne.")==0)
                nextToken=NE_OP;
            else if(strcasecmp(lexeme,".not.")==0)
                nextToken=NOT_OP;
            else if(strcasecmp(lexeme,".and.")==0)
                nextToken=AND_OP;
            else if(strcasecmp(lexeme,".or.")==0)
                nextToken=OR_OP;
            else if(strcasecmp(lexeme,".eqv.")==0)
                nextToken=EQV_OP;
            else if(strcasecmp(lexeme,".neqv.")==0)
                nextToken=NEQV_OP;
            else exceptionHandler(EXC_UNDEFINED_COMP_OP);
        }
        break;
    case EOF:
        nextToken = EOF;
        lexeme[0] = 'E';
        lexeme[1] = 'O';
        lexeme[2] = 'F';
        lexeme[3] = 0;
        break;
    }
    if(lexLen>31)
        exceptionHandler(EXC_LONG_ID);
    if(commentFlag==0&&exception==0)
    {
        if(endFlag==1 && keywordFlag==1)
        {
            printf("Next token is: %d, Next lexeme is %s ",
                   nextToken, lexeme);
            fprintf(tkn_fp,"(%d,%s) ",nextToken,lexeme);
        }
        else if(endFlag==1 && keywordFlag==0)
        {
            strcpy(lexeme,"END");
            nextToken=KEYWORD;
            printf("Next token is: %d, Next lexeme is %s ",
                   nextToken, lexeme);
            fprintf(tkn_fp,"(%d,%s) ",nextToken,lexeme);
            fseek(in_fp,positionFileEnd,SEEK_SET);
        }
        else if(endFlag==0)
        {
            printf("Next token is: %d, Next lexeme is %s ",
                   nextToken, lexeme);
            fprintf(tkn_fp,"(%d,%s) ",nextToken,lexeme);
        }
    }
    if(commentFlag==1)
    {
        while(nextChar!=' '&&nextChar!=EOF)
            nextChar=getc(in_fp);
        if(nextChar==EOF)
        {
            nextToken = EOF;
            lexeme[0] = 'E';
            lexeme[1] = 'O';
            lexeme[2] = 'F';
            lexeme[3] = 0;
            printf("Next token is: %d, Next lexeme is %s ",
                   nextToken, lexeme);
            fprintf(tkn_fp,"(%d,%s) ",nextToken,lexeme);
        }
    }
    return nextToken;
}
void exceptionHandler(int exceptionCode)
{
    errors++;
    printf("ERROR! ");
    exception=1;
    switch(exceptionCode)
    {
    case EXC_STRING_UNCLOSED:
        printf("String not closed in line: %d ",line-1);
        break;
    case EXC_IMPROPER_ID:
        printf("Identiifer started with digit in line: %d ",line-1);
        break;
    case EXC_IMPROPER_REAL_TYPE:
        printf("Real type defined improperly in line: %d ",line-1);
        break;
    case EXC_LONG_ID:
        printf("Identifier has more character than 31 in line: %d ",line-1);
        break;
    case EXC_UNDEFINED_CHAR:
        printf("Undefined character in line: %d ",line-1);
        break;
    case EXC_UNDEFINED_COMP_OP:
        printf("Undefined compresion operator in line: %d ",line-1);
        break;
    }
}

Hire Me For All Your Tutoring Needs
Integrity-first tutoring: clear explanations, guidance, and feedback.
Chat Now And Get Quote