C++ Lexical Analyzer My program needs to be able to catch any syntax error if it
ID: 3679574 • Letter: C
Question
C++ Lexical Analyzer
My program needs to report a syntax error whenever the input doesn't follow the grammar rules.
For example, if the syntax is "if (TRUE) A = (x + 4) / 3", your program should output: "ERROR - missing a semicolon".
Another example, if the syntax is "if (TRUE) A (x + 4) / 3;", your program should output: "ERROR - missing an assignment operator".
I just need to add syntax-error detection to my code so that it produces the output stated above.
My code:
#include
#include
#include
using namespace std;
/* Global declarations */
/* Variables */
int charClass;      /* class of nextChar: LETTER, DIGIT, UNKNOWN or EOF; written by getChar() */
char lexeme[100];   /* text of the token being built; addChar() keeps it NUL-terminated */
char nextChar;      /* most recently read input character (one-character lookahead) */
int lexLen;         /* number of characters currently stored in lexeme */
int token;          /* NOTE(review): not referenced by any function visible in this program */
int nextToken;      /* code of the token most recently produced by lex()/lookup() */
ifstream in_fp("syntax.txt");   /* input stream, constructed on the file "syntax.txt" */
/* Function declarations */
void getChar();     /* read one character into nextChar and classify it */
void addChar();     /* append nextChar to lexeme */
void getNonBlank(); /* advance nextChar past whitespace */
// Function to get the nextToken
int lex();
/* Character classes - stored in charClass by getChar() */
#define LETTER 0
#define DIGIT 1
#define UNKNOWN 99
/* Token codes - stored in nextToken by lex()/lookup().
   Every code must be unique, otherwise two different lexemes are reported
   (and would later be parsed) as the same token. */
#define INT_LIT 10
#define IDENT 11
#define ADD_OP 21
#define SUB_OP 22
#define MULT_OP 23
#define DIV_OP 24
#define LEFT_PAREN 25
#define RIGHT_PAREN 26
#define EQUAL_OP 27
/* Was 26, which collided with RIGHT_PAREN and made ';' indistinguishable
   from ')'. 28 is the next unused code. */
#define SEMICOLON_OP 28
/******************************************************/
/* Main function */
int main()
{
    /* Nothing to scan if the input file could not be opened: report it,
       wait for a key press and terminate with a failure status. */
    if (in_fp.fail())
    {
        cout << "File could not be opened ";
        cin.get();
        exit(1);
    }

    /* Prime the one-character lookahead, then keep requesting tokens
       until lex() leaves EOF in nextToken. */
    getChar();
    for (;;)
    {
        lex();
        if (nextToken == EOF)
            break;
    }

    /* Release the input file */
    in_fp.close();
}
/*****************************************************/
/* lookup - map an operator or parenthesis character to its token code.
   ch: the character to classify.
   The current character is appended to lexeme through addChar(), the
   resulting code is stored in nextToken and also returned.
   Note: '%' is reported with the same code as '/' (DIV_OP), and any
   character without a case of its own is reported as EOF. */
int lookup(char ch)
{
    int code;

    switch (ch)
    {
    case '(':
        code = LEFT_PAREN;
        break;
    case ')':
        code = RIGHT_PAREN;
        break;
    case '+':
        code = ADD_OP;
        break;
    case '-':
        code = SUB_OP;
        break;
    case '*':
        code = MULT_OP;
        break;
    case '/':
    case '%':
        code = DIV_OP;
        break;
    case '=':
        code = EQUAL_OP;
        break;
    case ';':
        code = SEMICOLON_OP;
        break;
    default:
        code = EOF;
        break;
    }

    /* Every branch records the character before publishing the token */
    addChar();
    nextToken = code;
    return nextToken;
}
/*****************************************************/
/* addChar - append nextChar to lexeme and keep lexeme NUL-terminated.
   When the buffer has no room left for another character plus the
   terminator, nothing is stored and an error message is printed. */
void addChar()
{
    /* The limit is derived from the buffer itself: the previous hard-coded
       bound (398) was larger than lexeme and allowed writes past its end. */
    if (lexLen + 1 < (int) sizeof(lexeme))
    {
        lexeme[lexLen++] = nextChar;
        lexeme[lexLen] = 0;
    }
    else
        cout << " Error - lexeme is too long ";
}
/*****************************************************/
/* getChar - read the next character of input into nextChar and store its
   character class (LETTER, DIGIT or UNKNOWN) in charClass.
   When no character can be read, both nextChar and charClass are set
   to EOF. */
void getChar()
{
    /* Test the read itself rather than only eof(): any failed read
       (end of file or a stream error) must stop the scanner instead of
       leaving the previous character in nextChar. */
    if (!in_fp.get(nextChar))
    {
        nextChar = EOF;
        charClass = EOF;
        return;
    }

    /* <cctype> functions require a value representable as unsigned char;
       passing a negative plain char is undefined behaviour. */
    const unsigned char uc = (unsigned char) nextChar;
    if (isalpha(uc))
        charClass = LETTER;
    else if (isdigit(uc))
        charClass = DIGIT;
    else
        charClass = UNKNOWN;
}
/*****************************************************/
/* getNonBlank - call getChar until nextChar is not a whitespace
   character (or the EOF marker has been reached) */
void getNonBlank()
{
    /* isspace() is only defined for EOF and unsigned-char values, so the
       EOF marker is tested separately and the character is converted
       before the call. */
    while (nextChar != EOF && isspace((unsigned char) nextChar))
        getChar();
}
/*****************************************************/
/* lex - a simple lexical analyzer for arithmetic expressions.
   Skips leading whitespace, collects the next lexeme in lexeme, stores
   its token code in nextToken, prints both and returns the code. */
int lex()
{
    lexLen = 0;
    getNonBlank();

    /* The class of the first character decides which kind of token is read */
    const int firstClass = charClass;

    if (firstClass == LETTER)
    {
        /* Identifier: a letter followed by any run of letters and digits */
        do
        {
            addChar();
            getChar();
        } while (charClass == LETTER || charClass == DIGIT);
        nextToken = IDENT;
    }
    else if (firstClass == DIGIT)
    {
        /* Integer literal: a run of digits */
        do
        {
            addChar();
            getChar();
        } while (charClass == DIGIT);
        nextToken = INT_LIT;
    }
    else if (firstClass == UNKNOWN)
    {
        /* Parentheses and operators are single characters */
        lookup(nextChar);
        getChar();
    }
    else if (firstClass == EOF)
    {
        /* End of input is reported with the literal lexeme "EOF" */
        static const char eofText[] = "EOF";
        nextToken = EOF;
        for (int i = 0; i < 4; i++)
            lexeme[i] = eofText[i];
    }

    cout << "Next token is: " << nextToken << " Next lexeme is " << lexeme << " ";
    return nextToken;
} /* End of function lex */
Explanation / Answer
#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include <strings.h>
/* Reserved words recognised by lex(); lex() compares them with the current
   lexeme case-insensitively. */
const char *keywordList[] = {"ASSIGN","BACKSPACE","BLOCK DATA","CALL","CLOSE","COMMON","CONTINUE"};
/* Number of entries in keywordList. Derived from the array itself: the
   previous fixed value (40) was larger than the table, so the keyword
   search read past its end. */
#define KEYWORD_LIST_SIZE ((int)(sizeof keywordList / sizeof keywordList[0]))
int charClass;          /* class of nextChar, written by getChar() */
char lexeme [100];      /* text of the token being built, NUL-terminated */
char nextChar=' ';      /* most recently read input character (lookahead) */
int lexLen;             /* number of characters currently stored in lexeme */
int token;              /* NOTE(review): not referenced in the visible code */
int nextToken;          /* code of the token most recently recognised */
/* <stdio.h> already declares fopen with a full prototype, so the former
   unprototyped redeclaration ("*fopen()") is dropped. */
FILE *in_fp;            /* source file being scanned */
FILE *tkn_fp;           /* output file receiving the (token,lexeme) pairs */
char previousChar=' ';  /* value nextChar held before the latest getChar() */
int commentFlag=0;      /* set by getChar() when it decides a comment started */
int line=1;             /* line counter reported (minus one) by exceptionHandler() */
int exception=0;        /* set by exceptionHandler(); suppresses token output */
int errors=0;           /* total number of errors reported so far */
int endFlag=0;          /* set by lex() when the current lexeme starts with END */
int positionFileEnd;    /* file offset recorded by lex() right after an END */
int keywordFlag=0;      /* set by lex() when the lexeme matched keywordList */
void addChar();
void getChar();
void getNonBlank();
int lex();
void exceptionHandler(int);
/* ---- Character classes (values held by charClass) ---- */
#define LETTER                     0
#define DIGIT                      1
#define LOGICAL_EXP                2    /* character is '.' */
#define UNKNOWN                   99

/* ---- Token codes (values held by nextToken) ---- */
/* literals, names and keywords */
#define INT_LIT                   10
#define IDENT                     11
#define KEYWORD                   12
/* single-character operators and punctuation */
#define ASSIGN_OP                 20
#define ADD_OP                    21
#define SUB_OP                    22
#define ASTERISK_OP               23
#define SLASH_OP                  24
#define LEFT_PAREN                25
#define RIGHT_PAREN               26
#define STRING_LIT                27
#define COMMA_OP                  28
/* dotted relational operators: .gt. .lt. .ge. .le. .eq. .ne. */
#define GT_OP                     30
#define LT_OP                     31
#define GE_OP                     32
#define LE_OP                     33
#define EQ_OP                     34
#define NE_OP                     35
/* "**" */
#define POW_OP                    36
#define REAL_LIT                  37
/* dotted logical operators: .not. .and. .or. .eqv. .neqv. */
#define NOT_OP                    38
#define AND_OP                    39
#define OR_OP                     40
#define EQV_OP                    41
#define NEQV_OP                   42

/* ---- Error codes accepted by exceptionHandler() ---- */
#define EXC_STRING_UNCLOSED     1000
#define EXC_UNDEFINED_CHAR      1001
#define EXC_LONG_ID             1002
#define EXC_IMPROPER_ID         1003
#define EXC_IMPROPER_REAL_TYPE  1004
#define EXC_UNDEFINED_COMP_OP   1005
/*
 * main - ask for a source file name, tokenize that file with lex() and
 * write the "(token,lexeme)" pairs to a file of the same name with the
 * extension replaced by ".lex".
 *
 * Returns 1 when no file name could be read from standard input,
 * 0 otherwise (problems with the files are reported on standard output).
 */
int main()
{
    char fileName[50];
    char outName[sizeof fileName + 4];  /* any name that fits fileName, plus ".lex" */
    const char *dot;
    size_t stemLen;

    printf("Write the name of the file (.for) ");
    /* fgets is bounded by the buffer size; gets could overflow fileName
       and was removed from the language in C11. */
    if (fgets(fileName, sizeof fileName, stdin) == NULL)
    {
        printf("ERROR - cannot read file name ");
        return 1;
    }
    fileName[strcspn(fileName, "\r\n")] = '\0';  /* fgets keeps the newline */

    /* Build the output name in its own buffer. A name without any '.'
       simply gets ".lex" appended instead of dereferencing NULL. */
    dot = strrchr(fileName, '.');
    stemLen = (dot != NULL) ? (size_t)(dot - fileName) : strlen(fileName);
    snprintf(outName, sizeof outName, "%.*s.lex", (int)stemLen, fileName);

    if (strcmp(outName, fileName) == 0)
    {
        /* Opening the output with "w+" would truncate the very file
           that is about to be read. */
        printf("ERROR - %s would be overwritten by the token file ", fileName);
    }
    else if ((in_fp = fopen(fileName, "r")) == NULL)
        printf("ERROR - cannot open %s ",fileName);
    else if ((tkn_fp = fopen(outName, "w+")) == NULL)
    {
        printf("ERROR - cannot open %s ", outName);
        fclose(in_fp);
        in_fp = NULL;
    }
    else
    {
        commentFlag=0;
        exception=0;
        getChar();  /* prime the one-character lookahead */
        do
        {
            lex();
            /* per-token state is cleared before the next token is read */
            commentFlag=0;
            exception=0;
        }
        while (nextToken != EOF);

        fclose(in_fp);
        in_fp = NULL;
        if (fclose(tkn_fp) != 0)
            printf("ERROR - cannot write %s ", outName);
        tkn_fp = NULL;

        if(errors>0)
        {
            /* Make the message below true: discard the partial output */
            remove(outName);
            printf("There are %d errors in source code, .lex file not created. ",errors);
        }
    }
    getchar();  /* keep the console open until a key is pressed */
    return 0;
}
/*
 * Look - classify a single operator/punctuation character.
 * ch: the character to classify.
 * For a recognised character (or EOF) the current character is appended to
 * lexeme through addChar() and its token code is stored in nextToken.
 * For anything else EXC_UNDEFINED_CHAR is reported and nextToken keeps the
 * value it already had.
 * Returns nextToken.
 */
int Look(char ch)
{
    int code;

    switch (ch)
    {
    case '(':
        code = LEFT_PAREN;
        break;
    case ')':
        code = RIGHT_PAREN;
        break;
    case '+':
        code = ADD_OP;
        break;
    case '-':
        code = SUB_OP;
        break;
    case '*':
        code = ASTERISK_OP;
        break;
    case '/':
        code = SLASH_OP;
        break;
    case ',':
        code = COMMA_OP;
        break;
    case '=':
        code = ASSIGN_OP;
        break;
    case EOF:
        code = EOF;
        break;
    default:
        /* Unknown character: nothing is added to lexeme */
        exceptionHandler(EXC_UNDEFINED_CHAR);
        return nextToken;
    }

    addChar();
    nextToken = code;
    return nextToken;
}
/*
 * addChar - append nextChar to lexeme and re-terminate it.
 * Once lexLen exceeds 31 further characters are silently ignored.
 */
void addChar()
{
    if (lexLen > 31)
        return;

    lexeme[lexLen] = nextChar;
    lexLen = lexLen + 1;
    lexeme[lexLen] = 0;
}
/*
 * getChar - read the next character from in_fp.
 * Saves the old nextChar in previousChar, stores the new character in
 * nextChar and its class (LETTER, DIGIT, LOGICAL_EXP for '.', UNKNOWN,
 * or EOF at end of input) in charClass. Also maintains commentFlag and
 * the line counter.
 */
void getChar()
{
    int c;

    /* NOTE(review): a comment is flagged when a 'C' follows a space. The
       comment-skipping loop in lex() uses the same space delimiter, so the
       test is kept unchanged here; confirm whether both were meant to be
       a newline ('C' in the first column). */
    if((previousChar==' '&& nextChar=='C')&& commentFlag==0)
        commentFlag=1;
    previousChar=nextChar;

    /* getc returns an int: test it for EOF before narrowing to char, so
       end of input is detected whether plain char is signed or unsigned. */
    c = getc(in_fp);
    nextChar = (char) c;
    if (c != EOF)
    {
        /* c is already an unsigned-char value, as <ctype.h> requires */
        if (isalpha(c))
            charClass = LETTER;
        else if (isdigit(c))
            charClass = DIGIT;
        else if (c == '.')
            charClass = LOGICAL_EXP;
        else
            charClass = UNKNOWN;
    }
    else
        charClass = EOF;

    /* The line counter advances on line breaks; it used to advance on
       every space, which made the reported line numbers meaningless. */
    if (c == '\n')
        line++;
}
/*
 * getNonBlank - call getChar() until nextChar is not whitespace
 * (or holds the EOF marker).
 */
void getNonBlank()
{
    /* isspace() is only defined for EOF and unsigned-char values; a plain
       char holding a byte above 127 may be negative, so the EOF marker is
       tested separately and the character converted before the call. */
    while (nextChar != EOF && isspace((unsigned char) nextChar))
        getChar();
}
/*
 * lex - scan the next token from in_fp.
 *
 * Skips whitespace, builds the lexeme in lexeme, stores the token code in
 * nextToken and, unless a comment or an error was flagged for this token,
 * prints the pair to stdout and to tkn_fp.
 *
 * "END" followed by another word is first tried as a two-word keyword
 * ("END <word>"); when that is not in keywordList the token is reduced to
 * plain END and the input is rewound so the following text is scanned again.
 *
 * Returns nextToken (EOF at end of input).
 */
int lex()
{
    /* Lookahead as it was right after an "END"; needed to undo a failed
       "END <word>" match together with the file position. */
    char savedChar = nextChar;
    char savedPrevious = previousChar;
    int savedClass = charClass;
    /* Real length of the keyword table, so the search can never run past
       it even if KEYWORD_LIST_SIZE is larger. */
    int listLen = (int) (sizeof keywordList / sizeof keywordList[0]);
    int i;

    lexLen = 0;
    /* Per-token flags are cleared for EVERY token. They used to be cleared
       only for identifiers, so a number or operator following an END was
       rewritten to END and the input rewound again and again. */
    endFlag = 0;
    keywordFlag = 0;
    getNonBlank();
    switch (charClass)
    {
    case LETTER:
        /* identifier or keyword: letter, then letters/digits/underscores */
        addChar();
        getChar();
        while ((charClass == LETTER || charClass == DIGIT) || nextChar == '_')
        {
            addChar();
            getChar();
        }
        if (strcasecmp(lexeme, "END") == 0)
        {
            /* remember where we are before looking at the next word */
            positionFileEnd = ftell(in_fp);
            savedChar = nextChar;
            savedPrevious = previousChar;
            savedClass = charClass;
            endFlag = 1;
            getNonBlank();
            if (charClass == LETTER)
            {
                /* join the words with one space: "END <word>" */
                lexeme[lexLen++] = ' ';
                lexeme[lexLen] = 0;
                while (charClass == LETTER)
                {
                    addChar();
                    getChar();
                }
            }
            /* (the former copy into a 31-byte scratch buffer was unused
               and could overflow it, so it is gone) */
        }
        for (i = 0; i < KEYWORD_LIST_SIZE && i < listLen; i++)
            if (strcasecmp(keywordList[i], lexeme) == 0)
            {
                keywordFlag = 1;
                break;
            }
        if (keywordFlag == 1)
            nextToken = KEYWORD;
        else
            nextToken = IDENT;
        break;
    case DIGIT:
        addChar();
        getChar();
        nextToken = INT_LIT;
        while (charClass == DIGIT)
        {
            addChar();
            getChar();
        }
        if (charClass == LETTER)
        {
            /* digits immediately followed by a letter */
            exceptionHandler(EXC_IMPROPER_ID);
            getChar();
            break;
        }
        if (nextChar == '.')
        {
            nextToken = REAL_LIT;
            addChar();
            getChar();
        }
        /* fraction digits and the letters D, E, F, G are absorbed */
        while ((charClass == DIGIT || nextChar == 'D') || (nextChar == 'E' || (nextChar == 'F' || nextChar == 'G')))
        {
            addChar();
            getChar();
        }
        break;
    case UNKNOWN:
        Look(nextChar);
        getChar();
        /* two consecutive asterisks form the power operator */
        if (previousChar == '*' && nextChar == '*')
        {
            addChar();
            nextToken = POW_OP;
            getChar();
        }
        break;
    case LOGICAL_EXP:
        /* dotted operator: '.' letters '.' */
        addChar();
        getChar();
        while (charClass == LETTER)
        {
            addChar();
            getChar();
        }
        if (nextChar == '.')
        {
            addChar();
            getChar();
            if (strcasecmp(lexeme, ".gt.") == 0)
                nextToken = GT_OP;
            else if (strcasecmp(lexeme, ".lt.") == 0)
                nextToken = LT_OP;
            else if (strcasecmp(lexeme, ".ge.") == 0)
                nextToken = GE_OP;
            else if (strcasecmp(lexeme, ".le.") == 0)
                nextToken = LE_OP;
            else if (strcasecmp(lexeme, ".eq.") == 0)
                nextToken = EQ_OP;
            else if (strcasecmp(lexeme, ".ne.") == 0)
                nextToken = NE_OP;
            else if (strcasecmp(lexeme, ".not.") == 0)
                nextToken = NOT_OP;
            else if (strcasecmp(lexeme, ".and.") == 0)
                nextToken = AND_OP;
            else if (strcasecmp(lexeme, ".or.") == 0)
                nextToken = OR_OP;
            else if (strcasecmp(lexeme, ".eqv.") == 0)
                nextToken = EQV_OP;
            else if (strcasecmp(lexeme, ".neqv.") == 0)
                nextToken = NEQV_OP;
            else
                exceptionHandler(EXC_UNDEFINED_COMP_OP);
        }
        break;
    case EOF:
        nextToken = EOF;
        lexeme[0] = 'E';
        lexeme[1] = 'O';
        lexeme[2] = 'F';
        lexeme[3] = 0;
        break;
    default:
        break;
    }
    if (lexLen > 31)
        exceptionHandler(EXC_LONG_ID);
    if (commentFlag == 0 && exception == 0)
    {
        /* "END <word>" that is not a keyword: fall back to plain END */
        int undoLookahead = (endFlag == 1 && keywordFlag == 0);

        if (undoLookahead)
        {
            strcpy(lexeme, "END");
            nextToken = KEYWORD;
        }
        printf("Next token is: %d, Next lexeme is %s ",
               nextToken, lexeme);
        if (tkn_fp != NULL)
            fprintf(tkn_fp, "(%d,%s) ", nextToken, lexeme);
        if (undoLookahead)
        {
            /* Rewind the file AND the one-character lookahead; restoring
               only the file position made the scanner read the character
               after the lookahead twice.
               NOTE(review): the global line counter is not rewound here,
               so line breaks crossed during the lookahead may be counted
               twice. */
            fseek(in_fp, positionFileEnd, SEEK_SET);
            nextChar = savedChar;
            previousChar = savedPrevious;
            charClass = savedClass;
        }
    }
    if (commentFlag == 1)
    {
        /* Skip the rest of the comment.
           NOTE(review): the comment is taken to end at the next space,
           mirroring the space test in getChar(); confirm whether both
           were meant to be a newline. */
        int hitEof = (nextChar == EOF);

        while (nextChar != ' ' && !hitEof)
        {
            /* keep getc's int result so EOF is recognised reliably */
            int c = getc(in_fp);
            if (c == EOF)
                hitEof = 1;
            nextChar = (char) c;
        }
        if (hitEof)
        {
            nextToken = EOF;
            lexeme[0] = 'E';
            lexeme[1] = 'O';
            lexeme[2] = 'F';
            lexeme[3] = 0;
            printf("Next token is: %d, Next lexeme is %s ",
                   nextToken, lexeme);
            if (tkn_fp != NULL)
                fprintf(tkn_fp, "(%d,%s) ", nextToken, lexeme);
        }
    }
    return nextToken;
}
/*
 * exceptionHandler - report a lexical error.
 * exceptionCode: one of the EXC_* codes; codes without a message of their
 * own only produce the "ERROR! " prefix.
 * Increments errors, sets exception to 1 and prints the message together
 * with line - 1.
 */
void exceptionHandler(int exceptionCode)
{
    const int reportedLine = line - 1;

    errors++;
    printf("ERROR! ");
    exception = 1;

    if (exceptionCode == EXC_STRING_UNCLOSED)
        printf("String not closed in line: %d ", reportedLine);
    else if (exceptionCode == EXC_IMPROPER_ID)
        printf("Identiifer started with digit in line: %d ", reportedLine);
    else if (exceptionCode == EXC_IMPROPER_REAL_TYPE)
        printf("Real type defined improperly in line: %d ", reportedLine);
    else if (exceptionCode == EXC_LONG_ID)
        printf("Identifier has more character than 31 in line: %d ", reportedLine);
    else if (exceptionCode == EXC_UNDEFINED_CHAR)
        printf("Undefined character in line: %d ", reportedLine);
    else if (exceptionCode == EXC_UNDEFINED_COMP_OP)
        printf("Undefined compresion operator in line: %d ", reportedLine);
}
Related Questions
Navigate
Integrity-first tutoring: explanations and feedback only — we do not complete graded work. Learn more.