Lexical analyzer in Java. I'm trying to translate the C code of a lexical analyzer into Java.
ID: 670423 • Letter: L
Question
lexical analyzer in java.
I'm trying to translate the C code of a lexical analyzer into Java.
The output should look something like this
Here is the code:
/*
* Description: A modified version of 'front.c' from Sebesta, 10e
*
*/
/* front.c - a lexical analyzer and syntax analyzer for simple arithmetic expressions */
/**
* This program only parses a single expression terminated by a semicolon
* Additional expressions/lines will result in an error
*/
#include <ctype.h>
#include <stdio.h>
/* Global declarations */
/* Variables */
int charClass;      /* class of nextChar: LETTER, DIGIT, UNKNOWN or EOF */
char lexeme[100];   /* text of the token being built; addChar keeps it NUL-terminated */
int nextChar;       /* last value returned by getc(); kept as int so that EOF
                       cannot be confused with a valid byte such as 0xFF */
int lexLen;         /* number of characters currently stored in lexeme */
int token;
int nextToken;      /* code of the most recently scanned token */
FILE *in_fp;        /* input stream, opened in main */
/* Function declarations */
void addChar(void);
void getChar(void);
void getNonBlank(void);
int lookup(char ch);
int lex(void);
void start(void);
void expr(void);
void term(void);
void factor(void);
void error(void);
void help(void);
/* Character classes */
#define LETTER 0
#define DIGIT 1
/* Token codes */
#define INT_LIT 10
#define IDENT 11
#define ASSIGN_OP 20
#define ADD_OP 21
#define SUB_OP 22
#define MULT_OP 23
#define DIV_OP 24
#define LEFT_PAREN 25
#define RIGHT_PAREN 26
#define SEMICOLON 27
/* Character class for anything that is neither a letter nor a digit */
#define UNKNOWN 99
/*****************************************************************************/
/* BEGIN main function */
/*****************************************************************************/
/*
 * main - parse the single expression stored in the file named by argv[1].
 *
 * Primes the scanner (getChar + lex), runs the parser from start(), and then
 * checks that the whole input was consumed.
 *
 * Returns 0 when the parse ended exactly at EOF; returns 1 when no file name
 * was given, the file could not be opened, or tokens remained after the
 * expression.
 */
int main(int argc, char **argv)
{
    int status = 0;

    /* argc < 2 also covers environments that start the program with argc == 0 */
    if (argc < 2) {
        printf("ERROR: Need to supply file name to parse ");
        help();
        return 1;
    }
    printf("SOURCE FILE: %s ", argv[1]);
    /* Open the input data file and process its contents */
    in_fp = fopen(argv[1], "r");
    if (in_fp == NULL) {
        /* Report the name that was actually requested, not a fixed one */
        printf("ERROR - cannot open file '%s' ", argv[1]);
        return 1;
    }
    getChar();
    lex();
    start();
    if (nextToken == EOF) {
        printf(">> EOF ");
    } else {
        printf(" ERROR: Expected EOF ");
        printf("ERROR: instead found next token is: %d, Next lexeme is '%s' ", nextToken, lexeme);
        error();
        status = 1;
    }
    fclose(in_fp);
    return status;
}
/*****************************************************************************/
/* END main function */
/*****************************************************************************/
/*
 * lookup - classify a one-character token (operator, parenthesis or ';').
 *
 * Appends the current input character to lexeme through addChar(), stores the
 * token code selected by ch in nextToken and returns that code. Any character
 * that is not listed below is reported as EOF.
 */
int lookup(char ch) {
    int code;

    if (ch == ';')
        code = SEMICOLON;
    else if (ch == '=')
        code = ASSIGN_OP;
    else if (ch == '(')
        code = LEFT_PAREN;
    else if (ch == ')')
        code = RIGHT_PAREN;
    else if (ch == '+')
        code = ADD_OP;
    else if (ch == '-')
        code = SUB_OP;
    else if (ch == '*')
        code = MULT_OP;
    else if (ch == '/')
        code = DIV_OP;
    else
        code = EOF;

    /* Every branch records the character before publishing the token code */
    addChar();
    nextToken = code;
    return nextToken;
} /* end lookup(char ch) */
/*
 * addChar - append nextChar to lexeme and keep lexeme NUL-terminated.
 * When lexLen already exceeds 98 nothing is stored and an error message is
 * printed instead (index 99 is the last slot and is kept for the terminator).
 */
void addChar() {
    if (lexLen > 98) {
        printf("Error - lexeme is too long ");
        return;
    }
    lexeme[lexLen] = nextChar;
    lexLen += 1;
    lexeme[lexLen] = 0;
}
/*
 * getChar - read the next character of input into nextChar and set charClass
 * to LETTER, DIGIT, UNKNOWN, or EOF once the input is exhausted.
 *
 * The value from getc() is tested while it is still an int: narrowing it to
 * char first would make a 0xFF byte look like EOF and could hand a negative
 * value to the <ctype.h> classifiers.
 */
void getChar() {
    int c = getc(in_fp);

    nextChar = c;
    if (c == EOF) {
        charClass = EOF;
    } else if (isalpha(c)) {
        charClass = LETTER;
    } else if (isdigit(c)) {
        charClass = DIGIT;
    } else {
        charClass = UNKNOWN;
    }
}
/* getNonBlank - a function to call getChar until it returns a non-whitespace character */
void getNonBlank()
{
while (isspace(nextChar))
getChar();
}
/*
 * lex - a simple lexical analyzer for arithmetic expressions.
 *
 * Skips whitespace, collects the next lexeme into lexeme, stores its token
 * code in nextToken, prints both and returns the token code.
 */
int lex()
{
    static const char eof_text[] = "EOF";
    int i;

    lexLen = 0;
    getNonBlank();
    switch (charClass) {
    /* Identifiers: a letter followed by any run of letters and digits */
    case LETTER:
        do {
            addChar();
            getChar();
        } while (charClass == LETTER || charClass == DIGIT);
        nextToken = IDENT;
        break;
    /* Integer literals: one or more digits */
    case DIGIT:
        do {
            addChar();
            getChar();
        } while (charClass == DIGIT);
        nextToken = INT_LIT;
        break;
    /* Parentheses and operators: lookup sets nextToken itself */
    case UNKNOWN:
        lookup(nextChar);
        getChar();
        break;
    /* End of input: the lexeme becomes the text "EOF" (terminator included) */
    case EOF:
        nextToken = EOF;
        for (i = 0; i < (int)sizeof eof_text; i++)
            lexeme[i] = eof_text[i];
        break;
    } /* End of switch */
    printf("Next token is: %d, Next lexeme is '%s' ", nextToken, lexeme);
    return nextToken;
} /* End of function lex */
/**
 * The syntactic parser functions begin here.
 * Each function implements one production rule of the BNF grammar.
 */
/**
 * start
 * <start> -> <expr> ;
 *
 * Parses one expression and requires a semicolon after it; the semicolon is
 * consumed with lex(), anything else is reported as an error.
 */
void start() {
    printf("Enter ");
    /* Parse the expression */
    expr();
    if (nextToken != SEMICOLON) {
        printf("ERROR: Missing semicolon after ");
        error();
    } else {
        /* Step past the terminating semicolon */
        lex();
    }
    printf("Exit ");
}
/**
 * expr
 * <expr> -> <term> {(+ | -) <term>}
 */
void expr() {
    printf("Enter ");
    /* The first term is mandatory */
    term();
    /* Each following + or - must be followed by another term */
    for (;;) {
        if (nextToken != ADD_OP && nextToken != SUB_OP)
            break;
        lex();
        term();
    }
    printf("Exit ");
} /* End of function expr */
/**
 * term
 * <term> -> <factor> {(* | /) <factor>}
 */
void term() {
    printf("Enter ");
    /* The first factor is mandatory */
    factor();
    /* Each following * or / must be followed by another factor */
    for (;;) {
        if (nextToken != MULT_OP && nextToken != DIV_OP)
            break;
        lex();
        factor();
    }
    printf("Exit ");
} /* End of function term */
/**
 * factor
 * <factor> -> id | int_constant | ( <expr> )
 */
void factor() {
    printf("Enter ");
    /* Determine which right-hand side applies */
    switch (nextToken) {
    case IDENT:
    case INT_LIT:
        /* A single-token factor: just move on to the next token */
        lex();
        break;
    case LEFT_PAREN:
        /* Pass over '(', parse the nested expression, then expect ')' */
        lex();
        expr();
        if (nextToken != RIGHT_PAREN) {
            error();
            printf("Expecting Right Parenthesis! ");
        } else {
            lex();
        }
        break;
    default:
        /* Not an id, an integer literal, or a left parenthesis */
        error();
        break;
    }
    printf("Exit ");
} /* End of function factor */
/*****************************************************************************/
/*****************************************************************************/
/* help - print the build command and the usage line to standard output */
void help() {
    static const char *const lines[] = {
        "Compile with: gcc front.c -o parser.exe ",
        "USAGE: parser.exe .c ",
    };
    size_t i;

    for (i = 0; i < sizeof lines / sizeof lines[0]; i++)
        fputs(lines[i], stdout);
}
/*
 * error - print the generic error message to standard output.
 * Possible extension: accept an int error code and print a message
 * specific to it.
 */
void error() {
    fputs("ERROR: this is a generic error message! ", stdout);
}
Explanation / Answer
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
public class Lexer
{
private BufferedReader reader;
private char curr;
private static final char EOF = (char) (-1);
public Lexer(String file)
{
try
{
reader = new BufferedReader(new FileReader(file));
}
catch (Exception e)
{
e.printStackTrace();
}
curr = read();
}
private char read()
{
try
{
return (char) (reader.read());
}
catch (IOException e)
{
e.printStackTrace();
return EOF;
}
}
private boolean isNumeric(char c)
{
if (c >= '0' && c <= '9')
return true;
return false;
}
public boolean isAlpha(char c)
{
if(c>='a' && c<='z' )
return true;
if(c>='A' && c<='Z' )
return true;
return false;
}
public Token nextToken()
{
int state = 1;
int numBuffer = 0;
String alphaBuffer = "";
int decBuffer=0;
boolean skipped = false;
while (true)
{
if (curr == EOF && !skipped)
{
skipped = true;
}
else if (skipped)
{
try
{
reader.close();
}
catch (IOException e)
{
e.printStackTrace();
}
return null;
}
switch (state)
{
case 1:
switch (curr)
{
case ' ':
case ' ':
case '':
case '':
case ' ':
case ' ':
curr = read();
continue;
case ';':
curr = read();
return new Token("SM", ";");
case '+':
curr = read();
return new Token("PO", "+");
case '-':
curr = read();
return new Token("MO", "-");
case '*':
curr = read();
return new Token("TO", "*");
case '/':
curr = read();
state = 14;
continue;
//return new Token("DO", "/");
case',':
curr=read();
return new Token("FA",",");
case'(':
curr=read();
return new Token("LP","(");
case')':
curr=read();
return new Token("RP",")");
case'{':
curr=read();
return new Token("LB","{");
case'}':
curr=read();
return new Token("RB","}");
case'%':
curr=read();
return new Token("MD","%");
case'=':
curr=read();
state=8;
continue;
case'!':
curr=read();
state=9;
continue;
case'&':
curr=read();
state=10;
continue;
case'|':
curr=read();
state=11;
continue;
case '"':
curr=read();
state=13;
alphaBuffer="";
continue;
default:
state = 2;
continue;
}
case 2:
if (isNumeric(curr))
{
numBuffer = 0;
numBuffer += (curr - '0');
state = 3;
curr = read();
}
else
{
state=5;
}
continue;
case 3:
if (isNumeric(curr))
{
numBuffer *= 10;
numBuffer += (curr - '0');
curr = read();
}
else if(curr=='.')
{
curr = read();
state=4;
}
else
{
return new Token("NM", "" + numBuffer);
}
continue;
case 4:
if (isNumeric(curr))
{
decBuffer = 0;
decBuffer += (curr - '0');
state=7;
curr = read();
}
else
{
return new Token("ERROR", "Invalid input: "+numBuffer+"." );
}
continue;
case 7:
if (isNumeric(curr))
{
decBuffer *= 10;
decBuffer += (curr - '0');
curr = read();
}
else
{
return new Token("NM", "" + numBuffer+"."+decBuffer);
}
continue;
case 5:
if(isAlpha(curr)|| curr=='_')
{
alphaBuffer = "";
alphaBuffer+=curr;
state=6;
curr = read();
}
else
{
alphaBuffer = "";
alphaBuffer+=curr;
curr=read();
return new Token("ERROR", "Invalid input:"+alphaBuffer);
}
continue;
case 6:
if ((isAlpha(curr) || isNumeric(curr) || curr=='_'))
{
alphaBuffer += curr;
curr = read();
}
else
{
if( alphaBuffer.equals("class")||alphaBuffer.equals("static")||alphaBuffer.equals("else")||alphaBuffer.equals("if")||alphaBuffer.equals("int")||alphaBuffer.equals("float")|alphaBuffer.equals("boolean")||alphaBuffer.equals("String")||alphaBuffer.equals("return")||alphaBuffer.equals("while"))
{
return new Token("KW", "" + alphaBuffer);
}
else if(alphaBuffer.equals("true")||alphaBuffer.equals("false"))
{
return new Token("BL", "" + alphaBuffer);
}
return new Token("ID", "" + alphaBuffer);
}
continue;
case 8:
if(curr=='=')
{
curr=read();
return new Token("EQ","==");
}
else
{
return new Token("AO","=");
}
case 9:
if(curr=='=')
{
curr=read();
return new Token("NE","!=");
}
else
{
return new Token("ERROR", "Invalid input: !");
}
case 10:
if(curr=='&')
{
curr=read();
return new Token("LA","&&");
}
else
{
return new Token("ERROR", "Invalid input: &");
}
case 11:
if(curr=='|')
{
curr=read();
return new Token("LO","||");
}
else
{
return new Token("ERROR", "Invalid input: |");
}
case 13:
if(curr=='"')
{
curr=read();
return new Token("ST","""+alphaBuffer+""");
}
else if(curr==' ' || curr==EOF)
{
curr=read();
return new Token("ERROR","Invalid string literal");
}
else
{
alphaBuffer += curr;
curr = read();
}
continue;
case 14:
if(curr=='/')
{
state = 15;
curr=read();
}
else if(curr=='*')
{
state = 16;
curr=read();
}
else
{
return new Token("DO", "/");
}
continue;
case 15:
if(curr==' ')
{
state = 1;
}
curr=read();
continue;
case 16:
if(curr=='*')
{
state = 17;
}
curr=read();
continue;
case 17:
if(curr=='/')
{
curr=read();
state = 1;
}
else
{
curr=read();
state=16;
}
continue;
}
}
}
}
Easy way:
Public static void main(String[] args)
{
String s = "(3+4)*5";
System.out.println("s = " + s);
char[] chars = s.toCharArray();
for (int i = 0; i < chars.length ; i++)
{
//System.out.println("s = " + s);
lookup(chars[i]);
}
}
/**
 * Prints the token code and the lexeme for a single character.
 *
 * Token codes follow the C program's table: 10 integer literal, 11
 * identifier, 20 '=', 21 '+', 22 '-', 23 '*', 24 '/', 25 '(', 26 ')',
 * 27 ';'. Any other character gets -1, standing in for the EOF code that
 * the C lookup() assigns in its default branch.
 *
 * @param i the character to classify
 * @return always 0
 */
public static int lookup(int i)
{
    int code;
    switch (i)
    {
        case '(':
            code = 25;
            break;
        case ')':
            code = 26;
            break;
        case '+':
            code = 21;
            break;
        case '-':
            code = 22;
            break;
        case '*':
            code = 23;
            break;
        case '/':
            code = 24;
            break;
        case '=':
            code = 20;
            break;
        case ';':
            code = 27;
            break;
        default:
            if (i >= '0' && i <= '9')
            {
                code = 10;
            }
            else if ((i >= 'a' && i <= 'z') || (i >= 'A' && i <= 'Z'))
            {
                code = 11;
            }
            else
            {
                code = -1;
            }
            break;
    }
    // The token is the numeric code and the lexeme is the character itself.
    System.out.println("Next Token = " + code + " Next lexeme = " + "'" + (char) i + "'");
    return 0;
}
Related Questions
Navigate
Integrity-first tutoring: explanations and feedback only — we do not complete graded work. Learn more.