Academic Integrity: tutoring, explanations, and feedback — we don’t complete graded work or submit on a student’s behalf.

Lexical analyzer in Java: I'm trying to translate the C code of a lexical analyzer into Java

ID: 670423 • Letter: L

Question

lexical analyzer in java.

I'm trying to translate the C code of a lexical analyzer into Java.

The output should look something like this

Here is the code:


/*
* Description: A modified version of 'front.c' from Sebesta, 10e
*
*/

/* front.c - a lexical analyzer and syntax analyzer for simple arithmetic expressions */

/**
*           This program only parses a single expression terminated by a semicolon
*           Additional expressions/lines will result in an error
*/

#include <ctype.h>
#include <stdio.h>

/* Global declarations */
/* Variables */
int charClass;      /* class of nextChar: LETTER, DIGIT, UNKNOWN or EOF */
char lexeme [100];  /* NUL-terminated text of the lexeme being built */
/*
 * Holds the value returned by getc(), so it must be an int: a plain char
 * cannot represent EOF distinctly from every valid byte value.
 */
int nextChar;
int lexLen;         /* number of characters currently stored in lexeme */
int token;
int nextToken;      /* code of the most recently recognised token */
FILE *in_fp;        /* input stream being analysed */

/* Function declarations */
/*
 * Real prototypes: "(void)" states that a function takes no arguments, so
 * the compiler can reject accidental calls with arguments. An empty "()"
 * only declares the return type.
 */
void addChar(void);
void getChar(void);
void getNonBlank(void);
int lookup(char ch);
int lex(void);
void start(void);
void expr(void);
void term(void);
void factor(void);
void error(void);
void help(void);

/* Character classes */
/* Character classes stored in charClass (EOF from <stdio.h> is the third) */
enum {
    LETTER  = 0,
    DIGIT   = 1,
    UNKNOWN = 99
};

/* Token codes stored in nextToken */
enum {
    INT_LIT     = 10,
    IDENT       = 11,
    ASSIGN_OP   = 20,
    ADD_OP      = 21,
    SUB_OP      = 22,
    MULT_OP     = 23,
    DIV_OP      = 24,
    LEFT_PAREN  = 25,
    RIGHT_PAREN = 26,
    SEMICOLON   = 27
};

/*****************************************************************************/
/* BEGIN main function */
/*****************************************************************************/
// int main(int argc, char *argv[]) /* These two main headers are equivalent */
int main(int argc, char **argv)
{
  
    // printf("THIS EXECUTABLE FILE IS: %s ", argv[0]);
    // while(argc--)
    //    printf("Now parsing file: %s ", *argv++);
    if(argc==1)
    {
        // printf("There is only %i arguments ", argc);
        printf("ERROR: Need to supply file name to parse ");
        help();
        return 1;
    }
  
    // char file_name[] = argv[1];
    printf("SOURCE FILE: %s ", argv[1]);
  
  
    /* Open the input data file and process its contents */
    // if ((in_fp = fopen(file_name, "r")) == NULL)
    // if ((in_fp = fopen("front.in", "r")) == NULL)
    if ((in_fp = fopen(argv[1], "r")) == NULL)
        printf("ERROR - cannot open file 'front.in' ");
    else {
        getChar();
        lex();
        start();
        if(nextToken == EOF) {
            printf(">> EOF ");
        }
        else {
            printf(" ERROR: Expected EOF ");
            printf("ERROR: instead found next token is: %d, Next lexeme is '%s' ", nextToken, lexeme);
            error();
        }
        // do {
        //     lex();
        // } while (nextToken != EOF);
    }
    return 1;
}
/*****************************************************************************/
/* END main function */
/*****************************************************************************/

/* lookup - a function to lookup operators and parentheses and return the token */
int lookup(char ch) {
    switch (ch) {
        case ';':
            addChar();
            nextToken = SEMICOLON;
            break;
        case '=':
            addChar();
            nextToken = ASSIGN_OP;
            break;
        case '(':
            addChar();
            nextToken = LEFT_PAREN;
            break;
        case ')':
            addChar();
            nextToken = RIGHT_PAREN;
            break;
        case '+':
            addChar();
            nextToken = ADD_OP;
            break;
        case '-':
            addChar();
            nextToken = SUB_OP;
            break;
        case '*':
            addChar();
            nextToken = MULT_OP;
            break;
        case '/':
            addChar();
            nextToken = DIV_OP;
            break;
        default:
            addChar();
            nextToken = EOF;
            break;
    }
    return nextToken;
} /* end lookup(char ch) */


/*
 * addChar - append nextChar to lexeme and keep it NUL-terminated.
 * Once 99 characters are stored, the character is dropped and a
 * message is printed instead.
 */
void addChar() {
    if (lexLen > 98) {
        printf("Error - lexeme is too long ");
        return;
    }

    lexeme[lexLen] = nextChar;
    lexLen += 1;
    lexeme[lexLen] = 0;
}


/*
 * getChar - read the next character from in_fp into nextChar and record
 * its class (LETTER, DIGIT, UNKNOWN or EOF) in charClass.
 */
void getChar(void) {
    /*
     * Keep the result of getc() in an int until it has been compared with
     * EOF: narrowing to char first either never matches EOF (unsigned char)
     * or mistakes the byte 0xFF for EOF (signed char).
     */
    int c = getc(in_fp);

    nextChar = c;

    if (c == EOF) {
        charClass = EOF;
    } else if (isalpha(c)) {
        /* c is an unsigned char value here, which is what <ctype.h> requires */
        charClass = LETTER;
    } else if (isdigit(c)) {
        charClass = DIGIT;
    } else {
        charClass = UNKNOWN;
    }
}


/* getNonBlank - a function to call getChar until it returns a non-whitespace character */
void getNonBlank()
{
    while (isspace(nextChar))
        getChar();
}


/* lex - a simple lexical analyzer for arithmetic expressions */
int lex()
{
    lexLen = 0;
    getNonBlank();
    switch (charClass) {
            /* Parse identifiers */
        case LETTER:
            addChar();
            getChar();
            while ( charClass == LETTER || charClass == DIGIT)
            {
                addChar();
                getChar();
            }
            nextToken = IDENT;
            break;
            /* Parse integer literals */
        case DIGIT:
            addChar();
            getChar();
            while ( charClass == DIGIT)
            {
                addChar();
                getChar();
            }
            nextToken = INT_LIT;
            break;
            /* Parentheses and operators */
        case UNKNOWN:
            lookup(nextChar);
            getChar();
            break;
            /* EOF */
        case EOF:
            nextToken = EOF;
            lexeme[0] = 'E';
            lexeme[1] = 'O';
            lexeme[2] = 'F';
            lexeme[3] = 0;
            break;
    } /* End of switch */
    printf("Next token is: %d, Next lexeme is '%s' ", nextToken, lexeme);
    return nextToken;
} /* End of function lex */

/**
* The syntactic parser functions begin here.
* Each function is an implementation of a BNF grammar production rule.
*/


/**
*   start
*   <start> -> <expr> ;
*
*   Parses one expression and requires a terminating semicolon; the token
*   after the semicolon is fetched so the caller can test for end of input.
*/
void start(void) {
    /* Name the production so the trace shows which rule is active */
    printf("Enter <start> ");

    /* Parse the first expr */
    expr();

    if (nextToken == SEMICOLON) {
        lex();
    }
    else {
        printf("ERROR: Missing semicolon after <expr> ");
        error();
    }
    printf("Exit <start> ");
}


/**
*   expr
*   <expr> -> <term> {(+ | -) <term>}
*/
void expr(void) {
    /* Name the production so the trace shows which rule is active */
    printf("Enter <expr> ");

    /* Parse the first term */
    term();

    /* As long as the next token is + or -, get
       the next token and parse the next term */
    while (nextToken == ADD_OP || nextToken == SUB_OP) {
        lex();
        term();
    }
    printf("Exit <expr> ");
} /* End of function expr */


/**
*   term
*   <term> -> <factor> {(* | /) <factor>}
*/
void term(void) {
    /* Name the production so the trace shows which rule is active */
    printf("Enter <term> ");

    /* Parse the first factor */
    factor();

    /* As long as the next token is * or /, get the
       next token and parse the next factor */
    while (nextToken == MULT_OP || nextToken == DIV_OP) {
        lex();
        factor();
    }
    printf("Exit <term> ");
} /* End of function term */

/**
* factor
* -> id | int_constant | ( */
void factor() {
    printf("Enter ");
    /* Determine which RHS */
  
    if (nextToken == IDENT || nextToken == INT_LIT) {
        /* Get the next token */
        lex();
    }
    /*
     * If the RHS is (), call lex to pass over the
     * left parenthesis, call expr, and check for the right
     * parenthesis
     */
    else {
        if (nextToken == LEFT_PAREN) {
            lex();
            expr();
            if (nextToken == RIGHT_PAREN) {
                lex();
            }
            else {
                error();
                printf("Expecting Right Parenthesis! ");
            }
        } /* End of if (nextToken == LEFT_PAREN */
        /*   It was not an id, an integer literal, or a left
         parenthesis */
        else {
            error();
        } /* End of else */
    } /* End of else */
    printf("Exit ");
} /* End of function factor */

/*****************************************************************************/
/*****************************************************************************/


/* help - print how to build and invoke the parser */
void help(void) {
    printf("Compile with: gcc front.c -o parser.exe ");
    /* The file-name placeholder is required for the usage line to make sense */
    printf("USAGE: parser.exe <filename>.c ");
}

/*
 * error - report a parse error.
 *
 * Only a fixed, generic message is printed. A possible extension is to
 * accept an int error code and print a message specific to it.
 */
void error() {
    fputs("ERROR: this is a generic error message! ", stdout);
}

Explanation / Answer

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
public class Lexer
{
private BufferedReader reader;
private char curr;
private static final char EOF = (char) (-1);
public Lexer(String file)
{
try
{
reader = new BufferedReader(new FileReader(file));
}
catch (Exception e)
{
e.printStackTrace();
}
curr = read();
}
private char read()
{
try
{
return (char) (reader.read());
}
catch (IOException e)
{
e.printStackTrace();
return EOF;
}
}
private boolean isNumeric(char c)
{
if (c >= '0' && c <= '9')
return true;
return false;
}
public boolean isAlpha(char c)
{
if(c>='a' && c<='z' )
return true;
if(c>='A' && c<='Z' )
return true;
return false;
}
public Token nextToken()
{
int state = 1;
int numBuffer = 0;
String alphaBuffer = "";
int decBuffer=0;
boolean skipped = false;
while (true)
{
if (curr == EOF && !skipped)
{
skipped = true;
}
else if (skipped)
{
try
{
reader.close();
}
catch (IOException e)
{
e.printStackTrace();
}
return null;
}
switch (state)
{
case 1:
switch (curr)
{
case ' ':
case ' ':
case '':
case ' ':
case ' ':
case ' ':
curr = read();
continue;
case ';':
curr = read();
return new Token("SM", ";");
case '+':
curr = read();
return new Token("PO", "+");
case '-':
curr = read();
return new Token("MO", "-");
case '*':
curr = read();
return new Token("TO", "*");
case '/':
curr = read();
state = 14;
continue;
//return new Token("DO", "/");
case',':
curr=read();
return new Token("FA",",");
case'(':
curr=read();
return new Token("LP","(");
case')':
curr=read();
return new Token("RP",")");
case'{':
curr=read();
return new Token("LB","{");
case'}':
curr=read();
return new Token("RB","}");
case'%':
curr=read();
return new Token("MD","%");
case'=':
curr=read();
state=8;
continue;
case'!':
curr=read();
state=9;
continue;
case'&':
curr=read();
state=10;
continue;
case'|':
curr=read();
state=11;
continue;
case '"':
curr=read();
state=13;
alphaBuffer="";
continue;
default:
state = 2;
continue;
}
case 2:
if (isNumeric(curr))
{
numBuffer = 0;
numBuffer += (curr - '0');
state = 3;
curr = read();
}
else
{
state=5;
}
continue;
case 3:
if (isNumeric(curr))
{
numBuffer *= 10;
numBuffer += (curr - '0');
curr = read();
}
else if(curr=='.')
{
curr = read();
state=4;
}
else
{
return new Token("NM", "" + numBuffer);
}
continue;
case 4:
if (isNumeric(curr))
{
decBuffer = 0;
decBuffer += (curr - '0');
state=7;     
curr = read();
}
else
{
return new Token("ERROR", "Invalid input: "+numBuffer+"." );
}
continue;
case 7:
if (isNumeric(curr))
{
decBuffer *= 10;
decBuffer += (curr - '0');
curr = read();
}
else
{
return new Token("NM", "" + numBuffer+"."+decBuffer);
}
continue;
case 5:
if(isAlpha(curr)|| curr=='_')
{
alphaBuffer = "";     
alphaBuffer+=curr;
state=6;
curr = read();
}
else
{
alphaBuffer = "";     
alphaBuffer+=curr;
curr=read();
return new Token("ERROR", "Invalid input:"+alphaBuffer);
}
continue;
case 6:
if ((isAlpha(curr) || isNumeric(curr) || curr=='_'))
{
alphaBuffer += curr;
curr = read();
}
else
{
if( alphaBuffer.equals("class")||alphaBuffer.equals("static")||alphaBuffer.equals("else")||alphaBuffer.equals("if")||alphaBuffer.equals("int")||alphaBuffer.equals("float")|alphaBuffer.equals("boolean")||alphaBuffer.equals("String")||alphaBuffer.equals("return")||alphaBuffer.equals("while"))
{
return new Token("KW", "" + alphaBuffer);
}
else if(alphaBuffer.equals("true")||alphaBuffer.equals("false"))
{
return new Token("BL", "" + alphaBuffer);
}
return new Token("ID", "" + alphaBuffer);
}
continue;
case 8:
if(curr=='=')
{
curr=read();
return new Token("EQ","==");
}
else
{
return new Token("AO","=");  
}
case 9:
if(curr=='=')
{
curr=read();
return new Token("NE","!=");
}
else
{
return new Token("ERROR", "Invalid input: !");
}
case 10:
if(curr=='&')
{
curr=read();
return new Token("LA","&&");
}
else
{
return new Token("ERROR", "Invalid input: &");
}
case 11:
if(curr=='|')
{
curr=read();
return new Token("LO","||");
}
else
{
return new Token("ERROR", "Invalid input: |");
}
case 13:
if(curr=='"')
{
curr=read();
return new Token("ST","""+alphaBuffer+""");
}
else if(curr==' ' || curr==EOF)
{
curr=read();
return new Token("ERROR","Invalid string literal");
}
else
{
alphaBuffer += curr;
curr = read();
}
continue;
case 14:
if(curr=='/')
{
state = 15;
curr=read();
}
else if(curr=='*')
{
state = 16;
curr=read();
}
else
{
return new Token("DO", "/");
}
continue;
case 15:
if(curr==' ')
{
state = 1;
}
curr=read();
continue;
case 16:
if(curr=='*')
{
state = 17;
}
curr=read();
continue;
case 17:
if(curr=='/')
{
curr=read();
state = 1;
}
else
{
curr=read();
state=16;
}
continue;
}
}
}
}

Easy way:

Public static void main(String[] args)
{
String s = "(3+4)*5";
System.out.println("s = " + s);
char[] chars = s.toCharArray();
for (int i = 0; i < chars.length ; i++)
{
//System.out.println("s = " + s);
lookup(chars[i]);
}
}
/**
 * Prints the token code and lexeme for a single character, using the token
 * codes of the C program: 10 integer literal, 11 identifier, 20 '=',
 * 21 '+', 22 '-', 23 '*', 24 '/', 25 '(', 26 ')', 27 ';'. Any other
 * character is reported with code -1.
 *
 * @param i the character to classify
 * @return always 0
 */
public static int lookup(int i)
{
    int code;
    switch (i)
    {
        case '(':
            code = 25;
            break;
        case ')':
            code = 26;
            break;
        case '+':
            code = 21;
            break;
        case '-':
            code = 22;
            break;
        case '*':
            code = 23;
            break;
        case '/':
            code = 24;
            break;
        case '=':
            code = 20;
            break;
        case ';':
            code = 27;
            break;
        default:
            if (Character.isDigit(i))
            {
                code = 10;
            }
            else if (Character.isLetter(i))
            {
                code = 11;
            }
            else
            {
                code = -1;
            }
            break;
    }
    // The numeric code is the token; the character itself is the lexeme.
    System.out.println("Next Token = " + code + " Next lexeme = '" + (char) i + "'");
    return 0;
}