Write in C++ an assembler for a subset of the MIPS instruction set. It should re
ID: 3748257 • Letter: W
Question
Write, in C++, an assembler for a subset of the MIPS instruction set. It should read the assembly file from standard input and write the machine code to standard output. It must recognize the directives .text, .data, .word, and .space n. The assembler must also handle the I-format instructions lw, sw, beq, and bne, and resolve the labels they refer to.
.text
addu $s0,$zero,$zero # s0 = 0
addu $s1,$zero,$zero # s1 = 0
addiu $v0,$zero,5 # v0 = read value
syscall
sw $v0,n($gp) # M[n] = v0
L1: lw $s2,n($gp) # s2 = M[n]
slt $t0,$s1,$s2 # if s1 >= s2 then
beq $t0,$zero,L2 # goto L2
addiu $v0,$zero,5 # v0 = read value
syscall
addu $s0,$s0,$v0 # s0 += v0
addiu $s1,$s1,1 # s1 += 1
j L1 # goto L1
L2: addu $a0,$s0,$zero # a0 = s0
addiu $v0,$zero,1 # print a0
syscall
addiu $v0,$zero,10 # exit
syscall
.data
n: .word 0
Explanation / Answer
tokenizer.h :
#ifndef __PARSE_UTILS_H_
#define __PARSE_UTILS_H_
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <errno.h>
/* extracts the first token of in_str, where tokens are separated by any of
the characters in delim. leading delimiter characters are skipped.
on success:
- the token is returned as a freshly malloc'ed, NUL-terminated string that
the caller must free()
- *out_str points just past the delimiter that ended the token, so it can be
passed back in as in_str to walk through all tokens of a line
- *delim_char (when delim_char is not NULL) receives the delimiter that
ended the token, which can be used to recognise numerical ranges
it returns NULL, leaving *out_str and *delim_char untouched, when no
delimiter follows the token; text that is not terminated by a delimiter is
therefore never returned. NULL is also returned when the allocation fails,
in which case *out_str and *delim_char have already been updated.
no static state is used, so calls working on different strings do not
interfere with each other.
*/
static inline char *parse_token(char *in_str, char *delim, char **out_str, char *delim_char)
{
    char *start = in_str + strspn(in_str, delim); /* first non-delimiter */
    char *stop = strpbrk(start, delim);           /* delimiter ending the token */
    char *copy;
    int span;

    if (stop == NULL)
        return(NULL);

    span = stop - start;
    if (delim_char != NULL)
        *delim_char = *stop;
    *out_str = stop + 1; /* resume after the delimiter */

    copy = (char *) malloc(span + 1);
    if (copy != NULL)
    {
        memcpy(copy, start, span);
        copy[span] = (char) 0;
    }
    return(copy);
}
#endif
*********************************************************************************************************
hash_table.h :
#ifndef __HASH_TABLE__
#define __HASH_TABLE__
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <semaphore.h>
#include <stdint.h>
#include "hash_function.h"
#define TRUE 1
#define FALSE 0
/* one entry of a collision chain. entries that hash to the same row are
linked together through next/prev. */
typedef struct hash_entry_type
{
/* private copy of the key bytes; allocated by hash_insert and released by
hash_delete / destroy_hash_table */
void *key;
/* caller-supplied payload pointer; the table stores it as-is and never frees it */
void *data;
/* length of key in bytes */
uint32_t key_len;
/* following entry in the same row, or NULL at the end of the chain */
struct hash_entry_type *next;
/* preceding entry in the same row, or NULL for the first entry */
struct hash_entry_type *prev;
} hash_entry_t;
/* the hash table itself: `size` rows, each one a linked chain of entries. */
typedef struct
{
/* row[i] is the first entry of chain i, or NULL when the chain is empty */
hash_entry_t **row;
/* tail[i] is the last entry of chain i (where hash_insert appends), or NULL */
hash_entry_t **tail;
#ifdef __USE_HASH_LOCKS__
/* one semaphore per row, taken around every access to that row */
sem_t *row_lock;
#endif
/* number of rows */
uint32_t size;
} hash_table_t;
/*
creates an empty hash table.
parameters :
hash_table_size : number of rows (collision chains) in the table. must be
greater than zero, because every lookup reduces the hash value modulo
this size.
returns:
pointer to the new table, to be released with destroy_hash_table
NULL if hash_table_size is zero or memory could not be allocated; nothing
is leaked in that case
*/
static inline hash_table_t *create_hash_table(uint32_t hash_table_size)
{
    uint32_t t;
    int allocated;
    hash_table_t *hash_table;

    if (hash_table_size == 0) return(NULL);

    hash_table = ( hash_table_t *) malloc(sizeof( hash_table_t));
    if (hash_table == NULL) return(NULL);

    hash_table->row = ( hash_entry_t **) malloc(sizeof( hash_entry_t *) * (hash_table_size));
    hash_table->tail = ( hash_entry_t **) malloc(sizeof( hash_entry_t *) * (hash_table_size));
    allocated = (hash_table->row != NULL && hash_table->tail != NULL);
#ifdef __USE_HASH_LOCKS__
    hash_table->row_lock = (sem_t *) malloc(sizeof(sem_t) * (hash_table_size + 2));
    allocated = allocated && (hash_table->row_lock != NULL);
#endif
    if (!allocated)
    {
        /* release whatever was obtained before the failure; free(NULL) is a no-op */
        free(hash_table->row);
        free(hash_table->tail);
#ifdef __USE_HASH_LOCKS__
        free(hash_table->row_lock);
#endif
        free(hash_table);
        return(NULL);
    }

    for (t=0; t<hash_table_size; t++)
    {
        hash_table->row[t] = NULL;
        hash_table->tail[t] = NULL;
#ifdef __USE_HASH_LOCKS__
        sem_init(&hash_table->row_lock[t], 0, 1);
#endif
    }
    hash_table->size = hash_table_size;
    return(hash_table);
}
/*
appends a new entry to the end of the collision chain selected by key.
parameters :
hash_table : hash table to use
key : key to index the hash table; key_len bytes of it are copied into the
table, so the caller keeps ownership of its own buffer
key_len: length of the hash key in bytes
data : pointer to the data to associate with the key. the table stores only
the pointer; allocate and free the data within your application
returns:
TRUE if the entry was added
FALSE if memory for the entry or for the key copy could not be allocated
note: existing entries are not checked, so the same key or data can be
inserted more than once. if you insert multiple times, delete multiple
times as well.
*/
static inline int32_t hash_insert( hash_table_t *hash_table, void *key, uint32_t key_len, void *data)
{
    const uint32_t bucket = hash(key, key_len, 7) % hash_table->size;
    int32_t inserted = FALSE;
    hash_entry_t *node;
#ifdef __USE_HASH_LOCKS__
    sem_wait(&hash_table->row_lock[bucket]);
#endif
    node = ( hash_entry_t *) malloc(sizeof( hash_entry_t));
    if (node != NULL)
    {
        node->key = (char *) malloc(key_len);
        if (node->key == NULL)
        {
            printf("Warning: Unable to allocate memory for hash key. ");
            free(node);
        }
        else
        {
            hash_entry_t *last = hash_table->tail[bucket];

            memcpy(node->key, key, key_len);
            node->key_len = key_len;
            node->data = data;
            node->next = NULL;
            node->prev = last;
            /* link behind the current tail, or start the chain */
            if (last != NULL)
                last->next = node;
            else
                hash_table->row[bucket] = node;
            hash_table->tail[bucket] = node;
            inserted = TRUE;
        }
    }
#ifdef __USE_HASH_LOCKS__
    sem_post(&hash_table->row_lock[bucket]);
#endif
    return(inserted);
}
/*
deletes the first entry whose key equals the given key.
parameters :
hash_table : hash table to use
key : key to index the hash table
key_len: length of the key in bytes
returns:
TRUE: if an entry was found and removed
FALSE: if key could not be deleted (key was not found)
note: only the entry and the table's copy of the key are freed. the data
pointer stored with the entry belongs to the application.
*/
static inline int32_t hash_delete( hash_table_t *hash_table, void *key, uint32_t key_len)
{
    uint32_t hash_key, hash_table_size;
    hash_entry_t *ptr, *prev_ptr;
    hash_table_size = hash_table->size;
    hash_key = hash(key, key_len, 7) % hash_table_size;
#ifdef __USE_HASH_LOCKS__
    sem_wait(&(hash_table->row_lock[hash_key]));
#endif
    prev_ptr = NULL;
    for (ptr = hash_table->row[hash_key]; ptr != NULL; prev_ptr = ptr, ptr = ptr->next)
    {
        /* lengths must agree before comparing: otherwise a stored key that is
           shorter than key_len would be read past its end, and a stored key
           that merely starts with the same bytes would match (hash_find
           applies the same test) */
        if ((key_len != ptr->key_len) || (memcmp(ptr->key, key, key_len) != 0))
            continue;

        if (prev_ptr == NULL) /* first entry */
            hash_table->row[hash_key] = ptr->next;
        else
            prev_ptr->next = ptr->next;

        if (ptr->next == NULL)
            hash_table->tail[hash_key] = prev_ptr;
        else
            ptr->next->prev = prev_ptr; /* keep the backward link valid */

        free(ptr->key);
        free(ptr);
#ifdef __USE_HASH_LOCKS__
        sem_post(&hash_table->row_lock[hash_key]);
#endif
        return(TRUE);
    }
#ifdef __USE_HASH_LOCKS__
    sem_post(&hash_table->row_lock[hash_key]);
#endif
    return(FALSE);
}
/*
looks up the first entry whose key has the same length and the same bytes
as the given key.
parameters:
hash_table : pointer to the hash table to use
key : key to index the hash table.
key_len: length of the key in bytes
returns:
the data pointer that was stored with the matching entry on success
NULL if no entry matches
*/
static inline void *hash_find( hash_table_t *hash_table, void *key, uint32_t key_len)
{
    const uint32_t bucket = hash(key, key_len, 7) % hash_table->size;
    void *found = NULL;
    hash_entry_t *node;
#ifdef __USE_HASH_LOCKS__
    sem_wait(&hash_table->row_lock[bucket]);
#endif
    for (node = hash_table->row[bucket]; node != NULL; node = node->next)
    {
        if (node->key_len != key_len)
            continue;
        if (memcmp(node->key, key, key_len) != 0)
            continue;
        found = node->data;
        break;
    }
#ifdef __USE_HASH_LOCKS__
    sem_post(&hash_table->row_lock[bucket]);
#endif
    return(found);
}
/*
destroys the hash table and frees all memory the table itself allocated
(entries, key copies, row arrays, locks). the data pointers stored in the
entries are not freed; they belong to the application.
before returning it prints the length of the longest collision chain and
the total number of entries to standard output.
parameters:
hash_table : pointer to the hash table to use; invalid after the call
returns : nothing
*/
static inline void destroy_hash_table( hash_table_t *hash_table)
{
    uint32_t t, count, max_count=0, tot_count=0, hash_table_size;
    hash_entry_t *cur_ptr, *tmp_ptr;
    hash_table_size = hash_table->size;
    for (t=0; t<hash_table_size; t++)
    {
#ifdef __USE_HASH_LOCKS__
        sem_wait(&hash_table->row_lock[t]);
#endif
        /* start at zero so that count is exactly the number of entries freed */
        count = 0;
        cur_ptr = hash_table->row[t];
        while (cur_ptr != NULL)
        {
            tmp_ptr = cur_ptr->next;
            free(cur_ptr->key);
            free(cur_ptr);
            cur_ptr = tmp_ptr;
            count++;
        }
        hash_table->row[t] = NULL;
        hash_table->tail[t] = NULL;
        tot_count += count;
        if (count > max_count) max_count = count;
#ifdef __USE_HASH_LOCKS__
        sem_post(&hash_table->row_lock[t]);
        /* the semaphore was set up with sem_init, so it has to be destroyed
           before its storage is released */
        sem_destroy(&hash_table->row_lock[t]);
#endif
    }
    printf("Max collision list entries: %u. Total: %u ", max_count, tot_count);
    free(hash_table->row);
    free(hash_table->tail);
#ifdef __USE_HASH_LOCKS__
    free(hash_table->row_lock);
#endif
    free(hash_table);
}
#endif
*****************************************************************************************************
hash_function.h :
#ifndef __HASH_FUNCTION_H
#define __HASH_FUNCTION_H
#include <stdio.h>
#include <stddef.h>
#include <stdlib.h>
/* ub4 is meant to hold an unsigned 4-byte quantity.
   NOTE(review): `unsigned long` is wider than 4 bytes on some platforms
   (e.g. 64-bit Linux), so the arithmetic in hash() does not wrap at 32 bits
   there -- confirm nothing depends on a specific hash value. */
typedef unsigned long int ub4; /* unsigned 4-byte quantities */
/* ub1 is one byte of key material */
typedef unsigned char ub1;
/* hashsize(n) is 2 to the power n */
#define hashsize(n) ((ub4)1<<(n))
/* hashmask(n) has the low n bits set */
#define hashmask(n) (hashsize(n)-1)
/* forward declaration; the definition follows the mix() macro below */
static ub4 hash(register ub1 *k, register ub4 length, register ub4 level);
/* mix(a,b,c,d) scrambles four ub4 state variables in place. every argument
   must be a plain modifiable variable because each one is read and written
   several times. the body is wrapped in do/while(0) so that `mix(...);`
   behaves as a single statement, and every line ends in a continuation
   backslash so that the whole body belongs to the macro. */
#define mix(a,b,c,d) \
do { \
  a += d; d += a; a ^= (a>>7);  \
  b += a; a += b; b ^= (b<<13); \
  c += b; b += c; c ^= (c>>17); \
  d += c; c += d; d ^= (d<<9);  \
  a += d; d += a; a ^= (a>>3);  \
  b += a; a += b; b ^= (b<<7);  \
  c += b; b += c; c ^= (c>>15); \
  d += c; c += d; d ^= (d<<11); \
} while (0)
/*
hashes `length` bytes starting at k and returns the final value of the d
state word.
parameters:
k : the key bytes
length : number of bytes in the key
level : seed for the d state word (the call sites in hash_table.h pass 7)
the key is consumed 16 bytes at a time: each group of four bytes is
assembled low byte first and added to one of a, b, c, d, followed by a
mix(). the remaining 0..15 bytes are folded in by the switch below, whose
cases deliberately fall through, and a final mix() produces the result.
*/
static inline ub4 hash(register ub1 *k, register ub4 length, register ub4 level)
{
register ub4 a,b,c,d,len;
/* Set up the internal state */
len = length;
a = b = c = 0x9e3779b9; /* the golden ratio; an arbitrary value */
d = level; /* the previous hash value */
/*---------------------------------------- handle most of the key */
while (len >= 16)
{
a += (k[0] +((ub4)k[1]<<8) +((ub4)k[2]<<16) +((ub4)k[3]<<24));
b += (k[4] +((ub4)k[5]<<8) +((ub4)k[6]<<16) +((ub4)k[7]<<24));
c += (k[8] +((ub4)k[9]<<8) +((ub4)k[10]<<16)+((ub4)k[11]<<24));
d += (k[12]+((ub4)k[13]<<8)+((ub4)k[14]<<16)+((ub4)k[15]<<24));
mix(a,b,c,d);
k += 16; len -= 16;
}
/*------------------------------------- handle the last 15 bytes */
/* the total key length is folded into d, so keys of different length that
   share their bytes start from different states */
d += length;
switch(len) /* all the case statements fall through */
{
case 15: d+=((ub4)k[14]<<24);
case 14: d+=((ub4)k[13]<<16);
case 13: d+=((ub4)k[12]<<8);
/* the first byte of d is reserved for the length */
case 12: c+=((ub4)k[11]<<24);
case 11: c+=((ub4)k[10]<<16);
case 10: c+=((ub4)k[9]<<8);
case 9 : c+=k[8];
case 8 : b+=((ub4)k[7]<<24);
case 7 : b+=((ub4)k[6]<<16);
case 6 : b+=((ub4)k[5]<<8);
case 5 : b+=k[4];
case 4 : a+=((ub4)k[3]<<24);
case 3 : a+=((ub4)k[2]<<16);
case 2 : a+=((ub4)k[1]<<8);
case 1 : a+=k[0];
/* case 0: nothing left to add */
}
mix(a,b,c,d);
/*-------------------------------------------- report the result */
return d;
}
#endif
***********************************************************************************************************
file_parser.h :
#include "hash_table.h"
#ifndef FILE_PARSER_H_
#define FILE_PARSER_H_
#define MAX_LINE_LENGTH 256
/* Reads assembly source from fptr line by line. With pass == 1 the labels found
 * are stored with their addresses in hash_table; with pass == 2 the encoded
 * instructions and data are written to Out. */
void parse_file(FILE *fptr, int pass, char *instructions[], size_t inst_len, hash_table_t *hash_table, FILE *Out);
/* Searches instructions[low..high] (both bounds inclusive) for string using
 * strcmp ordering. Returns the index of the match, or -1 when it is absent. */
int binarySearch(char *instructions[], int low, int high, char *string);
/* Classifies a mnemonic: returns 'r', 'i' or 'j', or 0 when it is not recognised. */
char instruction_type(char *instruction);
/* Returns the 5-character binary string for a register name given without the
 * leading '$', or NULL when the name is unknown. */
char *register_address(char *registerName);
/* Writes one R-type instruction to Out as a string of '0'/'1' characters:
 * opcode 000000, rs, rt, rd, shamt, function. */
void rtype_instruction(char *instruction, char *rs, char *rt, char *rd, int shamt, FILE *Out);
/* Writes one I-type instruction to Out: opcode, rs, rt, 16-bit immediate. */
void itype_instruction(char *instruction, char *rs, char *rt, int immediate, FILE *Out);
/* Writes one J-type instruction to Out: opcode followed by a 26-bit field. */
void jtype_instruction(char *instruction, int immediate, FILE *Out);
/* Writes binary_rep to Out as 32 '0'/'1' characters, most significant bit first. */
void word_rep(int binary_rep, FILE *Out);
/* Writes string, including its terminating NUL, to Out in groups of four
 * characters; each group is emitted in reverse character order, 8 bits per
 * character. */
void ascii_rep(char string[], FILE *Out);
/* Writes num into str as a binary string of `padding` digits.
 * NOTE(review): only the start of the definition is visible here -- confirm
 * which padding values are supported. */
void getBin(int num, char *str, int padding);
/* Presumably converts a string of binary digits to its integer value; the
 * definition is not visible here -- verify against the implementation. */
int getDec(char *bin);
#endif /* FILE_PARSER_H_ */
****************************************************************
file_parser.c :
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include <stdint.h>
#include <unistd.h>
#include "file_parser.h"
#include "tokenizer.h"
/*
* The structs below map a character to an integer.
* They are used in order to map a specific instruction/register to its binary format in ASCII
*/
// Maps every MIPS general-purpose register name (written without the leading
// '$') to its 5-bit register number as a binary string. The entries are in
// register-number order, 0 ($zero) to 31 ($ra), and the list ends with a
// NULL-name sentinel that lookup loops use as their stop condition.
struct {
    const char *name;
    char *address;
} registerMap[] = {
    { "zero", "00000" },
    { "at", "00001" },
    { "v0", "00010" },
    { "v1", "00011" },
    { "a0", "00100" },
    { "a1", "00101" },
    { "a2", "00110" },
    { "a3", "00111" },
    { "t0", "01000" },
    { "t1", "01001" },
    { "t2", "01010" },
    { "t3", "01011" },
    { "t4", "01100" },
    { "t5", "01101" },
    { "t6", "01110" },
    { "t7", "01111" },
    { "s0", "10000" },
    { "s1", "10001" },
    { "s2", "10010" },
    { "s3", "10011" },
    { "s4", "10100" },
    { "s5", "10101" },
    { "s6", "10110" },
    { "s7", "10111" },
    { "t8", "11000" },
    { "t9", "11001" },
    // Registers 26-31: without them `jr $ra` or `lw $t0,0($gp)` cannot be encoded
    { "k0", "11010" },
    { "k1", "11011" },
    { "gp", "11100" },
    { "sp", "11101" },
    { "fp", "11110" },
    { "ra", "11111" },
    { NULL, NULL } };
// Maps each supported R-type mnemonic to the 6-bit 'function' field of its
// encoding (the opcode field of every R-type instruction is 000000).
// The list ends with a NULL-name sentinel.
struct {
    const char *name;
    char *function;
} rMap[] = {
    { "add", "100000" },
    { "sub", "100010" }, // 0x22; 100001 (0x21) would encode addu instead
    { "and", "100100" },
    { "or", "100101" },
    { "sll", "000000" },
    { "slt", "101010" },
    { "srl", "000010" },
    { "jr", "001000" },
    { NULL, NULL } };
// Opcode table for the supported I-type mnemonics: each name is paired with
// its 6-bit opcode as a binary string. A NULL name terminates the table.
struct {
    const char *name;
    char *address;
} iMap[] = {
    { .name = "lw",   .address = "100011" },
    { .name = "sw",   .address = "101011" },
    { .name = "andi", .address = "001100" },
    { .name = "ori",  .address = "001101" },
    { .name = "lui",  .address = "001111" },
    { .name = "beq",  .address = "000100" },
    { .name = "slti", .address = "001010" },
    { .name = "addi", .address = "001000" },
    { .name = NULL,   .address = NULL }
};
// Opcode table for the supported J-type mnemonics: each name is paired with
// its 6-bit opcode as a binary string. A NULL name terminates the table.
struct {
    const char *name;
    char *address;
} jMap[] = {
    { .name = "j",   .address = "000010" },
    { .name = "jal", .address = "000011" },
    { .name = NULL,  .address = NULL }
};
/* Largest operand count of any supported instruction, and the size of the
 * buffer reserved for one operand (register name, immediate or label). */
enum { MAX_OPERANDS = 3, OPERAND_LEN = 64 };

/*
 * Splits `text` on the characters in `delims` and copies at most max_ops
 * operands into ops. Collection stops at the end of the line or at the first
 * token that starts a comment ('#'). Operands longer than OPERAND_LEN - 1
 * characters are truncated rather than overflowing their buffer.
 * Returns the number of operands stored; unused slots hold empty strings.
 */
static int collect_operands(char *text, char *delims, char ops[][OPERAND_LEN], int max_ops) {
    int count = 0;

    for (int i = 0; i < max_ops; i++)
        ops[i][0] = '\0';
    while (count < max_ops) {
        char *tok = parse_token(text, delims, &text, NULL);
        if (tok == NULL)
            break;
        if (*tok == '#') {
            free(tok);
            break;
        }
        snprintf(ops[count], OPERAND_LEN, "%s", tok);
        free(tok);
        count++;
    }
    return count;
}

/*
 * Looks up a label recorded during pass 1. Returns 1 and stores the label's
 * address in *address when it is known, 0 otherwise.
 */
static int lookup_label(hash_table_t *hash_table, char *label, uint32_t *address) {
    uint32_t *stored = hash_find(hash_table, label, strlen(label) + 1);

    if (stored == NULL)
        return 0;
    *address = *stored;
    return 1;
}

/*
 * Stores `token` (with a trailing ':' removed) in the hash table together
 * with `address`. The table copies the key, so the caller still owns token;
 * the address cell allocated here is owned by the table's data pointer.
 * On failure the given error text is written to Out and the program exits.
 */
static void record_label(hash_table_t *hash_table, char *token, int32_t address, char *error_text, FILE *Out) {
    size_t token_len = strlen(token);
    uint32_t *stored;

    if (token_len > 0 && token[token_len - 1] == ':')
        token[token_len - 1] = '\0';

    stored = malloc(sizeof *stored);
    if (stored == NULL) {
        fprintf(Out, "Out of memory ");
        exit(1);
    }
    *stored = (uint32_t)address;
    if (hash_insert(hash_table, token, strlen(token) + 1, stored) != 1) {
        fprintf(Out, "%s", error_text);
        exit(1);
    }
}

/*
 * Pass 1 handling of one token: records text labels and data labels
 * (.word / .asciiz) and advances *instruction_count past the data a label
 * declares. tok_ptr is the remainder of the line after token.
 */
static void first_pass_token(char *token, char *tok_ptr, int data_reached, int32_t *instruction_count,
        hash_table_t *hash_table, FILE *Out) {
    /* A label names the location where its datum starts, so the address is
     * captured before the location counter is advanced. */
    int32_t start = *instruction_count;

    printf("First pass ");
    // A token containing ':' in the .text section is a code label
    if (strstr(token, ":") && data_reached == 0) {
        printf("Label ");
        record_label(hash_table, token, start, "Error inserting into hash table ", Out);
        return;
    }
    if (strstr(tok_ptr, ".word")) {
        printf(".word ");
        if (strstr(tok_ptr, ":")) {
            // Array form "value:count": count words are reserved
            char *count_text = tok_ptr;
            char *value_text;

            printf("array ");
            value_text = parse_token(count_text, ":", &count_text, NULL);
            free(value_text);
            *instruction_count = *instruction_count + (atoi(count_text) * 4);
            record_label(hash_table, token, start, "Error in hash table insertion ", Out);
            printf("End array ");
        } else {
            // A single word
            *instruction_count = *instruction_count + 4;
            record_label(hash_table, token, start, "Error in hash table insertion ", Out);
            printf("end singe var ");
        }
    } else if (strstr(tok_ptr, ".asciiz")) {
        char *quote = strchr(tok_ptr, '"');
        char *text = NULL;

        if (quote != NULL)
            text = parse_token(quote, "\"", &quote, NULL);
        if (text != NULL) {
            /* NOTE(review): pass 2 emits the string including its NUL and
             * padded to whole words, while only strlen() bytes are counted
             * here -- confirm which layout is intended. */
            *instruction_count = *instruction_count + (int32_t)strlen(text);
            free(text);
        }
        record_label(hash_table, token, start, "Error in hash table insertion ", Out);
    }
}

/*
 * Pass 2 handling of one instruction mnemonic in the .text section: parses
 * the operands that follow it on the line and writes the encoding to Out.
 * instruction_count must already point past this instruction.
 * Returns NULL on success (or when the mnemonic has no encoder here), or a
 * short description of the problem when the instruction cannot be encoded.
 */
static const char *assemble_instruction(char *token, char *tok_ptr, int32_t instruction_count,
        hash_table_t *hash_table, FILE *Out) {
    static const char bad_operands[] = "missing or malformed operands";
    static const char bad_label[] = "undefined label";
    char ops[MAX_OPERANDS][OPERAND_LEN];
    uint32_t target = 0;
    char inst_type = instruction_type(token);

    if (inst_type == 'r') {
        if (strcmp(token, "add") == 0 || strcmp(token, "sub") == 0 || strcmp(token, "and") == 0
                || strcmp(token, "or") == 0 || strcmp(token, "slt") == 0) {
            // $rd, $rs, $rt
            if (collect_operands(tok_ptr, " $,\n\t", ops, 3) != 3)
                return bad_operands;
            rtype_instruction(token, ops[1], ops[2], ops[0], 0, Out);
        } else if (strcmp(token, "sll") == 0 || strcmp(token, "srl") == 0) {
            // $rd, $rt, shamt
            if (collect_operands(tok_ptr, " $,\n\t", ops, 3) != 3)
                return bad_operands;
            rtype_instruction(token, "00000", ops[1], ops[0], atoi(ops[2]), Out);
        } else if (strcmp(token, "jr") == 0) {
            // $rs
            if (collect_operands(tok_ptr, " $,\n\t", ops, 1) != 1)
                return bad_operands;
            rtype_instruction(token, ops[0], "00000", "00000", 0, Out);
        }
    } else if (inst_type == 'i') {
        if (strcmp(token, "la") == 0) {
            // Pseudo instruction: la $rt, label  ->  lui $rt, hi16 ; ori $rt, $rt, lo16
            if (collect_operands(tok_ptr, " $,\n\t", ops, 2) != 2)
                return bad_operands;
            if (!lookup_label(hash_table, ops[1], &target))
                return bad_label;
            itype_instruction("lui", "00000", ops[0], (int)((target >> 16) & 0xFFFFu), Out);
            itype_instruction("ori", ops[0], ops[0], (int)(target & 0xFFFFu), Out);
        } else if (strcmp(token, "lw") == 0 || strcmp(token, "sw") == 0) {
            // $rt, imm($rs)
            if (collect_operands(tok_ptr, " $,\n\t()", ops, 3) != 3)
                return bad_operands;
            itype_instruction(token, ops[2], ops[0], atoi(ops[1]), Out);
        } else if (strcmp(token, "andi") == 0 || strcmp(token, "ori") == 0
                || strcmp(token, "slti") == 0 || strcmp(token, "addi") == 0) {
            // $rt, $rs, imm
            if (collect_operands(tok_ptr, " $,\n\t", ops, 3) != 3)
                return bad_operands;
            itype_instruction(token, ops[1], ops[0], atoi(ops[2]), Out);
        } else if (strcmp(token, "lui") == 0) {
            // $rt, imm
            if (collect_operands(tok_ptr, " $,\n\t", ops, 2) != 2)
                return bad_operands;
            itype_instruction(token, "00000", ops[0], atoi(ops[1]), Out);
        } else if (strcmp(token, "beq") == 0) {
            // $rs, $rt, label
            if (collect_operands(tok_ptr, " $,\n\t", ops, 3) != 3)
                return bad_operands;
            if (!lookup_label(hash_table, ops[2], &target))
                return bad_label;
            /* The branch field is the distance in words from the instruction
             * after the branch, which is where instruction_count points. */
            itype_instruction(token, ops[0], ops[1], ((int32_t)target - instruction_count) / 4, Out);
        }
    } else if (inst_type == 'j') {
        // The label is the first word after the mnemonic; a comment may follow it
        char label[OPERAND_LEN];
        char *start = tok_ptr + strspn(tok_ptr, " \t");
        size_t label_len = strcspn(start, " \t\r\n#");

        if (label_len == 0 || label_len >= sizeof label)
            return bad_operands;
        memcpy(label, start, label_len);
        label[label_len] = '\0';
        /* The rest of the line has been consumed: cut it off so that the
         * caller does not tokenise the label again. */
        *tok_ptr = '\0';
        if (!lookup_label(hash_table, label, &target))
            return bad_label;
        /* The 26-bit jump field holds the word address, i.e. bits 27..2. */
        jtype_instruction(token, (int)((target >> 2) & 0x03FFFFFFu), Out);
    }
    return NULL;
}

/*
 * Pass 2 handling of one token in the .data section: when the remainder of
 * the line declares a .word or an .asciiz, its contents are written to Out.
 */
static void emit_data(char *tok_ptr, FILE *Out) {
    if (strstr(tok_ptr, ".word")) {
        int var_value = 0;

        if (strstr(tok_ptr, ":")) {
            // Array form "value:count": the value is repeated count times
            char *count_text = tok_ptr;
            char *value_text = parse_token(count_text, ":", &count_text, NULL);
            int freq;

            if (value_text == NULL)
                return;
            freq = atoi(count_text);
            sscanf(value_text, "%*s %d", &var_value);
            free(value_text);
            for (int i = 0; i < freq; i++)
                word_rep(var_value, Out);
        } else {
            // ".word value": skip the directive, then read the value
            sscanf(tok_ptr, "%*s %d", &var_value);
            word_rep(var_value, Out);
        }
    } else if (strstr(tok_ptr, ".asciiz")) {
        printf("tok_ptr '%s' ", tok_ptr);
        if (strncmp(".asciiz ", tok_ptr, 8) == 0) {
            char *quote = strchr(tok_ptr + 7, '"');
            char *text = NULL;

            if (quote != NULL)
                text = parse_token(quote, "\"", &quote, NULL);
            if (text != NULL) {
                ascii_rep(text, Out);
                free(text);
            }
        }
    }
}

/*
 * Reads the assembly source in fptr line by line and processes every token.
 *
 *   pass == 1: labels are stored in hash_table together with their address
 *              (code labels in .text, .word/.asciiz labels in .data).
 *   pass == 2: instructions and data are encoded and written to Out, using
 *              the labels collected by pass 1.
 *
 * The location counter starts at 0 for .text and is reset to 0x2000 when
 * ".data" is seen. `instructions` and `inst_len` are not used; mnemonics are
 * recognised through search().
 * NOTE(review): search() is not declared in any header visible here --
 * confirm its prototype is in scope where this file is compiled.
 * Progress messages are printed to standard output; instructions that cannot
 * be encoded are reported on standard error and skipped.
 */
void parse_file(FILE *fptr, int pass, char *instructions[], size_t inst_len, hash_table_t *hash_table, FILE *Out) {
    char line[MAX_LINE_LENGTH + 1];
    int32_t line_num = 1;
    int32_t instruction_count = 0x00000000;
    int data_reached = 0;

    (void)instructions;
    (void)inst_len;

    while (fgets(line, sizeof line, fptr) != NULL) {
        size_t line_len = strlen(line);

        if (line_len > 0 && line[line_len - 1] != '\n') {
            if (line_len >= MAX_LINE_LENGTH) {
                /* The buffer filled up before the newline: discard the rest
                 * of the line so that it is not parsed as a line of its own. */
                int ch;
                while ((ch = fgetc(fptr)) != EOF && ch != '\n')
                    ;
                fprintf(Out, "line %d: line is too long. ignoring line ... ", (int)line_num);
                line_num++;
                continue;
            }
            /* Last line of a file without a final newline: add one, because
             * the tokenizer only returns tokens that end in a delimiter. */
            line[line_len] = '\n';
            line[line_len + 1] = '\0';
        }

        /* parse the tokens within a line */
        char *tok_ptr = line;
        for (;;) {
            char *token = parse_token(tok_ptr, " \n\t$,", &tok_ptr, NULL);

            /* blank line or comment begins here. go to the next line */
            if (token == NULL || *token == '#') {
                line_num++;
                free(token);
                break;
            }
            printf("token: %s ", token);

            // "la" expands to two instructions (8 bytes); any other known
            // mnemonic occupies 4 bytes
            int x = search(token);
            if (x >= 0) {
                instruction_count += (strcmp(token, "la") == 0) ? 8 : 4;
            }
            // ".data" moves the location counter to the data segment
            else if (strcmp(token, ".data") == 0) {
                instruction_count = 0x00002000;
                data_reached = 1;
            }
            printf("PC Count: %d ", (int)instruction_count);

            if (pass == 1) {
                first_pass_token(token, tok_ptr, data_reached, &instruction_count, hash_table, Out);
            } else if (pass == 2) {
                printf("############ Pass 2 ############## ");
                if (data_reached == 0) {
                    if (x != -1) {
                        const char *problem = assemble_instruction(token, tok_ptr, instruction_count, hash_table, Out);
                        if (problem != NULL)
                            fprintf(stderr, "line %d: %s: %s\n", (int)line_num, token, problem);
                    }
                    if (strcmp(token, "nop") == 0) {
                        fprintf(Out, "00000000000000000000000000000000 ");
                    }
                } else {
                    emit_data(tok_ptr, Out);
                }
            }
            free(token);
        }
    }
}
// Binary search over a sorted array of strings.
//
// instructions : array sorted in ascending strcmp() order
// low, high    : first and last index of the range to search, both inclusive
// string       : the string to look for
//
// Returns the index of the matching element, or -1 when the string is not in
// the range. An empty range (high < low) yields -1 without reading any
// element of the array.
int binarySearch(char *instructions[], int low, int high, char *string) {
    while (low <= high) {
        int mid = low + (high - low) / 2;
        int comp = strcmp(instructions[mid], string);

        if (comp == 0)
            return mid;
        if (comp > 0)
            high = mid - 1; // instructions[mid] sorts after string: go left
        else
            low = mid + 1;  // instructions[mid] sorts before string: go right
    }
    return -1;
}
// Classifies a mnemonic by its instruction format.
// Returns 'r', 'i' or 'j' for a recognised mnemonic and 0 for anything else.
// The pseudo instruction "la" is reported as 'i'.
char instruction_type(char *instruction) {
    static const struct {
        const char *mnemonic;
        char kind;
    } known[] = {
        { "add", 'r' }, { "sub", 'r' }, { "and", 'r' }, { "or", 'r' },
        { "sll", 'r' }, { "slt", 'r' }, { "srl", 'r' }, { "jr", 'r' },
        { "lw", 'i' }, { "sw", 'i' }, { "andi", 'i' }, { "ori", 'i' },
        { "lui", 'i' }, { "beq", 'i' }, { "slti", 'i' }, { "addi", 'i' },
        { "la", 'i' },
        { "j", 'j' }, { "jal", 'j' },
    };

    for (size_t n = 0; n < sizeof known / sizeof known[0]; n++) {
        if (strcmp(instruction, known[n].mnemonic) == 0)
            return known[n].kind;
    }
    // Not a supported mnemonic
    return 0;
}
// Looks a register name (given without the leading '$') up in registerMap.
// Returns the register's binary string, or NULL when the name is not listed.
char *register_address(char *registerName) {
    size_t slot = 0;

    // Advance until the name matches or the NULL-name sentinel is reached
    while (registerMap[slot].name != NULL
            && strcmp(registerName, registerMap[slot].name) != 0) {
        slot++;
    }
    return (registerMap[slot].name != NULL) ? registerMap[slot].address : NULL;
}
// Writes one R-type instruction to Out as a string of '0'/'1' characters in
// the field order opcode(6) rs(5) rt(5) rd(5) shamt(5) function(6).
//
// instruction : mnemonic, looked up in rMap for the function field
// rs, rt, rd  : register names without '$'; the literal "00000" stands for an
//               unused field and is emitted as is
// shamt       : shift amount
//
// When the mnemonic or one of the register names is unknown, nothing is
// written to Out and the problem is reported on standard error; the
// unresolved fields are never handed to fprintf as NULL strings.
void rtype_instruction(char *instruction, char *rs, char *rt, char *rd, int shamt, FILE *Out) {
    char *opcode = "000000";
    char *rdBin = (strcmp(rd, "00000") != 0) ? register_address(rd) : "00000";
    char *rsBin = (strcmp(rs, "00000") != 0) ? register_address(rs) : "00000";
    char *rtBin = (strcmp(rt, "00000") != 0) ? register_address(rt) : "00000";
    char *func = NULL;
    char shamtBin[6];

    for (size_t i = 0; rMap[i].name != NULL; i++) {
        if (strcmp(instruction, rMap[i].name) == 0) {
            func = rMap[i].function;
            break;
        }
    }

    if (func == NULL || rdBin == NULL || rsBin == NULL || rtBin == NULL) {
        fprintf(stderr, "cannot encode R-type instruction '%s' (rd '%s', rs '%s', rt '%s')\n",
                instruction, rd, rs, rt);
        return;
    }

    // Convert shamt to a 5-digit binary string
    getBin(shamt, shamtBin, 5);
    // Print out the instruction to the file
    fprintf(Out, "%s%s%s%s%s%s ", opcode, rsBin, rtBin, rdBin, shamtBin, func);
}
// Writes one I-type instruction to Out as a string of '0'/'1' characters in
// the field order opcode(6) rs(5) rt(5) immediate(16).
//
// instruction  : mnemonic, looked up in iMap for the opcode
// rs, rt       : register names without '$'; the literal "00000" stands for
//                an unused field and is emitted as is
// immediateNum : value of the 16-bit immediate field
//
// When the mnemonic or one of the register names is unknown, nothing is
// written to Out and the problem is reported on standard error; the
// unresolved fields are never handed to fprintf as NULL strings.
void itype_instruction(char *instruction, char *rs, char *rt, int immediateNum, FILE *Out) {
    char *rsBin = (strcmp(rs, "00000") != 0) ? register_address(rs) : "00000";
    char *rtBin = (strcmp(rt, "00000") != 0) ? register_address(rt) : "00000";
    char *opcode = NULL;
    char immediate[17];

    for (size_t i = 0; iMap[i].name != NULL; i++) {
        if (strcmp(instruction, iMap[i].name) == 0) {
            opcode = iMap[i].address;
            break;
        }
    }

    if (opcode == NULL || rsBin == NULL || rtBin == NULL) {
        fprintf(stderr, "cannot encode I-type instruction '%s' (rs '%s', rt '%s')\n",
                instruction, rs, rt);
        return;
    }

    // Convert immediate to a 16-digit binary string
    getBin(immediateNum, immediate, 16);
    // Print out the instruction to the file
    fprintf(Out, "%s%s%s%s ", opcode, rsBin, rtBin, immediate);
}
// Emits one J-type instruction to Out: the 6-bit opcode that jMap lists for
// the mnemonic, followed by `immediate` rendered as 26 binary digits.
void jtype_instruction(char *instruction, int immediate, FILE *Out) {
    char target_bits[27];
    char *opcode = NULL;
    size_t slot = 0;

    // Render the 26-bit field
    getBin(immediate, target_bits, 26);

    // Find the opcode belonging to the mnemonic
    while (jMap[slot].name != NULL) {
        if (!strcmp(instruction, jMap[slot].name))
            opcode = jMap[slot].address;
        slot++;
    }

    // Write the instruction to the output file
    fprintf(Out, "%s%s ", opcode, target_bits);
}
// Write out the variable in binary
// binary_rep: value to emit; its 32 low bits are written most significant
//             bit first as '0'/'1' characters.
// Out:        stream that receives the 32 digits followed by one space.
void word_rep(int binary_rep, FILE *Out) {
    // Work on an unsigned copy: shifting a signed 1 into bit 31 is undefined.
    const unsigned int bits = (unsigned int)binary_rep;
    char digits[33];

    for (int k = 31; k >= 0; k--) {
        digits[31 - k] = ((bits >> k) & 1u) ? '1' : '0';
    }
    digits[32] = '\0';

    fprintf(Out, "%s ", digits);
}
// Write out the ascii string
// string: NUL-terminated text; the terminator is emitted as part of the data.
// Out:    stream that receives the encoded words.
// The bytes are taken four at a time. Each group is written as 32 binary
// digits with the group's bytes in reverse order (fourth byte first), followed
// by one space. A final group shorter than four bytes is padded with zero
// bytes; the earlier version left that padding uninitialised, so the emitted
// bits were indeterminate.
void ascii_rep(char string[], FILE *Out) {
    // +1 so the terminating NUL is written out too
    const size_t total = strlen(string) + 1;

    for (size_t base = 0; base < total; base += 4) {
        // Collect up to four bytes; missing ones stay zero
        unsigned char word[4] = {0, 0, 0, 0};
        for (size_t j = 0; j < 4 && base + j < total; j++) {
            word[j] = (unsigned char)string[base + j];
        }

        // Emit the group back to front, each byte most significant bit first
        for (int j = 3; j >= 0; j--) {
            for (int k = 7; k >= 0; k--) {
                fprintf(Out, "%c", (word[j] & (1u << k)) ? '1' : '0');
            }
        }
        fprintf(Out, " ");
    }
}
// Convert an integer to a fixed-width string of binary digits.
// num:     value to convert; negative values appear in two's complement.
// str:     destination buffer, must hold at least padding + 1 characters.
// padding: number of digits to produce (the callers use 5, 16 and 26).
// The low `padding` bits of num are written most significant bit first and the
// string is NUL-terminated. Any non-negative width is accepted; digits beyond
// the width of int repeat the sign bit. A NULL str or a negative padding is
// ignored.
void getBin(int num, char *str, int padding) {
    if (str == NULL || padding < 0)
        return;

    // Unsigned copy so the shifts below are well defined for negative num
    const unsigned int bits = (unsigned int)num;
    const int int_bits = (int)(sizeof(unsigned int) * 8);

    for (int i = 0; i < padding; i++) {
        int shift = padding - 1 - i;
        // Clamp to the top bit: this sign-extends for over-wide requests
        if (shift >= int_bits)
            shift = int_bits - 1;
        str[i] = (char)('0' + ((bits >> shift) & 1u));
    }
    str[padding] = '\0';
}
// Convert a binary string to a decimal value
// bin: NUL-terminated string of '0'/'1' characters, most significant bit first.
// Returns the numeric value of the string. Returns 0 for an empty or NULL
// string, and 0 as soon as a character other than '0' or '1' is met.
// The value is accumulated in unsigned arithmetic so that a 32-digit input
// with the top bit set no longer overflows a signed int; the result is then
// converted to int (two's complement on the usual targets).
int getDec(char *bin) {
    if (bin == NULL)
        return 0;

    unsigned int value = 0;
    for (const char *p = bin; *p != '\0'; p++) {
        // Check the character is binary
        if (*p != '0' && *p != '1')
            return 0;
        // Shift in the next digit (one pass, no per-digit power loop)
        value = (value << 1) | (unsigned int)(*p - '0');
    }
    return (int)value;
}
*****************************************************************************************************************
assembler.c :
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "file_parser.h"
#include "hash_table.h"
int search(char *instruction);
// Array that holds the supported instructions.
// The numbers beside the entries are their positions as initialised here;
// main() reorders this array with qsort, so they do not describe the order
// seen after that call.
char *instructions[] = {
    "la", // 0
    "lui", // 1
    "lw", // 2
    "sw", // 3
    "add", // 4
    "sub", // 5
    "addi", // 6
    "or", // 7
    "and", // 8
    "ori", // 9
    "andi", // 10
    "slt", // 11
    "slti", // 12
    "sll", // 13
    "srl", // 14
    "beq", // 15
    "j", // 16
    "jr", // 17
    "jal" // 18
};
// Size of array
size_t inst_len = sizeof(instructions)/sizeof(char *);
// Linear lookup of a mnemonic in the instructions array.
// instruction: mnemonic to find, compared with strcmp (exact match).
// Returns the index of the matching entry in the array's current order, or -1
// when the mnemonic is not supported.
int search(char *instruction) {
    for (size_t i = 0; i < inst_len; i++) {
        if (strcmp(instruction, instructions[i]) == 0) {
            return (int)i;
        }
    }
    // Single unconditional exit so every path returns a value
    return -1;
}
// Comparator for qsort over an array of C strings.
// a, b: pointers to two array elements, each element being a char pointer.
// Returns the strcmp ordering of the two strings those elements point to.
int string_comp(const void *a, const void *b) {
    const char *const *lhs = a;
    const char *const *rhs = b;
    return strcmp(*lhs, *rhs);
}
// Program entry point: assembler <input file> <output file>
// argv[1] is opened for reading and argv[2] for writing; the input is handed
// to parse_file twice (pass 1, rewind, pass 2) together with the instruction
// table and a hash table shared by both passes.
// Returns 0 on success and EXIT_FAILURE when the arguments are wrong, a file
// cannot be opened, the hash table cannot be created or the output cannot be
// flushed. Diagnostics go to stderr.
int main (int argc, char *argv[]) {
    // Make sure correct number of arguments input
    if (argc != 3) {
        fprintf(stderr, "Incorrect number of arguments\n");
        return EXIT_FAILURE;
    }

    // Open I/O files and check that they opened properly
    FILE *In = fopen(argv[1], "r");
    if (In == NULL) {
        fprintf(stderr, "Input file could not be opened.\n");
        return EXIT_FAILURE;
    }
    FILE *Out = fopen(argv[2], "w");
    if (Out == NULL) {
        fprintf(stderr, "Output file could not be opened.\n");
        fclose(In);
        return EXIT_FAILURE;
    }

    // Sort the instruction table (kept from the original: "for faster search")
    qsort(instructions, inst_len, sizeof(char *), string_comp);

    // Create a hash table of size 127
    hash_table_t *hash_table = create_hash_table(127);
    // NOTE(review): assumes create_hash_table reports failure with NULL - confirm
    if (hash_table == NULL) {
        fprintf(stderr, "Hash table could not be created.\n");
        fclose(In);
        fclose(Out);
        return EXIT_FAILURE;
    }

    // Parse in passes
    parse_file(In, 1, instructions, inst_len, hash_table, Out);
    // Rewind input file & start pass 2
    rewind(In);
    parse_file(In, 2, instructions, inst_len, hash_table, Out);

    // Close files; fclose flushes, so a failure here means lost output
    fclose(In);
    if (fclose(Out) != 0) {
        fprintf(stderr, "Output file could not be written.\n");
        return EXIT_FAILURE;
    }
    return 0;
}
*************************************************************
*************************************************************
After compiling, run:
$ ./assembler add.asm add.txt
*************************************************************
Related Questions
drjack9650@gmail.com
Navigate
Integrity-first tutoring: explanations and feedback only — we do not complete graded work. Learn more.