Spell Checker In this exercise you will design and implement a basic spell check
ID: 3684006 • Letter: S
Question
Spell Checker In this exercise you will design and implement a basic spell checker program. Specifically, you will write a program that opens a text file, processes each word separately and checks its spelling against a dictionary file of words to determine if it is a valid English word. All of your code should be placed in one file, spellChecker.c: the program accepts the file name as a command line argument so that it is executable like this: >./a.out infile Your program should open that input file and spell check each individual word. You may assume that each word is separated by some whitespace, and you may assume that there are no multiline or hyphenated words. Ignore all punctuation characters (,;. ? ? !) when spell checking. Use the dictionary available on the CSE system. The dictionary is contained in a plaintext file,/usr/share/dict/american which contains thousands of english words (one on each line) to spell check each word. Your output should produce a list of misspelled or unrecognized words (words not contained in the dictionary file), and might look like this:Explanation / Answer
#include "hash.h"
#include <stdlib.h>
#include <stdio.h>
#include <omp.h>
#include <time.h>
#include "word_list.h"
int main(int argc, char *argv[])
{
HashFunction hf[] = { RSHash, JSHash, ELFHash, BKDRHash, SDBMHash,
DJBHash, DEKHash, BPHash, FNVHash, APHash,
hash_div_701, hash_div_899, hash_mult_700, hash_mult_900
};
word_list *wl;
char *word;
char *bv;
double start, end, diff;
size_t wl_size;
size_t bv_size;
size_t num_hf;
size_t i, j;
unsigned int hash;
int misspelled;
// Set Number of threads to 4
omp_set_num_threads(4);
//
if (argc != 2) {
printf("Please give word to spell check ");
exit(EXIT_FAILURE);
}
word = argv[1];
/* load the word list */
wl = create_word_list("word_list.txt");
if (!wl) {
fprintf(stderr, "Could not read word list ");
exit(EXIT_FAILURE);
}
wl_size = get_num_words(wl);
start = omp_get_wtime();
/* create the bit vector */
bv_size = 100000000;
num_hf = sizeof(hf) / sizeof(HashFunction);
bv = calloc(bv_size, sizeof(char));
if (!bv) {
destroy_word_list(wl);
exit(EXIT_FAILURE);
}
#pragma omp parallel for private(i,hash)
for (j = 0; j < num_hf; j++) {
for (i = 0; i < wl_size; i++) {
hash = hf[j] (get_word(wl, i));
hash %= bv_size;
bv[hash] = 1;
}
}
/* do the spell checking */
misspelled = 0;
for (j = 0; j < num_hf; j++) {
hash = hf[j] (word);
hash %= bv_size;
if (bv[hash] == 0)
misspelled = 1;
}
end = omp_get_wtime();
diff = end - start;
printf("Spell check time: %f ", diff);
/* tell the user the result */
if (misspelled)
printf("Word %s is misspelled ", word);
else
printf("Word %s is spelled correctly ", word);
free(bv);
destroy_word_list(wl);
return EXIT_SUCCESS;
}
word_list.h
#include <stdlib.h>
#ifndef WORD_LIST_H
#define WORD_LIST_H
typedef struct {
char **words;
size_t num_words;
} word_list;
word_list *create_word_list(const char *path);
const char *get_word(word_list * wl, size_t index);
size_t get_num_words(word_list * wl);
void destroy_word_list(word_list * wl);
#endif
word_list.c
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "word_list.h"
#define WORDS_GROW_FACTOR 1024
word_list *create_word_list(const char *path)
{
char line[1024];
word_list *wl;
FILE *f;
char **new_words;
size_t len, words_size;
wl = calloc(1, sizeof(word_list));
if (!wl)
return NULL;
wl->words = NULL;
wl->num_words = 0;
f = fopen(path, "r");
if (!f) {
free(wl);
return NULL;
}
words_size = 0;
while (fgets(line, sizeof(line), f)) {
if (words_size == wl->num_words) {
words_size += WORDS_GROW_FACTOR;
new_words = realloc(wl->words,
words_size * sizeof(char *));
if (!new_words) {
destroy_word_list(wl);
wl = NULL;
break;
}
wl->words = new_words;
}
len = strlen(line);
wl->words[wl->num_words] = calloc(len, sizeof(char));
if (!wl->words[wl->num_words]) {
destroy_word_list(wl);
wl = NULL;
break;
}
/* do not copy the newline which fgets puts into line */
memcpy(wl->words[wl->num_words], line, len - 1);
wl->num_words++;
}
fclose(f);
return wl;
}
const char *get_word(word_list * wl, size_t index)
{
if (!wl)
return NULL;
if (index >= wl->num_words)
return NULL;
return wl->words[index];
}
size_t get_num_words(word_list * wl)
{
if (!wl)
return 0;
return wl->num_words;
}
void destroy_word_list(word_list * wl)
{
size_t i;
if (!wl)
return;
if (wl->words) {
for (i = 0; i < wl->num_words; i++)
free(wl->words[i]);
free(wl->words);
}
free(wl);
}
hash.h
#ifndef HASH_H
#define HAS_H
typedef unsigned int (*HashFunction) (const char *str);
unsigned int RSHash(const char *str);
unsigned int JSHash(const char *str);
unsigned int ELFHash(const char *str);
unsigned int BKDRHash(const char *str);
unsigned int SDBMHash(const char *str);
unsigned int DJBHash(const char *str);
unsigned int DEKHash(const char *str);
unsigned int BPHash(const char *str);
unsigned int FNVHash(const char *str);
unsigned int APHash(const char *str);
unsigned int hash_div_701(const char *str);
unsigned int hash_div_899(const char *str);
unsigned int hash_mult_700(const char *str);
unsigned int hash_mult_900(const char *str);
#endif
hash.c
#include "hash.h"
#include <math.h>
#include <stdlib.h>
#include <string.h>
unsigned int RSHash(const char *str)
{
unsigned int b, a, hash;
size_t len, i;
b = 378551;
a = 63689;
hash = 0;
len = strlen(str);
for (i = 0; i < len; i++) {
hash = hash * a + str[i];
a = a * b;
}
return hash;
}
unsigned int JSHash(const char *str)
{
unsigned int hash;
size_t len, i;
hash = 1315423911;
len = strlen(str);
for (i = 0; i < len; i++)
hash ^= ((hash << 5) + str[i] + (hash >> 2));
return hash;
}
unsigned int ELFHash(const char *str)
{
unsigned int hash, x;
size_t len, i;
hash = 0;
x = 0;
len = strlen(str);
for (i = 0; i < len; i++) {
hash = (hash << 4) + str[i];
if ((x = hash & 0xF0000000L) != 0)
hash ^= (x >> 24);
hash &= ~x;
}
return hash;
}
unsigned int BKDRHash(const char *str)
{
unsigned int seed, hash;
size_t len, i;
seed = 131;
hash = 0;
len = strlen(str);
for (i = 0; i < len; i++)
hash = (hash * seed) + str[i];
return hash;
}
unsigned int SDBMHash(const char *str)
{
unsigned int hash;
size_t len, i;
hash = 0;
len = strlen(str);
for (i = 0; i < len; i++)
hash = str[i] + (hash << 6) + (hash << 16) - hash;
return hash;
}
unsigned int DJBHash(const char *str)
{
unsigned int hash;
size_t len, i;
hash = 5381;
len = strlen(str);
for (i = 0; i < len; i++)
hash = ((hash << 5) + hash) + str[i];
return hash;
}
unsigned int DEKHash(const char *str)
{
unsigned int hash;
size_t len, i;
len = strlen(str);
hash = (unsigned int)len;
for (i = 0; i < len; i++)
hash = ((hash << 5) ^ (hash >> 27)) ^ str[i];
return hash;
}
unsigned int BPHash(const char *str)
{
unsigned int hash = 0;
size_t len, i;
hash = 0;
len = strlen(str);
for (i = 0; i < len; i++)
hash = hash << 7 ^ str[i];
return hash;
}
unsigned int FNVHash(const char *str)
{
const unsigned int fnv_prime = 0x811C9DC5;
unsigned int hash;
size_t len, i;
hash = 0;
len = strlen(str);
for (i = 0; i < len; i++) {
hash *= fnv_prime;
hash ^= str[i];
}
return hash;
}
unsigned int APHash(const char *str)
{
unsigned int hash;
size_t len, i;
hash = 0xAAAAAAAA;
len = strlen(str);
for (i = 0; i < len; i++)
hash ^= ((i & 1) == 0) ?
((hash << 7) ^ str[i] * (hash >> 3)) :
(~((hash << 11) + (str[i] ^ (hash >> 5))));
return hash;
}
/* The following functions are from Project 2 of CS314 at Rutgers */
static unsigned int key(const char *str)
{
unsigned int hash;
size_t len, i;
hash = 5381;
len = strlen(str);
for (i = 0; i < len; i++)
hash = 33 * hash + (str[i] - 'a' + 1);
return hash;
}
static unsigned int hash_div(const char *str, unsigned int size)
{
return key(str) % size;
}
static unsigned int hash_mult(const char *str, unsigned int size)
{
static double A = 0.6180339887;
double kA;
unsigned int k, r;
k = key(str);
kA = (double)k *A;
r = (unsigned int)floor((double)size * (kA - floor(kA)));
return r;
}
unsigned int hash_div_701(const char *str)
{
return hash_div(str, 701);
}
unsigned int hash_div_899(const char *str)
{
return hash_div(str, 899);
}
unsigned int hash_mult_700(const char *str)
{
return hash_mult(str, 700);
}
unsigned int hash_mult_900(const char *str)
{
return hash_mult(str, 900);
}
Related Questions
drjack9650@gmail.com
Navigate
Integrity-first tutoring: explanations and feedback only — we do not complete graded work. Learn more.