Data Structures & C Programming: Need help identifying what is wrong with spellc

ID: 3726855 • Letter: D

Question

Data Structures & C Programming: Need help identifying what is wrong with spellchecker program for hashtables.

The code compiles and works correctly when a word is input that is spelled correctly.

The issue is when I try a word that is incorrectly spelled, the program only displays "did you mean ...? (null)" instead of the word with the lowest levenshtein distance.

Code and hashtable.c code is provided below!

/********* Spellchecker.c ***************/

#include "hashMap.h"
#include <assert.h>
#include <time.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/**
* Allocates a string for the next word in the file and returns it. This string
* is null terminated. Returns NULL after reaching the end of the file.
* @param file
* @return Allocated string or NULL.
*/
char* nextWord(FILE* file)
{
int maxLength = 16;
int length = 0;
char* word = malloc(sizeof(char) * maxLength);
while (1)
{
char c = fgetc(file);
if ((c >= '0' && c <= '9') ||
(c >= 'A' && c <= 'Z') ||
(c >= 'a' && c <= 'z') ||
c == ''')
{
if (length + 1 >= maxLength)
{
maxLength *= 2;
word = realloc(word, maxLength);
}
word[length] = c;
length++;
}
else if (length > 0 || c == EOF)
{
break;
}
}
if (length == 0)
{
free(word);
return NULL;
}
word[length] = '';
return word;
}

/**
* Loads the contents of the file into the hash map.
* @param file
* @param map
*/
void loadDictionary(FILE* file, HashMap* map)
{ // FIXME: implement
char* word = nextWord(file);

while (word) {
hashMapPut(map, word, 1);
free(word);
word = nextWord(file);
}

free(word);
}

/*********************
* min2 function
* ********************/
int min2(int a, int b) {
int min;
if (a < b)
min = a;
else
min = b;
return min;
}

/**********************************
* HELPER FUNCTION
* code sourced from https://en.wikipedia.org/wiki/Levenshtein_distance#Computing_Levenshtein_distance
* code has been modified to work with hashtables
* *********************************/
#define MIN3(a, b, c) ((a) < (b) ? ((a) < (c) ? (a) : (c)) : ((b) < (c) ? (b) : (c)))

int levenshtein(char *s1, char *s2) {
unsigned int s1len, s2len, x, y, lastdiag, olddiag;
s1len = strlen(s1);
s2len = strlen(s2);
unsigned int column[s1len+1];
for (y = 1; y <= s1len; y++)
column[y] = y;
for (x = 1; x <= s2len; x++) {
column[0] = x;
for (y = 1, lastdiag = x-1; y <= s1len; y++) {
olddiag = column[y];
column[y] = MIN3(column[y] + 1, column[y-1] + 1, lastdiag + (s1[y-1] == s2[x-1] ? 0 : 1));
lastdiag = olddiag;
}
}
return(column[s1len]);
}

/**
* Prints the concordance of the given file and performance information. Uses
* the file input1.txt by default or a file name specified as a command line
* argument.
* @param argc
* @param argv
* @return
*/
int main(int argc, const char** argv)
{ // FIXME: implement
HashMap* map = hashMapNew(1000);
FILE* file = fopen("dictionary.txt", "r");
//if file not found
if(file==NULL)
{
perror("Error");
}
clock_t timer = clock();
loadDictionary(file, map);
timer = clock() - timer;
printf("Dictionary loaded in %f seconds ", (float)timer / (float)CLOCKS_PER_SEC);
fclose(file);

char inputBuffer[256];
int quit = 0;
while (!quit) //Step 4: continue to prompt user for word until they type quit
{
printf("Enter a word or "quit" to quit: ");
scanf("%s", inputBuffer);

// Implement the spell checker code here..

/*****
* Compare input Buffer to words in the dictionary, computing their Levenshtein distance.
* Store that distance as the value for each key in the table
* *********************/
for (int i = 0; i < map->capacity; i++)
{
HashLink* link = map->table[i];
if (link != NULL)
{
while (link != NULL)
{
char* word = link->key;
int minVal = levenshtein(word, inputBuffer);
hashMapPut(map, word, minVal);
link = link->next;
}
}
}

/*****
* Traverse down hash table, checking each bucket.
* Jump out if you find an exact matching dictionary word
* print a message that "word spelled correctly"
* **********************/
if (hashMapContainsKey(map, inputBuffer))
{
printf("That word is spelled correctly! ");
}

/************
* NEED HELP WITH THIS CODE!!!
* If the input Buffer did not match any of the dictionary words exactly,
* generate an array of 5 words that are the closest matches to input Buffer based
* on the LOWEST Levenshtein distance.
* print the array INCLUDING the message "did you mean ...?"
* ********************/
else
{
const char* array[6];
for(int idx = 0; idx < 6; idx++){
int i = 0;
HashLink* link = map->table[i];
HashLink* link2 = map->table[i+1];
while (link != NULL && link2 != NULL) {
int val1 = *(hashMapGet(map, link->key));
int val2 =*(hashMapGet(map, link2->key));
int minim = min2(val1, val2);
if(minim == val1) {
array[idx] = link->key;
}
link = link->next;
link2 = link->next;
}
}

/* Print the new map */
for (int i = 0; i < 6; i++)
{
printf("Did you mean ...? %s ", array[i]);
}
}

if (strcmp(inputBuffer, "quit") == 0)
{
quit = 1;
}
}

hashMapDelete(map);
return 0;
}

/***************** HashMap.c *****************/

#include "hashMap.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <ctype.h>

int hashFunction1(const char* key)
{
int r = 0;
for (int i = 0; key[i] != ''; i++)
{
r += key[i];
}
return r;
}

int hashFunction2(const char* key)
{
int r = 0;
for (int i = 0; key[i] != ''; i++)
{
r += (i + 1) * key[i];
}
return r;
}

/**
* Creates a new hash table link with a copy of the key string.
* @param key Key string to copy in the link.
* @param value Value to set in the link.
* @param next Pointer to set as the link's next.
* @return Hash table link allocated on the heap.
*/
HashLink* hashLinkNew(const char* key, int value, HashLink* next)
{
HashLink* link = malloc(sizeof(HashLink));
link->key = malloc(sizeof(char) * (strlen(key) + 1));
strcpy(link->key, key);
link->value = value;
link->next = next;
return link;
}

/**
* Free the allocated memory for a hash table link created with hashLinkNew.
* @param link
*/
static void hashLinkDelete(HashLink* link)
{
free(link->key);
free(link);
}

/**
* Initializes a hash table map, allocating memory for a link pointer table with
* the given number of buckets.
* @param map
* @param capacity The number of table buckets.
*/
void hashMapInit(HashMap* map, int capacity)
{
map->capacity = capacity;
map->size = 0;
map->table = malloc(sizeof(HashLink*) * capacity);
for (int i = 0; i < capacity; i++)
{
map->table[i] = NULL;
}
}

/**
* Removes all links in the map and frees all allocated memory. You can use
* hashLinkDelete to free the links.
* @param map
*/
void hashMapCleanUp(HashMap* map)
{
// FIXME: implement
for (int i = 0; i < map->capacity; i++) {
HashLink* link = map->table[i];
map->table[i] = NULL;

       while (link != NULL)
       {
           HashLink* temp = link;
           link = link->next;
           hashLinkDelete(temp);
       }

       free(map->table[i]);

   }

free(map->table);
}

/**
* Creates a hash table map, allocating memory for a link pointer table with
* the given number of buckets.
* @param capacity The number of buckets.
* @return The allocated map.
*/
HashMap* hashMapNew(int capacity)
{
HashMap* map = malloc(sizeof(HashMap));
hashMapInit(map, capacity);
return map;
}

/**
* Removes all links in the map and frees all allocated memory, including the
* map itself.
* @param map
*/
void hashMapDelete(HashMap* map)
{
hashMapCleanUp(map);
free(map);
}

/**
* Returns a pointer to the value of the link with the given key. Returns NULL
* if no link with that key is in the table.
*
* Use HASH_FUNCTION(key) and the map's capacity to find the index of the
* correct linked list bucket. Also make sure to search the entire list.
*
* @param map
* @param key
* @return Link value or NULL if no matching link.
*/
int* hashMapGet(HashMap* map, const char* key)
{
// FIXME: implement
int idx = HASH_FUNCTION(key) % (map->capacity);

HashLink* link = map->table[idx];

   while (link != NULL)
{
       if (strcmp(link->key, key) == 0)
{
           return &(link->value);
   }
       link = link->next;
   }
   return NULL;

}

/**
* Resizes the hash table to have a number of buckets equal to the given
* capacity. After allocating the new table, all of the links need to be
* rehashed into it because the capacity has changed.
*
* Remember to free the old table and any old links if you use hashMapPut to
* rehash them.
*
* @param map
* @param capacity The new number of buckets.
*/
void resizeTable(HashMap* map, int capacity)
{ // FIXME: implement
// Save old Map information
int oldCap = map->capacity;
HashLink ** oldTable = map->table;

// Initialize new map data to replace old map data
hashMapInit(map, capacity);

   // Rehash old map key/value pairs to new map
   for (int i = 0; i < oldCap; i++)
{
       HashLink * link = oldTable[i];

       while (link != NULL)
{
           hashMapPut(map, link->key, link->value);
           link = link->next;
       }
   }

   // Free allocated data in old table
   for (int i = 0; i < oldCap; i++)
{
       HashLink* link = oldTable[i];
       oldTable[i] = NULL;

       while (link != NULL)
       {
           HashLink* temp = link;
           link = link->next;
           hashLinkDelete(temp);
       }
       free(oldTable[i]);
   }
   free(oldTable);
}

/**
* Updates the given key-value pair in the hash table. If a link with the given
* key already exists, this will just update the value. Otherwise, it will
* create a new link with the given key and value and add it to the table
* bucket's linked list. You can use hashLinkNew to create the link.
*
* Use HASH_FUNCTION(key) and the map's capacity to find the index of the
* correct linked list bucket. Also make sure to search the entire list.
*
* @param map
* @param key
* @param value
*/
void hashMapPut(HashMap* map, const char* key, int value)
{ // FIXME: implement
// Resize table if exceeds MAX_TABLE_LOAD
   if (hashMapTableLoad(map) >= MAX_TABLE_LOAD)
{
       resizeTable(map, 2 * map->capacity);
   }
   // Compute index
   int idx = HASH_FUNCTION(key) % (map->capacity);
   if (idx < 0)
{
       idx += map->capacity;
   }
   // Add to bucket
   HashLink* link = map->table[idx];
   HashLink* newLink = NULL;
   if (link == NULL) {
       // Bucket is currently empty
       newLink= hashLinkNew(key, value, NULL);
       map->table[idx] = newLink;
       map->size++;
       return;
   }
   else
{ //bucket contains @ least 1 link
       while (link != NULL)
{
           if (strcmp(link->key, key) == 0)
{ //link w/ key already exists
               link->value = value;
               return;
           }
           link = link->next;
       }
       // Link with given key does not already exist, create new Link
       newLink = hashLinkNew(key, value, map->table[idx]);
       map->table[idx] = newLink;
       map->size++;
       return;
   }
}

/**
* Removes and frees the link with the given key from the table. If no such link
* exists, this does nothing. Remember to search the entire linked list at the
* bucket. You can use hashLinkDelete to free the link.
* @param map
* @param key
*/
void hashMapRemove(HashMap* map, const char* key)
{
// FIXME: implement
if (!hashMapContainsKey(map, key)) {
       printf("Not in map ");
       return;
   }

int idx = HASH_FUNCTION(key) % (map->capacity);

HashLink* cur = map->table[idx];
HashLink* last = map->table[idx];

   if (cur == NULL) {
       printf("No links in bucket to remove ");
   }
   while (cur != NULL)
{
       if (strcmp(cur->key, key) == 0)
{ //printf("Found key/link to remove ");
           if (cur == map->table[idx])
{
               //printf("Link to remove is first link ");
               map->table[idx] = cur->next;
               hashLinkDelete(cur);
               map->size--;
               cur = 0;
           }
           else
{
               last->next = cur->next;
               hashLinkDelete(cur);
               map->size--;
               cur = 0;
           }
       }
       else
{
           last = cur;
           cur = cur->next;
       }
   }
}

/**
* Returns 1 if a link with the given key is in the table and 0 otherwise.
*
* Use HASH_FUNCTION(key) and the map's capacity to find the index of the
* correct linked list bucket. Also make sure to search the entire list.
*
* @param map
* @param key
* @return 1 if the key is found, 0 otherwise.
*/
int hashMapContainsKey(HashMap* map, const char* key)
{
// FIXME: implement
int idx = HASH_FUNCTION(key) % (map->capacity);

HashLink* link = map->table[idx];

   while (link != NULL)
{
       if (strcmp(link->key, key) == 0)
{
           return 1;
       }
       link = link->next;
   }
return 0;
}

/**
* Returns the number of links in the table.
* @param map
* @return Number of links in the table.
*/
int hashMapSize(HashMap* map)
{ // FIXME: implement
return map->size;
}

/**
* Returns the number of buckets in the table.
* @param map
* @return Number of buckets in the table.
*/
int hashMapCapacity(HashMap* map)
{ // FIXME: implement
return map->capacity;
}

/**
* Returns the number of table buckets without any links.
* @param map
* @return Number of empty buckets.
*/
int hashMapEmptyBuckets(HashMap* map)
{ // FIXME: implement
int emptyBuckets = 0;

   for (int i = 0; i < map->capacity; i++)
{
       HashLink* link = map->table[i];
       if (link == NULL)
{
               emptyBuckets++;
       }
   }

return emptyBuckets;
}

/**
* Returns the ratio of (number of links) / (number of buckets) in the table.
* Remember that the buckets are linked lists, so this ratio tells you nothing
* about the number of empty buckets. Remember also that the load is a floating
* point number, so don't do integer division.
* @param map
* @return Table load.
*/
float hashMapTableLoad(HashMap* map)
{ // FIXME: implement
float links = (float)map->size;
float buckets = (float)map->capacity;

return (links/buckets);

}

/**
* Prints all the links in each of the buckets in the table.
* @param map
*/
void hashMapPrint(HashMap* map)
{
for (int i = 0; i < map->capacity; i++)
{
HashLink* link = map->table[i];
if (link != NULL)
{
printf(" Bucket %i ->", i);
while (link != NULL)
{
printf(" (%s, %d) ->", link->key, link->value);
link = link->next;
}
}
}
printf(" ");
}

Explanation / Answer

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "dictionary.h"

// Number of linked lists in hash table.
#define HASH_SIZE 2000

// Each element in linked list is a node. Node contains
// a word and a pointer to the next node.
typedef struct node
{
char word[LENGTH + 1];
struct node* next;
}
node;

// Hash table is an array of linked lists.
node* hashtable[HASH_SIZE];

int hash_function(const char* word);

// Tracks number of words in dictionary.
int number_of_words = 0;

/**
* Returns true if word is in dictionary else false.
*/
bool check(const char* word)
{
    int word_length = strlen(word);
    char lower_word[LENGTH+1];

    // Convert word to lowercase to accurately compare to hash table.
    for (int i = 0; i < word_length; i++)
    {
        // If character is uppercase, make it lowercase.
        if(isupper(word[i]))
        {
            lower_word[i] = tolower(word[i]) ;
        }
        // Otherwise it's already lowercase or it's not a letter.
        else
        {
            lower_word[i] = word[i];
        }
    }
    // Add null character to end of char array.
    lower_word[word_length] = '';
    // Use hash function to find correct "bucket" to place word.
    int bucket = hash_function(lower_word);
    // Set cursor to first node in bucket.
    node* cursor = hashtable[bucket];
    // Until the end of the linked list is reached (cursor == NULL),
    // compare each word stored in each node to lower_word. If they're
    // the same, then the word is in the dictionary and is not mispelled.
    // Otherwise, it is spelled incorrectly.
    while (cursor != NULL)
    {
        if (strcmp(lower_word, cursor->word) == 0)
        {
            // If lowercase'd word is the same as another in the bucket,
            // it's a match and return true.
            return true;
        }
        cursor = cursor->next;
    }

    return false;
}

/**
* Loads dictionary into memory. Returns true if successful else false.
*/
bool load(const char* dictionary)
{
    // Initialize each value in hash table to NULL.
    for(int i = 0; i < HASH_SIZE; i++)
    {
        hashtable[i] = NULL;
    }

    // Open the dictionary text file.
    FILE* the_dictionary;
    the_dictionary = fopen(dictionary, "r");

    // Check if dictionary opened correctly.
    if (the_dictionary == NULL)
    {
        printf("Failed to load dictionary");
        return false;
    }

    char buffer[LENGTH+1];
    // Loop through file until end of file is reached.
    // Save each word in buffer.
    while (fscanf(the_dictionary, "%s", buffer) > 0)
    {
        // Allocate memory for a new node.
        node* new_node = malloc(sizeof(node));
        // Set node's next pointer to NULL.
        new_node->next = NULL;
        // Set node's word to value stored in buffer.
        strcpy(new_node->word, buffer);
        // Run word through hash function to set bucket in hash table.
        int bucket = hash_function(new_node->word);
        // If it's the first node being added to the bucket, replace
        // the NULL value with the new node.
        if (hashtable[bucket] == NULL)
        {
            hashtable[bucket] = new_node;
        }
        // Otherwise set new node's pointer to the first node in the linked list.
        // Then set new node to be the first node in the linked list.
        else
        {
            new_node->next = hashtable[bucket];
            hashtable[bucket] = new_node;
        }
        // Track number of words in dictionary.
        number_of_words++;
    }
    // Done with text file. Time to close it.
    fclose(the_dictionary);
    // Everything seems to have gone well, return true.
    return true;
}

/**
* Returns number of words in dictionary if loaded else 0 if not yet loaded.
*/
unsigned int size(void)
{
return number_of_words;
}

/**
* Unloads dictionary from memory. Returns true if successful else false.
*/
bool unload(void)
{
    // Iterate over all linked lists in hash table. Set
    // cursor to point at each one's location in memory.
    for (int i = 0; i < HASH_SIZE;i++)
    {
        node* cursor = hashtable[i];
        while (cursor != NULL)
        {
            // Set temporary pointer to point at cursor's
            // location in memory. Move cursor to the next node
            // so we don't lose track of it before freeing
            // the current node's (temp's) memory.
            node* temp = cursor;
            cursor = cursor->next;
            free(temp);
        }
    }
    return true;
}

// Maps a word to an integer value to place it in the hash table.
// Sum the value of each character in the word, then find the
// remainder after dividing by the size of the hash table.
int hash_function(const char* word)
{
int sum = 0;
int word_length = strlen(word);

    for (int i = 0; i < word_length; i++)
    {
        sum += word[i];
    }

    int bucket = sum % HASH_SIZE;
    return bucket;
}

Navigate

Data Structures & Algorithm - Time Complexity I have written my own answer, can

Data Structures & C++: Write a program to solve the selection problem. Let k=N/2

Integrity-first tutoring: explanations and feedback only — we do not complete graded work. Learn more.

Data Structures & C Programming: Need help identifying what is wrong with spellc

Question

Explanation / Answer

Related Questions

Navigate