C++ Programming Write a spell checker class that stores a set of words, W, in a
ID: 3853276 • Letter: C
Question
C++ Programming
Write a spell checker class that stores a set of words, W, in a hash table and implements a function, spellCheck(s), which performs a Spell Check on the string s with respect to the set of words, W. If s is in W, then the call to spellCheck(s) returns an iterable collection that contains only s, since it is assumed to be spelled correctly in this case. Otherwise, if s is not in W, then the call to spellCheck(s) returns a list of every word in W that could be a correct spelling of s. Your program should be able to handle all the common ways that s might be a misspelling of a word in W, including swapping adjacent characters in a word , inserting a single character inbetween two adjacent characters in a word, deleting a single character from a word, and replacing a character in a word with another character. for an extra challenge, consider phonetic substitutions as well.
Explanation / Answer
SpellChecker.cpp
---------------------------------------------------------------------
#include <iostream>
#include <sstream>
#include <algorithm>
#include "SpellChecker.h"
using namespace std;
// utilities
string swapChars(string& str, int i)
{
char tmp;
tmp = str[i];
str[i] = str[i+1];
str[i+1] = tmp;
return str;
}
SpellChecker::SpellChecker(istream& wordlistfile)
{
// populate the alphabet array
for (int i = 0; i < ALPHABET_LENGTH; i++)
{
m_alphabet[i] = (char)(65 + i);
}
// local variable init
stringstream ss;
int wordlist_size;
string line;
string word;
// get the size of the wordlist and create a hash table of apporpriate size
getline(wordlistfile, line);
ss << line;
ss >> wordlist_size;
m_wordlist = new HashTable(wordlist_size);
// load words into the hashtable
while (getline(wordlistfile, line))
{
ss << line;
ss >> word;
transform(word.begin(), word.end(), word.begin(), ::toupper); // convert to upper case
m_wordlist->insert(word);
}
}
SpellChecker::~SpellChecker()
{
delete m_wordlist;
}
void SpellChecker::swapAdjacent(string misspelling)
{
for (int i = 0; i < misspelling.size() - 1; i++)
{
swapChars(misspelling, i);
if (m_wordlist->find(misspelling))
m_suggestions.push_back(misspelling);
swapChars(misspelling, i); // swap them back to original positions
}
}
void SpellChecker::insertChar(string misspelling)
{
for (int i = 0; i < misspelling.size() + 1; i++)
{
for (int j = 0; j < ALPHABET_LENGTH; j++)
{
misspelling.insert(misspelling.begin()+i, m_alphabet[j]);
if (m_wordlist->find(misspelling))
m_suggestions.push_back(misspelling);
misspelling.erase(i, 1);
}
}
}
void SpellChecker::deleteChar(string misspelling)
{
char orig;
for (int i = 0; i < misspelling.size(); i++)
{
orig = misspelling[i];
misspelling.erase(i, 1);
if (m_wordlist->find(misspelling))
m_suggestions.push_back(misspelling);
misspelling.insert(misspelling.begin()+i, orig);
}
}
void SpellChecker::replaceChar(string misspelling)
{
char orig;
for (int i = 0; i < misspelling.size(); i++)
{
orig = misspelling[i];
for (int j = 0; j < ALPHABET_LENGTH; j++)
{
misspelling[i] = m_alphabet[j];
if (m_wordlist->find(misspelling))
m_suggestions.push_back(misspelling);
}
misspelling[i] = orig;
}
}
void SpellChecker::splitWord(string misspelling)
{
string s1;
string s2;
for (int i = 1; i < misspelling.size(); i++)
{
s1 = misspelling.substr(0, i);
s2 = misspelling.substr(i, misspelling.size());
if (m_wordlist->find(s1) && m_wordlist->find(s2))
m_suggestions.push_back(s1 + " " + s2);
}
}
void SpellChecker::suggest(string misspelling)
{
if (!m_suggestions.empty()) m_suggestions.clear(); // clear suggestions from previous misspellings
swapAdjacent(misspelling);
insertChar(misspelling);
deleteChar(misspelling);
replaceChar(misspelling);
splitWord(misspelling);
}
void SpellChecker::collectSuggestions() // sort and de-dup suggestions
{
// sort suggestions
sort(m_suggestions.begin(), m_suggestions.end());
// remove duplicates
vector<int> dups;
for (vector<string>::size_type i = 0; i != m_suggestions.size() - 1; i++)
{
if (m_suggestions[i] == m_suggestions[i+1]) dups.push_back(i+1);
}
for (vector<int>::size_type i = 0; i != dups.size(); i++)
{
m_suggestions.erase(m_suggestions.begin()+ dups[i] - i);
}
}
void SpellChecker::outputSuggestions(string line, string misspelling, ostream& outf)
{
this->collectSuggestions();
// output suggestions
outf << line << endl;
outf << "word not found: " << misspelling << endl;
outf << "perhaps you meant: " << endl;
for (vector<string>::iterator it = m_suggestions.begin(); it != m_suggestions.end(); ++it)
{
outf << *it << endl;
}
outf << endl;
}
void SpellChecker::spellCheck(istream& inf, ostream& outf)
{
stringstream ss;
string line;
string word;
// spellcheck each word
while (getline(inf, line)) // while we have lines (paragraphs)
{
ss << line;
while (ss >> word) // while we have words in the line
{
transform(word.begin(), word.end(), word.begin(), ::toupper); // convert to upper case
if (!m_wordlist->find(word))
{
this->suggest(word);
this->outputSuggestions(line, word, outf);
}
}
}
}
-------------------------------------------------------------------------------------------------------------------------------------
SpellChecker.h
---------------------------------------------------------------------
#ifndef SPELL_CHECKER_INCLUDED
#define SPELL_CHECKER_INCLUDED
#include <string>
#include "hash.h"
const int ALPHABET_LENGTH = 26;
class SpellChecker
{
public:
SpellChecker(std::istream& wordlistfile);
~SpellChecker();
void spellCheck(std::istream& inf, std::ostream& outf);
private:
char m_alphabet[ALPHABET_LENGTH];
int m_wordlist_size;
HashTable * m_wordlist;
std::vector<std::string> m_suggestions;
void suggest(std::string misspelling); // updates m_suggestions with suggestions for a misspelled word
void collectSuggestions();
void outputSuggestions(std::string line, std::string misspelling, std::ostream& outf);
void swapAdjacent(std::string misspelling);
void insertChar(std::string misspelling);
void deleteChar(std::string misspelling);
void replaceChar(std::string misspelling);
void splitWord(std::string misspelling);
};
#endif // SPELL_CHECKER_INCLUDED
-------------------------------------------------------------------------------------------------------------------
check.cpp
--------------------------------------------------------------------
#include <iostream>
#include <fstream>
#include "SpellChecker.h"
using namespace std;
void spellCheck(istream& inf, istream& wordlistfile, ostream& outf)
{
SpellChecker checker = SpellChecker(wordlistfile);
checker.spellCheck(inf, outf);
}
int main()
{
ifstream wordlist("wordlist.txt");
ifstream input("test_input.txt");
spellCheck(input, wordlist, cout);
}
----------------------------------------------------------------------
hash.cpp
------------------------------------------------
#include "hash.h"
using namespace std;
HashTable::HashTable(int items) : m_nBuckets(items / MAX_LOAD_FACTOR)
{
std::vector<int>::size_type size = m_nBuckets;
m_storage.reserve(size);
for (int i = 0; i < m_nBuckets; i++)
{
m_storage.push_back(Bucket());
}
}
int HashTable::hash(string val)
{
int sum = 0;
for (string::size_type i = 0; i < val.length(); i++)
{
sum += val[i];
}
return sum % m_nBuckets;
}
bool HashTable::insert(string val)
{
int bucket = this->hash(val);
for (int tries = 0; tries < m_nBuckets; tries++)
{
if (!m_storage[bucket].used)
{
m_storage[bucket].val = val;
m_storage[bucket].used = true;
return true; // value inserted successfully
}
bucket = ++bucket % m_nBuckets;
}
return false; // No room in hash table!
}
bool HashTable::find(string val)
{
int bucket = this->hash(val);
for (int tries = 0; tries < m_nBuckets; tries++)
{
if (!m_storage[bucket].used)
return false; // there's nothing here
if (m_storage[bucket].val == val)
return true; // value found
bucket = ++bucket % m_nBuckets;
}
return false; // not found
}
------------------------------------------------------------------------------
hash.h
-----------------------------------------------------
#ifndef HASH_INCLUDED
#define HASH_INCLUDED
#include <string>
#include <vector>
const float MAX_LOAD_FACTOR = 0.5;
struct Bucket
{
std::string val;
bool used;
Bucket() : val(""), used(false) {}
};
// hash table for hashing strings
class HashTable
{
public:
HashTable(int buckets);
bool insert(std::string val);
bool find(std::string val);
private:
int hash(std::string val);
int m_nBuckets;
std::vector<Bucket> m_storage;
};
#endif // HASH_INCLUDED
-----------------------------------------------------------------------------
You can create your own wordlist
Related Questions
drjack9650@gmail.com
Navigate
Integrity-first tutoring: explanations and feedback only — we do not complete graded work. Learn more.