Academic Integrity: tutoring, explanations, and feedback — we don’t complete graded work or submit on a student’s behalf.

Program Specifications The following are requirements for your program: • Read i

ID: 3888276 • Letter: P

Question

Program Specifications
The following are requirements for your program:
• Read in the name of the file to process from the second command-line
argument.
• Read in the number of most common words to process from the first
command-line argument.

• Write a function named getStopwords that takes the name of the ignore-
words file and a reference to a vector as parameters (returns void). Read in

My question is below.

Store the unique words found in the file that are not in the stopword list in a
dynamically allocated array.
o Call a function to check if the word is a stopword first, and if it is, then
ignore that word.
o Use an array of structs to store each unique word (variable name
word) and a count (variable name count) of how many times it
appears in the text file.
o Use the array-doubling algorithm to increase the size of your
array

§ We don’t know ahead of time how many unique words the
input file will have, so you don’t know how big the array should
be. Start with an array size of 100 (use the constant declared in
the starter code), and double the size as words are read in from
the file and the array fills up with new words.
• Use dynamic memory allocation to create your array
• Copy the values from the current array into the new
array, and then
• Free the memory used for the current array.
(Index of any given word in the array after resizing must
match index in array before resizing.)

• Output the top n most frequent words
Write a function named printTopN that takes a reference to the array of
structs and the value of n to determine the top n words in the array.
Generate an array of the n top items sorted from most frequent to least
frequent and print these out from most to least.
Array MUST be sorted before calling printTopN.
• Output the number of times you had to double the array.
• Output the number of unique non-stop words.
• Output the total number of non-stop words.

Explanation / Answer


#include <math.h>

#include <algorithm>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

using namespace std;

// One distinct word from the input text paired with how often it has been seen.
struct wordItem
{
    std::string word; // the word's text (empty until a word is stored in the slot)
    int count = 0;    // occurrences so far; zero-initialized so freshly allocated
                      // array slots never hold an indeterminate value
};

// Forward declarations for the helpers defined after main.

// Reads the stop words from the named file into _vecIgnoreWords.
void getStopWords(char *ignoreWordFileName, vector<string>& _vecIgnoreWords);
// Prints the first topN entries of wordItemList, one "count - word" line each.
void printTopN(wordItem *&wordItemList, int topN);
// Returns true when word matches an entry of the stop-word vector.
bool isCommonWord(string word, vector<string>& _vecIgnore);
// Number of slots main reserves in the stop-word vector.
const int STOPWORD_LIST_SIZE = 50;

int main(int argc, char *argv[])
{
    int size = 100; //Initialize size
    wordItem *library = new wordItem[size]; //use later for doubling
    vector<string> vecIgnoreWords(STOPWORD_LIST_SIZE);

    int t = atoi(argv[1]); //This changes the command line argument to an integer
    getStopWords(argv[3], vecIgnoreWords);

    string element;
    int totalWords = 0; // total words in document
    int totalUniqueWords = 0; //total unique words
    int j = 0;
    int aindex = 0;
    int dbscnt = 0;
    ifstream mainfile(argv[2]);
    while(mainfile >> element)
    {
        totalWords = totalWords + 1;
        if(mainfile.is_open())
        {
            bool iscommon = isCommonWord(element, vecIgnoreWords);

             if(iscommon == false) // if the word is already in library the counter goes up
             {

                totalUniqueWords++;
                for(int i = 0; i <= aindex; i++)
                {
                    if(element ==library[i].word)
                    {
                        library[i].count++;
                        j = 1; // resets while loop
                    }
                }
                if(j == 0) // adds word if not in library
                {
                    library[aindex].word = element;
                    library[aindex].count = 1;
                    aindex++;
                }

                for(int k = 0; k<size; k++) // ensures the the library is in descending order
                {
                    if(library[k+1].count > library[k].count)
                    {
                        int temp = 0;
                        string term;
                        temp = library[k].count;
                        library[k].count = library[k+1].count;
                        library[k+1].count = temp;
                        term = library[k].word;
                        library[k].word = library[k+1].word;
                        library[k+1].word = term;

                    }
                }

                if(size <= aindex) // here we double
                {
                    dbscnt++;
                    wordItem *newArray = new wordItem[2*size];

                    for(int i = 0; i<size; i++)
                    {
                        newArray[i].count = library[i].count;
                        newArray[i].word = library[i].word;
                    }
                        size*=2;
                        delete[] library;
                        library = newArray;
                }

             }
        }
        j = 0;

    }

printTopN(library, t);
cout << "#" << endl;
cout << "Array double: " << dbscnt <<endl;
cout << "#" << endl;
cout << "Unique non-common words: " << aindex << endl;
cout << "#" << endl;
cout << "Total non-common words: " << totalUniqueWords << endl;

mainfile.close();
delete[] library;

}


/*
Function name: getStopWords
Purpose: read the stop word list from a file (one word per line) into a vector
@param ignoreWordFileName - name of the file storing the ignore words
@param _vecIgnoreWords - receives the ignore words (pass by reference). Existing
       slots are overwritten starting at index 0; if the file has more lines
       than the vector has slots, the vector grows to hold them.
@return - none. Prints the file name first; if the file cannot be opened it
          prints a failure message and exits the program with status 1.
*/

void getStopWords(char *ignoreWordFileName, std::vector<std::string>& _vecIgnoreWords)
{
    std::cout << ignoreWordFileName << std::endl;

    std::ifstream myFile(ignoreWordFileName);
    if (!myFile.is_open()) // if can't find file it prints failure to open
    {
        std::cout << "failure in opening file";
        std::exit(1);
    }

    std::size_t next = 0; // index of the slot the next line goes into
    std::string line;
    while (std::getline(myFile, line))
    {
        // Files with CRLF line endings leave a '\r' on each line, which would
        // stop the word from ever matching a token read from the text.
        if (!line.empty() && line[line.size() - 1] == '\r')
        {
            line.erase(line.size() - 1);
        }

        if (next < _vecIgnoreWords.size())
        {
            _vecIgnoreWords[next] = line;
        }
        else
        {
            _vecIgnoreWords.push_back(line); // more lines than pre-sized slots
        }
        ++next;
    }
    myFile.close(); // closes file after done being used
}
/*
Function name: isCommonWord
Purpose: to see if a word is a common word
@param word - a word (which you want to check if it is a common word)
@param _vecIgnoreWords - the vector type of string storing ignore/common words;
       every entry the vector actually holds is checked, whatever its size
@return - true (if word equals any entry of the vector), or false (otherwise)
*/

bool isCommonWord(std::string word, std::vector<std::string>& _vecIgnoreWords)
{
    // Bound the scan by the vector's real size rather than a fixed constant so
    // shorter lists are not read out of bounds and longer lists are not cut off.
    for (std::size_t i = 0; i < _vecIgnoreWords.size(); ++i)
    {
        if (_vecIgnoreWords[i] == word)
        {
            return true;
        }
    }
    return false;
}
/*
Function name: printTopN
Purpose: print the leading topN entries of the list, one per line, in the
         form "<count> - <word>"
@param wordItemList - reference to a pointer to the first element of an array
       of wordItems; entries are printed in the order they are stored, so the
       caller is responsible for ordering them and for supplying at least
       topN entries
@param topN - how many entries to print (nothing is printed when it is <= 0)
@return none
*/

void printTopN(wordItem *&wordItemList, int topN)
{
    int rank = 0;
    while (rank < topN)
    {
        const wordItem &entry = wordItemList[rank];
        std::cout << entry.count << " - " << entry.word << std::endl;
        ++rank;
    }
}


one.txt

the
be
to
of
and
a
in
that
have
i
it
for
not
on
with
he
as
you
do
at
this
but
his
by
from
they
we
say
her
she
or
an
will
my
one
all
would
there
their
what
so
up
out
if
about
who
get
which
go
me

Hire Me For All Your Tutoring Needs
Integrity-first tutoring: clear explanations, guidance, and feedback.
Chat Now And Get Quote