Academic Integrity: tutoring, explanations, and feedback — we don’t complete graded work or submit on a student’s behalf.

c++ In this project, we will let the computer produce a list of all the unique w

ID: 3573527 • Letter: C

Question

c++

In this project, we will let the computer produce a list of all the unique words used in a given text file and their frequency of occurrence.

The main object in this problem is a "word" with associated frequency. The tentative definition of "word" here is a string of alphanumeric characters between markers where markers are white space and all punctuation marks; anything non-alphanumeric stops the reading. If we skip all un-allowed characters before getting the string, we should have exactly what we want. Ignoring words of fewer than three letters will remove from consideration such as "a", "is", "to", "do", and "by" that do not belong in an index.

In this project, you are asked to write a program to read a given text file named "Lincoln.txt" and then list all the "words" in alphabetic order with their frequency together appeared in the article. The "word" is defined above and has at least three letters.

Note:

Your result should be printed to an output file named YourUserID.txt.
You need to create a Binary Search Tree (BST) to store all the words through an insertion or increment function. Finally, a proper traversal print function of the BST should be able to output the required results.
You may read a word at a time from the input file, check its length, and only insert it into the BST when: 1) the word length is greater than two, and 2) erase the ending punctuation mark, if any. In particular, you can use isalnum defined in cctype (http://www.cplusplus.com/reference/cctype/isalnum/) to check whether the last character of a word is alphanumeric or not.


The following codes are recommended in your program.

//Node type:
struct TreeNode
{
string word;
int count;
TreeNode * left;
TreeNode * right;
};

// Two function's prototype
// 1. The IncrementOrInsert function is very similar to the insert function introduced in class.
// However, increments the frequency count if the string is in the tree or inserts the string if it is not there.
void IncrementOrInsert(TreeNode*& node, string mystr);

// 2. Prints the words in the tree and their frequency counts (tree traversal).
void PrintTree(TreeNode* node, ofstream& outfile);


Sample run:
You are done! You can open the file "zux2.txt" (replace the name with your user ID) to check.

The expected output file should look like the following:

1863 1
Address 1
But 1
Four 1
Gettysburg 2
God 1
Liberty 1
November 1
Now 1
Pennsylvania 1
The 3
above 1
add 1
advanced 1
ago 1
all 1
altogether 1
and 6
any 1
are 3
battle-field 1
before 1
birth 1
brave 1
brought 1
but 1
can 5
cause 1
civil 1
come 1
conceived 2
consecrate 1
consecrated 1
continent 1
created 1
dead 3
dedicate 2
dedicated 4
detract 1
devotion 2

...

Please and Thank you!

Explanation / Answer

// A program to find k most frequent words in a file

#include <stdio.h>

#include <string.h>

#include <ctype.h>

# define MAX_CHARS 26

# define MAX_WORD_SIZE 30

struct TrieNode

{    bool isEnd; // indicates end of word

    unsigned frequency; // the number of occurrences of a word

    int indexMinHeap; // the index of the word in minHeap

    TrieNode* child[MAX_CHARS]; // represents 26 slots each for 'a' to 'z'.

};

struct MinHeapNode

{    TrieNode* root; // indicates the leaf node of TRIE

    unsigned frequency; // number of occurrences

    char* word; // the actual word stored

};

struct MinHeap

{    unsigned capacity; // the total size a min heap

    int count; // indicates the number of slots filled.

    MinHeapNode* array; // represents the collection of minHeapNodes

};

TrieNode* newTrieNode()

{    TrieNode* trieNode = new TrieNode;

     trieNode->isEnd = 0;

    trieNode->frequency = 0;

    trieNode->indexMinHeap = -1;

    for( int i = 0; i < MAX_CHARS; ++i )

        trieNode->child[i] = NULL;

    return trieNode;

}

MinHeap* createMinHeap( int capacity )

{    MinHeap* minHeap = new MinHeap;

     minHeap->capacity = capacity;

    minHeap->count = 0;

     minHeap->array = new MinHeapNode [ minHeap->capacity ];

     return minHeap;

}

void swapMinHeapNodes ( MinHeapNode* a, MinHeapNode* b )

{    MinHeapNode temp = *a;

    *a = *b;

    *b = temp;

}

void minHeapify( MinHeap* minHeap, int idx )

{    int left, right, smallest;

    left = 2 * idx + 1;

    right = 2 * idx + 2;

    smallest = idx;

    if ( left < minHeap->count &&

         minHeap->array[ left ]. frequency <

         minHeap->array[ smallest ]. frequency

       )        smallest = left;

    if ( right < minHeap->count &&

         minHeap->array[ right ]. frequency <

         minHeap->array[ smallest ]. frequency

       )

        smallest = right;

    if( smallest != idx )

    {   minHeap->array[ smallest ]. root->indexMinHeap = idx;

        minHeap->array[ idx ]. root->indexMinHeap = smallest;

    swapMinHeapNodes (&minHeap->array[ smallest ], &minHeap->array[ idx ]);

         minHeapify( minHeap, smallest );

    }}

void buildMinHeap( MinHeap* minHeap )

{    int n, i;

    n = minHeap->count - 1;

     for( i = ( n - 1 ) / 2; i >= 0; --i )

        minHeapify( minHeap, i );

}

void insertInMinHeap( MinHeap* minHeap, TrieNode** root, const char* word )

{    if( (*root)->indexMinHeap != -1 )

   {        ++( minHeap->array[ (*root)->indexMinHeap ]. frequency );

         minHeapify( minHeap, (*root)->indexMinHeap );

    }

    else if( minHeap->count < minHeap->capacity )

    {       int count = minHeap->count;

        minHeap->array[ count ]. frequency = (*root)->frequency;

        minHeap->array[ count ]. word = new char [strlen( word ) + 1];

        strcpy( minHeap->array[ count ]. word, word );

        minHeap->array[ count ]. root = *root;

        (*root)->indexMinHeap = minHeap->count;

        ++( minHeap->count );

        buildMinHeap( minHeap );

    }

    else if ( (*root)->frequency > minHeap->array[0]. frequency )

    { minHeap->array[ 0 ]. root->indexMinHeap = -1;

        minHeap->array[ 0 ]. root = *root;

        minHeap->array[ 0 ]. root->indexMinHeap = 0;

        minHeap->array[ 0 ]. frequency = (*root)->frequency;

    delete [] minHeap->array[ 0 ]. word;

        minHeap->array[ 0 ]. word = new char [strlen( word ) + 1];

        strcpy( minHeap->array[ 0 ]. word, word );

         minHeapify ( minHeap, 0 );

    }

}

void insertUtil ( TrieNode** root, MinHeap* minHeap,

                        const char* word, const char* dupWord )

{     if ( *root == NULL )

        *root = newTrieNode();

   if ( *word != '' )

        insertUtil ( &((*root)->child[ tolower( *word ) - 97 ]),

                         minHeap, word + 1, dupWord );

    else // The complete word is processed

    {        if ( (*root)->isEnd )

            ++( (*root)->frequency );

        else        {            (*root)->isEnd = 1;

            (*root)->frequency = 1;        }

         insertInMinHeap( minHeap, root, dupWord );    }}

void insertTrieAndHeap(const char *word, TrieNode** root, MinHeap* minHeap)

{    insertUtil( root, minHeap, word, word );}

void displayMinHeap( MinHeap* minHeap )

{    int i;

for( i = 0; i < minHeap->count; ++i )

    {        printf( "%s : %d ", minHeap->array[i].word,

                            minHeap->array[i].frequency );   }}

void printKMostFreq( FILE* fp, int k )

{     MinHeap* minHeap = createMinHeap( k );

    TrieNode* root = NULL;

    char buffer[MAX_WORD_SIZE];

    while( fscanf( fp, "%s", buffer ) != EOF )

        insertTrieAndHeap(buffer, &root, minHeap);

    displayMinHeap( minHeap );}

int main()

{    int k = 5;

    FILE *fp = fopen ("file.txt", "r");

    if (fp == NULL)

        printf ("File doesn't exist ");

    else         printKMostFreq (fp, k);

    return 0;}

Hire Me For All Your Tutoring Needs
Integrity-first tutoring: clear explanations, guidance, and feedback.
Drop an Email at
drjack9650@gmail.com
Chat Now And Get Quote