Academic Integrity: tutoring, explanations, and feedback — we don’t complete graded work or submit on a student’s behalf.

HI! I need help writing a function that find the most frequent word thath has be

ID: 3914565 • Letter: H

Question

HI! I need help writing a function that find the most frequent word thath has been inserted into a Trie. I have a count field that lets me know if I am at the terminal node of of the string. If the same string is inserted again the counter variable is incremented. So I need to traverse the Trie and find what is the most frequent word in the Trie. If any of the strings have been inserted the same amount of times I want to return the string that comes in first alphabetical order. I am just having trouble on how to go about traversing the Trie and comparing the string in alphabetical order if their count fields are equal. Below is my Trie struct, the code is in C language.

struct TrieNode

{

   int count;

struct TrieNode *children[26];

} ;

Explanation / Answer

#include <stdio.h>

#include <string.h>

#include <ctype.h>

# define MAX_CHARS 26

# define MAX_WORD_SIZE 30

struct TrieNode

{

    bool isEnd;

    unsigned frequency;

    int indexMinHeap;

    TrieNode* child[MAX_CHARS];

};

struct MinHeapNode

{

    TrieNode* root;

    unsigned frequency;

    char* word;

};

struct MinHeap

{

    unsigned capacity;

    int count;

    MinHeapNode* array;

};

TrieNode* newTrieNode()

{

      TrieNode* trieNode = new TrieNode;

    trieNode->isEnd = 0;

    trieNode->frequency = 0;

    trieNode->indexMinHeap = -1;

    for( int i = 0; i < MAX_CHARS; ++i )

        trieNode->child[i] = NULL;

    return trieNode;

}

MinHeap* createMinHeap( int capacity )

{

    MinHeap* minHeap = new MinHeap;

    minHeap->capacity = capacity;

    minHeap->count = 0;

    minHeap->array = new MinHeapNode [ minHeap->capacity ];

    return minHeap;

}

void swapMinHeapNodes ( MinHeapNode* a, MinHeapNode* b )

{

    MinHeapNode temp = *a;

    *a = *b;

    *b = temp;

}

void minHeapify( MinHeap* minHeap, int idx )

{

    int left, right, smallest;

    left = 2 * idx + 1;

    right = 2 * idx + 2;

    smallest = idx;

    if ( left < minHeap->count &&

         minHeap->array[ left ]. frequency <

         minHeap->array[ smallest ]. frequency

       )

        smallest = left;

    if ( right < minHeap->count &&

         minHeap->array[ right ]. frequency <

         minHeap->array[ smallest ]. frequency

       )

        smallest = right;

    if( smallest != idx )

    {

        minHeap->array[ smallest ]. root->indexMinHeap = idx;

        minHeap->array[ idx ]. root->indexMinHeap = smallest;

      swapMinHeapNodes (&minHeap->array[ smallest ], &minHeap->array[ idx ]);

        minHeapify( minHeap, smallest );

    }

}

void buildMinHeap( MinHeap* minHeap )

{

    int n, i;

    n = minHeap->count - 1;

    for( i = ( n - 1 ) / 2; i >= 0; --i )

        minHeapify( minHeap, i );

}

void insertInMinHeap( MinHeap* minHeap, TrieNode** root, const char* word )

{

        if( (*root)->indexMinHeap != -1 )

    {

        ++( minHeap->array[ (*root)->indexMinHeap ]. frequency );

        minHeapify( minHeap, (*root)->indexMinHeap );

    }

        else if( minHeap->count < minHeap->capacity )

    {

        int count = minHeap->count;

        minHeap->array[ count ]. frequency = (*root)->frequency;

        minHeap->array[ count ]. word = new char [strlen( word ) + 1];

        strcpy( minHeap->array[ count ]. word, word );

        minHeap->array[ count ]. root = *root;

        (*root)->indexMinHeap = minHeap->count;

        ++( minHeap->count );

        buildMinHeap( minHeap );

    }

    else if ( (*root)->frequency > minHeap->array[0]. frequency )

    {

        minHeap->array[ 0 ]. root->indexMinHeap = -1;

        minHeap->array[ 0 ]. root = *root;

        minHeap->array[ 0 ]. root->indexMinHeap = 0;

        minHeap->array[ 0 ]. frequency = (*root)->frequency;

        delete [] minHeap->array[ 0 ]. word;

        minHeap->array[ 0 ]. word = new char [strlen( word ) + 1];

        strcpy( minHeap->array[ 0 ]. word, word );

        minHeapify ( minHeap, 0 );

    }

}

void insertUtil ( TrieNode** root, MinHeap* minHeap,

                        const char* word, const char* dupWord )

{

    if ( *root == NULL )

        *root = newTrieNode();

    if ( *word != '' )

        insertUtil ( &((*root)->child[ tolower( *word ) - 97 ]),

                         minHeap, word + 1, dupWord );

    else // The complete word is processed

    {

        if ( (*root)->isEnd )

            ++( (*root)->frequency );

        else

        {

            (*root)->isEnd = 1;

            (*root)->frequency = 1;

        }

        insertInMinHeap( minHeap, root, dupWord );

    }

}

void insertTrieAndHeap(const char *word, TrieNode** root, MinHeap* minHeap)

{

    insertUtil( root, minHeap, word, word );

}

void displayMinHeap( MinHeap* minHeap )

{

    int i;

    for( i = 0; i < minHeap->count; ++i )

    {

        printf( "%s : %d ", minHeap->array[i].word,

                            minHeap->array[i].frequency );

    }

}

void printKMostFreq( FILE* fp, int k )

{

    MinHeap* minHeap = createMinHeap( k );

        TrieNode* root = NULL;

    char buffer[MAX_WORD_SIZE];

    while( fscanf( fp, "%s", buffer ) != EOF )

        insertTrieAndHeap(buffer, &root, minHeap);

        displayMinHeap( minHeap );

}

int main()

{

    int k = 5;

    FILE *fp = fopen ("file.txt", "r");

    if (fp == NULL)

        printf ("File doesn't exist ");

    else

        printKMostFreq (fp, k);

    return 0;

}