Academic Integrity: tutoring, explanations, and feedback — we don’t complete graded work or submit on a student’s behalf.

import java.io.IOException; /** * An executable that counts the words in a files

ID: 3908389 • Letter: I

Question

import java.io.IOException;

/**
* An executable that counts the words in a files and prints out the counts in
* descending order. You will need to modify this file.
*/
public class WordCount {

private static void countWords(String file) {
DataCounter<String> counter = new BinarySearchTree<String>();

try {
FileWordReader reader = new FileWordReader(file);
String word = reader.nextWord();
while (word != null) {
counter.incCount(word);
word = reader.nextWord();
}
} catch (IOException e) {
System.err.println("Error processing " + file + e);
System.exit(1);
}

DataCount<String>[] counts = counter.getCounts();
sortByDescendingCount(counts);
for (DataCount<String> c : counts)
System.out.println(c.count + " " + c.data);
}

/**
* TODO Replace this comment with your own.
*
* Sort the count array in descending order of count. If two elements have
* the same count, they should be in alphabetical order (for Strings, that
* is. In general, use the compareTo method for the DataCount.data field).
*
* This code uses insertion sort. You should modify it to use a different
* sorting algorithm. NOTE: the current code assumes the array starts in
* alphabetical order! You'll need to make your code deal with unsorted
* arrays.
*
* The generic parameter syntax here is new, but it just defines E as a
* generic parameter for this method, and constrains E to be Comparable. You
* shouldn't have to change it.
*
* @param counts array to be sorted.
*/
private static <E extends Comparable<? super E>> void sortByDescendingCount(
DataCount<E>[] counts) {
for (int i = 1; i < counts.length; i++) {
DataCount<E> x = counts[i];
int j;
for (j = i - 1; j >= 0; j--) {
if (counts[j].count >= x.count) {
break;
}
counts[j + 1] = counts[j];
}
counts[j + 1] = x;
}
}

public static void main(String[] args) {
if (args.length != 1) {
System.err.println("Usage: filename of document to analyze");
System.exit(1);
}
countWords(args[0]);
}
}

------------------------------------------------------------------

/**
* An example of how to test your binary search tree. You should use this as
* inspiration for your unit tests.
*/
public class TestBinarySearchTree {
public static void main(String[] args) {
boolean dumpall = false, notest = false;

if (args.length > 1
|| (args.length == 1 && args[0].compareTo("dump") != 0 && args[0]
.compareTo("notest") != 0)) {
System.err.println("Arguments: [dump] to dump all output");
System.err.println(" [notest] to skip tests");
System.err.println("No arguments justs tests output");
return;
}

if (args.length == 1) {
dumpall = true;
if (args[0].compareTo("notest") == 0)
notest = true;
}

String[][] tests = {
{ "g", "h", "a", "b", "a", "h", "j", "e", "e", "f" },
{ "5", "3", "1", "2", "7", "6", "0", "8", "9", "4", "3", "5",
"0", "9" }, {} };

String[][] expected = {
{
"([g,1] . .)",
"([g,1] . ([h,1] . .))",
"([g,1] ([a,1] . .) ([h,1] . .))",
"([g,1] ([a,1] . ([b,1] . .)) ([h,1] . .))",
"([g,1] ([a,2] . ([b,1] . .)) ([h,1] . .))",
"([g,1] ([a,2] . ([b,1] . .)) ([h,2] . .))",
"([g,1] ([a,2] . ([b,1] . .)) ([h,2] . ([j,1] . .)))",
"([g,1] ([a,2] . ([b,1] . ([e,1] . .))) ([h,2] . ([j,1] . .)))",
"([g,1] ([a,2] . ([b,1] . ([e,2] . .))) ([h,2] . ([j,1] . .)))",
"([g,1] ([a,2] . ([b,1] . ([e,2] . ([f,1] . .)))) ([h,2] . ([j,1] . .)))",
"a,2 b,1 e,2 f,1 g,1 h,2 j,1 " },
{
"([5,1] . .)",
"([5,1] ([3,1] . .) .)",
"([5,1] ([3,1] ([1,1] . .) .) .)",
"([5,1] ([3,1] ([1,1] . ([2,1] . .)) .) .)",
"([5,1] ([3,1] ([1,1] . ([2,1] . .)) .) ([7,1] . .))",
"([5,1] ([3,1] ([1,1] . ([2,1] . .)) .) ([7,1] ([6,1] . .) .))",
"([5,1] ([3,1] ([1,1] ([0,1] . .) ([2,1] . .)) .) ([7,1] ([6,1] . .) .))",
"([5,1] ([3,1] ([1,1] ([0,1] . .) ([2,1] . .)) .) ([7,1] ([6,1] . .) ([8,1] . .)))",
"([5,1] ([3,1] ([1,1] ([0,1] . .) ([2,1] . .)) .) ([7,1] ([6,1] . .) ([8,1] . ([9,1] . .))))",
"([5,1] ([3,1] ([1,1] ([0,1] . .) ([2,1] . .)) ([4,1] . .)) ([7,1] ([6,1] . .) ([8,1] . ([9,1] . .))))",
"([5,1] ([3,2] ([1,1] ([0,1] . .) ([2,1] . .)) ([4,1] . .)) ([7,1] ([6,1] . .) ([8,1] . ([9,1] . .))))",
"([5,2] ([3,2] ([1,1] ([0,1] . .) ([2,1] . .)) ([4,1] . .)) ([7,1] ([6,1] . .) ([8,1] . ([9,1] . .))))",
"([5,2] ([3,2] ([1,1] ([0,2] . .) ([2,1] . .)) ([4,1] . .)) ([7,1] ([6,1] . .) ([8,1] . ([9,1] . .))))",
"([5,2] ([3,2] ([1,1] ([0,2] . .) ([2,1] . .)) ([4,1] . .)) ([7,1] ([6,1] . .) ([8,1] . ([9,2] . .))))",
"0,2 1,1 2,1 3,2 4,1 5,2 6,1 7,1 8,1 9,2 " },
{ "<empty tree>", "No Data" } };

boolean error = false;
for (int i = 0; i < tests.length; i++) {
BinarySearchTree<String> tree = new BinarySearchTree<String>();
for (int j = 0; j < tests[i].length; j++) {
tree.incCount(tests[i][j]);
String out = tree.dump();
if (notest || out.compareTo(expected[i][j]) != 0)
error = true;
if (dumpall)
System.out.println(out);
}
if (tests[i].length < 1) {
String out = tree.dump();
if (notest || out.compareTo(expected[i][0]) != 0)
error = true;
if (dumpall)
System.out.println(out);
}

DataCount<String>[] cnt = tree.getCounts();
String out = "";
if (cnt != null && cnt.length > 0)
for (int j = 0; j < cnt.length; j++)
out += cnt[j].data + "," + cnt[j].count + " ";
else
out = "No Data";
if (notest
|| out.compareTo(expected[i][expected[i].length - 1]) != 0)
error = true;
if (dumpall)
System.out.println(out);
}

if (!notest) {
if (error)
System.out.println("Test failed!");
else
System.out.println("Test passed.");
}
}
}

------------------------------------------------------------

/**
* TODO Replace this comment with your own.
*
* Stub code for an implementation of a DataCounter that uses a hash table as
* its backing data structure. We included this stub so that it's very clear
* that HashTable works only with Strings, whereas the DataCounter interface is
* generic. You need the String contents to write your hashcode code.
*/
public class HashTable implements DataCounter<String> {

/** {@inheritDoc} */
public DataCount<String>[] getCounts() {
// TODO Auto-generated method stub
return null;
}

/** {@inheritDoc} */
public int getSize() {
// TODO Auto-generated method stub
return 0;
}

/** {@inheritDoc} */
public void incCount(String data) {
// TODO Auto-generated method stub

}

}

------------------------------------------------------

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.StreamTokenizer;

/**
* FileWordReader reads words from a file one-by-one, converting to lowercase
* and eliminating punctuation. You can read a file in using:
*
*/
class FileWordReader {
StreamTokenizer tok;

public FileWordReader(String file) throws FileNotFoundException,
IOException {
tok = new StreamTokenizer(new BufferedReader(new InputStreamReader(
new FileInputStream(file))));
tok.eolIsSignificant(false);
tok.lowerCaseMode(true);
tok.wordChars('a', 'z');
tok.wordChars('A', 'Z');
String ws = " .,!;:-[].,;!?*+-=\|/(){}"[]><'";
for (int i = 0; i < ws.length(); i++) {
tok.whitespaceChars(ws.charAt(i), ws.charAt(i));
}
}

public String nextWord() throws IOException {
int ttype = tok.nextToken();
while (ttype != StreamTokenizer.TT_WORD
&& ttype != StreamTokenizer.TT_EOF)
ttype = tok.nextToken();
if (ttype == StreamTokenizer.TT_EOF)
return null;
return tok.sval;
}
};

Explanation / Answer

WordCount.java

---------------------------------------

import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;


public class WordCount {
public static void main(String[] args) {
if(args.length != 3) {
System.out.println("Usage: [-b | -a | -h] [-frequency | -num_unique] <filename> ");
System.out.println("-b - Use an Unbalanced BST");
System.out.println("-a - Use an AVL Tree");
System.out.println("-h - Use a Hashtable ");
System.out.println("-frequency - Print all the word/frequency pairs, " +
"ordered by frequency, and then by the words in" +
"lexicographic order. ");
System.out.println("-num_unique - Print the number of unique words in the document. " +
"This is the total number of distinct (different) words in the document. " +
"Words that appear more than once are only counted as a single word for " +
"this statistic");
return;
}

try {
switch(args[1]) {
case "-frequency":
countWordFrequencies(countWords(args[0], args[2]));
break;
case "-num_unique":
countUniqueWords(countWords(args[0], args[2]));
break;
default:
System.out.println("Invalid choice for second argument");
break;
}
} catch(IOException e) {
System.out.println("An error occurred when parsing the file!:");
System.out.println(e.getMessage());
}
}

  
public static DataCount<String>[] countWords(String dataStructure, String filename) throws IOException {
FileWordReader fileWordReader = new FileWordReader(filename);
DataCounter<String> wordCounter;

switch(dataStructure) {
case "-b":
wordCounter = new BinarySearchTree<>();
break;
case "-a":
wordCounter = new AVLTree<>();
break;
case "-h":
wordCounter = new HashTable();
break;
default:
wordCounter = new BinarySearchTree<>();
System.out.println("Invalid data structure. Using BST by default.");
break;
}

String word;
while((word = fileWordReader.nextWord()) != null) {
wordCounter.incCount(word);
}

return wordCounter.getCounts();
}

private static <E> void printWordCounts(DataCount<E>[] dataCounts) {
Arrays.stream(dataCounts)
.forEach(count -> {
System.out.format("%d %s ", count.count, count.data);
});
}

private static void countWordFrequencies(DataCount<String>[] dataCounts) {
sort(dataCounts, (count1, count2) -> count2.count - count1.count);

System.out.println("Ordered by Frequency:");
printWordCounts(dataCounts);

sort(dataCounts, (count1, count2) -> count1.data.compareTo(count2.data));

System.out.println(" Ordered Lexicographically:");
printWordCounts(dataCounts);
}

private static void countUniqueWords(DataCount<String>[] dataCounts) {
System.out.println("Unique words: " + dataCounts.length);
}

private static <E> void sort(DataCount<E>[] dataCounts, Comparator<DataCount<E>> comparator) {
if(dataCounts.length > 1) {
int mid = dataCounts.length / 2;
DataCount<E>[] left = Arrays.copyOfRange(dataCounts, 0, mid);
DataCount<E>[] right = Arrays.copyOfRange(dataCounts, mid, dataCounts.length);

sort(left, comparator);
sort(right, comparator);
merge(dataCounts, left, right, comparator);
}
}

private static <E> void merge(DataCount<E>[] dataCounts,
DataCount<E>[] left, DataCount<E>[] right,
Comparator<DataCount<E>> comparator) {
int i = 0, j = 0;
for(int k = 0; k < dataCounts.length; k++) {
if(i < left.length && j < right.length) {
if(comparator.compare(left[i], right[j]) <= 0) {
dataCounts[k] = left[i++];
} else {
dataCounts[k] = right[j++];
}
} else if(i < left.length) {
dataCounts[k] = left[i++];
} else if(j < right.length) {
dataCounts[k] = right[j++];
}
}
}
}

HashTable.java

------------------------------------------

package wordcounter;

import java.util.LinkedList;

public class HashTable implements DataCounter<String> {

  

private class Cell {

private String value;

private String key;

private int count;

public Cell(String value) {

this.count = 1;

this.value = value;

this.key = value;

}

public String getValue() {

return this.value;

}

public String getkey() {

return this.key;

}

public int getCount() {

return count;

}

public void incCount() {

this.count++;;

}

}

static final int INIT_TABLE_SIZE = 7919; // with 97, times are similar BST. 7919 faster

static final double INIT_MAX_LAMBDA = 1.5;

private LinkedList<Cell>[] table;

private int count;

private int tableSize;

private double maxLambda;

public HashTable() {

this(INIT_TABLE_SIZE);

}

public HashTable(int size) {

count = 0;

if (size < INIT_TABLE_SIZE)

tableSize = INIT_TABLE_SIZE;

else

tableSize = nextPrime(size);

intializeTable();

maxLambda = INIT_MAX_LAMBDA;

}

@SuppressWarnings("unchecked")

private void intializeTable() {

table = new LinkedList[tableSize];

for(int i = 0;i<tableSize;i++) {

if(table[i] == null)

table[i] = new LinkedList<Cell>();

}

}

public DataCount<String>[] getCounts() {

@SuppressWarnings("unchecked")

DataCount<String>[] counter = new DataCount[count];

int k = 0;

for(int i = 0; i < table.length; i++) {

if(!table[i].isEmpty()) {

for(int j = 0;j < table[i].size();j++) {

counter[k]= new DataCount<String>(table[i].get(j).getValue(), table[i].get(j).getCount());

k++;

}

}

}

return counter;

}

public int getSize() {

// TODO Auto-generated method stub

return count;

}

public void incCount(String data) {

// TODO Auto-generated method stub

Cell newCell = new Cell(data);

int index = -1;

if(contains(newCell.getkey())) {

newCell = get(newCell.getkey());

newCell.incCount();

}

else {

index = hashingFunction(newCell.getkey());

table[index].add(newCell);

count++;

}

if ( count > maxLambda * tableSize )

reSize();

}

public Boolean contains(String key) {

Boolean found = false;

int index = hashingFunction(key);

for(int i = 0; i < table[index].size(); i++) {

if(table[index].get(i).getkey().equals(key)) {

found = true;

}

}

return found;

}

public Cell get(String key) {

int index = hashingFunction(key);

Cell targetValue = null;

for(int i = 0; i < table[index].size(); i++) { //searching linked list at index for key value

if(table[index].get(i).getkey().equals(key)) {

targetValue = table[index].get(i);

break;

}

}

return targetValue;

}

private int hashingFunction(String key)

{

int index;

index = _hashingFunction(key) % tableSize;

if (index < 0 )

index += tableSize;

return index;

}

// helper method to calculate hash

private int _hashingFunction( String key )

{

int index = 0;

char [] val = key.toCharArray();

for ( int i = 0; i < key.length(); i++ )

index = 31 * index + val[i];

return index;

}

private void reSize()

{

// save old list and size then can allocate freely

LinkedList<Cell>[] oldTable = table;

int oldTableSize = tableSize;

tableSize = nextPrime(2*oldTableSize);

// allocate a larger, empty array

intializeTable();

// use the reinsert() algorithm to re-enter old data

for (int i = 0; i < oldTableSize; i++ )

for ( int j = 0; j < oldTable[i].size(); j++)

reInsert( oldTable[i].get(j) );

}

//helper function to re-insert data into new table

private void reInsert(Cell cell) {

int index = hashingFunction(cell.getkey());

table[index].add(cell);

}

  

public boolean setMaxLambda( double lam )

{

if ( lam < .1 || lam > 100.)

return false;

maxLambda = lam;

return true;

}

private static int nextPrime( int n )

{

int k, candidate, loopLim;

// loop doesn't work for 2 or 3

if ( n <= 2 )

return 2;

else if ( n == 3 )

return 3;

for ( candidate = (n%2 == 0)? n+1 : n ; true ; candidate += 2)

{

// all primes > 3 are of the form 6k +/- 1

loopLim = (int)( (Math.sqrt((double)candidate) + 1)/6);

// we know it is odd. check for divisibility by 3

if ( candidate % 3 == 0)

continue;

// now we can check for divisibility by 6k +/1 1 up to sqrt

for ( k = 1; k <= loopLim; k++ )

{

if (candidate % (6*k - 1) == 0)

break;

if (candidate % (6*k + 1) == 0)

break;

}

if ( k > loopLim)

return candidate;

}

}

}