import java.io.IOException; /** * An executable that counts the words in a files
ID: 3908389 • Letter: I
Question
import java.io.IOException;
/**
* An executable that counts the words in a files and prints out the counts in
* descending order. You will need to modify this file.
*/
public class WordCount {
private static void countWords(String file) {
DataCounter<String> counter = new BinarySearchTree<String>();
try {
FileWordReader reader = new FileWordReader(file);
String word = reader.nextWord();
while (word != null) {
counter.incCount(word);
word = reader.nextWord();
}
} catch (IOException e) {
System.err.println("Error processing " + file + e);
System.exit(1);
}
DataCount<String>[] counts = counter.getCounts();
sortByDescendingCount(counts);
for (DataCount<String> c : counts)
System.out.println(c.count + " " + c.data);
}
/**
* TODO Replace this comment with your own.
*
* Sort the count array in descending order of count. If two elements have
* the same count, they should be in alphabetical order (for Strings, that
* is. In general, use the compareTo method for the DataCount.data field).
*
* This code uses insertion sort. You should modify it to use a different
* sorting algorithm. NOTE: the current code assumes the array starts in
* alphabetical order! You'll need to make your code deal with unsorted
* arrays.
*
* The generic parameter syntax here is new, but it just defines E as a
* generic parameter for this method, and constrains E to be Comparable. You
* shouldn't have to change it.
*
* @param counts array to be sorted.
*/
private static <E extends Comparable<? super E>> void sortByDescendingCount(
DataCount<E>[] counts) {
for (int i = 1; i < counts.length; i++) {
DataCount<E> x = counts[i];
int j;
for (j = i - 1; j >= 0; j--) {
if (counts[j].count >= x.count) {
break;
}
counts[j + 1] = counts[j];
}
counts[j + 1] = x;
}
}
public static void main(String[] args) {
if (args.length != 1) {
System.err.println("Usage: filename of document to analyze");
System.exit(1);
}
countWords(args[0]);
}
}
------------------------------------------------------------------
/**
* An example of how to test your binary search tree. You should use this as
* inspiration for your unit tests.
*/
public class TestBinarySearchTree {
public static void main(String[] args) {
boolean dumpall = false, notest = false;
if (args.length > 1
|| (args.length == 1 && args[0].compareTo("dump") != 0 && args[0]
.compareTo("notest") != 0)) {
System.err.println("Arguments: [dump] to dump all output");
System.err.println(" [notest] to skip tests");
System.err.println("No arguments justs tests output");
return;
}
if (args.length == 1) {
dumpall = true;
if (args[0].compareTo("notest") == 0)
notest = true;
}
String[][] tests = {
{ "g", "h", "a", "b", "a", "h", "j", "e", "e", "f" },
{ "5", "3", "1", "2", "7", "6", "0", "8", "9", "4", "3", "5",
"0", "9" }, {} };
String[][] expected = {
{
"([g,1] . .)",
"([g,1] . ([h,1] . .))",
"([g,1] ([a,1] . .) ([h,1] . .))",
"([g,1] ([a,1] . ([b,1] . .)) ([h,1] . .))",
"([g,1] ([a,2] . ([b,1] . .)) ([h,1] . .))",
"([g,1] ([a,2] . ([b,1] . .)) ([h,2] . .))",
"([g,1] ([a,2] . ([b,1] . .)) ([h,2] . ([j,1] . .)))",
"([g,1] ([a,2] . ([b,1] . ([e,1] . .))) ([h,2] . ([j,1] . .)))",
"([g,1] ([a,2] . ([b,1] . ([e,2] . .))) ([h,2] . ([j,1] . .)))",
"([g,1] ([a,2] . ([b,1] . ([e,2] . ([f,1] . .)))) ([h,2] . ([j,1] . .)))",
"a,2 b,1 e,2 f,1 g,1 h,2 j,1 " },
{
"([5,1] . .)",
"([5,1] ([3,1] . .) .)",
"([5,1] ([3,1] ([1,1] . .) .) .)",
"([5,1] ([3,1] ([1,1] . ([2,1] . .)) .) .)",
"([5,1] ([3,1] ([1,1] . ([2,1] . .)) .) ([7,1] . .))",
"([5,1] ([3,1] ([1,1] . ([2,1] . .)) .) ([7,1] ([6,1] . .) .))",
"([5,1] ([3,1] ([1,1] ([0,1] . .) ([2,1] . .)) .) ([7,1] ([6,1] . .) .))",
"([5,1] ([3,1] ([1,1] ([0,1] . .) ([2,1] . .)) .) ([7,1] ([6,1] . .) ([8,1] . .)))",
"([5,1] ([3,1] ([1,1] ([0,1] . .) ([2,1] . .)) .) ([7,1] ([6,1] . .) ([8,1] . ([9,1] . .))))",
"([5,1] ([3,1] ([1,1] ([0,1] . .) ([2,1] . .)) ([4,1] . .)) ([7,1] ([6,1] . .) ([8,1] . ([9,1] . .))))",
"([5,1] ([3,2] ([1,1] ([0,1] . .) ([2,1] . .)) ([4,1] . .)) ([7,1] ([6,1] . .) ([8,1] . ([9,1] . .))))",
"([5,2] ([3,2] ([1,1] ([0,1] . .) ([2,1] . .)) ([4,1] . .)) ([7,1] ([6,1] . .) ([8,1] . ([9,1] . .))))",
"([5,2] ([3,2] ([1,1] ([0,2] . .) ([2,1] . .)) ([4,1] . .)) ([7,1] ([6,1] . .) ([8,1] . ([9,1] . .))))",
"([5,2] ([3,2] ([1,1] ([0,2] . .) ([2,1] . .)) ([4,1] . .)) ([7,1] ([6,1] . .) ([8,1] . ([9,2] . .))))",
"0,2 1,1 2,1 3,2 4,1 5,2 6,1 7,1 8,1 9,2 " },
{ "<empty tree>", "No Data" } };
boolean error = false;
for (int i = 0; i < tests.length; i++) {
BinarySearchTree<String> tree = new BinarySearchTree<String>();
for (int j = 0; j < tests[i].length; j++) {
tree.incCount(tests[i][j]);
String out = tree.dump();
if (notest || out.compareTo(expected[i][j]) != 0)
error = true;
if (dumpall)
System.out.println(out);
}
if (tests[i].length < 1) {
String out = tree.dump();
if (notest || out.compareTo(expected[i][0]) != 0)
error = true;
if (dumpall)
System.out.println(out);
}
DataCount<String>[] cnt = tree.getCounts();
String out = "";
if (cnt != null && cnt.length > 0)
for (int j = 0; j < cnt.length; j++)
out += cnt[j].data + "," + cnt[j].count + " ";
else
out = "No Data";
if (notest
|| out.compareTo(expected[i][expected[i].length - 1]) != 0)
error = true;
if (dumpall)
System.out.println(out);
}
if (!notest) {
if (error)
System.out.println("Test failed!");
else
System.out.println("Test passed.");
}
}
}
------------------------------------------------------------
/**
* TODO Replace this comment with your own.
*
* Stub code for an implementation of a DataCounter that uses a hash table as
* its backing data structure. We included this stub so that it's very clear
* that HashTable works only with Strings, whereas the DataCounter interface is
* generic. You need the String contents to write your hashcode code.
*/
public class HashTable implements DataCounter<String> {
/** {@inheritDoc} */
public DataCount<String>[] getCounts() {
// TODO Auto-generated method stub
return null;
}
/** {@inheritDoc} */
public int getSize() {
// TODO Auto-generated method stub
return 0;
}
/** {@inheritDoc} */
public void incCount(String data) {
// TODO Auto-generated method stub
}
}
------------------------------------------------------
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.StreamTokenizer;
/**
* FileWordReader reads words from a file one-by-one, converting to lowercase
* and eliminating punctuation. You can read a file in using:
*
*/
class FileWordReader {
StreamTokenizer tok;
public FileWordReader(String file) throws FileNotFoundException,
IOException {
tok = new StreamTokenizer(new BufferedReader(new InputStreamReader(
new FileInputStream(file))));
tok.eolIsSignificant(false);
tok.lowerCaseMode(true);
tok.wordChars('a', 'z');
tok.wordChars('A', 'Z');
String ws = " .,!;:-[].,;!?*+-=\|/(){}"[]><'";
for (int i = 0; i < ws.length(); i++) {
tok.whitespaceChars(ws.charAt(i), ws.charAt(i));
}
}
public String nextWord() throws IOException {
int ttype = tok.nextToken();
while (ttype != StreamTokenizer.TT_WORD
&& ttype != StreamTokenizer.TT_EOF)
ttype = tok.nextToken();
if (ttype == StreamTokenizer.TT_EOF)
return null;
return tok.sval;
}
};
Explanation / Answer
WordCount.java
---------------------------------------
import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
public class WordCount {
public static void main(String[] args) {
if(args.length != 3) {
System.out.println("Usage: [-b | -a | -h] [-frequency | -num_unique] <filename> ");
System.out.println("-b - Use an Unbalanced BST");
System.out.println("-a - Use an AVL Tree");
System.out.println("-h - Use a Hashtable ");
System.out.println("-frequency - Print all the word/frequency pairs, " +
"ordered by frequency, and then by the words in" +
"lexicographic order. ");
System.out.println("-num_unique - Print the number of unique words in the document. " +
"This is the total number of distinct (different) words in the document. " +
"Words that appear more than once are only counted as a single word for " +
"this statistic");
return;
}
try {
switch(args[1]) {
case "-frequency":
countWordFrequencies(countWords(args[0], args[2]));
break;
case "-num_unique":
countUniqueWords(countWords(args[0], args[2]));
break;
default:
System.out.println("Invalid choice for second argument");
break;
}
} catch(IOException e) {
System.out.println("An error occurred when parsing the file!:");
System.out.println(e.getMessage());
}
}
public static DataCount<String>[] countWords(String dataStructure, String filename) throws IOException {
FileWordReader fileWordReader = new FileWordReader(filename);
DataCounter<String> wordCounter;
switch(dataStructure) {
case "-b":
wordCounter = new BinarySearchTree<>();
break;
case "-a":
wordCounter = new AVLTree<>();
break;
case "-h":
wordCounter = new HashTable();
break;
default:
wordCounter = new BinarySearchTree<>();
System.out.println("Invalid data structure. Using BST by default.");
break;
}
String word;
while((word = fileWordReader.nextWord()) != null) {
wordCounter.incCount(word);
}
return wordCounter.getCounts();
}
private static <E> void printWordCounts(DataCount<E>[] dataCounts) {
Arrays.stream(dataCounts)
.forEach(count -> {
System.out.format("%d %s ", count.count, count.data);
});
}
private static void countWordFrequencies(DataCount<String>[] dataCounts) {
sort(dataCounts, (count1, count2) -> count2.count - count1.count);
System.out.println("Ordered by Frequency:");
printWordCounts(dataCounts);
sort(dataCounts, (count1, count2) -> count1.data.compareTo(count2.data));
System.out.println(" Ordered Lexicographically:");
printWordCounts(dataCounts);
}
private static void countUniqueWords(DataCount<String>[] dataCounts) {
System.out.println("Unique words: " + dataCounts.length);
}
private static <E> void sort(DataCount<E>[] dataCounts, Comparator<DataCount<E>> comparator) {
if(dataCounts.length > 1) {
int mid = dataCounts.length / 2;
DataCount<E>[] left = Arrays.copyOfRange(dataCounts, 0, mid);
DataCount<E>[] right = Arrays.copyOfRange(dataCounts, mid, dataCounts.length);
sort(left, comparator);
sort(right, comparator);
merge(dataCounts, left, right, comparator);
}
}
private static <E> void merge(DataCount<E>[] dataCounts,
DataCount<E>[] left, DataCount<E>[] right,
Comparator<DataCount<E>> comparator) {
int i = 0, j = 0;
for(int k = 0; k < dataCounts.length; k++) {
if(i < left.length && j < right.length) {
if(comparator.compare(left[i], right[j]) <= 0) {
dataCounts[k] = left[i++];
} else {
dataCounts[k] = right[j++];
}
} else if(i < left.length) {
dataCounts[k] = left[i++];
} else if(j < right.length) {
dataCounts[k] = right[j++];
}
}
}
}
HashTable.java
------------------------------------------
package wordcounter;
import java.util.LinkedList;
public class HashTable implements DataCounter<String> {
private class Cell {
private String value;
private String key;
private int count;
public Cell(String value) {
this.count = 1;
this.value = value;
this.key = value;
}
public String getValue() {
return this.value;
}
public String getkey() {
return this.key;
}
public int getCount() {
return count;
}
public void incCount() {
this.count++;;
}
}
static final int INIT_TABLE_SIZE = 7919; // with 97, times are similar BST. 7919 faster
static final double INIT_MAX_LAMBDA = 1.5;
private LinkedList<Cell>[] table;
private int count;
private int tableSize;
private double maxLambda;
public HashTable() {
this(INIT_TABLE_SIZE);
}
public HashTable(int size) {
count = 0;
if (size < INIT_TABLE_SIZE)
tableSize = INIT_TABLE_SIZE;
else
tableSize = nextPrime(size);
intializeTable();
maxLambda = INIT_MAX_LAMBDA;
}
@SuppressWarnings("unchecked")
private void intializeTable() {
table = new LinkedList[tableSize];
for(int i = 0;i<tableSize;i++) {
if(table[i] == null)
table[i] = new LinkedList<Cell>();
}
}
public DataCount<String>[] getCounts() {
@SuppressWarnings("unchecked")
DataCount<String>[] counter = new DataCount[count];
int k = 0;
for(int i = 0; i < table.length; i++) {
if(!table[i].isEmpty()) {
for(int j = 0;j < table[i].size();j++) {
counter[k]= new DataCount<String>(table[i].get(j).getValue(), table[i].get(j).getCount());
k++;
}
}
}
return counter;
}
public int getSize() {
// TODO Auto-generated method stub
return count;
}
public void incCount(String data) {
// TODO Auto-generated method stub
Cell newCell = new Cell(data);
int index = -1;
if(contains(newCell.getkey())) {
newCell = get(newCell.getkey());
newCell.incCount();
}
else {
index = hashingFunction(newCell.getkey());
table[index].add(newCell);
count++;
}
if ( count > maxLambda * tableSize )
reSize();
}
public Boolean contains(String key) {
Boolean found = false;
int index = hashingFunction(key);
for(int i = 0; i < table[index].size(); i++) {
if(table[index].get(i).getkey().equals(key)) {
found = true;
}
}
return found;
}
public Cell get(String key) {
int index = hashingFunction(key);
Cell targetValue = null;
for(int i = 0; i < table[index].size(); i++) { //searching linked list at index for key value
if(table[index].get(i).getkey().equals(key)) {
targetValue = table[index].get(i);
break;
}
}
return targetValue;
}
private int hashingFunction(String key)
{
int index;
index = _hashingFunction(key) % tableSize;
if (index < 0 )
index += tableSize;
return index;
}
// helper method to calculate hash
private int _hashingFunction( String key )
{
int index = 0;
char [] val = key.toCharArray();
for ( int i = 0; i < key.length(); i++ )
index = 31 * index + val[i];
return index;
}
private void reSize()
{
// save old list and size then can allocate freely
LinkedList<Cell>[] oldTable = table;
int oldTableSize = tableSize;
tableSize = nextPrime(2*oldTableSize);
// allocate a larger, empty array
intializeTable();
// use the reinsert() algorithm to re-enter old data
for (int i = 0; i < oldTableSize; i++ )
for ( int j = 0; j < oldTable[i].size(); j++)
reInsert( oldTable[i].get(j) );
}
//helper function to re-insert data into new table
private void reInsert(Cell cell) {
int index = hashingFunction(cell.getkey());
table[index].add(cell);
}
public boolean setMaxLambda( double lam )
{
if ( lam < .1 || lam > 100.)
return false;
maxLambda = lam;
return true;
}
private static int nextPrime( int n )
{
int k, candidate, loopLim;
// loop doesn't work for 2 or 3
if ( n <= 2 )
return 2;
else if ( n == 3 )
return 3;
for ( candidate = (n%2 == 0)? n+1 : n ; true ; candidate += 2)
{
// all primes > 3 are of the form 6k +/- 1
loopLim = (int)( (Math.sqrt((double)candidate) + 1)/6);
// we know it is odd. check for divisibility by 3
if ( candidate % 3 == 0)
continue;
// now we can check for divisibility by 6k +/1 1 up to sqrt
for ( k = 1; k <= loopLim; k++ )
{
if (candidate % (6*k - 1) == 0)
break;
if (candidate % (6*k + 1) == 0)
break;
}
if ( k > loopLim)
return candidate;
}
}
}
Related Questions
Navigate
Integrity-first tutoring: explanations and feedback only — we do not complete graded work. Learn more.