import java.util.Set; public class SetUtilities { /** * Returns a new set repres
ID: 3590748 • Letter: I
Question
import java.util.Set;
/**
* Skeleton of a utility class offering static set operations (union,
* intersection, difference, Jaccard index).
*
* The method bodies are left empty in this skeleton and must be filled in
* before the class will compile.
*/
public class SetUtilities {
/**
* Returns a new set representing the union of s and t.
*
* Does not modify s or t.
* @param s the first set
* @param t the second set
* @return a new set representing the union of s and t
*/
public static <E> Set<E> union(Set<E> s, Set<E> t) {
// Not implemented in this skeleton.
}
/**
* Returns a new set representing the intersection of s and t.
*
* Does not modify s or t.
* @param s the first set
* @param t the second set
* @return a new set representing the intersection of s and t
*/
public static <E> Set<E> intersection(Set<E> s, Set<E> t) {
// Not implemented in this skeleton.
}
/**
* Returns a new set representing the set difference of s and t,
* that is, s \ t (the elements of s that are not in t).
*
* Does not modify s or t.
* @param s the set to take elements from
* @param t the set of elements to leave out
* @return a new set representing the difference of s and t
*/
public static <E> Set<E> setDifference(Set<E> s, Set<E> t) {
// Not implemented in this skeleton.
}
/**
* Returns the Jaccard index of the two sets s and t.
*
* It is defined as 1 if both sets are empty.
*
* Otherwise, it is defined as the size of the intersection of the sets,
* divided by the size of the union of the sets.
*
* Does not modify s or t.
*
* @param s the first set
* @param t the second set
* @return the Jaccard index of s and t
*/
public static <E> double jaccardIndex(Set<E> s, Set<E> t) {
// Not implemented in this skeleton.
}
}
=======================================================================
import java.util.List;
import java.util.Set;
/**
* Skeleton of a utility class for measuring how similar two texts are,
* either line by line or by shingles of adjacent words.
*
* Every method in this skeleton returns a placeholder value (null or -1.0)
* and still has to be implemented.
*/
public class SimilarityUtilities {
/**
* Returns the set of non-empty lines contained in a text, trimmed of
* leading and trailing whitespace.
*
* @param text the text to break into lines
* @return the trimmed set of lines
*/
public static Set<String> trimmedLines(String text) {
// Placeholder: not implemented in this skeleton.
return null;
}
/**
* Returns a list of words in the text, in the order they appeared in the text,
* converted to lowercase.
*
* Words are defined as a contiguous sequence of letters and numbers.
*
* @param text the text to break into words
* @return a list of lowercase words
*/
public static List<String> asLowercaseWords(String text) {
// Placeholder: not implemented in this skeleton.
return null;
}
/**
* Returns the line-based similarity of two texts.
*
* The line-based similarity is the Jaccard index between each text's line
* set.
*
* A text's line set is the set of trimmed lines in that text, as defined by
* trimmedLines.
*
* @param text1
* a text
* @param text2
* another text
* @return the line-based similarity of text1 and text2
*/
public static double lineSimilarity(String text1, String text2) {
// Placeholder: not implemented in this skeleton.
return -1.0;
}
/**
* Returns the line-based similarity of two texts, ignoring lines that
* also occur in a template.
*
* The line-based similarity is the Jaccard index between each text's line
* set.
*
* A text's line set is the set of trimmed lines in that text, as defined by
* trimmedLines, less the set of trimmed lines from the templateText. Removes
* the template text from consideration after trimming lines, not before.
*
* @param text1
* a text
* @param text2
* another text
* @param templateText
* a template, representing things the two texts have in common
* @return the line-based similarity of text1 and text2 with the template's
* lines removed
*/
public static double lineSimilarity(String text1, String text2, String templateText) {
// Placeholder: not implemented in this skeleton.
return -1.0;
}
/**
* Returns a set of strings representing the shingling of the given length
* of a list of words.
*
* A shingling of length k of a list of words is the set of all k-shingles
* of that list.
*
* A k-shingle is the concatenation of k adjacent words.
*
* For example, the shingling of length 3 of the list: ["a" "very" "fine"
* "young" "man" "I" "know"] is the set: {"averyfine" "veryfineyoung"
* "fineyoungman" "youngmanI" "manIknow"}.
*
* @param words the words to shingle, in order
* @param shingleLength the number of adjacent words in each shingle (k)
* @return the set of all shingles of the given length
*/
public static Set<String> shingle(List<String> words, int shingleLength) {
// Placeholder: not implemented in this skeleton.
return null;
}
/**
* Returns the shingled word similarity of two texts.
*
* The shingled word similarity is the Jaccard index between each text's
* shingle set.
*
* A text's shingle set is the set of shingles (of the given length) for the
* entire text, as defined by shingle and asLowercaseWords,
* less the shingle set of the templateText. Removes the templateText
* from consideration after shingling, not before.
*
* @param text1 a text
* @param text2 another text
* @param templateText a template, representing things the two texts have in common
* @param shingleLength the number of adjacent words in each shingle
* @return the shingled word similarity of text1 and text2 with the
* template's shingles removed
*/
public static double shingleSimilarity(String text1, String text2, String templateText, int shingleLength) {
// Placeholder: not implemented in this skeleton.
return -1.0;
}
}
1. Translate written descriptions of behavior into code.
2. Use String.split.
3. Implement the shingleSimilarity method, which parallels the second lineSimilarity method.
Thank you.
Explanation / Answer
SetUtilities.java
package sets;
import java.util.HashSet;
import java.util.Set;
public class SetUtilities {
/**
* Returns a new set representing the union of s and t.
*
* Does not modify s or t.
* @param s
* @param t
* @return a new set representing the union of s and t
*/
public static <E> Set<E> union(Set<E> s, Set<E> t) {
Set<E> union = new HashSet<E>(s);
for(E a : t){
if(!union.contains(a)){
union.add(a);
}
}
return union;
}
/**
* Returns a new set representing the intersection of s and t.
*
* Does not modify s or t.
* @param s
* @param t
* @return a new set representing the intersection of s and t
*/
public static <E> Set<E> intersection(Set<E> s, Set<E> t) {
Set<E> intersection = new HashSet<E>();
for(E a : t){
if(s.contains(a)){
intersection.add(a);
}
}
return intersection;
}
/**
* Returns a new set representing the set difference s and t,
* that is, s t.
*
* Does not modify s or t.
* @param s
* @param t
* @return a new set representing the difference of s and t
*/
public static <E> Set<E> setDifference(Set<E> s, Set<E> t) {
Set<E> setDifference = new HashSet<E>(s);
setDifference.removeAll(t);
return setDifference;
}
/**
* Returns the Jaccard index of the two sets s and t.
*
* It is defined as the size of the intersection of the sets,
* divided by the size of the union of the sets.
*
* It is defined as 1 if both sets are empty.
*
* Does not modify s or t.
* @param s
* @param t
* @return the Jaccard index of s and t
*/
public static <E> double jaccardIndex(Set<E> s, Set<E> t) {
if(s.isEmpty() && t.isEmpty()){
return 1;
}
double intersection = intersection(s,t).size();
double union = union(s,t).size();
return intersection/union;
}
}
SimilarityUtilities.java
package similarity;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import sets.SetUtilities;
public class SimilarityUtilities {
/**
* Returns the set of non-empty lines contained in a text, trimmed of
* leading and trailing whitespace.
*
* @param text
* @return the trimmed set of lines
*/
public static Set<String> trimmedLines(String text) {
String[] trimmed = text.split(" ");
Set<String> t = new HashSet<String>();
String trimTemp = "";
for (int i = 0; i < trimmed.length; i++) {
trimTemp = trimmed[i].trim();
if (!trimTemp.isEmpty()) {
t.add(trimTemp);
}
}
return t;
}
/**
* Returns a list of words in the text, converted to lowercase.
*
* Words are defined as a contiguous sequence of letters and numbers.
*
* @param text
* @return a list of lowercase words
*/
public static List<String> asLowercaseWords(String text) {
String textLower = text.toLowerCase();
String[] textL = textLower.split("\W+");
List<String> textF = new ArrayList<String>();
String lowerTemp = "";
for (int i = 0; i < textL.length; i++) {
lowerTemp = textL[i].trim();
if(!lowerTemp.isEmpty()){
textF.add(lowerTemp);
}
}
return textF;
}
/**
* Returns the line-based similarity of two texts.
*
* The line-based similarity is the Jaccard index between each text's line
* set.
*
* A text's line set is the set of trimmed lines in that text, as defined by
* trimmedLines.
*
* @param text1
* a text
* @param text2
* another text
* @return
*/
public static double lineSimilarity(String text1, String text2) {
Set<String> s1 = trimmedLines(text1);
Set<String> s2 = trimmedLines(text2);
double lineSimilarity = SetUtilities.jaccardIndex(s1, s2);
return lineSimilarity;
}
/**
* Returns the line-based similarity of two texts.
*
* The line-based similarity is the Jaccard index between each text's line
* set.
*
* A text's line set is the set of trimmed lines in that text, as defined by
* trimmedLines, less the set of trimmed lines from the templateText.
*
* @param text1
* a text
* @param text2
* another text
* @param templateText
* a template, representing things the two texts have in common
* @return
*/
public static double lineSimilarity(String text1, String text2, String templateText) {
Set<String> s1 = trimmedLines(text1);
Set<String> s2 = trimmedLines(text2);
Set<String> s3 = trimmedLines(templateText);
Set<String> d1 = SetUtilities.setDifference(s1, s3);
Set<String> d2 = SetUtilities.setDifference(s2, s3);
double lineSimilarity = SetUtilities.jaccardIndex(d1, d2);
return lineSimilarity;
}
/**
* Returns a set of strings representing the shingling of the given length
* of a list of words.
*
* A shingling of length k of a list of words is the set of all k-shingles
* of that list.
*
* A k-shingle is the concatenation of k adjacent words.
*
* For example, a 3-shingle of the list: ["a" "very" "fine" "young" "man"
* "I" "know"] is the set: {"averyfine" "veryfineyoung" "fineyoungman"
* "youngmanI" "manIknow"}.
*
* @param words
* @param shingleLength
* @return
*/
public static Set<String> shingle(List<String> words, int shingleLength) {
Set<String> shingle = new HashSet<String>();
for(int i = 0; i < (words.size()-shingleLength+1); i++){
String temp = "";
for(int j = i; j < i + shingleLength; j++){
temp += words.get(j);
}
shingle.add(temp);
}
return shingle;
}
/**
* Returns the shingled word similarity of two texts.
*
* The shingled word similarity is the Jaccard index between each text's
* shingle set.
*
* A text's shingle set is the set of shingles (of the given length) in that
* text, as defined by shingle and asLowercaseWords, less the shingle set of
* the templateText.
*
* @param text1
* @param text2
* @param templateText
* @param shingleLength
* @return
*/
public static double shingleSimilarity(String text1, String text2, String templateText, int shingleLength) {
List<String> s1 = asLowercaseWords(text1);
List<String> s2 = asLowercaseWords(text2);
List<String> s3 = asLowercaseWords(templateText);
Set<String> shingle1 = shingle(s1,shingleLength);
Set<String> shingle2 = shingle(s2,shingleLength);
Set<String> shingle3 = shingle(s3,shingleLength);
Set<String> d1 = SetUtilities.setDifference(shingle1, shingle3);
Set<String> d2 = SetUtilities.setDifference(shingle2, shingle3);
double shingleSimilarity = SetUtilities.jaccardIndex(d1, d2);
return shingleSimilarity;
}
}
Related Questions
Navigate
Integrity-first tutoring: explanations and feedback only — we do not complete graded work. Learn more.