Academic Integrity: tutoring, explanations, and feedback — we don’t complete graded work or submit on a student’s behalf.

import java.util.Set; public class SetUtilities { /** * Returns a new set repres

ID: 3590748 • Letter: I

Question

import java.util.Set;

/**
 * Exercise skeleton for basic set operations.
 *
 * Every method body below is empty; each Javadoc comment states the contract
 * the finished method is expected to satisfy. The class does not compile until
 * the non-void methods return a value.
 */
public class SetUtilities {

   /**
    * Returns a new set representing the union of s and t, that is, every
    * element that is in s, in t, or in both.
    *
    * Does not modify s or t.
    *
    * @param <E> the element type of both sets
    * @param s the first set
    * @param t the second set
    * @return a new set representing the union of s and t
    */

   public static <E> Set<E> union(Set<E> s, Set<E> t) {

   }

   /**
    * Returns a new set representing the intersection of s and t, that is,
    * every element that is in both s and t.
    *
    * Does not modify s or t.
    *
    * @param <E> the element type of both sets
    * @param s the first set
    * @param t the second set
    * @return a new set representing the intersection of s and t
    */

   public static <E> Set<E> intersection(Set<E> s, Set<E> t) {

   }

   /**
    * Returns a new set representing the set difference of s and t,
    * that is, s \ t: every element of s that is not also in t.
    *
    * Does not modify s or t.
    *
    * @param <E> the element type of both sets
    * @param s the set to take elements from
    * @param t the set of elements to leave out
    * @return a new set representing the difference of s and t
    */

   public static <E> Set<E> setDifference(Set<E> s, Set<E> t) {

   }

  

   /**
    * Returns the Jaccard index of the two sets s and t.
    *
    * It is defined as 1 if both sets are empty.
    *
    * Otherwise, it is defined as the size of the intersection of the sets,
    * divided by the size of the union of the sets.
    *
    * Does not modify s or t.
    *
    * @param <E> the element type of both sets
    * @param s the first set
    * @param t the second set
    * @return the Jaccard index of s and t
    */

   public static <E> double jaccardIndex(Set<E> s, Set<E> t) {

   }

}
=======================================================================

import java.util.List;

import java.util.Set;

/**
 * Exercise skeleton for text-similarity measures built on set operations.
 *
 * Every method below returns a placeholder (null or -1.0); each Javadoc
 * comment states the contract the finished method is expected to satisfy.
 */
public class SimilarityUtilities {

/**
 * Returns the set of non-empty lines contained in a text, trimmed of
 * leading and trailing whitespace.
 *
 * @param text the text to split into lines
 * @return the trimmed set of lines (the skeleton currently returns null)
 */

public static Set<String> trimmedLines(String text) {

return null;

}

/**
 * Returns a list of words in the text, in the order they appeared in the text,
 * converted to lowercase.
 *
 * Words are defined as a contiguous sequence of letters and numbers.
 *
 * @param text the text to split into words
 * @return a list of lowercase words (the skeleton currently returns null)
 */

public static List<String> asLowercaseWords(String text) {

return null;

}

/**
 * Returns the line-based similarity of two texts.
 *
 * The line-based similarity is the Jaccard index between each text's line
 * set.
 *
 * A text's line set is the set of trimmed lines in that text, as defined by
 * trimmedLines.
 *
 * @param text1
 *            a text
 * @param text2
 *            another text
 * @return the Jaccard index of the two line sets (the skeleton currently
 *         returns the placeholder -1.0)
 */

public static double lineSimilarity(String text1, String text2) {

return -1.0;

}

/**
 * Returns the line-based similarity of two texts, ignoring lines that also
 * occur in a shared template.
 *
 * The line-based similarity is the Jaccard index between each text's line
 * set.
 *
 * A text's line set is the set of trimmed lines in that text, as defined by
 * trimmedLines, less the set of trimmed lines from the templateText. Removes
 * the template text from consideration after trimming lines, not before.
 *
 * @param text1
 *            a text
 * @param text2
 *            another text
 * @param templateText
 *            a template, representing things the two texts have in common
 * @return the Jaccard index of the two template-free line sets (the skeleton
 *         currently returns the placeholder -1.0)
 */

public static double lineSimilarity(String text1, String text2, String templateText) {

return -1.0;

}

/**
 * Returns a set of strings representing the shingling of the given length
 * of a list of words.
 *
 * A shingling of length k of a list of words is the set of all k-shingles
 * of that list.
 *
 * A k-shingle is the concatenation of k adjacent words.
 *
 * For example, the shingling of length 3 of the list: ["a" "very" "fine"
 * "young" "man" "I" "know"] is the set: {"averyfine" "veryfineyoung"
 * "fineyoungman" "youngmanI" "manIknow"}.
 *
 * @param words the words to shingle, in order
 * @param shingleLength the number of adjacent words (k) in each shingle
 * @return the set of all k-shingles of words (the skeleton currently
 *         returns null)
 */

public static Set<String> shingle(List<String> words, int shingleLength) {

return null;

}

/**
 * Returns the shingled word similarity of two texts.
 *
 * The shingled word similarity is the Jaccard index between each text's
 * shingle set.
 *
 * A text's shingle set is the set of shingles (of the given length) for the
 * entire text, as defined by shingle and asLowercaseWords,
 * less the shingle set of the templateText. Removes the templateText
 * from consideration after shingling, not before.
 *
 * @param text1 a text
 * @param text2 another text
 * @param templateText a template, representing things the two texts have
 *            in common
 * @param shingleLength the number of adjacent words in each shingle
 * @return the Jaccard index of the two template-free shingle sets (the
 *         skeleton currently returns the placeholder -1.0)
 */

public static double shingleSimilarity(String text1, String text2, String templateText, int shingleLength) {

return -1.0;

}

}


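1. Translate the written descriptions of behavior (the Javadoc comments) into code.
2. Use String.split (for example, to break the text into lines and words).
3. Implement the shingleSimilarity method, which parallels the second (three-argument) lineSimilarity method.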

Thank you.

Explanation / Answer

SetUtilities.java

package sets;

import java.util.HashSet;
import java.util.Set;

public class SetUtilities {
   /**
   * Returns a new set representing the union of s and t.
   *
   * Does not modify s or t.
   * @param s
   * @param t
   * @return a new set representing the union of s and t
   */
   public static <E> Set<E> union(Set<E> s, Set<E> t) {
       Set<E> union = new HashSet<E>(s);
       for(E a : t){
           if(!union.contains(a)){
               union.add(a);
           }
       }
       return union;
   }

   /**
   * Returns a new set representing the intersection of s and t.
   *
   * Does not modify s or t.
   * @param s
   * @param t
   * @return a new set representing the intersection of s and t
   */
   public static <E> Set<E> intersection(Set<E> s, Set<E> t) {
       Set<E> intersection = new HashSet<E>();
       for(E a : t){
           if(s.contains(a)){
               intersection.add(a);
           }
       }
       return intersection;
   }

   /**
   * Returns a new set representing the set difference s and t,
   * that is, s t.
   *
   * Does not modify s or t.
   * @param s
   * @param t
   * @return a new set representing the difference of s and t
   */
   public static <E> Set<E> setDifference(Set<E> s, Set<E> t) {
       Set<E> setDifference = new HashSet<E>(s);
       setDifference.removeAll(t);
       return setDifference;
      
   }
  
   /**
   * Returns the Jaccard index of the two sets s and t.
   *
   * It is defined as the size of the intersection of the sets,
   * divided by the size of the union of the sets.
   *
   * It is defined as 1 if both sets are empty.
   *
   * Does not modify s or t.
   * @param s
   * @param t
   * @return the Jaccard index of s and t
   */
   public static <E> double jaccardIndex(Set<E> s, Set<E> t) {
       if(s.isEmpty() && t.isEmpty()){
           return 1;
       }
       double intersection = intersection(s,t).size();
       double union = union(s,t).size();
       return intersection/union;
   }
}


SimilarityUtilities.java

package similarity;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Pattern;

import sets.SetUtilities;

public class SimilarityUtilities {
   /**
   * Returns the set of non-empty lines contained in a text, trimmed of
   * leading and trailing whitespace.
   *
   * @param text
   * @return the trimmed set of lines
   */
   public static Set<String> trimmedLines(String text) {
       String[] trimmed = text.split(" ");
       Set<String> t = new HashSet<String>();
       String trimTemp = "";
       for (int i = 0; i < trimmed.length; i++) {
           trimTemp = trimmed[i].trim();
           if (!trimTemp.isEmpty()) {
               t.add(trimTemp);
           }

       }
       return t;
   }

   /**
   * Returns a list of words in the text, converted to lowercase.
   *
   * Words are defined as a contiguous sequence of letters and numbers.
   *
   * @param text
   * @return a list of lowercase words
   */
   public static List<String> asLowercaseWords(String text) {
       String textLower = text.toLowerCase();
       String[] textL = textLower.split("\W+");
       List<String> textF = new ArrayList<String>();
       String lowerTemp = "";
       for (int i = 0; i < textL.length; i++) {
           lowerTemp = textL[i].trim();
           if(!lowerTemp.isEmpty()){
               textF.add(lowerTemp);
           }
       }
       return textF;
   }

   /**
   * Returns the line-based similarity of two texts.
   *
   * The line-based similarity is the Jaccard index between each text's line
   * set.
   *
   * A text's line set is the set of trimmed lines in that text, as defined by
   * trimmedLines.
   *
   * @param text1
   *            a text
   * @param text2
   *            another text
   * @return
   */
   public static double lineSimilarity(String text1, String text2) {
       Set<String> s1 = trimmedLines(text1);
       Set<String> s2 = trimmedLines(text2);
       double lineSimilarity = SetUtilities.jaccardIndex(s1, s2);
       return lineSimilarity;
   }

   /**
   * Returns the line-based similarity of two texts.
   *
   * The line-based similarity is the Jaccard index between each text's line
   * set.
   *
   * A text's line set is the set of trimmed lines in that text, as defined by
   * trimmedLines, less the set of trimmed lines from the templateText.
   *
   * @param text1
   *            a text
   * @param text2
   *            another text
   * @param templateText
   *            a template, representing things the two texts have in common
   * @return
   */
   public static double lineSimilarity(String text1, String text2, String templateText) {
       Set<String> s1 = trimmedLines(text1);
       Set<String> s2 = trimmedLines(text2);
       Set<String> s3 = trimmedLines(templateText);
       Set<String> d1 = SetUtilities.setDifference(s1, s3);
       Set<String> d2 = SetUtilities.setDifference(s2, s3);
       double lineSimilarity = SetUtilities.jaccardIndex(d1, d2);
       return lineSimilarity;
   }

   /**
   * Returns a set of strings representing the shingling of the given length
   * of a list of words.
   *
   * A shingling of length k of a list of words is the set of all k-shingles
   * of that list.
   *
   * A k-shingle is the concatenation of k adjacent words.
   *
   * For example, a 3-shingle of the list: ["a" "very" "fine" "young" "man"
   * "I" "know"] is the set: {"averyfine" "veryfineyoung" "fineyoungman"
   * "youngmanI" "manIknow"}.
   *
   * @param words
   * @param shingleLength
   * @return
   */
   public static Set<String> shingle(List<String> words, int shingleLength) {
       Set<String> shingle = new HashSet<String>();
       for(int i = 0; i < (words.size()-shingleLength+1); i++){
           String temp = "";
           for(int j = i; j < i + shingleLength; j++){
               temp += words.get(j);
           }
           shingle.add(temp);
       }
       return shingle;
   }

   /**
   * Returns the shingled word similarity of two texts.
   *
   * The shingled word similarity is the Jaccard index between each text's
   * shingle set.
   *
   * A text's shingle set is the set of shingles (of the given length) in that
   * text, as defined by shingle and asLowercaseWords, less the shingle set of
   * the templateText.
   *
   * @param text1
   * @param text2
   * @param templateText
   * @param shingleLength
   * @return
   */
   public static double shingleSimilarity(String text1, String text2, String templateText, int shingleLength) {
       List<String> s1 = asLowercaseWords(text1);
       List<String> s2 = asLowercaseWords(text2);
      
       List<String> s3 = asLowercaseWords(templateText);
       Set<String> shingle1 = shingle(s1,shingleLength);
       Set<String> shingle2 = shingle(s2,shingleLength);
       Set<String> shingle3 = shingle(s3,shingleLength);
      
       Set<String> d1 = SetUtilities.setDifference(shingle1, shingle3);
       Set<String> d2 = SetUtilities.setDifference(shingle2, shingle3);
      
       double shingleSimilarity = SetUtilities.jaccardIndex(d1, d2);
       return shingleSimilarity;
      
   }
}