1. Consider the process of building a decision-tree-based classifier using Entro
ID: 3792808 • Letter: 1
Question
1. Consider the process of building a decision-tree-based classifier using Entropy as a measure of impurity associated with a tree node that represents a subset of training examples. A node is split into partitions represented by its child nodes based on the values of a selected attribute. While considering ordinal attributes as the basis for splitting, both multiway splitting and two-way splitting are examined.
The information gain of the attribute, is estimated in terms of the difference between the impurity of the parent node and the weighted sum of the impurities of the child nodes. The goodness of the attribute for the split, referred to as the gain-ratio of the split, can be computed as the ratio of information gain to the information associated with the split.
Consider the training set of examples described in terms of an ordinal attribute A1 and a binary attribute A2 in addition to the distribution of examples in two classes, C1 and C2.
Each row of the table describes a subset of examples in terms of the two attributes A1 and A2 and indicates the number of examples among the subset that fall into class C1 and class C2. The classifier aims at classifying the training examples into one of the two classes.
Estimate the goodness of the attributes for both multiway as well as two-way splitting with equal number of distinct values along each branch, in the form of the gain-ratio metric, for the training set given in the table. Which of the following is TRUE about the goodness of the attributes for a split.
A1
A2
C1
C2
Low
Yes
2
0
Medium
Yes
56
5
High
Yes
2
16
Very-High
Yes
6
24
Low
No
6
2
Medium
No
8
3
High
No
4
16
Very-High
No
2
8
A1
A2
C1
C2
Low
Yes
2
0
Medium
Yes
56
5
High
Yes
2
16
Very-High
Yes
6
24
Low
No
6
2
Medium
No
8
3
High
No
4
16
Very-High
No
2
8
Explanation / Answer
Entropy may be calculated in the following way:
import java.util.ArrayList;
public class Entropy {
public static double calculateEntropy(ArrayList<Record> data) {
double entropy = 0;
if(data.size() == 0) {
// nothing to do
return 0;
}
for(int i = 0; i < Hw1.setSize("PlayTennis"); i++) {
int count = 0;
for(int j = 0; j < data.size(); j++) {
Record record = data.get(j);
if(record.getAttributes().get(4).getValue() == i) {
count++;
}
}
double probability = count / (double)data.size();
if(count > 0) {
entropy += -probability * (Math.log(probability) / Math.log(2));
}
}
return entropy;
}
public static double calculateGain(double rootEntropy, ArrayList<Double> subEntropies, ArrayList<Integer> setSizes, int data) {
double gain = rootEntropy;
for(int i = 0; i < subEntropies.size(); i++) {
gain += -((setSizes.get(i) / (double)data) * subEntropies.get(i));
}
return gain;
}
}
**************************************************
Class 2: Tree.java
import java.io.*;
import java.util.*;
public class Tree {
public Node buildTree(ArrayList<Record> records, Node root, LearningSet learningSet) {
int bestAttribute = -1;
double bestGain = 0;
root.setEntropy(Entropy.calculateEntropy(root.getData()));
if(root.getEntropy() == 0) {
return root;
}
for(int i = 0; i < Hw1.NUM_ATTRS - 2; i++) {
if(!Hw1.isAttributeUsed(i)) {
double entropy = 0;
ArrayList<Double> entropies = new ArrayList<Double>();
ArrayList<Integer> setSizes = new ArrayList<Integer>();
for(int j = 0; j < Hw1.NUM_ATTRS - 2; j++) {
ArrayList<Record> subset = subset(root, i, j);
setSizes.add(subset.size());
if(subset.size() != 0) {
entropy = Entropy.calculateEntropy(subset);
entropies.add(entropy);
}
}
double gain = Entropy.calculateGain(root.getEntropy(), entropies, setSizes, root.getData().size());
if(gain > bestGain) {
bestAttribute = i;
bestGain = gain;
}
}
}
if(bestAttribute != -1) {
int setSize = Hw1.setSize(Hw1.attrMap.get(bestAttribute));
root.setTestAttribute(new DiscreteAttribute(Hw1.attrMap.get(bestAttribute), 0));
root.children = new Node[setSize];
root.setUsed(true);
Hw1.usedAttributes.add(bestAttribute);
for (int j = 0; j< setSize; j++) {
root.children[j] = new Node();
root.children[j].setParent(root);
root.children[j].setData(subset(root, bestAttribute, j));
root.children[j].getTestAttribute().setName(Hw1.getLeafNames(bestAttribute, j));
root.children[j].getTestAttribute().setValue(j);
}
for (int j = 0; j < setSize; j++) {
buildTree(root.children[j].getData(), root.children[j], learningSet);
}
root.setData(null);
}
else {
return root;
}
return root;
}
public ArrayList<Record> subset(Node root, int attr, int value) {
ArrayList<Record> subset = new ArrayList<Record>();
for(int i = 0; i < root.getData().size(); i++) {
Record record = root.getData().get(i);
if(record.getAttributes().get(attr).getValue() == value) {
subset.add(record);
}
}
return subset;
}
public double calculateSurrogates(ArrayList<Record> records) {
return 0;
}
}
****************************************************************88
class 3: Main driver class
import java.util.*;
public class Hw1 {
public static int NUM_ATTRS = 6;
public static ArrayList<String> attrMap;
public static ArrayList<Integer> usedAttributes = new ArrayList<Integer>();
public static void main(String[] args) {
populateAttrMap();
Tree t = new Tree();
ArrayList<Record> records;
LearningSet learningSet = new LearningSet();
// read in all our data
records = FileReader.buildRecords();
Node root = new Node();
for(Record record : records) {
root.getData().add(record);
}
t.buildTree(records, root, learningSet);
traverseTree(records.get(12), root);
return;
}
public static void traverseTree(Record r, Node root) {
while(root.children != null) {
double nodeValue = 0;
for(int i = 0; i < r.getAttributes().size(); i++) {
if(r.getAttributes().get(i).getName().equalsIgnoreCase(root.getTestAttribute().getName())) {
nodeValue = r.getAttributes().get(i).getValue();
break;
}
}
for(int i = 0; i < root.getChildren().length; i++) {
if(nodeValue == root.children[i].getTestAttribute().getValue()) {
traverseTree(r, root.children[i]);
}
}
}
System.out.print("Prediction for Play Tennis: ");
if(root.getTestAttribute().getValue() == 0) {
System.out.println("No");
}
else if(root.getTestAttribute().getValue() == 0) {
System.out.println("Yes");
}
return;
}
public static boolean isAttributeUsed(int attribute) {
if(usedAttributes.contains(attribute)) {
return true;
}
else {
return false;
}
}
public static int setSize(String set) {
if(set.equalsIgnoreCase("Outlook")) {
return 3;
}
else if(set.equalsIgnoreCase("Wind")) {
return 2;
}
else if(set.equalsIgnoreCase("Temperature")) {
return 3;
}
else if(set.equalsIgnoreCase("Humidity")) {
return 2;
}
else if(set.equalsIgnoreCase("PlayTennis")) {
return 2;
}
return 0;
}
public static String getLeafNames(int attributeNum, int valueNum) {
if(attributeNum == 0) {
if(valueNum == 0) {
return "Sunny";
}
else if(valueNum == 1) {
return "Overcast";
}
else if(valueNum == 2) {
return "Rain";
}
}
else if(attributeNum == 1) {
if(valueNum == 0) {
return "Hot";
}
else if(valueNum == 1) {
return "Mild";
}
else if(valueNum == 2) {
return "Cool";
}
}
else if(attributeNum == 2) {
if(valueNum == 0) {
return "High";
}
else if(valueNum == 1) {
return "Normal";
}
}
else if(attributeNum == 3) {
if(valueNum == 0) {
return "Weak";
}
else if(valueNum == 1) {
return "Strong";
}
}
return null;
}
public static void populateAttrMap() {
attrMap = new ArrayList<String>();
attrMap.add("Outlook");
attrMap.add("Temperature");
attrMap.add("Humidity");
attrMap.add("Wind");
attrMap.add("PlayTennis");
}
}
***********************************************************************
Class 4: Node.java - to hold the information of tree
import java.util.*;
public class Node {
private Node parent;
public Node[] children;
private ArrayList<Record> data;
private double entropy;
private boolean isUsed;
private DiscreteAttribute testAttribute;
public Node() {
this.data = new ArrayList<Record>();
setEntropy(0.0);
setParent(null);
setChildren(null);
setUsed(false);
setTestAttribute(new DiscreteAttribute("", 0));
}
public void setParent(Node parent) {
this.parent = parent;
}
public Node getParent() {
return parent;
}
public void setData(ArrayList<Record> data) {
this.data = data;
}
public ArrayList<Record> getData() {
return data;
}
public void setEntropy(double entropy) {
this.entropy = entropy;
}
public double getEntropy() {
return entropy;
}
public void setChildren(Node[] children) {
this.children = children;
}
public Node[] getChildren() {
return children;
}
public void setUsed(boolean isUsed) {
this.isUsed = isUsed;
}
public boolean isUsed() {
return isUsed;
}
public void setTestAttribute(DiscreteAttribute testAttribute) {
this.testAttribute = testAttribute;
}
public DiscreteAttribute getTestAttribute() {
return testAttribute;
}
}
*************************************************
Related Questions
Navigate
Integrity-first tutoring: explanations and feedback only — we do not complete graded work. Learn more.