Tweets Early sentiment analysis work6 included the collection of a set of tweets
ID: 3906614 • Letter: T
Question
Tweets. Early sentiment analysis work [6] included the collection of a set of tweets, some for learning a machine learning model for sentiment analysis, and some for evaluating how good that model is. We’ll be using that same data; it includes the following information for each tweet [7]:
• the gold polarity of the tweet (0 = negative, 2 = neutral, 4 = positive, = not given)
• the id of the tweet (2087)
• the date of the tweet (Sat May 16 23:58:44 UTC 2009)
You’ll basically be implementing a simple keyword-based method for sentiment analysis of tweets, counting up the numbers of positive and negative words in a tweet to determine the predicted polarity of the tweet. (This differs from the gold polarity, which is what has been decided as the true polarity of the tweet; you’re going to try to see how well you can predict it based on the content of the tweet.) T1 You will choose appropriate representations for the Tweet class. You may or may not choose to base it on other classes I’ve supplied (Vertex, VertexIDList). Material from weeks 9–11 of lectures will be particularly relevant in helping you decide. You’ll need to write a constructor based on your chosen representation that instantiates an empty tweet.
import java.io.IOException;
import java.io.Reader;
import java.io.BufferedReader;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.*;
import org.apache.commons.csv.*;
import org.junit.Test;
// Assignment skeleton for a collection of tweets with sentiment word lists,
// an inverse index, a shared-word graph and evaluation measures.
// Every method body below is a placeholder (returns null or does nothing),
// except ingestTweetsFromFile, which parses the CSV and builds Tweet objects
// but does not yet store them anywhere.
public class TweetCollection {
// TODO: add appropriate data types
public TweetCollection() {
// Constructor
// TODO
}
/*
* functions for accessing individual tweets
*/
public Tweet getTweetByID (String ID) {
// PRE: -
// POST: Returns the Tweet object with the given tweet ID
// TODO
return null;
}
public Integer numTweets() {
// PRE: -
// POST: Returns the number of tweets in this collection
// TODO
return null;
}
/*
* functions for accessing sentiment words
*/
public Polarity getBasicSentimentWordPolarity(String w) {
// PRE: w not null, basic sentiment words already read in from file
// POST: Returns polarity of w
// TODO
return null;
}
public Polarity getFinegrainedSentimentWordPolarity(String w) {
// PRE: w not null, finegrained sentiment words already read in from file
// POST: Returns polarity of w
// TODO
return null;
}
public Strength getFinegrainedSentimentWordStrength(String w) {
// PRE: w not null, finegrained sentiment words already read in from file
// POST: Returns strength of w
// TODO
return null;
}
/*
* functions for reading in tweets
*
*/
public void ingestTweetsFromFile(String fInName) throws IOException {
// PRE: -
// POST: Reads tweets from .csv file, stores in data structure
// NOTES
// Data source, file format description at http://help.sentiment140.com/for-students
// Using apache csv reader: https://www.callicoder.com/java-read-write-csv-file-apache-commons-csv/
// Both the reader and the parser are closed automatically by try-with-resources.
try (
Reader reader = Files.newBufferedReader(Paths.get(fInName));
CSVParser csvParser = new CSVParser(reader, CSVFormat.DEFAULT);
) {
Iterable<CSVRecord> csvRecords = csvParser.getRecords();
for (CSVRecord csvRecord : csvRecords) {
// Accessing Values by Column Index
// Column 3 is not read here.
Tweet tw = new Tweet(csvRecord.get(0), // gold polarity
csvRecord.get(1), // ID
csvRecord.get(2), // date
csvRecord.get(4), // user
csvRecord.get(5)); // text
// TODO: insert tweet tw into appropriate data type
}
}
}
/*
* functions for sentiment words
*/
public void importBasicSentimentWordsFromFile (String fInName) throws IOException {
// PRE: -
// POST: Read in and store basic sentiment words in appropriate data type
// TODO
}
public void importFinegrainedSentimentWordsFromFile (String fInName) throws IOException {
// PRE: -
// POST: Read in and store finegrained sentiment words in appropriate data type
// TODO
}
public Boolean isBasicSentWord (String w) {
// PRE: Basic sentiment words have been read in and stored
// POST: Returns true if w is a basic sentiment word, false otherwise
// TODO
return null;
}
public Boolean isFinegrainedSentWord (String w) {
// PRE: Finegrained sentiment words have been read in and stored
// POST: Returns true if w is a finegrained sentiment word, false otherwise
// TODO
return null;
}
public void predictTweetSentimentFromBasicWordlist () {
// PRE: Finegrained word sentiment already imported
// NOTE(review): this is the basic-wordlist method, so the precondition
// presumably means the basic word list - confirm with the assignment text
// POST: For all tweets in collection, tweet annotated with predicted sentiment
// based on count of sentiment words in sentWords
// TODO
}
public void predictTweetSentimentFromFinegrainedWordlist (Integer strongWeight, Integer weakWeight) {
// PRE: Finegrained word sentiment already imported
// POST: For all tweets in v, tweet annotated with predicted sentiment
// based on count of sentiment words in sentWords
// TODO
}
/*
* functions for inverse index
*
*/
public Map<String, Vector<String>> importInverseIndexFromFile (String fInName) throws IOException {
// PRE: -
// POST: Reads in and returns the contents of the file as an inverse index
// invIndex has words w as key, IDs of tweets that contain w as value
// TODO
return null;
}
/*
* functions for graph construction
*/
public void constructSharedWordGraph(Map<String, Vector<String>> invIndex) {
// PRE: invIndex has words w as key, IDs of tweets that contain w as value
// POST: Graph constructed, with tweets as vertices,
// and edges between them if they share a word
// TODO
}
public Integer numConnectedComponents() {
// PRE: -
// POST: Returns the number of connected components
// TODO
return null;
}
public void annotateConnectedComponents() {
// PRE: -
// POST: Annotates graph so that it is partitioned into components
// TODO
}
public Integer componentSentLabelCount(String ID, Polarity p) {
// PRE: Graph components are identified, ID is a valid tweet
// POST: Returns count of labels corresponding to Polarity p in component containing ID
// TODO
return null;
}
public void propagateLabelAcrossComponent(String ID, Polarity p, Boolean keepPred) {
// PRE: ID is a tweet id in the graph
// POST: Labels tweets in component with predicted polarity p
// (if keepPred == T, only tweets w pred polarity None; otherwise all tweets)
// TODO
}
public void propagateMajorityLabelAcrossComponents(Boolean keepPred) {
// PRE: Components are identified
// POST: Tweets in each component are labelled with the majority sentiment for that component
// Majority label is defined as whichever of POS or NEG has the larger count;
// if POS and NEG are both zero, majority label is NONE
// otherwise, majority label is NEUT
// If keepPred is True, only tweets with predicted label None are labelled in this way
// otherwise, all tweets in the component are labelled in this way
// TODO
}
/*
* functions for evaluation
*/
public Double accuracy () {
// PRE: -
// POST: Calculates and returns accuracy of labelling
// TODO
return null;
}
public Double coverage () {
// PRE: -
// POST: Calculates and returns coverage of labelling
// TODO
return null;
}
// Entry point; intentionally empty in the skeleton.
public static void main(String[] args) {
}
}
Explanation / Answer
*********************************
Tweet.java
/**
 * A single tweet: its gold (true) polarity, ID, date, user and text.
 *
 * <p>The gold polarity is kept as the raw integer code from the data file
 * (0 = negative, 2 = neutral, 4 = positive). {@link #POLARITY_NOT_GIVEN} is
 * used when the file leaves the polarity blank.
 */
public class Tweet {
    /** Gold polarity stored when the source field is null or blank. */
    public static final int POLARITY_NOT_GIVEN = -1;

    // These fields were assigned and returned by the original class but never
    // declared, so it could not compile.
    private int gold_polarity;
    private String t_ID;
    private String t_date;
    private String t_user;
    private String t_text;

    /**
     * Creates a tweet from an already-parsed gold polarity code.
     *
     * @param gold_polarity integer polarity code from the data file
     * @param t_ID          tweet ID
     * @param t_date        tweet date, kept as the raw string
     * @param t_user        user who posted the tweet
     * @param t_text        text of the tweet
     */
    public Tweet(int gold_polarity, String t_ID, String t_date, String t_user, String t_text) {
        this.gold_polarity = gold_polarity;
        this.t_ID = t_ID;
        this.t_date = t_date;
        this.t_user = t_user;
        this.t_text = t_text;
    }

    /**
     * Creates a tweet from the raw CSV fields, where the gold polarity is
     * still text. This is the form in which a CSV reader hands values over.
     *
     * @param gold_polarity polarity code as text; null or blank means "not given"
     * @param t_ID          tweet ID
     * @param t_date        tweet date, kept as the raw string
     * @param t_user        user who posted the tweet
     * @param t_text        text of the tweet
     * @throws NumberFormatException if gold_polarity is non-blank and not an integer
     */
    public Tweet(String gold_polarity, String t_ID, String t_date, String t_user, String t_text) {
        this(parseGoldPolarity(gold_polarity), t_ID, t_date, t_user, t_text);
    }

    /** Converts a textual polarity code to an int; null or blank maps to POLARITY_NOT_GIVEN. */
    private static int parseGoldPolarity(String raw) {
        if (raw == null) {
            return POLARITY_NOT_GIVEN;
        }
        String trimmed = raw.trim();
        if (trimmed.isEmpty()) {
            return POLARITY_NOT_GIVEN;
        }
        return Integer.parseInt(trimmed);
    }

    /** @return the gold polarity code */
    public int getGold_polarity() {
        return gold_polarity;
    }

    /** @param gold_polarity new gold polarity code */
    public void setGold_polarity(int gold_polarity) {
        this.gold_polarity = gold_polarity;
    }

    /** @return the tweet ID */
    public String getT_ID() {
        return t_ID;
    }

    /** @param t_ID new tweet ID */
    public void setT_ID(String t_ID) {
        this.t_ID = t_ID;
    }

    /** @return the tweet date as stored */
    public String getT_date() {
        return t_date;
    }

    /** @param t_date new tweet date */
    public void setT_date(String t_date) {
        this.t_date = t_date;
    }

    /** @return the user who posted the tweet */
    public String getT_user() {
        return t_user;
    }

    /** @param t_user new user name */
    public void setT_user(String t_user) {
        this.t_user = t_user;
    }

    /** @return the tweet text */
    public String getT_text() {
        return t_text;
    }

    /** @param t_text new tweet text */
    public void setT_text(String t_text) {
        this.t_text = t_text;
    }
}
*********************************
SentimentWord.java
/**
 * One entry of a sentiment word list: the word itself plus integer codes for
 * its polarity and its strength.
 */
public class SentimentWord {
    // Fields are deliberately left package-visible, as in the original.
    String word;
    int polarity;
    int strength;

    /**
     * Builds an entry from its three components.
     *
     * @param word     the sentiment word
     * @param polarity integer polarity code
     * @param strength integer strength code
     */
    public SentimentWord(String word, int polarity, int strength) {
        this.word = word;
        this.polarity = polarity;
        this.strength = strength;
    }

    // ---- accessors ----

    /** @return the sentiment word */
    public String getWord() {
        return this.word;
    }

    /** @return the polarity code */
    public int getPolarity() {
        return this.polarity;
    }

    /** @return the strength code */
    public int getStrength() {
        return this.strength;
    }

    // ---- mutators ----

    /** @param word replacement word */
    public void setWord(String word) {
        this.word = word;
    }

    /** @param polarity replacement polarity code */
    public void setPolarity(int polarity) {
        this.polarity = polarity;
    }

    /** @param strength replacement strength code */
    public void setStrength(int strength) {
        this.strength = strength;
    }
}
*********************************
TweetCollection.java
import java.io.IOException;
import java.io.Reader;
import java.io.BufferedReader;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.*;
import org.apache.commons.csv.*;
import org.junit.Test;
/**
 * A collection of tweets together with a basic and a finegrained sentiment
 * word list.
 *
 * <p>Implemented: tweet ingestion from CSV, lookup of a tweet by ID, tweet
 * count, and membership / polarity lookup in the two word lists. Everything
 * else (word-list import, strength lookup, prediction, inverse index, graph
 * and evaluation methods) is still a stub that returns null or does nothing.
 */
public class TweetCollection {

    /** Gold polarity recorded when the CSV field is blank; same number as Polarity.not_given. */
    private static final int GOLD_POLARITY_NOT_GIVEN = -1;

    // Storage is simple lists; all lookups below are linear scans.
    ArrayList<Tweet> arr_tweets;
    ArrayList<SentimentWord> arr_basic_sentiment;
    ArrayList<SentimentWord> arr_finegrained_sentiment;

    /** Sentiment label; the numeric value follows the data file's coding. */
    enum Polarity {
        negative(0), neutral(2), positive(4), not_given(-1);

        private final int value;

        private Polarity(int value) {
            this.value = value;
        }
    }

    /** Strength categories for finegrained sentiment words. */
    enum Strength {
        dual,
        binary,
        trinary,
        scale
    }

    /** Creates an empty collection with empty word lists. */
    public TweetCollection() {
        arr_tweets = new ArrayList<>();
        arr_basic_sentiment = new ArrayList<>();
        arr_finegrained_sentiment = new ArrayList<>();
    }

    /*
     * functions for accessing individual tweets
     */

    /**
     * PRE: -
     * POST: Returns the first stored Tweet whose ID equals ID,
     *       or null if there is none (or ID is null).
     */
    public Tweet getTweetByID (String ID) {
        if (ID == null) {
            return null;
        }
        for (Tweet tw : arr_tweets) {
            // ID is known non-null here, so a tweet with a null ID cannot cause an NPE.
            if (ID.equals(tw.getT_ID())) {
                return tw;
            }
        }
        return null;
    }

    /**
     * PRE: -
     * POST: Returns the number of tweets in this collection.
     */
    public Integer numTweets() {
        return arr_tweets.size();
    }

    /*
     * functions for accessing sentiment words
     */

    /**
     * PRE: w not null, basic sentiment words already read in from file
     * POST: Returns polarity of w, or Polarity.not_given if w is not in the basic list.
     */
    public Polarity getBasicSentimentWordPolarity(String w) {
        return polarityOf(findWord(arr_basic_sentiment, w));
    }

    /**
     * PRE: w not null, finegrained sentiment words already read in from file
     * POST: Returns polarity of w, or Polarity.not_given if w is not in the finegrained list.
     */
    public Polarity getFinegrainedSentimentWordPolarity(String w) {
        return polarityOf(findWord(arr_finegrained_sentiment, w));
    }

    /**
     * PRE: w not null, finegrained sentiment words already read in from file
     * POST: Returns strength of w.
     * Not implemented: always returns null. How SentimentWord's integer
     * strength code maps onto Strength is not defined anywhere in this class.
     */
    public Strength getFinegrainedSentimentWordStrength(String w) {
        // TODO
        return null;
    }

    /** Returns the first entry of words whose word equals w, or null if absent or w is null. */
    private static SentimentWord findWord(List<SentimentWord> words, String w) {
        if (w == null) {
            return null;
        }
        for (SentimentWord entry : words) {
            if (w.equals(entry.getWord())) {
                return entry;
            }
        }
        return null;
    }

    /**
     * Maps an entry's integer polarity code to a Polarity by sign:
     * positive code, zero, negative code. A null entry maps to not_given.
     */
    private static Polarity polarityOf(SentimentWord entry) {
        if (entry == null) {
            return Polarity.not_given;
        }
        int pol = entry.getPolarity();
        if (pol > 0) {
            return Polarity.positive;
        }
        if (pol == 0) {
            return Polarity.neutral;
        }
        return Polarity.negative;
    }

    /*
     * functions for reading in tweets
     */

    /**
     * PRE: -
     * POST: Reads tweets from .csv file and appends them to this collection.
     * NOTES
     * Data source, file format description at http://help.sentiment140.com/for-students
     * Using apache csv reader: https://www.callicoder.com/java-read-write-csv-file-apache-commons-csv/
     *
     * @throws IOException if the file cannot be opened or read
     * @throws NumberFormatException if a non-blank gold polarity is not an integer
     */
    public void ingestTweetsFromFile(String fInName) throws IOException {
        try (
            Reader reader = Files.newBufferedReader(Paths.get(fInName));
            CSVParser csvParser = new CSVParser(reader, CSVFormat.DEFAULT);
        ) {
            Iterable<CSVRecord> csvRecords = csvParser.getRecords();
            for (CSVRecord csvRecord : csvRecords) {
                // Accessing values by column index; column 3 is not used.
                // The CSV yields text, but Tweet's constructor takes the gold
                // polarity as an int, so it has to be converted first.
                Tweet tw = new Tweet(parseGoldPolarity(csvRecord.get(0)), // gold polarity
                        csvRecord.get(1),  // ID
                        csvRecord.get(2),  // date
                        csvRecord.get(4),  // user
                        csvRecord.get(5)); // text
                arr_tweets.add(tw);
            }
        }
    }

    /** Converts a textual polarity code to an int; null or blank maps to GOLD_POLARITY_NOT_GIVEN. */
    private static int parseGoldPolarity(String raw) {
        if (raw == null) {
            return GOLD_POLARITY_NOT_GIVEN;
        }
        String trimmed = raw.trim();
        if (trimmed.isEmpty()) {
            return GOLD_POLARITY_NOT_GIVEN;
        }
        return Integer.parseInt(trimmed);
    }

    /*
     * functions for sentiment words
     */

    /**
     * PRE: -
     * POST: Read in and store basic sentiment words in appropriate data type.
     * Not implemented: currently does nothing.
     */
    public void importBasicSentimentWordsFromFile (String fInName) throws IOException {
        // TODO
    }

    /**
     * PRE: -
     * POST: Read in and store finegrained sentiment words in appropriate data type.
     * Not implemented: currently does nothing.
     */
    public void importFinegrainedSentimentWordsFromFile (String fInName) throws IOException {
        // TODO
    }

    /**
     * PRE: Basic sentiment words have been read in and stored
     * POST: Returns true if w is a basic sentiment word, false otherwise.
     */
    public Boolean isBasicSentWord (String w) {
        return findWord(arr_basic_sentiment, w) != null;
    }

    /**
     * PRE: Finegrained sentiment words have been read in and stored
     * POST: Returns true if w is a finegrained sentiment word, false otherwise.
     */
    public Boolean isFinegrainedSentWord (String w) {
        return findWord(arr_finegrained_sentiment, w) != null;
    }

    /**
     * PRE: Finegrained word sentiment already imported
     * POST: For all tweets in collection, tweet annotated with predicted sentiment
     *       based on count of sentiment words in sentWords.
     * Not implemented: currently does nothing.
     */
    public void predictTweetSentimentFromBasicWordlist () {
        // TODO
    }

    /**
     * PRE: Finegrained word sentiment already imported
     * POST: For all tweets in v, tweet annotated with predicted sentiment
     *       based on count of sentiment words in sentWords.
     * Not implemented: currently does nothing.
     */
    public void predictTweetSentimentFromFinegrainedWordlist (Integer strongWeight, Integer weakWeight) {
        // TODO
    }

    /*
     * functions for inverse index
     */

    /**
     * PRE: -
     * POST: Reads in and returns the contents of the file as an inverse index;
     *       invIndex has words w as key, IDs of tweets that contain w as value.
     * Not implemented: always returns null.
     */
    public Map<String, Vector<String>> importInverseIndexFromFile (String fInName) throws IOException {
        // TODO
        return null;
    }

    /*
     * functions for graph construction
     */

    /**
     * PRE: invIndex has words w as key, IDs of tweets that contain w as value
     * POST: Graph constructed, with tweets as vertices,
     *       and edges between them if they share a word.
     * Not implemented: currently does nothing.
     */
    public void constructSharedWordGraph(Map<String, Vector<String>> invIndex) {
        // TODO
    }

    /**
     * PRE: -
     * POST: Returns the number of connected components.
     * Not implemented: always returns null.
     */
    public Integer numConnectedComponents() {
        // TODO
        return null;
    }

    /**
     * PRE: -
     * POST: Annotates graph so that it is partitioned into components.
     * Not implemented: currently does nothing.
     */
    public void annotateConnectedComponents() {
        // TODO
    }

    /**
     * PRE: Graph components are identified, ID is a valid tweet
     * POST: Returns count of labels corresponding to Polarity p in component containing ID.
     * Not implemented: always returns null.
     */
    public Integer componentSentLabelCount(String ID, Polarity p) {
        // TODO
        return null;
    }

    /**
     * PRE: ID is a tweet id in the graph
     * POST: Labels tweets in component with predicted polarity p
     *       (if keepPred == T, only tweets w pred polarity None; otherwise all tweets).
     * Not implemented: currently does nothing.
     */
    public void propagateLabelAcrossComponent(String ID, Polarity p, Boolean keepPred) {
        // TODO
    }

    /**
     * PRE: Components are identified
     * POST: Tweets in each component are labelled with the majority sentiment for that component.
     *       Majority label is defined as whichever of POS or NEG has the larger count;
     *       if POS and NEG are both zero, majority label is NONE;
     *       otherwise, majority label is NEUT.
     *       If keepPred is True, only tweets with predicted label None are labelled in this way;
     *       otherwise, all tweets in the component are labelled in this way.
     * Not implemented: currently does nothing.
     */
    public void propagateMajorityLabelAcrossComponents(Boolean keepPred) {
        // TODO
    }

    /*
     * functions for evaluation
     */

    /**
     * PRE: -
     * POST: Calculates and returns accuracy of labelling.
     * Not implemented: always returns null.
     */
    public Double accuracy () {
        // TODO
        return null;
    }

    /**
     * PRE: -
     * POST: Calculates and returns coverage of labelling.
     * Not implemented: always returns null.
     */
    public Double coverage () {
        // TODO
        return null;
    }

    /** Entry point; intentionally empty. */
    public static void main(String[] args) {
    }
}
Related Questions
drjack9650@gmail.com
Navigate
Integrity-first tutoring: explanations and feedback only — we do not complete graded work. Learn more.