Academic Integrity: tutoring, explanations, and feedback — we don’t complete graded work or submit on a student’s behalf.

Keep Getting This Error Traceback (most recent call last): File \"C:/Users/davis

ID: 3709928 • Letter: K

Question

Keep Getting This Error

Traceback (most recent call last):
File "C:/Users/davis/AppData/Local/Programs/Python/Python36-32/SeniorProject.py", line 13, in <module>
dataset["HomeWin"] = dataset["VisitorPts"] < dataset["HomePts"]
File "C:UsersdavisAppDataLocalProgramsPythonPython36-32libsite-packagespandascoreops.py", line 837, in wrapper
return self._constructor(na_op(self.values, other.values),
File "C:UsersdavisAppDataLocalProgramsPythonPython36-32libsite-packagespandascoreops.py", line 792, in na_op
raise TypeError("invalid type comparison")
TypeError: invalid type comparison

Code

import pandas as pd
import numpy as np

dataset = pd.read_csv("seniorproject.csv", parse_dates=[1],
skiprows=[0,])
print(dataset)
dataset.columns = ["Visitor Team","VisitorPts","Home Team","HomePts","Score Type","Team","Rk","Home Team Win"]
#dataset.columns = ["Visitor Team", "VisitorPts", "Home Team", "HomePts"]

dataset.ix[:5]

dataset["HomeWin"] = dataset["VisitorPts"] < dataset["HomePts"]
y_true = dataset["HomeWin"].values

from collections import defaultdict

won_last = defaultdict(int)

dict_variable = {}

for index, row in dataset.iterrows():
# remove two print lines below if you don't need them
# I have included them to show the output on terminal
#print(index)
#print(row)
home_team = row["Home Team"]
visitor_team = row["Visitor Team"]
row["HomeLastWin"] = won_last[home_team]
row["VisitorLastWin"] = won_last[visitor_team]
dataset.ix[index] = row
won_last[home_team] = row["HomeWin"]
won_last[visitor_team] = not row["HomeWin"]
  
dataset["Home Last Win"] = False
dataset["Visitor Last Win"] = False
from collections import defaultdict
won_last = defaultdict(int)
for index, row in dataset.iterrows():
home_team = row["Home Team"]
visitor_team = row["Visitor Team"]
row["Home Last Win"] = won_last[home_team]
row["Visitor Last Win"] = won_last[visitor_team]
dataset.ix[index] = row
#We then set our dictionary with the each team's result (from this row) for the next
#time we see these teams.
#Set current Win
won_last[home_team] = row["Home Team Win"]
won_last[visitor_team] = not row["Home Team Win"]

  
dataset["Home Win Streak"] = 0
dataset["Visitor Win Streak"] = 0
win_streak = defaultdict(int)

for index, row in dataset.iterrows():
home_team = row["Home Team"]
visitor_team = row["Visitor Team"]
row["Home Win Streak"] = win_streak[home_team]
row["Visitor Win Streak"] = win_streak[visitor_team]
dataset.ix[index] = row   
# Set current win
if row["Home Team Win"]:
win_streak[home_team] += 1
win_streak[visitor_team] = 0
else:
win_streak[home_team] = 0
win_streak[visitor_team] += 1

dataset["Home Team Ranks Higher"] = 0
for index , row in dataset.iterrows():
home_team = row["Home Team"]
visitor_team = row["Visitor Team"]
home_rank = dataset[dataset["Team"] == home_team]["Rk"].values
visitor_rank = dataset[dataset["Team"] == visitor_team]["Rk"].values
row["Home Team Rank Higher"] = (home_rank > visitor_rank)
dataset.ix[index] = row

last_match_winner = defaultdict(int)
dataset["Home Team Won Last"] = 0
for index , row in dataset.iterrows():
home_team = row["Home Team"]
visitor_team = row["Visitor Team"]
teams = tuple(sorted([home_team, visitor_team]))
  
row["Home Team Won Last"] = 1 if last_match_winner[teams] == row["Home Team"] else 0
dataset.ix[index] = row
# Who won this one?
winner = row["Home Team"] if row["Home Team Win"] else row["Visitor Team"]
last_match_winner[teams] = winner
  
X_features_only = dataset[['Home Win Streak', 'Visitor Win Streak', 'Home Team Ranks Higher',
'Home Team Won Last', 'Home Last Win', 'Visitor Last Win']].values


import numpy as np
from sklearn.tree import DecisionTreeClassifier
clf = DecisionTreeClassifier(random_state=14)
from sklearn.model_selection import cross_val_score

scores = cross_val_score(clf, X_features_only, y_true, scoring='accuracy')
print(scores)
print("Using just the last result from the home and visitor teams")
print("Accuracy: {0:.1f}%".format(np.mean(scores) * 100))


  

Explanation / Answer

The error is coming because the data in dataset["VisitorPts"] is different from the data in dataset["HomePts"]. There might be an error while reading the data from the .csv file. Please have a look into the data inside the csv file. Sharing the csv file here could be of more help.

Thank you!