Python Question(2.7) Please write different codes but the output should be same
ID: 666137 • Letter: P
Question
Python Question(2.7)
Please write different code that produces the same output as the code below — in other words, paraphrase the code.
the file is:
https://drive.google.com/folderview?id=0Bz_aB61VIRZAfnNoamZmZVdVR19TNmdmVDBwbk5aN1JsNkQyNTVRLUgwVVB2elVIUHBfcFE&usp=sharing
----------------------------------------------
import recommendations
# Driver script: build a {url: {tag: count}} dictionary from the Delicious
# dump, then print tag similarities and tag recommendations for one URL.

DATA_FILE = 'ydata-delicious-popular-urls-and-tags-v1_0.txt'
TARGET_URL = 'http://www.michaelbach.de/ot/index.html'
# Reading stops as soon as this line number is reached, so only the first
# STOP_AT_LINE - 1 lines of the file are loaded.
STOP_AT_LINE = 100

url_tags = {}
# The context manager closes the file even if a malformed line raises.
with open(DATA_FILE) as data_file:
    for line_number, line in enumerate(data_file, 1):
        if line_number == STOP_AT_LINE:
            break
        # The assignment text describes the file as tab delimited.
        fields = line.rstrip('\n').split('\t')
        url = fields[0]
        url_tags.setdefault(url, {})
        # The two columns after the URL (save count and first-save date per
        # the assignment text) are skipped; the rest are tag/count pairs.
        tag_fields = fields[3:]
        if len(tag_fields) == 1:
            # A lone trailing field cannot form a tag/count pair.
            continue
        for start in range(0, len(tag_fields), 2):
            tag, count = tag_fields[start:start + 2]
            # setdefault keeps the first count seen for a repeated tag.
            url_tags[url].setdefault(tag, int(count))

# Q2: similarity between tags
print(recommendations.calculateSimilarItems(url_tags))

# Q3: user-based filtering
print(recommendations.getRecommendations(
    url_tags, TARGET_URL, similarity=recommendations.sim_pearson))

# Q3: item-based filtering
itemsim = recommendations.calculateSimilarItems(url_tags)
print(recommendations.getRecommendedItems(url_tags, itemsim, TARGET_URL))
# NOTE: the original author observed a ZeroDivisionError for other URLs.
PROBLEM DESCRIPTION: Delicious (formerly del.icio.us) is a web site that allows users to save their favorite links (bookmarks) online. Each link also has one or more tags that represent the categories or topics of the website, such as "programming", "cooking", "research", etc. In this assignment, you will do the following: 1. Using the provided dataset (described below), first create a dictionary of tags and items (i.e., web links). That is, you will create a dictionary very similar to the critics dictionary, where the key is a URL and the value is another dictionary in which the key is a tag and the value is how many times that URL was tagged with that tag. 2. Calculate the similarity between tags, and see if you can find any tags that are almost identical (i.e., similarity is almost 1). For instance, find some items that could have been tagged "programming" but were not. 3. Compute top-5 item-based and user-based tag recommendations for five URLs that you choose. DATASET: This dataset consists of 100,000 popular URLs bookmarked on Delicious within a past time window. Each URL includes the date first saved, the number of saves, and the top 10 tags used and their respective counts. The dataset is available on LMS under /Assignments/Assignment 1, file name: Webscope_R5.tgz. DATA FORMAT: The file is tab delimited; the columns are (from left to right): URL, number of saves, date of first save.
Explanation / Answer
# Sample preference data: each critic maps to a dictionary of
# {movie title: rating}. Not every critic has rated every movie.
critics = {
    'Lisa Rose': {
        'Lady in the Water': 2.5,
        'Snakes on a Plane': 3.5,
        'Just My Luck': 3.0,
        'Superman Returns': 3.5,
        'You, Me and Dupree': 2.5,
        'The Night Listener': 3.0,
    },
    'Gene Seymour': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 3.5,
        'Just My Luck': 1.5,
        'Superman Returns': 5.0,
        'The Night Listener': 3.0,
        'You, Me and Dupree': 3.5,
    },
    'Michael Phillips': {
        'Lady in the Water': 2.5,
        'Snakes on a Plane': 3.0,
        'Superman Returns': 3.5,
        'The Night Listener': 4.0,
    },
    'Claudia Puig': {
        'Snakes on a Plane': 3.5,
        'Just My Luck': 3.0,
        'The Night Listener': 4.5,
        'Superman Returns': 4.0,
        'You, Me and Dupree': 2.5,
    },
    'Mick LaSalle': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 4.0,
        'Just My Luck': 2.0,
        'Superman Returns': 3.0,
        'The Night Listener': 3.0,
        'You, Me and Dupree': 2.0,
    },
    'Jack Matthews': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 4.0,
        'The Night Listener': 3.0,
        'Superman Returns': 5.0,
        'You, Me and Dupree': 3.5,
    },
    'Toby': {
        'Snakes on a Plane': 4.5,
        'You, Me and Dupree': 1.0,
        'Superman Returns': 4.0,
    },
    'Ali': {
        'Just My Luck': 1.0,
        'You, Me and Dupree': 2.0,
        'The Night Listener': 4.5,
        'Superman Returns': 1.0,
        'Snakes on a Plane': 4.5,
    },
}
from math import sqrt
def sim_distance(prefs, person1, person2):
    """Return a distance-based similarity score for two entries of prefs.

    The score is 1 / (1 + d), where d is the Euclidean distance between the
    two rating dictionaries measured over the items they both contain.
    Identical ratings therefore score 1.0, and the score shrinks towards 0
    as the ratings diverge. Returns 0 when the two share no items.
    """
    own_ratings = prefs[person1]
    # Walk person1's items in their own order and keep the squared rating
    # gap for every item person2 has rated too.
    squared_gaps = [
        (own_ratings[title] - prefs[person2][title]) ** 2
        for title in own_ratings
        if title in prefs[person2]
    ]
    if not squared_gaps:
        return 0
    return 1 / (1 + sqrt(sum(squared_gaps)))
def sim_pearson(prefs, p1, p2):
    """Return the Pearson correlation coefficient between p1 and p2.

    Only items present in both prefs[p1] and prefs[p2] are considered.

    Returns 0 when the two share no items, or when the correlation is
    undefined because one side's shared ratings have no variance.
    Otherwise returns a float in the range [-1, 1].
    """
    shared = [item for item in prefs[p1] if item in prefs[p2]]
    if not shared:
        return 0

    # Use a float count so that integer ratings (e.g. tag counts) are not
    # silently truncated by Python 2's integer division below.
    n = float(len(shared))

    ratings1 = [prefs[p1][item] for item in shared]
    ratings2 = [prefs[p2][item] for item in shared]

    sum1 = sum(ratings1)
    sum2 = sum(ratings2)
    sum1Sq = sum([pow(value, 2) for value in ratings1])
    sum2Sq = sum([pow(value, 2) for value in ratings2])
    pSum = sum([a * b for a, b in zip(ratings1, ratings2)])

    num = pSum - (sum1 * sum2 / n)
    variance_product = (sum1Sq - pow(sum1, 2) / n) * (sum2Sq - pow(sum2, 2) / n)
    # Mathematically this product is never negative; floating-point rounding
    # can push it slightly below zero, which would make sqrt() raise.
    if variance_product <= 0:
        return 0
    return num / sqrt(variance_product)
def topMatches(prefs, person, n=5, similarity=sim_pearson):
    """Return the n entries of prefs most similar to person.

    Every other key of prefs is scored against person with the given
    similarity function. The result is a list of (score, key) tuples,
    highest score first, cut off after n entries.
    """
    ranked = []
    for candidate in prefs:
        if candidate == person:
            continue
        ranked.append((similarity(prefs, person, candidate), candidate))
    ranked.sort(reverse=True)
    return ranked[:n]
def getRecommendations(prefs, person, similarity=sim_pearson):
    """Recommend items for person using a similarity-weighted average.

    Each other entry of prefs whose similarity to person is positive
    contributes its ratings, weighted by that similarity, for items that
    person has not rated (or has rated 0). Returns a list of
    (predicted rating, item) tuples sorted from highest to lowest.
    """
    weighted_totals = {}
    weight_sums = {}
    for neighbour in prefs:
        if neighbour == person:
            continue
        weight = similarity(prefs, person, neighbour)
        # Neighbours with zero or negative similarity carry no vote.
        if weight <= 0:
            continue
        for item in prefs[neighbour]:
            # Skip anything the person has already given a non-zero rating.
            if item in prefs[person] and prefs[person][item] != 0:
                continue
            weighted_totals[item] = (
                weighted_totals.get(item, 0) + prefs[neighbour][item] * weight
            )
            weight_sums[item] = weight_sums.get(item, 0) + weight
    # Normalise by the total weight so heavily-reviewed items gain no edge.
    return sorted(
        ((total / weight_sums[item], item)
         for item, total in weighted_totals.items()),
        reverse=True
    )
def transformPrefs(prefs):
    """Swap the two key levels of a nested preference dictionary.

    Given {person: {item: rating}}, return {item: {person: rating}}.
    """
    flipped = {}
    for person, ratings in prefs.items():
        for item, rating in ratings.items():
            flipped.setdefault(item, {})[person] = rating
    return flipped
def calculateSimilarItems(prefs, n=10):
    """Map every item to its n most similar items.

    prefs is inverted with transformPrefs so that items become the outer
    keys, then each item is compared against the others with topMatches
    using sim_distance. Returns {item: [(score, other_item), ...]}.

    A "done / total" progress line is printed after every 100th item.
    """
    item_prefs = transformPrefs(prefs)
    item_count = len(item_prefs)
    similar = {}
    for position, item in enumerate(item_prefs, 1):
        # Progress report for large datasets.
        if position % 100 == 0:
            print("%d / %d" % (position, item_count))
        similar[item] = topMatches(
            item_prefs, item, n=n, similarity=sim_distance)
    return similar
def getRecommendedItems(prefs, itemMatch, user):
    """Recommend items for user from a precomputed item-similarity table.

    prefs is {user: {item: rating}} and itemMatch is
    {item: [(similarity, other_item), ...]}.

    For every item the user has rated, each similar item the user has not
    rated accumulates similarity * rating; that total is then divided by
    the summed similarity to give a predicted rating. Returns a list of
    (predicted rating, item) tuples sorted from highest to lowest.

    Candidates whose summed similarity is 0 are left out: no prediction can
    be formed for them, and dividing would raise ZeroDivisionError.
    """
    userRatings = prefs[user]
    scores = {}
    totalSim = {}
    for (item, rating) in userRatings.items():
        for (similarity, item2) in itemMatch[item]:
            # The user already has an opinion on this one.
            if item2 in userRatings:
                continue
            scores[item2] = scores.get(item2, 0) + similarity * rating
            totalSim[item2] = totalSim.get(item2, 0) + similarity
    rankings = [
        # float() keeps the division exact even for integer inputs on
        # Python 2.
        (score / float(totalSim[item]), item)
        for item, score in scores.items()
        if totalSim[item] != 0
    ]
    rankings.sort(reverse=True)
    return rankings
def loadMovieLens(path='/data/movielens'):
    """Load a MovieLens dataset into a {user: {title: rating}} dictionary.

    Reads two files from the directory given by path:
      u.item -- pipe-separated; the first two fields are movie id and title.
      u.data -- one rating per line: user id, movie id, rating, timestamp.

    User ids are kept as strings, ratings are converted to float, and the
    timestamp is ignored. Raises KeyError if u.data refers to a movie id
    that is missing from u.item.
    """
    # Map movie id -> title.
    movies = {}
    with open(path + '/u.item') as item_file:
        for line in item_file:
            (movie_id, title) = line.split('|')[0:2]
            movies[movie_id] = title

    prefs = {}
    with open(path + '/u.data') as data_file:
        for line in data_file:
            # The fields are numeric, so splitting on any whitespace is safe
            # and accepts the tab-separated layout of u.data.
            (user, movieid, rating, _ts) = line.split()
            prefs.setdefault(user, {})
            prefs[user][movies[movieid]] = float(rating)
    return prefs
Related Questions
Navigate
Integrity-first tutoring: explanations and feedback only — we do not complete graded work. Learn more.