Academic Integrity: tutoring, explanations, and feedback — we don’t complete graded work or submit on a student’s behalf.

Python Question (2.7): Please write different code that produces the same output

ID: 666137 • Letter: P

Question

Python Question(2.7)

Please write different code whose output is the same as that of the code below — in other words, paraphrase the code.

The data file is:

https://drive.google.com/folderview?id=0Bz_aB61VIRZAfnNoamZmZVdVR19TNmdmVDBwbk5aN1JsNkQyNTVRLUgwVVB2elVIUHBfcFE&usp=sharing

----------------------------------------------

import recommendations

# Q1: build {url: {tag: count}} from the first 99 lines of the Delicious dump.
# (The loop stops when it reaches line 100, so that line is not processed.)
url_tags = {}
with open('ydata-delicious-popular-urls-and-tags-v1_0.txt') as data_file:
    for line_number, line in enumerate(data_file, 1):
        if line_number == 100:
            break

        # The assignment describes the file as tab delimited: the URL, two
        # metadata columns (save count and first-save date), then alternating
        # tag / count columns.
        fields = line.split('\t')
        url = fields[0]
        tags = url_tags.setdefault(url, {})

        # Pair every tag with the count that follows it.  zip() silently
        # drops a dangling last field (e.g. a lone trailing newline), so a
        # URL without tags simply keeps an empty dictionary.
        tag_fields = fields[3:]
        for tag, count in zip(tag_fields[0::2], tag_fields[1::2]):
            # setdefault keeps the first count seen if a tag repeats.
            tags.setdefault(tag, int(count))

# Q2
# Similarity between tags.
print(recommendations.calculateSimilarItems(url_tags))

# Q3
# User-based filtering.
print(recommendations.getRecommendations(
    url_tags,
    'http://www.michaelbach.de/ot/index.html',
    similarity=recommendations.sim_pearson))

# Item-based filtering.
itemsim = recommendations.calculateSimilarItems(url_tags)
print(recommendations.getRecommendedItems(
    url_tags, itemsim, 'http://www.michaelbach.de/ot/index.html'))

# NOTE: other URLs were reported to raise ZeroDivisionError here.

PROBLEM DESCRIPTION: Delicious (formerly del.icio.us) is a web site that allows users to save their favorite links (bookmarks) online. Each link also has one or more "tags" that represent the categories or topics of the website, such as "programming", "cooking", "research", etc. In this assignment, you will do the following: 1. Using the provided dataset (described below), first create a dictionary of tags and items (i.e., web links). That is, you will create a dictionary very similar to the critics dictionary, where the key is a URL and the value is another dictionary in which the key is a tag and the value is how many times that URL is tagged with that tag. 2. Calculate the similarity between tags, and see if you can find any tags that are almost identical (i.e., similarity is almost 1). For instance, find some items that could have been tagged "programming" but were not. 3. Compute top-5 item-based and user-based tag recommendations for five URLs that you choose. DATASET: This dataset consists of 100,000 popular URLs bookmarked on Delicious within a past time window. Each URL includes the date first saved, the number of saves, and the top 10 tags used and their respective counts. The dataset is available on LMS under /Assignments/Assignment 1, file name: Webscope_R5.tgz. DATA FORMAT: The file is tab delimited; the columns are (from left to right): URL; number of saves; date of first save

Explanation / Answer

# Sample ratings: critic name -> {movie title: score}.
critics = {
    'Lisa Rose': {
        'Lady in the Water': 2.5,
        'Snakes on a Plane': 3.5,
        'Just My Luck': 3.0,
        'Superman Returns': 3.5,
        'You, Me and Dupree': 2.5,
        'The Night Listener': 3.0,
    },
    'Gene Seymour': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 3.5,
        'Just My Luck': 1.5,
        'Superman Returns': 5.0,
        'The Night Listener': 3.0,
        'You, Me and Dupree': 3.5,
    },
    'Michael Phillips': {
        'Lady in the Water': 2.5,
        'Snakes on a Plane': 3.0,
        'Superman Returns': 3.5,
        'The Night Listener': 4.0,
    },
    'Claudia Puig': {
        'Snakes on a Plane': 3.5,
        'Just My Luck': 3.0,
        'The Night Listener': 4.5,
        'Superman Returns': 4.0,
        'You, Me and Dupree': 2.5,
    },
    'Mick LaSalle': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 4.0,
        'Just My Luck': 2.0,
        'Superman Returns': 3.0,
        'The Night Listener': 3.0,
        'You, Me and Dupree': 2.0,
    },
    'Jack Matthews': {
        'Lady in the Water': 3.0,
        'Snakes on a Plane': 4.0,
        'The Night Listener': 3.0,
        'Superman Returns': 5.0,
        'You, Me and Dupree': 3.5,
    },
    'Toby': {
        'Snakes on a Plane': 4.5,
        'You, Me and Dupree': 1.0,
        'Superman Returns': 4.0,
    },
    'Ali': {
        'Just My Luck': 1.0,
        'You, Me and Dupree': 2.0,
        'The Night Listener': 4.5,
        'Superman Returns': 1.0,
        'Snakes on a Plane': 4.5,
    },
}

from math import sqrt
def sim_distance(prefs, person1, person2):
    """Return a distance-based similarity score for person1 and person2.

    The score is 1 / (1 + d), where d is the Euclidean distance between the
    two rating dictionaries measured over the items both have rated.
    Returns 0 when they have no rated item in common.
    """
    common = [item for item in prefs[person1] if item in prefs[person2]]
    if not common:
        return 0

    squared_distance = 0
    for item in common:
        difference = prefs[person1][item] - prefs[person2][item]
        squared_distance += pow(difference, 2)

    return 1 / (1 + sqrt(squared_distance))
def sim_pearson(prefs, p1, p2):
    """Return the Pearson correlation coefficient between p1 and p2.

    Only items rated by both p1 and p2 are considered.

    Args:
        prefs: mapping of name -> {item: numeric rating}.
        p1, p2: keys of ``prefs`` to compare.

    Returns:
        A float in [-1, 1], or 0 when the two have no item in common or
        when either set of shared ratings has no variance.
    """
    shared = [item for item in prefs[p1] if item in prefs[p2]]
    if not shared:
        return 0

    # Use a float count so every "/ n" below is a true division.  Under
    # Python 2 an int count combined with int ratings (e.g. tag counts)
    # would silently truncate and produce wrong correlations.
    n = float(len(shared))

    ratings1 = [prefs[p1][item] for item in shared]
    ratings2 = [prefs[p2][item] for item in shared]

    sum1 = sum(ratings1)
    sum2 = sum(ratings2)
    sum1Sq = sum(pow(value, 2) for value in ratings1)
    sum2Sq = sum(pow(value, 2) for value in ratings2)
    pSum = sum(a * b for a, b in zip(ratings1, ratings2))

    num = pSum - (sum1 * sum2 / n)
    spread1 = sum1Sq - pow(sum1, 2) / n
    spread2 = sum2Sq - pow(sum2, 2) / n

    # A zero spread means constant ratings (correlation undefined).  Float
    # rounding can also leave a tiny negative value here, which would make
    # sqrt() raise ValueError, so treat "<= 0" the same as zero.
    if spread1 <= 0 or spread2 <= 0:
        return 0

    return num / sqrt(spread1 * spread2)
def topMatches(prefs, person, n=5, similarity=sim_pearson):
    """Return the n best matches for person as (score, other) tuples.

    Every other key in prefs is scored against person with the given
    similarity function; the result is ordered from highest to lowest.
    """
    ranked = []
    for other in prefs:
        if other == person:
            continue
        ranked.append((similarity(prefs, person, other), other))

    # Ascending sort followed by a reversed slice gives best-first order.
    ranked.sort()
    return ranked[::-1][:n]
def getRecommendations(prefs, person, similarity=sim_pearson):
    """Return (predicted rating, item) tuples for person, best first.

    Each prediction is the similarity-weighted average of the ratings given
    by everyone else in prefs.  People whose similarity to person is zero
    or negative are ignored, as are items person already rated with a
    non-zero value.
    """
    weighted_totals = {}
    similarity_sums = {}

    for other, other_ratings in prefs.items():
        if other == person:
            continue

        weight = similarity(prefs, person, other)
        if weight <= 0:
            continue

        for item, rating in other_ratings.items():
            # Skip anything person has already rated (a 0 counts as unrated).
            if item in prefs[person] and prefs[person][item] != 0:
                continue
            weighted_totals[item] = weighted_totals.get(item, 0) + rating * weight
            similarity_sums[item] = similarity_sums.get(item, 0) + weight

    # Normalise each total by the sum of the similarities that produced it.
    ranked = sorted(
        (weighted_totals[item] / similarity_sums[item], item)
        for item in weighted_totals
    )
    return ranked[::-1]

def transformPrefs(prefs):
    """Return prefs with its two key levels swapped.

    {person: {item: rating}} becomes {item: {person: rating}}.
    """
    flipped = {}
    for person, ratings in prefs.items():
        for item, rating in ratings.items():
            flipped.setdefault(item, {})[person] = rating
    return flipped


def calculateSimilarItems(prefs, n=10):
    """Return {item: [(score, other item), ...]} with each item's n closest items.

    prefs is flipped so that it is keyed by item, then every item is compared
    with the others using sim_distance.  A progress line is printed after
    every 100 items.
    """
    itemPrefs = transformPrefs(prefs)
    item_count = len(itemPrefs)

    similar = {}
    for position, item in enumerate(itemPrefs, 1):
        # Status update for large datasets.
        if position % 100 == 0:
            print("%d / %d" % (position, item_count))
        similar[item] = topMatches(itemPrefs, item, n=n, similarity=sim_distance)
    return similar

def getRecommendedItems(prefs, itemMatch, user):
    """Return item-based recommendations for user as (score, item), best first.

    Args:
        prefs: mapping of user -> {item: rating}.
        itemMatch: mapping of item -> list of (similarity, other item)
            tuples, in the shape produced by calculateSimilarItems.
        user: key of ``prefs`` to recommend for.

    Returns:
        A list of (predicted rating, item) tuples sorted from highest to
        lowest.  Items the user has already rated are excluded, as are
        candidates whose similarities sum to zero (no prediction possible).
    """
    userRatings = prefs[user]
    scores = {}
    totalSim = {}

    for item, rating in userRatings.items():
        for similarity, item2 in itemMatch[item]:
            # Ignore items the user has already rated.
            if item2 in userRatings:
                continue
            # Weighted sum of rating times similarity.
            scores[item2] = scores.get(item2, 0) + similarity * rating
            # Sum of all the similarities.
            totalSim[item2] = totalSim.get(item2, 0) + similarity

    # A candidate whose similarities add up to zero carries no evidence;
    # dividing by that total used to raise ZeroDivisionError.  float() keeps
    # the division exact under Python 2 if the inputs happen to be ints.
    rankings = [
        (score / float(totalSim[item]), item)
        for item, score in scores.items()
        if totalSim[item] != 0
    ]
    rankings.sort(reverse=True)
    return rankings

def loadMovieLens(path='/data/movielens'):
    """Load MovieLens ratings as {user id: {movie title: rating}}.

    Args:
        path: directory containing ``u.item`` ('|'-separated, movie id then
            title) and ``u.data`` (whitespace-separated user id, movie id,
            rating and timestamp).

    Returns:
        A dictionary keyed by user id (as a string) whose values map movie
        titles to float ratings.
    """
    # Movie id -> title.
    movies = {}
    with open(path + '/u.item') as item_file:
        for line in item_file:
            if not line.strip():
                continue  # tolerate blank lines
            movie_id, title = line.split('|')[0:2]
            movies[movie_id] = title

    prefs = {}
    with open(path + '/u.data') as data_file:
        for line in data_file:
            if not line.strip():
                continue  # tolerate blank lines
            # split() with no argument accepts tabs as well as spaces and
            # drops the trailing newline; splitting on a single space fails
            # on the tab-separated MovieLens file.
            user, movieid, rating, _timestamp = line.split()
            prefs.setdefault(user, {})[movies[movieid]] = float(rating)
    return prefs