Please complete the algorithm for computing PageRank in Python
Question
Please complete the algorithm for computing PageRank in Python. The program is listed below. You need to fill in some lines which are indicated by the comments. Thanks!
# This Python file computes PageRank by using the Power Iteration algorithm
class cEdge:
    def __init__(self):
        self.nBgnNodeIndex = 0
        self.nEndNodeIndex = 0
        self.dEdgeWeight = 0.0
        self.dTransitionProb = 0.0

    def setBgnNodeIndex(self, nNodeIndex):
        self.nBgnNodeIndex = nNodeIndex

    def getBgnNodeIndex(self):
        return self.nBgnNodeIndex

    def setEndNodeIndex(self, nNodeIndex):
        self.nEndNodeIndex = nNodeIndex

    def getEndNodeIndex(self):
        return self.nEndNodeIndex

    def setEdgeWeight(self, dWeight):
        self.dEdgeWeight = dWeight

    def getEdgeWeight(self):
        return self.dEdgeWeight

    def setTransitionProb(self, dTransProb):
        self.dTransitionProb = dTransProb

    def getTransitionProb(self):
        return self.dTransitionProb
class cNode:
    def __init__(self, nNodeIndex):
        self.nNodeIndex = nNodeIndex
        self.dPageRankValue = 0.0
        self.vcOutNeighbors = {}  # Python dictionary: out-neighbor index -> cEdge
        self.vcInNeighbors = {}   # Python dictionary: in-neighbor index -> cEdge
        self.nOutDegree = 0

    def getNodeIndex(self):
        return self.nNodeIndex

    def getPageRankValue(self):
        return self.dPageRankValue

    def setPageRankValue(self, dPageRankValue):
        self.dPageRankValue = dPageRankValue

    def addOutNeighbor(self, nIdxOfOutNeighbor, dWeight):
        aNewEdge = cEdge()
        aNewEdge.setBgnNodeIndex(self.nNodeIndex)
        aNewEdge.setEndNodeIndex(nIdxOfOutNeighbor)
        aNewEdge.setEdgeWeight(dWeight)
        self.vcOutNeighbors[nIdxOfOutNeighbor] = aNewEdge

    def addInNeighbor(self, nIdxOfInNeighbor, dWeight):
        aNewEdge = cEdge()
        aNewEdge.setBgnNodeIndex(nIdxOfInNeighbor)
        aNewEdge.setEndNodeIndex(self.nNodeIndex)
        aNewEdge.setEdgeWeight(dWeight)
        self.vcInNeighbors[nIdxOfInNeighbor] = aNewEdge

    def getOutNeighbors(self):
        return self.vcOutNeighbors.keys()

    def getOutNeighborNodeIndex(self, nNodeIdx):
        aEdge = self.vcOutNeighbors[nNodeIdx]
        return aEdge.getEndNodeIndex()

    def getOutNeighborWeight(self, nNodeIdx):
        aEdge = self.vcOutNeighbors[nNodeIdx]
        return aEdge.getEdgeWeight()

    def getOutNeighborTransProb(self, nNodeIdx):
        aEdge = self.vcOutNeighbors[nNodeIdx]
        return aEdge.getTransitionProb()

    def getInNeighbors(self):
        return self.vcInNeighbors.keys()

    def getInNeighborWeight(self, nNodeIdx):
        aEdge = self.vcInNeighbors[nNodeIdx]
        return aEdge.getEdgeWeight()

    def getInNeighborTransProb(self, nNodeIdx):
        aEdge = self.vcInNeighbors[nNodeIdx]
        return aEdge.getTransitionProb()

    def setInNeighborTransProb(self, nNodeIndex, dTransProb):
        aEdge = self.vcInNeighbors[nNodeIndex]
        aEdge.setTransitionProb(dTransProb)

    def ComputeOutgoingTransitionProbabilities(self):
        self.nOutDegree = len(self.vcOutNeighbors)
        for nNodeIndex in self.vcOutNeighbors:
            aOneOutEdge = self.vcOutNeighbors[nNodeIndex]
            dWeight = aOneOutEdge.getEdgeWeight()
            dOutgoingTransProb =   # Please complete this line (see the hint below)
            aOneOutEdge.setTransitionProb(dOutgoingTransProb)
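
# Hint for the missing line above: the transition probability of an out-edge is
# the edge's weight normalized over the node's out-edges. Since
# loadAdjacencyListFile assigns every edge a weight of 1, one plausible
# completion (assuming an unweighted graph) is:
#     dOutgoingTransProb = dWeight / self.nOutDegree
# This makes the probabilities of leaving each node sum to 1.
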
class cGraph:
    def __init__(self):
        self.vcNodeList = {}  # Python dictionary: node index -> cNode
        self.nNumOfNodes = 0
        self.dDecayFactor = 0.0

    def setDecayFactor(self, dDecayFactor):
        self.dDecayFactor = dDecayFactor

    def addNode(self, nNodeIndex):
        self.nNumOfNodes += 1
        aNewNode = cNode(nNodeIndex)
        self.vcNodeList[nNodeIndex] = aNewNode
        return aNewNode

    def getNode(self, nNodeIndex):
        if nNodeIndex in self.vcNodeList:
            return self.vcNodeList[nNodeIndex]
        else:
            return None

    def addEdge(self, nBgnNodeIndex, nEndNodeIndex, dWeight):
        if nBgnNodeIndex not in self.vcNodeList:
            self.addNode(nBgnNodeIndex)
        if nEndNodeIndex not in self.vcNodeList:
            self.addNode(nEndNodeIndex)
        self.vcNodeList[nBgnNodeIndex].addOutNeighbor(nEndNodeIndex, dWeight)
        self.vcNodeList[nEndNodeIndex].addInNeighbor(nBgnNodeIndex, dWeight)

    def getAllNodes(self):
        return self.vcNodeList.keys()

    def loadAdjacencyListFile(self, sInputFile):
        fileAdjListData = open(sInputFile, "r")
        vsAllLines = fileAdjListData.readlines()
        for line in vsAllLines:
            # strip() removes the trailing newline as well as surrounding spaces;
            # rstrip(' ') alone would leave the newline attached to the last token
            vsNodeIndices = line.strip().split()
            if len(vsNodeIndices) > 0:
                vnIndicesOfNodes = list(map(int, vsNodeIndices))
                nBgnNodeIndex = vnIndicesOfNodes[0]
                for nIdx in range(1, len(vnIndicesOfNodes)):
                    nEndNodeIndex = vnIndicesOfNodes[nIdx]
                    self.addEdge(nBgnNodeIndex, nEndNodeIndex, 1)
        fileAdjListData.close()

    def displayEdgeList(self):
        print("The list of edges of the graph:")
        for nIndex in self.vcNodeList.keys():
            aNode = self.vcNodeList[nIndex]
            nBgnNodeIndex = aNode.getNodeIndex()
            vcOutNeighbors = aNode.getOutNeighbors()
            for nOutNeighborIndex in vcOutNeighbors:
                nEndNodeIndex = aNode.getOutNeighborNodeIndex(nOutNeighborIndex)
                print("( " + str(nBgnNodeIndex) + " , " + str(nEndNodeIndex) + " )")

    def ComputeTransitionProbabilities(self):
        for nNodeIndex in self.vcNodeList.keys():
            aNode = self.vcNodeList[nNodeIndex]
            aNode.ComputeOutgoingTransitionProbabilities()
            vnOutNeighborIndices = aNode.getOutNeighbors()
            for nOutNeighborNodeIndex in vnOutNeighborIndices:
                aOutNeighborNodeRefer = self.vcNodeList[nOutNeighborNodeIndex]
                dTransProb = aNode.getOutNeighborTransProb(nOutNeighborNodeIndex)
                aOutNeighborNodeRefer.setInNeighborTransProb(nNodeIndex, dTransProb)
    def RunOnePowerIteration(self):
        for nNodeIndex in self.vcNodeList.keys():
            aNode = self.vcNodeList[nNodeIndex]
            vnInNeighborIndices = aNode.getInNeighbors()
            dTotalSumPageRank = 0.0
            for nIndexOfInNeighborNode in vnInNeighborIndices:
                aInNeighborNode = self.vcNodeList[nIndexOfInNeighborNode]
                dPageRankValueOfInNeighbor = aInNeighborNode.getPageRankValue()
                dTransProb = aNode.getInNeighborTransProb(nIndexOfInNeighborNode)
                dTotalSumPageRank +=   # Please complete this line (see the hint below)
            dNewPageRankValue =   # Please complete this line (see the hint below)
            aNode.setPageRankValue(dNewPageRankValue)
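
    # Hint for the two missing lines above: a plausible completion, assuming the
    # standard damped PageRank update
    #     PR(v) = (1 - d)/N + d * sum over in-neighbors u of PR(u) * P(u -> v),
    # where d is the decay (damping) factor and N is the number of nodes:
    #     dTotalSumPageRank += dPageRankValueOfInNeighbor * dTransProb
    #     dNewPageRankValue = (1 - self.dDecayFactor) / self.nNumOfNodes \
    #                         + self.dDecayFactor * dTotalSumPageRank
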
    def ComputePageRank(self):
        # Initialization of PageRank values: uniform distribution over all nodes
        dInitialPRValue = 1 / self.nNumOfNodes
        for nIndex in self.vcNodeList.keys():
            aNode = self.vcNodeList[nIndex]
            aNode.setPageRankValue(dInitialPRValue)
        # Compute the outgoing transition probabilities
        self.ComputeTransitionProbabilities()
        # Run the power iteration a fixed number of times
        for nIndexOfIteration in range(1, 30):
            self.RunOnePowerIteration()
        # Output the final PageRank values
        print("PageRank values:")
        for nIndex in self.vcNodeList.keys():
            aNode = self.vcNodeList[nIndex]
            nNodeIndex = aNode.getNodeIndex()
            dPageRank = aNode.getPageRankValue()
            print(str(nNodeIndex) + " : " + str(dPageRank))
if __name__ == "__main__":
    # Input file (a raw string avoids backslash escapes such as \0 in the path)
    sInputFile = r"E:\GSU\99PythonWorkspace\08DataMining\src\edu\gsu\01AdjacencyList.txt"
    # Load the data
    aGraph = cGraph()
    aGraph.setDecayFactor(0.85)
    aGraph.loadAdjacencyListFile(sInputFile)
    aGraph.displayEdgeList()
    # Compute and print PageRank
    aGraph.ComputePageRank()
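
For reference, loadAdjacencyListFile expects one line per node: the node's index followed by the indices of its out-neighbors, all separated by spaces. A hypothetical 01AdjacencyList.txt for a small four-node graph could look like this (the contents are made up for illustration):

1 2 3
2 3
3 1
4 1 3

With the decay factor set to 0.85, the completed program prints one PageRank value per node, and the values converge toward a probability distribution that sums to (approximately) 1.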
Explanation / Answer
For reference, below is NetworkX's power-iteration implementation of PageRank (its pagerank function). It computes the same quantity as the skeleton above, and comparing the two is a good way to check the missing lines. Note the imports it needs to run:
import networkx as nx
from networkx.exception import NetworkXError


def pagerank(G, alpha=0.85, personalization=None,
             max_iter=100, tol=1.0e-6, nstart=None, weight='weight',
             dangling=None):
    """Return the PageRank of the nodes in the graph.

    PageRank computes a ranking of the nodes in the graph G based on
    the structure of the incoming links. It was originally designed as
    an algorithm to rank web pages.

    Parameters
    ----------
    G : graph
        A NetworkX graph. Undirected graphs will be converted to a directed
        graph with two directed edges for each undirected edge.
    alpha : float, optional
        Damping parameter for PageRank, default=0.85.
    personalization : dict, optional
        The "personalization vector" consisting of a dictionary with a
        key for every graph node and nonzero personalization value for each node.
        By default, a uniform distribution is used.
    max_iter : integer, optional
        Maximum number of iterations in power method eigenvalue solver.
    tol : float, optional
        Error tolerance used to check convergence in power method solver.
    nstart : dictionary, optional
        Starting value of PageRank iteration for each node.
    weight : key, optional
        Edge data key to use as weight. If None weights are set to 1.
    dangling : dict, optional
        The outedges to be assigned to any "dangling" nodes, i.e., nodes without
        any outedges. The dict key is the node the outedge points to and the dict
        value is the weight of that outedge. By default, dangling nodes are given
        outedges according to the personalization vector (uniform if not
        specified). This must be selected to result in an irreducible transition
        matrix (see notes under google_matrix). It may be common to have the
        dangling dict to be the same as the personalization dict.

    Returns
    -------
    pagerank : dictionary
        Dictionary of nodes with PageRank as value

    Notes
    -----
    The eigenvector calculation is done by the power iteration method
    and has no guarantee of convergence. The iteration will stop
    after max_iter iterations or an error tolerance of
    number_of_nodes(G)*tol has been reached.

    The PageRank algorithm was designed for directed graphs but this
    algorithm does not check if the input graph is directed and will
    execute on undirected graphs by converting each edge in the
    directed graph to two edges.
    """
    if len(G) == 0:
        return {}

    if not G.is_directed():
        D = G.to_directed()
    else:
        D = G

    # Create a copy in (right) stochastic form
    W = nx.stochastic_graph(D, weight=weight)
    N = W.number_of_nodes()

    # Choose fixed starting vector if not given
    if nstart is None:
        x = dict.fromkeys(W, 1.0 / N)
    else:
        # Normalized nstart vector
        s = float(sum(nstart.values()))
        x = dict((k, v / s) for k, v in nstart.items())

    if personalization is None:
        # Assign uniform personalization vector if not given
        p = dict.fromkeys(W, 1.0 / N)
    else:
        missing = set(G) - set(personalization)
        if missing:
            raise NetworkXError('Personalization dictionary '
                                'must have a value for every node. '
                                'Missing nodes %s' % missing)
        s = float(sum(personalization.values()))
        p = dict((k, v / s) for k, v in personalization.items())

    if dangling is None:
        # Use personalization vector if dangling vector not specified
        dangling_weights = p
    else:
        missing = set(G) - set(dangling)
        if missing:
            raise NetworkXError('Dangling node dictionary '
                                'must have a value for every node. '
                                'Missing nodes %s' % missing)
        s = float(sum(dangling.values()))
        dangling_weights = dict((k, v / s) for k, v in dangling.items())
    dangling_nodes = [n for n in W if W.out_degree(n, weight=weight) == 0.0]

    # power iteration: make up to max_iter iterations
    for _ in range(max_iter):
        xlast = x
        x = dict.fromkeys(xlast.keys(), 0)
        danglesum = alpha * sum(xlast[n] for n in dangling_nodes)
        for n in x:
            # this matrix multiply looks odd because it is
            # doing a left multiply x^T = xlast^T * W
            for nbr in W[n]:
                x[nbr] += alpha * xlast[n] * W[n][nbr][weight]
            x[n] += danglesum * dangling_weights[n] + (1.0 - alpha) * p[n]
        # check convergence, l1 norm
        err = sum([abs(x[n] - xlast[n]) for n in x])
        if err < N * tol:
            return x
    raise NetworkXError('pagerank: power iteration failed to converge '
                        'in %d iterations.' % max_iter)
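
As a quick sanity check, the function can be exercised on a small directed graph. This usage sketch assumes NetworkX is installed; the four-node graph mirrors the hypothetical adjacency list above and is made up for illustration:

import networkx as nx

# Hypothetical four-node graph matching the sample adjacency list
G = nx.DiGraph()
G.add_edges_from([(1, 2), (1, 3), (2, 3), (3, 1), (4, 1), (4, 3)])

ranks = pagerank(G, alpha=0.85)
for node, value in sorted(ranks.items()):
    print(node, round(value, 4))
# The values form a probability distribution, so they sum to (approximately) 1.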