Please complete the algorithm for computing PageRank in Python
Question
Please complete the algorithm for computing PageRank in Python. The program is listed below. You need to fill in some lines which are indicated by the comments. Thanks!
# This Python file computes PageRank by using the Power Iteration algorithm
class cEdge:
    def __init__(self):
        self.nBgnNodeIndex = 0
        self.nEndNodeIndex = 0
        self.dEdgeWeight = 0.0
        self.dTransitionProb = 0.0

    def setBgnNodeIndex(self, nNodeIndex):
        self.nBgnNodeIndex = nNodeIndex

    def getBgnNodeIndex(self):
        return self.nBgnNodeIndex

    def setEndNodeIndex(self, nNodeIndex):
        self.nEndNodeIndex = nNodeIndex

    def getEndNodeIndex(self):
        return self.nEndNodeIndex

    def setEdgeWeight(self, dWeight):
        self.dEdgeWeight = dWeight

    def getEdgeWeight(self):
        return self.dEdgeWeight

    def setTransitionProb(self, dTransProb):
        self.dTransitionProb = dTransProb

    def getTransitionProb(self):
        return self.dTransitionProb
class cNode:
    def __init__(self, nNodeIndex):
        self.nNodeIndex = nNodeIndex
        self.dPageRankValue = 0.0
        self.vcOutNeighbors = {}  # Python dictionary: out-neighbor index -> cEdge
        self.vcInNeighbors = {}   # Python dictionary: in-neighbor index -> cEdge
        self.nOutDegree = 0

    def getNodeIndex(self):
        return self.nNodeIndex

    def getPageRankValue(self):
        return self.dPageRankValue

    def setPageRankValue(self, dPageRankValue):
        self.dPageRankValue = dPageRankValue

    def addOutNeighbor(self, nIdxOfOutNeighbor, dWeight):
        aNewEdge = cEdge()
        aNewEdge.setBgnNodeIndex(self.nNodeIndex)
        aNewEdge.setEndNodeIndex(nIdxOfOutNeighbor)
        aNewEdge.setEdgeWeight(dWeight)
        self.vcOutNeighbors[nIdxOfOutNeighbor] = aNewEdge

    def addInNeighbor(self, nIdxOfInNeighbor, dWeight):
        aNewEdge = cEdge()
        aNewEdge.setBgnNodeIndex(nIdxOfInNeighbor)
        aNewEdge.setEndNodeIndex(self.nNodeIndex)
        aNewEdge.setEdgeWeight(dWeight)
        self.vcInNeighbors[nIdxOfInNeighbor] = aNewEdge

    def getOutNeighbors(self):
        return self.vcOutNeighbors.keys()

    def getOutNeighborNodeIndex(self, nNodeIdx):
        aEdge = self.vcOutNeighbors[nNodeIdx]
        return aEdge.getEndNodeIndex()

    def getOutNeighborWeight(self, nNodeIdx):
        aEdge = self.vcOutNeighbors[nNodeIdx]
        return aEdge.getEdgeWeight()

    def getOutNeighborTransProb(self, nNodeIdx):
        aEdge = self.vcOutNeighbors[nNodeIdx]
        return aEdge.getTransitionProb()

    def getInNeighbors(self):
        return self.vcInNeighbors.keys()

    def getInNeighborWeight(self, nNodeIdx):
        aEdge = self.vcInNeighbors[nNodeIdx]
        return aEdge.getEdgeWeight()

    def getInNeighborTransProb(self, nNodeIdx):
        aEdge = self.vcInNeighbors[nNodeIdx]
        return aEdge.getTransitionProb()

    def setInNeighborTransProb(self, nNodeIndex, dTransProb):
        aEdge = self.vcInNeighbors[nNodeIndex]
        aEdge.setTransitionProb(dTransProb)

    def ComputeOutgoingTransitionProbabilities(self):
        self.nOutDegree = len(self.vcOutNeighbors)
        for nNodeIndex in self.vcOutNeighbors:
            aOneOutEdge = self.vcOutNeighbors[nNodeIndex]
            dWeight = aOneOutEdge.getEdgeWeight()
            dOutgoingTransProb =   # Please complete this line (see the hint below)
            aOneOutEdge.setTransitionProb(dOutgoingTransProb)
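
# Hint for the missing line above: the transition probability of an out-edge is
# the edge's weight normalized over the node's out-edges. Since
# loadAdjacencyListFile assigns every edge a weight of 1, one plausible
# completion (assuming an unweighted graph) is:
#     dOutgoingTransProb = dWeight / self.nOutDegree
# This makes the probabilities of leaving each node sum to 1.
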
class cGraph:
    def __init__(self):
        self.vcNodeList = {}  # Python dictionary: node index -> cNode
        self.nNumOfNodes = 0
        self.dDecayFactor = 0.0

    def setDecayFactor(self, dDecayFactor):
        self.dDecayFactor = dDecayFactor

    def addNode(self, nNodeIndex):
        self.nNumOfNodes += 1
        aNewNode = cNode(nNodeIndex)
        self.vcNodeList[nNodeIndex] = aNewNode
        return aNewNode

    def getNode(self, nNodeIndex):
        if nNodeIndex in self.vcNodeList:
            return self.vcNodeList[nNodeIndex]
        else:
            return None

    def addEdge(self, nBgnNodeIndex, nEndNodeIndex, dWeight):
        if nBgnNodeIndex not in self.vcNodeList:
            self.addNode(nBgnNodeIndex)
        if nEndNodeIndex not in self.vcNodeList:
            self.addNode(nEndNodeIndex)
        self.vcNodeList[nBgnNodeIndex].addOutNeighbor(nEndNodeIndex, dWeight)
        self.vcNodeList[nEndNodeIndex].addInNeighbor(nBgnNodeIndex, dWeight)

    def getAllNodes(self):
        return self.vcNodeList.keys()

    def loadAdjacencyListFile(self, sInputFile):
        fileAdjListData = open(sInputFile, "r")
        vsAllLines = fileAdjListData.readlines()
        for line in vsAllLines:
            # strip() removes the trailing newline as well as surrounding spaces;
            # rstrip(' ') alone would leave the newline attached to the last token
            vsNodeIndices = line.strip().split()
            if len(vsNodeIndices) > 0:
                vnIndicesOfNodes = list(map(int, vsNodeIndices))
                nBgnNodeIndex = vnIndicesOfNodes[0]
                for nIdx in range(1, len(vnIndicesOfNodes)):
                    nEndNodeIndex = vnIndicesOfNodes[nIdx]
                    self.addEdge(nBgnNodeIndex, nEndNodeIndex, 1)
        fileAdjListData.close()

    def displayEdgeList(self):
        print("The list of edges of the graph:")
        for nIndex in self.vcNodeList.keys():
            aNode = self.vcNodeList[nIndex]
            nBgnNodeIndex = aNode.getNodeIndex()
            vcOutNeighbors = aNode.getOutNeighbors()
            for nOutNeighborIndex in vcOutNeighbors:
                nEndNodeIndex = aNode.getOutNeighborNodeIndex(nOutNeighborIndex)
                print("( " + str(nBgnNodeIndex) + " , " + str(nEndNodeIndex) + " )")

    def ComputeTransitionProbabilities(self):
        for nNodeIndex in self.vcNodeList.keys():
            aNode = self.vcNodeList[nNodeIndex]
            aNode.ComputeOutgoingTransitionProbabilities()
            vnOutNeighborIndices = aNode.getOutNeighbors()
            for nOutNeighborNodeIndex in vnOutNeighborIndices:
                aOutNeighborNodeRefer = self.vcNodeList[nOutNeighborNodeIndex]
                dTransProb = aNode.getOutNeighborTransProb(nOutNeighborNodeIndex)
                aOutNeighborNodeRefer.setInNeighborTransProb(nNodeIndex, dTransProb)
    def RunOnePowerIteration(self):
        for nNodeIndex in self.vcNodeList.keys():
            aNode = self.vcNodeList[nNodeIndex]
            vnInNeighborIndices = aNode.getInNeighbors()
            dTotalSumPageRank = 0.0
            for nIndexOfInNeighborNode in vnInNeighborIndices:
                aInNeighborNode = self.vcNodeList[nIndexOfInNeighborNode]
                dPageRankValueOfInNeighbor = aInNeighborNode.getPageRankValue()
                dTransProb = aNode.getInNeighborTransProb(nIndexOfInNeighborNode)
                dTotalSumPageRank +=   # Please complete this line (see the hint below)
            dNewPageRankValue =   # Please complete this line (see the hint below)
            aNode.setPageRankValue(dNewPageRankValue)
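
    # Hint for the two missing lines above: a plausible completion, assuming the
    # standard damped PageRank update
    #     PR(v) = (1 - d)/N + d * sum over in-neighbors u of PR(u) * P(u -> v),
    # where d is the decay (damping) factor and N is the number of nodes:
    #     dTotalSumPageRank += dPageRankValueOfInNeighbor * dTransProb
    #     dNewPageRankValue = (1 - self.dDecayFactor) / self.nNumOfNodes \
    #                         + self.dDecayFactor * dTotalSumPageRank
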
    def ComputePageRank(self):
        # Initialization of PageRank values: uniform distribution over all nodes
        dInitialPRValue = 1 / self.nNumOfNodes
        for nIndex in self.vcNodeList.keys():
            aNode = self.vcNodeList[nIndex]
            aNode.setPageRankValue(dInitialPRValue)
        # Compute the outgoing transition probabilities
        self.ComputeTransitionProbabilities()
        # Run the power iteration a fixed number of times
        for nIndexOfIteration in range(1, 30):
            self.RunOnePowerIteration()
        # Output the final PageRank values
        print("PageRank values:")
        for nIndex in self.vcNodeList.keys():
            aNode = self.vcNodeList[nIndex]
            nNodeIndex = aNode.getNodeIndex()
            dPageRank = aNode.getPageRankValue()
            print(str(nNodeIndex) + " : " + str(dPageRank))
if __name__ == "__main__":
    # Input file (a raw string avoids backslash escapes such as \0 in the path)
    sInputFile = r"E:\GSU\99PythonWorkspace\08DataMining\src\edu\gsu\01AdjacencyList.txt"
    # Load the data
    aGraph = cGraph()
    aGraph.setDecayFactor(0.85)
    aGraph.loadAdjacencyListFile(sInputFile)
    aGraph.displayEdgeList()
    # Compute and print PageRank
    aGraph.ComputePageRank()
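
For reference, loadAdjacencyListFile expects one line per node: the node's index followed by the indices of its out-neighbors, all separated by spaces. A hypothetical 01AdjacencyList.txt for a small four-node graph could look like this (the contents are made up for illustration):

1 2 3
2 3
3 1
4 1 3

With the decay factor set to 0.85, the completed program prints one PageRank value per node, and the values converge toward a probability distribution that sums to (approximately) 1.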
Explanation / Answer
For reference, below is NetworkX's power-iteration implementation of PageRank (its pagerank function). It computes the same quantity as the skeleton above, and comparing the two is a good way to check the missing lines. Note the imports it needs to run:
import networkx as nx
from networkx.exception import NetworkXError


def pagerank(G, alpha=0.85, personalization=None,
             max_iter=100, tol=1.0e-6, nstart=None, weight='weight',
             dangling=None):
    """Return the PageRank of the nodes in the graph.

    PageRank computes a ranking of the nodes in the graph G based on
    the structure of the incoming links. It was originally designed as
    an algorithm to rank web pages.

    Parameters
    ----------
    G : graph
        A NetworkX graph. Undirected graphs will be converted to a directed
        graph with two directed edges for each undirected edge.
    alpha : float, optional
        Damping parameter for PageRank, default=0.85.
    personalization : dict, optional
        The "personalization vector" consisting of a dictionary with a
        key for every graph node and nonzero personalization value for each node.
        By default, a uniform distribution is used.
    max_iter : integer, optional
        Maximum number of iterations in power method eigenvalue solver.
    tol : float, optional
        Error tolerance used to check convergence in power method solver.
    nstart : dictionary, optional
        Starting value of PageRank iteration for each node.
    weight : key, optional
        Edge data key to use as weight. If None weights are set to 1.
    dangling : dict, optional
        The outedges to be assigned to any "dangling" nodes, i.e., nodes without
        any outedges. The dict key is the node the outedge points to and the dict
        value is the weight of that outedge. By default, dangling nodes are given
        outedges according to the personalization vector (uniform if not
        specified). This must be selected to result in an irreducible transition
        matrix (see notes under google_matrix). It may be common to have the
        dangling dict to be the same as the personalization dict.

    Returns
    -------
    pagerank : dictionary
        Dictionary of nodes with PageRank as value

    Notes
    -----
    The eigenvector calculation is done by the power iteration method
    and has no guarantee of convergence. The iteration will stop
    after max_iter iterations or an error tolerance of
    number_of_nodes(G)*tol has been reached.

    The PageRank algorithm was designed for directed graphs but this
    algorithm does not check if the input graph is directed and will
    execute on undirected graphs by converting each edge in the
    directed graph to two edges.
    """
    if len(G) == 0:
        return {}

    if not G.is_directed():
        D = G.to_directed()
    else:
        D = G

    # Create a copy in (right) stochastic form
    W = nx.stochastic_graph(D, weight=weight)
    N = W.number_of_nodes()

    # Choose fixed starting vector if not given
    if nstart is None:
        x = dict.fromkeys(W, 1.0 / N)
    else:
        # Normalized nstart vector
        s = float(sum(nstart.values()))
        x = dict((k, v / s) for k, v in nstart.items())

    if personalization is None:
        # Assign uniform personalization vector if not given
        p = dict.fromkeys(W, 1.0 / N)
    else:
        missing = set(G) - set(personalization)
        if missing:
            raise NetworkXError('Personalization dictionary '
                                'must have a value for every node. '
                                'Missing nodes %s' % missing)
        s = float(sum(personalization.values()))
        p = dict((k, v / s) for k, v in personalization.items())

    if dangling is None:
        # Use personalization vector if dangling vector not specified
        dangling_weights = p
    else:
        missing = set(G) - set(dangling)
        if missing:
            raise NetworkXError('Dangling node dictionary '
                                'must have a value for every node. '
                                'Missing nodes %s' % missing)
        s = float(sum(dangling.values()))
        dangling_weights = dict((k, v / s) for k, v in dangling.items())
    dangling_nodes = [n for n in W if W.out_degree(n, weight=weight) == 0.0]

    # power iteration: make up to max_iter iterations
    for _ in range(max_iter):
        xlast = x
        x = dict.fromkeys(xlast.keys(), 0)
        danglesum = alpha * sum(xlast[n] for n in dangling_nodes)
        for n in x:
            # this matrix multiply looks odd because it is
            # doing a left multiply x^T = xlast^T * W
            for nbr in W[n]:
                x[nbr] += alpha * xlast[n] * W[n][nbr][weight]
            x[n] += danglesum * dangling_weights[n] + (1.0 - alpha) * p[n]
        # check convergence, l1 norm
        err = sum([abs(x[n] - xlast[n]) for n in x])
        if err < N * tol:
            return x
    raise NetworkXError('pagerank: power iteration failed to converge '
                        'in %d iterations.' % max_iter)
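
As a quick sanity check, the function can be exercised on a small directed graph. This usage sketch assumes NetworkX is installed; the four-node graph mirrors the hypothetical adjacency list above and is made up for illustration:

import networkx as nx

# Hypothetical four-node graph matching the sample adjacency list
G = nx.DiGraph()
G.add_edges_from([(1, 2), (1, 3), (2, 3), (3, 1), (4, 1), (4, 3)])

ranks = pagerank(G, alpha=0.85)
for node, value in sorted(ranks.items()):
    print(node, round(value, 4))
# The values form a probability distribution, so they sum to (approximately) 1.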