Source code for sknetwork.linalg.ppr_solver
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Apr 2020
@author: Nathan de Lara <ndelara@enst.fr>
"""
from typing import Union
import numpy as np
from scipy import sparse
from scipy.sparse.linalg import eigs, LinearOperator, bicgstab
from sknetwork.linalg.diteration import diffusion
from sknetwork.linalg.push import push_pagerank
from sknetwork.linalg.normalization import normalize
from sknetwork.linalg.polynome import Polynome
class RandomSurferOperator(LinearOperator):
"""Random surfer as a LinearOperator
Parameters
----------
adjacency :
Adjacency matrix of the graph as a CSR or a LinearOperator.
damping_factor : float
Probability to continue the random walk.
seeds :
Probability vector for seeds.
Attributes
----------
a : sparse.csr_matrix
Scaled transposed transition matrix.
b : np.ndarray
Scaled restart probability vector.
"""
def __init__(self, adjacency: Union[sparse.csr_matrix, LinearOperator], seeds: np.ndarray, damping_factor):
super(RandomSurferOperator, self).__init__(shape=adjacency.shape, dtype=float)
n = adjacency.shape[0]
out_degrees = adjacency.dot(np.ones(n)).astype(bool)
if hasattr(adjacency, 'left_sparse_dot'):
self.a = damping_factor * normalize(adjacency).T
else:
self.a = (damping_factor * normalize(adjacency)).T.tocsr()
self.b = (np.ones(n) - damping_factor * out_degrees) * seeds
def _matvec(self, x: np.ndarray):
return self.a.dot(x) + self.b * x.sum()
[docs]def get_pagerank(adjacency: Union[sparse.csr_matrix, LinearOperator], seeds: np.ndarray, damping_factor: float,
n_iter: int, tol: float = 1e-6, solver: str = 'piteration') -> np.ndarray:
"""Solve the Pagerank problem. Formally,
:math:`x = \\alpha Px + (1-\\alpha)y`,
where :math:`P = (D^{-1}A)^T` is the transition matrix and :math:`y` is the personalization probability vector.
Parameters
----------
adjacency : sparse.csr_matrix
Adjacency matrix of the graph.
seeds : np.ndarray
Personalization array. Must be a valid probability vector.
damping_factor : float
Probability to continue the random walk.
n_iter : int
Number of iterations for some of the solvers such as ``'piteration'`` or ``'diteration'``.
tol : float
Tolerance for the convergence of some solvers such as ``'bicgstab'`` or ``'lanczos'`` or ``'push'``.
solver : :obj:`str`
Which solver to use: ``'piteration'``, ``'diteration'``, ``'bicgstab'``, ``'lanczos'``, ``̀'RH'``, ``'push'``.
Returns
-------
pagerank : np.ndarray
Probability vector.
Examples
--------
>>> from sknetwork.data import house
>>> adjacency = house()
>>> seeds = np.array([1, 0, 0, 0, 0])
>>> scores = get_pagerank(adjacency, seeds, damping_factor=0.85, n_iter=10)
>>> np.round(scores, 2)
array([0.29, 0.24, 0.12, 0.12, 0.24])
References
----------
* Hong, D. (2012). `Optimized on-line computation of pagerank algorithm.
<https://arxiv.org/pdf/1202.6158.pdf>`_
arXiv preprint arXiv:1202.6158.
* Van der Vorst, H. A. (1992). `Bi-CGSTAB:
<https://en.wikipedia.org/wiki/Biconjugate_gradient_stabilized_method>`_
A fast and smoothly converging variant of Bi-CG for the solution of nonsymmetric linear systems.
SIAM Journal on scientific and Statistical Computing, 13(2), 631-644.
* Lanczos, C. (1950).
`An iteration method for the solution of the eigenvalue problem of linear differential and integral operators.
<http://www.cs.umd.edu/~oleary/lanczos1950.pdf>`_
Los Angeles, CA: United States Governm. Press Office.
* Whang, J. , Lenharth, A. , Dhillon, I. , & Pingali, K. . (2015).
`Scalable Data-Driven PageRank: Algorithms, System Issues, and Lessons Learned`. 9233, 438-450.
<https://www.cs.utexas.edu/users/inderjit/public_papers/scalable_pagerank_europar15.pdf>
"""
n = adjacency.shape[0]
if solver == 'diteration':
if not isinstance(adjacency, sparse.csr_matrix):
raise ValueError('D-iteration is not compatible with linear operators.')
adjacency = normalize(adjacency, p=1)
indptr = adjacency.indptr.astype(np.int32)
indices = adjacency.indices.astype(np.int32)
data = adjacency.data.astype(np.float32)
damping_factor = np.float32(damping_factor)
n_iter = np.int32(n_iter)
tol = np.float32(tol)
scores = np.zeros(n, dtype=np.float32)
fluid = (1 - damping_factor) * seeds.astype(np.float32)
diffusion(indptr, indices, data, scores, fluid, damping_factor, n_iter, tol)
elif solver == 'push':
n = adjacency.shape[0]
damping_factor = np.float32(damping_factor)
tol = np.float32(tol)
degrees = adjacency.dot(np.ones(n)).astype(np.int32)
rev_adjacency = adjacency.transpose().tocsr()
indptr = adjacency.indptr.astype(np.int32)
indices = adjacency.indices.astype(np.int32)
rev_indptr = rev_adjacency.indptr.astype(np.int32)
rev_indices = rev_adjacency.indices.astype(np.int32)
scores = push_pagerank(n, degrees, indptr, indices,
rev_indptr, rev_indices,
seeds.astype(np.float32),
damping_factor, tol)
elif solver == 'RH':
coeffs = np.ones(n_iter + 1)
polynome = Polynome(damping_factor * normalize(adjacency, p=1).T.tocsr(), coeffs)
scores = polynome.dot(seeds)
else:
rso = RandomSurferOperator(adjacency, seeds, damping_factor)
v0 = rso.b
if solver == 'bicgstab':
scores, info = bicgstab(sparse.eye(n, format='csr') - rso.a, rso.b, atol=tol, x0=v0)
elif solver == 'lanczos':
# noinspection PyTypeChecker
_, scores = sparse.linalg.eigs(rso, k=1, tol=tol, v0=v0)
scores = abs(scores.flatten().real)
elif solver == 'piteration':
scores = v0
for i in range(n_iter):
scores_ = rso.dot(scores)
scores_ /= scores_.sum()
if np.linalg.norm(scores - scores_, ord=1) < tol:
break
else:
scores = scores_
else:
raise ValueError('Unknown solver.')
return scores / scores.sum()