Source code for sknetwork.ranking.pagerank

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created in May 2019
@author: Nathan de Lara <nathan.delara@polytechnique.org>
@author: Thomas Bonald <bonald@enst.fr>
"""
from typing import Union, Optional

import numpy as np
from scipy import sparse

from sknetwork.linalg.ppr_solver import get_pagerank
from sknetwork.ranking.base import BaseRanking
from sknetwork.utils.check import check_damping_factor
from sknetwork.utils.format import get_adjacency_values


[docs]class PageRank(BaseRanking): """PageRank of each node, corresponding to its frequency of visit by a random walk. The random walk restarts with some fixed probability. The restart distribution can be personalized by the user. This variant is known as Personalized PageRank. Parameters ---------- damping_factor : float Probability to continue the random walk. solver : str * ``'piteration'``, use power iteration for a given number of iterations. * ``'diteration'``, use asynchronous parallel diffusion for a given number of iterations. * ``'lanczos'``, use eigensolver with a given tolerance. * ``'bicgstab'``, use Biconjugate Gradient Stabilized method for a given tolerance. * ``'RH'``, use a Ruffini-Horner polynomial evaluation. * ``'push'``, use push-based algorithm for a given tolerance n_iter : int Number of iterations for some solvers. tol : float Tolerance for the convergence of some solvers. Attributes ---------- scores_ : np.ndarray PageRank score of each node. scores_row_: np.ndarray Scores of rows, for bipartite graphs. scores_col_: np.ndarray Scores of columns, for bipartite graphs. Example ------- >>> from sknetwork.ranking import PageRank >>> from sknetwork.data import house >>> pagerank = PageRank() >>> adjacency = house() >>> weights = {0: 1} >>> scores = pagerank.fit_predict(adjacency, weights) >>> np.round(scores, 2) array([0.29, 0.24, 0.12, 0.12, 0.24]) References ---------- Page, L., Brin, S., Motwani, R., & Winograd, T. (1999). The PageRank citation ranking: Bringing order to the web. Stanford InfoLab. """ def __init__(self, damping_factor: float = 0.85, solver: str = 'piteration', n_iter: int = 10, tol: float = 1e-6): super(PageRank, self).__init__() check_damping_factor(damping_factor) self.damping_factor = damping_factor self.solver = solver self.n_iter = n_iter self.tol = tol self.bipartite = None
[docs] def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray], weights: Optional[Union[dict, np.ndarray]] = None, weights_row: Optional[Union[dict, np.ndarray]] = None, weights_col: Optional[Union[dict, np.ndarray]] = None, force_bipartite: bool = False) -> 'PageRank': """Compute the pagerank of each node. Parameters ---------- input_matrix : sparse.csr_matrix, np.ndarray Adjacency matrix or biadjacency matrix of the graph. weights : np.ndarray, dict Weights of the restart distribution for Personalized PageRank. If ``None``, the uniform distribution is used (no personalization, default). weights_row : np.ndarray, dict Weights on rows of the restart distribution for Personalized PageRank. Used for bipartite graphs. If both weights_row and weights_col are ``None`` (default), the uniform distribution on rows is used. weights_col : np.ndarray, dict Weights on columns of the restart distribution for Personalized PageRank. Used for bipartite graphs. force_bipartite : bool If ``True``, consider the input matrix as the biadjacency matrix of a bipartite graph. Returns ------- self: :class:`PageRank` """ adjacency, values, self.bipartite = get_adjacency_values(input_matrix, force_bipartite=force_bipartite, values=weights, values_row=weights_row, values_col=weights_col, default_value=0, which='probs') self.scores_ = get_pagerank(adjacency, values, damping_factor=self.damping_factor, n_iter=self.n_iter, solver=self.solver, tol=self.tol) if self.bipartite: self._split_vars(input_matrix.shape) return self