Source code for sknetwork.ranking.pagerank

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created in May 2019
@author: Nathan de Lara <nathan.delara@polytechnique.org>
@author: Thomas Bonald <bonald@enst.fr>
"""
from typing import Union, Optional

import numpy as np
from scipy import sparse

from sknetwork.linalg.ppr_solver import get_pagerank
from sknetwork.ranking.base import BaseRanking
from sknetwork.utils.check import check_damping_factor
from sknetwork.utils.format import get_adjacency_values


class PageRank(BaseRanking):
    """PageRank of each node, corresponding to its frequency of visit by a random walk.

    The random walk restarts with some fixed probability. The restart distribution can be personalized by the user.
    This variant is known as Personalized PageRank.

    Parameters
    ----------
    damping_factor : float
        Probability to continue the random walk (default = 0.85).
    solver : str
        * ``'piteration'``, use power iteration for a given number of iterations (default).
        * ``'diteration'``, use asynchronous parallel diffusion for a given number of iterations.
        * ``'lanczos'``, use eigensolver with a given tolerance.
        * ``'bicgstab'``, use Biconjugate Gradient Stabilized method for a given tolerance.
        * ``'RH'``, use a Ruffini-Horner polynomial evaluation.
        * ``'push'``, use push-based algorithm for a given tolerance.
    n_iter : int
        Number of iterations for some solvers (default = 10).
    tol : float
        Tolerance for the convergence of some solvers (default = 1e-6).

    Attributes
    ----------
    scores_ : np.ndarray
        PageRank score of each node.
    scores_row_ : np.ndarray
        Scores of rows, for bipartite graphs.
    scores_col_ : np.ndarray
        Scores of columns, for bipartite graphs.

    Example
    -------
    >>> import numpy as np
    >>> from sknetwork.ranking import PageRank
    >>> from sknetwork.data import house
    >>> pagerank = PageRank()
    >>> adjacency = house()
    >>> weights = {0: 1}
    >>> scores = pagerank.fit_predict(adjacency, weights)
    >>> np.round(scores, 2)
    array([0.29, 0.24, 0.12, 0.12, 0.24])

    References
    ----------
    Page, L., Brin, S., Motwani, R., & Winograd, T. (1999). The PageRank citation ranking: Bringing order to the web.
    Stanford InfoLab.
    """
    def __init__(self, damping_factor: float = 0.85, solver: str = 'piteration', n_iter: int = 10, tol: float = 1e-6):
        super().__init__()
        # Validate before storing so that an invalid value never ends up on the instance.
        check_damping_factor(damping_factor)
        self.damping_factor = damping_factor
        self.solver = solver
        self.n_iter = n_iter
        self.tol = tol
        # Unknown until fit() is called; fit() overwrites it with the flag returned by get_adjacency_values.
        self.bipartite = None

    def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray],
            weights: Optional[Union[dict, np.ndarray]] = None, weights_row: Optional[Union[dict, np.ndarray]] = None,
            weights_col: Optional[Union[dict, np.ndarray]] = None, force_bipartite: bool = False) -> 'PageRank':
        """Compute the pagerank of each node.

        Parameters
        ----------
        input_matrix : sparse.csr_matrix, np.ndarray
            Adjacency matrix or biadjacency matrix of the graph.
        weights : np.ndarray, dict
            Weights of the restart distribution for Personalized PageRank.
            If ``None``, the uniform distribution is used (no personalization, default).
        weights_row : np.ndarray, dict
            Weights on rows of the restart distribution for Personalized PageRank.
            Used for bipartite graphs.
            If both weights_row and weights_col are ``None`` (default), the uniform distribution on rows is used.
        weights_col : np.ndarray, dict
            Weights on columns of the restart distribution for Personalized PageRank.
            Used for bipartite graphs.
        force_bipartite : bool
            If ``True``, consider the input matrix as the biadjacency matrix of a bipartite graph.

        Returns
        -------
        self : :class:`PageRank`
        """
        # The helper returns the adjacency matrix to rank on, the restart values and a bipartite flag.
        # NOTE(review): default_value=0 presumably applies to nodes absent from the weights, and
        # which='probs' presumably requests a probability distribution -- confirm in get_adjacency_values.
        adjacency, values, self.bipartite = get_adjacency_values(input_matrix, force_bipartite=force_bipartite,
                                                                 values=weights,
                                                                 values_row=weights_row,
                                                                 values_col=weights_col,
                                                                 default_value=0,
                                                                 which='probs')
        self.scores_ = get_pagerank(adjacency, values, damping_factor=self.damping_factor, n_iter=self.n_iter,
                                    solver=self.solver, tol=self.tol)
        if self.bipartite:
            # NOTE(review): _split_vars is inherited; presumably it splits scores_ into
            # scores_row_ and scores_col_ using the biadjacency shape -- confirm in BaseRanking.
            self._split_vars(input_matrix.shape)
        return self