Source code for sknetwork.ranking.closeness

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on November 12 2019
@author: Quentin Lutz <qlutz@enst.fr>
"""
from math import log
from typing import Union, Optional

import numpy as np
from scipy import sparse

from sknetwork.path.shortest_path import get_distances
from sknetwork.ranking.base import BaseRanking
from sknetwork.utils.check import check_format, check_square, check_connected


[docs]class Closeness(BaseRanking):
    """Ranking by closeness centrality of each node in a connected graph, corresponding to the average length of the
    shortest paths from that node to all the other ones.

    Parameters
    ----------
    method :
        Denotes if the results should be exact or approximate.
    tol: float
        If ``method=='approximate'``, the allowed tolerance on each score entry.

    Attributes
    ----------
    scores_ : np.ndarray
        Closeness centrality of each node.

    Example
    -------
    >>> from sknetwork.ranking import Closeness
    >>> from sknetwork.data import cyclic_digraph
    >>> closeness = Closeness()
    >>> adjacency = cyclic_digraph(3)
    >>> scores = closeness.fit_predict(adjacency)
    >>> np.round(scores, 2)
    array([0.67, 0.67, 0.67])

    References
    ----------
    Eppstein, D., & Wang, J. (2001, January).
    `Fast approximation of centrality.
    <http://jgaa.info/accepted/2004/EppsteinWang2004.8.1.pdf>`_
    In Proceedings of the twelfth annual ACM-SIAM symposium on Discrete algorithms (pp. 228-229).
    Society for Industrial and Applied Mathematics.
    """

    def __init__(self, method: str = 'exact', tol: float = 1e-1):
        super(Closeness, self).__init__()

        self.method = method
        self.tol = tol

[docs]    def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray]) -> 'Closeness':
        """Closeness centrality for connected graphs.

        Parameters
        ----------
        adjacency :
            Adjacency matrix of the graph.

        Returns
        -------
        self: :class:`Closeness`
        """
        adjacency = check_format(adjacency)
        check_square(adjacency)
        check_connected(adjacency)
        n = adjacency.shape[0]

        if self.method == 'exact':
            n_sources = n
            sources = np.arange(n)
        elif self.method == 'approximate':
            n_sources = min(int(log(n) / self.tol ** 2), n)
            sources = np.random.choice(np.arange(n), n_sources, replace=False)
        else:
            raise ValueError("Method should be either 'exact' or 'approximate'.")

        distances = np.array([get_distances(adjacency, source=source) for source in sources])

        distances_min = np.min(distances, axis=1)
        scores = (n - 1) / n / np.mean(distances, axis=1)
        scores[distances_min < 0] = 0
        self.scores_ = scores

        return self