Source code for sknetwork.ranking.closeness

```#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on November 12 2019
@author: Quentin Lutz <qlutz@enst.fr>
"""
from math import log
from typing import Union, Optional

import numpy as np
from scipy import sparse

from sknetwork.path.shortest_path import get_distances
from sknetwork.ranking.base import BaseRanking
from sknetwork.utils.check import check_format, check_square, check_connected

[docs]class Closeness(BaseRanking):
"""Ranking by closeness centrality of each node in a connected graph, corresponding to the average length of the
shortest paths from that node to all the other ones.

Parameters
----------
method :
Denotes if the results should be exact or approximate.
tol: float
If ``method=='approximate'``, the allowed tolerance on each score entry.

Attributes
----------
scores_ : np.ndarray
Closeness centrality of each node.

Example
-------
>>> from sknetwork.ranking import Closeness
>>> from sknetwork.data import cyclic_digraph
>>> closeness = Closeness()
>>> np.round(scores, 2)
array([0.67, 0.67, 0.67])

References
----------
Eppstein, D., & Wang, J. (2001, January).
`Fast approximation of centrality.
<http://jgaa.info/accepted/2004/EppsteinWang2004.8.1.pdf>`_
In Proceedings of the twelfth annual ACM-SIAM symposium on Discrete algorithms (pp. 228-229).
Society for Industrial and Applied Mathematics.
"""

def __init__(self, method: str = 'exact', tol: float = 1e-1):
super(Closeness, self).__init__()

self.method = method
self.tol = tol

[docs]    def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray]) -> 'Closeness':
"""Closeness centrality for connected graphs.

Parameters
----------

Returns
-------
self: :class:`Closeness`
"""

if self.method == 'exact':
n_sources = n
sources = np.arange(n)
elif self.method == 'approximate':
n_sources = min(int(log(n) / self.tol ** 2), n)
sources = np.random.choice(np.arange(n), n_sources, replace=False)
else:
raise ValueError("Method should be either 'exact' or 'approximate'.")

distances = np.array([get_distances(adjacency, source=source) for source in sources])

distances_min = np.min(distances, axis=1)
scores = (n - 1) / n / np.mean(distances, axis=1)
scores[distances_min < 0] = 0
self.scores_ = scores

return self
```