Source code for sknetwork.ranking.closeness

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on November 12 2019
@author: Quentin Lutz <qlutz@enst.fr>
"""
from math import log
from typing import Union, Optional

import numpy as np
from scipy import sparse

from sknetwork.path.shortest_path import get_distances
from sknetwork.ranking.base import BaseRanking
from sknetwork.utils.check import check_format, check_square, check_connected


[docs]class Closeness(BaseRanking): """Ranking by closeness centrality of each node in a connected graph, corresponding to the average length of the shortest paths from that node to all the other ones. Parameters ---------- method : Denotes if the results should be exact or approximate. tol: float If ``method=='approximate'``, the allowed tolerance on each score entry. Attributes ---------- scores_ : np.ndarray Closeness centrality of each node. Example ------- >>> from sknetwork.ranking import Closeness >>> from sknetwork.data import cyclic_digraph >>> closeness = Closeness() >>> adjacency = cyclic_digraph(3) >>> scores = closeness.fit_predict(adjacency) >>> np.round(scores, 2) array([0.67, 0.67, 0.67]) References ---------- Eppstein, D., & Wang, J. (2001, January). `Fast approximation of centrality. <http://jgaa.info/accepted/2004/EppsteinWang2004.8.1.pdf>`_ In Proceedings of the twelfth annual ACM-SIAM symposium on Discrete algorithms (pp. 228-229). Society for Industrial and Applied Mathematics. """ def __init__(self, method: str = 'exact', tol: float = 1e-1): super(Closeness, self).__init__() self.method = method self.tol = tol
[docs] def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray]) -> 'Closeness': """Closeness centrality for connected graphs. Parameters ---------- adjacency : Adjacency matrix of the graph. Returns ------- self: :class:`Closeness` """ adjacency = check_format(adjacency) check_square(adjacency) check_connected(adjacency) n = adjacency.shape[0] if self.method == 'exact': n_sources = n sources = np.arange(n) elif self.method == 'approximate': n_sources = min(int(log(n) / self.tol ** 2), n) sources = np.random.choice(np.arange(n), n_sources, replace=False) else: raise ValueError("Method should be either 'exact' or 'approximate'.") distances = np.array([get_distances(adjacency, source=source) for source in sources]) distances_min = np.min(distances, axis=1) scores = (n - 1) / n / np.mean(distances, axis=1) scores[distances_min < 0] = 0 self.scores_ = scores return self