Source code for sknetwork.clustering.metrics

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created in July 2018
@author: Nathan de Lara <nathan.delara@polytechnique.org>
@author: Thomas Bonald <bonald@enst.fr>
"""
from typing import Optional, Union, Tuple

import numpy as np
from scipy import sparse

from sknetwork.utils.check import get_probs
from sknetwork.utils.format import get_adjacency
from sknetwork.utils.membership import get_membership


[docs]def get_modularity(input_matrix: Union[sparse.csr_matrix, np.ndarray], labels: np.ndarray, labels_col: Optional[np.ndarray] = None, weights: str = 'degree', resolution: float = 1, return_all: bool = False) -> Union[float, Tuple[float, float, float]]: """Modularity of a clustering. The modularity of a clustering is :math:`Q = \\dfrac{1}{w} \\sum_{i,j}\\left(A_{ij} - \\gamma \\dfrac{w_iw_j}{w}\\right)\\delta_{c_i,c_j}` for graphs, :math:`Q = \\dfrac{1}{w} \\sum_{i,j}\\left(A_{ij} - \\gamma \\dfrac{d^+_id^-_j}{w}\\right)\\delta_{c_i,c_j}` for directed graphs, where * :math:`c_i` is the cluster of node :math:`i`,\n * :math:`w_i` is the weight of node :math:`i`,\n * :math:`w^+_i, w^-_i` are the out-weight, in-weight of node :math:`i` (for directed graphs),\n * :math:`w = 1^TA1` is the total weight,\n * :math:`\\delta` is the Kronecker symbol,\n * :math:`\\gamma \\ge 0` is the resolution parameter. Parameters ---------- input_matrix : Adjacency matrix or biadjacency matrix of the graph. labels : Labels of nodes. labels_col : Labels of column nodes (for bipartite graphs). weights : Weighting of nodes (``'degree'`` (default) or ``'uniform'``). resolution: Resolution parameter (default = 1). return_all: If ``True``, return modularity, fit, diversity. Returns ------- modularity : float fit: float, optional diversity: float, optional Example ------- >>> from sknetwork.clustering import get_modularity >>> from sknetwork.data import house >>> adjacency = house() >>> labels = np.array([0, 0, 1, 1, 0]) >>> np.round(get_modularity(adjacency, labels), 2) 0.11 """ adjacency, bipartite = get_adjacency(input_matrix.astype(float)) if bipartite: if labels_col is None: raise ValueError('For bipartite graphs, you must specify the labels of both rows and columns.') else: labels = np.hstack((labels, labels_col)) if len(labels) != adjacency.shape[0]: raise ValueError('Dimension mismatch between labels and input matrix.') probs_row = get_probs(weights, adjacency) probs_col = get_probs(weights, adjacency.T) membership = get_membership(labels).astype(float) fit = membership.T.dot(adjacency.dot(membership)).diagonal().sum() / adjacency.data.sum() div = membership.T.dot(probs_col).dot(membership.T.dot(probs_row)) mod = fit - resolution * div if return_all: return mod, fit, div else: return mod