Source code for sknetwork.hierarchy.louvain_hierarchy

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created in March 2020
@author: Quentin Lutz <qlutz@enst.fr>
@author: Thomas Bonald <tbonald@enst.fr>
"""
from typing import Optional, Union

import numpy as np
from scipy import sparse

from sknetwork.clustering.louvain import Louvain
from sknetwork.hierarchy.base import BaseHierarchy
from sknetwork.hierarchy.postprocess import get_dendrogram, reorder_dendrogram
from sknetwork.utils.check import check_format
from sknetwork.utils.format import get_adjacency


[docs]class LouvainIteration(BaseHierarchy): """Hierarchical clustering by successive instances of Louvain (top-down). Parameters ---------- depth : int Depth of the tree. A negative value is interpreted as no limit (return a tree of maximum depth). resolution : float Resolution parameter. tol_optimization : float Minimum increase in the objective function to enter a new optimization pass. tol_aggregation : float Minimum increase in the objective function to enter a new aggregation pass. n_aggregations : int Maximum number of aggregations. A negative value is interpreted as no limit. shuffle_nodes : bool If ``True``, shuffle nodes before optimization. random_state : int Random number generator or random seed. If ``None``, numpy.random is used. verbose : bool Verbose mode. Attributes ---------- dendrogram_ : np.ndarray Dendrogram of the graph. dendrogram_row_ : np.ndarray Dendrogram for the rows, for bipartite graphs. dendrogram_col_ : np.ndarray Dendrogram for the columns, for bipartite graphs. dendrogram_full_ : np.ndarray Dendrogram for both rows and columns, indexed in this order, for bipartite graphs. Example ------- >>> from sknetwork.hierarchy import LouvainIteration >>> from sknetwork.data import house >>> louvain = LouvainIteration() >>> adjacency = house() >>> louvain.fit_predict(adjacency) array([[3., 2., 1., 2.], [4., 1., 1., 2.], [6., 0., 1., 3.], [5., 7., 2., 5.]]) Notes ----- Each row of the dendrogram = merge nodes, distance, size of cluster. See Also -------- scipy.cluster.hierarchy.dendrogram sknetwork.clustering.Louvain """ def __init__(self, depth: int = 3, resolution: float = 1, tol_optimization: float = 1e-3, tol_aggregation: float = 1e-3, n_aggregations: int = -1, shuffle_nodes: bool = False, random_state: Optional[Union[np.random.RandomState, int]] = None, verbose: bool = False): super(LouvainIteration, self).__init__() self.dendrogram_ = None self.depth = depth self._clustering_method = Louvain(resolution=resolution, tol_optimization=tol_optimization, tol_aggregation=tol_aggregation, n_aggregations=n_aggregations, shuffle_nodes=shuffle_nodes, random_state=random_state, verbose=verbose) self.bipartite = None def _recursive_louvain(self, adjacency: Union[sparse.csr_matrix, np.ndarray], depth: int, nodes: Optional[np.ndarray] = None): """Recursive function for fit. Parameters ---------- adjacency : sparse.csr_matrix, np.ndarray Adjacency matrix of the graph. depth : int Depth of the recursion. nodes : np.ndarray The indices of the current nodes in the original graph. Returns ------- tree: recursive list of list of nodes. """ n = adjacency.shape[0] if nodes is None: nodes = np.arange(n) if adjacency.nnz and depth: labels = self._clustering_method.fit_predict(adjacency) else: labels = np.zeros(n) clusters = np.unique(labels) tree = [] if len(clusters) == 1: if len(nodes) > 1: return [[node] for node in nodes] else: return [nodes[0]] else: for cluster in clusters: mask = (labels == cluster) nodes_cluster = nodes[mask] adjacency_cluster = adjacency[mask, :][:, mask] tree.append(self._recursive_louvain(adjacency_cluster, depth - 1, nodes_cluster)) return tree
[docs] def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray]) -> 'LouvainIteration': """Fit algorithm to data. Parameters ---------- input_matrix : sparse.csr_matrix, np.ndarray Adjacency matrix or biadjacency matrix of the graph. Returns ------- self: :class:`LouvainIteration` """ self._init_vars() input_matrix = check_format(input_matrix) adjacency, self.bipartite = get_adjacency(input_matrix) tree = self._recursive_louvain(adjacency, self.depth) dendrogram, _ = get_dendrogram(tree) dendrogram = np.array(dendrogram) dendrogram[:, 2] += 1 - min(dendrogram[:, 2]) self.dendrogram_ = reorder_dendrogram(dendrogram) if self.bipartite: self._split_vars(input_matrix.shape) return self
[docs]class LouvainHierarchy(BaseHierarchy): """Hierarchical clustering by Louvain (bottom-up). Each level corresponds to an aggregation step of the Louvain algorithm. Parameters ---------- resolution : float Resolution parameter. tol_optimization : float Minimum increase in the objective function to enter a new optimization pass. tol_aggregation : float Minimum increase in the objective function to enter a new aggregation pass. shuffle_nodes : bool If ``True``, shuffle nodes before optimization. random_state : int Random number generator or random seed. If ``None``, numpy.random is used. verbose : bool Verbose mode. Attributes ---------- dendrogram_ : np.ndarray Dendrogram of the graph. dendrogram_row_ : np.ndarray Dendrogram for the rows, for bipartite graphs. dendrogram_col_ : np.ndarray Dendrogram for the columns, for bipartite graphs. dendrogram_full_ : np.ndarray Dendrogram for both rows and columns, indexed in this order, for bipartite graphs. Example ------- >>> from sknetwork.hierarchy import LouvainHierarchy >>> from sknetwork.data import house >>> louvain = LouvainHierarchy() >>> adjacency = house() >>> louvain.fit_predict(adjacency) array([[3., 2., 1., 2.], [4., 1., 1., 2.], [6., 0., 1., 3.], [5., 7., 2., 5.]]) Notes ----- Each row of the dendrogram = merge nodes, distance, size of cluster. See Also -------- scipy.cluster.hierarchy.dendrogram sknetwork.clustering.Louvain """ def __init__(self, resolution: float = 1, tol_optimization: float = 1e-3, tol_aggregation: float = 1e-3, shuffle_nodes: bool = False, random_state: Optional[Union[np.random.RandomState, int]] = None, verbose: bool = False): super(LouvainHierarchy, self).__init__() self.dendrogram_ = None self._clustering_method = Louvain(resolution=resolution, tol_optimization=tol_optimization, tol_aggregation=tol_aggregation, n_aggregations=1, shuffle_nodes=shuffle_nodes, random_state=random_state, verbose=verbose) self.bipartite = None def _get_hierarchy(self, adjacency: Union[sparse.csr_matrix, np.ndarray]): """Get the hierarchy from Louvain. Parameters ---------- adjacency : sparse.csr_matrix, np.ndarray Adjacency matrix of the graph. Returns ------- tree: recursive list of list of nodes """ tree = [[node] for node in range(adjacency.shape[0])] labels = self._clustering_method.fit_predict(adjacency) labels_unique = np.unique(labels) while 1: tree = [[tree[node] for node in np.flatnonzero(labels == label)] for label in labels_unique] tree = [cluster[0] if len(cluster) == 1 else cluster for cluster in tree] aggregate = self._clustering_method.aggregate_ labels = self._clustering_method.fit_predict(aggregate) if len(labels_unique) == len(np.unique(labels)): break else: labels_unique = np.unique(labels) return tree
[docs] def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray]) -> 'LouvainHierarchy': """Fit algorithm to data. Parameters ---------- input_matrix : sparse.csr_matrix, np.ndarray Adjacency matrix or biadjacency matrix of the graph. Returns ------- self: :class:`LouvainHierarchy` """ self._init_vars() input_matrix = check_format(input_matrix) adjacency, self.bipartite = get_adjacency(input_matrix) tree = self._get_hierarchy(adjacency) dendrogram, _ = get_dendrogram(tree) dendrogram = np.array(dendrogram) dendrogram[:, 2] += 1 - min(dendrogram[:, 2]) self.dendrogram_ = reorder_dendrogram(dendrogram) if self.bipartite: self._split_vars(input_matrix.shape) return self