Source code for sknetwork.embedding.louvain_hierarchy

#!/usr/bin/env python3
# coding: utf-8
"""
Created on Dec 2020
@author: Quentin Lutz <qlutz@enst.fr>
"""
from typing import Optional, Union

import numpy as np
from scipy import sparse

from sknetwork.utils.check import check_format, check_random_state
from sknetwork.utils.format import get_adjacency
from sknetwork.clustering.louvain import Louvain
from sknetwork.embedding.base import BaseEmbedding


[docs]class LouvainNE(BaseEmbedding): """Embedding of graphs based on the hierarchical Louvain algorithm with random scattering per level. Parameters ---------- n_components : int Dimension of the embedding. scale : float Dilution factor to be applied on the random vector to be added at each iteration of the clustering method. resolution : Resolution parameter. tol_optimization : Minimum increase in the objective function to enter a new optimization pass. tol_aggregation : Minimum increase in the objective function to enter a new aggregation pass. n_aggregations : Maximum number of aggregations. A negative value is interpreted as no limit. shuffle_nodes : Enables node shuffling before optimization. random_state : Random number generator or random seed. If None, numpy.random is used. Attributes ---------- embedding_ : array, shape = (n, n_components) Embedding of the nodes. embedding_row_ : array, shape = (n_row, n_components) Embedding of the rows, for bipartite graphs. embedding_col_ : array, shape = (n_col, n_components) Embedding of the columns, for bipartite graphs. Example ------- >>> from sknetwork.embedding import LouvainNE >>> from sknetwork.data import karate_club >>> louvain = LouvainNE(n_components=3) >>> adjacency = karate_club() >>> embedding = louvain.fit_transform(adjacency) >>> embedding.shape (34, 3) References ---------- Bhowmick, A. K., Meneni, K., Danisch, M., Guillaume, J. L., & Mitra, B. (2020, January). `LouvainNE: Hierarchical Louvain Method for High Quality and Scalable Network Embedding. <https://hal.archives-ouvertes.fr/hal-02999888/document>`_ In Proceedings of the 13th International Conference on Web Search and Data Mining (pp. 43-51). """ def __init__(self, n_components: int = 2, scale: float = .1, resolution: float = 1, tol_optimization: float = 1e-3, tol_aggregation: float = 1e-3, n_aggregations: int = -1, shuffle_nodes: bool = False, random_state: Optional[Union[np.random.RandomState, int]] = None, verbose: bool = False): super(LouvainNE, self).__init__() self.n_components = n_components self.scale = scale self._clustering_method = Louvain(resolution=resolution, tol_optimization=tol_optimization, tol_aggregation=tol_aggregation, n_aggregations=n_aggregations, shuffle_nodes=shuffle_nodes, random_state=random_state, verbose=verbose) self.random_state = check_random_state(random_state) self.bipartite = None def _recursive_louvain(self, adjacency: Union[sparse.csr_matrix, np.ndarray], depth: int, nodes: Optional[np.ndarray] = None): """Recursive function for fit, modifies the embedding in place. Parameters ---------- adjacency : Adjacency matrix of the graph. depth : Depth of the recursion. nodes : The indices of the current nodes in the original graph. """ n = adjacency.shape[0] if nodes is None: nodes = np.arange(n) if adjacency.nnz: labels = self._clustering_method.fit_transform(adjacency) else: labels = np.zeros(n) clusters = np.unique(labels) if len(clusters) != 1: random_vectors = (self.scale ** depth) * self.random_state.rand(self.n_components, len(clusters)) for index, cluster in enumerate(clusters): mask = (labels == cluster) nodes_cluster = nodes[mask] self.embedding_[nodes_cluster, :] += random_vectors[:, index] n_row = len(mask) indptr = np.zeros(n_row + 1, dtype=int) indptr[1:] = np.cumsum(mask) n_col = indptr[-1] combiner = sparse.csr_matrix((np.ones(n_col), np.arange(n_col, dtype=int), indptr), shape=(n_row, n_col)) adjacency_cluster = adjacency[mask, :].dot(combiner) self._recursive_louvain(adjacency_cluster, depth + 1, nodes_cluster)
[docs] def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray], force_bipartite: bool = False): """Embedding of graphs from a clustering obtained with Louvain. Parameters ---------- input_matrix : Adjacency matrix or biadjacency matrix of the graph. force_bipartite : If ``True``, force the input matrix to be considered as a biadjacency matrix even if square. Returns ------- self: :class:`LouvainNE` """ # input input_matrix = check_format(input_matrix) adjacency, self.bipartite = get_adjacency(input_matrix, force_bipartite=force_bipartite) n = adjacency.shape[0] # embedding self.embedding_ = np.zeros((n, self.n_components)) self._recursive_louvain(adjacency, 0) if self.bipartite: self._split_vars(input_matrix.shape) return self