#!/usr/bin/env python3
# coding: utf-8
"""
Created on May, 2020
@author: Thomas Bonald <tbonald@enst.fr>
"""
from typing import Union
import numpy as np
from scipy import sparse
from sknetwork.classification.propagation import Propagation
from sknetwork.clustering.base import BaseClustering
from sknetwork.utils.format import check_format, get_adjacency
[docs]
class PropagationClustering(BaseClustering, Propagation):
"""Clustering by label propagation.
Parameters
----------
n_iter : int
Maximum number of iterations (-1 for infinity).
node_order : str
* `'random'`: node labels are updated in random order.
* `'increasing'`: node labels are updated by increasing order of weight.
* `'decreasing'`: node labels are updated by decreasing order of weight.
* Otherwise, node labels are updated by index order.
weighted : bool
If ``True``, the vote of each neighbor is proportional to the edge weight.
Otherwise, all votes have weight 1.
sort_clusters : bool
If ``True``, sort labels in decreasing order of cluster size.
return_probs : bool
If ``True``, return the probability distribution over clusters (soft clustering).
return_aggregate : bool
If ``True``, return the aggregate adjacency matrix or biadjacency matrix between clusters.
Attributes
----------
labels_ : np.ndarray, shape (n_labels,)
Label of each node.
probs_ : sparse.csr_matrix, shape (n_row, n_labels)
Probability distribution over labels.
labels_row_, labels_col_ : np.ndarray
Labels of rows and columns, for bipartite graphs.
probs_row_, probs_col_ : sparse.csr_matrix, shape (n_row, n_labels)
Probability distributions over labels for rows and columns (for bipartite graphs).
aggregate_ : sparse.csr_matrix
Aggregate adjacency matrix or biadjacency matrix between clusters.
Example
-------
>>> from sknetwork.clustering import PropagationClustering
>>> from sknetwork.data import karate_club
>>> propagation = PropagationClustering()
>>> graph = karate_club(metadata=True)
>>> adjacency = graph.adjacency
>>> labels = propagation.fit_predict(adjacency)
>>> len(set(labels))
2
References
----------
Raghavan, U. N., Albert, R., & Kumara, S. (2007).
`Near linear time algorithm to detect community structures in large-scale networks.
<https://arxiv.org/pdf/0709.2938.pdf>`_
Physical review E, 76(3), 036106.
"""
def __init__(self, n_iter: int = 5, node_order: str = 'decreasing', weighted: bool = True,
sort_clusters: bool = True, return_probs: bool = True, return_aggregate: bool = True):
Propagation.__init__(self, n_iter, node_order, weighted)
BaseClustering.__init__(self, sort_clusters, return_probs, return_aggregate)
self.bipartite = None
[docs]
def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray]) -> 'PropagationClustering':
"""Clustering by label propagation.
Parameters
----------
input_matrix : sparse.csr_matrix, np.ndarray
Adjacency matrix or biadjacency matrix of the graph.
Returns
-------
self: :class:`PropagationClustering`
"""
self._init_vars()
# input
input_matrix = check_format(input_matrix)
adjacency, bipartite = get_adjacency(input_matrix)
# propagation
Propagation.fit(self, adjacency)
# output
_, self.labels_ = np.unique(self.labels_, return_inverse=True)
if bipartite:
self._split_vars(input_matrix.shape)
self.bipartite = True
self._secondary_outputs(input_matrix)
return self