Source code for sknetwork.hierarchy.ward

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on October 2019
@author: Nathan de Lara <ndelara@enst.fr>
@author: Thomas Bonald <bonald@enst.fr>
"""

from typing import Union

import numpy as np
from scipy import sparse

from sknetwork.embedding import BaseEmbedding, GSVD
from sknetwork.hierarchy.base import BaseHierarchy
from sknetwork.utils.check import check_format
from sknetwork.utils.format import get_embedding
from sknetwork.utils.ward import WardDense


class Ward(BaseHierarchy):
    """Hierarchical clustering by the Ward method.

    The input graph is first embedded with ``embedding_method``; the Ward
    algorithm is then applied to the resulting embedding.

    Parameters
    ----------
    embedding_method :
        Embedding method (default = GSVD in dimension 10, projected on the unit sphere).
        If ``None``, a new ``GSVD(10)`` is created for this instance.
    co_cluster :
        If ``True``, co-cluster rows and columns, considered as different nodes (default = ``False``).

    Attributes
    ----------
    dendrogram_ :
        Dendrogram of the graph.
    dendrogram_row_ :
        Dendrogram for the rows, for bipartite graphs.
    dendrogram_col_ :
        Dendrogram for the columns, for bipartite graphs.
    dendrogram_full_ :
        Dendrogram for both rows and columns, indexed in this order, for bipartite graphs.

    Examples
    --------
    >>> from sknetwork.hierarchy import Ward
    >>> from sknetwork.data import karate_club
    >>> ward = Ward()
    >>> adjacency = karate_club()
    >>> dendrogram = ward.fit_transform(adjacency)
    >>> dendrogram.shape
    (33, 4)

    References
    ----------
    * Ward, J. H., Jr. (1963). Hierarchical grouping to optimize an objective function.
      Journal of the American Statistical Association.

    * Murtagh, F., & Contreras, P. (2012). Algorithms for hierarchical clustering: an overview.
      Wiley Interdisciplinary Reviews: Data Mining and Knowledge Discovery.
    """

    def __init__(self, embedding_method: Union[BaseEmbedding, None] = None, co_cluster: bool = False):
        super().__init__()

        # A default written as ``GSVD(10)`` in the signature would be evaluated
        # only once, at definition time, and the same object would then be
        # shared by every Ward instance. Build it per instance instead.
        if embedding_method is None:
            embedding_method = GSVD(10)

        self.embedding_method = embedding_method
        self.co_cluster = co_cluster
        # Assigned in ``fit`` from the second value returned by ``get_embedding``.
        self.bipartite = None

    def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray]) -> 'Ward':
        """Applies embedding method followed by the Ward algorithm.

        Parameters
        ----------
        input_matrix :
            Adjacency matrix or biadjacency matrix of the graph.

        Returns
        -------
        self: :class:`Ward`
        """
        # Defined outside this file; presumably resets results of a previous fit.
        self._init_vars()

        # input
        # NOTE(review): the return value of check_format is discarded here, so
        # the call can only act as validation -- confirm this is intended.
        check_format(input_matrix)

        # embedding
        embedding, self.bipartite = get_embedding(input_matrix, self.embedding_method, self.co_cluster)

        # clustering
        ward = WardDense()
        self.dendrogram_ = ward.fit_transform(embedding)

        # output
        if self.co_cluster:
            # Defined outside this file; presumably fills the row / column
            # dendrogram attributes from the input shape -- TODO confirm.
            self._split_vars(input_matrix.shape)

        return self