K-means

This notebook illustrates the clustering of a graph by k-means. The graph is first embedded in a low-dimensional space (here with the GSVD embedding), and k-means is then applied to the embedded nodes.

[1]:
from IPython.display import SVG
[2]:
import numpy as np
[3]:
from sknetwork.data import karate_club, painters, movie_actor
from sknetwork.clustering import KMeans, modularity, bimodularity
from sknetwork.linalg import normalize
from sknetwork.embedding import GSVD
from sknetwork.utils import membership_matrix
from sknetwork.visualization import svg_graph, svg_digraph, svg_bigraph

Graphs

[4]:
# Load the karate club graph with its metadata and keep the two pieces
# used below: the adjacency matrix and the node positions for drawing.
graph = karate_club(metadata=True)
adjacency, position = graph.adjacency, graph.position
[5]:
# k-means with 2 clusters, applied to a GSVD embedding with 3 components.
kmeans = KMeans(
    n_clusters=2,
    embedding_method=GSVD(n_components=3),
)
labels = kmeans.fit_transform(adjacency)
[6]:
# Distinct cluster labels and the number of nodes assigned to each of them.
unique_labels, counts = np.unique(labels, return_counts=True)
print(unique_labels, counts)
[0 1] [20 14]
[7]:
# Draw the graph with the cluster labels found by k-means.
image = svg_graph(
    adjacency,
    position=position,
    labels=labels,
)
SVG(image)
[7]:
../../_images/tutorials_clustering_kmeans_9_0.svg
[8]:
# Quality metric: modularity of the clustering given by `labels`.
modularity(adjacency, labels)
[8]:
0.34048323471400377
[9]:
# Aggregate graph exposed by the fitted model.
# NOTE(review): presumably one node per cluster — confirm against the KMeans docs.
adjacency_aggregate = kmeans.aggregate_
[10]:
# Positions of the aggregate nodes, computed from the node positions and the
# transposed membership matrix.
# NOTE(review): assumes normalize() makes each row sum to 1, so that each
# aggregate position is the mean position of the cluster's nodes — confirm.
average = normalize(membership_matrix(labels).T)
position_aggregate = average.dot(position)
# Distinct cluster labels and the number of nodes in each cluster.
labels_unique, counts = np.unique(labels, return_counts=True)
[11]:
# Draw the aggregate graph; the cluster sizes serve both as node names and
# as node weights.
image = svg_graph(
    adjacency_aggregate,
    position=position_aggregate,
    names=counts,
    labels=labels_unique,
    display_node_weight=True,
    node_weights=counts,
)
SVG(image)
[11]:
../../_images/tutorials_clustering_kmeans_13_0.svg
[12]:
# Soft clustering (here probability of label 1): take column 1 of the
# membership matrix of the fitted model and flatten it to a 1-D array.
scores = kmeans.membership_[:,1].toarray().ravel()
[13]:
# Draw the graph with the soft-clustering scores of the nodes.
image = svg_graph(
    adjacency,
    position=position,
    scores=scores,
)
SVG(image)
[13]:
../../_images/tutorials_clustering_kmeans_15_0.svg

Directed graphs

[14]:
# Load the painters graph with its metadata and keep the adjacency matrix,
# the node positions and the node names.
graph = painters(metadata=True)
adjacency, position, names = graph.adjacency, graph.position, graph.names
[15]:
# k-means with 3 clusters on a GSVD embedding with 3 components, without
# co-clustering.
kmeans = KMeans(
    n_clusters=3,
    embedding_method=GSVD(n_components=3),
    co_cluster=False,
)
labels = kmeans.fit_transform(adjacency)
[16]:
# Draw the directed graph with node names and the cluster labels.
image = svg_digraph(
    adjacency,
    position=position,
    names=names,
    labels=labels,
)
SVG(image)
[16]:
../../_images/tutorials_clustering_kmeans_19_0.svg
[17]:
# Quality metric: modularity of the clustering given by `labels`.
modularity(adjacency, labels)
[17]:
0.10760000000000008
[18]:
# Aggregate graph exposed by the fitted model.
# NOTE(review): presumably one node per cluster — confirm against the KMeans docs.
adjacency_aggregate = kmeans.aggregate_
[19]:
# Positions of the aggregate nodes, computed from the node positions and the
# transposed membership matrix.
# NOTE(review): assumes normalize() makes each row sum to 1, so that each
# aggregate position is the mean position of the cluster's nodes — confirm.
average = normalize(membership_matrix(labels).T)
position_aggregate = average.dot(position)
# Distinct cluster labels and the number of nodes in each cluster.
labels_unique, counts = np.unique(labels, return_counts=True)
[20]:
# Draw the aggregate directed graph; the cluster sizes serve both as node
# names and as node weights.
image = svg_digraph(
    adjacency_aggregate,
    position=position_aggregate,
    names=counts,
    labels=labels_unique,
    display_node_weight=True,
    node_weights=counts,
)
SVG(image)
[20]:
../../_images/tutorials_clustering_kmeans_23_0.svg
[21]:
# Soft clustering (probability of label 0): take column 0 of the membership
# matrix of the fitted model and flatten it to a 1-D array.
scores = kmeans.membership_[:,0].toarray().ravel()
[22]:
# Draw the directed graph with the soft-clustering scores of the nodes.
# The node names are passed as well, as in the hard-clustering figure of this
# section, so that the painters remain identifiable in the picture.
image = svg_digraph(adjacency, position, names=names, scores=scores)
[23]:
# Display the image built in the previous cell.
SVG(image)
[23]:
../../_images/tutorials_clustering_kmeans_26_0.svg

Bipartite graphs

[24]:
# Load the movie-actor bipartite graph with its metadata and keep the
# biadjacency matrix together with the names of its rows and columns.
graph = movie_actor(metadata=True)
biadjacency = graph.biadjacency
names_row, names_col = graph.names_row, graph.names_col
[25]:
# k-means with 3 clusters on a GSVD embedding with 3 components, with
# co-clustering enabled so that both rows and columns get labels.
kmeans = KMeans(
    n_clusters=3,
    embedding_method=GSVD(n_components=3),
    co_cluster=True,
)
kmeans.fit(biadjacency)
labels_row, labels_col = kmeans.labels_row_, kmeans.labels_col_
[26]:
# Draw the bipartite graph with the names and cluster labels of rows and columns.
image = svg_bigraph(
    biadjacency,
    names_row=names_row,
    names_col=names_col,
    labels_row=labels_row,
    labels_col=labels_col,
)
SVG(image)
[26]:
../../_images/tutorials_clustering_kmeans_30_0.svg
[27]:
# Quality metric: bimodularity of the co-clustering given by the row and
# column labels.
bimodularity(biadjacency, labels_row, labels_col)
[27]:
0.4943310657596373
[28]:
# Aggregate graph exposed by the fitted model.
# NOTE(review): presumably one node per row cluster and per column cluster —
# confirm against the KMeans docs.
biadjacency_aggregate = kmeans.aggregate_
[29]:
# Distinct cluster labels and cluster sizes, computed separately for the row
# labels and for the column labels.
labels_unique_row, counts_row = np.unique(labels_row, return_counts=True)
labels_unique_col, counts_col = np.unique(labels_col, return_counts=True)
[30]:
# Draw the aggregate bipartite graph; the cluster sizes serve both as node
# names and as node weights, on the row side and on the column side.
image = svg_bigraph(
    biadjacency_aggregate,
    names_row=counts_row,
    names_col=counts_col,
    labels_row=labels_unique_row,
    labels_col=labels_unique_col,
    display_node_weight=True,
    node_weights_row=counts_row,
    node_weights_col=counts_col,
)
SVG(image)
[30]:
../../_images/tutorials_clustering_kmeans_34_0.svg
[31]:
# Soft clustering (here probability of label 1): take column 1 of the row and
# column membership matrices of the fitted model, each flattened to a 1-D array.
scores_row = kmeans.membership_row_[:,1].toarray().ravel()
scores_col = kmeans.membership_col_[:,1].toarray().ravel()
[32]:
# Draw the bipartite graph with the soft-clustering scores of rows and columns.
image = svg_bigraph(
    biadjacency,
    names_row=names_row,
    names_col=names_col,
    scores_row=scores_row,
    scores_col=scores_col,
)
SVG(image)
[32]:
../../_images/tutorials_clustering_kmeans_36_0.svg