Leiden

This notebook illustrates the clustering of graphs (undirected, directed and bipartite) by the Leiden algorithm.

[1]:
from IPython.display import SVG
[2]:
import numpy as np
[3]:
from sknetwork.data import karate_club, painters, movie_actor
from sknetwork.clustering import Leiden, get_modularity
from sknetwork.linalg import normalize
from sknetwork.utils import get_membership
from sknetwork.visualization import visualize_graph, visualize_bigraph

Graphs

[4]:
# Load the karate club graph together with its metadata.
graph = karate_club(metadata=True)
# Keep the adjacency matrix and the node positions (used below for drawing).
adjacency, position = graph.adjacency, graph.position
[5]:
# clustering
leiden = Leiden()  # default parameters
labels = leiden.fit_predict(adjacency)  # fit on the graph and keep the returned labels
[6]:
# distinct values found in labels and how many times each one occurs
labels_unique, counts = np.unique(labels, return_counts=True)
print(labels_unique, counts)
[0 1 2 3] [12 11  6  5]
[7]:
# Render the graph as SVG, passing the cluster labels as node labels.
image = visualize_graph(adjacency=adjacency, position=position, labels=labels)
SVG(image)
[7]:
../../_images/tutorials_clustering_leiden_10_0.svg
[8]:
# metric: modularity of the clustering `labels` on this graph
get_modularity(adjacency, labels)
[8]:
np.float64(0.4188034188034188)
[9]:
# aggregate graph (one node per cluster), read from the fitted model
adjacency_aggregate = leiden.aggregate_
[10]:
# Position of each aggregate node: the transposed membership matrix is
# normalized and multiplied with the node positions.
# NOTE(review): presumably a per-cluster average of the member positions, as
# the name `average` suggests — confirm against the default of `normalize`.
average = normalize(get_membership(labels).T)
position_aggregate = average.dot(position)
# cluster ids and their sizes, used to annotate the aggregate graph
labels_unique, counts = np.unique(labels, return_counts=True)
[11]:
# Draw the aggregate graph. The cluster sizes are passed twice: as node names
# and as node weights (with display_node_weight=True).
image = visualize_graph(
    adjacency_aggregate,
    position_aggregate,
    names=counts,
    labels=labels_unique,
    display_node_weight=True,
    node_weights=counts,
)
SVG(image)
[11]:
../../_images/tutorials_clustering_leiden_14_0.svg
[12]:
# probability distribution over clusters, from the model fitted above
probs = leiden.predict_proba()
[13]:
# scores for cluster 1: column 1 of probs
# (presumably the column index is the cluster label — confirm)
scores = probs[:,1]
[14]:
# Render the graph as SVG, passing the cluster-1 scores as node scores.
image = visualize_graph(adjacency=adjacency, position=position, scores=scores)
SVG(image)
[14]:
../../_images/tutorials_clustering_leiden_17_0.svg

Directed graphs

[15]:
# Load the painters graph together with its metadata.
graph = painters(metadata=True)
# Keep the adjacency matrix, the node names and the node positions.
adjacency, names, position = graph.adjacency, graph.names, graph.position
[16]:
# clustering
leiden = Leiden()  # default parameters
labels = leiden.fit_predict(adjacency)  # fit on the directed graph and keep the returned labels
[17]:
# distinct values found in labels and how many times each one occurs
labels_unique, counts = np.unique(labels, return_counts=True)
print(labels_unique, counts)
[0 1 2] [5 5 4]
[18]:
# Render the directed graph as SVG with node names and cluster labels.
image = visualize_graph(
    adjacency=adjacency,
    position=position,
    names=names,
    labels=labels,
)
SVG(image)
[18]:
../../_images/tutorials_clustering_leiden_22_0.svg
[19]:
# metric: modularity of the clustering `labels` on this directed graph
get_modularity(adjacency, labels)
[19]:
np.float64(0.32480000000000003)
[20]:
# aggregate graph, read from the fitted model
adjacency_aggregate = leiden.aggregate_
[21]:
# Position of each aggregate node: the transposed membership matrix is
# normalized and multiplied with the node positions.
# NOTE(review): presumably a per-cluster average of the member positions, as
# the name `average` suggests — confirm against the default of `normalize`.
average = normalize(get_membership(labels).T)
position_aggregate = average.dot(position)
# cluster ids and their sizes, used to annotate the aggregate graph
labels_unique, counts = np.unique(labels, return_counts=True)
[22]:
# Draw the aggregate graph. The cluster sizes are passed twice: as node names
# and as node weights (with display_node_weight=True).
image = visualize_graph(
    adjacency_aggregate,
    position_aggregate,
    names=counts,
    labels=labels_unique,
    display_node_weight=True,
    node_weights=counts,
)
SVG(image)
[22]:
../../_images/tutorials_clustering_leiden_26_0.svg
[23]:
# probability distribution over clusters, from the model fitted above
probs = leiden.predict_proba()
[24]:
# scores for cluster 1: column 1 of probs
# (presumably the column index is the cluster label — confirm)
scores = probs[:,1]
[25]:
# Render the directed graph as SVG, passing the cluster-1 scores as node scores.
image = visualize_graph(adjacency=adjacency, position=position, scores=scores)
SVG(image)
[25]:
../../_images/tutorials_clustering_leiden_29_0.svg

Bipartite graphs

[26]:
# Load the movie-actor bipartite graph together with its metadata.
graph = movie_actor(metadata=True)
# Keep the biadjacency matrix and the names attached to its rows and columns.
biadjacency = graph.biadjacency
names_row, names_col = graph.names_row, graph.names_col
[27]:
# clustering
leiden = Leiden()  # default parameters
leiden.fit(biadjacency)
# the fitted model exposes separate labels for the rows and for the columns
labels_row = leiden.labels_row_
labels_col = leiden.labels_col_
[28]:
# Render the bipartite graph as SVG with row/column names and cluster labels.
image = visualize_bigraph(
    biadjacency,
    names_row=names_row,
    names_col=names_col,
    labels_row=labels_row,
    labels_col=labels_col,
)
SVG(image)
[28]:
../../_images/tutorials_clustering_leiden_33_0.svg
[29]:
# metric: modularity of the row/column clustering on the biadjacency matrix
get_modularity(biadjacency, labels_row, labels_col)
[29]:
np.float64(0.5565170362358031)
[30]:
# aggregate graph, read from the fitted model
biadjacency_aggregate = leiden.aggregate_
[31]:
# distinct row labels / column labels and how many times each one occurs
labels_unique_row, counts_row = np.unique(labels_row, return_counts=True)
labels_unique_col, counts_col = np.unique(labels_col, return_counts=True)
[32]:
# Draw the aggregate bipartite graph. The cluster sizes are passed twice on
# each side: as node names and as node weights (with display_node_weight=True).
image = visualize_bigraph(
    biadjacency_aggregate,
    names_row=counts_row,
    names_col=counts_col,
    labels_row=labels_unique_row,
    labels_col=labels_unique_col,
    display_node_weight=True,
    node_weights_row=counts_row,
    node_weights_col=counts_col,
)
SVG(image)
[32]:
../../_images/tutorials_clustering_leiden_37_0.svg
[33]:
# probability distribution over clusters, from the model fitted above:
# default call for the rows, columns=True for the columns
probs_row = leiden.predict_proba()
probs_col = leiden.predict_proba(columns=True)
[34]:
# soft clustering: scores for cluster 1, taken as column 1 of each
# probability matrix (presumably the column index is the cluster label — confirm)
scores_row = probs_row[:,1]
scores_col = probs_col[:,1]
[35]:
# Render the bipartite graph as SVG, passing the cluster-1 scores of rows and columns.
image = visualize_bigraph(
    biadjacency,
    names_row=names_row,
    names_col=names_col,
    scores_row=scores_row,
    scores_col=scores_col,
)
SVG(image)

[35]:
../../_images/tutorials_clustering_leiden_40_0.svg