Nearest neighbors

This notebook illustrates the classification of the nodes of a graph by the k-nearest neighbors algorithm, based on the labels of a few nodes.

[1]:
from IPython.display import SVG
[2]:
import numpy as np
[3]:
from sknetwork.data import karate_club, painters, movie_actor
from sknetwork.classification import KNN
from sknetwork.embedding import GSVD
from sknetwork.visualization import svg_graph, svg_digraph, svg_bigraph

Graphs

[4]:
# Load the karate club graph together with its metadata.
graph = karate_club(metadata=True)
# Keep the adjacency, the node positions (used for drawing) and the true labels.
adjacency, position, labels_true = graph.adjacency, graph.position, graph.labels
[5]:
# Only nodes 0 and 33 are given their true label; they act as the seeds.
seeds = {node: labels_true[node] for node in (0, 33)}
[6]:
# Nearest-neighbor classifier (1 neighbor) built on top of GSVD(3)
# (presumably a 3-dimensional embedding -- confirm against the GSVD docs).
knn = KNN(GSVD(3), n_neighbors=1)
# Fit on the graph with the seed labels and get one predicted label per node.
labels_pred = knn.fit_transform(adjacency, seeds)
[7]:
# Fraction of nodes whose predicted label equals the true label, rounded to
# 2 decimals. This quantity is the accuracy (not the precision) of the
# classification; note that the seed nodes are included in the average.
accuracy = np.round(np.mean(labels_pred == labels_true), 2)
accuracy
[7]:
0.97
[8]:
# Draw the graph with the predicted labels; the seeds are passed so that
# svg_graph can mark them. The SVG is displayed as the cell output.
image = svg_graph(adjacency, position, labels=labels_pred, seeds=seeds)
SVG(image)
[8]:
../../_images/tutorials_classification_knn_10_0.svg
[9]:
# soft classification (here probability of label 1)
# Refit the classifier with 2 neighbors instead of 1, then read the
# membership_ attribute of the fitted classifier (indexed by column, i.e. by
# label, in the next cell).
knn = KNN(GSVD(3), n_neighbors=2)
knn.fit(adjacency, seeds)
membership = knn.membership_
[10]:
# Column 1 of the membership matrix (label 1), converted to a dense array
# and flattened to one score per node.
scores = np.ravel(membership[:, 1].toarray())
[11]:
# Draw the graph with the per-node scores instead of hard labels; the seeds
# are still passed to svg_graph. The SVG is displayed as the cell output.
image = svg_graph(adjacency, position, scores=scores, seeds=seeds)
SVG(image)
[11]:
../../_images/tutorials_classification_knn_13_0.svg

Directed graphs

[12]:
# Load the painters graph together with its metadata.
graph = painters(metadata=True)
# Keep the adjacency, the node positions (used for drawing) and the node names.
adjacency, position, names = graph.adjacency, graph.position, graph.names
[13]:
# Node indices of the three painters used as seeds.
rembrandt, klimt, cezanne = 5, 6, 11
# Labels 0, 1 and 2 go to cezanne, rembrandt and klimt, in that order.
seeds = dict(zip((cezanne, rembrandt, klimt), range(3)))
[14]:
# Nearest-neighbor classifier (2 neighbors) on top of GSVD(3), fitted on the
# painters adjacency with the three seed labels; returns one label per node.
knn = KNN(GSVD(3), n_neighbors=2)
labels = knn.fit_transform(adjacency, seeds)
[15]:
# Draw the directed graph with node names and predicted labels; the seeds are
# passed to svg_digraph. The SVG is displayed as the cell output.
image = svg_digraph(adjacency, position, names, labels=labels, seeds=seeds)
SVG(image)
[15]:
../../_images/tutorials_classification_knn_18_0.svg
[16]:
# soft classification: scores for label 0, the label given to the cezanne seed
membership = knn.membership_
# Densify column 0 and flatten it to one score per node.
scores = np.ravel(membership[:, 0].toarray())
[17]:
# Draw the directed graph with the label-0 scores. Only the cezanne node is
# passed as seed here, since it is the seed carrying label 0.
image = svg_digraph(adjacency, position, names, scores=scores, seeds=[cezanne])
SVG(image)
[17]:
../../_images/tutorials_classification_knn_20_0.svg

Bipartite graphs

[18]:
# Load the movie-actor bipartite graph together with its metadata.
graph = movie_actor(metadata=True)
biadjacency = graph.biadjacency
# Names of the row nodes and of the column nodes, used when drawing.
names_row, names_col = graph.names_row, graph.names_col
[19]:
# Row indices of the three nodes later used as row seeds.
inception, drive, budapest = 0, 3, 8
[20]:
# Labels 0, 1 and 2 go to the inception, drive and budapest rows, in that order.
seeds_row = dict(zip((inception, drive, budapest), range(3)))
[21]:
# Nearest-neighbor classifier (2 neighbors) on top of GSVD(3), fitted on the
# biadjacency with seeds given on the rows only.
knn = KNN(GSVD(3), n_neighbors=2)
labels_row = knn.fit_transform(biadjacency, seeds_row)
# Labels of the column nodes are read from the fitted classifier.
labels_col = knn.labels_col_
[22]:
# Draw the bipartite graph with row/column names and predicted row/column
# labels; the row seeds are passed to svg_bigraph. The SVG is displayed as
# the cell output.
image = svg_bigraph(biadjacency, names_row, names_col, labels_row, labels_col, seeds_row=seeds_row)
SVG(image)
[22]:
../../_images/tutorials_classification_knn_26_0.svg
[23]:
# soft classification: membership attributes of the fitted classifier,
# one for the rows and one for the columns
membership_row, membership_col = knn.membership_row_, knn.membership_col_
[24]:
# Scores for label 1 (the label given to the drive seed): column 1 of each
# membership matrix, densified and flattened to one score per node.
scores_row = np.ravel(membership_row[:, 1].toarray())
scores_col = np.ravel(membership_col[:, 1].toarray())
[25]:
# Draw the bipartite graph with the row and column scores instead of hard
# labels; all row seeds are passed to svg_bigraph. The SVG is displayed as
# the cell output.
image = svg_bigraph(biadjacency, names_row, names_col, scores_row=scores_row, scores_col=scores_col,
                    seeds_row=seeds_row)
SVG(image)
[25]:
../../_images/tutorials_classification_knn_29_0.svg