Nearest neighbors

This notebook illustrates the classification of the nodes of a graph by the k-nearest neighbors algorithm, using the labels known for some nodes.

[1]:
from IPython.display import SVG
[2]:
import numpy as np
[3]:
from sknetwork.data import karate_club, painters, movie_actor
from sknetwork.classification import NNClassifier, get_accuracy_score
from sknetwork.embedding import Spectral
from sknetwork.visualization import svg_graph, visualize_bigraph

Graphs

[4]:
# Load the karate club graph together with its metadata.
graph = karate_club(metadata=True)
adjacency = graph.adjacency
position = graph.position  # node layout, passed to svg_graph when drawing
labels_true = graph.labels  # reference labels: source of the seeds and of the accuracy score
[5]:
# Seeds: only nodes 0 and 33 keep their true label; these are the known labels given to the classifier.
labels = {node: labels_true[node] for node in (0, 33)}
[6]:
# Nearest neighbors with cosine similarity.
# No embedding_method is passed here (contrast with the Spectral variant further down).
# n_neighbors=1: only the single nearest neighbor is used for each prediction.
classifier = NNClassifier(n_neighbors=1)
labels_pred = classifier.fit_predict(adjacency, labels)
[7]:
# Accuracy of the predictions against the true labels, rounded to 2 decimals for display.
# NOTE(review): the full label vectors are passed, so the seed nodes appear to be counted too — confirm.
accuracy = get_accuracy_score(labels_true, labels_pred)
np.round(accuracy, 2)
[7]:
0.82
[8]:
# Render the graph with the predicted labels; `seeds=labels` hands the seed nodes to the renderer.
image = svg_graph(adjacency, position, labels=labels_pred, seeds=labels)
SVG(image)  # last expression: displayed as the cell output
[8]:
../../_images/tutorials_classification_knn_10_0.svg
[9]:
# Nearest neighbors in embedding space:
# Spectral(2) is passed as the embedding method, still with a single neighbor (n_neighbors=1).
classifier = NNClassifier(n_neighbors=1, embedding_method=Spectral(2))
labels_pred = classifier.fit_predict(adjacency, labels)
[10]:
# Accuracy of the embedding-based predictions, rounded to 2 decimals for display.
# NOTE(review): the full label vectors are passed, so the seed nodes appear to be counted too — confirm.
accuracy = get_accuracy_score(labels_true, labels_pred)
np.round(accuracy, 2)
[10]:
0.97
[11]:
# Render the graph with the labels predicted in embedding space; seeds are passed to the renderer.
image = svg_graph(adjacency, position, labels=labels_pred, seeds=labels)
SVG(image)  # last expression: displayed as the cell output
[11]:
../../_images/tutorials_classification_knn_13_0.svg
[12]:
# Distribution over labels: four seeds (nodes 0, 1, 30, 33), three neighbors,
# and probabilities read back from the fitted classifier instead of hard labels.
labels = {node: labels_true[node] for node in (0, 1, 30, 33)}
classifier = NNClassifier(
    n_neighbors=3,
    embedding_method=Spectral(2),
)
classifier.fit(adjacency, labels)
probs = classifier.predict_proba()
[13]:
# Keep column 1 of the probability matrix — presumably the probability of label 1 per node; confirm.
scores = probs[:,1]
[14]:
# Render the graph with per-node scores rather than hard labels; seeds are passed to the renderer.
image = svg_graph(adjacency, position, scores=scores, seeds=labels)
SVG(image)  # last expression: displayed as the cell output
[14]:
../../_images/tutorials_classification_knn_16_0.svg

Directed graphs

[15]:
# Load the painters graph (the directed example of this notebook) together with its metadata.
graph = painters(metadata=True)
adjacency = graph.adjacency
position = graph.position  # node layout, passed to svg_graph when drawing
names = graph.names  # node names, passed to svg_graph for display
[16]:
# Seed painters, referenced by node index (presumably their positions in `names` — TODO confirm).
cezanne = 11
rembrandt = 5
klimt = 6

# One distinct label per seed painter.
labels = {
    cezanne: 0,
    rembrandt: 1,
    klimt: 2,
}
[17]:
# Classify the painters from the three seeds, using 2 neighbors and Spectral(3) as embedding method.
classifier = NNClassifier(n_neighbors=2, embedding_method=Spectral(3))
labels_pred = classifier.fit_predict(adjacency, labels)
[18]:
# Render the painters graph with node names and predicted labels; seeds are passed to the renderer.
image = svg_graph(adjacency, position, names, labels=labels_pred, seeds=labels)
SVG(image)  # last expression: displayed as the cell output
[18]:
../../_images/tutorials_classification_knn_21_0.svg
[19]:
# Probability distribution over labels.
# Column 0 is kept: presumably the scores for label 0, the label assigned to the Cezanne seed — confirm.
probs = classifier.predict_proba()
scores = probs[:,0]
[20]:
# Render the painters graph with the scores, passing only the Cezanne node as seed.
image = svg_graph(adjacency, position, names, scores=scores, seeds=[cezanne])
SVG(image)  # last expression: displayed as the cell output
[20]:
../../_images/tutorials_classification_knn_23_0.svg

Bipartite graphs

[21]:
# Load the movie-actor bipartite graph together with its metadata.
graph = movie_actor(metadata=True)
biadjacency = graph.biadjacency
names_row = graph.names_row  # row names, passed to visualize_bigraph
names_col = graph.names_col  # column names, passed to visualize_bigraph
[22]:
inception = 0
drive = 3
budapest = 8
[23]:
labels_row = {inception: 0, drive: 1, budapest: 2}
[24]:
classifier = NNClassifier(n_neighbors=2, embedding_method=Spectral(3))
classifier.fit(biadjacency, labels_row)
labels_row_pred = classifier.labels_row_
labels_col_pred = classifier.labels_col_
[25]:
inception = 0
drive = 3
budapest = 8
[26]:
labels_row = {inception: 0, drive: 1, budapest: 2}
[27]:
classifier = NNClassifier(n_neighbors=2, embedding_method=Spectral(3))
classifier.fit(biadjacency, labels_row)
labels_row_pred = classifier.labels_row_
labels_col_pred = classifier.labels_col_
[28]:
# Render the bipartite graph with the predicted labels on both sides;
# the row seeds are passed to the renderer.
image = visualize_bigraph(
    biadjacency,
    names_row,
    names_col,
    labels_row_pred,
    labels_col_pred,
    seeds_row=labels_row,
)
SVG(image)
[28]:
../../_images/tutorials_classification_knn_32_0.svg
[29]:
# Probability distribution over labels: default call for the rows, columns=True for the columns.
probs_row = classifier.predict_proba()
probs_col = classifier.predict_proba(columns=True)
[30]:
# Keep column 1 on each side — presumably the probability of label 1
# (the label given to the `drive` seed); confirm.
scores_row = probs_row[:,1]
scores_col = probs_col[:,1]
[31]:
# Render the bipartite graph with the row and column scores;
# the row seeds are passed to the renderer.
image = visualize_bigraph(
    biadjacency,
    names_row,
    names_col,
    scores_row=scores_row,
    scores_col=scores_col,
    seeds_row=labels_row,
)
SVG(image)

[31]:
../../_images/tutorials_classification_knn_35_0.svg