Nearest neighbors

This notebook illustrates the classification of the nodes of a graph by the k-nearest neighbors algorithm, using the labels known for some nodes.

[1]:

from IPython.display import SVG

[2]:

import numpy as np

[3]:

from sknetwork.data import karate_club, painters, movie_actor
from sknetwork.classification import NNClassifier, get_accuracy_score
from sknetwork.embedding import Spectral
from sknetwork.visualization import svg_graph, visualize_bigraph


Graphs

[4]:

# Load the karate club toy graph together with its metadata.
graph = karate_club(metadata=True)
# The adjacency matrix is needed by the classifier and by every plot below.
adjacency = graph.adjacency
position = graph.position
labels_true = graph.labels

[5]:

# Seed labels: only nodes 0 and 33 keep their ground-truth label.
labels = {node: labels_true[node] for node in (0, 33)}

[6]:

# Nearest neighbors with cosine similarity
classifier = NNClassifier(n_neighbors=1)
# Fit on the seed labels and predict a label for every node;
# the following cells read `labels_pred`.
labels_pred = classifier.fit_predict(adjacency, labels)

[7]:

# Compare the predictions with the ground-truth labels.
accuracy = get_accuracy_score(labels_true, labels_pred)
# Displayed rounded to two decimals.
np.round(accuracy, decimals=2)

[7]:

0.82

[8]:

# Draw the graph with the predicted labels; the seed nodes are passed separately.
image = svg_graph(
    adjacency,
    position,
    labels=labels_pred,
    seeds=labels,
)
SVG(image)

[8]:

[9]:

# Nearest neighbors in embedding space
classifier = NNClassifier(n_neighbors=1, embedding_method=Spectral(2))
# Refit with the spectral embedding so that `labels_pred` reflects this
# classifier rather than the previous one.
labels_pred = classifier.fit_predict(adjacency, labels)

[10]:

# Score the current predictions against the ground-truth labels.
accuracy = get_accuracy_score(
    labels_true,
    labels_pred,
)
np.round(accuracy, decimals=2)

[10]:

0.97

[11]:

# Same plot as before, now with the current `labels_pred`.
image = svg_graph(adjacency, position,
                  labels=labels_pred,
                  seeds=labels)
SVG(image)

[11]:

[12]:

# Distribution over labels, using four seed nodes and three neighbors.
labels = {i: labels_true[i] for i in [0, 1, 30, 33]}
classifier = NNClassifier(n_neighbors=3, embedding_method=Spectral(2))
# The classifier must be fitted before probabilities can be requested.
classifier.fit(adjacency, labels)
probs = classifier.predict_proba()

[13]:

# Keep column 1 of the probabilities, i.e. the scores for label 1.
scores = probs[:, 1]

[14]:

# Draw the graph with the per-node scores instead of hard labels.
image = svg_graph(
    adjacency,
    position,
    scores=scores,
    seeds=labels,
)
SVG(image)

[14]:


Directed graphs

[15]:

# Load the painters toy graph (directed) together with its metadata.
graph = painters(metadata=True)
# Rebind `adjacency` to this graph; otherwise the cells below would reuse
# the adjacency matrix of the previous section (or fail on a fresh kernel).
adjacency = graph.adjacency
position = graph.position
names = graph.names

[16]:

# Node indices of the three seed painters.
rembrandt, klimt, cezanne = 5, 6, 11
# One distinct label per seed (insertion order: cezanne, rembrandt, klimt).
labels = dict([(cezanne, 0), (rembrandt, 1), (klimt, 2)])

[17]:

classifier = NNClassifier(n_neighbors=2, embedding_method=Spectral(3))
# Fit on the seed painters and predict a label for every node;
# `labels_pred` is read by the plot in the next cell.
labels_pred = classifier.fit_predict(adjacency, labels)

[18]:

# Draw the graph with node names and predicted labels; seeds are passed separately.
image = svg_graph(
    adjacency,
    position,
    names,
    labels=labels_pred,
    seeds=labels,
)
SVG(image)

[18]:

[19]:

# Probability distribution over labels for every node.
probs = classifier.predict_proba()
# Column 0 holds the scores for label 0, the label assigned to Cezanne.
scores = probs[:, 0]

[20]:

# Draw the scores, with Cezanne as the only seed node passed to the plot.
image = svg_graph(
    adjacency,
    position,
    names,
    scores=scores,
    seeds=[cezanne],
)
SVG(image)

[20]:


Bipartite graphs

[21]:

# Load the movie-actor toy graph (bipartite) together with its metadata.
graph = movie_actor(metadata=True)
# The biadjacency matrix is needed by the classifier and by the plots below.
biadjacency = graph.biadjacency
names_row = graph.names_row
names_col = graph.names_col

[22]:

# Row indices of the three seed movies.
inception, drive, budapest = 0, 3, 8

[23]:

# One distinct label per seed movie (keys are row indices).
labels_row = dict([(inception, 0), (drive, 1), (budapest, 2)])

[24]:

classifier = NNClassifier(n_neighbors=2, embedding_method=Spectral(3))
# The label attributes read below only exist once the classifier is fitted.
classifier.fit(biadjacency, labels_row)
labels_row_pred = classifier.labels_row_
labels_col_pred = classifier.labels_col_

[25]:

# NOTE(review): this cell repeats the earlier seed-index cell verbatim;
# consider deleting it.
inception, drive, budapest = 0, 3, 8

[26]:

# NOTE(review): repeats the earlier `labels_row` cell verbatim; consider deleting it.
labels_row = dict([(inception, 0), (drive, 1), (budapest, 2)])

[27]:

# NOTE(review): this cell repeats the earlier classifier cell; consider deleting it.
classifier = NNClassifier(n_neighbors=2, embedding_method=Spectral(3))
# The label attributes read below only exist once the classifier is fitted.
classifier.fit(biadjacency, labels_row)
labels_row_pred = classifier.labels_row_
labels_col_pred = classifier.labels_col_

[28]:

# Draw the bipartite graph with predicted labels on both sides;
# the seed rows are passed separately.
image = visualize_bigraph(
    biadjacency,
    names_row,
    names_col,
    labels_row_pred,
    labels_col_pred,
    seeds_row=labels_row,
)
SVG(image)

[28]:

[29]:

# probability distribution over labels
probs_row = classifier.predict_proba()
probs_col = classifier.predict_proba(columns=True)

[30]:

# Keep column 1 of each probability matrix, i.e. the scores for label 1.
scores_row = probs_row[:, 1]
scores_col = probs_col[:, 1]

[31]:

# Draw the bipartite graph with the label-1 scores on both sides.
image = visualize_bigraph(
    biadjacency,
    names_row,
    names_col,
    scores_row=scores_row,
    scores_col=scores_col,
    seeds_row=labels_row,
)
SVG(image)

[31]: