Diffusion
This notebook illustrates the classification of the nodes of a graph by diffusion, based on the labels of a few nodes.
[1]:
from IPython.display import SVG
[2]:
import numpy as np
[3]:
from sknetwork.data import karate_club, painters, movie_actor
from sknetwork.classification import DiffusionClassifier, get_accuracy_score
from sknetwork.visualization import visualize_graph, visualize_bigraph
Graphs
[4]:
# Load the karate club graph with its metadata and pull out what the next
# cells need: adjacency matrix, node layout and ground-truth labels.
graph = karate_club(metadata=True)
adjacency, position, labels_true = graph.adjacency, graph.position, graph.labels
[5]:
# Keep the ground-truth label of two nodes only (0 and 33); these are the
# known labels handed to the classifier.
labels = {node: labels_true[node] for node in (0, 33)}
[6]:
# Fit the diffusion classifier on the graph from the two known labels and
# get the predicted labels of the nodes.
diffusion = DiffusionClassifier()
labels_pred = diffusion.fit_predict(adjacency, labels)
[7]:
# Compare the predictions with the ground truth and display the accuracy
# rounded to two decimals.
accuracy = get_accuracy_score(labels_true, labels_pred)
np.round(accuracy, decimals=2)
[7]:
np.float64(0.97)
[8]:
# Render the graph with the predicted labels; the two nodes with known
# labels are passed as seeds.
image = visualize_graph(
    adjacency,
    position,
    labels=labels_pred,
    seeds=labels,
)
SVG(image)
[8]:
[9]:
# Probability distribution over labels for each node, from the classifier
# fitted above: one row per node, one column per label.
probs = diffusion.predict_proba()
[10]:
# Display the whole array; the graph is small enough for the full output to stay readable.
probs
[10]:
array([[0.0061429 , 0.9938571 ],
[0.12913582, 0.87086418],
[0.43828759, 0.56171241],
[0.08339471, 0.91660529],
[0.0080928 , 0.9919072 ],
[0.00879691, 0.99120309],
[0.00879691, 0.99120309],
[0.08194841, 0.91805159],
[0.69237072, 0.30762928],
[0.91084856, 0.08915144],
[0.0080928 , 0.9919072 ],
[0.0061429 , 0.9938571 ],
[0.02313087, 0.97686913],
[0.27884655, 0.72115345],
[0.97980535, 0.02019465],
[0.97980535, 0.02019465],
[0.0098036 , 0.9901964 ],
[0.02932703, 0.97067297],
[0.97980535, 0.02019465],
[0.3329027 , 0.6670973 ],
[0.97980535, 0.02019465],
[0.02932703, 0.97067297],
[0.97980535, 0.02019465],
[0.96024402, 0.03975598],
[0.87259101, 0.12740899],
[0.89263086, 0.10736914],
[0.98710892, 0.01289108],
[0.91730386, 0.08269614],
[0.88166224, 0.11833776],
[0.97813879, 0.02186121],
[0.84039367, 0.15960633],
[0.80588381, 0.19411619],
[0.94725549, 0.05274451],
[0.99270832, 0.00729168]])
[11]:
# Probability of label 1 for each node (second column of `probs`).
scores = probs[:, 1]
[12]:
# Render the graph with the probability of label 1 as node scores; the two
# nodes with known labels are passed as seeds.
image = visualize_graph(
    adjacency,
    position,
    scores=scores,
    seeds=labels,
)
SVG(image)
[12]:
Directed graphs
[13]:
# Load the painters graph with its metadata: adjacency matrix, node layout
# and node names.
graph = painters(metadata=True)
adjacency, position, names = graph.adjacency, graph.position, graph.names
[14]:
# Node indices of the two seeds (presumably the positions of Rembrandt and
# Cezanne in `names` -- verify with names[rembrandt] and names[cezanne]).
rembrandt = 5
cezanne = 11
# One known label per seed: Cezanne -> 0, Rembrandt -> 1.
labels = {cezanne: 0, rembrandt: 1}
[15]:
# Fresh classifier for the painters graph, fitted from the two seed labels.
diffusion = DiffusionClassifier()
labels_pred = diffusion.fit_predict(adjacency, labels)
[16]:
# Render the painters graph with node names, the predicted labels and the
# two seed nodes. `names` is passed by keyword so the call does not depend
# on the position of that optional parameter.
image = visualize_graph(adjacency, position, names=names, labels=labels_pred, seeds=labels)
SVG(image)
[16]:
[17]:
# Probability distribution over labels for each node of the painters graph,
# from the classifier fitted above.
probs = diffusion.predict_proba()
[18]:
# Probability of label 0 (the label given to Cezanne) for each node.
scores = probs[:, 0]
[19]:
# The scores are the probabilities of label 0, so only the seed carrying
# that label (Cezanne) is passed as a seed.
image = visualize_graph(
    adjacency,
    position,
    names=names,
    scores=scores,
    seeds=[cezanne],
)
SVG(image)
[19]:
Bipartite graphs
[20]:
# Load the movie-actor bipartite graph with its metadata: biadjacency matrix
# plus the names of its rows and of its columns.
graph = movie_actor(metadata=True)
biadjacency = graph.biadjacency
names_row, names_col = graph.names_row, graph.names_col
[21]:
# Row indices of the two seeds (presumably the entries for "Inception" and
# "Drive" in `names_row` -- verify with names_row[[inception, drive]]).
inception, drive = 0, 3
[22]:
# Known labels for two rows of the biadjacency matrix: inception -> 0, drive -> 1.
labels_row = {inception: 0, drive: 1}
[23]:
# Fit on the bipartite graph with labels known for two rows only, then read
# the predicted labels of rows and columns from the fitted attributes.
diffusion = DiffusionClassifier()
diffusion.fit(biadjacency, labels_row)
labels_row_pred, labels_col_pred = diffusion.labels_row_, diffusion.labels_col_
[24]:
# Render the bipartite graph with row/column names, the predicted labels of
# rows and columns, and the two labeled rows as seeds. Optional arguments
# are passed by keyword so that each one is unambiguous at the call site.
image = visualize_bigraph(
    biadjacency,
    names_row=names_row,
    names_col=names_col,
    labels_row=labels_row_pred,
    labels_col=labels_col_pred,
    seeds_row=labels_row,
)
SVG(image)
[24]:
[25]:
# Probability distributions over labels: first for the rows of the
# biadjacency matrix, then (columns=True) for its columns.
probs_row = diffusion.predict_proba()
probs_col = diffusion.predict_proba(columns=True)
[26]:
# Probability of label 1, for the rows and for the columns.
scores_row = probs_row[:, 1]
scores_col = probs_col[:, 1]
[27]:
# Render the bipartite graph with the probability of label 1 as scores on
# rows and columns; the two labeled rows are passed as seeds.
image = visualize_bigraph(
    biadjacency,
    names_row,
    names_col,
    scores_row=scores_row,
    scores_col=scores_col,
    seeds_row=labels_row,
)
SVG(image)
[27]: