{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"# Nearest neighbors"
]
},
{
"cell_type": "markdown",
"metadata": {
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"This notebook illustrates the classification of the nodes of a graph by the [k-nearest neighbors algorithm](https://en.wikipedia.org/wiki/K-nearest_neighbors_algorithm), using the labels known for some nodes."
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"ExecuteTime": {
"end_time": "2019-07-15T12:29:50.554431Z",
"start_time": "2019-07-15T12:29:50.414075Z"
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"from IPython.display import SVG"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"from sknetwork.data import karate_club, painters, movie_actor\n",
"from sknetwork.classification import NNClassifier, get_accuracy_score\n",
"from sknetwork.embedding import Spectral\n",
"from sknetwork.visualization import svg_graph, visualize_bigraph"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "markdown",
"source": [
"## Graphs"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"# Load the karate club graph with its metadata and unpack what the notebook uses\n",
"graph = karate_club(metadata=True)\n",
"adjacency, position, labels_true = graph.adjacency, graph.position, graph.labels"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"# Seeds: nodes 0 and 33 are the only ones whose true label is given to the classifier\n",
"labels = {node: labels_true[node] for node in (0, 33)}"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"# Nearest neighbors with cosine similarity (no embedding method given)\n",
"classifier = NNClassifier(n_neighbors=1)\n",
"classifier.fit(adjacency, labels)\n",
"labels_pred = classifier.predict()"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"# Accuracy of the predicted labels against the true labels, rounded to 2 decimals\n",
"accuracy = get_accuracy_score(labels_true, labels_pred)\n",
"np.round(accuracy, decimals=2)"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"# Render the graph as SVG with the predicted labels, passing the seeds as well\n",
"image = svg_graph(\n",
"    adjacency,\n",
"    position,\n",
"    labels=labels_pred,\n",
"    seeds=labels,\n",
")\n",
"SVG(image)"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"# Nearest neighbors in embedding space, using Spectral(2) as embedding method\n",
"classifier = NNClassifier(n_neighbors=1, embedding_method=Spectral(2))\n",
"classifier.fit(adjacency, labels)\n",
"labels_pred = classifier.predict()"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"# Accuracy of the latest prediction against the true labels, rounded to 2 decimals\n",
"accuracy = get_accuracy_score(labels_true, labels_pred)\n",
"np.round(accuracy, decimals=2)"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"# Render the graph as SVG with the latest predicted labels, passing the seeds as well\n",
"image = svg_graph(\n",
"    adjacency,\n",
"    position,\n",
"    labels=labels_pred,\n",
"    seeds=labels,\n",
")\n",
"SVG(image)"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"# Distribution over labels, now with four seeds (nodes 0, 1, 30, 33) and 3 neighbors\n",
"labels = {node: labels_true[node] for node in (0, 1, 30, 33)}\n",
"classifier = NNClassifier(n_neighbors=3, embedding_method=Spectral(2))\n",
"classifier.fit(adjacency, labels)\n",
"probs = classifier.predict_proba()"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"# Use column 1 of the probabilities as node scores\n",
"scores = probs[:, 1]"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"# Render the graph as SVG with the scores, passing the seeds as well\n",
"image = svg_graph(\n",
"    adjacency,\n",
"    position,\n",
"    scores=scores,\n",
"    seeds=labels,\n",
")\n",
"SVG(image)"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "markdown",
"source": [
"## Directed graphs"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"# Load the painters graph with its metadata and unpack what the notebook uses\n",
"graph = painters(metadata=True)\n",
"adjacency, position, names = graph.adjacency, graph.position, graph.names"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"# Node indices of the three seed painters, each given its own label\n",
"rembrandt, klimt, cezanne = 5, 6, 11\n",
"labels = {cezanne: 0, rembrandt: 1, klimt: 2}"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"# Nearest neighbors in embedding space, with 2 neighbors and Spectral(3) as embedding method\n",
"classifier = NNClassifier(n_neighbors=2, embedding_method=Spectral(3))\n",
"classifier.fit(adjacency, labels)\n",
"labels_pred = classifier.predict()"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"# Render the named graph as SVG with the predicted labels, passing the seeds as well\n",
"image = svg_graph(\n",
"    adjacency,\n",
"    position,\n",
"    names=names,\n",
"    labels=labels_pred,\n",
"    seeds=labels,\n",
")\n",
"SVG(image)"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"# Probability distribution over labels; column 0 holds the scores for the label of Cezanne\n",
"probs = classifier.predict_proba()\n",
"scores = probs[:, 0]"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"# Render the named graph as SVG with the scores, passing Cezanne as the only seed\n",
"image = svg_graph(\n",
"    adjacency,\n",
"    position,\n",
"    names=names,\n",
"    scores=scores,\n",
"    seeds=[cezanne],\n",
")\n",
"SVG(image)"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "markdown",
"source": [
"## Bipartite graphs"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"# Load the movie-actor bipartite graph with its metadata and unpack what the notebook uses\n",
"graph = movie_actor(metadata=True)\n",
"biadjacency, names_row, names_col = graph.biadjacency, graph.names_row, graph.names_col"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"# Row indices used as seeds in the bipartite example\n",
"inception, drive, budapest = 0, 3, 8"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"# Known labels for the seed rows: one distinct label per seed\n",
"labels_row = {\n",
"    inception: 0,\n",
"    drive: 1,\n",
"    budapest: 2,\n",
"}"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"# Fit on the biadjacency matrix with row seeds, then read the labels predicted for rows and columns\n",
"classifier = NNClassifier(n_neighbors=2, embedding_method=Spectral(3))\n",
"classifier.fit(biadjacency, labels_row)\n",
"labels_row_pred, labels_col_pred = classifier.labels_row_, classifier.labels_col_"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "markdown",
"metadata": {
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"The classifier fitted above on the three seed rows provides predicted labels for both rows and columns; they are displayed below."
]
},
{
"cell_type": "code",
"execution_count": 68,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"image/svg+xml": [
""
],
"text/plain": [
""
]
},
"execution_count": 68,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Render the bipartite graph as SVG with the predicted row/column labels, passing the row seeds as well\n",
"image = visualize_bigraph(\n",
"    biadjacency,\n",
"    names_row=names_row,\n",
"    names_col=names_col,\n",
"    labels_row=labels_row_pred,\n",
"    labels_col=labels_col_pred,\n",
"    seeds_row=labels_row,\n",
")\n",
"SVG(image)"
]
},
{
"cell_type": "code",
"execution_count": 69,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"# Probability distribution over labels, first for rows, then for columns\n",
"probs_row, probs_col = classifier.predict_proba(), classifier.predict_proba(columns=True)"
]
},
{
"cell_type": "code",
"execution_count": 70,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"# Use column 1 of the row and column probabilities as scores\n",
"scores_row, scores_col = probs_row[:, 1], probs_col[:, 1]"
]
},
{
"cell_type": "code",
"execution_count": 71,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"image/svg+xml": [
""
],
"text/plain": [
""
]
},
"execution_count": 71,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Render the bipartite graph as SVG with the row/column scores, passing the row seeds as well\n",
"image = visualize_bigraph(\n",
"    biadjacency,\n",
"    names_row=names_row,\n",
"    names_col=names_col,\n",
"    scores_row=scores_row,\n",
"    scores_col=scores_col,\n",
"    seeds_row=labels_row,\n",
")\n",
"SVG(image)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.13"
},
"pycharm": {
"stem_cell": {
"cell_type": "raw",
"metadata": {
"collapsed": false
},
"source": []
}
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 2
}