{ "cells": [ { "cell_type": "markdown", "metadata": { "pycharm": { "name": "#%% md\n" } }, "source": [ "# Nearest neighbors" ] }, { "cell_type": "markdown", "metadata": { "pycharm": { "name": "#%% md\n" } }, "source": [ "This notebook illustrates the classification of the nodes of a graph by the [k-nearest neighbors algorithm](https://en.wikipedia.org/wiki/K-nearest_neighbors_algorithm), using the labels known for some nodes." ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "ExecuteTime": { "end_time": "2019-07-15T12:29:50.554431Z", "start_time": "2019-07-15T12:29:50.414075Z" }, "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "from IPython.display import SVG" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "import numpy as np" ] }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "from sknetwork.data import karate_club, painters, movie_actor\n", "from sknetwork.classification import NNClassifier, get_accuracy_score\n", "from sknetwork.embedding import Spectral\n", "from sknetwork.visualization import svg_graph, visualize_bigraph" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "markdown", "source": [ "## Graphs" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%% md\n" } } }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "graph = karate_club(metadata=True)\n", "adjacency = graph.adjacency\n", "position = graph.position\n", "labels_true = graph.labels" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "labels = {i: labels_true[i] for i in [0, 33]}" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "# Nearest neighbors with cosine similarity\n", "classifier = 
NNClassifier(n_neighbors=1)\n", "labels_pred = classifier.fit_predict(adjacency, labels)" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "accuracy = get_accuracy_score(labels_true, labels_pred)\n", "np.round(accuracy, 2)" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "image = svg_graph(adjacency, position, labels=labels_pred, seeds=labels)\n", "SVG(image)" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "# Nearest neighbors in embedding space\n", "classifier = NNClassifier(n_neighbors=1, embedding_method=Spectral(2))\n", "labels_pred = classifier.fit_predict(adjacency, labels)" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "accuracy = get_accuracy_score(labels_true, labels_pred)\n", "np.round(accuracy, 2)" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "image = svg_graph(adjacency, position, labels=labels_pred, seeds=labels)\n", "SVG(image)" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "# probability distribution over labels\n", "labels = {i: labels_true[i] for i in [0, 1, 30, 33]}\n", "classifier = NNClassifier(n_neighbors=3, embedding_method=Spectral(2))\n", "classifier.fit(adjacency, labels)\n", "probs = classifier.predict_proba()" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "scores = probs[:,1]" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { 
"cell_type": "code", "execution_count": null, "outputs": [], "source": [ "image = svg_graph(adjacency, position, scores=scores, seeds=labels)\n", "SVG(image)" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "markdown", "source": [ "## Directed graphs" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%% md\n" } } }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "graph = painters(metadata=True)\n", "adjacency = graph.adjacency\n", "position = graph.position\n", "names = graph.names" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "rembrandt = 5\n", "klimt = 6\n", "cezanne = 11\n", "labels = {cezanne: 0, rembrandt: 1, klimt: 2}" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "classifier = NNClassifier(n_neighbors=2, embedding_method=Spectral(3))\n", "labels_pred = classifier.fit_predict(adjacency, labels)" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "image = svg_graph(adjacency, position, names, labels=labels_pred, seeds=labels)\n", "SVG(image)" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "# probability distribution over labels (here, the scores for the label of Cezanne)\n", "probs = classifier.predict_proba()\n", "scores = probs[:,0]" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "image = svg_graph(adjacency, position, names, scores=scores, seeds=[cezanne])\n", "SVG(image)" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "markdown", "source": [ "## 
Bipartite graphs" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%% md\n" } } }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "graph = movie_actor(metadata=True)\n", "biadjacency = graph.biadjacency\n", "names_row = graph.names_row\n", "names_col = graph.names_col" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "inception = 0\n", "drive = 3\n", "budapest = 8" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "# seeds: one labeled row (movie) per class\n", "labels_row = {inception: 0, drive: 1, budapest: 2}" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "classifier = NNClassifier(n_neighbors=2, embedding_method=Spectral(3))\n", "classifier.fit(biadjacency, labels_row)\n", "labels_row_pred = classifier.labels_row_\n", "labels_col_pred = classifier.labels_col_" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "code", "execution_count": 68, "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n", "\n", "\n", 
"\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "InceptionThe Dark Knight RisesThe Big ShortDriveThe Great GatsbyLa La LandCrazy Stupid LoveViceThe Grand Budapest HotelAviator007 SpectreInglourious BasterdsMidnight In ParisMurder on the Orient ExpressFantastic Beasts 2Leonardo DiCaprioMarion CotillardJoseph Gordon LewittChristian BaleRyan GoslingBrad PittCarey MulliganEmma StoneSteve CarellLea SeydouxRalph FiennesJude LawWillem DafoeChristophe WaltzJohnny DeppOwen Wilson" ], "text/plain": [ "" ] }, "execution_count": 68, "metadata": {}, "output_type": "execute_result" } ], "source": [ "image = visualize_bigraph(biadjacency, names_row, names_col, labels_row_pred, labels_col_pred, seeds_row=labels_row)\n", "SVG(image)" ] }, { "cell_type": "code", "execution_count": 69, "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "# probability distribution over labels\n", "probs_row = classifier.predict_proba()\n", "probs_col = classifier.predict_proba(columns=True)" ] }, { "cell_type": "code", "execution_count": 70, "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "scores_row = probs_row[:,1]\n", "scores_col = probs_col[:,1]" ] }, { "cell_type": "code", "execution_count": 71, "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", 
"\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "InceptionThe Dark Knight RisesThe Big ShortDriveThe Great GatsbyLa La LandCrazy Stupid LoveViceThe Grand Budapest HotelAviator007 SpectreInglourious BasterdsMidnight In ParisMurder on the Orient ExpressFantastic Beasts 2Leonardo DiCaprioMarion CotillardJoseph Gordon LewittChristian BaleRyan GoslingBrad PittCarey MulliganEmma StoneSteve CarellLea SeydouxRalph FiennesJude LawWillem DafoeChristophe WaltzJohnny DeppOwen Wilson" ], "text/plain": [ "" ] }, "execution_count": 71, "metadata": {}, "output_type": "execute_result" } ], "source": [ "image = visualize_bigraph(biadjacency, names_row, names_col, scores_row=scores_row, scores_col=scores_col,\n", " seeds_row=labels_row)\n", "SVG(image)\n" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.13" }, "pycharm": { "stem_cell": { "cell_type": "raw", "metadata": { "collapsed": false }, "source": [] } }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": true, "sideBar": true, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": {}, "toc_section_display": true, "toc_window_display": false } }, "nbformat": 4, "nbformat_minor": 2 }