{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true,
    "pycharm": {
     "name": "#%% md\n"
    }
   },
   "source": [
    "# Louvain hierarchy"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "pycharm": {
     "name": "#%% md\n"
    }
   },
   "source": [
    "This notebook illustrates the hierarchical clustering of graphs by [Louvain](https://scikit-network.readthedocs.io/en/latest/reference/hierarchy.html) (successive aggregations, in a bottom-up manner)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "pycharm": {
     "is_executing": false,
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "from IPython.display import SVG\n",
    "import numpy as np\n",
    "\n",
    "from sknetwork.data import karate_club, painters, movie_actor\n",
    "from sknetwork.hierarchy import LouvainHierarchy\n",
    "from sknetwork.hierarchy import cut_straight, dasgupta_score, tree_sampling_divergence\n",
    "from sknetwork.visualization import visualize_graph, visualize_bigraph, visualize_dendrogram"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "pycharm": {
     "name": "#%% md\n"
    }
   },
   "source": [
    "## Graphs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "pycharm": {
     "is_executing": false,
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "# karate club graph, loaded with its metadata to get the node positions\n",
    "graph = karate_club(metadata=True)\n",
    "adjacency = graph.adjacency\n",
    "position = graph.position"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "pycharm": {
     "is_executing": false,
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "# hierarchical clustering\n",
    "louvain = LouvainHierarchy()\n",
    "dendrogram = louvain.fit_predict(adjacency)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "pycharm": {
     "is_executing": false,
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "image = visualize_dendrogram(dendrogram)\n",
    "SVG(image)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "pycharm": {
     "is_executing": false,
     "name": "#%%\n"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0 0 0 0 3 3 3 0 1 0 3 0 0 0 1 1 3 0 1 0 1 0 1 2 2 2 1 2 2 1 1 2 1 1]\n"
     ]
    }
   ],
   "source": [
    "# cut of the dendrogram with the default parameters of cut_straight\n",
    "labels = cut_straight(dendrogram)\n",
    "print(labels)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "pycharm": {
     "is_executing": false,
     "name": "#%%\n"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0 0 0 0 3 3 3 0 1 0 3 0 0 0 1 1 3 0 1 0 1 0 1 2 2 2 1 2 2 1 1 2 1 1]\n"
     ]
    }
   ],
   "source": [
    "# cut with an explicit number of clusters; also return the corresponding aggregated dendrogram\n",
    "labels, dendrogram_aggregate = cut_straight(dendrogram, n_clusters=4, return_dendrogram=True)\n",
    "print(labels)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "# number of nodes per cluster, used below as names in the aggregated dendrogram\n",
    "# (indexing the result avoids rebinding `_`, which IPython uses for the last output)\n",
    "counts = np.unique(labels, return_counts=True)[1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "image = visualize_dendrogram(dendrogram_aggregate, names=counts, rotate_names=False)\n",
    "SVG(image)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "pycharm": {
     "is_executing": false,
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "image = visualize_graph(adjacency, position, labels=labels)\n",
    "SVG(image)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "pycharm": {
     "is_executing": false,
     "name": "#%%\n"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.5878582202111614"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# metrics\n",
    "dasgupta_score(adjacency, dendrogram)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "pycharm": {
     "name": "#%% md\n"
    }
   },
   "source": [
    "## Directed graphs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "pycharm": {
     "is_executing": false,
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "# painters graph, loaded with its metadata to get the node positions and names\n",
    "graph = painters(metadata=True)\n",
    "adjacency = graph.adjacency\n",
    "position = graph.position\n",
    "names = graph.names"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "pycharm": {
     "is_executing": false,
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "# hierarchical clustering\n",
    "louvain = LouvainHierarchy()\n",
    "dendrogram = louvain.fit_predict(adjacency)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "pycharm": {
     "is_executing": false,
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "image = visualize_dendrogram(dendrogram, names, rotate=True)\n",
    "SVG(image)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "pycharm": {
     "is_executing": false,
     "name": "#%%\n"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1 0 2 0 2 2 1 0 1 2 1 0 0 1]\n"
     ]
    }
   ],
   "source": [
    "# cut with 3 clusters\n",
    "labels = cut_straight(dendrogram, n_clusters=3)\n",
    "print(labels)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "pycharm": {
     "is_executing": false,
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "image = visualize_graph(adjacency, position, names=names, labels=labels)\n",
    "SVG(image)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "pycharm": {
     "is_executing": false,
     "name": "#%%\n"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.4842857142857142"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# metrics\n",
    "dasgupta_score(adjacency, dendrogram)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "pycharm": {
     "name": "#%% md\n"
    }
   },
   "source": [
    "## Bipartite graphs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "# movie-actor graph, loaded with its metadata to get the names of rows and columns\n",
    "graph = movie_actor(metadata=True)\n",
    "biadjacency = graph.biadjacency\n",
    "names_row = graph.names_row\n",
    "names_col = graph.names_col"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "# hierarchical clustering\n",
    "louvain = LouvainHierarchy()\n",
    "louvain.fit(biadjacency)\n",
    "dendrogram_row = louvain.dendrogram_row_\n",
    "dendrogram_col = louvain.dendrogram_col_\n",
    "dendrogram_full = louvain.dendrogram_full_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "image = visualize_dendrogram(dendrogram_row, names_row, n_clusters=4, rotate=True)\n",
    "SVG(image)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "image = visualize_dendrogram(dendrogram_col, names_col, n_clusters=4, rotate=True)\n",
    "SVG(image)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "# cut of the full dendrogram; the slicing below assumes rows are labelled first, then columns\n",
    "labels = cut_straight(dendrogram_full, n_clusters=4)\n",
    "n_row = biadjacency.shape[0]\n",
    "labels_row = labels[:n_row]\n",
    "labels_col = labels[n_row:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "image = visualize_bigraph(biadjacency, names_row, names_col, labels_row, labels_col)\n",
    "SVG(image)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.13"
  },
  "pycharm": {
   "stem_cell": {
    "cell_type": "raw",
    "metadata": {
     "collapsed": false
    },
    "source": []
   }
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}