{ "cells": [ { "cell_type": "markdown", "metadata": { "pycharm": { "name": "#%% md\n" } }, "source": [ "# Recommendation" ] }, { "cell_type": "markdown", "metadata": { "pycharm": { "name": "#%% md\n" } }, "source": [ "This notebook shows how to apply [scikit-network](https://scikit-network.readthedocs.io/) for content recommendation.\n", "\n", "We consider the [Movielens](https://netset.telecom-paris.fr/pages/movielens.html) dataset of the [netset](https://netset.telecom-paris.fr) collection, corresponding to ratings of 9066 movies by 671 users." ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "from IPython.display import SVG" ] }, { "cell_type": "code", "execution_count": 24, "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "import numpy as np\n", "from scipy.cluster.hierarchy import linkage" ] }, { "cell_type": "code", "execution_count": 122, "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "from sknetwork.data import load_netset\n", "from sknetwork.ranking import PageRank, top_k\n", "from sknetwork.embedding import Spectral\n", "from sknetwork.utils import get_neighbors\n", "from sknetwork.visualization import visualize_dendrogram" ] }, { "cell_type": "markdown", "metadata": { "pycharm": { "name": "#%% md\n" } }, "source": [ "## Data" ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Parsing files...\n", "Done.\n" ] } ], "source": [ "dataset = load_netset('movielens')" ] }, { "cell_type": "code", "execution_count": 27, "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "biadjacency = dataset.biadjacency\n", "names = dataset.names\n", "labels = dataset.labels\n", "names_labels = dataset.names_labels" ] }, { "cell_type": "code", "execution_count": 29, "metadata": { "pycharm": { "name": "#%%\n" 
} }, "outputs": [ { "data": { "text/plain": [ "<9066x671 sparse matrix of type ''\n", "\twith 100004 stored elements in Compressed Sparse Row format>" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "biadjacency" ] }, { "cell_type": "code", "execution_count": 35, "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "n_movies, n_users = biadjacency.shape" ] }, { "cell_type": "code", "execution_count": 30, "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/plain": [ "(array([0.5, 1. , 1.5, 2. , 2.5, 3. , 3.5, 4. , 4.5, 5. ]),\n", " array([ 1101, 3326, 1687, 7271, 4449, 20064, 10538, 28750, 7723,\n", " 15095]))" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# ratings\n", "np.unique(biadjacency.data, return_counts=True)" ] }, { "cell_type": "code", "execution_count": 33, "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "# positive ratings\n", "positive = biadjacency >= 3" ] }, { "cell_type": "code", "execution_count": 34, "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/plain": [ "<9066x671 sparse matrix of type ''\n", "\twith 82170 stored elements in Compressed Sparse Row format>" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "positive" ] }, { "cell_type": "code", "execution_count": 28, "metadata": { "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/plain": [ "array(['Action', 'Adventure', 'Animation', 'Children', 'Comedy', 'Crime',\n", " 'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror', 'IMAX',\n", " 'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War',\n", " 'Western'], dtype='Forrest Gump (1994)Pulp Fiction (1994)Shawshank Redemption, The (1994)Silence of the Lambs, The (1991)Star Wars: Episode IV - A New Hope (1977)Matrix, The (1999)Jurassic Park (1993)Schindler's List (1993)Back to the 
Future (1985)Star Wars: Episode V - The Empire Strikes Back (1980)Toy Story (1995)American Beauty (1999)Terminator 2: Judgment Day (1991)Fargo (1996)Raiders of the Lost Ark (Indiana Jones and the Raiders of the Lost Ark) (1981)Star Wars: Episode VI - Return of the Jedi (1983)Godfather, The (1972)Braveheart (1995)Fight Club (1999)Fugitive, The (1993)Usual Suspects, The (1995)Lord of the Rings: The Fellowship of the Ring, The (2001)Aladdin (1992)Sixth Sense, The (1999)Seven (a.k.a. Se7en) (1995)Twelve Monkeys (a.k.a. 12 Monkeys) (1995)Apollo 13 (1995)Saving Private Ryan (1998)Lion King, The (1994)Lord of the Rings: The Two Towers, The (2002)Independence Day (a.k.a. ID4) (1996)Men in Black (a.k.a. MIB) (1997)Dances with Wolves (1990)Lord of the Rings: The Return of the King, The (2003)Batman (1989)Princess Bride, The (1987)Shrek (2001)Good Will Hunting (1997)True Lies (1994)Beauty and the Beast (1991)Terminator, The (1984)Gladiator (2000)Groundhog Day (1993)E.T. the Extra-Terrestrial (1982)Speed (1994)Monty Python and the Holy Grail (1975)Blade Runner (1982)One Flew Over the Cuckoo's Nest (1975)Mission: Impossible (1996)Die Hard (1988)Indiana Jones and the Last Crusade (1989)Ghostbusters (a.k.a. Ghost Busters) (1984)Truman Show, The (1998)Memento (2000)Pirates of the Caribbean: The Curse of the Black Pearl (2003)Godfather: Part II, The (1974)Goodfellas (1990)Willy Wonka the Chocolate Factory (1971)Reservoir Dogs (1992)Ocean's Eleven (2001)Monsters, Inc. (2001)Léon: The Professional (a.k.a. The Professional) (Léon) (1994)Amelie (Fabuleux destin d'Amélie Poulain, Le) (2001)L.A. Confidential (1997)Mrs. 
Doubtfire (1993)Ferris Bueller's Day Off (1986)Trainspotting (1996)Dark Knight, The (2008)Incredibles, The (2004)Alien (1979)Clockwork Orange, A (1971)Babe (1995)Spider-Man (2002)Being John Malkovich (1999)Taxi Driver (1976)Titanic (1997)Pretty Woman (1990)Finding Nemo (2003)Rain Man (1988)Rock, The (1996)Big Lebowski, The (1998)Casablanca (1942)Toy Story 2 (1999)Shakespeare in Love (1998)Beautiful Mind, A (2001)Breakfast Club, The (1985)Die Hard: With a Vengeance (1995)Aliens (1986)2001: A Space Odyssey (1968)Wizard of Oz, The (1939)Apocalypse Now (1979)Inception (2010)X-Men (2000)Crouching Tiger, Hidden Dragon (Wo hu cang long) (2000)Eternal Sunshine of the Spotless Mind (2004)Mask, The (1994)Stargate (1994)Twister (1996)Fifth Element, The (1997)Dumb Dumber (Dumb and Dumber) (1994)" ], "text/plain": [ "" ] }, "execution_count": 149, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# visualization\n", "image = visualize_dendrogram(dendrogram, names=names[index], rotate=True, width=200, height=1000, n_clusters=6)\n", "SVG(image)\n" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.13" } }, "nbformat": 4, "nbformat_minor": 4 }