# Source code for sknetwork.embedding.metrics

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Nov 6 2018
@author: Nathan De Lara <ndelara@enst.fr>
Quality metrics for adjacency embeddings
"""
import numpy as np

from sknetwork.linalg import normalize
from sknetwork.utils.check import check_format, get_probs, check_square


def cosine_modularity(adjacency, embedding: np.ndarray, embedding_col=None, resolution=1., weights='degree',
                      return_all: bool = False):
    """Quality metric of an embedding :math:`x` defined by:

    :math:`Q = \\sum_{ij}\\left(\\dfrac{A_{ij}}{w} - \\gamma \\dfrac{w^+_iw^-_j}{w^2}\\right)
    \\left(\\dfrac{1 + \\cos(x_i, x_j)}{2}\\right)`

    where

    * :math:`w^+_i, w^-_i` are the out-weight, in-weight of node :math:`i` (for digraphs),\n
    * :math:`w = 1^TA1` is the total weight of the graph.

    For bipartite graphs with column embedding :math:`y`, the metric is

    :math:`Q = \\sum_{ij}\\left(\\dfrac{B_{ij}}{w} - \\gamma \\dfrac{w_{1,i}w_{2,j}}{w^2}\\right)
    \\left(\\dfrac{1 + \\cos(x_i, y_j)}{2}\\right)`

    where

    * :math:`w_{1,i}, w_{2,j}` are the weights of nodes :math:`i` (row) and :math:`j` (column),\n
    * :math:`w = 1^TB1` is the total weight of the graph.

    The metric is computed as ``fit - resolution * diversity``, where ``fit`` is the first
    (edge) term of the sum and ``diversity`` is the second (null-model) term.

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph (biadjacency matrix if ``embedding_col`` is given).
    embedding :
        Embedding of the nodes (rows of the matrix).
    embedding_col :
        Embedding of the columns (for bipartite graphs). If ``None``, the matrix must be
        square and ``embedding`` is used for the columns as well.
    resolution :
        Resolution parameter :math:`\\gamma`, multiplying the diversity term.
    weights : ``'degree'`` or ``'uniform'``
        Weights of the nodes.
    return_all :
        If ``True``, return fit and diversity along with the modularity.

    Returns
    -------
    modularity : float
        Returned alone if ``return_all`` is ``False``.
    fit, diversity, modularity : tuple of float
        Returned as a 3-tuple, in this order, if ``return_all`` is ``True``.

    Example
    -------
    >>> from sknetwork.embedding import cosine_modularity
    >>> from sknetwork.data import karate_club
    >>> graph = karate_club(metadata=True)
    >>> adjacency = graph.adjacency
    >>> embedding = graph.position
    >>> np.round(cosine_modularity(adjacency, embedding), 2)
    0.35
    """
    adjacency = check_format(adjacency)
    # Total weight w of the graph: sum of all stored entries of the matrix.
    total_weight: float = adjacency.data.sum()

    if embedding_col is None:
        # Non-bipartite case: rows and columns are the same nodes, so the matrix must be square.
        check_square(adjacency)
        embedding_col = embedding.copy()

    # L2-normalized embeddings, so that dot products between rows are cosine similarities.
    embedding_row_norm = normalize(embedding, p=2)
    embedding_col_norm = normalize(embedding_col, p=2)

    # Node weight distributions for rows and for columns (taken from the transposed matrix).
    probs_row = get_probs(weights, adjacency)
    probs_col = get_probs(weights, adjacency.T)

    # Fit: sum over entries A_ij of A_ij * cos(x_i, y_j), rescaled to [0, 1] via (1 + cos) / 2.
    edge_similarity = np.multiply(embedding_row_norm, adjacency.dot(embedding_col_norm)).sum()
    fit: float = 0.5 * (1 + edge_similarity / total_weight)

    # Diversity: dot product of the weighted mean row embedding and weighted mean column embedding.
    # NOTE(review): this uses the raw embeddings, not the normalized ones computed above, so it
    # equals the cosine-based term of the docstring formula only for unit-norm embeddings.
    # Left unchanged because switching would alter returned values (including the doctest above);
    # confirm the intended definition before changing.
    mean_row = embedding.T.dot(probs_row)
    mean_col = embedding_col.T.dot(probs_col)
    div: float = 0.5 * (1 + mean_row.dot(mean_col))

    modularity = fit - resolution * div
    if return_all:
        return fit, div, modularity
    return modularity