# Source code for sknetwork.embedding.random_projection

#!/usr/bin/env python3
# coding: utf-8
"""
Created in January 2021
@author: Thomas Bonald <bonald@enst.fr>
"""
from abc import ABC
from typing import Union

import numpy as np
from scipy import sparse

from sknetwork.embedding.base import BaseEmbedding
from sknetwork.linalg import Regularizer, Normalizer, normalize
from sknetwork.utils.check import check_format, check_random_state
from sknetwork.utils.format import get_adjacency



[docs]
class RandomProjection(BaseEmbedding, ABC):
    r"""Embedding of graphs based on the random projection of the adjacency matrix:

    :math:`(I + \alpha A +... + (\alpha A)^K)G`

    where :math:`A` is the adjacency matrix, :math:`G` is a random Gaussian matrix,
    :math:`\alpha` is some smoothing factor and :math:`K` some non-negative integer.

    Parameters
    ----------
    n_components : int (default = 2)
        Dimension of the embedding space.
    alpha : float (default = 0.5)
        Smoothing parameter.
    n_iter : int (default = 3)
        Number of power iterations of the adjacency matrix.
    random_walk : bool (default = ``False``)
        If ``True``, use the transition matrix of the random walk, :math:`P = D^{-1}A`, instead of the adjacency matrix.
    regularization : float (default = ``-1``)
        Regularization factor :math:`\alpha` so that the matrix is :math:`A + \alpha \frac{11^T}{n}`.
        If negative, regularization is applied only if the graph is disconnected (and then equal to the absolute value
        of the parameter).
    normalized : bool (default = ``True``)
        If ``True``, normalize the embedding so that each vector has norm 1 in the embedding space, i.e.,
        each vector lies on the unit sphere.
    random_state : int, optional
        Seed used by the random number generator.

    Attributes
    ----------
    embedding_ : array, shape = (n_nodes, n_components)
        Embedding of the nodes.
    bipartite : bool
        Flag returned by ``get_adjacency`` during :meth:`fit`; when true, the embedding is split
        according to the shape of the input matrix. ``None`` before fitting.
    regularized : bool
        ``True`` if the regularization actually applied during :meth:`fit` is positive. ``None`` before fitting.

    Example
    -------
    >>> from sknetwork.embedding import RandomProjection
    >>> from sknetwork.data import karate_club
    >>> projection = RandomProjection()
    >>> adjacency = karate_club()
    >>> embedding = projection.fit_transform(adjacency)
    >>> embedding.shape
    (34, 2)

    References
    ----------
    Zhang, Z., Cui, P., Li, H., Wang, X., & Zhu, W. (2018).
    Billion-scale network embedding with iterative random projection, ICDM.
    """
    def __init__(self, n_components: int = 2, alpha: float = 0.5, n_iter: int = 3, random_walk: bool = False,
                 regularization: float = -1, normalized: bool = True, random_state: Union[int, None] = None):
        super().__init__()

        # Hyper-parameters, stored verbatim and only read in ``fit``.
        self.n_components = n_components
        self.alpha = alpha
        self.n_iter = n_iter
        self.random_walk = random_walk
        self.regularization = regularization
        self.normalized = normalized
        self.random_state = random_state

        # State populated by ``fit``.
        self.embedding_ = None
        self.bipartite = None
        self.regularized = None

    def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray], force_bipartite: bool = False) \
            -> 'RandomProjection':
        """Compute the graph embedding.

        Parameters
        ----------
        input_matrix : sparse.csr_matrix, np.ndarray
            Adjacency matrix or biadjacency matrix of the graph.
        force_bipartite : bool (default = ``False``)
            If ``True``, force the input matrix to be considered as a biadjacency matrix.

        Returns
        -------
        self: :class:`RandomProjection`
        """
        # input
        input_matrix = check_format(input_matrix)
        adjacency, self.bipartite = get_adjacency(input_matrix, force_bipartite=force_bipartite)
        n = adjacency.shape[0]

        # regularization: resolve the user-facing parameter into the value actually applied
        regularization = self._get_regularization(self.regularization, adjacency)
        self.regularized = regularization > 0

        # multiplier: linear operator applied at each power iteration
        if self.random_walk:
            multiplier = Normalizer(adjacency, regularization)
        else:
            multiplier = Regularizer(adjacency, regularization)

        # random matrix: Gaussian draw, then orthonormalized by QR.
        # NOTE: the default (reduced) QR returns min(n, n_components) columns, so when
        # n_components exceeds the number of rows of the adjacency matrix the embedding
        # has only n columns.
        random_generator = check_random_state(self.random_state)
        random_matrix = random_generator.normal(size=(n, self.n_components))
        random_matrix, _ = np.linalg.qr(random_matrix)

        # random projection: accumulate G + (alpha M) G + ... + (alpha M)^n_iter G
        factor = random_matrix
        embedding = factor.copy()  # copy so the in-place sum below leaves the random matrix untouched
        for _ in range(self.n_iter):
            factor = self.alpha * multiplier.dot(factor)
            embedding += factor

        # normalization
        if self.normalized:
            embedding = normalize(embedding, p=2)

        # output
        self.embedding_ = embedding
        if self.bipartite:
            self._split_vars(input_matrix.shape)
        return self