# Source code for sknetwork.embedding.spectral

#!/usr/bin/env python3
# coding: utf-8
"""
Created on September 2018
@author: Nathan de Lara <nathan.delara@polytechnique.org>
@author: Thomas Bonald <bonald@enst.fr>
"""
from typing import Union

import numpy as np
from scipy import sparse

from sknetwork.embedding.base import BaseEmbedding
from sknetwork.linalg import LanczosEig, Laplacian, Normalizer, normalize
from sknetwork.utils.check import check_format, check_adjacency_vector, check_nonnegative, check_n_components

[docs]class Spectral(BaseEmbedding):
"""Spectral embedding of graphs, based the spectral decomposition of the Laplacian matrix :math:L = D - A
or the transition matrix of the random walk :math:P = D^{-1}A (default), where :math:D is the
diagonal matrix of degrees.

Eigenvectors are considered in increasing order (for the Laplacian matrix :math:L) or decreasing order
(for the transition matrix of the random walk :math:P) of eigenvalues, skipping the first.

Parameters
----------
n_components : int (default = 2)
Dimension of the embedding space.
decomposition : str (laplacian or rw, default = rw)
Matrix used for the spectral decomposition.
regularization : float (default = -1)
Regularization factor :math:\\alpha so that the adjacency matrix is :math:A + \\alpha \\frac{11^T}{n}.
If negative, regularization is applied only if the graph is disconnected; the regularization factor
:math:\\alpha is then set to the absolute value of the parameter.
normalized : bool (default = True)
If True, normalized the embedding so that each vector has norm 1 in the embedding space, i.e.,
each vector lies on the unit sphere.
Attributes
----------
embedding_ : array, shape = (n, n_components)
Embedding of the nodes.
embedding_row_ : array, shape = (n_row, n_components)
Embedding of the rows, for bipartite graphs.
embedding_col_ : array, shape = (n_col, n_components)
Embedding of the columns, for bipartite graphs.
eigenvalues_ : array, shape = (n_components)
Eigenvalues.
eigenvectors_ : array, shape = (n, n_components)
Eigenvectors.

Example
-------
>>> from sknetwork.embedding import Spectral
>>> from sknetwork.data import karate_club
>>> spectral = Spectral(n_components=3)
>>> embedding.shape
(34, 3)

References
----------
Belkin, M. & Niyogi, P. (2003). Laplacian Eigenmaps for Dimensionality Reduction and Data Representation,
Neural computation.
"""
def __init__(self, n_components: int = 2, decomposition: str = 'rw', regularization: float = -1,
normalized: bool = True):
super(Spectral, self).__init__()

self.embedding_ = None
self.n_components = n_components
self.decomposition = decomposition
self.regularization = regularization
self.normalized = normalized
self.bipartite = None
self.regularized = None
self.eigenvalues_ = None
self.eigenvectors_ = None

[docs]    def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray], force_bipartite: bool = False) -> 'Spectral':
"""Compute the graph embedding.

If the input matrix :math:B is not square (e.g., biadjacency matrix of a bipartite graph) or not symmetric

:math:A  = \\begin{bmatrix} 0 & B \\\\ B^T & 0 \\end{bmatrix}

and return the embedding for both rows and columns of the input matrix :math:B.

Parameters
----------
input_matrix :
force_bipartite : bool (default = False)
If True, force the input matrix to be considered as a biadjacency matrix.

Returns
-------
self: :class:Spectral
"""
# input
input_matrix = check_format(input_matrix)

# regularization
self.regularized = regularization > 0

# laplacian
normalized_laplacian = self.decomposition == 'rw'

# spectral decomposition
n_components = check_n_components(self.n_components, n - 2) + 1
solver = LanczosEig(which='SM')
solver.fit(matrix=laplacian, n_components=n_components)
index = np.argsort(solver.eigenvalues_)[1:]  # increasing order, skip first

eigenvalues = solver.eigenvalues_[index]
eigenvectors = solver.eigenvectors_[:, index]

if normalized_laplacian:
eigenvectors = laplacian.norm_diag.dot(eigenvectors)
eigenvalues = 1 - eigenvalues

# embedding
embedding = eigenvectors.copy()
if self.normalized:
embedding = normalize(embedding, p=2)

# output
self.embedding_ = embedding
self.eigenvalues_ = eigenvalues
self.eigenvectors_ = eigenvectors
if self.bipartite:
self._split_vars(input_matrix.shape)

return self

[docs]    def predict(self, adjacency_vectors: Union[sparse.csr_matrix, np.ndarray]) -> np.ndarray:
"""Predict the embedding of new nodes, when possible (otherwise return 0).

Each new node is defined by its adjacency row vector.

Parameters
----------
Array of shape (n_col,) (single vector) or (n_vectors, n_col)

Returns
-------
embedding_vectors : np.ndarray
Embedding of the nodes.

Example
-------
>>> from sknetwork.embedding import Spectral
>>> from sknetwork.data import karate_club
>>> spectral = Spectral(n_components=3)
>>> adjacency_vector = np.arange(34) < 5
3
"""
self._check_fitted()

# input
if self.bipartite:
n = len(self.embedding_col_)
else:
n = len(self.embedding_)

if self.bipartite:
eigenvectors = self.eigenvectors_
eigenvalues = self.eigenvalues_

# regularization
if self.regularized:
regularization = np.abs(self.regularization)
else:
regularization = 0

# prediction
embedding_vectors = normalizer.dot(eigenvectors)
normalized_laplacian = self.decomposition == 'rw'
if normalized_laplacian:
norm_vect = eigenvalues.copy()
norm_vect[norm_vect == 0] = 1
embedding_vectors /= norm_vect
else:
norm_matrix = sparse.csr_matrix(1 - np.outer(normalizer.norm_diag.data, eigenvalues))
norm_matrix.data = 1 / norm_matrix.data
embedding_vectors *= norm_matrix.toarray()

# normalization
if self.normalized:
embedding_vectors = normalize(embedding_vectors, p=2)

# shape
if len(embedding_vectors) == 1:
embedding_vectors = embedding_vectors.ravel()

return embedding_vectors