Source code for sknetwork.embedding.spectral

#!/usr/bin/env python3
# coding: utf-8
"""
Created on September 2018
@author: Nathan de Lara <nathan.delara@polytechnique.org>
@author: Thomas Bonald <bonald@enst.fr>
"""
from typing import Union

import numpy as np
from scipy import sparse

from sknetwork.embedding.base import BaseEmbedding
from sknetwork.linalg import LanczosEig, Laplacian, Normalizer, normalize
from sknetwork.utils.format import get_adjacency
from sknetwork.utils.check import check_format, check_adjacency_vector, check_nonnegative, check_n_components


[docs]class Spectral(BaseEmbedding): """Spectral embedding of graphs, based the spectral decomposition of the Laplacian matrix :math:`L = D - A` or the transition matrix of the random walk :math:`P = D^{-1}A` (default), where :math:`D` is the diagonal matrix of degrees. Eigenvectors are considered in increasing order (for the Laplacian matrix :math:`L`) or decreasing order (for the transition matrix of the random walk :math:`P`) of eigenvalues, skipping the first. Parameters ---------- n_components : int (default = ``2``) Dimension of the embedding space. decomposition : str (``laplacian`` or ``rw``, default = ``rw``) Matrix used for the spectral decomposition. regularization : float (default = ``-1``) Regularization factor :math:`\\alpha` so that the adjacency matrix is :math:`A + \\alpha \\frac{11^T}{n}`. If negative, regularization is applied only if the graph is disconnected; the regularization factor :math:`\\alpha` is then set to the absolute value of the parameter. normalized : bool (default = ``True``) If ``True``, normalized the embedding so that each vector has norm 1 in the embedding space, i.e., each vector lies on the unit sphere. Attributes ---------- embedding_ : array, shape = (n, n_components) Embedding of the nodes. embedding_row_ : array, shape = (n_row, n_components) Embedding of the rows, for bipartite graphs. embedding_col_ : array, shape = (n_col, n_components) Embedding of the columns, for bipartite graphs. eigenvalues_ : array, shape = (n_components) Eigenvalues. eigenvectors_ : array, shape = (n, n_components) Eigenvectors. Example ------- >>> from sknetwork.embedding import Spectral >>> from sknetwork.data import karate_club >>> spectral = Spectral(n_components=3) >>> adjacency = karate_club() >>> embedding = spectral.fit_transform(adjacency) >>> embedding.shape (34, 3) References ---------- Belkin, M. & Niyogi, P. (2003). Laplacian Eigenmaps for Dimensionality Reduction and Data Representation, Neural computation. """ def __init__(self, n_components: int = 2, decomposition: str = 'rw', regularization: float = -1, normalized: bool = True): super(Spectral, self).__init__() self.embedding_ = None self.n_components = n_components self.decomposition = decomposition self.regularization = regularization self.normalized = normalized self.bipartite = None self.regularized = None self.eigenvalues_ = None self.eigenvectors_ = None
[docs] def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray], force_bipartite: bool = False) -> 'Spectral': """Compute the graph embedding. If the input matrix :math:`B` is not square (e.g., biadjacency matrix of a bipartite graph) or not symmetric (e.g., adjacency matrix of a directed graph), use the adjacency matrix :math:`A = \\begin{bmatrix} 0 & B \\\\ B^T & 0 \\end{bmatrix}` and return the embedding for both rows and columns of the input matrix :math:`B`. Parameters ---------- input_matrix : Adjacency matrix or biadjacency matrix of the graph. force_bipartite : bool (default = ``False``) If ``True``, force the input matrix to be considered as a biadjacency matrix. Returns ------- self: :class:`Spectral` """ # input input_matrix = check_format(input_matrix) adjacency, self.bipartite = get_adjacency(input_matrix, allow_directed=False, force_bipartite=force_bipartite) n = adjacency.shape[0] # regularization regularization = self._get_regularization(self.regularization, adjacency) self.regularized = regularization > 0 # laplacian normalized_laplacian = self.decomposition == 'rw' laplacian = Laplacian(adjacency, regularization, normalized_laplacian) # spectral decomposition n_components = check_n_components(self.n_components, n - 2) + 1 solver = LanczosEig(which='SM') solver.fit(matrix=laplacian, n_components=n_components) index = np.argsort(solver.eigenvalues_)[1:] # increasing order, skip first eigenvalues = solver.eigenvalues_[index] eigenvectors = solver.eigenvectors_[:, index] if normalized_laplacian: eigenvectors = laplacian.norm_diag.dot(eigenvectors) eigenvalues = 1 - eigenvalues # embedding embedding = eigenvectors.copy() if self.normalized: embedding = normalize(embedding, p=2) # output self.embedding_ = embedding self.eigenvalues_ = eigenvalues self.eigenvectors_ = eigenvectors if self.bipartite: self._split_vars(input_matrix.shape) return self
[docs] def predict(self, adjacency_vectors: Union[sparse.csr_matrix, np.ndarray]) -> np.ndarray: """Predict the embedding of new nodes, when possible (otherwise return 0). Each new node is defined by its adjacency row vector. Parameters ---------- adjacency_vectors : Adjacency vectors of nodes. Array of shape (n_col,) (single vector) or (n_vectors, n_col) Returns ------- embedding_vectors : np.ndarray Embedding of the nodes. Example ------- >>> from sknetwork.embedding import Spectral >>> from sknetwork.data import karate_club >>> spectral = Spectral(n_components=3) >>> adjacency = karate_club() >>> adjacency_vector = np.arange(34) < 5 >>> _ = spectral.fit(adjacency) >>> len(spectral.predict(adjacency_vector)) 3 """ self._check_fitted() # input if self.bipartite: n = len(self.embedding_col_) else: n = len(self.embedding_) adjacency_vectors = check_adjacency_vector(adjacency_vectors, n) check_nonnegative(adjacency_vectors) if self.bipartite: shape = (adjacency_vectors.shape[0], self.embedding_row_.shape[0]) adjacency_vectors = sparse.csr_matrix(adjacency_vectors) adjacency_vectors = sparse.hstack([sparse.csr_matrix(shape), adjacency_vectors], format='csr') eigenvectors = self.eigenvectors_ eigenvalues = self.eigenvalues_ # regularization if self.regularized: regularization = np.abs(self.regularization) else: regularization = 0 normalizer = Normalizer(adjacency_vectors, regularization) # prediction embedding_vectors = normalizer.dot(eigenvectors) normalized_laplacian = self.decomposition == 'rw' if normalized_laplacian: norm_vect = eigenvalues.copy() norm_vect[norm_vect == 0] = 1 embedding_vectors /= norm_vect else: norm_matrix = sparse.csr_matrix(1 - np.outer(normalizer.norm_diag.data, eigenvalues)) norm_matrix.data = 1 / norm_matrix.data embedding_vectors *= norm_matrix.toarray() # normalization if self.normalized: embedding_vectors = normalize(embedding_vectors, p=2) # shape if len(embedding_vectors) == 1: embedding_vectors = embedding_vectors.ravel() return embedding_vectors