Source code for sknetwork.topology.structure

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created in July 2019
@author: Nathan de Lara <nathan.delara@polytechnique.org>
@author: Quentin Lutz <qlutz@enst.fr>
@author: Thomas Bonald <tbonald@enst.fr>
"""
from typing import Tuple, Optional, Union, List

import numpy as np
from scipy import sparse

from sknetwork.utils.check import is_symmetric, check_format
from sknetwork.utils.format import get_adjacency
from sknetwork.path import get_distances


def get_connected_components(input_matrix: sparse.csr_matrix, connection: str = 'weak', force_bipartite: bool = False) \
        -> np.ndarray:
    """Label every node of a graph with the connected component it belongs to.

    Parameters
    ----------
    input_matrix :
        Input matrix (either the adjacency matrix or the biadjacency matrix of the graph).
    connection :
        Must be ``'weak'`` (default) or ``'strong'``. The type of connection to use for directed graphs.
    force_bipartite : bool
        If ``True``, consider the input matrix as the biadjacency matrix of a bipartite graph.

    Returns
    -------
    labels :
        Connected component of each node.
        For bipartite graphs, rows and columns are concatenated (rows first).

    Raises
    ------
    ValueError
        If the input matrix stores no entry at all (graph without edges).

    Example
    -------
    >>> from sknetwork.topology import get_connected_components
    >>> from sknetwork.data import house
    >>> get_connected_components(house())
    array([0, 0, 0, 0, 0], dtype=int32)
    """
    input_matrix = check_format(input_matrix)
    if not len(input_matrix.data):
        raise ValueError('The graph is empty (no edge).')
    # Only the adjacency matrix is needed here; the bipartite flag is discarded.
    adjacency = get_adjacency(input_matrix, force_bipartite=force_bipartite)[0]
    # SciPy returns (number of components, labels); only the labels are exposed.
    _, labels = sparse.csgraph.connected_components(adjacency, connection=connection, return_labels=True)
    return labels


def is_connected(input_matrix: sparse.csr_matrix, connection: str = 'weak', force_bipartite: bool = False) -> bool:
    """Tell whether the whole graph forms a single connected component.

    Parameters
    ----------
    input_matrix :
        Input matrix (either the adjacency matrix or the biadjacency matrix of the graph).
    connection :
        Must be ``'weak'`` (default) or ``'strong'``. The type of connection to use for directed graphs.
    force_bipartite : bool
        If ``True``, consider the input matrix as the biadjacency matrix of a bipartite graph.

    Returns
    -------
    connected : bool
        ``True`` if all nodes share the same connected component label.

    Example
    -------
    >>> from sknetwork.topology import is_connected
    >>> from sknetwork.data import house
    >>> is_connected(house())
    True
    """
    labels = get_connected_components(input_matrix, connection=connection, force_bipartite=force_bipartite)
    distinct_labels = set(labels)
    return len(distinct_labels) == 1


def get_largest_connected_component(input_matrix: sparse.csr_matrix, connection: str = "weak",
                                    force_bipartite: bool = False, return_index: bool = False) \
        -> Union[sparse.csr_matrix, Tuple[sparse.csr_matrix, np.ndarray]]:
    """Restrict a graph to its largest connected component. Bipartite graphs are treated as undirected.

    Parameters
    ----------
    input_matrix :
        Adjacency matrix or biadjacency matrix of the graph.
    connection :
        Must be ``'weak'`` (default) or ``'strong'``. The type of connection to use for directed graphs.
    force_bipartite : bool
        If ``True``, consider the input matrix as the biadjacency matrix of a bipartite graph.
    return_index : bool
        Whether to return the index of the nodes of the largest connected component in the original graph.

    Returns
    -------
    output_matrix : sparse.csr_matrix
        Adjacency matrix or biadjacency matrix of the largest connected component.
    index : array
        Indices of the nodes in the original graph (only if ``return_index`` is ``True``).
        For bipartite graphs, row indices come first, followed by column indices;
        column indices are positions among the columns (they are not offset by the number of rows).

    Example
    -------
    >>> from sknetwork.topology import get_largest_connected_component
    >>> from sknetwork.data import house
    >>> get_largest_connected_component(house()).shape
    (5, 5)
    """
    input_matrix = check_format(input_matrix)
    adjacency, bipartite = get_adjacency(input_matrix, force_bipartite=force_bipartite)
    labels = get_connected_components(adjacency, connection=connection)

    # Most populated component; np.argmax keeps the first (smallest) label on ties.
    component_ids, sizes = np.unique(labels, return_counts=True)
    in_largest = labels == component_ids[np.argmax(sizes)]

    if bipartite:
        # Labels are ordered rows first, then columns: split them at the number of rows.
        n_row = input_matrix.shape[0]
        keep_rows = np.flatnonzero(in_largest[:n_row])
        keep_cols = np.flatnonzero(in_largest[n_row:])
        index = np.hstack((keep_rows, keep_cols))
    else:
        index = np.flatnonzero(in_largest)
        keep_rows = keep_cols = index

    # Select rows in CSR format, then columns in CSC format, and convert back to CSR.
    output_matrix = input_matrix[keep_rows, :].tocsc()[:, keep_cols].tocsr()
    return (output_matrix, index) if return_index else output_matrix


def is_bipartite(adjacency: sparse.csr_matrix, return_biadjacency: bool = False) \
        -> Union[bool, Tuple[bool, Optional[sparse.csr_matrix], Optional[np.ndarray], Optional[np.ndarray]]]:
    """Check whether a graph is bipartite.

    Each connected component is explored from its lowest-index node and its nodes are
    assigned alternating colors (0 and 1); the graph is bipartite if no edge joins two
    nodes of the same color.

    Parameters
    ----------
    adjacency :
       Adjacency matrix of the graph (symmetric).
    return_biadjacency :
        If ``True``, return a biadjacency matrix of the graph if bipartite.

    Returns
    -------
    is_bipartite : bool
        A boolean denoting if the graph is bipartite.
    biadjacency : sparse.csr_matrix
        A biadjacency matrix of the graph if bipartite (optional).
        ``None`` if the graph is not bipartite.
    rows : np.ndarray
        Index of rows in the original graph (optional).
        ``None`` if the graph is not bipartite.
    cols : np.ndarray
        Index of columns in the original graph (optional).
        ``None`` if the graph is not bipartite.

    Raises
    ------
    ValueError
        If the adjacency matrix is not symmetric.

    Example
    -------
    >>> from sknetwork.topology import is_bipartite
    >>> from sknetwork.data import cyclic_graph
    >>> is_bipartite(cyclic_graph(4))
    True
    >>> is_bipartite(cyclic_graph(3))
    False
    """
    if not is_symmetric(adjacency):
        raise ValueError('The graph must be undirected.')

    # Value returned as soon as the graph is known not to be bipartite.
    not_bipartite = (False, None, None, None) if return_biadjacency else False

    # A self-loop links a node to itself, so no valid 2-coloring exists.
    if adjacency.diagonal().any():
        return not_bipartite

    indptr, indices = adjacency.indptr, adjacency.indices
    n = indptr.shape[0] - 1
    coloring = np.full(n, -1, dtype=int)  # -1 marks a node that has not been colored yet

    # Scan the nodes once in increasing order: every node still uncolored when reached
    # is the lowest-index node of a new component. This avoids searching the whole
    # coloring array again for each component.
    for src in range(n):
        if coloring[src] != -1:
            continue
        coloring[src] = 0
        stack = [src]
        while stack:
            node = stack.pop()
            color = coloring[node]
            for neighbor in indices[indptr[node]:indptr[node + 1]]:
                if coloring[neighbor] == -1:
                    coloring[neighbor] = 1 - color
                    stack.append(neighbor)
                elif coloring[neighbor] == color:
                    # Two adjacent nodes share a color: odd cycle.
                    return not_bipartite

    if not return_biadjacency:
        return True
    rows = np.flatnonzero(coloring == 0)
    cols = np.flatnonzero(coloring == 1)
    return True, adjacency[rows, :][:, cols], rows, cols