Source code for sknetwork.ranking.hits

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Oct 07 2019
@author: Nathan de Lara <nathan.delara@polytechnique.org>
"""

from typing import Union

import numpy as np
from scipy import sparse

from sknetwork.linalg import SVDSolver, LanczosSVD
from sknetwork.ranking.base import BaseRanking
from sknetwork.utils.check import check_format


class HITS(BaseRanking):
    """Hub and authority scores of each node.

    For bipartite graphs, the hub score is computed on rows and the authority
    score on columns.

    Parameters
    ----------
    solver : ``'lanczos'`` (default, Lanczos algorithm) or :class:`SVDSolver` (custom solver)
        Which solver to use. Any string selects the default
        :class:`LanczosSVD` solver; any other object is used as given.

    Attributes
    ----------
    scores_ : np.ndarray
        Hub score of each node.
    scores_row_ : np.ndarray
        Hub score of each row, for bipartite graphs.
    scores_col_ : np.ndarray
        Authority score of each column, for bipartite graphs.

    Example
    -------
    >>> from sknetwork.ranking import HITS
    >>> from sknetwork.data import star_wars
    >>> hits = HITS()
    >>> biadjacency = star_wars()
    >>> scores = hits.fit_predict(biadjacency)
    >>> np.round(scores, 2)
    array([0.5 , 0.23, 0.69, 0.46])

    References
    ----------
    Kleinberg, J. M. (1999). Authoritative sources in a hyperlinked environment.
    Journal of the ACM, 46(5), 604-632.
    """

    def __init__(self, solver: Union[str, SVDSolver] = 'lanczos'):
        super().__init__()
        # isinstance (rather than an exact type comparison) so that str
        # subclasses are also mapped to the default solver.
        if isinstance(solver, str):
            self.solver: SVDSolver = LanczosSVD()
        else:
            self.solver = solver

    @staticmethod
    def _orient_nonnegative(vector: np.ndarray) -> np.ndarray:
        """Return ``vector`` with its majority sign made positive, clipped at zero.

        Singular vectors are only defined up to a global sign. The sign kept
        here is the one carried by the strict majority of non-zero entries;
        on a tie the vector is negated. Entries of the minority sign are then
        set to zero.

        Parameters
        ----------
        vector :
            One-dimensional array of raw singular-vector entries.

        Returns
        -------
        np.ndarray
            Non-negative array with the same shape as ``vector``.
        """
        n_positive = (vector > 0).sum()
        n_negative = (vector < 0).sum()
        oriented = vector if n_positive > n_negative else -vector
        return np.clip(oriented, a_min=0., a_max=None)

    def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray]) -> 'HITS':
        """Compute HITS algorithm with a spectral method.

        Parameters
        ----------
        adjacency :
            Adjacency or biadjacency matrix of the graph.

        Returns
        -------
        self: :class:`HITS`
        """
        adjacency = check_format(adjacency)

        # NOTE(review): the second argument is presumably the number of
        # singular components requested (a single one) -- confirm against
        # the SVDSolver API.
        self.solver.fit(adjacency, 1)

        # Flatten the left / right singular vectors to 1-D and make each of
        # them non-negative independently.
        hubs: np.ndarray = self._orient_nonnegative(
            self.solver.singular_vectors_left_.reshape(-1))
        authorities: np.ndarray = self._orient_nonnegative(
            self.solver.singular_vectors_right_.reshape(-1))

        self.scores_row_ = hubs
        self.scores_col_ = authorities
        self.scores_ = hubs

        return self