From a54c465c183551b9b20a705e450bae757945feac Mon Sep 17 00:00:00 2001 From: Nicholas Landry Date: Sat, 7 Sep 2024 16:21:38 -0400 Subject: [PATCH] try stuff --- tests/algorithms/test_centrality.py | 4 - xgi/algorithms/centrality.py | 142 +++++++++++++++++--- xgi/utils/__init__.py | 3 +- xgi/utils/tensor.py | 201 ++++++++++++++++++++++++++++ 4 files changed, 326 insertions(+), 24 deletions(-) create mode 100644 xgi/utils/tensor.py diff --git a/tests/algorithms/test_centrality.py b/tests/algorithms/test_centrality.py index 80fecabe3..493a7448a 100644 --- a/tests/algorithms/test_centrality.py +++ b/tests/algorithms/test_centrality.py @@ -66,10 +66,6 @@ def test_h_eigenvector_centrality(): assert norm(c[1:] - c[1]) < 1e-4 assert abs(c[0] / c[1] - ratio(5, 7, kind="HEC")) < 1e-4 - with pytest.raises(XGIError): - H = xgi.Hypergraph([[1, 2], [2, 3, 4]]) - H.nodes.h_eigenvector_centrality.asnumpy() - def test_node_edge_centrality(): # test empty hypergraph diff --git a/xgi/algorithms/centrality.py b/xgi/algorithms/centrality.py index 37c7a1dc4..5b26a012b 100644 --- a/xgi/algorithms/centrality.py +++ b/xgi/algorithms/centrality.py @@ -10,7 +10,7 @@ from ..convert import to_line_graph from ..exception import XGIError from ..linalg import clique_motif_matrix, incidence_matrix -from ..utils import convert_labels_to_integers +from ..utils import convert_labels_to_integers, pairwise_incidence, ttsv1, ttsv2 from .properties import is_uniform __all__ = [ @@ -65,8 +65,8 @@ def clique_eigenvector_centrality(H, tol=1e-6): return {node_dict[n]: v[n].item() for n in node_dict} -def h_eigenvector_centrality(H, max_iter=100, tol=1e-6): - """Compute the H-eigenvector centrality of a uniform hypergraph. +def h_eigenvector_centrality(H, max_iter=100, tol=1e-6, fast=True): + """Compute the H-eigenvector centrality of a hypergraph. Parameters ---------- @@ -76,7 +76,7 @@ def h_eigenvector_centrality(H, max_iter=100, tol=1e-6): The maximum number of iterations before the algorithm terminates. By default, 100. tol : float > 0, optional - The desired L2 error in the centrality vector. By default, 1e-6. + The desired convergence tolerance. By default, 1e-6. Returns ------- @@ -95,11 +95,16 @@ def h_eigenvector_centrality(H, max_iter=100, tol=1e-6): References ---------- + Scalable Tensor Methods for Nonuniform Hypergraphs, + Sinan Aksoy, Ilya Amburg, Stephen Young, + https://doi.org/10.48550/arXiv.2306.17825 + Three Hypergraph Eigenvector Centralities, Austin R. Benson, https://doi.org/10.1137/18M1203031 """ from ..algorithms import is_connected + from ..convert import to_hyperedge_dict # if there aren't any nodes, return an empty dict if H.num_nodes == 0: @@ -109,29 +114,128 @@ def h_eigenvector_centrality(H, max_iter=100, tol=1e-6): if not is_connected(H): return {n: np.nan for n in H.nodes} - m = is_uniform(H) - if not m: - raise XGIError("This method is not defined for non-uniform hypergraphs.") - new_H = convert_labels_to_integers(H, "old-label") - f = lambda v, m: np.power(v, 1.0 / m) # noqa: E731 - g = lambda v, x: np.prod(v[list(x)]) # noqa: E731 + r = H.edges.size.max() x = np.random.uniform(size=(new_H.num_nodes)) x = x / norm(x, 1) - for iter in range(max_iter): - new_x = apply(new_H, x, g) - new_x = f(new_x, m) - # multiply by the sign to try and enforce positivity - new_x = np.sign(new_x[0]) * new_x / norm(new_x, 1) - if norm(x - new_x) <= tol: - break - x = new_x.copy() + if fast: + edge_dict = to_hyperedge_dict(new_H) + node_dict = new_H.nodes.memberships() + y = np.abs(np.array(ttsv1(node_dict, edge_dict, r, x))) + else: + f = lambda v, r: np.power(v, 1.0 / r) # noqa: E731 + g = lambda v, x: np.prod(v[list(x)]) # noqa: E731 + + converged = False + it = 0 + while it < max_iter and not converged: + if fast: + y_scaled = [_y ** (1 / (r - 1)) for _y in y] + x = y_scaled / np.linalg.norm(y_scaled, 1) + y = np.abs(np.array(ttsv1(node_dict, edge_dict, r, x))) + s = [a / (b ** (r - 1)) for a, b in zip(y, x)] + converged = (np.max(s) - np.min(s)) / np.min(s) < tol + else: + new_x = apply(new_H, x, g) + new_x = f(new_x, r) + # multiply by the sign to try and enforce positivity + new_x = np.sign(new_x[0]) * new_x / norm(new_x, 1) + if norm(x - new_x) <= tol: + break + x = new_x.copy() + it += 1 else: warn("Iteration did not converge!") - return {new_H.nodes[n]["old-label"]: c for n, c in zip(new_H.nodes, new_x)} + return {new_H.nodes[n]["old-label"]: c for n, c in zip(new_H.nodes, x / norm(x, 1))} + + +def z_eigenvector_centrality(H, max_iter=100, tol=1e-6, fast=True): + """Compute the Z-eigenvector centrality of a hypergraph. + + Parameters + ---------- + H : Hypergraph + The hypergraph of interest. + max_iter : int, optional + The maximum number of iterations before the algorithm terminates. + By default, 100. + tol : float > 0, optional + The desired convergence tolerance. By default, 1e-6. + + Returns + ------- + dict + Centrality, where keys are node IDs and values are centralities. The + centralities are 1-normalized. + + Raises + ------ + XGIError + If the hypergraph is not uniform. + + See Also + -------- + clique_eigenvector_centrality + + References + ---------- + Scalable Tensor Methods for Nonuniform Hypergraphs, + Sinan Aksoy, Ilya Amburg, Stephen Young, + https://doi.org/10.48550/arXiv.2306.17825 + + Three Hypergraph Eigenvector Centralities, + Austin R. Benson, + https://doi.org/10.1137/18M1203031 + """ + from ..algorithms import is_connected + from ..convert import to_hyperedge_dict + + # if there aren't any nodes, return an empty dict + n = H.num_nodes + if n == 0: + return dict() + # if the hypergraph is not connected, + # this metric doesn't make sense and should return nan. + if not is_connected(H): + return {n: np.nan for n in H.nodes} + + new_H = convert_labels_to_integers(H, "old-label") + edge_dict = to_hyperedge_dict(new_H) + r = H.edges.size.max() + pairs_dict = pairwise_incidence(edge_dict, r) + x = np.ones(n) / n + + if fast: + + def LR_evec(A): + """Compute the largest real eigenvalue of the matrix A""" + _, v = eigsh(A, k=1, which="LM", tol=1e-5, maxiter=200) + evec = np.array([_v for _v in v[:, 0]]) + if evec[0] < 0: + evec = -evec + return evec / np.linalg.norm(evec, 1) + + def f(u): + return LR_evec(ttsv2(pairs_dict, edge_dict, r, u, n)) - u + + h = 0.5 + converged = False + it = 0 + while it < max_iter and not converged: + print(f"{iter} of {max_iter-1}", flush=True) + x_new = x + h * f(x) + s = np.array([a / b for a, b in zip(x_new, x)]) + converged = (np.max(s) - np.min(s)) / np.min(s) < tol + x = x_new + else: + warn("Iteration did not converge!") + return { + new_H.nodes[n]["old-label"]: c + for n, c in zip(new_H.nodes, x / np.linalg.norm(x, 1)) + } def apply(H, x, g=lambda v, e: np.sum(v[list(e)])): diff --git a/xgi/utils/__init__.py b/xgi/utils/__init__.py index 8c3aad65a..c81d022f8 100644 --- a/xgi/utils/__init__.py +++ b/xgi/utils/__init__.py @@ -1,2 +1,3 @@ -from . import utilities +from . import tensor, utilities +from .tensor import * from .utilities import * diff --git a/xgi/utils/tensor.py b/xgi/utils/tensor.py new file mode 100644 index 000000000..3e125c2ff --- /dev/null +++ b/xgi/utils/tensor.py @@ -0,0 +1,201 @@ +## Tensor times same vector in all but one (TTSV1) and all but two (TTSV2) +from math import factorial + +import numpy as np +from numpy import prod +from scipy.signal import convolve +from scipy.sparse import coo_array +from scipy.special import binom as binomial + + +__all__ = [ + "pairwise_incidence", + "ttsv1", + "ttsv2", +] + + +def pairwise_incidence(H, r): + """Create pairwise adjacency dictionary from hyperedge list dictionary + + Parameters + ---------- + H : xgi.Hypergraph + The hypergraph of interest + r : int + maximum hyperedge size + + Returns + ------- + E : dict + a dictionary with node pairs as keys and the hyperedges they appear in as values + """ + E = {} + for e, edge in H.items(): + l = len(edge) + for i in range(0, l - 1): + for j in range(i + 1, l): + if (edge[i], edge[j]) not in E: + E[(edge[i], edge[j])] = [e] + else: + E[(edge[i], edge[j])].append(e) + if l < r: + for node in edge: + if (node, node) not in E: + E[(node, node)] = [e] + else: + E[(node, node)].append(e) + return E + + +def COEF(l, r): + """Return the Banerjee alpha coefficient + + Parameters + ---------- + l : int + size of given hyperedge + r : int + maximum hyperedge size + + Returns + ------- + float + the Banerjee coefficient + """ + return sum(((-1) ** j) * binomial(l, j) * (l - j) ** r for j in range(l + 1)) + + +def get_gen_coef_subset_expansion(edge_values, node_value, r): + k = len(edge_values) + subset_vector = [0] + subset_lengths = [0] + for i in range(k): + for t in range(len(subset_vector)): + subset_vector.append(subset_vector[t] + edge_values[i]) + subset_lengths.append(subset_lengths[t] + 1) + for i in range(len(subset_lengths)): + subset_lengths[i] = (-1) ** (k - subset_lengths[i]) + # subset_lengths[i] = -1 if (k - subset_lengths[i]) % 2 == 1 else 1 + total = sum( + [ + (node_value + subset_vector[i]) ** r * subset_lengths[i] + for i in range(len(subset_lengths)) + ] + ) + return total / factorial(r) + + +def get_gen_coef_fft(edge_without_node, a, node, l, r): + coefs = np.array([(a[node] ** i) / factorial(i) for i in range(r)]) + for u in edge_without_node: + _coefs = np.array([a[u] ** i / factorial(i) for i in range(r - l + 2)]) + _coefs[0] = 0 + coefs = convolve(coefs, _coefs)[0:r] + gen_fun_coef = coefs[-1] + return gen_fun_coef + + +def get_gen_coef_fft_fast_array(edge_without_node, a, node, l, r): + coefs = [1] + for i in range(1, r): + coefs.append(coefs[-1] * a[node] / i) + coefs = np.array(coefs) + for u in edge_without_node: + _coefs = [1] + for i in range(1, r - l + 2): + _coefs.append(_coefs[-1] * a[u] / i) + _coefs = np.array(_coefs) + _coefs[0] = 0 + coefs = convolve(coefs, _coefs)[0:r] + gen_fun_coef = coefs[-1] + return gen_fun_coef + + +def ttsv1(E, H, r, a): + n = len(E) + s = np.zeros(n) + r_minus_1_factorial = factorial(r - 1) + for node, edges in E.items(): + c = 0 + for e in edges: + l = len(H[e]) + alpha = COEF(l, r) + edge_without_node = [v for v in H[e] if v != node] + if l == r: + gen_fun_coef = prod(a[edge_without_node]) + elif 2 ** (l - 1) < r * (l - 1): + gen_fun_coef = get_gen_coef_subset_expansion( + a[edge_without_node], a[node], r - 1 + ) + else: + gen_fun_coef = get_gen_coef_fft_fast_array( + edge_without_node, a, node, l, r + ) + c += r_minus_1_factorial * l * gen_fun_coef / alpha + s[node] = c + return s + + +def ttsv2(E, H, r, a, n): + s = {} + r_minus_2_factorial = factorial(r - 2) + for nodes, edges in E.items(): + v1 = nodes[0] + v2 = nodes[1] + c = 0 + for e in edges: + l = len(H[e]) + alpha = COEF(l, r) + edge_without_node = [v for v in H[e] if v != v1 and v != v2] + if v1 != v2: + if 2 ** (l - 2) < (r - 2) * (l - 2): + gen_fun_coef = get_gen_coef_subset_expansion( + a[edge_without_node], a[v1] + a[v2], r - 2 + ) + else: + coefs = [1] + for i in range(1, r - 1): + coefs.append(coefs[-1] * (a[v1] + a[v2]) / i) + coefs = np.array(coefs) + for u in H[e]: + if u != v1 and u != v2: + _coefs = [1] + for i in range(1, r - l + 2): + _coefs.append(_coefs[-1] * a[u] / i) + _coefs = np.array(_coefs) + _coefs[0] = 0 + coefs = convolve(coefs, _coefs)[0 : r - 1] + gen_fun_coef = coefs[-1] + else: + if 2 ** (l - 1) < (r - 2) * (l - 1): + gen_fun_coef = get_gen_coef_subset_expansion( + a[edge_without_node], a[v1], r - 2 + ) + else: + coefs = [1] + for i in range(1, r - 1): + coefs.append(coefs[-1] * (a[v1]) / i) + coefs = np.array(coefs) + for u in H[e]: + if u != v1 and u != v2: + _coefs = [1] + for i in range(1, r - l + 1): + _coefs.append(_coefs[-1] * a[v1] / i) + _coefs = np.array(_coefs) + _coefs[0] = 0 + coefs = convolve(coefs, _coefs)[0 : r - 1] + gen_fun_coef = coefs[-1] + c += r_minus_2_factorial * l * gen_fun_coef / alpha + s[nodes] = c + if v1 == v2: + s[nodes] /= 2 + first = np.zeros(len(s)) + second = np.zeros(len(s)) + value = np.zeros(len(s)) + for i, k in enumerate(s.keys()): + first[i] = k[0] + second[i] = k[1] + value[i] = s[k] + Y = coo_array((value, (first, second)), (n, n)) + return Y + Y.T