From 1938d04d9fb0fcbf80a816028b3770c7cb260b17 Mon Sep 17 00:00:00 2001
From: Nicholas Landry
Date: Sat, 19 Oct 2024 16:53:36 -0400
Subject: [PATCH] added unit tests

---
 tests/algorithms/test_centrality.py | 160 ++++++++++++++++++++++------
 xgi/algorithms/centrality.py        |  15 ++-
 xgi/utils/tensor.py                 |  35 +++---
 3 files changed, 149 insertions(+), 61 deletions(-)

diff --git a/tests/algorithms/test_centrality.py b/tests/algorithms/test_centrality.py
index 80fecabe..95b82e07 100644
--- a/tests/algorithms/test_centrality.py
+++ b/tests/algorithms/test_centrality.py
@@ -28,12 +28,12 @@ def test_clique_eigenvector_centrality():
     H = xgi.sunflower(3, 1, 3)
     c = H.nodes.clique_eigenvector_centrality.asnumpy()
     assert norm(c[1:] - c[1]) < 1e-4
-    assert abs(c[0] / c[1] - ratio(3, 3, kind="CEC")) < 1e-4
+    assert abs(c[0] / c[1] - _ratio(3, 3, kind="CEC")) < 1e-4
 
     H = xgi.sunflower(5, 1, 7)
     c = H.nodes.clique_eigenvector_centrality.asnumpy()
     assert norm(c[1:] - c[1]) < 1e-4
-    assert abs(c[0] / c[1] - ratio(5, 7, kind="CEC")) < 1e-4
+    assert abs(c[0] / c[1] - _ratio(5, 7, kind="CEC")) < 1e-4
 
 
 @pytest.mark.slow
@@ -59,12 +59,12 @@ def test_h_eigenvector_centrality():
     H = xgi.sunflower(3, 1, 5)
     c = H.nodes.h_eigenvector_centrality(max_iter=1000).asnumpy()
     assert norm(c[1:] - c[1]) < 1e-4
-    assert abs(c[0] / c[1] - ratio(3, 5, kind="HEC")) < 1e-4
+    assert abs(c[0] / c[1] - _ratio(3, 5, kind="HEC")) < 1e-4
 
     H = xgi.sunflower(5, 1, 7)
     c = H.nodes.h_eigenvector_centrality(max_iter=1000).asnumpy()
     assert norm(c[1:] - c[1]) < 1e-4
-    assert abs(c[0] / c[1] - ratio(5, 7, kind="HEC")) < 1e-4
+    assert abs(c[0] / c[1] - _ratio(5, 7, kind="HEC")) < 1e-4
 
     with pytest.raises(XGIError):
         H = xgi.Hypergraph([[1, 2], [2, 3, 4]])
@@ -128,36 +128,6 @@ def test_line_vector_centrality():
         xgi.line_vector_centrality(H)
 
 
-def ratio(r, m, kind="CEC"):
-    """Generate the ratio between largest and second largest centralities
-    for the sunflower hypergraph with one core node.
-
-    Parameters
-    ----------
-    r : int
-        Number of petals
-    m : int
-        Size of edges
-    kind : str, default: "CEC"
-        "CEC" or "HEC"
-
-    Returns
-    -------
-    float
-        Ratio
-
-    References
-    ----------
-    Three Hypergraph Eigenvector Centralities,
-    Austin R. Benson,
-    https://doi.org/10.1137/18M1203031
-    """
-    if kind == "CEC":
-        return 2 * r * (m - 1) / (np.sqrt(m**2 + 4 * (m - 1) * (r - 1)) + m - 2)
-    elif kind == "HEC":
-        return r ** (1.0 / m)
-
-
 def test_katz_centrality(edgelist1, edgelist8):
     # test hypergraph with no edge
     H = xgi.Hypergraph()
@@ -195,3 +165,125 @@
     }
     for n in c:
         assert np.allclose(c[n], expected_c[n])
+
+
+@pytest.mark.slow
+def test_h_eigenvector_tensor_centrality():
+    # test empty hypergraph
+    H = xgi.Hypergraph()
+    c = xgi.h_eigenvector_tensor_centrality(H)
+    assert c == dict()
+
+    # Test no edges
+    H.add_nodes_from([0, 1, 2])
+    hec = xgi.h_eigenvector_tensor_centrality(H)
+    for i in hec:
+        assert np.isnan(hec[i])
+
+    # test disconnected
+    H.add_edge([0, 1])
+    hec = xgi.h_eigenvector_tensor_centrality(H)
+    assert set(hec) == {0, 1, 2}
+    for i in hec:
+        assert np.isnan(hec[i])
+
+    H = xgi.sunflower(3, 1, 5)
+    c = xgi.h_eigenvector_tensor_centrality(H, max_iter=1000)
+    assert (
+        max([abs(c[0] / c[i + 1] - _ratio(3, 5, kind="HEC")) for i in range(12)]) < 1e-4
+    )
+
+    H = xgi.sunflower(5, 1, 7)
+    print(H.num_nodes)
+    c = xgi.h_eigenvector_tensor_centrality(H, max_iter=1000)
+    assert (
+        max([abs(c[0] / c[i + 1] - _ratio(5, 7, kind="HEC")) for i in range(29)]) < 1e-4
+    )
+
+    H = xgi.Hypergraph([[1, 2], [2, 3, 4]])
+    c = xgi.h_eigenvector_tensor_centrality(H)
+    true_c = {
+        1: 0.24458437592396465,
+        2: 0.3014043407819482,
+        3: 0.22700561916516002,
+        4: 0.22700566412892714,
+    }
+    for i in c:
+        assert np.allclose(c[i], true_c[i])
+
+
+@pytest.mark.slow
+def test_z_eigenvector_tensor_centrality():
+    # test empty hypergraph
+    H = xgi.Hypergraph()
+    c = xgi.z_eigenvector_tensor_centrality(H)
+    assert c == dict()
+
+    # Test no edges
+    H.add_nodes_from([0, 1, 2])
+    hec = xgi.z_eigenvector_tensor_centrality(H)
+    for i in hec:
+        assert np.isnan(hec[i])
+
+    # test disconnected
+    H.add_edge([0, 1])
+    hec = xgi.z_eigenvector_tensor_centrality(H)
+    assert set(hec) == {0, 1, 2}
+    for i in hec:
+        assert np.isnan(hec[i])
+
+    H = xgi.sunflower(3, 1, 5)
+    c = xgi.z_eigenvector_tensor_centrality(H, max_iter=1000)
+    assert (
+        max([abs(c[0] / c[i + 1] - _ratio(3, 5, kind="ZEC")) for i in range(12)]) < 1e-4
+    )
+
+    H = xgi.sunflower(5, 1, 7)
+    print(H.num_nodes)
+    c = xgi.z_eigenvector_tensor_centrality(H, max_iter=1000)
+    assert (
+        max([abs(c[0] / c[i + 1] - _ratio(5, 7, kind="ZEC")) for i in range(29)]) < 1e-4
+    )
+
+    H = xgi.Hypergraph([[1, 2], [2, 3, 4]])
+    c = xgi.z_eigenvector_tensor_centrality(H, max_iter=10000)
+    true_c = {
+        1: 0.45497398635982933,
+        2: 0.45900452108663403,
+        3: 0.04301074627676834,
+        4: 0.04301074627676829,
+    }
+    for i in c:
+        assert np.allclose(c[i], true_c[i])
+
+
+def _ratio(r, m, kind="CEC"):
+    """Generate the ratio between largest and second largest centralities
+    for the sunflower hypergraph with one core node.
+
+    Parameters
+    ----------
+    r : int
+        Number of petals
+    m : int
+        Size of edges
+    kind : str, default: "CEC"
+        "CEC", "HEC", or "ZEC"
+
+    Returns
+    -------
+    float
+        Ratio
+
+    References
+    ----------
+    Three Hypergraph Eigenvector Centralities,
+    Austin R. Benson,
+    https://doi.org/10.1137/18M1203031
+    """
+    if kind == "CEC":
+        return 2 * r * (m - 1) / (np.sqrt(m**2 + 4 * (m - 1) * (r - 1)) + m - 2)
+    elif kind == "HEC":
+        return r ** (1.0 / m)
+    elif kind == "ZEC":
+        return r**0.5
diff --git a/xgi/algorithms/centrality.py b/xgi/algorithms/centrality.py
index d5c021f4..c19f4d2a 100644
--- a/xgi/algorithms/centrality.py
+++ b/xgi/algorithms/centrality.py
@@ -446,7 +446,6 @@ def h_eigenvector_tensor_centrality(H, max_iter=100, tol=1e-6):
     converged = False
     it = 0
     while it < max_iter and not converged:
-        print(f"{it + 1} of {max_iter}", flush=True)
         y_scaled = [_y ** (1 / (r - 1)) for _y in y]
         x = y_scaled / norm(y_scaled, 1)
         y = np.abs(np.array(ttsv1(node_dict, edge_dict, r, x)))
@@ -457,7 +456,10 @@
         it += 1
     else:
         warn("Iteration did not converge!")
-    return {new_H.nodes[n]["old-label"]: c for n, c in zip(new_H.nodes, x / norm(x, 1))}
+    return {
+        new_H.nodes[n]["old-label"]: c.item()
+        for n, c in zip(new_H.nodes, x / norm(x, 1))
+    }
 
 
 def z_eigenvector_tensor_centrality(H, max_iter=100, tol=1e-6):
@@ -509,7 +511,8 @@ def z_eigenvector_tensor_centrality(H, max_iter=100, tol=1e-6):
         return {n: np.nan for n in H.nodes}
     new_H = convert_labels_to_integers(H, "old-label")
     edge_dict = new_H.edges.members(dtype=dict)
-    pairs_dict = pairwise_incidence(edge_dict, r)
+    pairs_dict = pairwise_incidence(edge_dict)
+    r = H.edges.size.max()
 
 
     def LR_evec(A):
@@ -529,7 +532,6 @@ def f(u):
     converged = False
     it = 0
     while it < max_iter and not converged:
-        print(f"{it + 1} of {max_iter}", flush=True)
         x_new = x + h * f(x)
         s = np.array([a / b for a, b in zip(x_new, x)])
         converged = (np.max(s) - np.min(s)) / np.min(s) < tol
@@ -539,4 +541,7 @@
         it += 1
     else:
         warn("Iteration did not converge!")
-    return {new_H.nodes[n]["old-label"]: c for n, c in zip(new_H.nodes, x / norm(x, 1))}
+    return {
+        new_H.nodes[n]["old-label"]: c.item()
+        for n, c in zip(new_H.nodes, x / norm(x, 1))
+    }
diff --git a/xgi/utils/tensor.py b/xgi/utils/tensor.py
index 17246edd..aa0f9bbf 100644
--- a/xgi/utils/tensor.py
+++ b/xgi/utils/tensor.py
@@ -1,4 +1,6 @@
 ## Tensor times same vector in all but one (TTSV1) and all but two (TTSV2)
+from collections import defaultdict
+from itertools import combinations
 from math import factorial
 
 import numpy as np
@@ -14,37 +16,26 @@
 ]
 
 
-def pairwise_incidence(H, r):
+def pairwise_incidence(edgedict):
     """Create pairwise adjacency dictionary from hyperedge list dictionary
 
     Parameters
     ----------
-    H : xgi.Hypergraph
-        The hypergraph of interest
-    r : int
-        maximum hyperedge size
+    edgedict : dict
+        edge IDs are keys, edges are values
 
     Returns
     -------
-    E : dict
+    pairs : dict
         a dictionary with node pairs as keys and the hyperedges they appear in as values
     """
-    E = {}
-    for e, edge in H.items():
-        l = len(edge)
-        for i in range(0, l - 1):
-            for j in range(i + 1, l):
-                if (edge[i], edge[j]) not in E:
-                    E[(edge[i], edge[j])] = [e]
-                else:
-                    E[(edge[i], edge[j])].append(e)
-        if l < r:
-            for node in edge:
-                if (node, node) not in E:
-                    E[(node, node)] = [e]
-                else:
-                    E[(node, node)].append(e)
-    return E
+    pairs = defaultdict(set)
+    for e, edge in edgedict.items():
+        for i, j in combinations(sorted(edge), 2):
+            pairs[(i, j)].add(e)
+        for n in edge:
+            pairs[(n, n)].add(e)
+    return pairs
 
 
 def banerjee_coeff(l, r):
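
As a quick way to exercise the new tensor centralities against the sunflower ratios encoded in _ratio above, the following minimal sketch can be run interactively. It assumes this patch is applied on top of xgi; the function names, the max_iter keyword, and the node labeling (core node 0, petal nodes 1..r*(m-1)) are taken from the tests in the diff, and the expected ratios come from the _ratio formulas.

import numpy as np
import xgi

# Sunflower with r = 3 petals, one core node (label 0), and edges of size m = 5.
H = xgi.sunflower(3, 1, 5)

# H-eigenvector tensor centrality: the core-to-petal ratio should approach r ** (1 / m).
hec = xgi.h_eigenvector_tensor_centrality(H, max_iter=1000)
print(hec[0] / hec[1], 3 ** (1 / 5))

# Z-eigenvector tensor centrality: the core-to-petal ratio should approach sqrt(r).
zec = xgi.z_eigenvector_tensor_centrality(H, max_iter=1000)
print(zec[0] / zec[1], np.sqrt(3))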