diff --git a/fitting_ipn.ipynb b/fitting_ipn.ipynb new file mode 100644 index 0000000..9850315 --- /dev/null +++ b/fitting_ipn.ipynb @@ -0,0 +1,155 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "from lcs import *\n", + "import networkx as nx" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "G = nx.karate_club_graph()\n", + "A = nx.adjacency_matrix(G, weight=None).todense()\n", + "n = A.shape[0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "b0 = 0.5\n", + "a = 0.01\n", + "alpha = 1\n", + "max_iter = 100\n", + "tol = 1e-3\n", + "\n", + "cf1 = lambda nu, b: 1 - (1 - b) ** nu\n", + "cf2 = lambda nu, b: b * (nu >= 2)\n", + "\n", + "gamma = 0.1\n", + "b = 0.03\n", + "rho0 = 1\n", + "tmax = 1000\n", + "\n", + "realizations = 100\n", + "\n", + "x0 = np.zeros(n)\n", + "x0[list(random.sample(range(n), int(rho0 * n)))] = 1\n", + "c1 = cf1(np.arange(n), b)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# mean fitting\n", + "mode = \"mean\"\n", + "ipn_c1 = 0\n", + "for _ in range(realizations):\n", + " x = contagion_process(A, gamma, c1, x0, tmin=0, tmax=tmax)\n", + " ipn_c1 += infections_per_node(x, mode) / realizations\n", + "\n", + "f = lambda b: ipn_func(b, ipn_c1, cf2, gamma, A, rho0, 100, tmax, mode)\n", + "b1, bvec1, fvec1 = robbins_monro_solve(\n", + " f, b0, a, alpha, max_iter, tol, verbose=True, loss=\"function\", return_values=True\n", + ")\n", + "\n", + "# median fitting\n", + "mode = \"median\"\n", + "ipn_c1 = 0\n", + "for _ in range(realizations):\n", + " x = contagion_process(A, gamma, c1, x0, tmin=0, tmax=tmax)\n", + " ipn_c1 += infections_per_node(x, mode) / realizations\n", + "\n", + "f = lambda b: ipn_func(b, ipn_c1, cf2, gamma, A, rho0, 100, tmax, mode)\n", + "b2, bvec2, fvec2 = robbins_monro_solve(\n", + " f, b0, a, alpha, max_iter, tol, verbose=True, loss=\"function\", return_values=True\n", + ")\n", + "\n", + "# max fitting\n", + "mode = \"max\"\n", + "ipn_c1 = 0\n", + "for _ in range(realizations):\n", + " x = contagion_process(A, gamma, c1, x0, tmin=0, tmax=tmax)\n", + " ipn_c1 += infections_per_node(x, mode) / realizations\n", + "\n", + "f = lambda b: ipn_func(b, ipn_c1, cf2, gamma, A, rho0, 100, tmax, mode)\n", + "b3, bvec3, fvec3 = robbins_monro_solve(\n", + " f, b0, a, alpha, max_iter, tol, verbose=True, loss=\"function\", return_values=True\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "plt.figure()\n", + "plt.subplot(121)\n", + "plt.plot(bvec1, label=\"mean\")\n", + "plt.plot(bvec2, label=\"median\")\n", + "plt.plot(bvec3, label=\"max\")\n", + "plt.xlabel(\"iterations\")\n", + "plt.ylabel(\"fitted probability\")\n", + "\n", + "plt.subplot(122)\n", + "plt.plot(fvec1, label=\"mean\")\n", + "plt.plot(fvec2, label=\"median\")\n", + "plt.plot(fvec3, label=\"max\")\n", + "plt.legend()\n", + "plt.xlabel(\"iterations\")\n", + "plt.ylabel(\"diff. between expected IPNs\")\n", + "plt.tight_layout()\n", + "\n", + "plt.savefig(\"test.png\", dpi=1000)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "hyper", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.0" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/lcs/contagion.py b/lcs/contagion.py index 2dc8f81..0ab8c49 100644 --- a/lcs/contagion.py +++ b/lcs/contagion.py @@ -40,6 +40,66 @@ def contagion_process(A, gamma, c, x0, tmin=0, tmax=20, dt=1, random_seed=None): if not isinstance(A, ndarray): A = A.todense() + if A.dtype != "int": + A = np.array(A, dtype=int) + + if random_seed is not None: + np.random.seed(random_seed) + + n, m = np.shape(A) + if n != m: + raise Exception("Matrix must be square!") + + T = int((tmax - tmin) / dt) + x = np.zeros((T, n), dtype=int) + x[0] = x0.copy() + + for t in range(T - 1): + u = np.random.random(n) + nu = A @ x[t] + x[t + 1] = (1 - x[t]) * (u <= c[nu] * dt) + x[t] * (u > gamma * dt) + return x + + +def contagion_process_loop(A, gamma, c, x0, tmin=0, tmax=20, dt=1, random_seed=None): + """A neighborhood-based contagion process on pairwise networks + + Parameters + ---------- + A : Numpy ndarray + The adjacency matrix + gamma : float + The healing rate + c : Numpy ndarray + A 1d vector of the contagion rates. Should be N x 1. + x0 : Numpy ndarray + A 1D vector of the initial nodal states, either 0 or 1. + Should be N x 1. + tmin : int, optional + The time at which to start the simulation, by default 0 + tmax : float, optional + The time at which to terminate the simulation, by default 20 + dt : float, optional + The time step, by default 1 + random_seed : int, optional + The seed for the random process, by default None + + Returns + ------- + Numpy ndarray + The time series of the contagion process. Should be T x N. + + Raises + ------ + Exception + If adjacency matrix isn't square + """ + if not isinstance(A, ndarray): + A = A.todense() + + if A.dtype != "int": + A = np.array(A, dtype=int) + if random_seed is not None: random.seed(random_seed) @@ -48,7 +108,7 @@ def contagion_process(A, gamma, c, x0, tmin=0, tmax=20, dt=1, random_seed=None): raise Exception("Matrix must be square!") T = int((tmax - tmin) / dt) - x = np.zeros((T, n)) + x = np.zeros((T, n), dtype=int) x[0] = x0.copy() for t in range(T - 1): @@ -59,7 +119,7 @@ def contagion_process(A, gamma, c, x0, tmin=0, tmax=20, dt=1, random_seed=None): if x[t, i] == 1 and random.random() <= gamma * dt: x[t + 1, i] = 0 elif x[t, i] == 0: - infected_nbrs = int(np.round(A[i] @ x[t])) + infected_nbrs = A[i] @ x[t] if random.random() <= c[infected_nbrs] * dt: x[t + 1, i] = 1 return x diff --git a/lcs/inference.py b/lcs/inference.py index 356e16c..db9e733 100644 --- a/lcs/inference.py +++ b/lcs/inference.py @@ -57,9 +57,16 @@ def infer_adjacency_matrix( return_likelihood=False, ): # form initial adjacency matrix - if not isinstance(A0, ndarray): - A0 = A0.todense() - A = np.array(A0, dtype=int) + A = A0.copy() + if not isinstance(A, ndarray): + A = A.todense() + + if A.dtype != "int": + A = np.array(A, dtype=int) + + if x.dtype != "int": + x = np.array(x, dtype=int) + n, m = A.shape p_rho = _check_beta_parameters(p_rho, [2]) @@ -152,6 +159,15 @@ def infer_adjacency_matrix( def infer_dynamics(x, A, p_gamma=None, p_c=None, nsamples=1): # Our priors are drawn from a beta distribution such that # the posteriors are also from a beta distribution + if not isinstance(A, ndarray): + A = A.todense() + + if A.dtype != "int": + A = np.array(A, dtype=int) + + if x.dtype != "int": + x = np.array(x, dtype=int) + n = A.shape[0] p_gamma = _check_beta_parameters(p_gamma, [2]) @@ -182,7 +198,7 @@ def infer_dynamics(x, A, p_gamma=None, p_c=None, nsamples=1): def count_all_infection_events(x, A): n = x.shape[1] - nus = np.round(A @ x[:-1].T).astype(int) + nus = A @ x[:-1].T # 1 if node i was infected at time t, 0 otherwise was_infected = x[1:] * (1 - x[:-1]) @@ -198,7 +214,7 @@ def count_all_infection_events(x, A): def count_local_infection_events(i, x, A): n = x.shape[1] - nus_i = np.round(A[i] @ x[:-1].T).astype(int) + nus_i = A[i] @ x[:-1].T x_i = x[:, i] # select node i from all time steps # 1 if node i was infected at time t, 0 otherwise @@ -241,6 +257,50 @@ def update_adjacency_matrix(i, j, A): return -1 +def _count_mask(array, boolean_mask, axis, max_val): + """ + Count the occurrences of values in `array` that correspond to `True` values in `boolean_mask`, + along the specified axis `axis`. + + Parameters + ---------- + array : numpy.ndarray + The input array to count values from. + boolean_mask : numpy.ndarray + A boolean mask with the same shape as `array`, indicating which values to count. + axis : int + The axis along which to count values. + Returns + ------- + numpy.ndarray + An array of counts, with shape `(n,)` where `n` is the number of unique values in `array`. + """ + boolean_mask = boolean_mask.astype(int) + array = array.astype(int) + # assign all values that fail the boolean mask to n+1, + # these should get removed before returning result + masked_arr = np.where(boolean_mask, array.T, max_val + 1) + return np.apply_along_axis( + np.bincount, axis=axis, arr=masked_arr, minlength=max_val + 2 + ).T + + +def _check_beta_parameters(p, size): + if isinstance(p, (list, tuple)): + p = np.array(p) + + if p is None: + p = np.ones(size) + elif np.any(np.array(p) <= 0): + raise Exception("Parameters in a beta distribution must be greater than 0.") + + if p.shape != tuple(size): + raise Exception("Parameters are in the wrong shape.") + + return p + + +############## EXTRA ##################### def infer_dyamics_loop(x, A, p_gamma, p_c): T, n = x.shape @@ -280,7 +340,6 @@ def infer_dyamics_loop(x, A, p_gamma, p_c): return gamma, c -@jit(nopython=True) def count_all_infection_events_loop(x, A): """ Counts the number of infection events between all pairs of nodes in a network over time. @@ -297,20 +356,19 @@ def count_all_infection_events_loop(x, A): T = x.shape[0] n = x.shape[1] - nl = np.zeros((n, n)) + nl = np.zeros((n, n), dtype=int) ml = np.zeros((n, n)) for t in range(T - 1): nus = A.dot(x[t]) for i, nu in enumerate(nus): - nu = int(round(nu)) + nu = int(np.round(nu)) nl[i, nu] += x[t + 1, i] * (1 - x[t, i]) ml[i, nu] += (1 - x[t + 1, i]) * (1 - x[t, i]) return nl, ml -@jit(nopython=True) def count_local_infection_events_loop(i, x, A): T = x.shape[0] n = x.shape[1] @@ -319,51 +377,7 @@ def count_local_infection_events_loop(i, x, A): ml = np.zeros(n) for t in range(T - 1): - nu = int(round(A[i].dot(x[t]))) + nu = int(np.round(A[i].dot(x[t]))) nl[nu] += x[t + 1, i] * (1 - x[t, i]) ml[nu] += (1 - x[t + 1, i]) * (1 - x[t, i]) return nl, ml - - -def _count_mask(array, boolean_mask, my_axis, max_val): - """ - Count the occurrences of values in `array` that correspond to `True` values in `boolean_mask`, - along the specified axis `my_axis`. - - Parameters - ---------- - array : numpy.ndarray - The input array to count values from. - boolean_mask : numpy.ndarray - A boolean mask with the same shape as `array`, indicating which values to count. - my_axis : int - The axis along which to count values. - Returns - ------- - numpy.ndarray - An array of counts, with shape `(n,)` where `n` is the number of unique values in `array`. - """ - n = array.shape[0] - boolean_mask = boolean_mask.astype(int) - array = array.astype(int) - # assign all values that fail the boolean mask to n+1, - # these should get removed before returning result - masked_arr = np.where(boolean_mask, array.T, max_val + 1) - return np.apply_along_axis( - np.bincount, axis=my_axis, arr=masked_arr, minlength=max_val + 2 - ).T - - -def _check_beta_parameters(p, size): - if isinstance(p, (list, tuple)): - p = np.array(p) - - if p is None: - p = np.ones(size) - elif np.any(np.array(p) <= 0): - raise Exception("Parameters in a beta distribution must be greater than 0.") - - if p.shape != tuple(size): - raise Exception("Parameters are in the wrong shape.") - - return p diff --git a/lcs/utilities.py b/lcs/utilities.py index d63fad0..84a53a9 100644 --- a/lcs/utilities.py +++ b/lcs/utilities.py @@ -39,12 +39,15 @@ def hamming_distance(A1, A2): def infections_per_node(x, mode="mean"): - if mode == "mean": - return np.mean(np.sum(x[1:] - x[:-1] > 0, axis=0)) - if mode == "median": - return np.median(np.sum(x[1:] - x[:-1] > 0, axis=0)) - if mode == "max": - return np.max(np.sum(x[1:] - x[:-1] > 0, axis=0)) + match mode: + case "mean": + return np.mean(np.sum(x[1:] - x[:-1] > 0, axis=0)) + case "median": + return np.median(np.sum(x[1:] - x[:-1] > 0, axis=0)) + case "max": + return np.max(np.sum(x[1:] - x[:-1] > 0, axis=0)) + case _: + raise Exception("Invalid loss!") def nu_distribution(x, A): @@ -65,7 +68,7 @@ def degrees(A): return A.sum(axis=0) -def powerlaw(n, minval, maxval, r): +def power_law(n, minval, maxval, r): u = np.random.random(n) a = minval ** (1 - r) b = maxval ** (1 - r) @@ -87,46 +90,59 @@ def mean_power_law(minval, maxval, r): return num / den -def match_contagion_rates( - cf1, cf2, gamma, b, A, tmax, realizations=100, tol=0.01, max_iter=10, mode="mean" -): +def ipn_func(b, ipn_target, cf, gamma, A, rho0, realizations, tmax, mode): n = A.shape[0] - rho0 = 1 x0 = np.zeros(n) x0[list(random.sample(range(n), int(rho0 * n)))] = 1 - c1 = cf1(np.arange(n), b) + c = cf(np.arange(n), b) - ipn_c1 = 0 - for _ in range(realizations): - x = contagion_process(A, gamma, c1, x0, tmin=0, tmax=tmax) - ipn_c1 += infections_per_node(x, mode) / realizations - - blo = 0 - bhi = 1 - ipn_lo = 0 - ipn_hi = 0 - c2_hi = cf2(np.arange(n), bhi) + ipn = 0 for _ in range(realizations): - x = contagion_process(A, gamma, c2_hi, x0, tmin=0, tmax=tmax) - ipn_hi += infections_per_node(x, mode) / realizations - - it = 0 - bnew = (bhi - blo) / 2 - while it < max_iter and bhi - blo > tol: - c2_new = cf2(np.arange(n), bnew) - ipn_new = 0 - for _ in range(realizations): - x = contagion_process(A, gamma, c2_new, x0, tmin=0, tmax=tmax) - ipn_new += infections_per_node(x, mode) / realizations - - if ipn_new > ipn_c1: - bhi = bnew - elif ipn_new < ipn_c1: - blo = bnew - bnew = (bhi - blo) / 2 + x = contagion_process(A, gamma, c, x0, tmin=0, tmax=tmax) + ipn += infections_per_node(x, mode) / realizations + return ipn - ipn_target + + +def robbins_monro_solve( + f, + x0, + a, + alpha, + max_iter=100, + tol=1e-3, + loss="function", + verbose=False, + return_values=True, +): + x = x0 + val = f(x0) + + it = 1 + xvec = [x] + fvec = [val] + diff = np.inf + while diff > tol and it <= max_iter: + a_n = a * it**-alpha + x -= a_n * val + x = np.clip(x, 0, 1) + val = f(x) + xvec.append(x) # save results + fvec.append(val) + if it % 3 == 0: + match loss: + case "function": + diff = abs(x - xvec[it - 2]) + case "arg": + diff = abs(val) + case _: + raise Exception("Invalid loss type!") + + if verbose: + print(it, diff) it += 1 - print(blo, bhi, ipn_new, ipn_c1) - - return bnew, bhi - blo + if return_values: + return x, xvec, fvec + else: + return x diff --git a/setup.py b/setup.py index 2f6a700..1771bc3 100644 --- a/setup.py +++ b/setup.py @@ -5,8 +5,8 @@ __version__ = "0.0" -if sys.version_info < (3, 8): - sys.exit("lcs requires Python 3.8 or later.") +if sys.version_info < (3, 10): + sys.exit("lcs requires Python 3.10 or later.") name = "lcs" diff --git a/tests/test_inference.py b/tests/test_inference.py index 109476a..0429be5 100644 --- a/tests/test_inference.py +++ b/tests/test_inference.py @@ -16,14 +16,12 @@ def test_infer_adjacency_matrix(x4, A4): def test_count_all_infection_events(x4, A4): - A4 = np.array(A4, dtype=float) assert np.array_equal( count_all_infection_events(x4, A4), count_all_infection_events_loop(x4, A4) ) def test_count_local_infection_events(x4, A4): - A4 = np.array(A4, dtype=float) assert np.array_equal( count_local_infection_events(1, x4, A4), count_local_infection_events_loop(1, x4, A4), diff --git a/tests/test_utilities.py b/tests/test_utilities.py index a5e5a45..59de1d3 100644 --- a/tests/test_utilities.py +++ b/tests/test_utilities.py @@ -22,3 +22,12 @@ def test_hamming_distance(A1, A2, A3): def test_infections_per_node(x1): assert infections_per_node(x1) == 0.3 + assert infections_per_node(x1, "max") == 1 + assert infections_per_node(x1, "median") == 0 + + +def test_degrees(A1): + d1 = degrees(A1) + true_d1 = np.array([0, 2, 2, 1, 1]) + assert len(d1.shape) == 1 + assert np.all(d1 == true_d1)