diff --git a/src/resp_ode/utils.py b/src/resp_ode/utils.py index 73ca141..7d2900c 100644 --- a/src/resp_ode/utils.py +++ b/src/resp_ode/utils.py @@ -2,7 +2,6 @@ import datetime import glob -import json import os import sys @@ -785,7 +784,7 @@ def find_vax_bin(vax_shots: int, max_doses: int) -> int: return min(vax_shots, max_doses) -def convert_hist(strains: str, STRAIN_IDX: IntEnum, num_strains: int) -> int: +def convert_hist(strains: str, STRAIN_IDX: IntEnum) -> int: """ a function that transforms a comma separated list of strains and transform them into an immune history state. Any unrecognized strain strings inside of `strains` do not contiribute to the returned state. @@ -1133,722 +1132,6 @@ def load_age_demographics( return demographic_data -def prep_serology_data( - path, num_historical_strains, historical_time_breakpoints -): - """ - DEPRECATED: USE ABM INFORMED INITIALIZATION ROUTINES - - reads serology data from path, filters to only USA site, - filters Date Ranges from Sep 2020 - Feb 2022, - calculates monotonically increasing rates of change (to combat sero-reversion from the assay), - and converts string dates to datetime.dt.date objects. Then interpolates all time spans into individual days. - - TODO: change method of combatting sero-reversion to one outlined here: - https://www.nature.com/articles/s41467-023-37944-5 - - Parameters - ---------- - path: str - relative path to serology data sourced from - https://data.cdc.gov/Laboratory-Surveillance/Nationwide-Commercial-Laboratory-Seroprevalence-Su/d2tw-32xv - - num_historical_strains: int - the number of historical strains to be used in the "strain_select" column of the output. - most recent strain will always be placed as value num_historical_strains - 1. While oldest at 0. - - historical_time_breakpoints: list[datetime.date] - list of datetime.date breakpoints on which an older strain transitions to a newer one. - for example, omicron took off on (2021, 11, 19), meaning anything before that date is delta, on or after is omicron. - - - Returns - ---------- - serology table containing the following additional columns: - `collection_start` = assay collection start date \n - `collection_end` = assay collection end date \n - `age0_age1_diff` = difference in `Rate (%) [Anti-N, age1-age2 Years Prevalence]` from current and previous collection. - enforced to be positive or 0 to combat sero-reversion. Columns repeats for age bins [0-17, 18-49, 50-64, 65+] \n - `strain_select` = the strain index value for sero conversion on that day. As decided by historical_time_breakpoints and - the `num_historical_strains` - - Modifies - ---------- - `Rate (%) [Anti-N, age1-age2 Years Prevalence, Rounds 1-30 only]` columns to enforce monotonicity. - """ - serology = pd.read_csv(path) - # filter down to USA and pick a date after omicron surge to load serology from. - serology = serology[serology["Site"] == "US"] - dates_of_interest = pd.read_csv("data/dates_of_interest.csv")[ - "date_name" - ].values - # pick date ranges from the dates of interest list - serology = serology[ - [ - date in dates_of_interest - for date in serology["Date Range of Specimen Collection"] - ] - ] - # focus on anti-n sero prevalence in all age groups - columns_of_interest = [ - "Date Range of Specimen Collection", - "Rate (%) [Anti-N, 0-17 Years Prevalence]", - "Rate (%) [Anti-N, 18-49 Years Prevalence, Rounds 1-30 only]", - "Rate (%) [Anti-N, 50-64 Years Prevalence, Rounds 1-30 only]", - "Rate (%) [Anti-N, 65+ Years Prevalence, Rounds 1-30 only]", - ] - serology = serology[columns_of_interest] - # enforce monotonicity to combat sero-reversion in early pandemic serology assays - # start at index 1 in columns of interest to avoid date column - # TODO https://www.nature.com/articles/s41467-023-37944-5 use this method for combating sero-reversion - for diff_column in columns_of_interest[1:]: - for idx in range(1, len(serology[diff_column])): - serology[diff_column].iloc[idx] = max( - serology[diff_column].iloc[idx - 1], - serology[diff_column].iloc[idx], - ) - serology[diff_column] = serology[diff_column] / 100.0 - # lets create datetime objects out of collection range - years = [ - x.split(",")[-1] for x in serology["Date Range of Specimen Collection"] - ] - serology["collection_start"] = pd.to_datetime( - [ - # edge case Date = Dec 27, 2021 - Jan 29, 2022 need years - ( - date.split("-")[0].strip() + "," + year - if len(date.split(",")) == 2 - else date.split("-")[0].strip() - ) - for date, year in zip( - serology["Date Range of Specimen Collection"], years - ) - ], - format="%b %d, %Y", - ) - - serology["collection_end"] = pd.to_datetime( - [ - x.split("-")[1].strip() - for x in serology["Date Range of Specimen Collection"] - ], - format="%b %d, %Y", - ) - - # transform from datetime to date obj - serology["collection_start"] = serology["collection_start"].dt.date - serology["collection_end"] = serology["collection_end"].dt.date - # pick the date between collection start and end as the point estimate for date of collection - serology["collection_date"] = [ - start + ((end - start) / 2) - for start, end in zip( - serology["collection_start"], serology["collection_end"] - ) - ] - # after we interpolate down to daily precision, rebin into waning compartments - serology.index = pd.to_datetime(serology["collection_date"]) - serology = serology[columns_of_interest[1:]] # filter to only int cols - # possible reimplementation of variable waning compartment bin width - # will probably need to return to [::-x] slicing with a variable x or something. - serology = ( - serology.resample( - "1d" - ).interpolate() # downsample to daily freq # linear interpolate between days - # .resample( - # str(waning_time) + "d", origin="end" - # ) # resample to waning compart width - # .max() - ) - strain_select = ( - num_historical_strains - 1 - ) # initialize as most recent strain - - strain_select_array = [ - strain_select - - sum( - [ - date < pd.Timestamp(historical_breakpoint) - for historical_breakpoint in historical_time_breakpoints - ] - ) - for date in serology.index - ] - - serology["strain_select"] = strain_select_array - # we will use the absolute change in % serology prevalence to initialize wane compartments - serology["0_17_diff"] = serology[ - "Rate (%) [Anti-N, 0-17 Years Prevalence]" - ].diff() - serology["18_49_diff"] = serology[ - "Rate (%) [Anti-N, 18-49 Years Prevalence, Rounds 1-30 only]" - ].diff() - serology["50_64_diff"] = serology[ - "Rate (%) [Anti-N, 50-64 Years Prevalence, Rounds 1-30 only]" - ].diff() - serology["65_diff"] = serology[ - "Rate (%) [Anti-N, 65+ Years Prevalence, Rounds 1-30 only]" - ].diff() - - return serology - - -def prep_abm_data( - abm_population: pd.DataFrame, - max_vax_count: int, - age_limits: list[int], - waning_times: list[int], - num_strains: int, - STRAIN_IDXs: IntEnum, -) -> pd.DataFrame: - """ - A helper function called by past_immune_dist_from_abm() that takes as input a path to some abm data with schema specified by the README, - and applies transformations to the table, adding some columns so individuals within the ABM data are able to be placed - in the correct partial immunity bins. This includes vaccination, age binning, waning bins, and conversion of strain exposure history - into an immune history. - - Parameters - ---------- - abm_population: pd.Dataframe - ABM data input with schema specified by project README. - max_vax_count: int - the number of doses maximum before all subsequent doses are no longer counted. ex: 2 -> 0, 1, 2+ doses (3 bins) - age_limits: list(int) - The age limits of your model that you wish to initialize compartments of. - Example: for bins of 0-17, 18-49, 50-64, 65+ age_limits = [0, 18, 50, 65] - waning_times: list(int) - Time in days it takes for a person to wane from a waning compartment to the next level of protection. - len(waning_times) == num_waning_compartments, ending in 0. - num_strains: int - number of distinct strains in your model, used to inform the `state` column in output - STRAIN_IDX: intEnum - an enum containing the name of each strain and its associated strain index, as initialized by ConfigBase. - - Returns - ---------- - A pandas dataframe read in from abm_path with 4 added columns: vax_bin, age_bin, waning_compartment_bin, and state. - The first 3 are simple transformations made to bin a domain according to the parameters of a model. - While the last converts a list of strain exposures into a integer state representing immune history. - """ - # replace N/A values with empty string so that convert_state() works correctly. - abm_population["strains"] = abm_population["strains"].fillna("") - abm_population["vax_bin"] = abm_population["num_doses"].apply( - lambda x: find_vax_bin(x, max_vax_count) - ) - abm_population["age_bin"] = abm_population["age"].apply( - lambda x: find_age_bin(x, age_limits) - ) - abm_population["waning_compartment_bin"] = abm_population["TSLIE"].apply( - lambda x: find_waning_compartment(x, waning_times) - ) - abm_population["state"] = abm_population["strains"].apply( - lambda x: convert_hist(x, STRAIN_IDXs, num_strains) - ) - return abm_population - - -def set_serology_timeline(num_strains, num_historical_strains): - """ - DEPRECATED: USE ABM INFORMED INITIALIZATION ROUTINES - - a helper method which does the logic of setting historical strain breakpoint dates. - - Takes the number of strains serology data will be used to initialize, and collapses certain strains together - if needed. Returning the number of strains which are counted individually (after collapse). - This value may be different than num_strains if num_strains > 3, as only 3 historical timelines are supported. - - Parameters - ---------- - num_strains: int - total number of strains in the model - num_historical_strains: int - number of strains serology data is supposed to initialize for - - Returns - ----------- - The number of historical strains to be loaded as an int. - an array of datetime.dates representing the breakpoints between each historical date. - - Example - ---------- - if you wish to initialize omicron, delta, and alpha strains. Num strains must be set to 3 or higher - will return (3, [datetime.date(2021, 6, 25), datetime.date(2021, 11, 19)]) - with each date representing the date at which alpha -> delta and then delta -> omicron - - """ - # breakpoints for each historical strain, oldest first, alpha - delta, delta - omicron - omicron_date = datetime.date(2021, 11, 19) # as omicron took off - delta_date = datetime.date(2021, 6, 25) # as the delta wave took off. - historical_time_breakpoints = [delta_date, omicron_date] - # small modifications needed so this does not break 2 and 1 strain models - if num_historical_strains == 1: - # no breakpoints when only 1 historical strain - historical_time_breakpoints = [] - elif num_historical_strains == 2: - # if we are only looking at 2 historical strains, only take most recent breakpoint - historical_time_breakpoints = [historical_time_breakpoints[-1]] - assert ( - num_historical_strains <= num_strains - ), "you are attempting to find sero data for more historical strains than total strains alloted to the model" - - assert ( - num_historical_strains == len(historical_time_breakpoints) + 1 - ), "set breakpoints for each of the historical strains you want to initialize with sero data" - return num_historical_strains, historical_time_breakpoints - - -def imply_immune_history_dist_from_strains( - strain_exposure_dist, - num_strains, - num_historical_strains, - repeat_inf_rate=0.5, -): - """ - DEPRECATED: USE ABM INFORMED INITIALIZATION ROUTINES - - takes a matrix of shape (age, strain, waning) and converts it to - (age, immune_hist, waning). It does this by assuming the following: - Any individuals who are infected by a single strain, - half of those individuals will be re-infected by all incoming future strains. - Immune hist is a integer state representing a history of all past infections. - - Parameters - ---------- - strain_exposure_dist: np.array - a numpy array of proportions of persons exposed to a variety of strains. - stratified by age, strain, and waning compartment. - num_strains: int - number of strains for which sero data is being loaded - repeat_inf_rate: float - the rate at which those infected by one strain are re-infected by a strain in the future. - - Returns - ---------- - immune_history_dist: np.array - a numpy array representing proportions of the population in each immune state as informed by the - strain_exposure_dist. Waning compartments and age structure are preserved. Strain dimension is - modified to represent immune history, predicting multiple infections and more complex immune states. - """ - return_shape = ( - strain_exposure_dist.shape[0], - 2**num_strains, - 3, # TODO remove this and all MAGIC 0s after adding vax - strain_exposure_dist.shape[2], - ) # immune states equal to 2^num_strains - immune_history_dist = np.zeros(return_shape) - immune_states = [] - for strain in range(0, num_historical_strains): - # fill in single strain immune state first. no repeated exposures yet. - single_strain_state = new_immune_state(0, strain) - # TODO remove 0 - immune_history_dist[ - :, single_strain_state, 0, : - ] = strain_exposure_dist[:, strain, :] - # now grab individuals from previous states and infect 1/2 of them with this strain - multi_strain_states = [] - for prev_state in immune_states: - multi_strain_state = new_immune_state( - prev_state, - strain, - ) - multi_strain_states.append(multi_strain_state) - age_summed = np.sum(strain_exposure_dist[:, strain, :], axis=0) - waning_compartments_with_strain = np.where(age_summed > 0) - # TODO remove 0s - # following for loop assumes reinfection of previous states with the incoming strain. - # will pull `repeat_inf_rate`% people from all previous waning compartments before the current - for waning_compartment in waning_compartments_with_strain[::-1]: - immune_history_dist[ - :, multi_strain_state, 0, waning_compartment - ] += np.sum( - repeat_inf_rate - * immune_history_dist[ - :, prev_state, 0, waning_compartment + 1 : - ], # waning_compartment + 1 selects prev waning compartments - axis=2, - ) - # TODO remove 0s - immune_history_dist[:, prev_state, 0, :] -= ( - repeat_inf_rate - * immune_history_dist[ - :, prev_state, 0, waning_compartment + 1 : - ] - ) - immune_states.append(single_strain_state) - immune_states = immune_states + multi_strain_states - # now that we have taken all the strain stratified ages and waning compartments - # place the fully susceptible people into [:, 0, 0]. - partial_immunity_proportion = np.sum(immune_history_dist, axis=(1, 2, 3)) - fully_susceptible_by_age = 1 - partial_immunity_proportion - # TODO remove 0 - immune_history_dist[:, 0, 0, 0] = fully_susceptible_by_age - return immune_history_dist - - -def past_immune_dist_from_serology_demographics( - sero_path, - age_path, - age_limits, - waning_times, - num_waning_compartments, - max_vaccine_count, - num_strains, - num_historical_strains, - initialization_date=datetime.date(2022, 2, 12), -): - """ - DEPRECATED: USE ABM INFORMED INITIALIZATION ROUTINES - - initializes and returns the immune history for a model based on __covid__ serological data. - - Parameters - ---------- - sero_path: str - relative or absolute path to serological data from which to initialize compartments - age_path: str - relative or absolute path to demographic data folder for age distributions - age_limits: list(int) - The age limits of your model that you wish to initialize compartments of. - Example: for bins of 0-17, 18-49, 50-64, 65+ age_limits = [0, 18, 50, 65] - waning_times: list(int) - Time in days it takes for a person to wane from a waning compartment to the next level of protection. - len(waning_times) == num_waning_compartments, ending in 0. - num_waning_compartments: int - number of waning compartments in your model that you wish to initialize. - max_vaccination_count: int - maximum number of vaccinations you want to actively keep track of. - example val 2: keep track of 0, 1, 2+ shots. - num_strains: int - number of strains in your model that you wish to initialize. - Note: people will be distributed across 3 strains if num_strains >= 3 - The 3 strains account for omicron, delta, and alpha waves. - The total number of cells used to represent immune history of all strains = 2^num_strains - if num_strains < 3, will collapse earlier strains into one another. - - Returns - ---------- - immune_history_dist: np.array - the proportions of the total population for each age bin stratified by immune history (natural and vaccine). - immune history consists of previous infection history as well as number of vaccinations. - The more recent of infection vs vaccination decides the waning compartment of that individual. - """ - # we will need population data for weighted averages - age_distributions = np.loadtxt( - age_path + "United_States_country_level_age_distribution_85.csv", - delimiter=",", - dtype=np.float64, - skiprows=0, - ) - # serology data only comes in these age bins, exclusive, min age 0 - serology_age_limits = [18, 50, 65] - ( - num_historical_strains, - historical_time_breakpoints, - ) = set_serology_timeline(num_strains, num_historical_strains) - # prep the sero data into daily resolution, pass historical breakpoints to mark the strain - # that each day of sero contributes to. - serology = prep_serology_data( - sero_path, num_historical_strains, historical_time_breakpoints - ) - # age_to_diff_dict will be used to average age bins when our datas age bins collide with serology datas - # for example hypothetical 10-20 age bin, needs to be weighted average of 0-17 and 18-49 age bins based on population - age_to_sero_dict = {} - age_groups = generate_yearly_age_bins_from_limits(age_limits) - - # return these after filling it with the proprtion of individuals - # exposed to each strain of the total population - strain_exposure_distribution = np.zeros( - (len(age_limits), num_strains, num_waning_compartments) - ) - # begin at the initialization date, move back from there - prev_waning_compartment_date = initialization_date - # for each waning index fill in its (age x strain) matrix based on weighted sero data for that age bin - for waning_index, waning_time in zip( - range(0, num_waning_compartments), waning_times - ): - # go back `waning_time` days at a time and use our diff columns to populate recoved/waning - # initialization_date is the date our chosen serology begins, based on post-omicron peak. - waning_compartment_date = prev_waning_compartment_date - ( - datetime.timedelta(days=waning_time) - ) - # if the waning time for this compartment is zero, we never wane out of this compartment - # select one day back, remember time slices are inclusive on BOTH sides! - if waning_compartment_date == prev_waning_compartment_date: - select = serology.loc[ - waning_compartment_date - - datetime.timedelta(days=1) : prev_waning_compartment_date - - datetime.timedelta(days=1) - ] - else: - # grab a time range for construction of the waning compartment - select = serology.loc[ - waning_compartment_date : prev_waning_compartment_date - - datetime.timedelta(days=1) - ] - assert ( - len(select) > 0 - ), "serology data does not exist for this waning date " + str( - waning_compartment_date - ) - # we have now selected the information for current waning compartment, set the pointer here for next loop - prev_waning_compartment_date = waning_compartment_date - # `select` is now an array spaning from the beginning of the current compartment, up until the begining of the previous one. - # however, this compartment can span multiple strains, depending on its size, do calculations for each strain! - for strain_select in select["strain_select"].unique(): - select_strained = select[select["strain_select"] == strain_select] - - # fill our age_to_sero_dict so each age maps to its sero change we just selected - # if we are in the last waning compartment, use sero-prevalence at that date instead - # effectively combining all persons with previous infection on or before that date together - for age in range(85): - if age < serology_age_limits[0]: - age_to_sero_dict[age] = ( - sum(select_strained["0_17_diff"]) - if waning_index < num_waning_compartments - 1 - else max( - select_strained[ - "Rate (%) [Anti-N, 0-17 Years Prevalence]" - ] - ) - ) - elif age < serology_age_limits[1]: - age_to_sero_dict[age] = ( - sum(select_strained["18_49_diff"]) - if waning_index < num_waning_compartments - 1 - else max( - select[ - "Rate (%) [Anti-N, 18-49 Years Prevalence, Rounds 1-30 only]" - ] - ) - ) - elif age < serology_age_limits[2]: - age_to_sero_dict[age] = ( - sum(select_strained["50_64_diff"]) - if waning_index < num_waning_compartments - 1 - else max( - select[ - "Rate (%) [Anti-N, 50-64 Years Prevalence, Rounds 1-30 only]" - ] - ) - ) - else: - age_to_sero_dict[age] = ( - sum(select_strained["65_diff"]) - if waning_index < num_waning_compartments - 1 - else max( - select[ - "Rate (%) [Anti-N, 65+ Years Prevalence, Rounds 1-30 only]" - ] - ) - ) - # finally, sum over age groups, weighting sero by the population of each age. - for age_group_idx, age_group in enumerate(age_groups): - serology_age_group = [ - age_to_sero_dict[age] for age in age_group - ] - population_age_group = [ - age_distributions[age][1] for age in age_group - ] - serology_weighted = np.average( - serology_age_group, weights=population_age_group - ) - # add to a waning compartment - strain_exposure_distribution[ - age_group_idx, strain_select, waning_index - ] = serology_weighted - # we now have the timing of when each proportion of the population was exposed to each strain - # lets make some assumptions about repeat infections to produce immune history. - immune_history_dist = imply_immune_history_dist_from_strains( - strain_exposure_distribution, num_strains, num_historical_strains - ) - # TODO add vaccinations here too. - - return immune_history_dist - - -def past_immune_dist_from_abm( - abm_path: str, - num_age_groups: int, - age_limits: list[int], - max_vaccination_count: int, - waning_times: list[int], - num_waning_compartments: int, - num_strains: int, - STRAIN_IDXs: IntEnum, -) -> np.ndarray: - """ - A function used to initialize susceptible and partially susceptible distributions for a model via ABM (agent based model) data. - Given a path to an ABM state as CSV (schema for this data specified in README), read in dataframe, bin individuals according - to model parameters (age/wane/vax binning), and place individuals into strata. - Finally normalize by age group such that proportions within a single bin sum to 1. - - Parameters - ---------- - abm_path: str - path to the abm input data, stored as a csv. - num_age_groups: int - number of age bins in the model being initialized. - age_limits: list(int) - The age limits of your model that you wish to initialize compartments of. - Example: for bins of 0-17, 18-49, 50-64, 65+ age_limits = [0, 18, 50, 65] - max_vaccination_count: int - the number of doses maximum before all subsequent doses are no longer counted. ex: 2 -> 0, 1, 2+ doses (3 bins) - waning_times: list(int) - Time in days it takes for a person to wane from a waning compartment to the next level of protection. - len(waning_times) == num_waning_compartments, ending in 0. - num_waning_compartments: int - The number of waning bins in the model being initialized. - num_strains: int - number of distinct strains in your model, used to inform the `state` column in output - STRAIN_IDX: intEnum - an enum containing the name of each strain and its associated strain index, as initialized by ConfigBase. - - - Returns: - A numpy matrix stratified by age bin, immune history, vaccine bin, and waning bin. Where proportions within an single age bin sum to 1. - Representing the distributions of people within that age bin who belong to each strata of immune history, vaccination, and waning. - """ - num_immune_hist = 2**num_strains - abm_population = pd.read_csv(abm_path) - # remove those with active infections, those are designated for exposed/infected - abm_population = abm_population[abm_population["TSLIE"] >= 0] - abm_population = prep_abm_data( - abm_population, - max_vaccination_count, - age_limits, - waning_times, - num_strains, - STRAIN_IDXs, - ) - immune_hist = np.zeros( - ( - num_age_groups, - num_immune_hist, - max_vaccination_count + 1, - num_waning_compartments, - ) - ) - # get the number of people who fall in each age_bin, state, vax_bin, and waning_bin combination - stratas, counts = np.unique( - abm_population[ - ["age_bin", "state", "vax_bin", "waning_compartment_bin"] - ], - axis=0, - return_counts=True, - ) - # place people into their correct bins using the counts from above - for strata, count in zip(stratas, counts): - age_bin, state, vax_bin, waning_compartment_bin = strata - immune_hist[age_bin, state, vax_bin, waning_compartment_bin] += count - - pop_by_age_bin = np.sum(immune_hist, axis=(1, 2, 3)) - # normalize for each age bin, all individual age bins sum to 1. - immune_hist_normalized = ( - immune_hist / pop_by_age_bin[:, np.newaxis, np.newaxis, np.newaxis] - ) - return immune_hist_normalized - - -def init_infections_from_abm( - abm_path: str, - num_age_groups: int, - age_limits: list[int], - max_vaccination_count: int, - waning_times: list[int], - num_strains: int, - STRAIN_IDXs: IntEnum, -) -> tuple[np.ndarray, np.ndarray, np.ndarray, float]: - """ - A function that uses ABM state data to inform initial infections and distribute them across infected and exposed compartments - according to the ratio of exposed to infectious individuals found in the abm at model initialization date. - Returns proportions of new infections belonging to each strata, all attributed to STRAIN_IDX.omicron as that was the dominant - strain during the initialization date. - - Parameters - ---------- - abm_path: str - path to the abm input data, stored as a csv. - num_age_groups: int - number of age bins in the model being initialized. - age_limits: list(int) - The age limits of your model that you wish to initialize compartments of. - Example: for bins of 0-17, 18-49, 50-64, 65+ age_limits = [0, 18, 50, 65] - max_vaccination_count: int - the number of doses maximum before all subsequent doses are no longer counted. ex: 2 -> 0, 1, 2+ doses (3 bins) - waning_times: list(int) - Time in days it takes for a person to wane from a waning compartment to the next level of protection. - len(waning_times) == num_waning_compartments, ending in 0. - num_waning_compartments: int - The number of waning bins in the model being initialized. - num_strains: int - number of distinct strains in your model, used to inform the `state` column in output - STRAIN_IDX: intEnum - an enum containing the name of each strain and its associated strain index, as initialized by ConfigBase. - - Returns - ---------- - (infections, exposed, infected, proportion_infected) - - infections = exposed + infected - - proportion_infected = % of total pop infected or exposed. - - Each np.ndarray represents the proportions of each initial infection belonging to each strata, meaning sum(infections) == 1. - All numpy arrays stratified by age, immune history, vaccination, and infecting strain (always omicron). - """ - num_immune_hist = 2**num_strains - abm_population = pd.read_csv(abm_path) - # select for those with active infections, aka TSLIE < 0 - active_infections_abm = abm_population[abm_population["TSLIE"] < 0] - # since we are looking at active infections, the last element in the strains array will be the current infecting strain - # thus we separate it into its own column so it does not soil the immune history pre-infection of the individual - active_infections_abm["infecting_strain"] = active_infections_abm[ - "strains" - ].apply(lambda x: convert_strain(x.split(",")[-1], STRAIN_IDXs)) - - active_infections_abm["strains"] = active_infections_abm["strains"].apply( - lambda x: ",".join(x.split(",")[:-1]) - ) - active_infections_abm = prep_abm_data( - active_infections_abm, - max_vaccination_count, - age_limits, - waning_times, - num_strains, - STRAIN_IDXs, - ) - proportion_infected = len(active_infections_abm) / len(abm_population) - infections = np.zeros( - ( - num_age_groups, - num_immune_hist, - max_vaccination_count + 1, - num_strains, - ) - ) - stratas, counts = np.unique( - active_infections_abm[ - ["age_bin", "state", "vax_bin", "infecting_strain"] - ], - axis=0, - return_counts=True, - ) - for strata, count in zip(stratas, counts): - age_bin, state, vax_bin, infecting_strain = strata - infections[age_bin, state, vax_bin, infecting_strain] += count - - total_pop = np.sum(infections, axis=(0, 1, 2, 3)) - # normalize so all infections sum to 1, getting proportions of each strata - infections_normalized = infections / total_pop - # column called "infectious" == 1 if person is actively infectious, 0 if just exposed and not yet infectious - infected_to_exposed_ratio = sum(active_infections_abm["infectious"]) / len( - active_infections_abm - ) - exposed = infections_normalized * (1 - infected_to_exposed_ratio) - infected = infections_normalized * infected_to_exposed_ratio - - return infections_normalized, exposed, infected, proportion_infected - - # @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ # Plotting CODE # @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ @@ -2017,36 +1300,6 @@ def is_close(x): return is_close_v(np.sum(compartment, axis=dimensions_to_sum_over)), label -def from_json(j_str): - """ - Given a JSON string returned from BasicMechanisticModel.to_json() - """ - j = json.loads(j_str) - model_dict = {} - for key, param in j.items(): - # if we specify a special type as a dict, lets cast it to that type - if isinstance(param, dict) and "type" in param.keys(): - param_type = param["type"] - param_val = param["val"] - if param_type == "date": - param_val = datetime.datetime.strptime( - param_val, "%d-%m-%y" - ).date() - elif param_type == "jax": - param_val = jnp.array(param_val) - elif param_type == "enum": - enum_vals = [x.split(".")[-1] for x in param_val.keys()] - enum_name = [x.split(".")[0] for x in param_val.keys()][0] - param_val = IntEnum(enum_name, enum_vals, start=0) - elif param_type == "state": - param_val = tuple( - jnp.array(compartment["val"]) for compartment in param_val - ) - param = param_val - model_dict[key] = param - return model_dict - - def get_var_proportions(inferer, solution): """ Calculate _daily_ variant proportions based on a simulation run.