diff --git a/.github/workflows/commit-ci.yml b/.github/workflows/commit-ci.yml index b8d81e51..24c685eb 100644 --- a/.github/workflows/commit-ci.yml +++ b/.github/workflows/commit-ci.yml @@ -27,7 +27,7 @@ jobs: - uses: mamba-org/setup-micromamba@v1 with: - micromamba-version: latest + micromamba-version: '1.5.10-0' environment-name: ${{ github.event.repository.name }}-ubuntu-latest-312-${{ hashFiles('requirements/dev.txt') }} environment-file: requirements/base.txt create-args: >- diff --git a/.github/workflows/pr-ci.yml b/.github/workflows/pr-ci.yml index c989b30c..cefeb865 100644 --- a/.github/workflows/pr-ci.yml +++ b/.github/workflows/pr-ci.yml @@ -45,7 +45,7 @@ jobs: - uses: mamba-org/setup-micromamba@v1 with: - micromamba-version: latest + micromamba-version: '1.5.10-0' environment-name: ${{ github.event.repository.name }}-${{ matrix.os }}-3${{ matrix.py3version }}-${{ hashFiles('requirements/dev.txt') }} environment-file: requirements/base.txt create-args: >- @@ -108,7 +108,7 @@ jobs: - uses: actions/checkout@v4 - uses: mamba-org/setup-micromamba@v1 with: - micromamba-version: latest + micromamba-version: '1.5.10-0' environment-file: .github/workflows/pr-ci-pipbuild-environment.yml post-cleanup: all cache-environment: true @@ -127,4 +127,4 @@ jobs: TEST_PYPI_API_TOKEN: ${{ secrets.TEST_PYPI_API_TOKEN }} with: package_name: calliope - version: ${{needs.pre-release-version.outputs.version}} \ No newline at end of file + version: ${{needs.pre-release-version.outputs.version}} diff --git a/CHANGELOG.md b/CHANGELOG.md index 2d06ac4f..92163728 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,9 @@ |new| (non-NaN) Default values and data types for parameters appear in math documentation (if they appear in the model definition schema) (#677). +|changed| `data_sources` -> `data_tables` and `data_sources.source` -> `data_tables.data`. +This change was made to avoid confusion between data "sources" and model energy "sources" (#673). 
+ ## 0.7.0.dev4 (2024-09-10) ### User-facing changes diff --git a/docs/creating/data_sources.md b/docs/creating/data_tables.md similarity index 90% rename from docs/creating/data_sources.md rename to docs/creating/data_tables.md index f8735ed7..e549be03 100644 --- a/docs/creating/data_sources.md +++ b/docs/creating/data_tables.md @@ -1,17 +1,17 @@ -# Loading tabular data (`data_sources`) +# Loading tabular data (`data_tables`) We have chosen YAML syntax to define Calliope models as it is human-readable. However, when you have a large dataset, the YAML files can become large and ultimately not as readable as we would like. For instance, for parameters that vary in time we would have a list of 8760 values and timestamps to put in our YAML file! -Therefore, alongside your YAML model definition, you can load tabular data from CSV files (or from in-memory [pandas.DataFrame][] objects) under the `data_sources` top-level key. +Therefore, alongside your YAML model definition, you can load tabular data from CSV files (or from in-memory [pandas.DataFrame][] objects) under the `data_tables` top-level key. As of Calliope v0.7.0, this tabular data can be of _any_ kind. Prior to this, loading from file was limited to timeseries data. -The full syntax from loading tabular data can be found in the associated [schema][data-source-schema]. +The full syntax for loading tabular data can be found in the associated [schema][data-table-schema]. In brief it is: -* **source**: path to file or reference name for an in-memory object. +* **data**: path to file or reference name for an in-memory object. * **rows**: the dimension(s) in your table defined per row. * **columns**: the dimension(s) in your table defined per column. * **select**: values within dimensions that you want to select from your tabular data, discarding the rest. 
@@ -126,9 +126,9 @@ In this section we will show some examples of loading data and provide the equiv YAML definition to load data: ```yaml - data_sources: + data_tables: pv_capacity_factor_data: - source: data_sources/pv_resource.csv + data: data_tables/pv_resource.csv rows: timesteps add_dims: techs: pv @@ -181,9 +181,9 @@ In this section we will show some examples of loading data and provide the equiv YAML definition to load data: ```yaml - data_sources: + data_tables: tech_data: - source: data_sources/tech_data.csv + data: data_tables/tech_data.csv rows: [techs, parameters] ``` @@ -224,9 +224,9 @@ In this section we will show some examples of loading data and provide the equiv YAML definition to load data: ```yaml - data_sources: + data_tables: tech_data: - source: data_sources/tech_data.csv + data: data_tables/tech_data.csv rows: [techs, parameters] add_dims: costs: monetary @@ -272,7 +272,7 @@ In this section we will show some examples of loading data and provide the equiv 1. To limit repetition, we have defined [templates](templates.md) for our costs. !!! info "See also" - Our [data source loading tutorial][loading-tabular-data] has more examples of loading tabular data into your model. + Our [data table loading tutorial][loading-tabular-data] has more examples of loading tabular data into your model. 
## Selecting dimension values and dropping dimensions @@ -290,9 +290,9 @@ Data in file: YAML definition to load only data from nodes 1 and 2: ```yaml -data_sources: +data_tables: tech_data: - source: data_sources/tech_data.csv + data: data_tables/tech_data.csv rows: [techs, parameters] columns: nodes select: @@ -312,9 +312,9 @@ You will also need to `drop` the dimension so that it doesn't appear in the fina YAML definition to load only data from scenario 1: ```yaml -data_sources: +data_tables: tech_data: - source: data_sources/tech_data.csv + data: data_tables/tech_data.csv rows: [techs, parameters] columns: scenarios select: @@ -322,12 +322,12 @@ data_sources: drop: scenarios ``` -You can then also tweak just one line of your data source YAML with an [override](scenarios.md) to point to your other scenario: +You can then also tweak just one line of your data table YAML with an [override](scenarios.md) to point to your other scenario: ```yaml override: switch_to_scenario2: - data_sources.tech_data.select.scenarios: scenario2 # (1)! + data_tables.tech_data.select.scenarios: scenario2 # (1)! ``` 1. We use the dot notation as a shorthand for [abbreviate nested dictionaries](yaml.md#abbreviated-nesting). 
@@ -348,9 +348,9 @@ For example, to define costs for the parameter `cost_flow_cap`: | tech3 | monetary | cost_flow_cap | 20 | 45 | 50 | ```yaml - data_sources: + data_tables: tech_data: - source: data_sources/tech_data.csv + data: data_tables/tech_data.csv rows: [techs, costs, parameters] columns: nodes ``` @@ -364,9 +364,9 @@ For example, to define costs for the parameter `cost_flow_cap`: | tech3 | 20 | 45 | 50 | ```yaml - data_sources: + data_tables: tech_data: - source: data_sources/tech_data.csv + data: data_tables/tech_data.csv rows: techs columns: nodes add_dims: @@ -384,9 +384,9 @@ Or to define the same timeseries source data for two technologies at different n | 2005-01-01 01:00 | 200 | 200 | ```yaml - data_sources: + data_tables: tech_data: - source: data_sources/tech_data.csv + data: data_tables/tech_data.csv rows: timesteps columns: [nodes, techs, parameters] ``` @@ -401,16 +401,16 @@ Or to define the same timeseries source data for two technologies at different n | 2005-01-01 01:00 | 200 | ```yaml - data_sources: + data_tables: tech_data_1: - source: data_sources/tech_data.csv + data: data_tables/tech_data.csv rows: timesteps add_dims: techs: tech1 nodes: node1 parameters: source_use_max tech_data_2: - source: data_sources/tech_data.csv + data: data_tables/tech_data.csv rows: timesteps add_dims: techs: tech2 @@ -420,10 +420,10 @@ Or to define the same timeseries source data for two technologies at different n ## Loading CSV files vs `pandas` dataframes -To load from CSV, set the filepath in `source` to point to your file. +To load from CSV, set the filepath in `data` to point to your file. This filepath can either be relative to your `model.yaml` file (as in the above examples) or an absolute path. 
-To load from a [pandas.DataFrame][], you can specify the `data_source_dfs` dictionary of objects when you initialise your model: +To load from a [pandas.DataFrame][], you can specify the `data_table_dfs` dictionary of objects when you initialise your model: ```python import calliope @@ -433,19 +433,19 @@ df2 = pd.DataFrame(...) model = calliope.Model( "path/to/model.yaml", - data_source_dfs={"data_source_1": df1, "data_source_2": df2} + data_table_dfs={"data_source_1": df1, "data_source_2": df2} ) ``` -And then you point to those dictionary keys in the `source` for your data source: +And then you point to those dictionary keys in the `data` for your data table: ```yaml -data_sources: +data_tables: ds1: - source: data_source_1 + data: data_source_1 ... ds2: - source: data_source_2 + data: data_source_2 ... ``` @@ -454,7 +454,7 @@ data_sources: Rows correspond to your dataframe index levels and columns to your dataframe column levels. You _cannot_ specify [pandas.Series][] objects. - Ensure you convert them to dataframes (`to_frame()`) before adding them to your data source dictionary. + Ensure you convert them to dataframes (`to_frame()`) before adding them to your data table dictionary. ## Important considerations @@ -468,8 +468,8 @@ This could be defined in `rows`, `columns`, or `add_dims`. 3. `add_dims` to add dimensions. This means you can technically select value "A" from dimensions `nodes`, then drop `nodes`, then add `nodes` back in with the value "B". This effectively replaces "A" with "B" on that dimension. -3. The order of tabular data loading is in the order you list the sources. -If a new table has data which clashes with preceding data sources, it will override that data. +3. The order of tabular data loading is in the order you list the tables. +If a new table has data which clashes with preceding tables, it will override that data. 
This may have unexpected results if the files have different dimensions as the dimensions will be broadcast to match each other. 4. CSV files must have `.csv` in their filename (even if compressed, e.g., `.csv.zip`). If they don't, they won't be picked up by Calliope. @@ -481,7 +481,7 @@ E.g., nodes: node1.techs: {tech1, tech2, tech3} node2.techs: {tech1, tech2} - data_sources: + data_tables: ... ``` 6. We process dimension data after loading it in according to a limited set of heuristics: diff --git a/docs/creating/index.md b/docs/creating/index.md index c8af0587..35ddefb2 100644 --- a/docs/creating/index.md +++ b/docs/creating/index.md @@ -35,7 +35,7 @@ We distinguish between: - the model **definition** (your representation of a physical system in YAML). Model configuration is everything under the top-level YAML key [`config`](config.md). -Model definition is everything else, under the top-level YAML keys [`parameters`](parameters.md), [`techs`](techs.md), [`nodes`](nodes.md), [`templates`](templates.md), and [`data_sources`](data_sources.md). +Model definition is everything else, under the top-level YAML keys [`parameters`](parameters.md), [`techs`](techs.md), [`nodes`](nodes.md), [`templates`](templates.md), and [`data_tables`](data_tables.md). It is possible to define alternatives to the model configuration/definition that you can refer to when you initialise your model. These are defined under the top-level YAML keys [`scenarios` and `overrides`](scenarios.md). @@ -52,7 +52,7 @@ The layout of that directory typically looks roughly like this (`+` denotes dire + model_definition - nodes.yaml - techs.yaml - + data_sources + + data_tables - solar_resource.csv - electricity_demand.csv - model.yaml @@ -63,7 +63,7 @@ In the above example, the files `model.yaml`, `nodes.yaml` and `techs.yaml` toge This definition could be in one file, but it is more readable when split into multiple. We use the above layout in the example models. 
-Inside the `data_sources` directory, tabular data are stored as CSV files. +Inside the `data_tables` directory, tabular data are stored as CSV files. !!! note The easiest way to create a new model is to use the `calliope new` command, which makes a copy of one of the built-in examples models: @@ -85,4 +85,4 @@ The rest of this section discusses everything you need to know to set up a model - More details on the [model configuration](config.md). - The key parts of the model definition, first, the [technologies](techs.md), then, the [nodes](nodes.md), the locations in space where technologies can be placed. - How to use [technology and node templates](templates.md) to reduce repetition in the model definition. -- Other important features to be aware of when defining your model: defining [indexed parameters](parameters.md), i.e. parameter which are not indexed over technologies and nodes, [loading tabular data](data_sources.md), and defining [scenarios and overrides](scenarios.md). +- Other important features to be aware of when defining your model: defining [indexed parameters](parameters.md), i.e. parameters which are not indexed over technologies and nodes, [loading tabular data](data_tables.md), and defining [scenarios and overrides](scenarios.md). diff --git a/docs/examples/loading_tabular_data.py b/docs/examples/loading_tabular_data.py index edaaf819..35fe8398 100644 --- a/docs/examples/loading_tabular_data.py +++ b/docs/examples/loading_tabular_data.py @@ -178,7 +178,7 @@ # ## Defining data in the tabular CSV format # %% [markdown] -# We could have defined these same tables in CSV files and loaded them using `data-sources`. +# We could have defined these same tables in CSV files and loaded them using `data_tables`. # We don't yet have those CSV files ready, so we'll create them programmatically. # In practice, you would likely write these files using software like Excel. 
@@ -189,14 +189,14 @@ # Some are long and thin with all the dimensions grouped in each row (or the `index`), while others have dimensions grouped in the columns. # This is to show what is possible. # You might choose to always have long and thin data, or to always have certain dimensions in the rows and others in the columns. -# So long as you then define your data source correctly in the model definition, so that Calliope knows exactly how to process your data, it doesn't matter what shape it is stored in. +# So long as you then define your data table correctly in the model definition, so that Calliope knows exactly how to process your data, it doesn't matter what shape it is stored in. # %% [markdown] # First, we create a directory to hold the tabular data we are about to generate. # %% -data_source_path = Path(".") / "outputs" / "loading_tabular_data" -data_source_path.mkdir(parents=True, exist_ok=True) +data_table_path = Path(".") / "outputs" / "loading_tabular_data" +data_table_path.mkdir(parents=True, exist_ok=True) # %% [markdown] # Next we group together **technology data where no extra dimensions are needed**. 
@@ -220,7 +220,7 @@ }, } ) -tech_data.to_csv(data_source_path / "tech_data.csv") +tech_data.to_csv(data_table_path / "tech_data.csv") tech_data # %% [markdown] @@ -238,7 +238,7 @@ }, } ) -tech_timestep_data.to_csv(data_source_path / "tech_timestep_data.csv") +tech_timestep_data.to_csv(data_table_path / "tech_timestep_data.csv") tech_timestep_data # %% [markdown] @@ -258,7 +258,7 @@ ("transmission_tech", "carrier_out"): 1, } ) -tech_carrier_data.to_csv(data_source_path / "tech_carrier_data.csv") +tech_carrier_data.to_csv(data_table_path / "tech_carrier_data.csv") tech_carrier_data # %% [markdown] # And the **technology data with the `nodes` dimension**: @@ -266,7 +266,7 @@ tech_node_data = pd.Series( {("supply_tech", "B", "flow_cap_max"): 8, ("supply_tech", "A", "flow_cap_max"): 10} ) -tech_node_data.to_csv(data_source_path / "tech_node_data.csv") +tech_node_data.to_csv(data_table_path / "tech_node_data.csv") tech_node_data # %% [markdown] # Finally, we deal with the **technology data with the `costs` dimension**. 
@@ -281,40 +281,40 @@ "supply_tech": {"cost_flow_cap": 2}, } ) -tech_cost_data.to_csv(data_source_path / "tech_cost_data.csv") +tech_cost_data.to_csv(data_table_path / "tech_cost_data.csv") tech_cost_data # %% [markdown] -# Now our YAML model definition can simply link to each of the CSV files we created in the `data_sources`` section, instead of needing to define the data in YAML directly: +# Now our YAML model definition can simply link to each of the CSV files we created in the `data_tables` section, instead of needing to define the data in YAML directly: # # ```yaml -# data_sources: +# data_tables: # tech_data: -# source: outputs/loading_tabular_data/tech_data.csv +# data: outputs/loading_tabular_data/tech_data.csv # rows: parameters # columns: techs # tech_node_data: -# source: outputs/loading_tabular_data/tech_node_data.csv +# data: outputs/loading_tabular_data/tech_node_data.csv # rows: [techs, nodes, parameters] # tech_timestep_data: -# source: outputs/loading_tabular_data/tech_timestep_data.csv +# data: outputs/loading_tabular_data/tech_timestep_data.csv # rows: timesteps # columns: [techs, parameters] # tech_carrier_data: -# source: outputs/loading_tabular_data/tech_carrier_data.csv +# data: outputs/loading_tabular_data/tech_carrier_data.csv # rows: [techs, parameters] # add_dims: # carriers: electricity # tech_cost_data: -# source: outputs/loading_tabular_data/tech_cost_data.csv +# data: outputs/loading_tabular_data/tech_cost_data.csv # rows: parameters # columns: techs # add_dims: # costs: monetary # ``` # -# When loading data sources, assigning techs to nodes is done automatically to some extent. -# That is, if a tech is defined at a node in a data source (in this case, only for `supply_tech`), then Calliope assumes that this tech should be allowed to exist at the corresponding node. +# When loading data tables, assigning techs to nodes is done automatically to some extent. 
+# That is, if a tech is defined at a node in a data table (in this case, only for `supply_tech`), then Calliope assumes that this tech should be allowed to exist at the corresponding node. # Since it is easy to lose track of which parameters you've defined at nodes and which ones not, it is _much_ safer to explicitly define a list of technologies at each node in your YAML definition: # # ```yaml @@ -326,25 +326,25 @@ # %% model_def = calliope.AttrDict.from_yaml_string( """ -data_sources: +data_tables: tech_data: - source: outputs/loading_tabular_data/tech_data.csv + data: outputs/loading_tabular_data/tech_data.csv rows: parameters columns: techs tech_node_data: - source: outputs/loading_tabular_data/tech_node_data.csv + data: outputs/loading_tabular_data/tech_node_data.csv rows: [techs, nodes, parameters] tech_timestep_data: - source: outputs/loading_tabular_data/tech_timestep_data.csv + data: outputs/loading_tabular_data/tech_timestep_data.csv rows: timesteps columns: [techs, parameters] tech_carrier_data: - source: outputs/loading_tabular_data/tech_carrier_data.csv + data: outputs/loading_tabular_data/tech_carrier_data.csv rows: [techs, parameters] add_dims: carriers: electricity tech_cost_data: - source: outputs/loading_tabular_data/tech_cost_data.csv + data: outputs/loading_tabular_data/tech_cost_data.csv rows: parameters columns: techs add_dims: @@ -354,35 +354,35 @@ B.techs: {supply_tech, demand_tech} """ ) -model_from_data_sources = calliope.Model(model_def) +model_from_data_tables = calliope.Model(model_def) # %% [markdown] # ### Loading directly from in-memory dataframes # If you create your tabular data in an automated manner in a Python script, you may want to load it directly into Calliope rather than saving it to file first. 
-# You can do that by setting the data source as the name of a key in a dictionary that you supply when you load the model: +# You can do that by setting `data` as the name of a key in a dictionary that you supply when you load the model: # %% model_def = calliope.AttrDict.from_yaml_string( """ -data_sources: +data_tables: tech_data: - source: tech_data_df + data: tech_data_df rows: parameters columns: techs tech_node_data: - source: tech_node_data_df + data: tech_node_data_df rows: [techs, nodes, parameters] tech_timestep_data: - source: tech_timestep_data_df + data: tech_timestep_data_df rows: timesteps columns: [techs, parameters] tech_carrier_data: - source: tech_carrier_data_df + data: tech_carrier_data_df rows: [techs, parameters] add_dims: carriers: electricity tech_cost_data: - source: tech_cost_data_df + data: tech_cost_data_df rows: parameters columns: techs add_dims: @@ -392,9 +392,9 @@ B.techs: {supply_tech, demand_tech} """ ) -model_from_data_sources = calliope.Model( +model_from_data_tables = calliope.Model( model_def, - data_source_dfs={ + data_table_dfs={ "tech_data_df": tech_data, # NOTE: inputs must be dataframes. # pandas Series objects must therefore be converted: @@ -414,15 +414,15 @@ model_from_yaml.solve(force=True) # %% -model_from_data_sources.build(force=True) -model_from_data_sources.solve(force=True) +model_from_data_tables.build(force=True) +model_from_data_tables.solve(force=True) # %% [markdown] # **Input data**. Now we check if the input data are exactly the same across both models:" # %% for variable_name, variable_data in model_from_yaml.inputs.data_vars.items(): - if variable_data.broadcast_equals(model_from_data_sources.inputs[variable_name]): + if variable_data.broadcast_equals(model_from_data_tables.inputs[variable_name]): print(f"Great work, {variable_name} matches") else: print(f"!!! Something's wrong! 
{variable_name} doesn't match !!!") @@ -433,20 +433,20 @@ # %% for variable_name, variable_data in model_from_yaml.results.data_vars.items(): - if variable_data.broadcast_equals(model_from_data_sources.results[variable_name]): + if variable_data.broadcast_equals(model_from_data_tables.results[variable_name]): print(f"Great work, {variable_name} matches") else: print(f"!!! Something's wrong! {variable_name} doesn't match !!!") # %% [markdown] -# ## Mixing YAML and data source definitions +# ## Mixing YAML and data table definitions # It is possible to only put some data into CSV files and define the rest in YAML. # In fact, it almost always makes sense to build these hybrid definitions. For smaller models, you may only want to store timeseries data stored in CSV files and everything else in YAML: # # ```yaml -# data_sources: +# data_tables: # tech_timestep_data: -# source: outputs/loading_tabular_data/tech_timestep_data.csv +# data: outputs/loading_tabular_data/tech_timestep_data.csv # rows: timesteps # columns: [techs, parameters] # techs: @@ -499,13 +499,13 @@ # # # ```yaml -# data_sources: +# data_tables: # tech_timestep_data: -# source: outputs/loading_tabular_data/tech_timestep_data.csv +# data: outputs/loading_tabular_data/tech_timestep_data.csv # rows: timesteps # columns: [techs, parameters] # tech_cost_data: -# source: outputs/loading_tabular_data/tech_cost_data.csv +# data: outputs/loading_tabular_data/tech_cost_data.csv # rows: parameters # columns: techs # add_dims: @@ -548,31 +548,31 @@ # %% [markdown] # ## Overriding tabular data with YAML # -# Another reason to mix tabular data sources with YAML is to allow you to keep track of overrides to specific parts of the model definition. +# Another reason to mix tabular data with YAML is to allow you to keep track of overrides to specific parts of the model definition. 
# # For instance, we could change the number of a couple of parameters: # # # ```yaml -# data_sources: +# data_tables: # tech_data: -# source: outputs/loading_tabular_data/tech_data.csv +# data: outputs/loading_tabular_data/tech_data.csv # rows: parameters # columns: techs # tech_node_data: -# source: outputs/loading_tabular_data/tech_node_data.csv +# data: outputs/loading_tabular_data/tech_node_data.csv # rows: [techs, nodes, parameters] # tech_timestep_data: -# source: outputs/loading_tabular_data/tech_timestep_data.csv +# data: outputs/loading_tabular_data/tech_timestep_data.csv # rows: timesteps # columns: [techs, parameters] # tech_carrier_data: -# source: outputs/loading_tabular_data/tech_carrier_data.csv +# data: outputs/loading_tabular_data/tech_carrier_data.csv # rows: [techs, parameters] # add_dims: # carriers: electricity # tech_cost_data: -# source: outputs/loading_tabular_data/tech_cost_data.csv +# data: outputs/loading_tabular_data/tech_cost_data.csv # rows: parameters # columns: techs # add_dims: @@ -592,25 +592,25 @@ # %% model_def = calliope.AttrDict.from_yaml_string( """ -data_sources: +data_tables: tech_data: - source: outputs/loading_tabular_data/tech_data.csv + data: outputs/loading_tabular_data/tech_data.csv rows: parameters columns: techs tech_node_data: - source: outputs/loading_tabular_data/tech_node_data.csv + data: outputs/loading_tabular_data/tech_node_data.csv rows: [techs, nodes, parameters] tech_timestep_data: - source: outputs/loading_tabular_data/tech_timestep_data.csv + data: outputs/loading_tabular_data/tech_timestep_data.csv rows: timesteps columns: [techs, parameters] tech_carrier_data: - source: outputs/loading_tabular_data/tech_carrier_data.csv + data: outputs/loading_tabular_data/tech_carrier_data.csv rows: [techs, parameters] add_dims: carriers: electricity tech_cost_data: - source: outputs/loading_tabular_data/tech_cost_data.csv + data: outputs/loading_tabular_data/tech_cost_data.csv rows: parameters columns: techs 
add_dims: @@ -626,39 +626,39 @@ B.techs: {supply_tech, demand_tech} """ ) -model_from_data_sources_w_override = calliope.Model(model_def) +model_from_data_tables_w_override = calliope.Model(model_def) # Let's compare the two after overriding `flow_cap_max` -flow_cap_old = model_from_data_sources.inputs.flow_cap_max.to_series().dropna() +flow_cap_old = model_from_data_tables.inputs.flow_cap_max.to_series().dropna() flow_cap_new = ( - model_from_data_sources_w_override.inputs.flow_cap_max.to_series().dropna() + model_from_data_tables_w_override.inputs.flow_cap_max.to_series().dropna() ) pd.concat([flow_cap_old, flow_cap_new], axis=1, keys=["old", "new"]) # %% [markdown] -# We can also switch off technologies / nodes that would otherwise be introduced by our data sources: +# We can also switch off technologies / nodes that would otherwise be introduced by our data tables: # # # ```yaml -# data_sources: +# data_tables: # tech_data: -# source: outputs/loading_tabular_data/tech_data.csv +# data: outputs/loading_tabular_data/tech_data.csv # rows: parameters # columns: techs # tech_node_data: -# source: outputs/loading_tabular_data/tech_node_data.csv +# data: outputs/loading_tabular_data/tech_node_data.csv # rows: [techs, nodes, parameters] # tech_timestep_data: -# source: outputs/loading_tabular_data/tech_timestep_data.csv +# data: outputs/loading_tabular_data/tech_timestep_data.csv # rows: timesteps # columns: [techs, parameters] # tech_carrier_data: -# source: outputs/loading_tabular_data/tech_carrier_data.csv +# data: outputs/loading_tabular_data/tech_carrier_data.csv # rows: [techs, parameters] # add_dims: # carriers: electricity # tech_cost_data: -# source: outputs/loading_tabular_data/tech_cost_data.csv +# data: outputs/loading_tabular_data/tech_cost_data.csv # rows: parameters # columns: techs # add_dims: @@ -678,25 +678,25 @@ # %% model_def = calliope.AttrDict.from_yaml_string( """ -data_sources: +data_tables: tech_data: - source: 
outputs/loading_tabular_data/tech_data.csv + data: outputs/loading_tabular_data/tech_data.csv rows: parameters columns: techs tech_node_data: - source: outputs/loading_tabular_data/tech_node_data.csv + data: outputs/loading_tabular_data/tech_node_data.csv rows: [techs, nodes, parameters] tech_timestep_data: - source: outputs/loading_tabular_data/tech_timestep_data.csv + data: outputs/loading_tabular_data/tech_timestep_data.csv rows: timesteps columns: [techs, parameters] tech_carrier_data: - source: outputs/loading_tabular_data/tech_carrier_data.csv + data: outputs/loading_tabular_data/tech_carrier_data.csv rows: [techs, parameters] add_dims: carriers: electricity tech_cost_data: - source: outputs/loading_tabular_data/tech_cost_data.csv + data: outputs/loading_tabular_data/tech_cost_data.csv rows: parameters columns: techs add_dims: @@ -712,11 +712,13 @@ active: false """ ) -model_from_data_sources_w_deactivations = calliope.Model(model_def) +model_from_data_tables_w_deactivations = calliope.Model(model_def) # Let's compare the two after overriding `flow_cap_max` definition_matrix_old = ( - model_from_data_sources.inputs.definition_matrix.to_series().dropna() + model_from_data_tables.inputs.definition_matrix.to_series().dropna() +) +definition_matrix_new = ( + model_from_data_tables_w_deactivations.inputs.definition_matrix.to_series().dropna() ) -definition_matrix_new = model_from_data_sources_w_deactivations.inputs.definition_matrix.to_series().dropna() pd.concat([definition_matrix_old, definition_matrix_new], axis=1, keys=["old", "new"]) diff --git a/docs/examples/urban_scale/index.md b/docs/examples/urban_scale/index.md index 318cc7f2..499c9932 100644 --- a/docs/examples/urban_scale/index.md +++ b/docs/examples/urban_scale/index.md @@ -1,6 +1,6 @@ --- demand: - file: "src/calliope/example_models/urban_scale/data_sources/demand.csv" + file: "src/calliope/example_models/urban_scale/data_tables/demand.csv" header: [0, 1] index_col: 0 --- @@ -50,10 +50,10 @@ The 
import section in our file looks like this: ### Referencing tabular data As of Calliope v0.7.0 it is possible to load tabular data completely separately from the YAML model definition. -To do this we reference data tables under the `data_sources` key: +To do this we reference data tables under the `data_tables` key: ```yaml ---8<-- "src/calliope/example_models/urban_scale/model.yaml:data-sources" +--8<-- "src/calliope/example_models/urban_scale/model.yaml:data-tables" ``` In the Calliope example models, we only load timeseries data from file, including for [energy demand](#demand-technologies), [electricity export price](#revenue-by-export) and [solar PV resource availability](#supply-technologies). @@ -63,7 +63,7 @@ As an example, the data in the energy demand CSV file looks like this: {{ read_csv(page.meta.demand.file, header=page.meta.demand.header, index_col=page.meta.demand.index_col) }} You'll notice that in each row there is reference to a timestep, and in each column to a technology and a node. -Therefore, we reference `timesteps` in our data source `rows` and `nodes` and `techs` in our data source columns. +Therefore, we reference `timesteps` in our data table _rows_, and `nodes` and `techs` in our data table _columns_. Since all the data refers to the one parameter `sink_use_equals`, we don't add that information in the CSV file, but instead add it on as a dimension when loading the file. !!! 
info diff --git a/docs/hooks/dummy_model/model.yaml b/docs/hooks/dummy_model/model.yaml index d0c53a29..81f4fdd2 100644 --- a/docs/hooks/dummy_model/model.yaml +++ b/docs/hooks/dummy_model/model.yaml @@ -16,29 +16,29 @@ techs: from: A to: B -data_sources: +data_tables: techs: - source: techs.csv + data: techs.csv rows: [techs, parameters] nodes: - source: nodes.csv + data: nodes.csv rows: parameters add_dims: nodes: [A, B] tech_carrier: - source: tech_carrier.csv + data: tech_carrier.csv rows: [techs, carriers, parameters] costs: - source: costs.csv + data: costs.csv rows: [techs, parameters] add_dims: costs: monetary time_varying: - source: time_varying.csv + data: time_varying.csv rows: timesteps add_dims: parameters: sink_use_max diff --git a/docs/hooks/generate_math_docs.py b/docs/hooks/generate_math_docs.py index 32a4ef21..4849da88 100644 --- a/docs/hooks/generate_math_docs.py +++ b/docs/hooks/generate_math_docs.py @@ -29,7 +29,7 @@ In the expressions, terms in **bold** font are [decision variables](#decision-variables) and terms in *italic* font are [parameters](#parameters). The [decision variables](#decision-variables) and [parameters](#parameters) are listed at the end of the page; they also refer back to the global expressions / constraints in which they are used. -Those parameters which are defined over time (`timesteps`) in the expressions can be defined by a user as a single, time invariant value, or as a timeseries that is [loaded from file or dataframe](../creating/data_sources.md). +Those parameters which are defined over time (`timesteps`) in the expressions can be defined by a user as a single, time invariant value, or as a timeseries that is [loaded from file or dataframe](../creating/data_tables.md). !!! 
note diff --git a/docs/hooks/generate_readable_schema.py b/docs/hooks/generate_readable_schema.py index 296242e2..89ae232e 100644 --- a/docs/hooks/generate_readable_schema.py +++ b/docs/hooks/generate_readable_schema.py @@ -22,7 +22,7 @@ "config_schema": schema.CONFIG_SCHEMA, "model_schema": schema.MODEL_SCHEMA, "math_schema": schema.MATH_SCHEMA, - "data_source_schema": schema.DATA_SOURCE_SCHEMA, + "data_table_schema": schema.DATA_TABLE_SCHEMA, } diff --git a/docs/migrating.md b/docs/migrating.md index f033fb55..face9418 100644 --- a/docs/migrating.md +++ b/docs/migrating.md @@ -67,9 +67,9 @@ Instead, you define all your technology parameters at the same level. dims: costs ``` -### `file=`/`df=` → `data_sources` section +### `file=`/`df=` → `data_tables` section -`file=/df=` parameter values as references to timeseries data is replaced with loading tabular data at the top-level using the `data_sources` key. +`file=/df=` parameter values as references to timeseries data is replaced with loading tabular data at the top-level using the `data_tables` key. Assuming you have these two files: @@ -108,9 +108,9 @@ supply_file.csv: === "v0.7" ```yaml - data_sources: + data_tables: demand_data: - source: demand_file.csv + data: demand_file.csv rows: timesteps columns: nodes add_dims: @@ -118,7 +118,7 @@ supply_file.csv: parameters: sink_equals supply_data: - source: supply_file.csv + data: supply_file.csv rows: timesteps columns: nodes add_dims: @@ -127,7 +127,7 @@ supply_file.csv: ``` !!! info "See also" - [`data_sources` introduction](creating/data_sources.md); [`data_sources` tutorial][loading-tabular-data]. + [`data_tables` introduction](creating/data_tables.md); [`data_tables` tutorial][loading-tabular-data]. 
### Negative → positive demand and carrier consumption values @@ -358,7 +358,7 @@ Instead of defining the binary trigger `force_resource` to enforce the productio If you want these resource uses to be upper or lower bounds, use the equivalent `_max`/`_min` parameters. -You can find an example of this change [above](#filedf-→-data_sources-section). +You can find an example of this change [above](#filedf-→-data_tables-section). ### `units` + `purchased` → `purchased_units` @@ -690,8 +690,8 @@ We have re-implemented all these constraints as tested additional math snippets, ### Configuration options -* With the [change in how timeseries data is defined](#filedf-→-data_sources-section), we have removed the reference to a `timeseries_data_path`. -Instead, data source filepaths should always be relative to the `model.yaml` file or they should be absolute paths. +* With the [change in how timeseries data is defined](#filedf-→-data_tables-section), we have removed the reference to a `timeseries_data_path`. +Instead, data table filepaths should always be relative to the `model.yaml` file or they should be absolute paths. * We have removed `run.relax_constraint` alongside [removing group constraints](#group-constraints). * We have removed `model.file_allowed`, which many users will not even know existed (it was a largely internal configuration option)! Instead, it is possible to index any parameter over the time dimension. @@ -957,11 +957,11 @@ nodes: ### Loading non-timeseries tabular data -With the [change in loading timeseries data](#filedf-→-data_sources-section), we have expanded loading of tabular data to allow any data input. +With the [change in loading timeseries data](#filedf-→-data_tables-section), we have expanded loading of tabular data to allow any data input. Technically, you can now define all your data in tables (although we would still recommend a mix of YAML and tabular model definition). !!! 
info "See also" - `data_sources` [introduction](creating/data_sources.md) and [tutorial][loading-tabular-data]. + `data_tables` [introduction](creating/data_tables.md) and [tutorial][loading-tabular-data]. ### YAML-based math syntax diff --git a/docs/user_defined_math/examples/max_time_varying.yaml b/docs/user_defined_math/examples/max_time_varying.yaml index 1ffc83f2..6dcb1cce 100644 --- a/docs/user_defined_math/examples/max_time_varying.yaml +++ b/docs/user_defined_math/examples/max_time_varying.yaml @@ -4,7 +4,7 @@ # Set per-timestep variations in limits to out/inflows, which would otherwise be limited by a static value. # For example, `flow_cap` can be made to fluctuate per timestep above/below its rated value. # User-defined timeseries parameters need to be in the model inputs for these constraints. -# This can be achieved by defining them for each relevant technology in a CSV file and loading that as a [data source][loading-tabular-data-data_sources]. +# This can be achieved by defining them for each relevant technology in a CSV file and loading that as a [data table][loading-tabular-data-data_tables]. 
# # New indexed parameters: # @@ -29,4 +29,4 @@ constraints: equations: - expression: > flow_out <= - flow_cap_max_relative_per_ts * flow_cap * flow_out_parasitic_eff \ No newline at end of file + flow_cap_max_relative_per_ts * flow_cap * flow_out_parasitic_eff diff --git a/mkdocs.yml b/mkdocs.yml index 1ea0bbad..fa677f27 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -106,7 +106,7 @@ nav: - Nodes: creating/nodes.md - Inheriting from templates: creating/templates.md - Indexed parameters: creating/parameters.md - - Loading tabular data: creating/data_sources.md + - Loading data tables: creating/data_tables.md - Scenarios and overrides: creating/scenarios.md - Running a model: running.md - Analysing a model: analysing.md @@ -157,7 +157,7 @@ nav: - reference/api/exceptions.md - reference/api/logging.md - reference/config_schema.md - - reference/data_source_schema.md + - reference/data_table_schema.md - reference/model_schema.md - reference/math_schema.md - migrating.md diff --git a/src/calliope/config/config_schema.yaml b/src/calliope/config/config_schema.yaml index e9797429..463dbe4c 100644 --- a/src/calliope/config/config_schema.yaml +++ b/src/calliope/config/config_schema.yaml @@ -179,7 +179,7 @@ properties: patternProperties: '^[^_^\d][\w]*$': {} - data_sources: + data_tables: type: [object, "null"] description: >- Reference to files from which to load parts (or all) of the model definition. 
diff --git a/src/calliope/config/data_source_schema.yaml b/src/calliope/config/data_table_schema.yaml similarity index 92% rename from src/calliope/config/data_source_schema.yaml rename to src/calliope/config/data_table_schema.yaml index c11f025c..cf647124 100644 --- a/src/calliope/config/data_source_schema.yaml +++ b/src/calliope/config/data_table_schema.yaml @@ -2,13 +2,13 @@ # yaml-language-server: $schema=https://json-schema.org/draft/2020-12/schema# $schema: https://json-schema.org/draft/2020-12/schema# -title: Data source schema +title: Data table schema description: All options available to load model definition data from file. type: object additionalProperties: false -required: ["source"] +required: ["data"] $defs: - DataSourceVals: + DataTableVals: oneOf: - type: "null" - type: string @@ -25,14 +25,14 @@ properties: Names of dimensions defined row-wise. Each name should correspond to a column in your data that contains index items. These columns must be to the left of the columns containing your data. - $ref: "#/$defs/DataSourceVals" + $ref: "#/$defs/DataTableVals" columns: description: >- Names of dimensions defined column-wise. Each name should correspond to a row in your data that contains index items. These rows must be above the rows containing your data. - $ref: "#/$defs/DataSourceVals" - source: + $ref: "#/$defs/DataTableVals" + data: description: >- Relative or absolute filepath. If relative, will be relative to the model config file used to initialise the model. @@ -52,7 +52,7 @@ properties: type: [string, boolean, number] drop: type: [string, array] - $ref: "#/$defs/DataSourceVals" + $ref: "#/$defs/DataTableVals" description: >- Dimensions in the rows and/or columns that contain metadata and should therefore not be passed on to the loaded model dataset. These could include comments on the source of the data, the data license, or the parameter units. 
@@ -70,4 +70,4 @@ properties: '^[^_^\d][\w]*$': type: [string, array] description: Keys are dimension names (must not be in `rows` or `columns`), values are index items of that dimension to add. - $ref: "#/$defs/DataSourceVals" \ No newline at end of file + $ref: "#/$defs/DataTableVals" diff --git a/src/calliope/config/protected_parameters.yaml b/src/calliope/config/protected_parameters.yaml index 8ed48d94..6d0efbd3 100644 --- a/src/calliope/config/protected_parameters.yaml +++ b/src/calliope/config/protected_parameters.yaml @@ -1,4 +1,4 @@ -# Parameters for which loading from file via `data_sources` is prohibited +# Parameters for which loading from file via `data_tables` is prohibited active: >- Technology/Node activation (`active`) can only be used in the YAML model definition. diff --git a/src/calliope/example_models/national_scale/data_sources/cluster_days.csv b/src/calliope/example_models/national_scale/data_tables/cluster_days.csv similarity index 100% rename from src/calliope/example_models/national_scale/data_sources/cluster_days.csv rename to src/calliope/example_models/national_scale/data_tables/cluster_days.csv diff --git a/src/calliope/example_models/national_scale/data_sources/time_varying_params.csv b/src/calliope/example_models/national_scale/data_tables/time_varying_params.csv similarity index 100% rename from src/calliope/example_models/national_scale/data_sources/time_varying_params.csv rename to src/calliope/example_models/national_scale/data_tables/time_varying_params.csv diff --git a/src/calliope/example_models/national_scale/model.yaml b/src/calliope/example_models/national_scale/model.yaml index 18912aca..654a7194 100644 --- a/src/calliope/example_models/national_scale/model.yaml +++ b/src/calliope/example_models/national_scale/model.yaml @@ -33,9 +33,9 @@ parameters: bigM: 1e6 # --8<-- [end:parameters] -data_sources: +data_tables: time_varying_parameters: - source: data_sources/time_varying_params.csv + data: 
data_tables/time_varying_params.csv rows: timesteps columns: [comment, nodes, techs, parameters] - drop: comment \ No newline at end of file + drop: comment diff --git a/src/calliope/example_models/national_scale/scenarios.yaml b/src/calliope/example_models/national_scale/scenarios.yaml index a0763950..58a3dc81 100644 --- a/src/calliope/example_models/national_scale/scenarios.yaml +++ b/src/calliope/example_models/national_scale/scenarios.yaml @@ -27,7 +27,7 @@ overrides: init: name: "National-scale example model with time clustering" time_subset: null # No time subsetting - time_cluster: data_sources/cluster_days.csv + time_cluster: data_tables/cluster_days.csv spores: config: diff --git a/src/calliope/example_models/urban_scale/data_sources/demand.csv b/src/calliope/example_models/urban_scale/data_tables/demand.csv similarity index 100% rename from src/calliope/example_models/urban_scale/data_sources/demand.csv rename to src/calliope/example_models/urban_scale/data_tables/demand.csv diff --git a/src/calliope/example_models/urban_scale/data_sources/export_power.csv b/src/calliope/example_models/urban_scale/data_tables/export_power.csv similarity index 100% rename from src/calliope/example_models/urban_scale/data_sources/export_power.csv rename to src/calliope/example_models/urban_scale/data_tables/export_power.csv diff --git a/src/calliope/example_models/urban_scale/data_sources/pv_resource.csv b/src/calliope/example_models/urban_scale/data_tables/pv_resource.csv similarity index 100% rename from src/calliope/example_models/urban_scale/data_sources/pv_resource.csv rename to src/calliope/example_models/urban_scale/data_tables/pv_resource.csv diff --git a/src/calliope/example_models/urban_scale/model.yaml b/src/calliope/example_models/urban_scale/model.yaml index fb892c1d..e282bad6 100644 --- a/src/calliope/example_models/urban_scale/model.yaml +++ b/src/calliope/example_models/urban_scale/model.yaml @@ -33,16 +33,16 @@ parameters: bigM: 1e6 # --8<-- 
[end:parameters] -# --8<-- [start:data-sources] -data_sources: +# --8<-- [start:data-tables] +data_tables: demand: - source: data_sources/demand.csv + data: data_tables/demand.csv rows: timesteps columns: [techs, nodes] add_dims: parameters: sink_use_equals pv_resource: - source: data_sources/pv_resource.csv + data: data_tables/pv_resource.csv rows: timesteps columns: [comment, scaler] add_dims: @@ -52,7 +52,7 @@ data_sources: scaler: per_area drop: [comment, scaler] export_power: - source: data_sources/export_power.csv + data: data_tables/export_power.csv rows: timesteps columns: nodes add_dims: @@ -60,4 +60,4 @@ data_sources: techs: chp costs: monetary carriers: electricity -# --8<-- [end:data-sources] +# --8<-- [end:data-tables] diff --git a/src/calliope/model.py b/src/calliope/model.py index 3840830a..3b1ea3db 100644 --- a/src/calliope/model.py +++ b/src/calliope/model.py @@ -17,7 +17,7 @@ from calliope.attrdict import AttrDict from calliope.postprocess import postprocess as postprocess_results from calliope.preprocess import load -from calliope.preprocess.data_sources import DataSource +from calliope.preprocess.data_tables import DataTable from calliope.preprocess.model_data import ModelDataFactory from calliope.util.logging import log_time from calliope.util.schema import ( @@ -52,7 +52,7 @@ def __init__( model_definition: str | Path | dict | xr.Dataset, scenario: str | None = None, override_dict: dict | None = None, - data_source_dfs: dict[str, pd.DataFrame] | None = None, + data_table_dfs: dict[str, pd.DataFrame] | None = None, **kwargs, ): """Returns a new Model from YAML model configuration files or a fully specified dictionary. @@ -69,8 +69,8 @@ def __init__( Additional overrides to apply to `config`. These will be applied *after* applying any defined `scenario` overrides. Defaults to None. - data_source_dfs (dict[str, pd.DataFrame] | None, optional): - Model definition `data_source` entries can reference in-memory pandas DataFrames. 
+ data_table_dfs (dict[str, pd.DataFrame] | None, optional): + Model definition `data_table` entries can reference in-memory pandas DataFrames. The referenced data must be supplied here as a dictionary of those DataFrames. Defaults to None. **kwargs: initialisation overrides. @@ -99,7 +99,7 @@ def __init__( ) ) self._init_from_model_def_dict( - model_def, applied_overrides, scenario, data_source_dfs + model_def, applied_overrides, scenario, data_table_dfs ) self._model_data.attrs["timestamp_model_creation"] = timestamp_model_creation @@ -143,7 +143,7 @@ def _init_from_model_def_dict( model_definition: calliope.AttrDict, applied_overrides: str, scenario: str | None, - data_source_dfs: dict[str, pd.DataFrame] | None = None, + data_table_dfs: dict[str, pd.DataFrame] | None = None, ) -> None: """Initialise the model using pre-processed YAML files and optional dataframes/dicts. @@ -151,7 +151,7 @@ def _init_from_model_def_dict( model_definition (calliope.AttrDict): preprocessed model configuration. applied_overrides (str): overrides specified by users scenario (str | None): scenario specified by users - data_source_dfs (dict[str, pd.DataFrame] | None, optional): files with additional model information. Defaults to None. + data_table_dfs (dict[str, pd.DataFrame] | None, optional): files with additional model information. Defaults to None. 
""" # First pass to check top-level keys are all good validate_dict(model_definition, CONFIG_SCHEMA, "Model definition") @@ -184,21 +184,21 @@ def _init_from_model_def_dict( "defaults": param_metadata["default"], } - data_sources = [ - DataSource( + data_tables = [ + DataTable( init_config, source_name, source_dict, - data_source_dfs, + data_table_dfs, self._model_def_path, ) for source_name, source_dict in model_definition.pop( - "data_sources", {} + "data_tables", {} ).items() ] model_data_factory = ModelDataFactory( - init_config, model_definition, data_sources, attributes, param_metadata + init_config, model_definition, data_tables, attributes, param_metadata ) model_data_factory.build() diff --git a/src/calliope/preprocess/data_sources.py b/src/calliope/preprocess/data_tables.py similarity index 88% rename from src/calliope/preprocess/data_sources.py rename to src/calliope/preprocess/data_tables.py index f11bb668..b8151c1d 100644 --- a/src/calliope/preprocess/data_sources.py +++ b/src/calliope/preprocess/data_tables.py @@ -15,7 +15,7 @@ from calliope.attrdict import AttrDict from calliope.io import load_config from calliope.util.schema import ( - DATA_SOURCE_SCHEMA, + DATA_TABLE_SCHEMA, MODEL_SCHEMA, extract_from_schema, validate_dict, @@ -27,71 +27,71 @@ DTYPE_OPTIONS = {"str": str, "float": float} -class DataSourceDict(TypedDict): - """Uniform dictionary for data sources.""" +class DataTableDict(TypedDict): + """Uniform dictionary for data tables.""" rows: NotRequired[str | list[str]] columns: NotRequired[str | list[str]] - source: str + data: str df: NotRequired[str] add_dims: NotRequired[dict[str, str | list[str]]] select: dict[str, str | bool | int] drop: Hashable | list[Hashable] -class DataSource: +class DataTable: """Class for in memory data handling.""" - MESSAGE_TEMPLATE = "(data_sources, {name}) | {message}." + MESSAGE_TEMPLATE = "(data_tables, {name}) | {message}." 
PARAMS_TO_INITIALISE_YAML = ["base_tech", "to", "from"] def __init__( self, model_config: dict, - source_name: str, - data_source: DataSourceDict, - data_source_dfs: dict[str, pd.DataFrame] | None = None, + table_name: str, + data_table: DataTableDict, + data_table_dfs: dict[str, pd.DataFrame] | None = None, model_definition_path: Path | None = None, ): - """Load and format a data source from file / in-memory object. + """Load and format a data table from file / in-memory object. Args: model_config (dict): Model initialisation configuration dictionary. - source_name (str): name of the data source. - data_source (DataSourceDict): Data source definition dictionary. - data_source_dfs (dict[str, pd.DataFrame] | None, optional): - If given, a dictionary mapping source names in `data_source` to in-memory pandas DataFrames. + table_name (str): name of the data table. + data_table (DataTableDict): Data table definition dictionary. + data_table_dfs (dict[str, pd.DataFrame] | None, optional): + If given, a dictionary mapping table names in `data_table` to in-memory pandas DataFrames. Defaults to None. model_definition_path (Path | None, optional): - If given, the path to the model definition YAML file, relative to which data source filepaths will be set. - If None, relative data source filepaths will be considered relative to the current working directory. + If given, the path to the model definition YAML file, relative to which data table filepaths will be set. + If None, relative data table filepaths will be considered relative to the current working directory. Defaults to None. 
""" - validate_dict(data_source, DATA_SOURCE_SCHEMA, "data source") - self.input = data_source - self.dfs = data_source_dfs if data_source_dfs is not None else dict() + validate_dict(data_table, DATA_TABLE_SCHEMA, "data table") + self.input = data_table + self.dfs = data_table_dfs if data_table_dfs is not None else dict() self.model_definition_path = model_definition_path self.config = model_config self.columns = self._listify_if_defined("columns") self.index = self._listify_if_defined("rows") - self._name = source_name + self._name = table_name self.protected_params = load_config("protected_parameters.yaml") - if ".csv" in Path(self.input["source"]).suffixes: + if ".csv" in Path(self.input["data"]).suffixes: df = self._read_csv() else: - df = self.dfs[self.input["source"]] + df = self.dfs[self.input["data"]] self.dataset = self._df_to_ds(df) @property def name(self): - """Data source name.""" + """Data table name.""" return self._name def drop(self, name: str): - """Drop a data in-place from the data source. + """Drop a data in-place from the data table. Args: name (str): Name of data array to drop. @@ -118,15 +118,15 @@ def tech_dict(self) -> tuple[AttrDict, AttrDict]: return tech_dict, base_tech_data def node_dict(self, techs_incl_inheritance: AttrDict) -> AttrDict: - """Create a dummy node definition dictionary from the dimensions defined across all data sources. + """Create a dummy node definition dictionary from the dimensions defined across all data tables. This definition dictionary will ensure that the minimal YAML content is still possible. - This function should be run _after_ `self._update_tech_def_from_data_source`. + This function should be run _after_ `self._update_tech_def_from_data_table`. Args: techs_incl_inheritance (AttrDict): - Technology definition dictionary which is a union of any YAML definition and the result of calling `self.tech_dict` across all data sources. 
+ Technology definition dictionary which is a union of any YAML definition and the result of calling `self.tech_dict` across all data tables. Technologies should have their entire definition inheritance chain resolved. """ node_tech_vars = self.dataset[ @@ -245,7 +245,7 @@ def _read_csv(self) -> pd.DataFrame: Returns: pd.DataFrame: Loaded data without any processing. """ - filename = self.input["source"] + filename = self.input["data"] if self.columns is None: self._log( @@ -272,7 +272,7 @@ def _df_to_ds(self, df: pd.DataFrame) -> xr.Dataset: """ if not isinstance(df, pd.DataFrame): self._raise_error( - "Data source must be a pandas DataFrame. " + "Data table must be a pandas DataFrame. " "If you are providing an in-memory object, ensure it is not a pandas Series by calling the method `to_frame()`" ) for axis, names in {"columns": self.columns, "index": self.index}.items(): @@ -342,7 +342,7 @@ def _check_for_protected_params(self, tdf: pd.Series): if not invalid_params.empty: extra_info = set(self.protected_params[k] for k in invalid_params) exceptions.print_warnings_and_raise_errors( - errors=list(extra_info), during=f"data source loading ({self.name})" + errors=list(extra_info), during=f"data table loading ({self.name})" ) def _check_processed_tdf(self, tdf: pd.Series): @@ -374,7 +374,7 @@ def _log(self, message, level="debug"): ) def _listify_if_defined(self, key: str) -> list | None: - """If `key` is in data source definition dictionary, return values as a list. + """If `key` is in data table definition dictionary, return values as a list. If values are not yet an iterable, they will be coerced to an iterable of length 1. If they are an iterable, they will be coerced to a list. @@ -384,7 +384,7 @@ def _listify_if_defined(self, key: str) -> list | None: default (Literal[None, 0]): Either zero or None Returns: - list | None: If `key` not defined in data source, return None, else return values as a list.
+ list | None: If `key` not defined in data table, return None, else return values as a list. """ vals = self.input.get(key, None) if vals is not None: @@ -392,14 +392,14 @@ def _listify_if_defined(self, key: str) -> list | None: return vals def _compare_axis_names(self, loaded_names: list, defined_names: list, axis: str): - """Check loaded axis level names compared to those given by `rows` and `columns` in data source definition dictionary. + """Check loaded axis level names compared to those given by `rows` and `columns` in data table definition dictionary. The data file / in-memory object does not need to have any level names defined, - but if they _are_ defined then they must match those given in the data source definition dictionary. + but if they _are_ defined then they must match those given in the data table definition dictionary. Args: loaded_names (list): Names as defined in the loaded data file / in-memory object. - defined_names (list): Names as defined in the data source dictionary. + defined_names (list): Names as defined in the data table dictionary. axis (str): Axis on which the names are levels. 
""" if any( diff --git a/src/calliope/preprocess/model_data.py b/src/calliope/preprocess/model_data.py index 442c6226..6de0aa3f 100644 --- a/src/calliope/preprocess/model_data.py +++ b/src/calliope/preprocess/model_data.py @@ -15,7 +15,7 @@ from calliope import exceptions from calliope.attrdict import AttrDict -from calliope.preprocess import data_sources, time +from calliope.preprocess import data_tables, time from calliope.util.schema import MODEL_SCHEMA, validate_dict from calliope.util.tools import listify @@ -72,7 +72,7 @@ def __init__( self, model_config: dict, model_definition: ModelDefinition, - data_sources: list[data_sources.DataSource], + data_tables: list[data_tables.DataTable], attributes: dict, param_attributes: dict[str, dict], ): @@ -83,15 +83,15 @@ def __init__( Args: model_config (dict): Model initialisation configuration (i.e., `config.init`). model_definition (ModelDefinition): Definition of model nodes and technologies, and their potential `templates`. - data_sources (list[data_sources.DataSource]): Pre-loaded data sources that will be used to initialise the dataset before handling definitions given in `model_definition`. + data_tables (list[data_tables.DataTable]): Pre-loaded data tables that will be used to initialise the dataset before handling definitions given in `model_definition`. attributes (dict): Attributes to attach to the model Dataset. param_attributes (dict[str, dict]): Attributes to attach to the generated model DataArrays. 
""" self.config: dict = model_config self.model_definition: ModelDefinition = model_definition.copy() self.dataset = xr.Dataset(attrs=AttrDict(attributes)) - self.tech_data_from_sources = AttrDict() - self.init_from_data_sources(data_sources) + self.tech_data_from_tables = AttrDict() + self.init_from_data_tables(data_tables) flipped_attributes: dict[str, dict] = dict() for key, val in param_attributes.items(): @@ -110,39 +110,39 @@ def build(self): self.update_time_dimension_and_params() self.assign_input_attr() - def init_from_data_sources(self, data_sources: list[data_sources.DataSource]): + def init_from_data_tables(self, data_tables: list[data_tables.DataTable]): """Initialise the model definition and dataset using data loaded from file / in-memory objects. - A basic skeleton of the dictionary format model definition is created from the data sources, + A basic skeleton of the dictionary format model definition is created from the data tables, namely technology and technology-at-node lists (without parameter definitions). Args: - data_sources (list[data_sources.DataSource]): Pre-loaded data sources. + data_tables (list[data_tables.DataTable]): Pre-loaded data tables. 
""" - for data_source in data_sources: - tech_dict, base_tech_data = data_source.tech_dict() + for data_table in data_tables: + tech_dict, base_tech_data = data_table.tech_dict() tech_dict.union( self.model_definition.get("techs", AttrDict()), allow_override=True ) self.model_definition["techs"] = tech_dict - self.tech_data_from_sources.union(base_tech_data) + self.tech_data_from_tables.union(base_tech_data) techs_incl_inheritance = self._inherit_defs("techs") - for data_source in data_sources: - node_dict = data_source.node_dict(techs_incl_inheritance) + for data_table in data_tables: + node_dict = data_table.node_dict(techs_incl_inheritance) node_dict.union( self.model_definition.get("nodes", AttrDict()), allow_override=True ) self.model_definition["nodes"] = node_dict for param, lookup_dim in self.LOOKUP_PARAMS.items(): - lookup_dict = data_source.lookup_dict_from_param(param, lookup_dim) - self.tech_data_from_sources.union(lookup_dict) + lookup_dict = data_table.lookup_dict_from_param(param, lookup_dim) + self.tech_data_from_tables.union(lookup_dict) if lookup_dict: - data_source.drop(param) + data_table.drop(param) - for data_source in data_sources: + for data_table in data_tables: self._add_to_dataset( - data_source.dataset, f"(data_sources, {data_source.name})" + data_table.dataset, f"(data_tables, {data_table.name})" ) def add_node_tech_data(self): @@ -219,7 +219,7 @@ def add_top_level_params(self): if name in self.dataset.data_vars: exceptions.warn( f"(parameters, {name}) | " - "A parameter with this name has already been defined in a data source or at a node/tech level. " + "A parameter with this name has already been defined in a data table or at a node/tech level. " f"Non-NaN data defined here will override existing data for this parameter." 
) param_dict = self._prepare_param_dict(name, data) @@ -609,10 +609,10 @@ def _climb_template_tree( to_inherit = dim_item_dict.get("template", None) dim_groups = AttrDict(self.model_definition.get("templates", {})) if to_inherit is None: - if dim_name == "techs" and item_name in self.tech_data_from_sources: - _data_source_dict = deepcopy(self.tech_data_from_sources[item_name]) - _data_source_dict.union(dim_item_dict, allow_override=True) - dim_item_dict = _data_source_dict + if dim_name == "techs" and item_name in self.tech_data_from_tables: + _data_table_dict = deepcopy(self.tech_data_from_tables[item_name]) + _data_table_dict.union(dim_item_dict, allow_override=True) + dim_item_dict = _data_table_dict updated_dim_item_dict = dim_item_dict elif to_inherit not in dim_groups: raise KeyError( diff --git a/src/calliope/util/schema.py b/src/calliope/util/schema.py index a98f9bde..bd98cc77 100644 --- a/src/calliope/util/schema.py +++ b/src/calliope/util/schema.py @@ -16,7 +16,7 @@ CONFIG_SCHEMA = load_config("config_schema.yaml") MODEL_SCHEMA = load_config("model_def_schema.yaml") -DATA_SOURCE_SCHEMA = load_config("data_source_schema.yaml") +DATA_TABLE_SCHEMA = load_config("data_table_schema.yaml") MATH_SCHEMA = load_config("math_schema.yaml") diff --git a/tests/common/national_scale_from_data_sources/data_sources/costs_params.csv b/tests/common/national_scale_from_data_tables/data_tables/costs_params.csv similarity index 100% rename from tests/common/national_scale_from_data_sources/data_sources/costs_params.csv rename to tests/common/national_scale_from_data_tables/data_tables/costs_params.csv diff --git a/tests/common/national_scale_from_data_sources/data_sources/dimensionless_params.csv b/tests/common/national_scale_from_data_tables/data_tables/dimensionless_params.csv similarity index 100% rename from tests/common/national_scale_from_data_sources/data_sources/dimensionless_params.csv rename to 
tests/common/national_scale_from_data_tables/data_tables/dimensionless_params.csv diff --git a/tests/common/national_scale_from_data_sources/data_sources/links.csv b/tests/common/national_scale_from_data_tables/data_tables/links.csv similarity index 100% rename from tests/common/national_scale_from_data_sources/data_sources/links.csv rename to tests/common/national_scale_from_data_tables/data_tables/links.csv diff --git a/tests/common/national_scale_from_data_sources/data_sources/nodes_base_info.csv b/tests/common/national_scale_from_data_tables/data_tables/nodes_base_info.csv similarity index 100% rename from tests/common/national_scale_from_data_sources/data_sources/nodes_base_info.csv rename to tests/common/national_scale_from_data_tables/data_tables/nodes_base_info.csv diff --git a/tests/common/national_scale_from_data_sources/data_sources/techs_base_info.csv b/tests/common/national_scale_from_data_tables/data_tables/techs_base_info.csv similarity index 100% rename from tests/common/national_scale_from_data_sources/data_sources/techs_base_info.csv rename to tests/common/national_scale_from_data_tables/data_tables/techs_base_info.csv diff --git a/tests/common/national_scale_from_data_sources/data_sources/techs_carriers.csv b/tests/common/national_scale_from_data_tables/data_tables/techs_carriers.csv similarity index 100% rename from tests/common/national_scale_from_data_sources/data_sources/techs_carriers.csv rename to tests/common/national_scale_from_data_tables/data_tables/techs_carriers.csv diff --git a/tests/common/national_scale_from_data_sources/data_sources/techs_constraints.csv b/tests/common/national_scale_from_data_tables/data_tables/techs_constraints.csv similarity index 100% rename from tests/common/national_scale_from_data_sources/data_sources/techs_constraints.csv rename to tests/common/national_scale_from_data_tables/data_tables/techs_constraints.csv diff --git a/tests/common/national_scale_from_data_sources/data_sources/techs_costs_monetary.csv 
b/tests/common/national_scale_from_data_tables/data_tables/techs_costs_monetary.csv similarity index 100% rename from tests/common/national_scale_from_data_sources/data_sources/techs_costs_monetary.csv rename to tests/common/national_scale_from_data_tables/data_tables/techs_costs_monetary.csv diff --git a/tests/common/national_scale_from_data_sources/data_sources/techs_node_constraints.csv b/tests/common/national_scale_from_data_tables/data_tables/techs_node_constraints.csv similarity index 100% rename from tests/common/national_scale_from_data_sources/data_sources/techs_node_constraints.csv rename to tests/common/national_scale_from_data_tables/data_tables/techs_node_constraints.csv diff --git a/tests/common/national_scale_from_data_sources/model.yaml b/tests/common/national_scale_from_data_tables/model.yaml similarity index 76% rename from tests/common/national_scale_from_data_sources/model.yaml rename to tests/common/national_scale_from_data_tables/model.yaml index eec3b0c5..e9062f84 100644 --- a/tests/common/national_scale_from_data_sources/model.yaml +++ b/tests/common/national_scale_from_data_tables/model.yaml @@ -21,35 +21,35 @@ nodes: region1_2.techs: {csp} region1_3.techs: {csp} -data_sources: +data_tables: dimensionless_params: - source: data_sources/dimensionless_params.csv + data: data_tables/dimensionless_params.csv rows: parameters costs_params: - source: data_sources/costs_params.csv + data: data_tables/costs_params.csv rows: costs columns: parameters nodes_base_info: - source: data_sources/nodes_base_info.csv + data: data_tables/nodes_base_info.csv rows: nodes columns: parameters techs_carriers_at_nodes: - source: data_sources/techs_carriers.csv + data: data_tables/techs_carriers.csv rows: techs columns: parameters add_dims: carriers: power links: - source: data_sources/links.csv + data: data_tables/links.csv rows: techs columns: parameters techs_costs_monetary: - source: data_sources/techs_costs_monetary.csv + data: 
data_tables/techs_costs_monetary.csv rows: techs columns: parameters add_dims: @@ -57,22 +57,22 @@ data_sources: # will be loaded from the example model directory in calliope source code. time_varying_data_from_df: - source: time_varying_df + data: time_varying_df rows: timesteps columns: [comment, nodes, techs, parameters] drop: comment techs_base_info: - source: data_sources/techs_base_info.csv + data: data_tables/techs_base_info.csv rows: techs columns: parameters techs_constraints: - source: data_sources/techs_constraints.csv + data: data_tables/techs_constraints.csv rows: techs columns: parameters techs_node_constraints: - source: data_sources/techs_node_constraints.csv + data: data_tables/techs_node_constraints.csv rows: [nodes, techs] - columns: parameters \ No newline at end of file + columns: parameters diff --git a/tests/common/test_model/data_sources/cluster_days.csv b/tests/common/test_model/data_tables/cluster_days.csv similarity index 100% rename from tests/common/test_model/data_sources/cluster_days.csv rename to tests/common/test_model/data_tables/cluster_days.csv diff --git a/tests/common/test_model/data_sources/cluster_days_diff_dateformat.csv b/tests/common/test_model/data_tables/cluster_days_diff_dateformat.csv similarity index 100% rename from tests/common/test_model/data_sources/cluster_days_diff_dateformat.csv rename to tests/common/test_model/data_tables/cluster_days_diff_dateformat.csv diff --git a/tests/common/test_model/data_sources/demand_elec.csv b/tests/common/test_model/data_tables/demand_elec.csv similarity index 100% rename from tests/common/test_model/data_sources/demand_elec.csv rename to tests/common/test_model/data_tables/demand_elec.csv diff --git a/tests/common/test_model/data_sources/demand_elec_15T_to_2h.csv b/tests/common/test_model/data_tables/demand_elec_15T_to_2h.csv similarity index 100% rename from tests/common/test_model/data_sources/demand_elec_15T_to_2h.csv rename to 
tests/common/test_model/data_tables/demand_elec_15T_to_2h.csv diff --git a/tests/common/test_model/data_sources/demand_elec_15mins.csv b/tests/common/test_model/data_tables/demand_elec_15mins.csv similarity index 100% rename from tests/common/test_model/data_sources/demand_elec_15mins.csv rename to tests/common/test_model/data_tables/demand_elec_15mins.csv diff --git a/tests/common/test_model/data_sources/demand_heat.csv b/tests/common/test_model/data_tables/demand_heat.csv similarity index 100% rename from tests/common/test_model/data_sources/demand_heat.csv rename to tests/common/test_model/data_tables/demand_heat.csv diff --git a/tests/common/test_model/data_sources/demand_heat_diff_dateformat.csv b/tests/common/test_model/data_tables/demand_heat_diff_dateformat.csv similarity index 100% rename from tests/common/test_model/data_sources/demand_heat_diff_dateformat.csv rename to tests/common/test_model/data_tables/demand_heat_diff_dateformat.csv diff --git a/tests/common/test_model/data_sources/demand_heat_wrong_dateformat.csv b/tests/common/test_model/data_tables/demand_heat_wrong_dateformat.csv similarity index 100% rename from tests/common/test_model/data_sources/demand_heat_wrong_dateformat.csv rename to tests/common/test_model/data_tables/demand_heat_wrong_dateformat.csv diff --git a/tests/common/test_model/data_sources/demand_heat_wrong_length.csv b/tests/common/test_model/data_tables/demand_heat_wrong_length.csv similarity index 100% rename from tests/common/test_model/data_sources/demand_heat_wrong_length.csv rename to tests/common/test_model/data_tables/demand_heat_wrong_length.csv diff --git a/tests/common/test_model/data_sources/demand_simple.csv b/tests/common/test_model/data_tables/demand_simple.csv similarity index 100% rename from tests/common/test_model/data_sources/demand_simple.csv rename to tests/common/test_model/data_tables/demand_simple.csv diff --git a/tests/common/test_model/data_sources/supply_plus_resource.csv 
b/tests/common/test_model/data_tables/supply_plus_resource.csv similarity index 100% rename from tests/common/test_model/data_sources/supply_plus_resource.csv rename to tests/common/test_model/data_tables/supply_plus_resource.csv diff --git a/tests/common/test_model/data_sources/supply_simple.csv b/tests/common/test_model/data_tables/supply_simple.csv similarity index 100% rename from tests/common/test_model/data_sources/supply_simple.csv rename to tests/common/test_model/data_tables/supply_simple.csv diff --git a/tests/common/test_model/model.yaml b/tests/common/test_model/model.yaml index aa905680..9eef09c5 100644 --- a/tests/common/test_model/model.yaml +++ b/tests/common/test_model/model.yaml @@ -23,9 +23,9 @@ config: parameters: bigM: 1e3 -data_sources: +data_tables: demand_elec: - source: data_sources/demand_elec.csv + data: data_tables/demand_elec.csv rows: timesteps columns: nodes add_dims: diff --git a/tests/common/test_model/scenarios.yaml b/tests/common/test_model/scenarios.yaml index f0f7f78b..f1531511 100644 --- a/tests/common/test_model/scenarios.yaml +++ b/tests/common/test_model/scenarios.yaml @@ -29,9 +29,9 @@ overrides: test_supply_elec: simple_supply_plus: # does not have a solution - data_sources: + data_tables: supply_plus_resource: - source: data_sources/supply_plus_resource.csv + data: data_tables/supply_plus_resource.csv rows: timesteps columns: nodes add_dims: @@ -39,9 +39,9 @@ overrides: techs: test_supply_plus simple_supply_and_supply_plus: - data_sources: + data_tables: supply_plus_resource: - source: data_sources/supply_plus_resource.csv + data: data_tables/supply_plus_resource.csv rows: timesteps columns: nodes add_dims: @@ -53,9 +53,9 @@ overrides: b.techs.test_supply_elec: supply_and_supply_plus_milp: - data_sources: + data_tables: supply_plus_resource: - source: data_sources/supply_plus_resource.csv + data: data_tables/supply_plus_resource.csv rows: timesteps columns: nodes select: @@ -134,9 +134,9 @@ overrides: test_supply_elec: 
simple_conversion: - data_sources: + data_tables: demand_heat: - source: data_sources/demand_heat.csv + data: data_tables/demand_heat.csv rows: timesteps columns: nodes add_dims: @@ -157,9 +157,9 @@ overrides: test_conversion: conversion_and_conversion_plus: - data_sources: + data_tables: demand_heat: - source: data_sources/demand_heat.csv + data: data_tables/demand_heat.csv rows: timesteps columns: nodes select: @@ -180,9 +180,9 @@ overrides: templates.test_transmission.active: false conversion_plus_milp: - data_sources: + data_tables: demand_heat: - source: data_sources/demand_heat.csv + data: data_tables/demand_heat.csv rows: timesteps columns: nodes select: @@ -209,9 +209,9 @@ overrides: templates.test_transmission.active: false conversion_milp: - data_sources: + data_tables: demand_heat: - source: data_sources/demand_heat.csv + data: data_tables/demand_heat.csv rows: timesteps columns: nodes select: @@ -234,9 +234,9 @@ overrides: templates.test_transmission.active: false conversion_plus_purchase: - data_sources: + data_tables: demand_heat: - source: data_sources/demand_heat.csv + data: data_tables/demand_heat.csv rows: timesteps columns: nodes select: @@ -260,9 +260,9 @@ overrides: templates.test_transmission.active: false simple_conversion_plus: - data_sources: + data_tables: demand_heat: - source: data_sources/demand_heat.csv + data: data_tables/demand_heat.csv rows: timesteps columns: nodes select: @@ -281,9 +281,9 @@ overrides: templates.test_transmission.active: false simple_chp: - data_sources: + data_tables: demand_heat: - source: data_sources/demand_heat.csv + data: data_tables/demand_heat.csv rows: timesteps columns: nodes select: @@ -440,7 +440,7 @@ overrides: dims: costs demand_elec_max: - data_sources: + data_tables: demand_elec: add_dims: parameters: sink_use_max diff --git a/tests/common/util.py b/tests/common/util.py index 1a15ff49..5f0fe356 100644 --- a/tests/common/util.py +++ b/tests/common/util.py @@ -12,14 +12,14 @@ def build_test_model( 
override_dict=None, scenario=None, model_file="model.yaml", - data_source_dfs=None, + data_table_dfs=None, **init_kwargs, ): return calliope.Model( os.path.join(os.path.dirname(__file__), "test_model", model_file), override_dict=override_dict, scenario=scenario, - data_source_dfs=data_source_dfs, + data_table_dfs=data_table_dfs, **init_kwargs, ) diff --git a/tests/conftest.py b/tests/conftest.py index 6719e299..b01dc5ba 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -42,7 +42,7 @@ def model_defaults(): @pytest.fixture(scope="session") def data_source_dir(): - return Path(__file__).parent / "common" / "test_model" / "data_sources" + return Path(__file__).parent / "common" / "test_model" / "data_tables" @pytest.fixture(scope="session") diff --git a/tests/test_backend_pyomo.py b/tests/test_backend_pyomo.py index 794683ee..88f1e0ee 100755 --- a/tests/test_backend_pyomo.py +++ b/tests/test_backend_pyomo.py @@ -1519,7 +1519,7 @@ def cluster_model( ): override = { "config.init.time_subset": ["2005-01-01", "2005-01-04"], - "config.init.time_cluster": "data_sources/cluster_days.csv", + "config.init.time_cluster": "data_tables/cluster_days.csv", "config.init.add_math": ( ["storage_inter_cluster"] if storage_inter_cluster else [] ), diff --git a/tests/test_core_preprocess.py b/tests/test_core_preprocess.py index 5cfd8339..54592bd5 100644 --- a/tests/test_core_preprocess.py +++ b/tests/test_core_preprocess.py @@ -26,8 +26,8 @@ def test_model_from_dict(self, data_source_dir): } ) model_dict.union(node_dict) - for src in model_dict["data_sources"].values(): - src["source"] = (model_dir / src["source"]).as_posix() + for src in model_dict["data_tables"].values(): + src["data"] = (model_dir / src["data"]).as_posix() # test as AttrDict calliope.Model(model_dict) @@ -202,7 +202,7 @@ def test_inconsistent_time_indices_fails(self): """ # should fail: wrong length of demand_heat csv vs demand_elec override = AttrDict.from_yaml_string( - "data_sources.demand_elec.source: 
data_sources/demand_heat_wrong_length.csv" + "data_tables.demand_elec.data: data_tables/demand_heat_wrong_length.csv" ) # check in output error that it points to: 07/01/2005 10:00:00 with pytest.warns(exceptions.ModelWarning) as excinfo: @@ -213,7 +213,7 @@ def test_inconsistent_time_indices_fails(self): def test_inconsistent_time_indices_passes_thanks_to_time_subsetting(self): override = AttrDict.from_yaml_string( - "data_sources.demand_elec.source: data_sources/demand_heat_wrong_length.csv" + "data_tables.demand_elec.data: data_tables/demand_heat_wrong_length.csv" ) # should pass: wrong length of demand_heat csv, but time subsetting removes the difference with warnings.catch_warnings(): @@ -345,7 +345,7 @@ def test_clustering_and_cyclic_storage(self): """ override = { "config.init.time_subset": ["2005-01-01", "2005-01-04"], - "config.init.time_cluster": "data_sources/cluster_days.csv", + "config.init.time_cluster": "data_tables/cluster_days.csv", "config.build.cyclic_storage": True, } diff --git a/tests/test_example_models.py b/tests/test_example_models.py index 6f19464e..507e3d50 100755 --- a/tests/test_example_models.py +++ b/tests/test_example_models.py @@ -37,11 +37,11 @@ def test_preprocess_operate(self): class TestNationalScaleExampleModelSenseChecks: @pytest.fixture(scope="class") - def nat_model_from_data_sources(self): + def nat_model_from_data_tables(self): df = pd.read_csv( calliope.examples._EXAMPLE_MODEL_DIR / "national_scale" - / "data_sources" + / "data_tables" / "time_varying_params.csv", index_col=0, header=[0, 1, 2, 3], @@ -49,9 +49,9 @@ def nat_model_from_data_sources(self): model = calliope.Model( Path(__file__).parent / "common" - / "national_scale_from_data_sources" + / "national_scale_from_data_tables" / "model.yaml", - data_source_dfs={"time_varying_df": df}, + data_table_dfs={"time_varying_df": df}, time_subset=["2005-01-01", "2005-01-01"], ) model.build() @@ -65,7 +65,7 @@ def nat_model(self): model.build() return model - 
@pytest.fixture(params=["nat_model", "nat_model_from_data_sources"]) + @pytest.fixture(params=["nat_model", "nat_model_from_data_tables"]) def example_tester(self, request): def _example_tester(solver="cbc", solver_io=None): model = request.getfixturevalue(request.param) @@ -131,7 +131,7 @@ def test_nationalscale_example_results_glpk(self, example_tester): pytest.skip("GLPK not installed") def test_fails_gracefully_without_timeseries(self): - override = {"data_sources": {"_REPLACE_": {}}} + override = {"data_tables": {"_REPLACE_": {}}} with pytest.raises(calliope.exceptions.ModelError) as excinfo: calliope.examples.national_scale(override_dict=override) @@ -397,10 +397,10 @@ def test_nationalscale_resampled_example_results_glpk(self): class TestUrbanScaleExampleModelSenseChecks: def example_tester(self, source_unit, solver="cbc", solver_io=None): - data_sources = f"data_sources.pv_resource.select.scaler: {source_unit}" + data_tables = f"data_tables.pv_resource.select.scaler: {source_unit}" unit_override = { "techs.pv.source_unit": source_unit, - **calliope.AttrDict.from_yaml_string(data_sources), + **calliope.AttrDict.from_yaml_string(data_tables), } model = calliope.examples.urban_scale( diff --git a/tests/test_math.py b/tests/test_math.py index 20d2ae3e..f27fe107 100644 --- a/tests/test_math.py +++ b/tests/test_math.py @@ -770,9 +770,9 @@ class TestNetImportShare(CustomMathExamples): YAML_FILEPATH = "net_import_share.yaml" shared_overrides = { "parameters.net_import_share": 1.5, - "data_sources": { + "data_tables": { "demand_heat": { - "source": "data_sources/demand_heat.csv", + "data": "data_tables/demand_heat.csv", "rows": "timesteps", "columns": "nodes", "select": {"nodes": "a"}, diff --git a/tests/test_preprocess_data_sources.py b/tests/test_preprocess_data_sources.py index 818eb760..ae9598da 100644 --- a/tests/test_preprocess_data_sources.py +++ b/tests/test_preprocess_data_sources.py @@ -4,7 +4,7 @@ import pytest import calliope -from calliope.preprocess 
import data_sources +from calliope.preprocess import data_tables from calliope.util.schema import CONFIG_SCHEMA, extract_from_schema from .common.util import check_error_or_warning @@ -17,55 +17,55 @@ def init_config(): @pytest.fixture(scope="class") def data_dir(tmp_path_factory): - filepath = tmp_path_factory.mktemp("data_sources") + filepath = tmp_path_factory.mktemp("data_tables") return filepath @pytest.fixture(scope="class") -def generate_data_source_dict(data_dir): - def _generate_data_source_dict(filename, df, rows, columns): +def generate_data_table_dict(data_dir): + def _generate_data_table_dict(filename, df, rows, columns): filepath = data_dir / filename df.rename_axis(index=rows).to_csv(filepath) return { - "source": filepath.as_posix(), + "data": filepath.as_posix(), "rows": rows, "columns": columns, "add_dims": {"parameters": "test_param"}, } - return _generate_data_source_dict + return _generate_data_table_dict -class TestDataSourceUtils: +class TestDataTableUtils: @pytest.fixture(scope="class") - def source_obj(self, init_config, generate_data_source_dict): + def table_obj(self, init_config, generate_data_table_dict): df = pd.Series({"bar": 0, "baz": 1}) - source_dict = generate_data_source_dict( + table_dict = generate_data_table_dict( "foo.csv", df, rows="test_row", columns=None ) - ds = data_sources.DataSource(init_config, "ds_name", source_dict) + ds = data_tables.DataTable(init_config, "ds_name", table_dict) ds.input["foo"] = ["foobar"] return ds - def test_name(self, source_obj): - assert source_obj.name == "ds_name" + def test_name(self, table_obj): + assert table_obj.name == "ds_name" - def test_raise_error(self, data_dir, source_obj): + def test_raise_error(self, data_dir, table_obj): with pytest.raises(calliope.exceptions.ModelError) as excinfo: - source_obj._raise_error("bar") - assert check_error_or_warning(excinfo, "(data_sources, ds_name) | bar.") + table_obj._raise_error("bar") + assert check_error_or_warning(excinfo, "(data_tables, 
ds_name) | bar.") - def test_log_message(self, caplog, data_dir, source_obj): + def test_log_message(self, caplog, data_dir, table_obj): caplog.set_level(logging.INFO) - source_obj._log("bar", "info") - assert "(data_sources, ds_name) | bar." in caplog.text + table_obj._log("bar", "info") + assert "(data_tables, ds_name) | bar." in caplog.text @pytest.mark.parametrize( ("key", "expected"), [("rows", ["test_row"]), ("columns", None), ("foo", ["foobar"])], ) - def test_listify_if_defined(self, source_obj, key, expected): - output = source_obj._listify_if_defined(key) + def test_listify_if_defined(self, table_obj, key, expected): + output = table_obj._listify_if_defined(key) if expected is None: assert output is expected else: @@ -81,8 +81,8 @@ def test_listify_if_defined(self, source_obj, key, expected): ([None, 1], ["foo", "bar"]), ], ) - def test_compare_axis_names_passes(self, source_obj, loaded, defined): - source_obj._compare_axis_names(loaded, defined, "foobar") + def test_compare_axis_names_passes(self, table_obj, loaded, defined): + table_obj._compare_axis_names(loaded, defined, "foobar") @pytest.mark.parametrize( ("loaded", "defined"), @@ -92,46 +92,46 @@ def test_compare_axis_names_passes(self, source_obj, loaded, defined): (["bar", 1], ["foo", "bar"]), ], ) - def test_compare_axis_names_fails(self, source_obj, loaded, defined): + def test_compare_axis_names_fails(self, table_obj, loaded, defined): with pytest.raises(calliope.exceptions.ModelError) as excinfo: - source_obj._compare_axis_names(loaded, defined, "foobar") + table_obj._compare_axis_names(loaded, defined, "foobar") assert check_error_or_warning(excinfo, "Trying to set names for foobar") -class TestDataSourceInitOneLevel: +class TestDataTableInitOneLevel: @pytest.fixture(scope="class") - def multi_row_no_col_data(self, generate_data_source_dict): + def multi_row_no_col_data(self, generate_data_table_dict): df = pd.Series({"bar": 0, "baz": 1}) - return df, generate_data_source_dict( + return df, 
generate_data_table_dict( "multi_row_no_col_file.csv", df, rows="test_row", columns=None ) @pytest.fixture(scope="class") - def multi_row_one_col_data(self, generate_data_source_dict): + def multi_row_one_col_data(self, generate_data_table_dict): df = pd.DataFrame({"foo": {"bar": 0, "baz": 1}}) - return df, generate_data_source_dict( + return df, generate_data_table_dict( "multi_row_one_col_file.csv", df, rows="test_row", columns="test_col" ) @pytest.fixture(scope="class") - def one_row_multi_col_data(self, generate_data_source_dict): + def one_row_multi_col_data(self, generate_data_table_dict): df = pd.DataFrame({"foo": {"bar": 0}, "foobar": {"bar": 1}}) - return df, generate_data_source_dict( + return df, generate_data_table_dict( "one_row_multi_col_file.csv", df, rows="test_row", columns="test_col" ) @pytest.fixture(scope="class") - def multi_row_multi_col_data(self, generate_data_source_dict): + def multi_row_multi_col_data(self, generate_data_table_dict): df = pd.DataFrame( {"foo": {"bar": 0, "baz": 10}, "foobar": {"bar": 0, "baz": 20}} ) - return df, generate_data_source_dict( + return df, generate_data_table_dict( "multi_row_multi_col_file.csv", df, rows="test_row", columns="test_col" ) def test_multi_row_no_col(self, init_config, multi_row_no_col_data): - expected_df, source_dict = multi_row_no_col_data - ds = data_sources.DataSource(init_config, "ds_name", source_dict) + expected_df, table_dict = multi_row_no_col_data + ds = data_tables.DataTable(init_config, "ds_name", table_dict) test_param = ds.dataset["test_param"] assert not set(["test_row"]).symmetric_difference(test_param.dims) pd.testing.assert_series_equal( @@ -139,16 +139,16 @@ def test_multi_row_no_col(self, init_config, multi_row_no_col_data): ) @pytest.mark.parametrize( - "data_source_ref", + "data_table_ref", [ "multi_row_one_col_data", "one_row_multi_col_data", "multi_row_multi_col_data", ], ) - def test_multi_row_one_col(self, init_config, request, data_source_ref): - expected_df, 
source_dict = request.getfixturevalue(data_source_ref) - ds = data_sources.DataSource(init_config, "ds_name", source_dict) + def test_multi_row_one_col(self, init_config, request, data_table_ref): + expected_df, table_dict = request.getfixturevalue(data_table_ref) + ds = data_tables.DataTable(init_config, "ds_name", table_dict) test_param = ds.dataset["test_param"] assert not set(["test_row", "test_col"]).symmetric_difference(test_param.dims) pd.testing.assert_series_equal( @@ -156,21 +156,21 @@ def test_multi_row_one_col(self, init_config, request, data_source_ref): ) @pytest.mark.parametrize( - "data_source_ref", + "data_table_ref", [ "multi_row_one_col_data", "one_row_multi_col_data", "multi_row_multi_col_data", ], ) - def test_load_from_df(self, init_config, request, data_source_ref): - expected_df, source_dict = request.getfixturevalue(data_source_ref) - source_dict["source"] = data_source_ref - ds = data_sources.DataSource( + def test_load_from_df(self, init_config, request, data_table_ref): + expected_df, table_dict = request.getfixturevalue(data_table_ref) + table_dict["data"] = data_table_ref + ds = data_tables.DataTable( init_config, "ds_name", - source_dict, - data_source_dfs={data_source_ref: expected_df}, + table_dict, + data_table_dfs={data_table_ref: expected_df}, ) test_param = ds.dataset["test_param"] assert not set(["test_row", "test_col"]).symmetric_difference(test_param.dims) @@ -179,25 +179,20 @@ def test_load_from_df(self, init_config, request, data_source_ref): ) def test_load_from_df_must_be_df(self, init_config, multi_row_no_col_data): - expected_df, source_dict = multi_row_no_col_data - source_dict["source"] = "foo" + expected_df, table_dict = multi_row_no_col_data + table_dict["data"] = "foo" with pytest.raises(calliope.exceptions.ModelError) as excinfo: - data_sources.DataSource( - init_config, - "ds_name", - source_dict, - data_source_dfs={"foo": expected_df}, + data_tables.DataTable( + init_config, "ds_name", table_dict, 
data_table_dfs={"foo": expected_df} ) - assert check_error_or_warning( - excinfo, "Data source must be a pandas DataFrame." - ) + assert check_error_or_warning(excinfo, "Data table must be a pandas DataFrame.") -class TestDataSourceInitMultiLevel: +class TestDataTableInitMultiLevel: @pytest.fixture(scope="class") - def multi_row_no_col_data(self, generate_data_source_dict): + def multi_row_no_col_data(self, generate_data_table_dict): df = pd.Series({("bar1", "bar2"): 0, ("baz1", "baz2"): 1}) - return df, generate_data_source_dict( + return df, generate_data_table_dict( "multi_row_no_col_file.csv", df, rows=["test_row1", "test_row2"], @@ -205,9 +200,9 @@ def multi_row_no_col_data(self, generate_data_source_dict): ) @pytest.fixture(scope="class") - def multi_row_one_col_data(self, generate_data_source_dict): + def multi_row_one_col_data(self, generate_data_table_dict): df = pd.DataFrame({"foo": {("bar1", "bar2"): 0, ("baz1", "baz2"): 1}}) - return df, generate_data_source_dict( + return df, generate_data_table_dict( "multi_row_one_col_file.csv", df, rows=["test_row1", "test_row2"], @@ -215,11 +210,11 @@ def multi_row_one_col_data(self, generate_data_source_dict): ) @pytest.fixture(scope="class") - def one_row_multi_col_data(self, generate_data_source_dict): + def one_row_multi_col_data(self, generate_data_table_dict): df = pd.DataFrame( {("foo1", "foo2"): {"bar": 0}, ("foobar1", "foobar2"): {"bar": 1}} ) - return df, generate_data_source_dict( + return df, generate_data_table_dict( "one_row_multi_col_file.csv", df, rows=["test_row"], @@ -227,14 +222,14 @@ def one_row_multi_col_data(self, generate_data_source_dict): ) @pytest.fixture(scope="class") - def multi_row_multi_col_data(self, generate_data_source_dict): + def multi_row_multi_col_data(self, generate_data_table_dict): df = pd.DataFrame( { ("foo1", "foo2"): {("bar1", "bar2"): 0, ("baz1", "baz2"): 10}, ("foobar1", "foobar2"): {("bar1", "bar2"): 0, ("baz1", "baz2"): 20}, } ) - return df, generate_data_source_dict( 
+ return df, generate_data_table_dict( "multi_row_multi_col_file.csv", df, rows=["test_row1", "test_row2"], @@ -242,8 +237,8 @@ def multi_row_multi_col_data(self, generate_data_source_dict): ) def test_multi_row_no_col(self, init_config, multi_row_no_col_data): - expected_df, source_dict = multi_row_no_col_data - ds = data_sources.DataSource(init_config, "ds_name", source_dict) + expected_df, table_dict = multi_row_no_col_data + ds = data_tables.DataTable(init_config, "ds_name", table_dict) test_param = ds.dataset["test_param"] assert not set(["test_row1", "test_row2"]).symmetric_difference(test_param.dims) pd.testing.assert_series_equal( @@ -254,31 +249,31 @@ def test_multi_row_no_col(self, init_config, multi_row_no_col_data): ) @pytest.mark.parametrize( - "data_source_ref", + "data_table_ref", [ "multi_row_one_col_data", "one_row_multi_col_data", "multi_row_multi_col_data", ], ) - def test_multi_row_one_col(self, init_config, request, data_source_ref): - expected_df, source_dict = request.getfixturevalue(data_source_ref) - ds = data_sources.DataSource(init_config, "ds_name", source_dict) + def test_multi_row_one_col(self, init_config, request, data_table_ref): + expected_df, table_dict = request.getfixturevalue(data_table_ref) + ds = data_tables.DataTable(init_config, "ds_name", table_dict) test_param = ds.dataset["test_param"] - all_dims = source_dict["rows"] + source_dict["columns"] + all_dims = table_dict["rows"] + table_dict["columns"] assert not set(all_dims).symmetric_difference(test_param.dims) pd.testing.assert_frame_equal( - test_param.to_series().dropna().unstack(source_dict["columns"]), + test_param.to_series().dropna().unstack(table_dict["columns"]), expected_df, check_names=False, check_dtype=False, ) -class TestDataSourceSelectDropAdd: +class TestDataTableSelectDropAdd: @pytest.fixture(scope="class") - def source_obj(self, init_config): - def _source_obj(**source_dict_kwargs): + def table_obj(self, init_config): + def 
_table_obj(**table_dict_kwargs): df = pd.DataFrame( { "test_param": { @@ -289,80 +284,80 @@ def _source_obj(**source_dict_kwargs): } } ) - source_dict = { - "source": "df", + table_dict = { + "data": "df", "rows": ["test_row1", "test_row2"], "columns": "parameters", - **source_dict_kwargs, + **table_dict_kwargs, } - ds = data_sources.DataSource( - init_config, "ds_name", source_dict, data_source_dfs={"df": df} + ds = data_tables.DataTable( + init_config, "ds_name", table_dict, data_table_dfs={"df": df} ) return ds - return _source_obj + return _table_obj - def test_select_keep_one(self, source_obj): - data_source = source_obj(select={"test_row1": "bar1"}) + def test_select_keep_one(self, table_obj): + data_table = table_obj(select={"test_row1": "bar1"}) expected = pd.Series({("bar1", "baz1"): 0, ("bar1", "baz4"): 3}) - assert data_source.dataset.coords["test_row1"].item() == "bar1" + assert data_table.dataset.coords["test_row1"].item() == "bar1" pd.testing.assert_series_equal( - data_source.dataset.test_param.to_series().dropna(), + data_table.dataset.test_param.to_series().dropna(), expected.sort_index(), check_dtype=False, check_names=False, ) - def test_select_keep_two(self, source_obj): - data_source = source_obj(select={"test_row1": ["bar1", "bar2"]}) + def test_select_keep_two(self, table_obj): + data_table = table_obj(select={"test_row1": ["bar1", "bar2"]}) expected = pd.Series( {("bar1", "baz1"): 0, ("bar2", "baz2"): 1, ("bar1", "baz4"): 3} ) assert not set(["bar1", "bar2"]).symmetric_difference( - data_source.dataset.coords["test_row1"].values + data_table.dataset.coords["test_row1"].values ) pd.testing.assert_series_equal( - data_source.dataset.test_param.to_series().dropna(), + data_table.dataset.test_param.to_series().dropna(), expected.sort_index(), check_dtype=False, check_names=False, ) - def test_select_drop_one(self, source_obj): - data_source = source_obj( + def test_select_drop_one(self, table_obj): + data_table = table_obj( select={"test_row1": 
"bar2", "test_row2": "baz2"}, drop=["test_row1", "test_row2"], ) - assert not data_source.dataset.dims - assert data_source.dataset.test_param.item() == 1 + assert not data_table.dataset.dims + assert data_table.dataset.test_param.item() == 1 - def test_select_drop_two(self, source_obj): - data_source = source_obj(select={"test_row1": "bar1"}, drop="test_row1") + def test_select_drop_two(self, table_obj): + data_table = table_obj(select={"test_row1": "bar1"}, drop="test_row1") expected = pd.Series({"baz1": 0, "baz4": 3}) - assert "test_row1" not in data_source.dataset.dims + assert "test_row1" not in data_table.dataset.dims pd.testing.assert_series_equal( - data_source.dataset.test_param.to_series().dropna(), + data_table.dataset.test_param.to_series().dropna(), expected.sort_index(), check_dtype=False, check_names=False, ) - def test_drop_one(self, source_obj): - data_source = source_obj(drop="test_row1") + def test_drop_one(self, table_obj): + data_table = table_obj(drop="test_row1") expected = pd.Series({"baz1": 0, "baz2": 1, "baz3": 2, "baz4": 3}) - assert "test_row1" not in data_source.dataset.dims + assert "test_row1" not in data_table.dataset.dims pd.testing.assert_series_equal( - data_source.dataset.test_param.to_series().dropna(), + data_table.dataset.test_param.to_series().dropna(), expected.sort_index(), check_dtype=False, check_names=False, ) -class TestDataSourceMalformed: +class TestDataTableMalformed: @pytest.fixture(scope="class") - def source_obj(self, init_config): - def _source_obj(**source_dict_kwargs): + def table_obj(self, init_config): + def _table_obj(**table_dict_kwargs): df = pd.DataFrame( { "foo": { @@ -373,71 +368,71 @@ def _source_obj(**source_dict_kwargs): } } ) - source_dict = { - "source": "df", + table_dict = { + "data": "df", "rows": ["test_row1", "test_row2"], - **source_dict_kwargs, + **table_dict_kwargs, } - ds = data_sources.DataSource( - init_config, "ds_name", source_dict, data_source_dfs={"df": df} + ds = 
data_tables.DataTable( + init_config, "ds_name", table_dict, data_table_dfs={"df": df} ) return ds - return _source_obj + return _table_obj - def test_check_processed_tdf_no_parameters_dim(self, source_obj): + def test_check_processed_tdf_no_parameters_dim(self, table_obj): with pytest.raises(calliope.exceptions.ModelError) as excinfo: - source_obj() + table_obj() assert check_error_or_warning(excinfo, "The `parameters` dimension must exist") - def test_check_processed_tdf_duplicated_idx(self, source_obj): + def test_check_processed_tdf_duplicated_idx(self, table_obj): with pytest.raises(calliope.exceptions.ModelError) as excinfo: - source_obj(drop="test_row2", add_dims={"parameters": "test_param"}) + table_obj(drop="test_row2", add_dims={"parameters": "test_param"}) assert check_error_or_warning(excinfo, "Duplicate index items found:") - def test_check_processed_tdf_duplicated_dim_name(self, source_obj): + def test_check_processed_tdf_duplicated_dim_name(self, table_obj): with pytest.raises(calliope.exceptions.ModelError) as excinfo: - source_obj(add_dims={"test_row2": "foo", "parameters": "test_param"}) + table_obj(add_dims={"test_row2": "foo", "parameters": "test_param"}) assert check_error_or_warning(excinfo, "Duplicate dimension names found:") - def test_too_many_called_cols(self, source_obj): + def test_too_many_called_cols(self, table_obj): with pytest.raises(calliope.exceptions.ModelError) as excinfo: - source_obj(columns=["foo", "bar"]) + table_obj(columns=["foo", "bar"]) assert check_error_or_warning( excinfo, "Expected 2 columns levels in loaded data." ) - def test_too_few_called_rows(self, source_obj): + def test_too_few_called_rows(self, table_obj): with pytest.raises(calliope.exceptions.ModelError) as excinfo: - source_obj(rows=None) + table_obj(rows=None) assert check_error_or_warning( excinfo, "Expected a single index level in loaded data." 
) - def test_check_for_protected_params(self, source_obj): + def test_check_for_protected_params(self, table_obj): with pytest.raises(calliope.exceptions.ModelError) as excinfo: - source_obj(add_dims={"parameters": "definition_matrix"}) + table_obj(add_dims={"parameters": "definition_matrix"}) assert check_error_or_warning( excinfo, "`definition_matrix` is a protected array" ) -class TestDataSourceLookupDictFromParam: +class TestDataTableLookupDictFromParam: @pytest.fixture(scope="class") - def source_obj(self, init_config): + def table_obj(self, init_config): df = pd.DataFrame( { "FOO": {("foo1", "bar1"): 1, ("foo1", "bar2"): 1}, "BAR": {("foo1", "bar1"): 1, ("foo2", "bar2"): 1}, } ) - source_dict = { - "source": "df", + table_dict = { + "data": "df", "rows": ["techs", "carriers"], "columns": "parameters", } - ds = data_sources.DataSource( - init_config, "ds_name", source_dict, data_source_dfs={"df": df} + ds = data_tables.DataTable( + init_config, "ds_name", table_dict, data_table_dfs={"df": df} ) return ds @@ -448,49 +443,49 @@ def source_obj(self, init_config): ("BAR", {"foo1": {"BAR": "bar1"}, "foo2": {"BAR": "bar2"}}), ], ) - def test_carrier_info_dict_from_model_data_var(self, source_obj, param, expected): - carrier_info = source_obj.lookup_dict_from_param(param, "carriers") + def test_carrier_info_dict_from_model_data_var(self, table_obj, param, expected): + carrier_info = table_obj.lookup_dict_from_param(param, "carriers") assert carrier_info == expected - def test_carrier_info_dict_from_model_data_var_missing_dim(self, source_obj): + def test_carrier_info_dict_from_model_data_var_missing_dim(self, table_obj): with pytest.raises(calliope.exceptions.ModelError) as excinfo: - source_obj.lookup_dict_from_param("FOO", "foobar") + table_obj.lookup_dict_from_param("FOO", "foobar") check_error_or_warning( excinfo, "Loading FOO with missing dimension(s). 
Must contain `techs` and `foobar`, received: ('techs', 'carriers')", ) -class TestDataSourceTechDict: +class TestDataTableTechDict: @pytest.fixture(scope="class") - def source_obj(self, init_config): - def _source_obj(df_dict, rows="techs"): + def table_obj(self, init_config): + def _table_obj(df_dict, rows="techs"): df = pd.DataFrame(df_dict) - source_dict = {"source": "df", "rows": rows, "columns": "parameters"} - ds = data_sources.DataSource( - init_config, "ds_name", source_dict, data_source_dfs={"df": df} + table_dict = {"data": "df", "rows": rows, "columns": "parameters"} + ds = data_tables.DataTable( + init_config, "ds_name", table_dict, data_table_dfs={"df": df} ) return ds - return _source_obj + return _table_obj - def test_tech_dict_from_one_param(self, source_obj): + def test_tech_dict_from_one_param(self, table_obj): df_dict = {"test_param": {"foo1": 1, "foo2": 2}} - tech_dict, base_dict = source_obj(df_dict).tech_dict() + tech_dict, base_dict = table_obj(df_dict).tech_dict() assert tech_dict == {"foo1": {}, "foo2": {}} assert base_dict == {} - def test_tech_dict_from_two_param(self, source_obj): + def test_tech_dict_from_two_param(self, table_obj): df_dict = {"foo": {"foo1": 1, "foo2": 2}, "bar": {"bar1": 1, "bar2": 2}} - tech_dict, base_dict = source_obj(df_dict).tech_dict() + tech_dict, base_dict = table_obj(df_dict).tech_dict() assert tech_dict == {"foo1": {}, "foo2": {}, "bar1": {}, "bar2": {}} assert base_dict == {} - def test_tech_dict_from_parent(self, source_obj): + def test_tech_dict_from_parent(self, table_obj): df_dict = {"base_tech": {"foo1": "transmission", "foo2": "supply"}} - tech_dict, base_dict = source_obj(df_dict).tech_dict() + tech_dict, base_dict = table_obj(df_dict).tech_dict() assert tech_dict == {"foo1": {}, "foo2": {}} assert base_dict == { @@ -498,19 +493,19 @@ def test_tech_dict_from_parent(self, source_obj): "foo2": {"base_tech": "supply"}, } - def test_tech_dict_from_parent_and_param(self, source_obj): + def 
test_tech_dict_from_parent_and_param(self, table_obj): df_dict = {"base_tech": {"foo1": "transmission"}, "other_param": {"bar1": 1}} - tech_dict, base_dict = source_obj(df_dict).tech_dict() + tech_dict, base_dict = table_obj(df_dict).tech_dict() assert tech_dict == {"foo1": {}, "bar1": {}} assert base_dict == {"foo1": {"base_tech": "transmission"}} - def test_tech_dict_from_to_from(self, source_obj): + def test_tech_dict_from_to_from(self, table_obj): df_dict = { "from": {"foo1": "bar1", "foo2": "bar2"}, "to": {"foo1": "bar2", "foo3": "bar1"}, } - tech_dict, base_dict = source_obj(df_dict).tech_dict() + tech_dict, base_dict = table_obj(df_dict).tech_dict() assert tech_dict == {"foo1": {}, "foo2": {}, "foo3": {}} assert base_dict == { @@ -519,56 +514,56 @@ def test_tech_dict_from_to_from(self, source_obj): "foo3": {"to": "bar1"}, } - def test_tech_dict_empty(self, source_obj): + def test_tech_dict_empty(self, table_obj): df_dict = {"available_area": {"foo1": 1}} - tech_dict, base_dict = source_obj(df_dict, rows="nodes").tech_dict() + tech_dict, base_dict = table_obj(df_dict, rows="nodes").tech_dict() assert not tech_dict assert not base_dict -class TestDataSourceNodeDict: +class TestDataTableNodeDict: @pytest.fixture(scope="class") - def source_obj(self, init_config): - def _source_obj(df_dict, rows=["nodes", "techs"]): + def table_obj(self, init_config): + def _table_obj(df_dict, rows=["nodes", "techs"]): df = pd.DataFrame(df_dict) - source_dict = {"source": "df", "rows": rows, "columns": "parameters"} - ds = data_sources.DataSource( - init_config, "ds_name", source_dict, data_source_dfs={"df": df} + table_dict = {"data": "df", "rows": rows, "columns": "parameters"} + ds = data_tables.DataTable( + init_config, "ds_name", table_dict, data_table_dfs={"df": df} ) return ds - return _source_obj + return _table_obj - def test_node_dict_from_one_param(self, source_obj): + def test_node_dict_from_one_param(self, table_obj): df_dict = {"available_area": {("foo1", "bar1"): 
1, ("foo2", "bar2"): 2}} tech_dict = calliope.AttrDict({"bar1": {}, "bar2": {}}) - node_dict = source_obj(df_dict).node_dict(tech_dict) + node_dict = table_obj(df_dict).node_dict(tech_dict) assert node_dict == { "foo1": {"techs": {"bar1": None}}, "foo2": {"techs": {"bar2": None}}, } - def test_node_dict_from_two_param(self, source_obj): + def test_node_dict_from_two_param(self, table_obj): df_dict = { "available_area": {("foo1", "bar1"): 1, ("foo1", "bar2"): 2}, "other_param": {("foo2", "bar2"): 1}, } tech_dict = calliope.AttrDict({"bar1": {}, "bar2": {}}) - node_dict = source_obj(df_dict).node_dict(tech_dict) + node_dict = table_obj(df_dict).node_dict(tech_dict) assert node_dict == { "foo1": {"techs": {"bar1": None, "bar2": None}}, "foo2": {"techs": {"bar2": None}}, } - def test_node_dict_extra_dim_in_param(self, source_obj): + def test_node_dict_extra_dim_in_param(self, table_obj): df_dict = { "available_area": {("foo1", "bar1", "baz1"): 1, ("foo2", "bar2", "baz2"): 2} } tech_dict = calliope.AttrDict({"bar1": {}, "bar2": {}}) - node_dict = source_obj(df_dict, rows=["nodes", "techs", "carriers"]).node_dict( + node_dict = table_obj(df_dict, rows=["nodes", "techs", "carriers"]).node_dict( tech_dict ) @@ -577,12 +572,12 @@ def test_node_dict_extra_dim_in_param(self, source_obj): "foo2": {"techs": {"bar2": None}}, } - def test_node_dict_node_not_in_ds(self, source_obj): + def test_node_dict_node_not_in_ds(self, table_obj): node_tech_df_dict = {"my_param": {("foo1", "bar1"): 1, ("foo1", "bar2"): 2}} node_df_dict = {"available_area": {"foo2": 1}} tech_dict = calliope.AttrDict({"bar1": {}, "bar2": {}}) - node_tech_ds = source_obj(node_tech_df_dict) - node_ds = source_obj(node_df_dict, rows="nodes") + node_tech_ds = table_obj(node_tech_df_dict) + node_ds = table_obj(node_df_dict, rows="nodes") node_tech_ds.dataset = node_tech_ds.dataset.merge(node_ds.dataset) node_dict = node_tech_ds.node_dict(tech_dict) @@ -591,23 +586,23 @@ def test_node_dict_node_not_in_ds(self, 
source_obj): "foo2": {"techs": {}}, } - def test_node_dict_no_info(self, source_obj): + def test_node_dict_no_info(self, table_obj): df_dict = {"param": {"foo1": 1, "foo2": 2}} tech_dict = calliope.AttrDict( {"bar1": {"base_tech": "transmission"}, "bar2": {}} ) - node_dict = source_obj(df_dict, rows="techs").node_dict(tech_dict) + node_dict = table_obj(df_dict, rows="techs").node_dict(tech_dict) assert node_dict == {} - def test_transmission_tech_with_nodes(self, source_obj): + def test_transmission_tech_with_nodes(self, table_obj): df_dict = {"param": {("foo1", "bar1"): 1, ("foo2", "bar2"): 2}} tech_dict = calliope.AttrDict( {"bar1": {"base_tech": "transmission"}, "bar2": {}} ) with pytest.raises(calliope.exceptions.ModelError) as excinfo: - source_obj(df_dict).node_dict(tech_dict) + table_obj(df_dict).node_dict(tech_dict) check_error_or_warning( excinfo, diff --git a/tests/test_preprocess_model_data.py b/tests/test_preprocess_model_data.py index e68c30ef..391b797e 100644 --- a/tests/test_preprocess_model_data.py +++ b/tests/test_preprocess_model_data.py @@ -8,7 +8,7 @@ from calliope import exceptions from calliope.attrdict import AttrDict -from calliope.preprocess import data_sources, load +from calliope.preprocess import data_tables, load from calliope.preprocess.model_data import ModelDataFactory from .common.util import build_test_model as build_model @@ -28,10 +28,8 @@ def model_def(): def data_source_list(model_def, init_config): model_def_dict, model_def_path = model_def return [ - data_sources.DataSource( - init_config, source_name, source_dict, {}, model_def_path - ) - for source_name, source_dict in model_def_dict.pop("data_sources", {}).items() + data_tables.DataTable(init_config, source_name, source_dict, {}, model_def_path) + for source_name, source_dict in model_def_dict.pop("data_tables", {}).items() ] @@ -881,7 +879,7 @@ def test_parameter_already_exists(self): build_model({"parameters.flow_out_eff": 1}, "simple_supply,two_hours") assert 
check_error_or_warning( excinfo, - "A parameter with this name has already been defined in a data source or at a node/tech level.", + "A parameter with this name has already been defined in a data table or at a node/tech level.", ) @pytest.mark.parametrize("val", [1, 1.0, np.inf, "foo"]) diff --git a/tests/test_preprocess_time.py b/tests/test_preprocess_time.py index 7ef9a790..942d086f 100644 --- a/tests/test_preprocess_time.py +++ b/tests/test_preprocess_time.py @@ -17,9 +17,9 @@ def test_change_date_format(self): override = AttrDict.from_yaml_string( """ config.init.time_format: "%d/%m/%Y %H:%M" - data_sources: - demand_elec.source: data_sources/demand_heat_diff_dateformat.csv - demand_heat.source: data_sources/demand_heat_diff_dateformat.csv + data_tables: + demand_elec.data: data_tables/demand_heat_diff_dateformat.csv + demand_heat.data: data_tables/demand_heat_diff_dateformat.csv """ ) model = build_test_model(override_dict=override, scenario="simple_conversion") @@ -31,7 +31,7 @@ def test_change_date_format(self): def test_incorrect_date_format_one(self): # should fail: wrong dateformat input for one file override = AttrDict.from_yaml_string( - "data_sources.demand_elec.source: data_sources/demand_heat_diff_dateformat.csv" + "data_tables.demand_elec.data: data_tables/demand_heat_diff_dateformat.csv" ) with pytest.raises(exceptions.ModelError): @@ -47,7 +47,7 @@ def test_incorrect_date_format_multi(self): def test_incorrect_date_format_one_value_only(self): # should fail: one value wrong in file override = AttrDict.from_yaml_string( - "data_sources.test_demand_elec.source: data_sources/demand_heat_wrong_dateformat.csv" + "data_tables.test_demand_elec.data: data_tables/demand_heat_wrong_dateformat.csv" ) # check in output error that it points to: 07/01/2005 10:00:00 with pytest.raises(exceptions.ModelError): @@ -61,12 +61,12 @@ class TestClustering: def clustered_model(self, request): cluster_init = { "time_subset": ["2005-01-01", "2005-01-04"], - 
"time_cluster": f"data_sources/{request.param}.csv", + "time_cluster": f"data_tables/{request.param}.csv", } if "diff_dateformat" in request.param: cluster_init["override_dict"] = { - "data_sources": { - "demand_elec.source": "data_sources/demand_heat_diff_dateformat.csv" + "data_tables": { + "demand_elec.data": "data_tables/demand_heat_diff_dateformat.csv" } } cluster_init["time_format"] = "%d/%m/%Y %H:%M" @@ -127,7 +127,7 @@ def test_resampling_to_6h_then_clustering(self): scenario="simple_supply", time_subset=["2005-01-01", "2005-01-04"], time_resample="6h", - time_cluster="data_sources/cluster_days.csv", + time_cluster="data_tables/cluster_days.csv", ) dtindex = pd.DatetimeIndex( @@ -152,7 +152,7 @@ def test_15min_resampling_to_6h(self): # The data is identical for '2005-01-01' and '2005-01-03' timesteps, # it is only different for '2005-01-02' override = AttrDict.from_yaml_string( - "data_sources.demand_elec.source: data_sources/demand_elec_15mins.csv" + "data_tables.demand_elec.data: data_tables/demand_elec_15mins.csv" ) model = build_test_model(override, scenario="simple_supply", time_resample="6h") @@ -179,7 +179,7 @@ def test_15min_to_2h_resampling_to_2h(self): CSV has daily timeseries varying from 15min to 2h resolution, resample all to 2h """ override = AttrDict.from_yaml_string( - "data_sources.demand_elec.source: data_sources/demand_elec_15T_to_2h.csv" + "data_tables.demand_elec.data: data_tables/demand_elec_15T_to_2h.csv" ) model = build_test_model( @@ -214,12 +214,12 @@ def test_different_ts_resolutions_resampling_to_6h(self): # it is only different for '2005-01-02' override = AttrDict.from_yaml_string( """ - data_sources: + data_tables: demand_elec: select: nodes: a demand_elec_15m: - source: data_sources/demand_elec_15mins.csv + data: data_tables/demand_elec_15mins.csv rows: timesteps columns: nodes select: