From fe421a5dee1226df4d605ad9dc5b23d114d6c03b Mon Sep 17 00:00:00 2001 From: Bryn Pickering <17178478+brynpickering@users.noreply.github.com> Date: Wed, 30 Oct 2024 22:02:28 +0000 Subject: [PATCH 01/12] Update to using pydantic for config --- src/calliope/attrdict.py | 4 + src/calliope/config.py | 318 +++++++++++++++++++++++++ src/calliope/config/config_schema.yaml | 156 ------------ src/calliope/model.py | 107 +++------ src/calliope/preprocess/data_tables.py | 3 - 5 files changed, 361 insertions(+), 227 deletions(-) create mode 100644 src/calliope/config.py diff --git a/src/calliope/attrdict.py b/src/calliope/attrdict.py index bd94df7b..f17cf0ef 100644 --- a/src/calliope/attrdict.py +++ b/src/calliope/attrdict.py @@ -9,6 +9,7 @@ import numpy as np import ruamel.yaml as ruamel_yaml +from ruamel.yaml.scalarstring import walk_tree from typing_extensions import Self from calliope.util.tools import relative_path @@ -355,6 +356,9 @@ def to_yaml(self, path=None): result = result.as_dict() + # handle multi-line strings. + walk_tree(result) + if path is not None: with open(path, "w") as f: yaml_.dump(result, f) diff --git a/src/calliope/config.py b/src/calliope/config.py new file mode 100644 index 00000000..75a9bf79 --- /dev/null +++ b/src/calliope/config.py @@ -0,0 +1,318 @@ +# Copyright (C) since 2013 Calliope contributors listed in AUTHORS. +# Licensed under the Apache 2.0 License (see LICENSE file). 
+"""Implements the Calliope configuration class.""" + +from collections.abc import Hashable +from datetime import datetime +from pathlib import Path +from typing import Annotated, Literal, Self, TypeVar, overload + +from pydantic import AfterValidator, BaseModel, Field, model_validator +from pydantic_core import PydanticCustomError + +from calliope.attrdict import AttrDict +from calliope.util import tools + +MODES_T = Literal["plan", "operate", "spores"] +CONFIG_T = Literal["init", "build", "solve"] + +# == +# Taken from https://github.com/pydantic/pydantic-core/pull/820#issuecomment-1670475909 +T = TypeVar("T", bound=Hashable) + + +def _validate_unique_list(v: list[T]) -> list[T]: + if len(v) != len(set(v)): + raise PydanticCustomError("unique_list", "List must be unique") + return v + + +UniqueList = Annotated[ + list[T], + AfterValidator(_validate_unique_list), + Field(json_schema_extra={"uniqueItems": True}), +] +# == + + +def hide_from_schema(to_hide: list[str]): + """Hide fields from the generated schema. + + Args: + to_hide (list[str]): List of fields to hide. + """ + + def _hide_from_schema(schema: dict): + for hide in to_hide: + schema.get("properties", {}).pop(hide, None) + return schema + + return _hide_from_schema + + +class ConfigBaseModel(BaseModel): + """A base class for creating pydantic models for Calliope configuration options.""" + + _kwargs: dict = {} + + def update(self, update_dict: dict, deep: bool = False) -> Self: + """Return a new iteration of the model with updated fields. + + Updates are validated and stored in the parent class in the `_kwargs` key. + + Args: + update_dict (dict): Dictionary with which to update the base model. + deep (bool, optional): Set to True to make a deep copy of the model. Defaults to False. + + Returns: + BaseModel: New model instance. 
+ """ + updated = super().model_copy(update=update_dict, deep=deep) + updated.model_validate(updated) + self._kwargs = update_dict + return updated + + @overload + def model_yaml_schema(self, filepath: str | Path) -> None: ... + + @overload + def model_yaml_schema(self, filepath: None = None) -> str: ... + + def model_yaml_schema(self, filepath: str | Path | None = None) -> None | str: + """Generate a YAML schema for the class. + + Args: + filepath (str | Path | None, optional): If given, save schema to given path. Defaults to None. + + Returns: + None | str: If `filepath` is given, returns None. Otherwise, returns the YAML string. + """ + return AttrDict(self.model_json_schema()).to_yaml(filepath) + + +class ModeBaseModel(BaseModel): + """Mode-specific configuration, which will be hidden from the string representation of the model if that mode is not activated.""" + + _mode: str + + @model_validator(mode="after") + def update_repr(self) -> Self: + """Hide config from model string representation if mode is not activated.""" + for key, val in self.model_fields.items(): + if key.startswith(self._mode): + val.repr = self.mode == self._mode + return self + + +class Init(ConfigBaseModel): + """All configuration options used when initialising a Calliope model.""" + + model_config = { + "extra": "forbid", + "frozen": True, + "json_schema_extra": hide_from_schema(["def_path"]), + "revalidate_instances": "always", + "use_attribute_docstrings": True, + } + + def_path: Path = Field(default=".", repr=False, exclude=True) + name: str | None = Field(default=None) + """Model name""" + + calliope_version: str | None = Field(default=None) + """Calliope framework version this model is intended for""" + + time_subset: tuple[datetime, datetime] | None = Field(default=None) + """ + Subset of timesteps as an two-element list giving the **inclusive** range. + For example, ["2005-01", "2005-04"] will create a time subset from "2005-01-01 00:00:00" to "2005-04-31 23:59:59". 
+ + Strings must be ISO8601-compatible, i.e. of the form `YYYY-mm-dd HH:MM:SS` (e.g., '2005-01 ', '2005-01-01', '2005-01-01 00:00', ...) + """ + + time_resample: str | None = Field(default=None, pattern="^[0-9]+[a-zA-Z]") + """Setting to adjust time resolution, e.g. '2h' for 2-hourly""" + + time_cluster: Path | None = Field(default=None) + """ + Setting to cluster the timeseries. + Must be a path to a file where each date is linked to a representative date that also exists in the timeseries. + """ + + time_format: str = Field(default="ISO8601") + """ + Timestamp format of all time series data when read from file. + 'ISO8601' means '%Y-%m-%d %H:%M:%S'. + """ + + distance_unit: Literal["km", "m"] = Field(default="km") + """ + Unit of transmission link `distance` (m - metres, km - kilometres). + Automatically derived distances from lat/lon coordinates will be given in this unit. + """ + + @model_validator(mode="before") + @classmethod + def abs_path(cls, data): + """Add model definition path.""" + if "time_cluster" in data: + data["time_cluster"] = tools.relative_path( + data["def_path"], data["time_cluster"] + ) + return data + + + class BuildBase(BaseModel): + """Base configuration options used when building a Calliope optimisation problem (`calliope.Model.build`).""" + + model_config = {"extra": "allow", "revalidate_instances": "always"} + add_math: UniqueList[str] = Field(default=[]) + """ + List of references to files which contain additional mathematical formulations to be applied on top of or instead of the base mode math. + If referring to a pre-defined Calliope math file (see documentation for available files), do not append the reference with ".yaml". + If referring to your own math file, ensure the file type is given as a suffix (".yaml" or ".yml"). 
+ Relative paths will be assumed to be relative to the model definition file given when creating a calliope Model (`calliope.Model(model_definition=...)`) + """ + + ignore_mode_math: bool = Field(default=False) + """ + If True, do not initialise the mathematical formulation with the pre-defined math for the given run `mode`. + This option can be used to completely re-define the Calliope mathematical formulation. + """ + + backend: Literal["pyomo", "gurobi"] = Field(default="pyomo") + """Module with which to build the optimisation problem.""" + + ensure_feasibility: bool = Field(default=False) + """ + Whether to include decision variables in the model which will meet unmet demand or consume unused supply in the model so that the optimisation solves successfully. + This should only be used as a debugging option (as any unmet demand/unused supply is a sign of improper model formulation). + """ + + mode: MODES_T = Field(default="plan") + """Mode in which to run the optimisation.""" + + objective: str = Field(default="min_cost_optimisation") + """Name of internal objective function to use, from those defined in the pre-defined math and any applied additional math.""" + + pre_validate_math_strings: bool = Field(default=True) + """ + If true, the Calliope math definition will be scanned for parsing errors _before_ undertaking the much more expensive operation of building the optimisation problem. + You can switch this off (e.g., if you know there are no parsing errors) to reduce overall build time. + """ + + +class BuildOperate(ModeBaseModel): + """Operate mode configuration options used when building a Calliope optimisation problem (`calliope.Model.build`).""" + + _mode = "operate" + + operate_window: str = Field(default=None) + """ + Operate mode rolling `window`, given as a pandas frequency string. + See [here](https://pandas.pydata.org/docs/user_guide/timeseries.html#offset-aliases) for a list of frequency aliases. 
+ """ + + operate_horizon: str = Field(default=None) + """ + Operate mode rolling `horizon`, given as a pandas frequency string. + See [here](https://pandas.pydata.org/docs/user_guide/timeseries.html#offset-aliases) for a list of frequency aliases. + Must be ≥ `operate_window` + """ + + operate_use_cap_results: bool = Field(default=False) + """If the model already contains `plan` mode results, use those optimal capacities as input parameters to the `operate` mode run.""" + + +class Build(ConfigBaseModel, BuildOperate, BuildBase): + """All configuration options used when building a Calliope optimisation problem (`calliope.Model.build`). + + Additional configuration items will be passed onto math string parsing and can therefore be accessed in the `where` strings by `config.[item-name]`, + where "[item-name]" is the name of your own configuration item. + """ + + +class SolveBase(BaseModel): + """Base configuration options used when solving a Calliope optimisation problem (`calliope.Model.solve`).""" + + model_config = { + "extra": "forbid", + "revalidate_instances": "always", + "json_schema_extra": hide_from_schema(["mode"]), + } + + mode: Literal["plan", "spores", "operate"] = Field(default="plan", repr=False) + + save_logs: Path | None = Field(default=None) + """If given, should be a path to a directory in which to save optimisation logs.""" + + solver_io: str | None = Field(default=None) + """ + Some solvers have different interfaces that perform differently. + For instance, setting `solver_io="python"` when using the solver `gurobi` tends to reduce the time to send the optimisation problem to the solver. + """ + + solver_options: dict = Field(default={}) + """Any solver options, as key-value pairs, to pass to the chosen solver""" + + solver: str = Field(default="cbc") + """Solver to use. Any solvers that have Pyomo interfaces can be used. 
Refer to the Pyomo documentation for the latest list.""" + + zero_threshold: float = Field(default=1e-10) + """On postprocessing the optimisation results, values smaller than this threshold will be considered as optimisation artefacts and will be set to zero.""" + + shadow_prices: UniqueList[str] = Field(default=[]) + """Names of model constraints.""" + + +class SolveSpores(ModeBaseModel): + """SPORES configuration options used when solving a Calliope optimisation problem (`calliope.Model.solve`).""" + + _mode = "spores" + + mode: MODES_T = Field(default=None) + + spores_number: int = Field(default=3) + """SPORES mode number of iterations after the initial base run.""" + + spores_score_cost_class: str = Field(default="spores_score") + """SPORES mode cost class to vary between iterations after the initial base run.""" + + spores_slack_cost_group: str = Field(default=None) + """SPORES mode cost class to keep below the given `slack` (usually "monetary").""" + + spores_save_per_spore: bool = Field(default=False) + """ + Whether or not to save the result of each SPORES mode run between iterations. + If False, will consolidate all iterations into one dataset after completion of N iterations (defined by `spores_number`) and save that one dataset. + """ + + spores_save_per_spore_path: Path | None = Field(default=None) + """If saving per spore, the path to save to.""" + + spores_skip_cost_op: bool = Field(default=False) + """If the model already contains `plan` mode results, use those as the initial base run results and start with SPORES iterations immediately.""" + + @model_validator(mode="after") + def save_per_spore_path(self) -> Self: + """Ensure that path is given if saving per spore.""" + if self.spores_save_per_spore: + if self.spores_save_per_spore_path is None: + raise ValueError( + "Must define `spores_save_per_spore_path` if you want to save each SPORES result separately." 
+ ) + elif not self.spores_save_per_spore_path.is_dir(): + raise ValueError("`spores_save_per_spore_path` must be a directory.") + return self + + +class Solve(ConfigBaseModel, SolveSpores, SolveBase): + """All configuration options used when solving a Calliope optimisation problem (`calliope.Model.solve`).""" + + +class CalliopeConfig(ConfigBaseModel): + """Calliope configuration class.""" + + init: Init + build: Build + solve: Solve diff --git a/src/calliope/config/config_schema.yaml b/src/calliope/config/config_schema.yaml index b9ebe627..41a8c06e 100644 --- a/src/calliope/config/config_schema.yaml +++ b/src/calliope/config/config_schema.yaml @@ -15,172 +15,16 @@ properties: init: type: object description: All configuration options used when initialising a Calliope model - additionalProperties: false - properties: - name: - type: ["null", string] - default: null - description: Model name - calliope_version: - type: ["null", string] - default: null - description: Calliope framework version this model is intended for - time_subset: - oneOf: - - type: "null" - - type: array - minItems: 2 - maxItems: 2 - items: - type: string - description: ISO8601 format datetime strings of the form `YYYY-mm-dd HH:MM:SS` (e.g, '2005-01', '2005-01-01', '2005-01-01 00:00', ...) - default: null - description: >- - Subset of timesteps as an two-element list giving the **inclusive** range. - For example, ['2005-01', '2005-04'] will create a time subset from '2005-01-01 00:00:00' to '2005-04-31 23:59:59'. - time_resample: - type: ["null", string] - default: null - description: setting to adjust time resolution, e.g. "2h" for 2-hourly - pattern: "^[0-9]+[a-zA-Z]" - time_cluster: - type: ["null", string] - default: null - description: setting to cluster the timeseries, must be a path to a file where each date is linked to a representative date that also exists in the timeseries. 
- time_format: - type: string - default: "ISO8601" - description: Timestamp format of all time series data when read from file. "ISO8601" means "%Y-%m-%d %H:%M:%S". - distance_unit: - type: string - default: km - description: >- - Unit of transmission link `distance` (m - metres, km - kilometres). - Automatically derived distances from lat/lon coordinates will be given in this unit. - enum: [m, km] build: type: object description: > All configuration options used when building a Calliope optimisation problem (`calliope.Model.build`). Additional configuration items will be passed onto math string parsing and can therefore be accessed in the `where` strings by `config.[item-name]`, where "[item-name]" is the name of your own configuration item. - additionalProperties: true - properties: - add_math: - type: array - default: [] - description: List of references to files which contain additional mathematical formulations to be applied on top of or instead of the base mode math. - uniqueItems: true - items: - type: string - description: > - If referring to an pre-defined Calliope math file (see documentation for available files), do not append the reference with ".yaml". - If referring to your own math file, ensure the file type is given as a suffix (".yaml" or ".yml"). - Relative paths will be assumed to be relative to the model definition file given when creating a calliope Model (`calliope.Model(model_definition=...)`). - ignore_mode_math: - type: boolean - default: false - description: >- - If True, do not initialise the mathematical formulation with the pre-defined math for the given run `mode`. - This option can be used to completely re-define the Calliope mathematical formulation. 
- backend: - type: string - default: pyomo - description: Module with which to build the optimisation problem - ensure_feasibility: - type: boolean - default: false - description: > - whether to include decision variables in the model which will meet unmet demand or consume unused supply in the model so that the optimisation solves successfully. - This should only be used as a debugging option (as any unmet demand/unused supply is a sign of improper model formulation). - mode: - type: string - default: plan - description: Mode in which to run the optimisation. - enum: [plan, spores, operate] - objective: - type: string - default: min_cost_optimisation - description: Name of internal objective function to use, from those defined in the pre-defined math and any applied additional math. - operate_window: - type: string - description: >- - Operate mode rolling `window`, given as a pandas frequency string. - See [here](https://pandas.pydata.org/docs/user_guide/timeseries.html#offset-aliases) for a list of frequency aliases. - operate_horizon: - type: string - description: >- - Operate mode rolling `horizon`, given as a pandas frequency string. - See [here](https://pandas.pydata.org/docs/user_guide/timeseries.html#offset-aliases) for a list of frequency aliases. - Must be ≥ `operate_window` - operate_use_cap_results: - type: boolean - default: false - description: If the model already contains `plan` mode results, use those optimal capacities as input parameters to the `operate` mode run. - pre_validate_math_strings: - type: boolean - default: true - description: >- - If true, the Calliope math definition will be scanned for parsing errors _before_ undertaking the much more expensive operation of building the optimisation problem. - You can switch this off (e.g., if you know there are no parsing errors) to reduce overall build time. solve: type: object description: All configuration options used when solving a Calliope optimisation problem (`calliope.Model.solve`). 
- additionalProperties: false - properties: - spores_number: - type: integer - default: 3 - description: SPORES mode number of iterations after the initial base run. - spores_score_cost_class: - type: string - default: spores_score - description: SPORES mode cost class to vary between iterations after the initial base run. - spores_slack_cost_group: - type: string - description: SPORES mode cost class to keep below the given `slack` (usually "monetary"). - spores_save_per_spore: - type: boolean - default: false - description: Whether or not to save the result of each SPORES mode run between iterations. If False, will consolidate all iterations into one dataset after completion of N iterations (defined by `spores_number`) and save that one dataset. - spores_save_per_spore_path: - type: string - description: If saving per spore, the path to save to. - spores_skip_cost_op: - type: boolean - default: false - description: If the model already contains `plan` mode results, use those as the initial base run results and start with SPORES iterations immediately. - save_logs: - type: ["null", string] - default: null - description: If given, should be a path to a directory in which to save optimisation logs. - solver_io: - type: ["null", string] - default: null - description: > - Some solvers have different interfaces that perform differently. - For instance, setting `solver_io="python"` when using the solver `gurobi` tends to reduce the time to send the optimisation problem to the solver. - solver_options: - type: ["null", object] - default: null - description: Any solver options, as key-value pairs, to pass to the chosen solver - solver: - type: string - default: cbc - description: Solver to use. Any solvers that have Pyomo interfaces can be used. Refer to the Pyomo documentation for the latest list. 
- zero_threshold: - type: number - default: 1e-10 - description: On postprocessing the optimisation results, values smaller than this threshold will be considered as optimisation artefacts and will be set to zero. - shadow_prices: - type: array - uniqueItems: true - items: - type: string - description: Names of model constraints. - default: [] - description: List of constraints for which to extract shadow prices. Shadow prices will be added as variables to the model results as `shadow_price_{constraintname}`. parameters: type: [object, "null"] diff --git a/src/calliope/model.py b/src/calliope/model.py index ee8c5a77..811c9676 100644 --- a/src/calliope/model.py +++ b/src/calliope/model.py @@ -12,7 +12,7 @@ import xarray as xr import calliope -from calliope import backend, exceptions, io, preprocess +from calliope import backend, config, exceptions, io, preprocess from calliope.attrdict import AttrDict from calliope.postprocess import postprocess as postprocess_results from calliope.preprocess.data_tables import DataTable @@ -43,7 +43,7 @@ class Model: """A Calliope Model.""" _TS_OFFSET = pd.Timedelta(1, unit="nanoseconds") - ATTRS_SAVED = ("_def_path", "applied_math") + ATTRS_SAVED = ("_def_path", "applied_math", "config") def __init__( self, @@ -74,7 +74,7 @@ def __init__( **kwargs: initialisation overrides. 
""" self._timings: dict = {} - self.config: AttrDict + self.config: config.CalliopeConfig self.defaults: AttrDict self.applied_math: preprocess.CalliopeMath self._def_path: str | None = None @@ -162,19 +162,22 @@ def _init_from_model_def_dict( "model_run_creation", comment="Model: preprocessing stage 1 (model_run)", ) - model_config = AttrDict(extract_from_schema(CONFIG_SCHEMA, "default")) - model_config.union(model_definition.pop("config"), allow_override=True) - init_config = update_then_validate_config("init", model_config) + model_config = config.CalliopeConfig(model_definition.pop("config")) - if init_config["time_cluster"] is not None: - init_config["time_cluster"] = relative_path( - self._def_path, init_config["time_cluster"] + if model_config.init.data["time_cluster"] is not None: + model_config.init.update( + { + "time_cluster": relative_path( + self._def_path, model_config.init.data["time_cluster"] + ) + } ) + model_config.init.validate() param_metadata = {"default": extract_from_schema(MODEL_SCHEMA, "default")} attributes = { - "calliope_version_defined": init_config["calliope_version"], + "calliope_version_defined": model_config.init.data["calliope_version"], "calliope_version_initialised": calliope.__version__, "applied_overrides": applied_overrides, "scenario": scenario, @@ -185,13 +188,15 @@ def _init_from_model_def_dict( for table_name, table_dict in model_definition.pop("data_tables", {}).items(): table_dict, _ = climb_template_tree(table_dict, templates, table_name) data_tables.append( - DataTable( - init_config, table_name, table_dict, data_table_dfs, self._def_path - ) + DataTable(table_name, table_dict, data_table_dfs, self._def_path) ) model_data_factory = ModelDataFactory( - init_config, model_definition, data_tables, attributes, param_metadata + model_config.init.data, + model_definition, + data_tables, + attributes, + param_metadata, ) model_data_factory.build() @@ -204,9 +209,10 @@ def _init_from_model_def_dict( comment="Model: 
preprocessing stage 2 (model_data)", ) - self._add_observed_dict("config", model_config) + self._model_data.attrs["name"] = model_config.init.data["name"] + + self.config = model_config - self._model_data.attrs["name"] = init_config["name"] log_time( LOGGER, self._timings, @@ -229,9 +235,10 @@ def _init_from_model_data(self, model_data: xr.Dataset) -> None: self.applied_math = preprocess.CalliopeMath.from_dict( model_data.attrs.pop("applied_math") ) + if "config" in model_data.attrs: + self.config = config.CalliopeConfig(model_data.attrs.pop("config")) self._model_data = model_data - self._add_model_data_methods() if self.results: self._is_solved = True @@ -243,47 +250,6 @@ def _init_from_model_data(self, model_data: xr.Dataset) -> None: comment="Model: loaded model_data", ) - def _add_model_data_methods(self): - """Add observed data to `model`. - - 1. Filter model dataset to produce views on the input/results data - 2. Add top-level configuration dictionaries simultaneously to the model data attributes and as attributes of this class. - - """ - self._add_observed_dict("config") - - def _add_observed_dict(self, name: str, dict_to_add: dict | None = None) -> None: - """Add the same dictionary as property of model object and an attribute of the model xarray dataset. - - Args: - name (str): - Name of dictionary which will be set as the model property name and - (if necessary) the dataset attribute name. - dict_to_add (dict | None, optional): - If given, set as both the model property and the dataset attribute, - otherwise set an existing dataset attribute as a model property of the - same name. Defaults to None. - - Raises: - exceptions.ModelError: If `dict_to_add` is not given, it must be an attribute of model data. - TypeError: `dict_to_add` must be a dictionary. 
- """ - if dict_to_add is None: - try: - dict_to_add = self._model_data.attrs[name] - except KeyError: - raise exceptions.ModelError( - f"Expected the model property `{name}` to be a dictionary attribute of the model dataset. If you are loading the model from a NetCDF file, ensure it is a valid Calliope model." - ) - if not isinstance(dict_to_add, dict): - raise TypeError( - f"Attempted to add dictionary property `{name}` to model, but received argument of type `{type(dict_to_add).__name__}`" - ) - else: - dict_to_add = AttrDict(dict_to_add) - self._model_data.attrs[name] = dict_to_add - setattr(self, name, dict_to_add) - def build( self, force: bool = False, add_math_dict: dict | None = None, **kwargs ) -> None: @@ -310,30 +276,35 @@ def build( comment="Model: backend build starting", ) - backend_config = {**self.config["build"], **kwargs} - mode = backend_config["mode"] + self.config.build.data_temp = kwargs + latest_build_config = self.config.build.data + mode = self.config.build.mode if mode == "operate": if not self._model_data.attrs["allow_operate_mode"]: raise exceptions.ModelError( "Unable to run this model in operate (i.e. dispatch) mode, probably because " "there exist non-uniform timesteps (e.g. 
from time clustering)" ) - start_window_idx = backend_config.pop("start_window_idx", 0) + start_window_idx = self.config.build.data.pop("start_window_idx", 0) backend_input = self._prepare_operate_mode_inputs( - start_window_idx, **backend_config + start_window_idx, **self.config.build.data ) else: backend_input = self._model_data - init_math_list = [] if backend_config.get("ignore_mode_math") else [mode] + init_math_list = [] if self.config.build.data["ignore_mode_math"] else [mode] end_math_list = [] if add_math_dict is None else [add_math_dict] - full_math_list = init_math_list + backend_config["add_math"] + end_math_list + full_math_list = ( + init_math_list + self.config.build.data["add_math"] + end_math_list + ) LOGGER.debug(f"Math preprocessing | Loading math: {full_math_list}") model_math = preprocess.CalliopeMath(full_math_list, self._def_path) - backend_name = backend_config.pop("backend") self.backend = backend.get_model_backend( - backend_name, backend_input, model_math, **backend_config + latest_build_config["backend"], + backend_input, + model_math, + **latest_build_config, ) self.backend.add_optimisation_components() @@ -370,7 +341,7 @@ def solve(self, force: bool = False, warmstart: bool = False, **kwargs) -> None: exceptions.ModelError: Some preprocessing steps will stop a run mode of "operate" from being possible. """ # Check that results exist and are non-empty - if not self._is_built: + if not self.is_built: raise exceptions.ModelError( "You must build the optimisation problem (`.build()`) " "before you can run it." 
@@ -388,7 +359,7 @@ def solve(self, force: bool = False, warmstart: bool = False, **kwargs) -> None: else: to_drop = [] - run_mode = self.backend.inputs.attrs["config"]["build"]["mode"] + run_mode = self.config.build.data["mode"] self._model_data.attrs["timestamp_solve_start"] = log_time( LOGGER, self._timings, diff --git a/src/calliope/preprocess/data_tables.py b/src/calliope/preprocess/data_tables.py index 4a90fbf3..83233e2b 100644 --- a/src/calliope/preprocess/data_tables.py +++ b/src/calliope/preprocess/data_tables.py @@ -51,7 +51,6 @@ class DataTable: def __init__( self, - model_config: dict, table_name: str, data_table: DataTableDict, data_table_dfs: dict[str, pd.DataFrame] | None = None, @@ -60,7 +59,6 @@ def __init__( """Load and format a data table from file / in-memory object. Args: - model_config (dict): Model initialisation configuration dictionary. table_name (str): name of the data table. data_table (DataTableDict): Data table definition dictionary. data_table_dfs (dict[str, pd.DataFrame] | None, optional): @@ -75,7 +73,6 @@ def __init__( self.input = data_table self.dfs = data_table_dfs if data_table_dfs is not None else dict() self.model_definition_path = model_definition_path - self.config = model_config self.columns = self._listify_if_defined("columns") self.index = self._listify_if_defined("rows") From 1f96c4760200ef9ba635898569a6c88a0e4a549a Mon Sep 17 00:00:00 2001 From: Bryn Pickering <17178478+brynpickering@users.noreply.github.com> Date: Wed, 6 Nov 2024 23:40:50 +0000 Subject: [PATCH 02/12] Update config to have operate and spores as subdicts; fix use of config throughout src --- docs/hooks/generate_readable_schema.py | 3 +- src/calliope/backend/__init__.py | 14 +- src/calliope/backend/backend_model.py | 33 +-- src/calliope/backend/gurobi_backend_model.py | 11 +- src/calliope/backend/parsing.py | 1 + src/calliope/backend/pyomo_backend_model.py | 11 +- src/calliope/backend/where_parser.py | 6 +- src/calliope/cli.py | 11 +- 
src/calliope/config.py | 207 +++++++++++------- .../national_scale/scenarios.yaml | 5 +- .../example_models/urban_scale/scenarios.yaml | 5 +- src/calliope/model.py | 133 +++++------ src/calliope/postprocess/postprocess.py | 4 +- src/calliope/preprocess/data_tables.py | 4 +- src/calliope/preprocess/model_data.py | 21 +- src/calliope/preprocess/scenarios.py | 7 - src/calliope/util/schema.py | 14 -- src/calliope/util/tools.py | 4 +- tests/common/util.py | 4 +- tests/conftest.py | 5 +- tests/test_core_model.py | 43 +--- tests/test_preprocess_model_data.py | 11 +- 22 files changed, 268 insertions(+), 289 deletions(-) diff --git a/docs/hooks/generate_readable_schema.py b/docs/hooks/generate_readable_schema.py index 89ae232e..8b799265 100644 --- a/docs/hooks/generate_readable_schema.py +++ b/docs/hooks/generate_readable_schema.py @@ -14,12 +14,13 @@ import jsonschema2md from mkdocs.structure.files import File +from calliope import AttrDict, config from calliope.util import schema TEMPDIR = tempfile.TemporaryDirectory() SCHEMAS = { - "config_schema": schema.CONFIG_SCHEMA, + "config_schema": AttrDict.from_yaml(config.CalliopeConfig().model_yaml_schema()), "model_schema": schema.MODEL_SCHEMA, "math_schema": schema.MATH_SCHEMA, "data_table_schema": schema.DATA_TABLE_SCHEMA, diff --git a/src/calliope/backend/__init__.py b/src/calliope/backend/__init__.py index d37395d8..84929792 100644 --- a/src/calliope/backend/__init__.py +++ b/src/calliope/backend/__init__.py @@ -15,19 +15,19 @@ from calliope.preprocess import CalliopeMath if TYPE_CHECKING: + from calliope import config from calliope.backend.backend_model import BackendModel def get_model_backend( - name: str, data: xr.Dataset, math: CalliopeMath, **kwargs + build_config: "config.Build", data: xr.Dataset, math: CalliopeMath ) -> "BackendModel": """Assign a backend using the given configuration. Args: - name (str): name of the backend to use. + build_config: Build configuration options. 
data (Dataset): model data for the backend. math (CalliopeMath): Calliope math. - **kwargs: backend keyword arguments corresponding to model.config.build. Raises: exceptions.BackendError: If invalid backend was requested. @@ -35,10 +35,10 @@ def get_model_backend( Returns: BackendModel: Initialized backend object. """ - match name: + match build_config.backend: case "pyomo": - return PyomoBackendModel(data, math, **kwargs) + return PyomoBackendModel(data, math, build_config) case "gurobi": - return GurobiBackendModel(data, math, **kwargs) + return GurobiBackendModel(data, math, build_config) case _: - raise BackendError(f"Incorrect backend '{name}' requested.") + raise BackendError(f"Incorrect backend '{build_config.backend}' requested.") diff --git a/src/calliope/backend/backend_model.py b/src/calliope/backend/backend_model.py index c52d74ab..21603864 100644 --- a/src/calliope/backend/backend_model.py +++ b/src/calliope/backend/backend_model.py @@ -26,17 +26,13 @@ import numpy as np import xarray as xr -from calliope import exceptions +from calliope import config, exceptions from calliope.attrdict import AttrDict from calliope.backend import helper_functions, parsing from calliope.exceptions import warn as model_warn from calliope.io import load_config from calliope.preprocess.model_math import ORDERED_COMPONENTS_T, CalliopeMath -from calliope.util.schema import ( - MODEL_SCHEMA, - extract_from_schema, - update_then_validate_config, -) +from calliope.util.schema import MODEL_SCHEMA, extract_from_schema if TYPE_CHECKING: from calliope.backend.parsing import T as Tp @@ -69,20 +65,20 @@ class BackendModelGenerator(ABC): _PARAM_UNITS = extract_from_schema(MODEL_SCHEMA, "x-unit") _PARAM_TYPE = extract_from_schema(MODEL_SCHEMA, "x-type") - def __init__(self, inputs: xr.Dataset, math: CalliopeMath, **kwargs): + def __init__( + self, inputs: xr.Dataset, math: CalliopeMath, build_config: config.Build + ): """Abstract base class to build a representation of the optimisation 
problem. Args: inputs (xr.Dataset): Calliope model data. math (CalliopeMath): Calliope math. - **kwargs (Any): build configuration overrides. + build_config: Build configuration options. """ self._dataset = xr.Dataset() self.inputs = inputs.copy() self.inputs.attrs = deepcopy(inputs.attrs) - self.inputs.attrs["config"]["build"] = update_then_validate_config( - "build", self.inputs.attrs["config"], **kwargs - ) + self.config = build_config self.math: CalliopeMath = deepcopy(math) self._solve_logger = logging.getLogger(__name__ + ".") @@ -200,6 +196,7 @@ def _check_inputs(self): "equation_name": "", "backend_interface": self, "input_data": self.inputs, + "build_config": self.config, "helper_functions": helper_functions._registry["where"], "apply_where": True, "references": set(), @@ -246,7 +243,7 @@ def add_optimisation_components(self) -> None: # The order of adding components matters! # 1. Variables, 2. Global Expressions, 3. Constraints, 4. Objectives self._add_all_inputs_as_parameters() - if self.inputs.attrs["config"]["build"]["pre_validate_math_strings"]: + if self.config.pre_validate_math_strings: self._validate_math_string_parsing() for components in typing.get_args(ORDERED_COMPONENTS_T): component = components.removesuffix("s") @@ -399,7 +396,7 @@ def _add_all_inputs_as_parameters(self) -> None: if param_name in self.parameters.keys(): continue elif ( - self.inputs.attrs["config"]["build"]["mode"] != "operate" + self.config.mode != "operate" and param_name in extract_from_schema(MODEL_SCHEMA, "x-operate-param").keys() ): @@ -606,7 +603,11 @@ class BackendModel(BackendModelGenerator, Generic[T]): """Calliope's backend model functionality.""" def __init__( - self, inputs: xr.Dataset, math: CalliopeMath, instance: T, **kwargs + self, + inputs: xr.Dataset, + math: CalliopeMath, + instance: T, + build_config: config.Build, ) -> None: """Abstract base class to build backend models that interface with solvers. 
@@ -614,9 +615,9 @@ def __init__( inputs (xr.Dataset): Calliope model data. math (CalliopeMath): Calliope math. instance (T): Interface model instance. - **kwargs: build configuration overrides. + build_config: Build configuration options. """ - super().__init__(inputs, math, **kwargs) + super().__init__(inputs, math, build_config) self._instance = instance self.shadow_prices: ShadowPrices self._has_verbose_strings: bool = False diff --git a/src/calliope/backend/gurobi_backend_model.py b/src/calliope/backend/gurobi_backend_model.py index 2d2e0a48..ab02d9d4 100644 --- a/src/calliope/backend/gurobi_backend_model.py +++ b/src/calliope/backend/gurobi_backend_model.py @@ -14,6 +14,7 @@ import pandas as pd import xarray as xr +from calliope import config from calliope.backend import backend_model, parsing from calliope.exceptions import BackendError, BackendWarning from calliope.exceptions import warn as model_warn @@ -41,19 +42,21 @@ class GurobiBackendModel(backend_model.BackendModel): """gurobipy-specific backend functionality.""" - def __init__(self, inputs: xr.Dataset, math: CalliopeMath, **kwargs) -> None: + def __init__( + self, inputs: xr.Dataset, math: CalliopeMath, build_config: config.Build + ) -> None: """Gurobi solver interface class. Args: inputs (xr.Dataset): Calliope model data. math (CalliopeMath): Calliope math. - **kwargs: passed directly to the solver. + build_config: Build configuration options. """ if importlib.util.find_spec("gurobipy") is None: raise ImportError( "Install the `gurobipy` package to build the optimisation problem with the Gurobi backend." 
) - super().__init__(inputs, math, gurobipy.Model(), **kwargs) + super().__init__(inputs, math, gurobipy.Model(), build_config) self._instance: gurobipy.Model self.shadow_prices = GurobiShadowPrices(self) @@ -144,7 +147,7 @@ def _objective_setter( ) -> xr.DataArray: expr = element.evaluate_expression(self, references=references) - if name == self.inputs.attrs["config"].build.objective: + if name == self.config.objective: self._instance.setObjective(expr.item(), sense=sense) self.log("objectives", name, "Objective activated.") diff --git a/src/calliope/backend/parsing.py b/src/calliope/backend/parsing.py index 33c9ea47..5cdd0808 100644 --- a/src/calliope/backend/parsing.py +++ b/src/calliope/backend/parsing.py @@ -311,6 +311,7 @@ def evaluate_where( helper_functions=helper_functions._registry["where"], input_data=backend_interface.inputs, backend_interface=backend_interface, + build_config=backend_interface.config, references=references if references is not None else set(), apply_where=True, ) diff --git a/src/calliope/backend/pyomo_backend_model.py b/src/calliope/backend/pyomo_backend_model.py index 5ba41ba0..46ea3b32 100644 --- a/src/calliope/backend/pyomo_backend_model.py +++ b/src/calliope/backend/pyomo_backend_model.py @@ -26,6 +26,7 @@ from pyomo.opt import SolverFactory # type: ignore from pyomo.util.model_size import build_model_size_report # type: ignore +from calliope import config from calliope.exceptions import BackendError, BackendWarning from calliope.exceptions import warn as model_warn from calliope.preprocess import CalliopeMath @@ -58,15 +59,17 @@ class PyomoBackendModel(backend_model.BackendModel): """Pyomo-specific backend functionality.""" - def __init__(self, inputs: xr.Dataset, math: CalliopeMath, **kwargs) -> None: + def __init__( + self, inputs: xr.Dataset, math: CalliopeMath, build_config: config.Build + ) -> None: """Pyomo solver interface class. Args: inputs (xr.Dataset): Calliope model data. math (CalliopeMath): Calliope math. 
- **kwargs: passed directly to the solver. + build_config: Build configuration options. """ - super().__init__(inputs, math, pmo.block(), **kwargs) + super().__init__(inputs, math, pmo.block(), build_config) self._instance.parameters = pmo.parameter_dict() self._instance.variables = pmo.variable_dict() @@ -185,7 +188,7 @@ def _objective_setter( ) -> xr.DataArray: expr = element.evaluate_expression(self, references=references) objective = pmo.objective(expr.item(), sense=sense) - if name == self.inputs.attrs["config"].build.objective: + if name == self.config.objective: text = "activated" objective.activate() else: diff --git a/src/calliope/backend/where_parser.py b/src/calliope/backend/where_parser.py index f434a9bf..06f782f6 100644 --- a/src/calliope/backend/where_parser.py +++ b/src/calliope/backend/where_parser.py @@ -17,6 +17,7 @@ from calliope.exceptions import BackendError if TYPE_CHECKING: + from calliope import config from calliope.backend.backend_model import BackendModel @@ -34,6 +35,7 @@ class EvalAttrs(TypedDict): helper_functions: dict[str, Callable] apply_where: NotRequired[bool] references: NotRequired[set] + build_config: config.Build class EvalWhere(expression_parser.EvalToArrayStr): @@ -118,9 +120,7 @@ def as_math_string(self) -> str: # noqa: D102, override return rf"\text{{config.{self.config_option}}}" def as_array(self) -> xr.DataArray: # noqa: D102, override - config_val = ( - self.eval_attrs["input_data"].attrs["config"].build[self.config_option] - ) + config_val = getattr(self.eval_attrs["build_config"], self.config_option) if not isinstance(config_val, int | float | str | bool | np.bool_): raise BackendError( diff --git a/src/calliope/cli.py b/src/calliope/cli.py index a9d811d2..4059de7e 100644 --- a/src/calliope/cli.py +++ b/src/calliope/cli.py @@ -278,9 +278,9 @@ def run( # Else run the model, then save outputs else: click.secho("Starting model run...") - + kwargs = {} if save_logs: - model.config.set_key("solve.save_logs", save_logs) + 
kwargs["solve.save_logs"] = save_logs if save_csv is None and save_netcdf is None: click.secho( @@ -292,14 +292,13 @@ def run( # If save_netcdf is used, override the 'save_per_spore_path' to point to a # directory of the same name as the planned netcdf - if save_netcdf and model.config.solve.spores_save_per_spore: - model.config.set_key( - "solve.spores_save_per_spore_path", + if save_netcdf and model.config.solve.spores.save_per_spore: + kwargs["solve.spores_save_per_spore_path"] = ( save_netcdf.replace(".nc", "/spore_{}.nc"), ) model.build() - model.solve() + model.solve(**kwargs) termination = model._model_data.attrs.get( "termination_condition", "unknown" ) diff --git a/src/calliope/config.py b/src/calliope/config.py index 75a9bf79..79ae5941 100644 --- a/src/calliope/config.py +++ b/src/calliope/config.py @@ -5,8 +5,9 @@ from collections.abc import Hashable from datetime import datetime from pathlib import Path -from typing import Annotated, Literal, Self, TypeVar, overload +from typing import Annotated, Literal, Self, TypeVar, get_args, overload +import jsonref from pydantic import AfterValidator, BaseModel, Field, model_validator from pydantic_core import PydanticCustomError @@ -67,7 +68,16 @@ def update(self, update_dict: dict, deep: bool = False) -> Self: Returns: BaseModel: New model instance. 
""" - updated = super().model_copy(update=update_dict, deep=deep) + new_dict: dict = {} + # Iterate through dict to be updated and convert any sub-dicts into their respective pydantic model objects + for key, val in update_dict.items(): + key_class = getattr(self, key) + if isinstance(key_class, ConfigBaseModel): + new_dict[key] = key_class.update(val) + key_class._kwargs = val + else: + new_dict[key] = val + updated = super().model_copy(update=new_dict, deep=deep) updated.model_validate(updated) self._kwargs = update_dict return updated @@ -87,20 +97,31 @@ def model_yaml_schema(self, filepath: str | Path | None = None) -> None | str: Returns: None | str: If `filepath` is given, returns None. Otherwise, returns the YAML string. """ - return AttrDict(self.model_json_schema()).to_yaml(filepath) + schema_dict = jsonref.replace_refs(self.model_json_schema()) + return AttrDict(schema_dict).to_yaml(filepath) + @property + def applied_keyword_overrides(self) -> dict: + """Most recently applied keyword overrides used to update this configuration. -class ModeBaseModel(BaseModel): + Returns: + dict: Description of applied overrides. 
+ """ + return self._kwargs + + +class ModeBaseModel(ConfigBaseModel): """Mode-specific configuration, which will be hidden from the string representation of the model if that mode is not activated.""" - _mode: str + mode: MODES_T = Field(default="plan") + """Mode in which to run the optimisation.""" @model_validator(mode="after") def update_repr(self) -> Self: """Hide config from model string representation if mode is not activated.""" for key, val in self.model_fields.items(): - if key.startswith(self._mode): - val.repr = self.mode == self._mode + if key in get_args(MODES_T): + val.repr = self.mode == key return self @@ -108,6 +129,7 @@ class Init(ConfigBaseModel): """All configuration options used when initialising a Calliope model.""" model_config = { + "title": "init", "extra": "forbid", "frozen": True, "json_schema_extra": hide_from_schema(["def_path"]), @@ -116,6 +138,8 @@ class Init(ConfigBaseModel): } def_path: Path = Field(default=".", repr=False, exclude=True) + """The path to the main model definition YAML file, if one has been used to instantiate the Calliope Model class.""" + name: str | None = Field(default=None) """Model name""" @@ -155,17 +179,52 @@ class Init(ConfigBaseModel): @classmethod def abs_path(cls, data): """Add model definition path.""" - if "time_cluster" in data: + if data.get("time_cluster", None) is not None: data["time_cluster"] = tools.relative_path( data["def_path"], data["time_cluster"] ) return data -class BuildBase(BaseModel): +class BuildOperate(ConfigBaseModel): + """Operate mode configuration options used when building a Calliope optimisation problem (`calliope.Model.build`).""" + + model_config = { + "title": "operate", + "extra": "forbid", + "json_schema_extra": hide_from_schema(["start_window_idx"]), + "revalidate_instances": "always", + "use_attribute_docstrings": True, + } + + window: str = Field(default="24h") + """ + Operate mode rolling `window`, given as a pandas frequency string. 
+ See [here](https://pandas.pydata.org/docs/user_guide/timeseries.html#offset-aliases) for a list of frequency aliases. + """ + + horizon: str = Field(default="48h") + """ + Operate mode rolling `horizon`, given as a pandas frequency string. + See [here](https://pandas.pydata.org/docs/user_guide/timeseries.html#offset-aliases) for a list of frequency aliases. + Must be ≥ `window` + """ + + use_cap_results: bool = Field(default=False) + """If the model already contains `plan` mode results, use those optimal capacities as input parameters to the `operate` mode run.""" + + start_window_idx: int = Field(default=0, repr=False, exclude=True) + """Which time window to build. This is used to track the window when re-building the model part way through solving in `operate` mode.""" + + +class Build(ModeBaseModel): """Base configuration options used when building a Calliope optimisation problem (`calliope.Model.build`).""" - model_config = {"extra": "allow", "revalidate_instances": "always"} + model_config = { + "title": "build", + "extra": "allow", + "revalidate_instances": "always", + } add_math: UniqueList[str] = Field(default=[]) """ List of references to files which contain additional mathematical formulations to be applied on top of or instead of the base mode math. @@ -189,9 +248,6 @@ class BuildBase(BaseModel): This should only be used as a debugging option (as any unmet demand/unused supply is a sign of improper model formulation). """ - mode: MODES_T = Field(default="plan") - """Mode in which to run the optimisation.""" - objective: str = Field(default="min_cost_optimisation") """Name of internal objective function to use, from those defined in the pre-defined math and any applied additional math.""" @@ -201,48 +257,56 @@ class BuildBase(BaseModel): You can switch this off (e.g., if you know there are no parsing errors) to reduce overall build time. 
""" + operate: BuildOperate = BuildOperate() -class BuildOperate(ModeBaseModel): - """Operate mode configuration options used when building a Calliope optimisation problem (`calliope.Model.build`).""" - _mode = "operate" +class SolveSpores(ConfigBaseModel): + """SPORES configuration options used when solving a Calliope optimisation problem (`calliope.Model.solve`).""" + + number: int = Field(default=3) + """SPORES mode number of iterations after the initial base run.""" - operate_window: str = Field(default=None) - """ - Operate mode rolling `window`, given as a pandas frequency string. - See [here](https://pandas.pydata.org/docs/user_guide/timeseries.html#offset-aliases) for a list of frequency aliases. - """ + score_cost_class: str = Field(default="score") + """SPORES mode cost class to vary between iterations after the initial base run.""" + + slack_cost_group: str = Field(default=None) + """SPORES mode cost class to keep below the given `slack` (usually "monetary").""" - operate_horizon: str = Field(default=None) + save_per_spore: bool = Field(default=False) """ - Operate mode rolling `horizon`, given as a pandas frequency string. - See [here](https://pandas.pydata.org/docs/user_guide/timeseries.html#offset-aliases) for a list of frequency aliases. - Must be ≥ `operate_window` + Whether or not to save the result of each SPORES mode run between iterations. + If False, will consolidate all iterations into one dataset after completion of N iterations (defined by `number`) and save that one dataset. """ - operate_use_cap_results: bool = Field(default=False) - """If the model already contains `plan` mode results, use those optimal capacities as input parameters to the `operate` mode run.""" - + save_per_spore_path: Path | None = Field(default=None) + """If saving per spore, the path to save to.""" -class Build(ConfigBaseModel, BuildOperate, BuildBase): - """All configuration options used when building a Calliope optimisation problem (`calliope.Model.build`). 
+ skip_cost_op: bool = Field(default=False) + """If the model already contains `plan` mode results, use those as the initial base run results and start with SPORES iterations immediately.""" - Additional configuration items will be passed onto math string parsing and can therefore be accessed in the `where` strings by `config.[item-name]`, - where "[item-name]" is the name of your own configuration item. - """ + @model_validator(mode="after") + def require_save_per_spore_path(self) -> Self: + """Ensure that path is given if saving per spore.""" + if self.save_per_spore: + if self.save_per_spore_path is None: + raise ValueError( + "Must define `save_per_spore_path` if you want to save each SPORES result separately." + ) + elif not self.save_per_spore_path.is_dir(): + raise ValueError("`save_per_spore_path` must be a directory.") + return self -class SolveBase(BaseModel): +class Solve(ModeBaseModel): """Base configuration options used when solving a Calliope optimisation problem (`calliope.Model.solve`).""" model_config = { + "title": "solve", "extra": "forbid", "revalidate_instances": "always", "json_schema_extra": hide_from_schema(["mode"]), } - mode: Literal["plan", "spores", "operate"] = Field(default="plan", repr=False) - save_logs: Path | None = Field(default=None) """If given, should be a path to a directory in which to save optimisation logs.""" @@ -264,55 +328,38 @@ class SolveBase(BaseModel): shadow_prices: UniqueList[str] = Field(default=[]) """Names of model constraints.""" + spores: SolveSpores = SolveSpores() -class SolveSpores(ModeBaseModel): - """SPORES configuration options used when solving a Calliope optimisation problem (`calliope.Model.solve`).""" - - _mode = "spores" - mode: MODES_T = Field(default=None) - - spores_number: int = Field(default=3) - """SPORES mode number of iterations after the initial base run.""" - - spores_score_cost_class: str = Field(default="spores_score") - """SPORES mode cost class to vary between iterations after the 
initial base run.""" - - spores_slack_cost_group: str = Field(default=None) - """SPORES mode cost class to keep below the given `slack` (usually "monetary").""" - - spores_save_per_spore: bool = Field(default=False) - """ - Whether or not to save the result of each SPORES mode run between iterations. - If False, will consolidate all iterations into one dataset after completion of N iterations (defined by `spores_number`) and save that one dataset. - """ - - spores_save_per_spore_path: Path | None = Field(default=None) - """If saving per spore, the path to save to.""" - - spores_skip_cost_op: bool = Field(default=False) - """If the model already contains `plan` mode results, use those as the initial base run results and start with SPORES iterations immediately.""" +class CalliopeConfig(ConfigBaseModel): + """Calliope configuration class.""" - @model_validator(mode="after") - def save_per_spore_path(self) -> Self: - """Ensure that path is given if saving per spore.""" - if self.spores_save_per_spore: - if self.spores_save_per_spore_path is None: - raise ValueError( - "Must define `spores_save_per_spore_path` if you want to save each SPORES result separately." - ) - elif not self.spores_save_per_spore_path.is_dir(): - raise ValueError("`spores_save_per_spore_path` must be a directory.") - return self + model_config = {"title": "config"} + init: Init = Init() + build: Build = Build() + solve: Solve = Solve() + @model_validator(mode="before") + @classmethod + def update_solve_mode(cls, data): + """Solve mode should match build mode.""" + data["solve"]["mode"] = data["build"]["mode"] + return data -class Solve(ConfigBaseModel, SolveSpores, SolveBase): - """All configuration options used when solving a Calliope optimisation problem (`calliope.Model.solve`).""" + def update(self, update_dict: dict, deep: bool = False) -> Self: + """Return a new iteration of the model with updated fields. + Updates are validated and stored in the parent class in the `_kwargs` key. 
-class CalliopeConfig(ConfigBaseModel): - """Calliope configuration class.""" + Args: + update_dict (dict): Dictionary with which to update the base model. + deep (bool, optional): Set to True to make a deep copy of the model. Defaults to False. - init: Init - build: Build - solve: Solve + Returns: + BaseModel: New model instance. + """ + update_dict_temp = AttrDict(update_dict) + if update_dict_temp.get_key("build.mode", None) is not None: + update_dict_temp.set_key("solve.mode", update_dict_temp["build"]["mode"]) + updated = super().update(update_dict_temp.as_dict(), deep=deep) + return updated diff --git a/src/calliope/example_models/national_scale/scenarios.yaml b/src/calliope/example_models/national_scale/scenarios.yaml index 58a3dc81..0e34f8f9 100644 --- a/src/calliope/example_models/national_scale/scenarios.yaml +++ b/src/calliope/example_models/national_scale/scenarios.yaml @@ -70,8 +70,9 @@ overrides: init.time_subset: ["2005-01-01", "2005-01-10"] build: mode: operate - operate_window: 12h - operate_horizon: 24h + operate: + window: 12h + horizon: 24h nodes: region1.techs.ccgt.flow_cap: 30000 diff --git a/src/calliope/example_models/urban_scale/scenarios.yaml b/src/calliope/example_models/urban_scale/scenarios.yaml index 12d114cb..d754496d 100644 --- a/src/calliope/example_models/urban_scale/scenarios.yaml +++ b/src/calliope/example_models/urban_scale/scenarios.yaml @@ -51,8 +51,9 @@ overrides: init.time_subset: ["2005-07-01", "2005-07-10"] build: mode: operate - operate_window: 2h - operate_horizon: 48h + operate: + window: 2h + horizon: 48h nodes: X1: diff --git a/src/calliope/model.py b/src/calliope/model.py index 811c9676..e6088c21 100644 --- a/src/calliope/model.py +++ b/src/calliope/model.py @@ -22,10 +22,9 @@ CONFIG_SCHEMA, MODEL_SCHEMA, extract_from_schema, - update_then_validate_config, validate_dict, ) -from calliope.util.tools import climb_template_tree, relative_path +from calliope.util.tools import climb_template_tree if TYPE_CHECKING: from 
calliope.backend.backend_model import BackendModel @@ -43,7 +42,7 @@ class Model: """A Calliope Model.""" _TS_OFFSET = pd.Timedelta(1, unit="nanoseconds") - ATTRS_SAVED = ("_def_path", "applied_math", "config") + ATTRS_SAVED = ("applied_math", "config") def __init__( self, @@ -77,7 +76,6 @@ def __init__( self.config: config.CalliopeConfig self.defaults: AttrDict self.applied_math: preprocess.CalliopeMath - self._def_path: str | None = None self.backend: BackendModel self._is_built: bool = False self._is_solved: bool = False @@ -88,20 +86,24 @@ def __init__( LOGGER, self._timings, "model_creation", comment="Model: initialising" ) if isinstance(model_definition, xr.Dataset): + if kwargs: + raise exceptions.ModelError( + "Cannot apply initialisation configuration overrides when loading data from an xarray Dataset." + ) self._init_from_model_data(model_definition) else: if isinstance(model_definition, dict): model_def_dict = AttrDict(model_definition) else: - self._def_path = str(model_definition) + kwargs["def_path"] = str(model_definition) model_def_dict = AttrDict.from_yaml(model_definition) (model_def, applied_overrides) = preprocess.load_scenario_overrides( - model_def_dict, scenario, override_dict, **kwargs + model_def_dict, scenario, override_dict ) self._init_from_model_def_dict( - model_def, applied_overrides, scenario, data_table_dfs + model_def, applied_overrides, scenario, data_table_dfs, **kwargs ) self._model_data.attrs["timestamp_model_creation"] = timestamp_model_creation @@ -144,6 +146,7 @@ def _init_from_model_def_dict( applied_overrides: str, scenario: str | None, data_table_dfs: dict[str, pd.DataFrame] | None = None, + **kwargs, ) -> None: """Initialise the model using pre-processed YAML files and optional dataframes/dicts. 
@@ -152,6 +155,7 @@ def _init_from_model_def_dict( applied_overrides (str): overrides specified by users scenario (str | None): scenario specified by users data_table_dfs (dict[str, pd.DataFrame] | None, optional): files with additional model information. Defaults to None. + **kwargs: Initialisation configuration overrides. """ # First pass to check top-level keys are all good validate_dict(model_definition, CONFIG_SCHEMA, "Model definition") @@ -163,21 +167,12 @@ def _init_from_model_def_dict( comment="Model: preprocessing stage 1 (model_run)", ) - model_config = config.CalliopeConfig(model_definition.pop("config")) - - if model_config.init.data["time_cluster"] is not None: - model_config.init.update( - { - "time_cluster": relative_path( - self._def_path, model_config.init.data["time_cluster"] - ) - } - ) - model_config.init.validate() + model_config = config.CalliopeConfig(**model_definition.pop("config")) + init_config = model_config.update({"init": kwargs}).init param_metadata = {"default": extract_from_schema(MODEL_SCHEMA, "default")} attributes = { - "calliope_version_defined": model_config.init.data["calliope_version"], + "calliope_version_defined": init_config.calliope_version, "calliope_version_initialised": calliope.__version__, "applied_overrides": applied_overrides, "scenario": scenario, @@ -188,15 +183,10 @@ def _init_from_model_def_dict( for table_name, table_dict in model_definition.pop("data_tables", {}).items(): table_dict, _ = climb_template_tree(table_dict, templates, table_name) data_tables.append( - DataTable(table_name, table_dict, data_table_dfs, self._def_path) + DataTable(table_name, table_dict, data_table_dfs, init_config.def_path) ) - model_data_factory = ModelDataFactory( - model_config.init.data, - model_definition, - data_tables, - attributes, - param_metadata, + init_config, model_definition, data_tables, attributes, param_metadata ) model_data_factory.build() @@ -209,8 +199,10 @@ def _init_from_model_def_dict( comment="Model: 
preprocessing stage 2 (model_data)", ) - self._model_data.attrs["name"] = model_config.init.data["name"] + self._model_data.attrs["name"] = init_config.name + # Unlike at the build and solve phases, we store the init config overrides in the main model config. + model_config.init = init_config self.config = model_config log_time( @@ -229,14 +221,13 @@ def _init_from_model_data(self, model_data: xr.Dataset) -> None: model_data (xr.Dataset): Model dataset with input parameters as arrays and configuration stored in the dataset attributes dictionary. """ - if "_def_path" in model_data.attrs: - self._def_path = model_data.attrs.pop("_def_path") if "applied_math" in model_data.attrs: self.applied_math = preprocess.CalliopeMath.from_dict( model_data.attrs.pop("applied_math") ) if "config" in model_data.attrs: - self.config = config.CalliopeConfig(model_data.attrs.pop("config")) + self.config = config.CalliopeConfig(**model_data.attrs.pop("config")) + self.config.update(model_data.attrs.pop("config_kwarg_overrides")) self._model_data = model_data @@ -276,35 +267,26 @@ def build( comment="Model: backend build starting", ) - self.config.build.data_temp = kwargs - latest_build_config = self.config.build.data - mode = self.config.build.mode + this_build_config = self.config.update({"build": kwargs}).build + mode = this_build_config.mode if mode == "operate": if not self._model_data.attrs["allow_operate_mode"]: raise exceptions.ModelError( "Unable to run this model in operate (i.e. dispatch) mode, probably because " "there exist non-uniform timesteps (e.g. 
from time clustering)" ) - start_window_idx = self.config.build.data.pop("start_window_idx", 0) - backend_input = self._prepare_operate_mode_inputs( - start_window_idx, **self.config.build.data - ) + backend_input = self._prepare_operate_mode_inputs(this_build_config.operate) else: backend_input = self._model_data - init_math_list = [] if self.config.build.data["ignore_mode_math"] else [mode] + init_math_list = [] if this_build_config.ignore_mode_math else [mode] end_math_list = [] if add_math_dict is None else [add_math_dict] - full_math_list = ( - init_math_list + self.config.build.data["add_math"] + end_math_list - ) + full_math_list = init_math_list + this_build_config.add_math + end_math_list LOGGER.debug(f"Math preprocessing | Loading math: {full_math_list}") - model_math = preprocess.CalliopeMath(full_math_list, self._def_path) + model_math = preprocess.CalliopeMath(full_math_list, self.config.init.def_path) self.backend = backend.get_model_backend( - latest_build_config["backend"], - backend_input, - model_math, - **latest_build_config, + this_build_config, backend_input, model_math ) self.backend.add_optimisation_components() @@ -359,23 +341,27 @@ def solve(self, force: bool = False, warmstart: bool = False, **kwargs) -> None: else: to_drop = [] - run_mode = self.config.build.data["mode"] + kwargs["mode"] = self.config.build.applied_keyword_overrides.get( + "mode", self.config.build.mode + ) + + this_solve_config = self.config.update({"solve": kwargs}).solve self._model_data.attrs["timestamp_solve_start"] = log_time( LOGGER, self._timings, "solve_start", - comment=f"Optimisation model | starting model in {run_mode} mode.", + comment=f"Optimisation model | starting model in {this_solve_config.mode} mode.", ) - solver_config = update_then_validate_config("solve", self.config, **kwargs) - - shadow_prices = solver_config.get("shadow_prices", []) + shadow_prices = this_solve_config.shadow_prices self.backend.shadow_prices.track_constraints(shadow_prices) - if 
run_mode == "operate": - results = self._solve_operate(**solver_config) + if this_solve_config.mode == "operate": + results = self._solve_operate(**this_solve_config.model_dump()) else: - results = self.backend._solve(warmstart=warmstart, **solver_config) + results = self.backend._solve( + warmstart=warmstart, **this_solve_config.model_dump() + ) log_time( LOGGER, @@ -388,7 +374,7 @@ def solve(self, force: bool = False, warmstart: bool = False, **kwargs) -> None: # Add additional post-processed result variables to results if results.attrs["termination_condition"] in ["optimal", "feasible"]: results = postprocess_results.postprocess_model_results( - results, self._model_data + results, self._model_data, self.config.solve.zero_threshold ) log_time( @@ -405,7 +391,6 @@ def solve(self, force: bool = False, warmstart: bool = False, **kwargs) -> None: self._model_data = xr.merge( [results, self._model_data], compat="override", combine_attrs="no_conflicts" ) - self._add_model_data_methods() self._model_data.attrs["timestamp_solve_complete"] = log_time( LOGGER, @@ -440,6 +425,7 @@ def to_netcdf(self, path): saved_attrs[attr] = dict(getattr(self, attr)) else: saved_attrs[attr] = getattr(self, attr) + saved_attrs["config_kwarg_overrides"] = self.config.applied_keyword_overrides io.save_netcdf(self._model_data, path, **saved_attrs) @@ -478,28 +464,24 @@ def info(self) -> str: return "\n".join(info_strings) def _prepare_operate_mode_inputs( - self, start_window_idx: int = 0, **config_kwargs + self, operate_config: config.BuildOperate ) -> xr.Dataset: """Slice the input data to just the length of operate mode time horizon. Args: - start_window_idx (int, optional): - Set the operate `window` to start at, based on integer index. - This is used when re-initialising the backend model for shorter time horizons close to the end of the model period. - Defaults to 0. - **config_kwargs: kwargs related to operate mode configuration. 
+ operate_config (config.BuildOperate): operate mode configuration options. Returns: xr.Dataset: Slice of input data. """ - window = config_kwargs["operate_window"] - horizon = config_kwargs["operate_horizon"] self._model_data.coords["windowsteps"] = pd.date_range( self.inputs.timesteps[0].item(), self.inputs.timesteps[-1].item(), - freq=window, + freq=operate_config.window, + ) + horizonsteps = self._model_data.coords["windowsteps"] + pd.Timedelta( + operate_config.horizon ) - horizonsteps = self._model_data.coords["windowsteps"] + pd.Timedelta(horizon) # We require an offset because pandas / xarray slicing is _inclusive_ of both endpoints # where we only want it to be inclusive of the left endpoint. # Except in the last time horizon, where we want it to include the right endpoint. @@ -509,11 +491,11 @@ def _prepare_operate_mode_inputs( self._model_data.coords["horizonsteps"] = clipped_horizonsteps - self._TS_OFFSET sliced_inputs = self._model_data.sel( timesteps=slice( - self._model_data.windowsteps[start_window_idx], - self._model_data.horizonsteps[start_window_idx], + self._model_data.windowsteps[operate_config.start_window_idx], + self._model_data.horizonsteps[operate_config.start_window_idx], ) ) - if config_kwargs.get("operate_use_cap_results", False): + if operate_config.use_cap_results: to_parameterise = extract_from_schema(MODEL_SCHEMA, "x-operate-param") if not self._is_solved: raise exceptions.ModelError( @@ -536,10 +518,7 @@ def _solve_operate(self, **solver_config) -> xr.Dataset: """ if self.backend.inputs.timesteps[0] != self._model_data.timesteps[0]: LOGGER.info("Optimisation model | Resetting model to first time window.") - self.build( - force=True, - **{"mode": "operate", **self.backend.inputs.attrs["config"]["build"]}, - ) + self.build(force=True, **self.config.build.applied_keyword_overrides) LOGGER.info("Optimisation model | Running first time window.") @@ -566,11 +545,9 @@ def _solve_operate(self, **solver_config) -> xr.Dataset: "Optimisation 
model | Reaching the end of the timeseries. " "Re-building model with shorter time horizon." ) - self.build( - force=True, - start_window_idx=idx + 1, - **self.backend.inputs.attrs["config"]["build"], - ) + build_kwargs = AttrDict(self.config.build.applied_keyword_overrides) + build_kwargs.set_key("operate.start_window_idx", idx + 1) + self.build(force=True, **build_kwargs) else: self.backend._dataset.coords["timesteps"] = new_inputs.timesteps self.backend.inputs.coords["timesteps"] = new_inputs.timesteps diff --git a/src/calliope/postprocess/postprocess.py b/src/calliope/postprocess/postprocess.py index 402b928e..327b1ce2 100644 --- a/src/calliope/postprocess/postprocess.py +++ b/src/calliope/postprocess/postprocess.py @@ -11,7 +11,7 @@ def postprocess_model_results( - results: xr.Dataset, model_data: xr.Dataset + results: xr.Dataset, model_data: xr.Dataset, zero_threshold: float ) -> xr.Dataset: """Post-processing of model results. @@ -22,11 +22,11 @@ def postprocess_model_results( Args: results (xarray.Dataset): Output from the solver backend. model_data (xarray.Dataset): Calliope model data. + zero_threshold (float): Numbers below this value will be assumed to be zero Returns: xarray.Dataset: input-results dataset. """ - zero_threshold = model_data.config.solve.zero_threshold results["capacity_factor"] = capacity_factor(results, model_data) results["systemwide_capacity_factor"] = capacity_factor( results, model_data, systemwide=True diff --git a/src/calliope/preprocess/data_tables.py b/src/calliope/preprocess/data_tables.py index 83233e2b..a9e7acf2 100644 --- a/src/calliope/preprocess/data_tables.py +++ b/src/calliope/preprocess/data_tables.py @@ -54,7 +54,7 @@ def __init__( table_name: str, data_table: DataTableDict, data_table_dfs: dict[str, pd.DataFrame] | None = None, - model_definition_path: Path | None = None, + model_definition_path: Path = Path("."), ): """Load and format a data table from file / in-memory object. 
@@ -64,7 +64,7 @@ def __init__( data_table_dfs (dict[str, pd.DataFrame] | None, optional): If given, a dictionary mapping table names in `data_table` to in-memory pandas DataFrames. Defaults to None. - model_definition_path (Path | None, optional): + model_definition_path (Path, optional): If given, the path to the model definition YAML file, relative to which data table filepaths will be set. If None, relative data table filepaths will be considered relative to the current working directory. Defaults to None. diff --git a/src/calliope/preprocess/model_data.py b/src/calliope/preprocess/model_data.py index 7c6d6cc3..89b21386 100644 --- a/src/calliope/preprocess/model_data.py +++ b/src/calliope/preprocess/model_data.py @@ -15,6 +15,7 @@ from calliope import exceptions from calliope.attrdict import AttrDict +from calliope.config import Init from calliope.preprocess import data_tables, time from calliope.util.schema import MODEL_SCHEMA, validate_dict from calliope.util.tools import climb_template_tree, listify @@ -70,7 +71,7 @@ class ModelDataFactory: def __init__( self, - model_config: dict, + init_config: Init, model_definition: ModelDefinition, data_tables: list[data_tables.DataTable], attributes: dict, @@ -81,13 +82,13 @@ def __init__( This includes resampling/clustering timeseries data as necessary. Args: - model_config (dict): Model initialisation configuration (i.e., `config.init`). + init_config (Init): Model initialisation configuration (i.e., `config.init`). model_definition (ModelDefinition): Definition of model nodes and technologies, and their potential `templates`. data_tables (list[data_tables.DataTable]): Pre-loaded data tables that will be used to initialise the dataset before handling definitions given in `model_definition`. attributes (dict): Attributes to attach to the model Dataset. param_attributes (dict[str, dict]): Attributes to attach to the generated model DataArrays. 
""" - self.config: dict = model_config + self.config: Init = init_config self.model_definition: ModelDefinition = model_definition.copy() self.dataset = xr.Dataset(attrs=AttrDict(attributes)) self.tech_data_from_tables = AttrDict() @@ -244,7 +245,7 @@ def update_time_dimension_and_params(self): raise exceptions.ModelError( "Must define at least one timeseries parameter in a Calliope model." ) - time_subset = self.config.get("time_subset", None) + time_subset = self.config.time_subset if time_subset is not None: self.dataset = time.subset_timeseries(self.dataset, time_subset) self.dataset = time.add_inferred_time_params(self.dataset) @@ -252,11 +253,11 @@ def update_time_dimension_and_params(self): # By default, the model allows operate mode self.dataset.attrs["allow_operate_mode"] = 1 - if self.config["time_resample"] is not None: - self.dataset = time.resample(self.dataset, self.config["time_resample"]) - if self.config["time_cluster"] is not None: + if self.config.time_resample is not None: + self.dataset = time.resample(self.dataset, self.config.time_resample) + if self.config.time_cluster is not None: self.dataset = time.cluster( - self.dataset, self.config["time_cluster"], self.config["time_format"] + self.dataset, self.config.time_cluster, self.config.time_format ) def clean_data_from_undefined_members(self): @@ -324,7 +325,7 @@ def add_link_distances(self): self.dataset.longitude.sel(nodes=node2).item(), )["s12"] distance_array = pd.Series(distances).rename_axis(index="techs").to_xarray() - if self.config["distance_unit"] == "km": + if self.config.distance_unit == "km": distance_array /= 1000 else: LOGGER.debug( @@ -660,7 +661,7 @@ def _add_to_dataset(self, to_add: xr.Dataset, id_: str): """ to_add_numeric_dims = self._update_numeric_dims(to_add, id_) to_add_numeric_ts_dims = time.timeseries_to_datetime( - to_add_numeric_dims, self.config["time_format"], id_ + to_add_numeric_dims, self.config.time_format, id_ ) self.dataset = xr.merge( 
[to_add_numeric_ts_dims, self.dataset], diff --git a/src/calliope/preprocess/scenarios.py b/src/calliope/preprocess/scenarios.py index 473544fb..88e382a1 100644 --- a/src/calliope/preprocess/scenarios.py +++ b/src/calliope/preprocess/scenarios.py @@ -15,7 +15,6 @@ def load_scenario_overrides( model_definition: dict, scenario: str | None = None, override_dict: dict | None = None, - **kwargs, ) -> tuple[AttrDict, str]: """Apply user-defined overrides to the model definition. @@ -28,8 +27,6 @@ def load_scenario_overrides( override_dict (dict | None, optional): Overrides to apply _after_ `scenario` overrides. Defaults to None. - **kwargs: - initialisation overrides. Returns: tuple[AttrDict, str]: @@ -88,10 +85,6 @@ def load_scenario_overrides( _log_overrides(model_def_dict, model_def_with_overrides) - model_def_with_overrides.union( - AttrDict({"config.init": kwargs}), allow_override=True - ) - return (model_def_with_overrides, ";".join(applied_overrides)) diff --git a/src/calliope/util/schema.py b/src/calliope/util/schema.py index bd98cc77..361cd9a9 100644 --- a/src/calliope/util/schema.py +++ b/src/calliope/util/schema.py @@ -25,20 +25,6 @@ def reset(): importlib.reload(sys.modules[__name__]) -def update_then_validate_config( - config_key: str, config_dict: AttrDict, **update_kwargs -) -> AttrDict: - """Return an updated version of the configuration schema.""" - to_validate = deepcopy(config_dict[config_key]) - to_validate.union(AttrDict(update_kwargs), allow_override=True) - validate_dict( - {"config": {config_key: to_validate}}, - CONFIG_SCHEMA, - f"`{config_key}` configuration", - ) - return to_validate - - def update_model_schema( top_level_property: Literal["nodes", "techs", "parameters"], new_entries: dict, diff --git a/src/calliope/util/tools.py b/src/calliope/util/tools.py index dee2f6ca..3d8d4320 100644 --- a/src/calliope/util/tools.py +++ b/src/calliope/util/tools.py @@ -15,7 +15,7 @@ T = TypeVar("T") -def relative_path(base_path_file, path) -> Path: +def 
relative_path(base_path_file: str | Path, path: str | Path) -> Path: """Path standardization. If ``path`` is not absolute, it is interpreted as relative to the @@ -23,7 +23,7 @@ def relative_path(base_path_file, path) -> Path: """ # Check if base_path_file is a string because it might be an AttrDict path = Path(path) - if path.is_absolute() or base_path_file is None: + if path.is_absolute(): return path else: base_path_file = Path(base_path_file) diff --git a/tests/common/util.py b/tests/common/util.py index 8ae70da8..94f90dc2 100644 --- a/tests/common/util.py +++ b/tests/common/util.py @@ -95,9 +95,7 @@ def build_lp( math (dict | None, optional): All constraint/global expression/objective math to apply. Defaults to None. backend_name (Literal["pyomo"], optional): Backend to use to create the LP file. Defaults to "pyomo". """ - math = calliope.preprocess.CalliopeMath( - ["plan", *model.config.build.get("add_math", [])] - ) + math = calliope.preprocess.CalliopeMath(["plan", *model.config.build.add_math]) math_to_add = calliope.AttrDict() if isinstance(math_data, dict): diff --git a/tests/conftest.py b/tests/conftest.py index 3d4694c5..0334d0b4 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -5,10 +5,11 @@ import pytest import xarray as xr +from calliope import config from calliope.attrdict import AttrDict from calliope.backend import latex_backend_model, pyomo_backend_model from calliope.preprocess import CalliopeMath -from calliope.util.schema import CONFIG_SCHEMA, MODEL_SCHEMA, extract_from_schema +from calliope.util.schema import MODEL_SCHEMA, extract_from_schema from .common.util import build_test_model as build_model @@ -33,7 +34,7 @@ def foreach(request): @pytest.fixture(scope="session") def config_defaults(): - return AttrDict(extract_from_schema(CONFIG_SCHEMA, "default")) + return AttrDict(config.CalliopeConfig().model_dump()) @pytest.fixture(scope="session") diff --git a/tests/test_core_model.py b/tests/test_core_model.py index e16ebfa4..ddd97800 
100644 --- a/tests/test_core_model.py +++ b/tests/test_core_model.py @@ -9,7 +9,6 @@ import calliope.preprocess from .common.util import build_test_model as build_model -from .common.util import check_error_or_warning LOGGER = "calliope.model" @@ -32,40 +31,6 @@ def test_info(self, national_scale_example): def test_info_simple_model(self, simple_supply): simple_supply.info() - def test_update_observed_dict(self, national_scale_example): - national_scale_example.config.build["backend"] = "foo" - assert national_scale_example._model_data.attrs["config"].build.backend == "foo" - - def test_add_observed_dict_from_model_data( - self, national_scale_example, dict_to_add - ): - national_scale_example._model_data.attrs["foo"] = dict_to_add - national_scale_example._add_observed_dict("foo") - assert national_scale_example.foo == dict_to_add - assert national_scale_example._model_data.attrs["foo"] == dict_to_add - - def test_add_observed_dict_from_dict(self, national_scale_example, dict_to_add): - national_scale_example._add_observed_dict("bar", dict_to_add) - assert national_scale_example.bar == dict_to_add - assert national_scale_example._model_data.attrs["bar"] == dict_to_add - - def test_add_observed_dict_not_available(self, national_scale_example): - with pytest.raises(calliope.exceptions.ModelError) as excinfo: - national_scale_example._add_observed_dict("baz") - assert check_error_or_warning( - excinfo, - "Expected the model property `baz` to be a dictionary attribute of the model dataset", - ) - assert not hasattr(national_scale_example, "baz") - - def test_add_observed_dict_not_dict(self, national_scale_example): - with pytest.raises(TypeError) as excinfo: - national_scale_example._add_observed_dict("baz", "bar") - assert check_error_or_warning( - excinfo, - "Attempted to add dictionary property `baz` to model, but received argument of type `str`", - ) - class TestOperateMode: @contextmanager @@ -127,9 +92,7 @@ def rerun_operate_log(self, request, 
operate_model_and_log): def test_backend_build_mode(self, operate_model_and_log): """Verify that we have run in operate mode""" operate_model, _ = operate_model_and_log - assert ( - operate_model.backend.inputs.attrs["config"]["build"]["mode"] == "operate" - ) + assert operate_model.backend.config.mode == "operate" def test_operate_mode_success(self, operate_model_and_log): """Solving in operate mode should lead to an optimal solution.""" @@ -153,8 +116,8 @@ def test_reset_model_window(self, rerun_operate_log): def test_end_of_horizon(self, operate_model_and_log): """Check that increasingly shorter time horizons are logged as model rebuilds.""" operate_model, log = operate_model_and_log - config = operate_model.backend.inputs.attrs["config"]["build"] - if config["operate_window"] != config["operate_horizon"]: + config = operate_model.backend.config.operate + if config.operate_window != config.operate_horizon: assert "Reaching the end of the timeseries." in log else: assert "Reaching the end of the timeseries." 
not in log diff --git a/tests/test_preprocess_model_data.py b/tests/test_preprocess_model_data.py index 48bc519c..e3208e1a 100644 --- a/tests/test_preprocess_model_data.py +++ b/tests/test_preprocess_model_data.py @@ -202,10 +202,14 @@ def test_add_link_distances_missing_distance( @pytest.mark.parametrize(("unit", "expected"), [("m", 343834), ("km", 343.834)]) def test_add_link_distances_no_da( - self, my_caplog, model_data_factory_w_params: ModelDataFactory, unit, expected + self, + mocker, + my_caplog, + model_data_factory_w_params: ModelDataFactory, + unit, + expected, ): - _default_distance_unit = model_data_factory_w_params.config["distance_unit"] - model_data_factory_w_params.config["distance_unit"] = unit + mocker.patch.object(ModelDataFactory, "config.distance_unit", return_value=unit) model_data_factory_w_params.clean_data_from_undefined_members() model_data_factory_w_params.dataset["latitude"] = ( pd.Series({"A": 51.507222, "B": 48.8567}) @@ -220,7 +224,6 @@ def test_add_link_distances_no_da( del model_data_factory_w_params.dataset["distance"] model_data_factory_w_params.add_link_distances() - model_data_factory_w_params.config["distance_unit"] = _default_distance_unit assert "Link distance matrix automatically computed" in my_caplog.text assert ( model_data_factory_w_params.dataset["distance"].dropna("techs") From 4f8168443e2e2638ac29742312a95362f2ed51f7 Mon Sep 17 00:00:00 2001 From: Bryn Pickering <17178478+brynpickering@users.noreply.github.com> Date: Thu, 7 Nov 2024 17:15:36 +0000 Subject: [PATCH 03/12] Minor cleanup --- docs/hooks/generate_readable_schema.py | 4 +++- requirements/base.txt | 3 ++- src/calliope/config.py | 8 ++++++-- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/docs/hooks/generate_readable_schema.py b/docs/hooks/generate_readable_schema.py index 8b799265..7bcfa205 100644 --- a/docs/hooks/generate_readable_schema.py +++ b/docs/hooks/generate_readable_schema.py @@ -20,7 +20,9 @@ TEMPDIR = tempfile.TemporaryDirectory() 
SCHEMAS = { - "config_schema": AttrDict.from_yaml(config.CalliopeConfig().model_yaml_schema()), + "config_schema": AttrDict.from_yaml_string( + config.CalliopeConfig().model_yaml_schema() + ), "model_schema": schema.MODEL_SCHEMA, "math_schema": schema.MATH_SCHEMA, "data_table_schema": schema.DATA_TABLE_SCHEMA, diff --git a/requirements/base.txt b/requirements/base.txt index 2bf5f664..6305e13d 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -4,6 +4,7 @@ geographiclib >= 2, < 3 ipdb >= 0.13, < 0.14 ipykernel < 7 jinja2 >= 3, < 4 +jsonref >= 1.1, < 2 jsonschema >= 4, < 5 natsort >= 8, < 9 netcdf4 >= 1.2, < 1.7 @@ -13,4 +14,4 @@ pyomo >= 6.5, < 6.7.2 pyparsing >= 3.0, < 3.1 ruamel.yaml >= 0.18, < 0.19 typing-extensions >= 4, < 5 -xarray >= 2024.1, < 2024.4 \ No newline at end of file +xarray >= 2024.1, < 2024.4 diff --git a/src/calliope/config.py b/src/calliope/config.py index 79ae5941..e07ee2cb 100644 --- a/src/calliope/config.py +++ b/src/calliope/config.py @@ -97,8 +97,12 @@ def model_yaml_schema(self, filepath: str | Path | None = None) -> None | str: Returns: None | str: If `filepath` is given, returns None. Otherwise, returns the YAML string. """ - schema_dict = jsonref.replace_refs(self.model_json_schema()) - return AttrDict(schema_dict).to_yaml(filepath) + # By default, the schema uses $ref/$def cross-referencing for each pydantic model class, + # but this isn't very readable when rendered in our documentation. 
+ # So, we resolve references and then delete all the `$defs` + schema_dict = AttrDict(jsonref.replace_refs(self.model_json_schema())) + schema_dict.del_key("$defs") + return schema_dict.to_yaml(filepath) @property def applied_keyword_overrides(self) -> dict: From e59e5e7edfbbaf4bcd99db5edc8958ba72a66fd1 Mon Sep 17 00:00:00 2001 From: Ivan Ruiz Manuel <72193617+irm-codebase@users.noreply.github.com> Date: Fri, 15 Nov 2024 10:16:56 +0100 Subject: [PATCH 04/12] Removed mode redundancy to simplify the configuration --- requirements/base.txt | 1 + src/calliope/backend/where_parser.py | 3 +-- src/calliope/config.py | 25 ------------------------ src/calliope/model.py | 29 +++++++++++++--------------- 4 files changed, 15 insertions(+), 43 deletions(-) diff --git a/requirements/base.txt b/requirements/base.txt index 6305e13d..65e0713e 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -15,3 +15,4 @@ pyparsing >= 3.0, < 3.1 ruamel.yaml >= 0.18, < 0.19 typing-extensions >= 4, < 5 xarray >= 2024.1, < 2024.4 +pydantic >= 2.9.2 diff --git a/src/calliope/backend/where_parser.py b/src/calliope/backend/where_parser.py index 06f782f6..ad020958 100644 --- a/src/calliope/backend/where_parser.py +++ b/src/calliope/backend/where_parser.py @@ -13,16 +13,15 @@ import xarray as xr from typing_extensions import NotRequired, TypedDict +from calliope import config from calliope.backend import expression_parser from calliope.exceptions import BackendError if TYPE_CHECKING: - from calliope import config from calliope.backend.backend_model import BackendModel pp.ParserElement.enablePackrat() - BOOLEANTYPE = np.bool_ | np.typing.NDArray[np.bool_] diff --git a/src/calliope/config.py b/src/calliope/config.py index e07ee2cb..d8050d66 100644 --- a/src/calliope/config.py +++ b/src/calliope/config.py @@ -342,28 +342,3 @@ class CalliopeConfig(ConfigBaseModel): init: Init = Init() build: Build = Build() solve: Solve = Solve() - - @model_validator(mode="before") - @classmethod - def 
update_solve_mode(cls, data): - """Solve mode should match build mode.""" - data["solve"]["mode"] = data["build"]["mode"] - return data - - def update(self, update_dict: dict, deep: bool = False) -> Self: - """Return a new iteration of the model with updated fields. - - Updates are validated and stored in the parent class in the `_kwargs` key. - - Args: - update_dict (dict): Dictionary with which to update the base model. - deep (bool, optional): Set to True to make a deep copy of the model. Defaults to False. - - Returns: - BaseModel: New model instance. - """ - update_dict_temp = AttrDict(update_dict) - if update_dict_temp.get_key("build.mode", None) is not None: - update_dict_temp.set_key("solve.mode", update_dict_temp["build"]["mode"]) - updated = super().update(update_dict_temp.as_dict(), deep=deep) - return updated diff --git a/src/calliope/model.py b/src/calliope/model.py index e6088c21..26a14653 100644 --- a/src/calliope/model.py +++ b/src/calliope/model.py @@ -267,26 +267,26 @@ def build( comment="Model: backend build starting", ) - this_build_config = self.config.update({"build": kwargs}).build - mode = this_build_config.mode + build_config = self.config.update({"build": kwargs}).build + mode = build_config.mode if mode == "operate": if not self._model_data.attrs["allow_operate_mode"]: raise exceptions.ModelError( "Unable to run this model in operate (i.e. dispatch) mode, probably because " "there exist non-uniform timesteps (e.g. 
from time clustering)" ) - backend_input = self._prepare_operate_mode_inputs(this_build_config.operate) + backend_input = self._prepare_operate_mode_inputs(build_config.operate) else: backend_input = self._model_data - init_math_list = [] if this_build_config.ignore_mode_math else [mode] + init_math_list = [] if build_config.ignore_mode_math else [mode] end_math_list = [] if add_math_dict is None else [add_math_dict] - full_math_list = init_math_list + this_build_config.add_math + end_math_list + full_math_list = init_math_list + build_config.add_math + end_math_list LOGGER.debug(f"Math preprocessing | Loading math: {full_math_list}") model_math = preprocess.CalliopeMath(full_math_list, self.config.init.def_path) self.backend = backend.get_model_backend( - this_build_config, backend_input, model_math + build_config, backend_input, model_math ) self.backend.add_optimisation_components() @@ -341,26 +341,23 @@ def solve(self, force: bool = False, warmstart: bool = False, **kwargs) -> None: else: to_drop = [] - kwargs["mode"] = self.config.build.applied_keyword_overrides.get( - "mode", self.config.build.mode - ) - - this_solve_config = self.config.update({"solve": kwargs}).solve + solve_config = self.config.update({"solve": kwargs}).solve + mode = self.config.build.mode self._model_data.attrs["timestamp_solve_start"] = log_time( LOGGER, self._timings, "solve_start", - comment=f"Optimisation model | starting model in {this_solve_config.mode} mode.", + comment=f"Optimisation model | starting model in {mode} mode.", ) - shadow_prices = this_solve_config.shadow_prices + shadow_prices = solve_config.shadow_prices self.backend.shadow_prices.track_constraints(shadow_prices) - if this_solve_config.mode == "operate": - results = self._solve_operate(**this_solve_config.model_dump()) + if mode == "operate": + results = self._solve_operate(**solve_config.model_dump()) else: results = self.backend._solve( - warmstart=warmstart, **this_solve_config.model_dump() + 
warmstart=warmstart, **solve_config.model_dump() ) log_time( From e033b74c2b1b321c880eab92f1ebd2eeb747db2a Mon Sep 17 00:00:00 2001 From: Ivan Ruiz Manuel <72193617+irm-codebase@users.noreply.github.com> Date: Fri, 15 Nov 2024 10:28:25 +0100 Subject: [PATCH 05/12] ruff fixes --- src/calliope/config.py | 26 +++++++------------------- 1 file changed, 7 insertions(+), 19 deletions(-) diff --git a/src/calliope/config.py b/src/calliope/config.py index d8050d66..56b6bf2c 100644 --- a/src/calliope/config.py +++ b/src/calliope/config.py @@ -5,7 +5,7 @@ from collections.abc import Hashable from datetime import datetime from pathlib import Path -from typing import Annotated, Literal, Self, TypeVar, get_args, overload +from typing import Annotated, Literal, Self, TypeVar, overload import jsonref from pydantic import AfterValidator, BaseModel, Field, model_validator @@ -114,21 +114,6 @@ def applied_keyword_overrides(self) -> dict: return self._kwargs -class ModeBaseModel(ConfigBaseModel): - """Mode-specific configuration, which will be hidden from the string representation of the model if that mode is not activated.""" - - mode: MODES_T = Field(default="plan") - """Mode in which to run the optimisation.""" - - @model_validator(mode="after") - def update_repr(self) -> Self: - """Hide config from model string representation if mode is not activated.""" - for key, val in self.model_fields.items(): - if key in get_args(MODES_T): - val.repr = self.mode == key - return self - - class Init(ConfigBaseModel): """All configuration options used when initialising a Calliope model.""" @@ -221,7 +206,7 @@ class BuildOperate(ConfigBaseModel): """Which time window to build. 
This is used to track the window when re-building the model part way through solving in `operate` mode.""" -class Build(ModeBaseModel): +class Build(ConfigBaseModel): """Base configuration options used when building a Calliope optimisation problem (`calliope.Model.build`).""" model_config = { @@ -229,6 +214,10 @@ class Build(ModeBaseModel): "extra": "allow", "revalidate_instances": "always", } + + mode: MODES_T = Field(default="plan") + """Mode in which to run the optimisation.""" + add_math: UniqueList[str] = Field(default=[]) """ List of references to files which contain additional mathematical formulations to be applied on top of or instead of the base mode math. @@ -301,14 +290,13 @@ def require_save_per_spore_path(self) -> Self: return self -class Solve(ModeBaseModel): +class Solve(ConfigBaseModel): """Base configuration options used when solving a Calliope optimisation problem (`calliope.Model.solve`).""" model_config = { "title": "solve", "extra": "forbid", "revalidate_instances": "always", - "json_schema_extra": hide_from_schema(["mode"]), } save_logs: Path | None = Field(default=None) From e2a051acfc999a3deb89860469336799ead1bb80 Mon Sep 17 00:00:00 2001 From: Ivan Ruiz Manuel <72193617+irm-codebase@users.noreply.github.com> Date: Fri, 15 Nov 2024 15:45:25 +0100 Subject: [PATCH 06/12] Simplify config schema extraction --- docs/hooks/generate_readable_schema.py | 6 ++---- src/calliope/attrdict.py | 28 ++++++++++++++------------ src/calliope/backend/backend_model.py | 2 +- src/calliope/cli.py | 2 +- src/calliope/config.py | 27 +++++++++---------------- src/calliope/io.py | 2 +- src/calliope/preprocess/scenarios.py | 2 +- tests/test_core_attrdict.py | 4 ++-- tests/test_preprocess_model_math.py | 2 +- 9 files changed, 34 insertions(+), 41 deletions(-) diff --git a/docs/hooks/generate_readable_schema.py b/docs/hooks/generate_readable_schema.py index 7bcfa205..9d9b4837 100644 --- a/docs/hooks/generate_readable_schema.py +++ 
b/docs/hooks/generate_readable_schema.py @@ -14,15 +14,13 @@ import jsonschema2md from mkdocs.structure.files import File -from calliope import AttrDict, config +from calliope import config from calliope.util import schema TEMPDIR = tempfile.TemporaryDirectory() SCHEMAS = { - "config_schema": AttrDict.from_yaml_string( - config.CalliopeConfig().model_yaml_schema() - ), + "config_schema": config.CalliopeConfig().model_json_schema(replace_refs=True), "model_schema": schema.MODEL_SCHEMA, "math_schema": schema.MATH_SCHEMA, "data_table_schema": schema.DATA_TABLE_SCHEMA, diff --git a/src/calliope/attrdict.py b/src/calliope/attrdict.py index f17cf0ef..53c00934 100644 --- a/src/calliope/attrdict.py +++ b/src/calliope/attrdict.py @@ -329,12 +329,8 @@ def as_dict_flat(self): d[k] = self.get_key(k) return d - def to_yaml(self, path=None): - """Conversion to YAML. - - Saves the AttrDict to the ``path`` as a YAML file or returns a YAML string - if ``path`` is None. - """ + def as_yaml_str(self) -> str: + """Return a serialised YAML string.""" result = self.copy() yaml_ = ruamel_yaml.YAML() yaml_.indent = 2 @@ -359,13 +355,19 @@ def to_yaml(self, path=None): # handle multi-line strings. walk_tree(result) - if path is not None: - with open(path, "w") as f: - yaml_.dump(result, f) - else: - stream = io.StringIO() - yaml_.dump(result, stream) - return stream.getvalue() + stream = io.StringIO() + yaml_.dump(result, stream) + return stream.getvalue() + + def save_yaml(self, path: str) -> None: + """Save AttrDict as a yaml file. + + Args: + path (str): path of saved YAML. + """ + yaml_str = self.as_yaml_str() + with open(path, "w") as f: + f.write(yaml_str) def keys_nested(self, subkeys_as="list"): """Returns all keys in the AttrDict, including nested keys. 
diff --git a/src/calliope/backend/backend_model.py b/src/calliope/backend/backend_model.py index 21603864..a3a0e8ae 100644 --- a/src/calliope/backend/backend_model.py +++ b/src/calliope/backend/backend_model.py @@ -446,7 +446,7 @@ def _add_to_dataset( yaml_snippet_attrs[attr] = val if yaml_snippet_attrs: - add_attrs["yaml_snippet"] = AttrDict(yaml_snippet_attrs).to_yaml() + add_attrs["yaml_snippet"] = AttrDict(yaml_snippet_attrs).as_yaml_str() da.attrs = { "obj_type": obj_type, diff --git a/src/calliope/cli.py b/src/calliope/cli.py index 4059de7e..8e049de9 100644 --- a/src/calliope/cli.py +++ b/src/calliope/cli.py @@ -399,4 +399,4 @@ def generate_scenarios( } } - AttrDict(scenarios).to_yaml(out_file) + AttrDict(scenarios).save_yaml(out_file) diff --git a/src/calliope/config.py b/src/calliope/config.py index 56b6bf2c..4e55beeb 100644 --- a/src/calliope/config.py +++ b/src/calliope/config.py @@ -5,7 +5,7 @@ from collections.abc import Hashable from datetime import datetime from pathlib import Path -from typing import Annotated, Literal, Self, TypeVar, overload +from typing import Annotated, Literal, Self, TypeVar import jsonref from pydantic import AfterValidator, BaseModel, Field, model_validator @@ -82,27 +82,20 @@ def update(self, update_dict: dict, deep: bool = False) -> Self: self._kwargs = update_dict return updated - @overload - def model_yaml_schema(self, filepath: str | Path) -> None: ... - - @overload - def model_yaml_schema(self, filepath: None = None) -> str: ... - - def model_yaml_schema(self, filepath: str | Path | None = None) -> None | str: - """Generate a YAML schema for the class. + def model_json_schema(self, replace_refs=False) -> AttrDict: + """Generate an AttrDict with the schema of this class. Args: - filepath (str | Path | None, optional): If given, save schema to given path. Defaults to None. + replace_refs (bool, optional): If True, replace $ref/$def for better readability. Defaults to False. 
Returns: - None | str: If `filepath` is given, returns None. Otherwise, returns the YAML string. + AttrDict: class schema. """ - # By default, the schema uses $ref/$def cross-referencing for each pydantic model class, - # but this isn't very readable when rendered in our documentation. - # So, we resolve references and then delete all the `$defs` - schema_dict = AttrDict(jsonref.replace_refs(self.model_json_schema())) - schema_dict.del_key("$defs") - return schema_dict.to_yaml(filepath) + schema_dict = AttrDict(super().model_json_schema()) + if replace_refs: + schema_dict = AttrDict(jsonref.replace_refs(schema_dict)) + schema_dict.del_key("$defs") + return schema_dict @property def applied_keyword_overrides(self) -> dict: diff --git a/src/calliope/io.py b/src/calliope/io.py index 205ffe7f..cd1f19f9 100644 --- a/src/calliope/io.py +++ b/src/calliope/io.py @@ -70,7 +70,7 @@ def _serialise(attrs: dict) -> None: dict_attrs = [k for k, v in attrs.items() if isinstance(v, dict)] attrs["serialised_dicts"] = dict_attrs for attr in dict_attrs: - attrs[attr] = AttrDict(attrs[attr]).to_yaml() + attrs[attr] = AttrDict(attrs[attr]).as_yaml_str() # Convert boolean attrs to ints bool_attrs = [k for k, v in attrs.items() if isinstance(v, bool)] diff --git a/src/calliope/preprocess/scenarios.py b/src/calliope/preprocess/scenarios.py index 88e382a1..a5ed5fe5 100644 --- a/src/calliope/preprocess/scenarios.py +++ b/src/calliope/preprocess/scenarios.py @@ -92,7 +92,7 @@ def _combine_overrides(overrides: AttrDict, scenario_overrides: list): combined_override_dict = AttrDict() for override in scenario_overrides: try: - yaml_string = overrides[override].to_yaml() + yaml_string = overrides[override].as_yaml_str() override_with_imports = AttrDict.from_yaml_string(yaml_string) except KeyError: raise exceptions.ModelError(f"Override `{override}` is not defined.") diff --git a/tests/test_core_attrdict.py b/tests/test_core_attrdict.py index c65ab18e..8e677f44 100644 --- 
a/tests/test_core_attrdict.py +++ b/tests/test_core_attrdict.py @@ -307,7 +307,7 @@ def test_to_yaml(self, yaml_filepath): d.a_list = [0, 1, 2] with tempfile.TemporaryDirectory() as tempdir: out_file = os.path.join(tempdir, "test.yaml") - d.to_yaml(out_file) + d.save_yaml(out_file) with open(out_file) as f: result = f.read() @@ -318,7 +318,7 @@ def test_to_yaml(self, yaml_filepath): def test_to_yaml_string(self, yaml_filepath): d = AttrDict.from_yaml(yaml_filepath) - result = d.to_yaml() + result = d.as_yaml_str() assert "a: 1" in result def test_import_must_be_list(self): diff --git a/tests/test_preprocess_model_math.py b/tests/test_preprocess_model_math.py index 46af363e..a6b146f5 100644 --- a/tests/test_preprocess_model_math.py +++ b/tests/test_preprocess_model_math.py @@ -36,7 +36,7 @@ def user_math(dummy_int): @pytest.fixture(scope="module") def user_math_path(def_path, user_math): file_path = def_path / "custom-math.yaml" - user_math.to_yaml(def_path / file_path) + user_math.save_yaml(def_path / file_path) return "custom-math.yaml" From 56e11265fdac127bd503c1e08482edad9ab71433 Mon Sep 17 00:00:00 2001 From: Ivan Ruiz Manuel <72193617+irm-codebase@users.noreply.github.com> Date: Mon, 18 Nov 2024 16:04:48 +0100 Subject: [PATCH 07/12] Add schemas folder Moved config.py to 'src/calliope/schemas'. config.py is too generic, and conflicts with config variable names. 
--- docs/hooks/generate_readable_schema.py | 6 ++++-- pyproject.toml | 2 +- src/calliope/backend/__init__.py | 4 ++-- src/calliope/backend/backend_model.py | 7 ++++--- src/calliope/backend/gurobi_backend_model.py | 4 ++-- src/calliope/backend/pyomo_backend_model.py | 4 ++-- src/calliope/backend/where_parser.py | 4 ++-- src/calliope/model.py | 11 ++++++----- src/calliope/preprocess/model_data.py | 2 +- src/calliope/schemas/__init__.py | 0 src/calliope/{config.py => schemas/config_schema.py} | 0 tests/conftest.py | 4 ++-- 12 files changed, 26 insertions(+), 22 deletions(-) create mode 100644 src/calliope/schemas/__init__.py rename src/calliope/{config.py => schemas/config_schema.py} (100%) diff --git a/docs/hooks/generate_readable_schema.py b/docs/hooks/generate_readable_schema.py index 9d9b4837..0b62a47d 100644 --- a/docs/hooks/generate_readable_schema.py +++ b/docs/hooks/generate_readable_schema.py @@ -14,13 +14,15 @@ import jsonschema2md from mkdocs.structure.files import File -from calliope import config +from calliope.schemas import config_schema from calliope.util import schema TEMPDIR = tempfile.TemporaryDirectory() SCHEMAS = { - "config_schema": config.CalliopeConfig().model_json_schema(replace_refs=True), + "config_schema": config_schema.CalliopeConfig().model_json_schema( + replace_refs=True + ), "model_schema": schema.MODEL_SCHEMA, "math_schema": schema.MATH_SCHEMA, "data_table_schema": schema.DATA_TABLE_SCHEMA, diff --git a/pyproject.toml b/pyproject.toml index c0d5839f..5d4a5b0b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,7 +54,7 @@ max-complexity = 10 # Ignore `E402` (import violations) and `F401` (unused imports) in all `__init__.py` files [tool.ruff.lint.per-file-ignores] -"__init__.py" = ["E402", "F401"] +"__init__.py" = ["E402", "F401", "D104"] "*.ipynb" = ["E402"] "tests/*" = ["D"] "docs/examples/*" = ["D"] diff --git a/src/calliope/backend/__init__.py b/src/calliope/backend/__init__.py index 84929792..1b663ae3 100644 --- 
a/src/calliope/backend/__init__.py +++ b/src/calliope/backend/__init__.py @@ -15,12 +15,12 @@ from calliope.preprocess import CalliopeMath if TYPE_CHECKING: - from calliope import config from calliope.backend.backend_model import BackendModel + from calliope.schemas import config_schema def get_model_backend( - build_config: "config.Build", data: xr.Dataset, math: CalliopeMath + build_config: "config_schema.Build", data: xr.Dataset, math: CalliopeMath ) -> "BackendModel": """Assign a backend using the given configuration. diff --git a/src/calliope/backend/backend_model.py b/src/calliope/backend/backend_model.py index a3a0e8ae..57745f68 100644 --- a/src/calliope/backend/backend_model.py +++ b/src/calliope/backend/backend_model.py @@ -26,12 +26,13 @@ import numpy as np import xarray as xr -from calliope import config, exceptions +from calliope import exceptions from calliope.attrdict import AttrDict from calliope.backend import helper_functions, parsing from calliope.exceptions import warn as model_warn from calliope.io import load_config from calliope.preprocess.model_math import ORDERED_COMPONENTS_T, CalliopeMath +from calliope.schemas import config_schema from calliope.util.schema import MODEL_SCHEMA, extract_from_schema if TYPE_CHECKING: @@ -66,7 +67,7 @@ class BackendModelGenerator(ABC): _PARAM_TYPE = extract_from_schema(MODEL_SCHEMA, "x-type") def __init__( - self, inputs: xr.Dataset, math: CalliopeMath, build_config: config.Build + self, inputs: xr.Dataset, math: CalliopeMath, build_config: config_schema.Build ): """Abstract base class to build a representation of the optimisation problem. @@ -607,7 +608,7 @@ def __init__( inputs: xr.Dataset, math: CalliopeMath, instance: T, - build_config: config.Build, + build_config: config_schema.Build, ) -> None: """Abstract base class to build backend models that interface with solvers. 
diff --git a/src/calliope/backend/gurobi_backend_model.py b/src/calliope/backend/gurobi_backend_model.py index ab02d9d4..a36d3ac0 100644 --- a/src/calliope/backend/gurobi_backend_model.py +++ b/src/calliope/backend/gurobi_backend_model.py @@ -14,11 +14,11 @@ import pandas as pd import xarray as xr -from calliope import config from calliope.backend import backend_model, parsing from calliope.exceptions import BackendError, BackendWarning from calliope.exceptions import warn as model_warn from calliope.preprocess import CalliopeMath +from calliope.schemas import config_schema if importlib.util.find_spec("gurobipy") is not None: import gurobipy @@ -43,7 +43,7 @@ class GurobiBackendModel(backend_model.BackendModel): """gurobipy-specific backend functionality.""" def __init__( - self, inputs: xr.Dataset, math: CalliopeMath, build_config: config.Build + self, inputs: xr.Dataset, math: CalliopeMath, build_config: config_schema.Build ) -> None: """Gurobi solver interface class. diff --git a/src/calliope/backend/pyomo_backend_model.py b/src/calliope/backend/pyomo_backend_model.py index 46ea3b32..2a439672 100644 --- a/src/calliope/backend/pyomo_backend_model.py +++ b/src/calliope/backend/pyomo_backend_model.py @@ -26,10 +26,10 @@ from pyomo.opt import SolverFactory # type: ignore from pyomo.util.model_size import build_model_size_report # type: ignore -from calliope import config from calliope.exceptions import BackendError, BackendWarning from calliope.exceptions import warn as model_warn from calliope.preprocess import CalliopeMath +from calliope.schemas import config_schema from calliope.util.logging import LogWriter from . 
import backend_model, parsing @@ -60,7 +60,7 @@ class PyomoBackendModel(backend_model.BackendModel): """Pyomo-specific backend functionality.""" def __init__( - self, inputs: xr.Dataset, math: CalliopeMath, build_config: config.Build + self, inputs: xr.Dataset, math: CalliopeMath, build_config: config_schema.Build ) -> None: """Pyomo solver interface class. diff --git a/src/calliope/backend/where_parser.py b/src/calliope/backend/where_parser.py index ad020958..f4b3ff6c 100644 --- a/src/calliope/backend/where_parser.py +++ b/src/calliope/backend/where_parser.py @@ -13,9 +13,9 @@ import xarray as xr from typing_extensions import NotRequired, TypedDict -from calliope import config from calliope.backend import expression_parser from calliope.exceptions import BackendError +from calliope.schemas import config_schema if TYPE_CHECKING: from calliope.backend.backend_model import BackendModel @@ -34,7 +34,7 @@ class EvalAttrs(TypedDict): helper_functions: dict[str, Callable] apply_where: NotRequired[bool] references: NotRequired[set] - build_config: config.Build + build_config: config_schema.Build class EvalWhere(expression_parser.EvalToArrayStr): diff --git a/src/calliope/model.py b/src/calliope/model.py index 26a14653..201e3fe1 100644 --- a/src/calliope/model.py +++ b/src/calliope/model.py @@ -12,11 +12,12 @@ import xarray as xr import calliope -from calliope import backend, config, exceptions, io, preprocess +from calliope import backend, exceptions, io, preprocess from calliope.attrdict import AttrDict from calliope.postprocess import postprocess as postprocess_results from calliope.preprocess.data_tables import DataTable from calliope.preprocess.model_data import ModelDataFactory +from calliope.schemas import config_schema from calliope.util.logging import log_time from calliope.util.schema import ( CONFIG_SCHEMA, @@ -73,7 +74,7 @@ def __init__( **kwargs: initialisation overrides. 
""" self._timings: dict = {} - self.config: config.CalliopeConfig + self.config: config_schema.CalliopeConfig self.defaults: AttrDict self.applied_math: preprocess.CalliopeMath self.backend: BackendModel @@ -167,7 +168,7 @@ def _init_from_model_def_dict( comment="Model: preprocessing stage 1 (model_run)", ) - model_config = config.CalliopeConfig(**model_definition.pop("config")) + model_config = config_schema.CalliopeConfig(**model_definition.pop("config")) init_config = model_config.update({"init": kwargs}).init param_metadata = {"default": extract_from_schema(MODEL_SCHEMA, "default")} @@ -226,7 +227,7 @@ def _init_from_model_data(self, model_data: xr.Dataset) -> None: model_data.attrs.pop("applied_math") ) if "config" in model_data.attrs: - self.config = config.CalliopeConfig(**model_data.attrs.pop("config")) + self.config = config_schema.CalliopeConfig(**model_data.attrs.pop("config")) self.config.update(model_data.attrs.pop("config_kwarg_overrides")) self._model_data = model_data @@ -461,7 +462,7 @@ def info(self) -> str: return "\n".join(info_strings) def _prepare_operate_mode_inputs( - self, operate_config: config.BuildOperate + self, operate_config: config_schema.BuildOperate ) -> xr.Dataset: """Slice the input data to just the length of operate mode time horizon. 
diff --git a/src/calliope/preprocess/model_data.py b/src/calliope/preprocess/model_data.py index 89b21386..b4b3bad0 100644 --- a/src/calliope/preprocess/model_data.py +++ b/src/calliope/preprocess/model_data.py @@ -15,8 +15,8 @@ from calliope import exceptions from calliope.attrdict import AttrDict -from calliope.config import Init from calliope.preprocess import data_tables, time +from calliope.schemas.config_schema import Init from calliope.util.schema import MODEL_SCHEMA, validate_dict from calliope.util.tools import climb_template_tree, listify diff --git a/src/calliope/schemas/__init__.py b/src/calliope/schemas/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/calliope/config.py b/src/calliope/schemas/config_schema.py similarity index 100% rename from src/calliope/config.py rename to src/calliope/schemas/config_schema.py diff --git a/tests/conftest.py b/tests/conftest.py index 0334d0b4..f85c7593 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -5,10 +5,10 @@ import pytest import xarray as xr -from calliope import config from calliope.attrdict import AttrDict from calliope.backend import latex_backend_model, pyomo_backend_model from calliope.preprocess import CalliopeMath +from calliope.schemas import config_schema from calliope.util.schema import MODEL_SCHEMA, extract_from_schema from .common.util import build_test_model as build_model @@ -34,7 +34,7 @@ def foreach(request): @pytest.fixture(scope="session") def config_defaults(): - return AttrDict(config.CalliopeConfig().model_dump()) + return AttrDict(config_schema.CalliopeConfig().model_dump()) @pytest.fixture(scope="session") From 46ae5a005f0d3c8443d3b14c1cd92cde5ae81f45 Mon Sep 17 00:00:00 2001 From: Ivan Ruiz Manuel <72193617+irm-codebase@users.noreply.github.com> Date: Tue, 19 Nov 2024 10:14:09 +0100 Subject: [PATCH 08/12] Add data table schema w/ tests --- src/calliope/schemas/config_schema.py | 25 +------ src/calliope/schemas/data_table_schema.py | 84 
+++++++++++++++++++++++ src/calliope/util/schema.py | 26 ++++++- tests/test_data_table_schema.py | 77 +++++++++++++++++++++ 4 files changed, 189 insertions(+), 23 deletions(-) create mode 100644 src/calliope/schemas/data_table_schema.py create mode 100644 tests/test_data_table_schema.py diff --git a/src/calliope/schemas/config_schema.py b/src/calliope/schemas/config_schema.py index 4e55beeb..e9113ef6 100644 --- a/src/calliope/schemas/config_schema.py +++ b/src/calliope/schemas/config_schema.py @@ -2,39 +2,20 @@ # Licensed under the Apache 2.0 License (see LICENSE file). """Implements the Calliope configuration class.""" -from collections.abc import Hashable from datetime import datetime from pathlib import Path -from typing import Annotated, Literal, Self, TypeVar +from typing import Literal, Self import jsonref -from pydantic import AfterValidator, BaseModel, Field, model_validator -from pydantic_core import PydanticCustomError +from pydantic import BaseModel, Field, model_validator from calliope.attrdict import AttrDict from calliope.util import tools +from calliope.util.schema import UniqueList MODES_T = Literal["plan", "operate", "spores"] CONFIG_T = Literal["init", "build", "solve"] -# == -# Taken from https://github.com/pydantic/pydantic-core/pull/820#issuecomment-1670475909 -T = TypeVar("T", bound=Hashable) - - -def _validate_unique_list(v: list[T]) -> list[T]: - if len(v) != len(set(v)): - raise PydanticCustomError("unique_list", "List must be unique") - return v - - -UniqueList = Annotated[ - list[T], - AfterValidator(_validate_unique_list), - Field(json_schema_extra={"uniqueItems": True}), -] -# == - def hide_from_schema(to_hide: list[str]): """Hide fields from the generated schema. 
diff --git a/src/calliope/schemas/data_table_schema.py b/src/calliope/schemas/data_table_schema.py new file mode 100644 index 00000000..c693150f --- /dev/null +++ b/src/calliope/schemas/data_table_schema.py @@ -0,0 +1,84 @@ +"""Implements the data table configuration class.""" + +from typing import Self + +from pydantic import BaseModel, model_validator + +from calliope.util.schema import AttrStr, UniqueList +from calliope.util.tools import listify + +# Get rid of pyright false negatives (see https://github.com/microsoft/pylance-release/issues/5457) +# pyright: reportInvalidTypeForm=false + + +class DataTable(BaseModel): + """Data table validation model.""" + + data: str + """ + Absolute or relative filepath. + Relative paths are based on the model config file used to initialise the model. + """ + rows: None | AttrStr | UniqueList[AttrStr] = None + """ + Names of dimensions defined row-wise. + Each name should correspond to a column in your data that contains index items. + These columns must be to the left of the columns containing your data. + """ + columns: None | AttrStr | UniqueList[AttrStr] = None + """ + Names of dimensions defined column-wise. + Each name should correspond to a row in your data that contains index items. + These rows must be above the rows containing your data. + """ + select: None | dict[AttrStr, AttrStr | UniqueList[AttrStr]] = None + """ + Select one or more index item from a dimension. + Selection takes place before `drop` and `add_dims`, so you can select a single + value from a data dimension and then drop the dimension so it doesn't find its way + through to the final dataset. + """ + drop: None | AttrStr | UniqueList[AttrStr] = None + """ + Enables removing rows and/or columns that contain irrelevant data/metadata. + These could include comments on the source of the data, the data license, or the parameter units. + You can also drop a dimension and then reintroduce it in `add_dims`, but with different index items. 
+ """ + add_dims: None | dict[AttrStr, AttrStr] = None + """ + Data dimensions to add after loading in the array. + These allow you to use the same file to assign values to different parameters/dimension index items + (e.g., setting `flow_cap_min` and `flow_cap_max` to the same value), + or to add a dimension which would otherwise be a column containing the same information in each row + (e.g., assigning the cost class to monetary for a file containing cost data). + """ + rename_dims: None | dict[AttrStr, AttrStr] = None + """ + Mapping between dimension names in the data table being loaded to equivalent Calliope dimension names. + For instance, the "time" column in the data table would need to be mapped to "timesteps": `{"time": "timesteps"}`. + """ + template: None | AttrStr = None + """ + Reference to a template from which to inherit common configuration options. + """ + + @model_validator(mode="after") + def check_row_and_columns(self) -> Self: + """Ensure users specify a valid data table shape.""" + rows = set(listify(self.rows)) + columns = set(listify(self.columns)) + if not rows and not columns: + raise ValueError("Either row or columns must be defined for data_table.") + elif rows & columns: + raise ValueError("Rows and columns must not overlap.") + + if self.add_dims: + if self.add_dims.keys() & (rows | columns): + raise ValueError("Added dimensions must not be in columns or rows.") + + if self.rename_dims: + if set(self.rename_dims.values()) - (rows | columns): + raise ValueError( + "Renamed dimensions must be in either rows or columns." 
+ ) + return self diff --git a/src/calliope/util/schema.py b/src/calliope/util/schema.py index 361cd9a9..f207c35a 100644 --- a/src/calliope/util/schema.py +++ b/src/calliope/util/schema.py @@ -5,10 +5,13 @@ import importlib import re import sys +from collections.abc import Hashable from copy import deepcopy -from typing import Literal +from typing import Annotated, Literal, TypeVar import jsonschema +from pydantic import AfterValidator, Field, constr +from pydantic_core import PydanticCustomError from calliope.attrdict import AttrDict from calliope.exceptions import print_warnings_and_raise_errors @@ -19,6 +22,27 @@ DATA_TABLE_SCHEMA = load_config("data_table_schema.yaml") MATH_SCHEMA = load_config("math_schema.yaml") +# Regular string pattern for most calliope attributes +FIELD_REGEX = r"^[^_^\d][\w]*$" +AttrStr = constr(pattern=FIELD_REGEX) +# == +# Taken from https://github.com/pydantic/pydantic-core/pull/820#issuecomment-1670475909 +T = TypeVar("T", bound=Hashable) + + +def _validate_unique_list(v: list[T]) -> list[T]: + if len(v) != len(set(v)): + raise PydanticCustomError("unique_list", "List must be unique") + return v + + +UniqueList = Annotated[ + list[T], + AfterValidator(_validate_unique_list), + Field(json_schema_extra={"uniqueItems": True}), +] +# == + def reset(): """Reset all module-level schema to the pre-defined dictionaries.""" diff --git a/tests/test_data_table_schema.py b/tests/test_data_table_schema.py new file mode 100644 index 00000000..4e7b5c81 --- /dev/null +++ b/tests/test_data_table_schema.py @@ -0,0 +1,77 @@ +"""Test data table schema validation.""" + +import pytest +from pydantic import ValidationError + +from calliope.attrdict import AttrDict +from calliope.schemas.data_table_schema import DataTable + +from .common.util import check_error_or_warning + +FULL_TABLE_CONFIG = """ +data: time_varying_df +rows: timesteps +columns: [comment, nodes, techs] +select: + nodes: [node1, node2] + techs: pv +drop: comment +add_dims: + parameters: 
something + costs: monetary +rename_dims: + location: nodes +template: some_template +""" + + +@pytest.mark.parametrize( + "data_table", + [{"rows": "timesteps"}, {"rows": "timesteps", "columns": ["techs", "nodes"]}], +) +def test_path_not_provided(data_table): + """Not providing the path should result in a failure.""" + with pytest.raises(ValidationError): + DataTable(**data_table) + + +@pytest.mark.parametrize("data_table", [{"data": "foo"}]) +def test_incomplete_column_or_row(data_table): + """Not providing either rows or columns is invalid.""" + with pytest.raises(ValidationError) as excinfo: + DataTable(**data_table) + assert check_error_or_warning( + excinfo, "Either row or columns must be defined for data_table." + ) + + +@pytest.mark.parametrize( + ("rows", "columns"), + [ + ("nodes", "nodes"), + (["nodes", "techs"], "techs"), + (["nodes", "techs", "params"], ["params", "costs"]), + ], +) +def test_row_column_overlap(rows, columns): + """Rows and columns must not share any similar values.""" + with pytest.raises(ValidationError) as excinfo: + DataTable(data="foobar", rows=rows, columns=columns) + assert check_error_or_warning(excinfo, "Rows and columns must not overlap.") + + +@pytest.mark.parametrize( + ("rows", "columns", "add_dims"), [("nodes", None, {"nodes": "MEX"})] +) +def test_add_dims_overlap(rows, columns, add_dims): + with pytest.raises(ValidationError) as excinfo: + DataTable(data="foo", rows=rows, columns=columns, add_dims=add_dims) + assert check_error_or_warning( + excinfo, "Added dimensions must not be in columns or rows." 
+ ) + + +@pytest.mark.parametrize("data_table", [FULL_TABLE_CONFIG]) +def test_full_table_config(data_table): + """Test a fully fledged data table configuration.""" + DataTable(**AttrDict.from_yaml_string(data_table)) From 318666006de86c746ca96ee1bea22dee875c35d4 Mon Sep 17 00:00:00 2001 From: Ivan Ruiz Manuel <72193617+irm-codebase@users.noreply.github.com> Date: Tue, 19 Nov 2024 17:17:58 +0100 Subject: [PATCH 09/12] Use Annotated for regex strings --- src/calliope/schemas/data_table_schema.py | 6 +---- src/calliope/util/schema.py | 5 ++-- tests/test_data_table_schema.py | 29 +++++++++++++++++++---- 3 files changed, 28 insertions(+), 12 deletions(-) diff --git a/src/calliope/schemas/data_table_schema.py b/src/calliope/schemas/data_table_schema.py index c693150f..0638d226 100644 --- a/src/calliope/schemas/data_table_schema.py +++ b/src/calliope/schemas/data_table_schema.py @@ -1,15 +1,11 @@ """Implements the data table configuration class.""" -from typing import Self - from pydantic import BaseModel, model_validator +from typing_extensions import Self from calliope.util.schema import AttrStr, UniqueList from calliope.util.tools import listify -# Get rid of pyright false negatives (see https://github.com/microsoft/pylance-release/issues/5457) -# pyright: reportInvalidTypeForm=false - class DataTable(BaseModel): """Data table validation model.""" diff --git a/src/calliope/util/schema.py b/src/calliope/util/schema.py index f207c35a..f8cb7b6c 100644 --- a/src/calliope/util/schema.py +++ b/src/calliope/util/schema.py @@ -10,7 +10,7 @@ from typing import Annotated, Literal, TypeVar import jsonschema -from pydantic import AfterValidator, Field, constr +from pydantic import AfterValidator, Field from pydantic_core import PydanticCustomError from calliope.attrdict import AttrDict @@ -24,7 +24,8 @@ # Regular string pattern for most calliope attributes FIELD_REGEX = r"^[^_^\d][\w]*$" -AttrStr = constr(pattern=FIELD_REGEX) +AttrStr = Annotated[str, Field(pattern=FIELD_REGEX)] 
+ # == # Taken from https://github.com/pydantic/pydantic-core/pull/820#issuecomment-1670475909 T = TypeVar("T", bound=Hashable) diff --git a/tests/test_data_table_schema.py b/tests/test_data_table_schema.py index 4e7b5c81..ba4d76b4 100644 --- a/tests/test_data_table_schema.py +++ b/tests/test_data_table_schema.py @@ -1,5 +1,7 @@ """Test data table schema validation.""" +from pathlib import Path + import pytest from pydantic import ValidationError @@ -8,8 +10,10 @@ from .common.util import check_error_or_warning -FULL_TABLE_CONFIG = """ -data: time_varying_df + +@pytest.fixture +def full_data_table_config(): + return """data: time_varying_df rows: timesteps columns: [comment, nodes, techs] select: @@ -25,6 +29,16 @@ """ +@pytest.fixture +def model_yaml_data_tables() -> AttrDict: + return AttrDict.from_yaml( + Path(__file__).parent + / "common" + / "national_scale_from_data_tables" + / "model.yaml" + ) + + @pytest.mark.parametrize( "data_table", [{"rows": "timesteps"}, {"rows": "timesteps", "columns": ["techs", "nodes"]}], @@ -71,7 +85,12 @@ def test_add_dims_overlap(rows, columns, add_dims): ) -@pytest.mark.parametrize("data_table", [FULL_TABLE_CONFIG]) -def test_full_table_config(data_table): +def test_full_table_config(full_data_table_config): """Test a fully fledged data table configuration.""" - DataTable(**AttrDict.from_yaml_string(data_table)) + DataTable(**AttrDict.from_yaml_string(full_data_table_config)) + + +def test_data_table_model(model_yaml_data_tables): + """Data table validation must conform to expected usage.""" + for data_table in model_yaml_data_tables["data_tables"].values(): + DataTable(**data_table) From e35ee0eafde6f11ff8800ef63a0e2f65bb6cbdcc Mon Sep 17 00:00:00 2001 From: Ivan Ruiz Manuel <72193617+irm-codebase@users.noreply.github.com> Date: Wed, 20 Nov 2024 17:24:09 +0100 Subject: [PATCH 10/12] Add config obj: simplification suggestions (#711) * Removed mode redundancy to simplify the configuration * Simplify config schema extraction --- 
docs/hooks/generate_readable_schema.py | 6 +-- requirements/base.txt | 1 + src/calliope/attrdict.py | 20 +++---- src/calliope/backend/where_parser.py | 3 +- src/calliope/config.py | 74 +++++--------------------- src/calliope/model.py | 30 +++++------ 6 files changed, 39 insertions(+), 95 deletions(-) diff --git a/docs/hooks/generate_readable_schema.py b/docs/hooks/generate_readable_schema.py index 7bcfa205..be72c513 100644 --- a/docs/hooks/generate_readable_schema.py +++ b/docs/hooks/generate_readable_schema.py @@ -14,15 +14,13 @@ import jsonschema2md from mkdocs.structure.files import File -from calliope import AttrDict, config +from calliope import config from calliope.util import schema TEMPDIR = tempfile.TemporaryDirectory() SCHEMAS = { - "config_schema": AttrDict.from_yaml_string( - config.CalliopeConfig().model_yaml_schema() - ), + "config_schema": config.CalliopeConfig().model_no_ref_schema(), "model_schema": schema.MODEL_SCHEMA, "math_schema": schema.MATH_SCHEMA, "data_table_schema": schema.DATA_TABLE_SCHEMA, diff --git a/requirements/base.txt b/requirements/base.txt index 6305e13d..65e0713e 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -15,3 +15,4 @@ pyparsing >= 3.0, < 3.1 ruamel.yaml >= 0.18, < 0.19 typing-extensions >= 4, < 5 xarray >= 2024.1, < 2024.4 +pydantic >= 2.9.2 diff --git a/src/calliope/attrdict.py b/src/calliope/attrdict.py index f17cf0ef..3e12e402 100644 --- a/src/calliope/attrdict.py +++ b/src/calliope/attrdict.py @@ -329,12 +329,8 @@ def as_dict_flat(self): d[k] = self.get_key(k) return d - def to_yaml(self, path=None): - """Conversion to YAML. - - Saves the AttrDict to the ``path`` as a YAML file or returns a YAML string - if ``path`` is None. - """ + def to_yaml(self, path: str | None = None) -> str: + """Return a serialised YAML string.""" result = self.copy() yaml_ = ruamel_yaml.YAML() yaml_.indent = 2 @@ -359,13 +355,13 @@ def to_yaml(self, path=None): # handle multi-line strings. 
walk_tree(result) - if path is not None: + stream = io.StringIO() + yaml_.dump(result, stream) + yaml_str = stream.getvalue() + if path: with open(path, "w") as f: - yaml_.dump(result, f) - else: - stream = io.StringIO() - yaml_.dump(result, stream) - return stream.getvalue() + f.write(yaml_str) + return yaml_str def keys_nested(self, subkeys_as="list"): """Returns all keys in the AttrDict, including nested keys. diff --git a/src/calliope/backend/where_parser.py b/src/calliope/backend/where_parser.py index 06f782f6..ad020958 100644 --- a/src/calliope/backend/where_parser.py +++ b/src/calliope/backend/where_parser.py @@ -13,16 +13,15 @@ import xarray as xr from typing_extensions import NotRequired, TypedDict +from calliope import config from calliope.backend import expression_parser from calliope.exceptions import BackendError if TYPE_CHECKING: - from calliope import config from calliope.backend.backend_model import BackendModel pp.ParserElement.enablePackrat() - BOOLEANTYPE = np.bool_ | np.typing.NDArray[np.bool_] diff --git a/src/calliope/config.py b/src/calliope/config.py index e07ee2cb..68af4c11 100644 --- a/src/calliope/config.py +++ b/src/calliope/config.py @@ -5,7 +5,7 @@ from collections.abc import Hashable from datetime import datetime from pathlib import Path -from typing import Annotated, Literal, Self, TypeVar, get_args, overload +from typing import Annotated, Literal, Self, TypeVar import jsonref from pydantic import AfterValidator, BaseModel, Field, model_validator @@ -82,27 +82,16 @@ def update(self, update_dict: dict, deep: bool = False) -> Self: self._kwargs = update_dict return updated - @overload - def model_yaml_schema(self, filepath: str | Path) -> None: ... - - @overload - def model_yaml_schema(self, filepath: None = None) -> str: ... - - def model_yaml_schema(self, filepath: str | Path | None = None) -> None | str: - """Generate a YAML schema for the class. 
- - Args: - filepath (str | Path | None, optional): If given, save schema to given path. Defaults to None. + def model_no_ref_schema(self) -> AttrDict: + """Generate an AttrDict with the schema replacing $ref/$def for better readability. Returns: - None | str: If `filepath` is given, returns None. Otherwise, returns the YAML string. + AttrDict: class schema. """ - # By default, the schema uses $ref/$def cross-referencing for each pydantic model class, - # but this isn't very readable when rendered in our documentation. - # So, we resolve references and then delete all the `$defs` - schema_dict = AttrDict(jsonref.replace_refs(self.model_json_schema())) + schema_dict = AttrDict(super().model_json_schema()) + schema_dict = AttrDict(jsonref.replace_refs(schema_dict)) schema_dict.del_key("$defs") - return schema_dict.to_yaml(filepath) + return schema_dict @property def applied_keyword_overrides(self) -> dict: @@ -114,21 +103,6 @@ def applied_keyword_overrides(self) -> dict: return self._kwargs -class ModeBaseModel(ConfigBaseModel): - """Mode-specific configuration, which will be hidden from the string representation of the model if that mode is not activated.""" - - mode: MODES_T = Field(default="plan") - """Mode in which to run the optimisation.""" - - @model_validator(mode="after") - def update_repr(self) -> Self: - """Hide config from model string representation if mode is not activated.""" - for key, val in self.model_fields.items(): - if key in get_args(MODES_T): - val.repr = self.mode == key - return self - - class Init(ConfigBaseModel): """All configuration options used when initialising a Calliope model.""" @@ -221,7 +195,7 @@ class BuildOperate(ConfigBaseModel): """Which time window to build. 
This is used to track the window when re-building the model part way through solving in `operate` mode.""" -class Build(ModeBaseModel): +class Build(ConfigBaseModel): """Base configuration options used when building a Calliope optimisation problem (`calliope.Model.build`).""" model_config = { @@ -229,6 +203,10 @@ class Build(ModeBaseModel): "extra": "allow", "revalidate_instances": "always", } + + mode: MODES_T = Field(default="plan") + """Mode in which to run the optimisation.""" + add_math: UniqueList[str] = Field(default=[]) """ List of references to files which contain additional mathematical formulations to be applied on top of or instead of the base mode math. @@ -301,14 +279,13 @@ def require_save_per_spore_path(self) -> Self: return self -class Solve(ModeBaseModel): +class Solve(ConfigBaseModel): """Base configuration options used when solving a Calliope optimisation problem (`calliope.Model.solve`).""" model_config = { "title": "solve", "extra": "forbid", "revalidate_instances": "always", - "json_schema_extra": hide_from_schema(["mode"]), } save_logs: Path | None = Field(default=None) @@ -342,28 +319,3 @@ class CalliopeConfig(ConfigBaseModel): init: Init = Init() build: Build = Build() solve: Solve = Solve() - - @model_validator(mode="before") - @classmethod - def update_solve_mode(cls, data): - """Solve mode should match build mode.""" - data["solve"]["mode"] = data["build"]["mode"] - return data - - def update(self, update_dict: dict, deep: bool = False) -> Self: - """Return a new iteration of the model with updated fields. - - Updates are validated and stored in the parent class in the `_kwargs` key. - - Args: - update_dict (dict): Dictionary with which to update the base model. - deep (bool, optional): Set to True to make a deep copy of the model. Defaults to False. - - Returns: - BaseModel: New model instance. 
- """ - update_dict_temp = AttrDict(update_dict) - if update_dict_temp.get_key("build.mode", None) is not None: - update_dict_temp.set_key("solve.mode", update_dict_temp["build"]["mode"]) - updated = super().update(update_dict_temp.as_dict(), deep=deep) - return updated diff --git a/src/calliope/model.py b/src/calliope/model.py index e6088c21..bc4eb938 100644 --- a/src/calliope/model.py +++ b/src/calliope/model.py @@ -267,26 +267,26 @@ def build( comment="Model: backend build starting", ) - this_build_config = self.config.update({"build": kwargs}).build - mode = this_build_config.mode + build_config = self.config.update({"build": kwargs}).build + mode = build_config.mode if mode == "operate": if not self._model_data.attrs["allow_operate_mode"]: raise exceptions.ModelError( "Unable to run this model in operate (i.e. dispatch) mode, probably because " "there exist non-uniform timesteps (e.g. from time clustering)" ) - backend_input = self._prepare_operate_mode_inputs(this_build_config.operate) + backend_input = self._prepare_operate_mode_inputs(build_config.operate) else: backend_input = self._model_data - init_math_list = [] if this_build_config.ignore_mode_math else [mode] + init_math_list = [] if build_config.ignore_mode_math else [mode] end_math_list = [] if add_math_dict is None else [add_math_dict] - full_math_list = init_math_list + this_build_config.add_math + end_math_list + full_math_list = init_math_list + build_config.add_math + end_math_list LOGGER.debug(f"Math preprocessing | Loading math: {full_math_list}") model_math = preprocess.CalliopeMath(full_math_list, self.config.init.def_path) self.backend = backend.get_model_backend( - this_build_config, backend_input, model_math + build_config, backend_input, model_math ) self.backend.add_optimisation_components() @@ -341,26 +341,24 @@ def solve(self, force: bool = False, warmstart: bool = False, **kwargs) -> None: else: to_drop = [] - kwargs["mode"] = self.config.build.applied_keyword_overrides.get( - 
"mode", self.config.build.mode - ) - - this_solve_config = self.config.update({"solve": kwargs}).solve + solve_config = self.config.update({"solve": kwargs}).solve + # FIXME: find a way to avoid overcomplicated passing of settings between modes + mode = self.config.update(self.config.applied_keyword_overrides).build.mode self._model_data.attrs["timestamp_solve_start"] = log_time( LOGGER, self._timings, "solve_start", - comment=f"Optimisation model | starting model in {this_solve_config.mode} mode.", + comment=f"Optimisation model | starting model in {mode} mode.", ) - shadow_prices = this_solve_config.shadow_prices + shadow_prices = solve_config.shadow_prices self.backend.shadow_prices.track_constraints(shadow_prices) - if this_solve_config.mode == "operate": - results = self._solve_operate(**this_solve_config.model_dump()) + if mode == "operate": + results = self._solve_operate(**solve_config.model_dump()) else: results = self.backend._solve( - warmstart=warmstart, **this_solve_config.model_dump() + warmstart=warmstart, **solve_config.model_dump() ) log_time( From 2c9a36d01f439556b6ea408457f8ffaaf6ed3746 Mon Sep 17 00:00:00 2001 From: Ivan Ruiz Manuel <72193617+irm-codebase@users.noreply.github.com> Date: Thu, 21 Nov 2024 10:27:55 +0100 Subject: [PATCH 11/12] fix yaml call merge bug --- src/calliope/attrdict.py | 2 +- src/calliope/backend/backend_model.py | 2 +- src/calliope/io.py | 2 +- src/calliope/preprocess/scenarios.py | 4 ++-- tests/test_core_attrdict.py | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/calliope/attrdict.py b/src/calliope/attrdict.py index 94b23164..fa9ebbf7 100644 --- a/src/calliope/attrdict.py +++ b/src/calliope/attrdict.py @@ -369,7 +369,7 @@ def save_yaml(self, path: str) -> None: Args: path (str): path of saved YAML. 
""" - yaml_str = self.as_yaml_str() + yaml_str = self.to_yaml() with open(path, "w") as f: f.write(yaml_str) diff --git a/src/calliope/backend/backend_model.py b/src/calliope/backend/backend_model.py index 57745f68..40a687ef 100644 --- a/src/calliope/backend/backend_model.py +++ b/src/calliope/backend/backend_model.py @@ -447,7 +447,7 @@ def _add_to_dataset( yaml_snippet_attrs[attr] = val if yaml_snippet_attrs: - add_attrs["yaml_snippet"] = AttrDict(yaml_snippet_attrs).as_yaml_str() + add_attrs["yaml_snippet"] = AttrDict(yaml_snippet_attrs).to_yaml() da.attrs = { "obj_type": obj_type, diff --git a/src/calliope/io.py b/src/calliope/io.py index cd1f19f9..205ffe7f 100644 --- a/src/calliope/io.py +++ b/src/calliope/io.py @@ -70,7 +70,7 @@ def _serialise(attrs: dict) -> None: dict_attrs = [k for k, v in attrs.items() if isinstance(v, dict)] attrs["serialised_dicts"] = dict_attrs for attr in dict_attrs: - attrs[attr] = AttrDict(attrs[attr]).as_yaml_str() + attrs[attr] = AttrDict(attrs[attr]).to_yaml() # Convert boolean attrs to ints bool_attrs = [k for k, v in attrs.items() if isinstance(v, bool)] diff --git a/src/calliope/preprocess/scenarios.py b/src/calliope/preprocess/scenarios.py index a5ed5fe5..9673defb 100644 --- a/src/calliope/preprocess/scenarios.py +++ b/src/calliope/preprocess/scenarios.py @@ -88,11 +88,11 @@ def load_scenario_overrides( return (model_def_with_overrides, ";".join(applied_overrides)) -def _combine_overrides(overrides: AttrDict, scenario_overrides: list): +def _combine_overrides(overrides: AttrDict, scenario_overrides: list[AttrDict]): combined_override_dict = AttrDict() for override in scenario_overrides: try: - yaml_string = overrides[override].as_yaml_str() + yaml_string = overrides[override].to_yaml() override_with_imports = AttrDict.from_yaml_string(yaml_string) except KeyError: raise exceptions.ModelError(f"Override `{override}` is not defined.") diff --git a/tests/test_core_attrdict.py b/tests/test_core_attrdict.py index 
8e677f44..bec72526 100644 --- a/tests/test_core_attrdict.py +++ b/tests/test_core_attrdict.py @@ -318,7 +318,7 @@ def test_to_yaml(self, yaml_filepath): def test_to_yaml_string(self, yaml_filepath): d = AttrDict.from_yaml(yaml_filepath) - result = d.as_yaml_str() + result = d.to_yaml() assert "a: 1" in result def test_import_must_be_list(self): From a88c18d65d36677d0399ed9e1321e75b117a51ef Mon Sep 17 00:00:00 2001 From: Ivan Ruiz Manuel <72193617+irm-codebase@users.noreply.github.com> Date: Thu, 21 Nov 2024 20:07:28 +0100 Subject: [PATCH 12/12] Fix attr dict call to yaml --- src/calliope/attrdict.py | 10 ---------- src/calliope/cli.py | 2 +- tests/test_core_attrdict.py | 2 +- tests/test_preprocess_model_math.py | 2 +- 4 files changed, 3 insertions(+), 13 deletions(-) diff --git a/src/calliope/attrdict.py b/src/calliope/attrdict.py index fa9ebbf7..3e12e402 100644 --- a/src/calliope/attrdict.py +++ b/src/calliope/attrdict.py @@ -363,16 +363,6 @@ def to_yaml(self, path: str | None = None) -> str: f.write(yaml_str) return yaml_str - def save_yaml(self, path: str) -> None: - """Save AttrDict as a yaml file. - - Args: - path (str): path of saved YAML. - """ - yaml_str = self.to_yaml() - with open(path, "w") as f: - f.write(yaml_str) - def keys_nested(self, subkeys_as="list"): """Returns all keys in the AttrDict, including nested keys. 
diff --git a/src/calliope/cli.py b/src/calliope/cli.py index 8e049de9..4059de7e 100644 --- a/src/calliope/cli.py +++ b/src/calliope/cli.py @@ -399,4 +399,4 @@ def generate_scenarios( } } - AttrDict(scenarios).save_yaml(out_file) + AttrDict(scenarios).to_yaml(out_file) diff --git a/tests/test_core_attrdict.py b/tests/test_core_attrdict.py index bec72526..c65ab18e 100644 --- a/tests/test_core_attrdict.py +++ b/tests/test_core_attrdict.py @@ -307,7 +307,7 @@ def test_to_yaml(self, yaml_filepath): d.a_list = [0, 1, 2] with tempfile.TemporaryDirectory() as tempdir: out_file = os.path.join(tempdir, "test.yaml") - d.save_yaml(out_file) + d.to_yaml(out_file) with open(out_file) as f: result = f.read() diff --git a/tests/test_preprocess_model_math.py b/tests/test_preprocess_model_math.py index a6b146f5..46af363e 100644 --- a/tests/test_preprocess_model_math.py +++ b/tests/test_preprocess_model_math.py @@ -36,7 +36,7 @@ def user_math(dummy_int): @pytest.fixture(scope="module") def user_math_path(def_path, user_math): file_path = def_path / "custom-math.yaml" - user_math.save_yaml(def_path / file_path) + user_math.to_yaml(def_path / file_path) return "custom-math.yaml"