add columnar data to data types (but don't use yet) #663

Merged · 12 commits · Aug 21, 2024
74 changes: 43 additions & 31 deletions src/power_grid_model/_utils.py
@@ -18,16 +18,17 @@
from power_grid_model.core.data_handling import OutputType, process_output_component_types
from power_grid_model.core.dataset_definitions import ComponentType
from power_grid_model.data_types import (
-    BatchArray,
+    BatchComponentData,
     BatchDataset,
     BatchList,
     Dataset,
     DenseBatchArray,
     PythonDataset,
     SingleArray,
+    SingleComponentData,
     SingleDataset,
     SinglePythonDataset,
-    SparseBatchArray,
+    SparseBatchData,
)
from power_grid_model.typing import ComponentAttributeMapping
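
For orientation, the renamed imports encode the widened data taxonomy. A minimal sketch of the shapes involved, assuming structured dtypes (the authoritative aliases live in `power_grid_model.data_types`):

```python
# Sketch of the shapes behind the new type names (assumed; see
# power_grid_model.data_types for the real aliases). Row-based data stays a
# structured ndarray; the *ComponentData/*BatchData names additionally admit
# columnar dicts, which this PR introduces but does not use yet.
import numpy as np

dtype = np.dtype([("id", "i4")])

single_array = np.zeros(3, dtype=dtype)      # SingleArray: 1-D, one scenario
dense_batch = np.zeros((2, 3), dtype=dtype)  # DenseBatchArray: scenario x element
sparse_batch = {                             # sparse batch: indptr/data pair
    "indptr": np.array([0, 2, 3]),
    "data": np.zeros(3, dtype=dtype),
}
columnar = {"id": np.array([1, 2, 3])}       # columnar dict: one array per attribute
```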

@@ -120,7 +121,7 @@ def get_and_verify_batch_sizes(batch_data: BatchDataset) -> int:
return n_batch_size


-def get_batch_size(batch_data: BatchArray) -> int:
+def get_batch_size(batch_data: BatchComponentData) -> int:
"""
Determine the number of batches and verify the data structure while we're at it.

@@ -135,19 +136,21 @@ def get_batch_size(batch_data: BatchArray) -> int:
         # we assume that it is a single batch.
         if batch_data.ndim == 1:
             return 1
-        n_batches = batch_data.shape[0]
-    elif isinstance(batch_data, dict):
+        return batch_data.shape[0]
+
+    if isinstance(batch_data, dict):
         # If the batch data is a dictionary, we assume that it is an indptr/data structure (otherwise it is an
         # invalid dictionary). There is always one indptr more than there are batches.
         if "indptr" not in batch_data:
             raise ValueError("Invalid batch data format, expected 'indptr' and 'data' entries")
-        n_batches = batch_data["indptr"].size - 1
-    else:
-        # If the batch data is not a numpy array and not a dictionary, it is invalid
-        raise ValueError(
-            "Invalid batch data format, expected a 2-d numpy array or a dictionary with an 'indptr' and 'data' entry"
-        )
-    return n_batches
+        indptr = batch_data["indptr"]
+        if isinstance(indptr, np.ndarray):
+            return indptr.size - 1
+
+    # If the batch data is not a numpy array and not a dictionary, it is invalid
+    raise ValueError(
+        "Invalid batch data format, expected a 2-d numpy array or a dictionary with an 'indptr' and 'data' entry"
+    )
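
A usage sketch of the rewritten dispatch (`get_batch_size` is a private helper, imported here only for illustration; any structured component dtype works):

```python
# Usage sketch for the three branches of get_batch_size above.
import numpy as np

from power_grid_model._utils import get_batch_size  # private helper, for illustration

dtype = np.dtype([("id", "i4")])

assert get_batch_size(np.zeros(3, dtype=dtype)) == 1       # 1-D array: one scenario
assert get_batch_size(np.zeros((5, 3), dtype=dtype)) == 5  # 2-D array: batch axis first
sparse = {"indptr": np.array([0, 2, 2, 3]), "data": np.zeros(3, dtype=dtype)}
assert get_batch_size(sparse) == 3                         # sparse: indptr.size - 1
```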


def split_numpy_array_in_batches(data: DenseBatchArray | SingleArray, component: ComponentType) -> list[np.ndarray]:
@@ -177,7 +180,7 @@ def split_numpy_array_in_batches(data: DenseBatchArray | SingleArray, component:
)


-def split_sparse_batches_in_batches(batch_data: SparseBatchArray, component: ComponentType) -> list[np.ndarray]:
+def split_sparse_batches_in_batches(batch_data: SparseBatchData, component: ComponentType) -> list[SingleComponentData]:
"""
    Split a single numpy array representing a compressed sparse structure into one or more batches

@@ -199,27 +202,36 @@ def split_sparse_batches_in_batches(batch_data: SparseBatchArray, component: Com
data = batch_data["data"]
indptr = batch_data["indptr"]

-    if not isinstance(data, np.ndarray) or data.ndim != 1:
-        raise TypeError(
-            f"Invalid data type {type(data).__name__} in sparse batch data for '{component}' "
-            "(should be a 1D Numpy structured array (i.e. a single 'table'))."
-        )
+    def _split_buffer(buffer: np.ndarray, scenario: int) -> SingleArray:
+        if not isinstance(buffer, np.ndarray) or buffer.ndim != 1:
+            raise TypeError(
+                f"Invalid data type {type(buffer).__name__} in sparse batch data for '{component}' "
+                "(should be a 1D Numpy structured array (i.e. a single 'table'))."
+            )

-    if not isinstance(indptr, np.ndarray) or indptr.ndim != 1 or not np.issubdtype(indptr.dtype, np.integer):
-        raise TypeError(
-            f"Invalid indptr data type {type(indptr).__name__} in batch data for '{component}' "
-            "(should be a 1D Numpy array (i.e. a single 'list'), "
-            "containing indices (i.e. integers))."
-        )
+        if not isinstance(indptr, np.ndarray) or indptr.ndim != 1 or not np.issubdtype(indptr.dtype, np.integer):
+            raise TypeError(
+                f"Invalid indptr data type {type(indptr).__name__} in batch data for '{component}' "
+                "(should be a 1D Numpy array (i.e. a single 'list'), "
+                "containing indices (i.e. integers))."
+            )

-    if indptr[0] != 0 or indptr[-1] != len(data) or any(indptr[i] > indptr[i + 1] for i in range(len(indptr) - 1)):
-        raise TypeError(
-            f"Invalid indptr in batch data for '{component}' "
-            f"(should start with 0, end with the number of objects ({len(data)}) "
-            "and be monotonic increasing)."
-        )
+        if indptr[0] != 0 or indptr[-1] != len(buffer) or indptr[scenario] > indptr[scenario + 1]:
+            raise TypeError(
+                f"Invalid indptr in batch data for '{component}' "
+                f"(should start with 0, end with the number of objects ({len(buffer)}) "
+                "and be monotonic increasing)."
+            )
+
+        return buffer[indptr[scenario] : indptr[scenario + 1]]
+
+    def _get_scenario(scenario: int) -> SingleComponentData:
+        if isinstance(data, dict):
+            # return {attribute: _split_buffer(attribute_data, scenario) for attribute, attribute_data in data.items()}
+            raise NotImplementedError()  # TODO(mgovers): uncomment when columnar data support is added
+        return _split_buffer(data, scenario)

-    return [data[indptr[i] : indptr[i + 1]] for i in range(len(indptr) - 1)]
+    return [_get_scenario(i) for i in range(len(indptr) - 1)]
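
A sketch of what the refactored splitter returns for a row-based sparse batch (the `component` argument only feeds error messages here; `ComponentType.sym_load` is just an example):

```python
# Splitting a row-based sparse batch into per-scenario slices (sketch).
# Columnar dicts take the commented-out branch above and currently raise
# NotImplementedError.
import numpy as np

from power_grid_model._utils import split_sparse_batches_in_batches
from power_grid_model.core.dataset_definitions import ComponentType

batch = {
    "indptr": np.array([0, 2, 2, 3]),  # scenario 0: 2 rows, scenario 1: empty, scenario 2: 1 row
    "data": np.zeros(3, dtype=[("id", "i4")]),
}
scenarios = split_sparse_batches_in_batches(batch, ComponentType.sym_load)
assert [len(s) for s in scenarios] == [2, 0, 1]
```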


def convert_dataset_to_python_dataset(data: Dataset) -> PythonDataset:
42 changes: 28 additions & 14 deletions src/power_grid_model/core/buffer_handling.py
@@ -8,14 +8,22 @@


from dataclasses import dataclass
-from typing import Mapping, Optional
+from typing import Optional, cast

import numpy as np

from power_grid_model.core.error_handling import VALIDATOR_MSG
from power_grid_model.core.index_integer import IdxC, IdxNp
from power_grid_model.core.power_grid_core import IdxPtr, VoidPtr
from power_grid_model.core.power_grid_meta import ComponentMetaData
+from power_grid_model.data_types import (
+    ComponentData,
+    DenseBatchArray,
+    DenseBatchData,
+    SingleArray,
+    SparseBatchArray,
+    SparseBatchData,
+)


@dataclass
@@ -72,12 +80,12 @@ def _get_indptr_view(indptr: np.ndarray) -> IdxPtr: # type: ignore[valid-type]
return np.ascontiguousarray(indptr, dtype=IdxNp).ctypes.data_as(IdxPtr)


-def _get_uniform_buffer_properties(data: np.ndarray) -> BufferProperties:
+def _get_uniform_buffer_properties(data: SingleArray | DenseBatchArray) -> BufferProperties:
"""
Extract the properties of the uniform batch dataset component.

Args:
-        data (np.ndarray): the dataset component.
+        data (SingleArray | DenseBatchArray): the dataset component.

Raises:
KeyError: if the dataset component is not sparse.
Expand Down Expand Up @@ -105,12 +113,12 @@ def _get_uniform_buffer_properties(data: np.ndarray) -> BufferProperties:
)


-def _get_sparse_buffer_properties(data: Mapping[str, np.ndarray]) -> BufferProperties:
+def _get_sparse_buffer_properties(data: SparseBatchArray) -> BufferProperties:
"""
Extract the properties of the sparse batch dataset component.

Args:
-        data (Mapping[str, np.ndarray]): the sparse dataset component.
+        data (SparseBatchArray): the sparse dataset component.

Raises:
KeyError: if the dataset component is not sparse.
@@ -147,12 +155,12 @@ def _get_sparse_buffer_properties(data: Mapping[str, np.ndarray]) -> BufferPrope
)


-def get_buffer_properties(data: np.ndarray | Mapping[str, np.ndarray]) -> BufferProperties:
+def get_buffer_properties(data: ComponentData) -> BufferProperties:
"""
Extract the properties of the dataset component

Args:
-        data (np.ndarray | Mapping[str, np.ndarray]): the dataset component.
+        data (ComponentData): the dataset component.

Raises:
ValueError: if the dataset component contains conflicting or bad data.
@@ -163,7 +171,10 @@ def get_buffer_properties(data: np.ndarray | Mapping[str, np.ndarray]) -> Buffer
if isinstance(data, np.ndarray):
return _get_uniform_buffer_properties(data)

-    return _get_sparse_buffer_properties(data)
+    if isinstance(data.get("indptr"), np.ndarray) and isinstance(data.get("data"), np.ndarray):
+        return _get_sparse_buffer_properties(cast(SparseBatchArray, data))

+    raise NotImplementedError()  # TODO(mgovers): implement columnar data handling
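
The same sparse-vs-columnar test reappears in `get_buffer_view` below. As a sketch, the dispatch rule boils down to this hypothetical predicate (not part of the PR):

```python
# Hypothetical predicate (illustration only): after the ndarray fast path, a
# mapping is treated as a sparse batch only when both "indptr" and "data" are
# ndarrays; any other mapping is assumed to be columnar data, which still
# raises NotImplementedError.
import numpy as np

def looks_sparse(component_data: dict) -> bool:
    return isinstance(component_data.get("indptr"), np.ndarray) and isinstance(
        component_data.get("data"), np.ndarray
    )
```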


def _get_uniform_buffer_view(data: np.ndarray, schema: ComponentMetaData) -> CBuffer:
Expand All @@ -188,7 +199,7 @@ def _get_uniform_buffer_view(data: np.ndarray, schema: ComponentMetaData) -> CBu
)


-def _get_sparse_buffer_view(data: Mapping[str, np.ndarray], schema: ComponentMetaData) -> CBuffer:
+def _get_sparse_buffer_view(data: SparseBatchArray, schema: ComponentMetaData) -> CBuffer:
"""
Get a C API compatible view on a sparse buffer.

@@ -213,7 +224,7 @@ def _get_sparse_buffer_view(data: Mapping[str, np.ndarray], schema: ComponentMet
)


-def get_buffer_view(data: np.ndarray | Mapping[str, np.ndarray], schema: ComponentMetaData) -> CBuffer:
+def get_buffer_view(data: ComponentData, schema: ComponentMetaData) -> CBuffer:
"""
Get a C API compatible view on a buffer.

@@ -227,10 +238,13 @@ def get_buffer_view(data: np.ndarray | Mapping[str, np.ndarray], schema: Compone
if isinstance(data, np.ndarray):
return _get_uniform_buffer_view(data, schema)

-    return _get_sparse_buffer_view(data, schema)
+    if isinstance(data.get("indptr"), np.ndarray) and isinstance(data.get("data"), np.ndarray):
+        return _get_sparse_buffer_view(cast(SparseBatchArray, data), schema)

+    raise NotImplementedError()  # TODO(mgovers): implement columnar data handling


-def create_buffer(properties: BufferProperties, schema: ComponentMetaData) -> np.ndarray | dict[str, np.ndarray]:
+def create_buffer(properties: BufferProperties, schema: ComponentMetaData) -> ComponentData:
"""
Create a buffer with the provided properties and type.

@@ -250,7 +264,7 @@ def create_buffer(properties: BufferProperties, schema: ComponentMetaData) -> np
return _create_uniform_buffer(properties=properties, schema=schema)


-def _create_uniform_buffer(properties: BufferProperties, schema: ComponentMetaData) -> np.ndarray:
+def _create_uniform_buffer(properties: BufferProperties, schema: ComponentMetaData) -> DenseBatchData:
"""
Create a uniform buffer with the provided properties and type.

@@ -275,7 +289,7 @@ def _create_uniform_buffer(properties: BufferProperties, schema: ComponentMetaDa
return np.empty(shape=shape, dtype=schema.dtype)


-def _create_sparse_buffer(properties: BufferProperties, schema: ComponentMetaData) -> dict[str, np.ndarray]:
+def _create_sparse_buffer(properties: BufferProperties, schema: ComponentMetaData) -> SparseBatchData:
"""
Create a sparse buffer with the provided properties and type.

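
The function body is collapsed in this view. For context, a minimal sketch of the layout such a buffer plausibly takes, assuming `data` sized to the total element count and an int64 `indptr` (not the PR's exact code):

```python
# Assumed sparse-buffer layout (sketch only; the real body is collapsed above):
# an uninitialised row table plus an indptr one entry longer than the batch.
import numpy as np

def sparse_buffer_sketch(dtype: np.dtype, n_total_elements: int, batch_size: int) -> dict:
    return {
        "data": np.empty(n_total_elements, dtype=dtype),
        "indptr": np.zeros(batch_size + 1, dtype=np.int64),
    }
```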
13 changes: 5 additions & 8 deletions src/power_grid_model/core/data_handling.py
@@ -8,14 +8,11 @@


from enum import Enum
-from typing import Mapping
-
-import numpy as np

from power_grid_model.core.dataset_definitions import ComponentType, DatasetType
from power_grid_model.core.power_grid_dataset import CConstDataset, CMutableDataset
from power_grid_model.core.power_grid_meta import initialize_array, power_grid_meta_data
-from power_grid_model.data_types import Dataset
+from power_grid_model.data_types import Dataset, SingleDataset
from power_grid_model.enum import CalculationType
from power_grid_model.typing import ComponentAttributeMapping, _ComponentAttributeMappingDict

@@ -56,7 +53,7 @@ def get_output_type(*, calculation_type: CalculationType, symmetric: bool) -> Ou
raise NotImplementedError()


-def prepare_input_view(input_data: Mapping[ComponentType, np.ndarray]) -> CConstDataset:
+def prepare_input_view(input_data: SingleDataset) -> CConstDataset:
"""
    Create a view of the input data in a format compatible with the PGM core library.

@@ -70,7 +67,7 @@ def prepare_input_view(input_data: Mapping[ComponentType, np.ndarray]) -> CConst
return CConstDataset(input_data, dataset_type=DatasetType.input)


-def prepare_update_view(update_data: Mapping[ComponentType, np.ndarray | Mapping[str, np.ndarray]]) -> CConstDataset:
+def prepare_update_view(update_data: Dataset) -> CConstDataset:
"""
    Create a view of the update data, or an empty view if not provided, in a format compatible with the PGM core library.

@@ -84,7 +81,7 @@ def prepare_update_view(update_data: Mapping[ComponentType, np.ndarray | Mapping
return CConstDataset(update_data, dataset_type=DatasetType.update)
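
For reference, the shapes `prepare_update_view` now accepts under the widened `Dataset` annotation (a sketch; the component and sizes are arbitrary examples):

```python
# The three accepted update_data layouts (sketch).
import numpy as np

from power_grid_model import initialize_array

update_single = {"sym_load": initialize_array("update", "sym_load", 3)}      # one scenario
update_dense = {"sym_load": initialize_array("update", "sym_load", (2, 3))}  # dense batch
update_sparse = {                                                            # sparse batch
    "sym_load": {
        "indptr": np.array([0, 2, 3]),
        "data": initialize_array("update", "sym_load", 3),
    }
}
```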


-def prepare_output_view(output_data: Mapping[ComponentType, np.ndarray], output_type: OutputType) -> CMutableDataset:
+def prepare_output_view(output_data: Dataset, output_type: OutputType) -> CMutableDataset:
"""
    Create a view of the output data in a format compatible with the PGM core library.

@@ -166,7 +163,7 @@ def process_output_component_types(
"""
# limit all component count to user specified component types in output and convert to a dict
if output_component_types is None:
-        output_component_types = {k: None for k in available_components}
+        output_component_types = {ComponentType[k]: None for k in available_components}
elif isinstance(output_component_types, (list, set)):
output_component_types = {k: None for k in output_component_types}
elif not isinstance(output_component_types, dict) or not all(
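
The rest of the hunk is collapsed. For context, a sketch of the normalisation this function performs; the `ComponentType[k]` fix above converts the string keys of `available_components` into enum keys (assuming `ComponentType` is a string-keyed enum):

```python
# Sketch: every accepted spelling of output_component_types collapses to a
# dict of {ComponentType: attributes-or-None}.
from power_grid_model.core.dataset_definitions import ComponentType

available_components = ["node", "line"]

# None -> all available components, all attributes (note the enum lookup)
normalized = {ComponentType[k]: None for k in available_components}
assert ComponentType.node in normalized

# list/set input -> listed components, all attributes
normalized_from_list = {k: None for k in [ComponentType.node, ComponentType.line]}
```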
30 changes: 7 additions & 23 deletions src/power_grid_model/core/power_grid_dataset.py
@@ -27,7 +27,7 @@
power_grid_core as pgc,
)
from power_grid_model.core.power_grid_meta import DatasetMetaData, power_grid_meta_data
-from power_grid_model.data_types import Dataset
+from power_grid_model.data_types import ComponentData, Dataset
from power_grid_model.errors import PowerGridError


@@ -186,11 +186,7 @@ class CMutableDataset:
_mutable_dataset: MutableDatasetPtr
_buffer_views: list[CBuffer]

-    def __new__(
-        cls,
-        data: Mapping[ComponentType, np.ndarray] | Mapping[ComponentType, np.ndarray | Mapping[str, np.ndarray]],
-        dataset_type: Any = None,
-    ):
+    def __new__(cls, data: Dataset, dataset_type: Any = None):
instance = super().__new__(cls)
instance._mutable_dataset = MutableDatasetPtr()
instance._buffer_views = []
@@ -243,10 +239,7 @@ def get_buffer_views(self) -> list[CBuffer]:
"""
return self._buffer_views

-    def _add_data(
-        self,
-        data: Mapping[ComponentType, np.ndarray] | Mapping[ComponentType, np.ndarray | Mapping[str, np.ndarray]],
-    ):
+    def _add_data(self, data: Dataset):
"""
Add Power Grid Model data to the mutable dataset view.

@@ -261,12 +254,7 @@ def _add_data(
for component, component_data in data.items():
self._add_component_data(component, component_data, allow_unknown=False)

-    def _add_component_data(
-        self,
-        component: ComponentType,
-        data: np.ndarray | Mapping[str, np.ndarray],
-        allow_unknown: bool = False,
-    ):
+    def _add_component_data(self, component: ComponentType, data: ComponentData, allow_unknown: bool = False):
"""
Add Power Grid Model data for a single component to the mutable dataset view.

@@ -301,7 +289,7 @@ def _register_buffer(self, component: ComponentType, buffer: CBuffer):
)
assert_no_error()

-    def _validate_properties(self, data: np.ndarray | Mapping[str, np.ndarray]):
+    def _validate_properties(self, data: ComponentData):
properties = get_buffer_properties(data)
if properties.is_batch != self._is_batch:
raise ValueError(
@@ -328,11 +316,7 @@ class CConstDataset:
_const_dataset: ConstDatasetPtr
_buffer_views: list[CBuffer]

-    def __new__(
-        cls,
-        data: Mapping[ComponentType, np.ndarray] | Mapping[ComponentType, np.ndarray | Mapping[str, np.ndarray]],
-        dataset_type: Optional[DatasetType] = None,
-    ):
+    def __new__(cls, data: Dataset, dataset_type: Optional[DatasetType] = None):
instance = super().__new__(cls)
instance._const_dataset = ConstDatasetPtr()

@@ -419,7 +403,7 @@ def get_data(self) -> Dataset:
"""
return self._data

-    def get_component_data(self, component: ComponentType) -> np.ndarray | Mapping[str, np.ndarray]:
+    def get_component_data(self, component: ComponentType) -> ComponentData:
"""
Retrieve Power Grid Model data from the dataset for a specific component.
