diff --git a/doc/DaphneLib/Overview.md b/doc/DaphneLib/Overview.md index ef53189ee..44fdb30a6 100644 --- a/doc/DaphneLib/Overview.md +++ b/doc/DaphneLib/Overview.md @@ -144,6 +144,8 @@ The results of these expressions again represent DAPHNE matrices/frames/scalars. ### Python Operators +#### Binary Operators + DaphneLib currently supports the following binary operators on DAPHNE matrices/frames/scalars: | Operator | Meaning | @@ -177,10 +179,45 @@ In the future, we will fully support *scalar-`op`-matrix* operations as well as *Examples:* -```r +```python 1.5 * X @ y + 0.001 ``` +#### Indexing + +DaphneLib supports right and left indexing on DAPHNE matrices using Python's square bracket `[]` operator to extract elements from a matrix or set elements into a matrix, respectively. + +Extracting elements (right indexing) supports indexing by integer (e.g., `3`), slice (e.g., `:`, `:3`, `1:4`), or a DaphneLib column matrix of positions (which do not need to be unique or sorted). +The extracted elements are always returned as a DaphneLib matrix, even if it is just a single element. + +Setting elements (left indexing) currently only supports indexing by integer and slice. +The elements to insert must always be provided as a DaphneLib matrix, even if it is just a single element. + +*Examples:* + +```python +from daphne.context.daphne_context import DaphneContext + +dc = DaphneContext() + +# 10x5 matrix containing the numbers from 0 to 49. +X = dc.seq(0, 49).reshape(10, 5) + +# Extract rows from 1 (inclusive) to 4 (exclusive). +X[1:4, :].print().compute() + +# Extract column 3. +X[:, 3].print().compute() + +# Extract columns 2 (inclusive) to 4 (exclusive) of rows [0, 3, 6]. +rowIdxs = dc.seq(0, 6, 3) +X[rowIdxs, 2:4].print().compute() + +# Set columns 1 (inclusive) to 4 (exclusive) of row 3 to zero. 
+X[3, 1:4] = dc.fill(0, 1, 3)
+X.print().compute()
+```
+
 ### Matrix/Frame/Scalar Methods
 
 DaphneLib's classes `Matrix`, `Frame`, and `Scalar` offer a range of methods to call DAPHNE built-in functions.
@@ -188,7 +225,7 @@ A comprehensive list can be found in the [DaphneLib API reference](/doc/DaphneLi
 
 *Examples:*
 
-```r
+```python
 X.t()
 X.sqrt()
 X.cbind(Y)
diff --git a/src/api/python/daphne/operator/nodes/matrix.py b/src/api/python/daphne/operator/nodes/matrix.py
index c69b37081..5c39d342e 100644
--- a/src/api/python/daphne/operator/nodes/matrix.py
+++ b/src/api/python/daphne/operator/nodes/matrix.py
@@ -33,6 +33,7 @@
 import json
 import os
 from typing import Union, TYPE_CHECKING, Dict, Iterable, Optional, Sequence, List
+import copy
 
 if TYPE_CHECKING:
     # to avoid cyclic dependencies during runtime
@@ -44,7 +45,8 @@ class Matrix(OperationNode):
 
     def __init__(self, daphne_context: 'DaphneContext', operation:str, unnamed_input_nodes:Union[str, Iterable[VALID_INPUT_TYPES]]=None,
                 named_input_nodes:Dict[str, VALID_INPUT_TYPES]=None,
-                local_data: np.array = None, brackets:bool = False, copy: bool = False)->'Matrix':
+                local_data: np.array = None, brackets:bool = False, left_brackets: bool = False, copy: bool = False,
+                consumer_list: List['OperationNode'] = None)->'Matrix':
         self.__copy = copy
         is_python_local_data = False
         if local_data is not None:
@@ -53,7 +55,7 @@ def __init__(self, daphne_context: 'DaphneContext', operation:str, unnamed_input
             is_python_local_data = True
         else:
             self._np_array = None
-        super().__init__(daphne_context, operation, unnamed_input_nodes, named_input_nodes, OutputType.MATRIX,is_python_local_data, brackets)
+        super().__init__(daphne_context, operation, unnamed_input_nodes, named_input_nodes, OutputType.MATRIX,is_python_local_data, brackets, left_brackets, consumer_list)
 
     def code_line(self, var_name: str, unnamed_input_vars: Sequence[str], named_input_vars: Dict[str, str]) -> str:
 
@@ -160,10 +162,69 @@ def __rne__(self, other) -> 'Matrix':
     def __matmul__(self, other: 'Matrix') -> 'Matrix':
         return Matrix(self.daphne_context, '@', [self, other])
 
-    def __getitem__(self,  pos):
-        if not isinstance(pos, int):
-            i, x = pos
-            return Matrix(self.daphne_context,'',[self, i, x], brackets=True)
+    @staticmethod
+    def _index_to_dsl(index):
+        """Translate one component of a `[row, column]` key into its DaphneDSL form.
+
+        A slice becomes the string `start:stop`, where a missing bound is left empty
+        (e.g., `slice(None, 3)` becomes `':3'`). Any other index is returned unchanged.
+
+        :param index: one component of the key (integer, slice, or Matrix)
+        :return: the `start:stop` string for a slice, otherwise `index` itself
+        :raises ValueError: if a slice has a step, since only start and stop are forwarded
+        """
+        if not isinstance(index, slice):
+            return index
+        if index.step is not None:
+            # Previously, the step was silently dropped, yielding the wrong elements.
+            raise ValueError("slices with a step are not supported")
+        start = '' if index.start is None else f'{index.start}'
+        stop = '' if index.stop is None else f'{index.stop}'
+        return f'{start}:{stop}'
+
+    def __getitem__(self, key):
+        """Right indexing: extract elements of this matrix via `X[rows, columns]`.
+
+        :param key: a 2-tuple whose components are each an integer, a slice, or a Matrix
+        :return: a new Matrix node representing the extracted elements
+        :raises TypeError: if `key` is not a 2-tuple or a component has an unsupported type
+        """
+        if not isinstance(key, tuple) or len(key) != 2:
+            raise TypeError("you must specify exactly two dimensions")
+
+        # isinstance() with a tuple of types; chaining the checks with a comma instead of
+        # `or` builds a non-empty (always truthy) tuple, so the check could never fail.
+        if not all(isinstance(k, (slice, int, Matrix)) for k in key):
+            raise TypeError("keys must be an integer, slice or Matrix")
+
+        row_index, column_index = (self._index_to_dsl(k) for k in key)
+        return Matrix(self.daphne_context, None, [self, row_index, column_index], brackets=True)
+
+    def __setitem__(self, key, value):
+        """Left indexing: set elements of this matrix via `X[rows, columns] = value`.
+
+        :param key: a 2-tuple whose components are each an integer or a slice
+        :param value: the elements to insert
+        :raises TypeError: if `key` is not a 2-tuple or a component has an unsupported type
+        """
+        if not isinstance(key, tuple) or len(key) != 2:
+            raise TypeError("you must specify exactly two dimensions")
+
+        if not all(isinstance(k, (slice, int)) for k in key):
+            raise TypeError("keys must be an integer or a slice")
+
+        row_index, column_index = (self._index_to_dsl(k) for k in key)
+
+        # As __setitem__() cannot return anything, but we still want to add a DAPHNE operation to the DAG:
+        # Firstly, create a new_node that is a copy of the current DAG node.
+        # Secondly, update the input nodes of all consumers of the current node, to use the new node instead.
+        # Finally, change the state of the current DAG node to an operation for left indexing.
+        # TODO Can it happen that deepcopy() copies numpy data backing this Matrix node?
+        #      If so, we should prevent that for performance reasons.
+        new_node = copy.deepcopy(self)
+        for consumer in self.consumer_list:
+            consumer.update_node_in_input_list(new_node, self)
+        self.__dict__ = Matrix(new_node.daphne_context, None, [new_node, value, row_index, column_index], left_brackets=True).__dict__
 
     def sum(self, axis: int = None) -> 'OperationNode':
         """Calculate sum of matrix.
diff --git a/src/api/python/daphne/operator/operation_node.py b/src/api/python/daphne/operator/operation_node.py
index 8b0e79ea1..1a13095aa 100644
--- a/src/api/python/daphne/operator/operation_node.py
+++ b/src/api/python/daphne/operator/operation_node.py
@@ -42,7 +42,7 @@
 import json
 import os
 import time
-from typing import Dict, Iterable, Optional, Sequence, Union, TYPE_CHECKING
+from typing import Dict, Iterable, Optional, Sequence, Union, TYPE_CHECKING, List
 
 if TYPE_CHECKING:
     # to avoid cyclic dependencies during runtime
@@ -60,11 +60,14 @@ def __init__(self, daphne_context,operation:str,
                 unnamed_input_nodes: Union[str, Iterable[VALID_INPUT_TYPES]]=None,
                 named_input_nodes: Dict[str, VALID_INPUT_TYPES]=None,
                 output_type:OutputType = OutputType.MATRIX, is_python_local_data: bool = False,
-                brackets: bool = False):
+                brackets: bool = False, left_brackets: bool = False,
+                consumer_list: List['OperationNode'] = None):
         if unnamed_input_nodes is None:
             unnamed_input_nodes = []
         if named_input_nodes is None:
             named_input_nodes = []
+        # Always bind the attribute; before, a caller-supplied list left it unset.
+        self.consumer_list = [] if consumer_list is None else consumer_list
         self.daphne_context = daphne_context
         self.operation = operation
         self._unnamed_input_nodes = unnamed_input_nodes
@@ -76,8 +79,26 @@ def __init__(self, daphne_context,operation:str,
         self.daphnedsl_name = ""
         self._is_python_local_data = is_python_local_data
         self._brackets = brackets
+        self._left_brackets = left_brackets
         self._output_type = output_type
 
+        # add this node to the consumer lists of all the nodes it uses.
+        for node in unnamed_input_nodes:
+            if isinstance(node, OperationNode):
+                node.consumer_list.append(self)
+
+    def update_node_in_input_list(self, new_node, current_node):
+        """Replace `current_node` by `new_node` in this node's unnamed inputs.
+
+        :param new_node: the node to use as an input from now on
+        :param current_node: the input node to be replaced (only its first occurrence)
+        :raises ValueError: if `current_node` is not found by the `index()` lookup
+        """
+        # NOTE(review): index() matches by == (after an identity check); if node classes
+        # overload __eq__ to build DAG nodes, a wrong position might match -- TODO confirm.
+        current_index = self._unnamed_input_nodes.index(current_node)
+        self._unnamed_input_nodes[current_index] = new_node
+
     def compute(self, type="shared memory", verbose=False, asTensorFlow=False, asPyTorch=False, shape=None, useIndexColumn=False):
         """
         Compute function for processing the Daphne Object or operation node and returning the results.
@@ -241,6 +262,12 @@ def clear_tmp(self):
                 os.remove(os.path.join(TMP_PATH, f))
 
     def code_line(self, var_name: str, unnamed_input_vars: Sequence[str], named_input_vars: Dict[str, str])->str:
+        if self._left_brackets:
+            # Left indexing; Matrix.__setitem__() passes [target, value, row index, column index].
+            # Emit the assignment into the target, then bind the updated target to var_name.
+            line_1 = f'{unnamed_input_vars[0]}[{",".join(unnamed_input_vars[2:])}] = {unnamed_input_vars[1]};'
+            line_2 = f'{var_name} = {unnamed_input_vars[0]};'
+            return line_1 + "\n" + line_2
         if self._brackets:
             return f'{var_name}={unnamed_input_vars[0]}[{",".join(unnamed_input_vars[1:])}];'
         if self.operation in BINARY_OPERATIONS:
diff --git a/test/api/cli/Utils.h b/test/api/cli/Utils.h
index f977ee4e5..7a52e0846 100644
--- a/test/api/cli/Utils.h
+++ b/test/api/cli/Utils.h
@@ -298,7 +298,6 @@ void checkDaphneFailsSimple(const std::string & dirPath, const std::string & nam
  * 
  * @param exp The expected output on stdout.
  * @param scriptFilePath The path to the DaphneDSL script file to execute.
- * output.
  * @param args The arguments to pass in addition to the script's path. Note
  * that script arguments must be passed via the `--args` option for this
  * utility function. Despite the variadic template, each element should be of
@@ -319,6 +318,34 @@ void compareDaphneToStr(const std::string & exp, const std::string & scriptFileP
     CHECK(err.str() == "");
 }
 
+/**
+ * @brief Compares the standard output of executing the given Python/DaphneLib
+ * script to a reference text.
+ * 
+ * Also checks that the status code indicates a successful execution and that
+ * nothing was printed to standard error.
+ * 
+ * @param exp The expected output on stdout.
+ * @param scriptFilePath The path to the Python/DaphneLib script file to execute.
+ * @param args The arguments to pass in addition to the script's path. Despite
+ * the variadic template, each element should be of type `char *`. The last one
+ * does *not* need to be a null pointer.
+ */
+template<typename... Args>
+void compareDaphneLibToStr(const std::string & exp, const std::string & scriptFilePath, Args ... args) {
+    std::stringstream out;
+    std::stringstream err;
+    int status = runDaphneLib(out, err, scriptFilePath.c_str(), args...);
+
+    // Just CHECK (don't REQUIRE) success, such that in case of a failure, the
+    // checks of out and err still run and provide useful messages. For err,
+    // don't check empty(), because then catch2 doesn't display the error
+    // output.
+    CHECK(status == StatusCode::SUCCESS);
+    CHECK(out.str() == exp);
+    CHECK(err.str() == "");
+}
+
 /**
  * @brief Checks if the numerical values in the standard output of the two given
  * DaphneDSL script runs are within a relative distance to a reference text.
diff --git a/test/api/python/DaphneLibTest.cpp b/test/api/python/DaphneLibTest.cpp
index 0eb9f3992..33b3a2d09 100644
--- a/test/api/python/DaphneLibTest.cpp
+++ b/test/api/python/DaphneLibTest.cpp
@@ -60,6 +60,12 @@ const std::string dirPath = "test/api/python/";
         const std::string prefix = dirPath+name; \
         compareDaphneToDaphneLib(prefix+".py", prefix+".daphne", argument); \
     }
+// Test case comparing the stdout of the DaphneLib script <name>.py to the string str.
+#define MAKE_TEST_CASE_STR(name, str) \
+    TEST_CASE(name ".py", TAG_DAPHNELIB) { \
+        const std::string prefix = dirPath+name; \
+        compareDaphneLibToStr(str, prefix+".py"); \
+    }
 
 MAKE_TEST_CASE("data_transfer_numpy_1")
 MAKE_TEST_CASE("data_transfer_numpy_2")
@@ -96,6 +102,9 @@ MAKE_TEST_CASE("matrix_agg")
 MAKE_TEST_CASE("matrix_reorg")
 MAKE_TEST_CASE("matrix_other")
 MAKE_TEST_CASE("matrix_preprocessing")
+MAKE_TEST_CASE("matrix_indexing_1")
+MAKE_TEST_CASE("matrix_indexing_2")
+MAKE_TEST_CASE_STR("matrix_indexing_3", "90\n")
 MAKE_TEST_CASE_SCALAR("numpy_matrix_ops")
 MAKE_TEST_CASE_SCALAR("numpy_matrix_ops_extended")
 MAKE_TEST_CASE("numpy_matrix_ops_replace")
diff --git a/test/api/python/matrix_indexing_1.daphne b/test/api/python/matrix_indexing_1.daphne
new file mode 100644
index 000000000..10d1d0d61
--- /dev/null
+++ b/test/api/python/matrix_indexing_1.daphne
@@ -0,0 +1,31 @@
+m1 = reshape(seq(1, 100), 10, 10);
+
+row_ids = seq(0, 2, 1) * 3 + 1;
+col_ids = seq(0, 1, 1) * 2;
+
+# Only row
+print(m1[0, :]);
+print(m1[0:5, :]);
+print(m1[0:, :]);
+print(m1[:5, :]);
+print(m1[row_ids, :]);
+
+# Only col
+print(m1[:, 0]);
+print(m1[:, 0:5]);
+print(m1[:, 0:]);
+print(m1[:, :5]);
+print(m1[:, col_ids]);
+
+# Row and col
+print(m1[0:5, 0]);
+print(m1[0, 0:5]);
+print(m1[0, 0]);
+print(m1[0:5, 0:5]);
+print(m1[0:, 0]);
+print(m1[0, 0:]);
+print(m1[0:, 0:]);
+print(m1[:5, 0]);
+print(m1[0, :5]);
+print(m1[:5, :5]);
+print(m1[row_ids, col_ids]);
\ No newline at end of file
diff --git a/test/api/python/matrix_indexing_1.py b/test/api/python/matrix_indexing_1.py
new file mode 100644
index 000000000..1b62ae2ba
--- /dev/null
+++ b/test/api/python/matrix_indexing_1.py
@@ -0,0 +1,38 @@
+#!/usr/bin/python
+
+from daphne.context.daphne_context import DaphneContext
+
+
+daphne_context = DaphneContext()
+
+m1 = daphne_context.seq(1, 100).reshape(10, 10)
+
+row_ids = daphne_context.seq(0, 2, 1) * 3 + 1
+col_ids = daphne_context.seq(0, 1, 1) * 2
+
+# Only row
+m1[0, :].print().compute()
+m1[0:5, :].print().compute()
+m1[0:, :].print().compute()
+m1[:5, :].print().compute()
+m1[row_ids, :].print().compute()
+
+# Only col
+m1[:, 0].print().compute()
+m1[:, 0:5].print().compute()
+m1[:, 0:].print().compute()
+m1[:, :5].print().compute()
+m1[:, col_ids].print().compute()
+
+# Row and col
+m1[0:5, 0].print().compute()
+m1[0, 0:5].print().compute()
+m1[0, 0].print().compute()
+m1[0:5, 0:5].print().compute()
+m1[0:, 0].print().compute()
+m1[0, 0:].print().compute()
+m1[0:, 0:].print().compute()
+m1[:5, 0].print().compute()
+m1[0, :5].print().compute()
+m1[:5, :5].print().compute()
+m1[row_ids, col_ids].print().compute()
diff --git a/test/api/python/matrix_indexing_2.daphne b/test/api/python/matrix_indexing_2.daphne
new file mode 100644
index 000000000..687170e58
--- /dev/null
+++ b/test/api/python/matrix_indexing_2.daphne
@@ -0,0 +1,34 @@
+m1 = fill(123, 10, 10);
+
+m1[0, 0] = fill(1001, 1, 1);
+print(m1);
+
+m1[0:5, 0] = fill(1002, 5, 1);
+print(m1);
+
+m1[:5, 0] = fill(1003, 5, 1);
+print(m1);
+
+m1[0:, 0] = fill(1004, 10, 1);
+print(m1);
+
+m1[:, 0] = fill(1005, 10, 1);
+print(m1);
+
+m1[0, :] = fill(1006, 1, 10);
+print(m1);
+
+m1[0, 0:] = fill(1007, 1, 10);
+print(m1);
+
+m1[0, :5] = fill(1008, 1, 5);
+print(m1);
+
+m1[0, 0:5] = fill(1009, 1, 5);
+print(m1);
+
+m1[:, 0:5] = fill(1010, 10, 5);
+print(m1);
+
+m1[0:5, :] = fill(1011, 5, 10);
+print(m1);
\ No newline at end of file
diff --git a/test/api/python/matrix_indexing_2.py b/test/api/python/matrix_indexing_2.py
new file mode 100644
index 000000000..0e91b21a6
--- /dev/null
+++ b/test/api/python/matrix_indexing_2.py
@@ -0,0 +1,41 @@
+#!/usr/bin/python
+
+from daphne.context.daphne_context import DaphneContext
+
+
+daphne_context = DaphneContext()
+
+m1 = daphne_context.fill(123, 10, 10)
+
+m1[0, 0] = daphne_context.fill(1001, 1, 1)
+m1.print().compute()
+
+m1[0:5, 0] = daphne_context.fill(1002, 5, 1)
+m1.print().compute()
+
+m1[:5, 0] = daphne_context.fill(1003, 5, 1)
+m1.print().compute()
+
+m1[0:, 0] = daphne_context.fill(1004, 10, 1)
+m1.print().compute()
+
+m1[:, 0] = daphne_context.fill(1005, 10, 1)
+m1.print().compute()
+
+m1[0, :] = daphne_context.fill(1006, 1, 10)
+m1.print().compute()
+
+m1[0, 0:] = daphne_context.fill(1007, 1, 10)
+m1.print().compute()
+
+m1[0, :5] = daphne_context.fill(1008, 1, 5)
+m1.print().compute()
+
+m1[0, 0:5] = daphne_context.fill(1009, 1, 5)
+m1.print().compute()
+
+m1[:, 0:5] = daphne_context.fill(1010, 10, 5)
+m1.print().compute()
+
+m1[0:5, :] = daphne_context.fill(1011, 5, 10)
+m1.print().compute()
diff --git a/test/api/python/matrix_indexing_3.py b/test/api/python/matrix_indexing_3.py
new file mode 100644
index 000000000..6991ca38e
--- /dev/null
+++ b/test/api/python/matrix_indexing_3.py
@@ -0,0 +1,13 @@
+#!/usr/bin/env python3
+
+from daphne.context.daphne_context import DaphneContext
+dc = DaphneContext()
+
+X = dc.fill(10, 3, 3)
+Y = X.sum()
+
+# The presence of this line should not have an impact on Y.
+X[1, 1] = dc.fill(0, 1, 1)
+
+# Should print 90.
+Y.print().compute()
\ No newline at end of file