Skip to content

Commit

Permalink
Pass objects to custom oracles
Browse files Browse the repository at this point in the history
Custom oracles now receive a single context object instead of the separate
expected/actual values; this object also provides some more context for the
oracles.

The context is currently passed as a dictionary, but if #472 were done, we
could replace it with an attrs class.
  • Loading branch information
niknetniko committed Nov 28, 2023
1 parent 43dcc37 commit e056c3a
Show file tree
Hide file tree
Showing 12 changed files with 126 additions and 63 deletions.
25 changes: 9 additions & 16 deletions tested/judge/programmed.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@
from tested.judge.execution import execute_file, filter_files
from tested.judge.utils import BaseExecutionResult, copy_from_paths_to_path, run_command
from tested.languages.generation import generate_custom_evaluator, generate_statement
from tested.oracles.common import BooleanEvalResult
from tested.serialisation import FunctionCall, FunctionType, Value
from tested.oracles.common import BooleanEvalResult, OracleContext
from tested.serialisation import FunctionCall, FunctionType
from tested.testsuite import CustomCheckOracle
from tested.utils import get_identifier

Expand All @@ -29,8 +29,7 @@
def evaluate_programmed(
bundle: Bundle,
evaluator: CustomCheckOracle,
expected: Value,
actual: Value,
context: OracleContext,
) -> BaseExecutionResult | BooleanEvalResult:
"""
Run the custom evaluation. Concerning structure and execution, the custom
Expand All @@ -42,16 +41,15 @@ def evaluate_programmed(

# We have special support for Python.
if evaluator.language == "python" and bundle.config.options.optimized:
return _evaluate_python(bundle, evaluator, expected, actual)
return _evaluate_python(bundle, evaluator, context)
else:
return _evaluate_others(bundle, evaluator, expected, actual)
return _evaluate_others(bundle, evaluator, context)


def _evaluate_others(
bundle: Bundle,
evaluator: CustomCheckOracle,
expected: Value,
actual: Value,
context: OracleContext,
) -> BaseExecutionResult:
"""
Evaluate in all languages but Python. This re-uses the infrastructure of the
Expand Down Expand Up @@ -107,11 +105,7 @@ def _evaluate_others(
# Generate the oracle.
_logger.debug("Generating custom oracle.")
evaluator_name = generate_custom_evaluator(
eval_bundle,
destination=custom_path,
evaluator=evaluator,
expected_value=expected,
actual_value=actual,
eval_bundle, destination=custom_path, evaluator=evaluator, context=context
)
dependencies.append(evaluator_name)
_logger.debug("Generated oracle executor %s", evaluator_name)
Expand Down Expand Up @@ -176,8 +170,7 @@ def _catch_output() -> Generator[tuple[StringIO, StringIO], None, None]:
def _evaluate_python(
bundle: Bundle,
oracle: CustomCheckOracle,
expected: Value,
actual: Value,
context: OracleContext,
) -> BooleanEvalResult:
"""
Run an evaluation in Python. While the templates are still used to generate
Expand Down Expand Up @@ -213,7 +206,7 @@ def _evaluate_python(
check_function_call = FunctionCall(
type=FunctionType.FUNCTION,
name=oracle.function.name,
arguments=[expected, actual, *oracle.arguments],
arguments=[context.as_value(), *oracle.arguments],
)
literal_function_call = generate_statement(eval_bundle, check_function_call)

Expand Down
19 changes: 10 additions & 9 deletions tested/languages/generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from collections.abc import Iterable
from pathlib import Path
from re import Match
from typing import TypeAlias
from typing import TYPE_CHECKING, TypeAlias

from pygments import highlight
from pygments.formatters.html import HtmlFormatter
Expand All @@ -20,7 +20,6 @@
from tested.datatypes import AllTypes, BasicObjectTypes
from tested.dodona import ExtendedMessage
from tested.internationalization import get_i18n_string
from tested.judge.planning import PlannedExecutionUnit
from tested.languages import Language
from tested.languages.conventionalize import (
conventionalize_namespace,
Expand All @@ -41,7 +40,6 @@
FunctionType,
Identifier,
Statement,
Value,
VariableType,
)
from tested.testsuite import (
Expand All @@ -54,6 +52,11 @@
TextData,
)

if TYPE_CHECKING:
from tested.judge.planning import PlannedExecutionUnit
from tested.oracles.common import OracleContext


_logger = logging.getLogger(__name__)
_html_formatter = HtmlFormatter(nowrap=True)

Expand Down Expand Up @@ -248,7 +251,7 @@ def generate_statement(bundle: Bundle, statement: Statement) -> str:
def generate_execution(
bundle: Bundle,
destination: Path,
execution_unit: PlannedExecutionUnit,
execution_unit: "PlannedExecutionUnit",
) -> tuple[str, list[str]]:
"""
Generate the files related to the execution.
Expand Down Expand Up @@ -302,17 +305,15 @@ def generate_custom_evaluator(
bundle: Bundle,
destination: Path,
evaluator: CustomCheckOracle,
expected_value: Value,
actual_value: Value,
context: "OracleContext",
) -> str:
"""
Generate the code for running a programmed oracle.
:param bundle: The configuration bundle.
:param destination: The folder where the code should be generated.
:param evaluator: The oracle data from the test suite.
:param expected_value: The preprocessed expected value.
:param actual_value: The preprocessed actual value.
:param context: Context for the oracle.
:return: The name of the generated file.
"""
Expand All @@ -324,7 +325,7 @@ def generate_custom_evaluator(
type=FunctionType.FUNCTION,
namespace=Identifier(evaluator_name),
name=evaluator.function.name,
arguments=[expected_value, actual_value, *evaluator.arguments],
arguments=[context.as_value(), *evaluator.arguments],
has_root_namespace=False,
)

Expand Down
59 changes: 59 additions & 0 deletions tested/oracles/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,73 @@ def evaluate_text(configs, channel, actual):
from attrs import define, field

from tested.configs import Bundle
from tested.datatypes import BasicObjectTypes, BasicStringTypes
from tested.dodona import Message, Status, StatusMessage
from tested.dsl import parse_string
from tested.languages.generation import generate_statement
from tested.languages.utils import convert_stacktrace_to_clickable_feedback
from tested.parsing import fallback_field, get_converter
from tested.serialisation import ObjectKeyValuePair, ObjectType, StringType, Value
from tested.testsuite import ExceptionOutputChannel, NormalOutputChannel, OutputChannel


@define
class OracleContext:
    """
    Information handed to a custom (programmed) oracle.

    Besides the expected and actual values, the context carries a few
    strings describing the environment of the evaluation. Use
    :meth:`as_value` to obtain the representation that is passed as the
    first argument of the oracle function.
    """

    # The values the oracle has to compare.
    expected: Value
    actual: Value
    # Environment information, all stored as plain strings.
    execution_directory: str
    evaluation_directory: str
    programming_language: str
    natural_language: str

    def as_value(self) -> Value:
        """
        Convert this context into a map value with text keys.

        The keys are, in order: ``expected``, ``actual``,
        ``execution_directory``, ``evaluation_directory``,
        ``programming_language`` and ``natural_language``. The expected and
        actual values are inserted as-is; the other entries are wrapped as
        text values.

        :return: An object value of type map describing this context.
        """
        # TODO: if we remove non-Python, we can pass a class, or otherwise,
        #       we could re-use parts of the DSL for this...

        def text(data: str) -> StringType:
            # All keys and all string-valued entries share this text type.
            return StringType(type=BasicStringTypes.TEXT, data=data)

        entries = [
            ("expected", self.expected),
            ("actual", self.actual),
            ("execution_directory", text(self.execution_directory)),
            ("evaluation_directory", text(self.evaluation_directory)),
            ("programming_language", text(self.programming_language)),
            ("natural_language", text(self.natural_language)),
        ]
        pairs = [
            ObjectKeyValuePair(key=text(name), value=value)
            for name, value in entries
        ]
        return ObjectType(type=BasicObjectTypes.MAP, data=pairs)


@define
class OracleResult:
"""
Expand Down
12 changes: 10 additions & 2 deletions tested/oracles/programmed.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from tested.dodona import ExtendedMessage, Message, Permission, Status, StatusMessage
from tested.internationalization import get_i18n_string
from tested.judge.programmed import evaluate_programmed
from tested.judge.programmed import OracleContext, evaluate_programmed
from tested.judge.utils import BaseExecutionResult
from tested.oracles.common import BooleanEvalResult, OracleConfig, OracleResult
from tested.oracles.value import get_values
Expand Down Expand Up @@ -51,8 +51,16 @@ def evaluate(
f"expected: {expected}\n"
f"actual: {actual}"
)
context = OracleContext(
expected=expected,
actual=actual,
execution_directory=str(config.context_dir.absolute()),
evaluation_directory=str(config.bundle.config.resources.absolute()),
programming_language=str(config.bundle.config.programming_language),
natural_language=config.bundle.config.natural_language,
)
result = evaluate_programmed(
config.bundle, evaluator=channel.oracle, expected=expected, actual=actual
config.bundle, evaluator=channel.oracle, context=context
)

if isinstance(result, BaseExecutionResult):
Expand Down
10 changes: 5 additions & 5 deletions tests/exercises/echo-function/evaluation/Evaluator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,13 @@ public static EvaluationResult Evaluate(Object actual) {
return new EvaluationResult(correct, "correct", actual != null ? actual.ToString() : "", messages);
}

public static EvaluationResult EvaluateValue(Object expected, Object actual) {
public static EvaluationResult EvaluateValue(IDictionary context) {
var messages = new List<Message>() {new Tested.Message("Hallo")};
return new EvaluationResult(expected == actual, expected.ToString(), actual != null ? actual.ToString() : "", messages);
return new EvaluationResult(context["expected"] == context["actual"], context["expected"].ToString(), context["actual"] != null ? context["actual"].ToString() : "", messages);
}
public static EvaluationResult EvaluateValueDsl(Object expected, Object actual) {

public static EvaluationResult EvaluateValueDsl(IDictionary context) {
var messages = new List<Message>() {new Tested.Message("Hallo")};
return new EvaluationResult(expected == actual, null, null, messages, "{5, 5}", "{4, 4}");
return new EvaluationResult(context["expected"] == context["actual"], null, null, messages, "{5, 5}", "{4, 4}");
}
}
10 changes: 5 additions & 5 deletions tests/exercises/echo-function/evaluation/Evaluator.hs
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,12 @@ evaluate value =
}


evaluate_value :: String -> String -> EvaluationResult
evaluate_value expected actual =
let correct = if actual == expected then True else False
evaluate_value :: Map -> EvaluationResult
evaluate_value context =
let correct = if (lookup "actual" actual) == (lookup "expected" expected) then True else False
in evaluationResult {
result = correct,
readableExpected = Just expected,
readableActual = Just actual,
readableExpected = Just (lookup "expected" expected),
readableActual = Just (lookup "actual" actual),
messages = [message "Hallo"]
}
12 changes: 6 additions & 6 deletions tests/exercises/echo-function/evaluation/Evaluator.java
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,16 @@ public static EvaluationResult evaluate(Object actual) {
.build();
}

public static EvaluationResult evaluateValue(Object expected, Object actual) {
return EvaluationResult.builder(expected.equals(actual))
.withReadableExpected(expected.toString())
.withReadableActual(actual != null ? actual.toString() : "")
public static EvaluationResult evaluateValue(Map<String, Object> context) {
return EvaluationResult.builder(context.get("expected").equals(context.get("actual")))
.withReadableExpected(context.get("expected").toString())
.withReadableActual(context.get("actual") != null ? context.get("actual").toString() : "")
.withMessage(new EvaluationResult.Message("Hallo"))
.build();
}

public static EvaluationResult evaluateValueDsl(Object expected, Object actual) {
return EvaluationResult.builder(expected.equals(actual))
public static EvaluationResult evaluateValueDsl(Map<String, Object> context) {
return EvaluationResult.builder(context.get("expected").equals(context.get("actual")))
.withDslExpected("{5, 5}")
.withDslActual("{4, 4}")
.withMessage(new EvaluationResult.Message("Hallo"))
Expand Down
12 changes: 6 additions & 6 deletions tests/exercises/echo-function/evaluation/Evaluator.kt
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,17 @@ class Evaluator {
}

@JvmStatic
fun evaluateValue(expected: Any, actual: Any?): EvaluationResult {
return EvaluationResult.Builder(result = expected == actual,
readableExpected = expected.toString(),
readableActual = actual?.toString() ?: "")
fun evaluateValue(context: Map<String, Any>): EvaluationResult {
return EvaluationResult.Builder(result = context["expected"] == context["actual"],
readableExpected = context["expected"].toString(),
readableActual = context["actual"]?.toString() ?: "")
.withMessage(EvaluationResult.Message("Hallo"))
.build()
}

@JvmStatic
fun evaluateValueDsl(expected: Any, actual: Any?): EvaluationResult {
return EvaluationResult.Builder(result = expected == actual,
fun evaluateValueDsl(context: Map<String, Any>): EvaluationResult {
return EvaluationResult.Builder(result = context["expected"] == context["actual"],
dslExpected = "{5, 5}",
dslActual = "{4, 4}")
.withMessage(EvaluationResult.Message("Hallo"))
Expand Down
12 changes: 6 additions & 6 deletions tests/exercises/echo-function/evaluation/evaluator.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,18 @@ function evaluate(actual) {
}
}

function evaluateValue(expected, actual) {
function evaluateValue(context) {
return {
"result": expected === actual,
"readable_expected": expected,
"readable_actual": actual,
"result": context.expected === context.actual,
"readable_expected": context.expected,
"readable_actual": context.actual,
"messages": [{"description": "Hallo", "format": "text"}]
}
}

function evaluateValueDsl(expected, actual) {
function evaluateValueDsl(context) {
return {
"result": expected === actual,
"result": context.expected === context.actual,
"dsl_expected": "{5, 5}",
"dsl_actual": "{4, 4}",
"messages": [{"description": "Hallo", "format": "text"}]
Expand Down
8 changes: 4 additions & 4 deletions tests/exercises/echo-function/evaluation/evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@ def evaluate(actual):
return EvaluationResult(correct, "correct", actual, [Message("Hallo")])


def evaluate_value(expected, actual):
return EvaluationResult(expected == actual, expected, actual, [Message("Hallo")])
def evaluate_value(context):
    """
    Check whether the actual value equals the expected value.

    :param context: Mapping from which the "expected" and "actual" entries
                    are read.
    :return: An evaluation result holding the outcome of the comparison,
             both values as readable output, and one fixed message.
    """
    expected = context["expected"]
    actual = context["actual"]
    is_equal = expected == actual
    return EvaluationResult(is_equal, expected, actual, [Message("Hallo")])


def evaluate_value_dsl(expected, actual):
def evaluate_value_dsl(context):
return EvaluationResult(
result=expected == actual,
result=context["expected"] == context["actual"],
messages=[Message("Hallo")],
dsl_expected="{5, 5}",
dsl_actual="{4, 4}"
Expand Down
6 changes: 3 additions & 3 deletions tests/exercises/echo/evaluation/evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
from evaluation_utils import EvaluationResult


def evaluate_correct(expected, actual):
return EvaluationResult(expected.strip() == actual.strip())
def evaluate_correct(context):
    """
    Compare the expected and actual values after stripping whitespace.

    :param context: Mapping from which the "expected" and "actual" entries
                    are read; ``strip()`` is called on both.
    :return: An evaluation result that is correct when both stripped values
             are equal.
    """
    expected = context["expected"].strip()
    actual = context["actual"].strip()
    return EvaluationResult(expected == actual)


def evaluate_wrong(_expected, _actual):
def evaluate_wrong(_context):
    """
    Reject unconditionally; the given context is not inspected.

    :param _context: The oracle context (unused).
    :return: An evaluation result constructed with ``False``.
    """
    verdict = EvaluationResult(False)
    return verdict
4 changes: 3 additions & 1 deletion tests/exercises/lotto/evaluation/evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,9 @@ def valid_lottery_numbers(number_str, count=6, maximum=42):
return True, None


def evaluate(expected, actual, count, maximum):
def evaluate(context, count, maximum):
expected = context["expected"]
actual = context["actual"]
valid, message = valid_lottery_numbers(actual, count, maximum)
messages = [Message(message)] if message else []
if valid:
Expand Down

0 comments on commit e056c3a

Please sign in to comment.