Skip to content

Commit

Permalink
Upgrade llguidance to 0.5.1rc0 (#1090)
Browse files Browse the repository at this point in the history
- Allow intersection of `pattern`, `format`, and `minLength`/`maxLength`
on strings
- Support for pre-Draft2019-09 `additionalItems`
- Reduce cases where we give a warning for `oneOf` (we can infer cases
where coercion is guaranteed to be safe)
- Correction to JSON `pattern`'s regex anchoring semantics (patterns not
assumed to be anchored by default)
- Minor changes to JSON string generation with `minLength`/`maxLength`:
UTF-16 surrogate pairs are no longer allowed
  • Loading branch information
hudson-ai authored Dec 17, 2024
1 parent d913f68 commit 2fc4151
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 25 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
"referencing",
"requests",
"tiktoken>=0.3",
"llguidance==0.5.0",
"llguidance==0.5.1rc0",
]

# Our basic list of 'extras'
Expand Down
100 changes: 76 additions & 24 deletions tests/unit/library/json/test_json.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import json
import re
from json import dumps as json_dumps
import warnings

import pytest
from jsonschema import ValidationError, validate
Expand Down Expand Up @@ -345,16 +346,6 @@ def test_regex(self, my_string: str):
# The actual check
generate_and_check(my_string, schema_obj)

def test_regex_no_min_max_length(self):
    """Check that ``pattern`` combined with ``minLength`` is rejected.

    Appending ``gen_json`` for a string schema that carries both a
    ``pattern`` and a ``minLength`` to a mock model is expected to raise a
    ``ValueError`` with a fixed explanatory message.
    """
    expected_message = "If a pattern is specified, minLength and maxLength must be unspecified."
    schema_text = """{ "type": "string", "pattern": "a[A-Z]", "minLength": 1 }"""
    parsed_schema = json.loads(schema_text)

    # The mock model is given no canned output: the error should be raised
    # while the grammar is being attached, before anything is generated.
    model = models.Mock("".encode())

    with pytest.raises(ValueError) as exc_info:
        model += gen_json(schema=parsed_schema)

    # Checked after the context manager exits; inside it the assertion
    # would never run once the exception has been raised.
    assert exc_info.value.args[0] == expected_message

@pytest.mark.parametrize(
["bad_string", "good_bytes", "failure_byte", "allowed_bytes"],
[
Expand Down Expand Up @@ -574,6 +565,20 @@ def test_unsatisfiable_length(self):
_ = gen_json(schema=schema)
assert ve.value.args[0] == "Unsatisfiable schema: minLength (10) is greater than maxLength (5)"

@pytest.mark.parametrize("length", range(2, 7))
@pytest.mark.parametrize("character", ["a", "b"])
def test_pattern_length_intersection(self, length, character):
    """Exercise a string schema that combines ``pattern`` with length bounds.

    The candidate is ``character`` repeated ``length`` times. It satisfies
    the schema only when it consists of "a" and is 3 to 5 characters long;
    every other parametrized combination must be rejected.
    """
    schema = {"type": "string", "minLength": 3, "maxLength": 5, "pattern": "^a+$"}
    candidate = character * length

    satisfies_schema = character == "a" and 3 <= length <= 5
    if satisfies_schema:
        # Sanity check: jsonschema agrees the candidate is valid.
        validate(instance=candidate, schema=schema)
        generate_and_check(candidate, schema)
        return

    # Sanity check: jsonschema agrees the candidate is invalid.
    with pytest.raises(ValidationError):
        validate(instance=candidate, schema=schema)
    # Runs after the `raises` block so that it actually executes.
    check_match_failure(bad_string=json_dumps(candidate), schema_obj=schema)

class TestSimpleObject:
# These are objects without cross references
Expand Down Expand Up @@ -1403,22 +1408,69 @@ def test_oneOf_simple(self, target_obj):
# The actual check
generate_and_check(target_obj, schema_obj)

@pytest.mark.parametrize("target_obj", [123, True])
def test_oneOf_compound(self, target_obj):
schema = """{
"oneOf" : [{ "type": "integer" }, { "type": "boolean" }]
}
"""
# First sanity check what we're setting up
schema_obj = json.loads(schema)
validate(instance=target_obj, schema=schema_obj)
@pytest.mark.parametrize(
"schema, instances",
[
# Simple case, disjoint types
({"oneOf": [{"type": "integer"}, {"type": "boolean"}]}, [123, True]),
# Simple case, disjoint enums
({"oneOf": [{"enum": ["a", "b", "c"]}, {"enum": [1,2,3]}]}, ["a", "b", "c", 1, 2, 3]),
# More complex case, discriminated union
(
{
"oneOf": [
# Only one of them needs the prop key to be required
{"type": "object", "properties": {"prop": {"const": "foo"}}, "required": ["prop"]},
{"type": "object", "properties": {"prop": {"const": "bar"}}}
]
},
[{"prop": "foo"}, {"prop": "bar"}]
),
# Enums made disjoint by type
(
{"oneOf": [{"enum": [1,2,"foo"]}, {"enum": [2,3,"bar"]}], "type": "string"},
["foo", "bar"]
),
]
)
def test_oneOf_disjoint(self, schema, instances):
for instance in instances:
# First sanity check what we're setting up
validate(instance=instance, schema=schema)

# The actual check; we expect a warning here because oneOf is not fully supported
with pytest.warns() as record:
generate_and_check(target_obj, schema_obj)
assert len(record) == 1
assert record[0].message.args[0].startswith("oneOf not fully supported")
# The actual check; we assert NO warning here because oneOf is disjoint
with warnings.catch_warnings():
warnings.simplefilter("error")
generate_and_check(instance, schema)

@pytest.mark.parametrize(
    "schema, instances",
    [
        # Enums sharing the member "c"; the instances avoid it so that each
        # one matches exactly one branch.
        ({"oneOf": [{"enum": ["a", "b", "c"]}, {"enum": ["c", 2, 3]}]}, ["a", "b", 2, 3]),
        # Objects without a proper discriminator: neither branch lists
        # "prop" as required, so an object lacking "prop" matches both.
        (
            {
                "oneOf": [
                    {"type": "object", "properties": {"prop": {"const": "foo"}}},
                    {"type": "object", "properties": {"prop": {"const": "bar"}}},
                ]
            },
            [{"prop": "foo"}, {"prop": "bar"}],
        ),
    ],
)
def test_oneOf_overlap(self, schema, instances):
    """Check that a ``oneOf`` with overlapping branches emits one fallback warning.

    Each instance is first validated against the schema with jsonschema,
    then generated and checked; exactly one warning with the fixed
    "falling back to anyOf" message is expected per instance.
    """
    fallback_message = "oneOf not fully supported, falling back to anyOf. This may cause validation errors in some cases."

    for candidate in instances:
        # Sanity check of the test data itself.
        validate(instance=candidate, schema=schema)

        # The branches are not disjoint, so a warning is expected here.
        with pytest.warns() as caught:
            generate_and_check(candidate, schema)

        assert len(caught) == 1
        assert caught[0].message.args[0] == fallback_message

class TestEnum:
simple_schema = """{
Expand Down

0 comments on commit 2fc4151

Please sign in to comment.