Upgrade llguidance to 0.5.1rc0 (#1090)

- Allow intersection of `pattern`, `format`, and `minLength`/`maxLength` on strings
- Support for pre-Draft2019-09 `additionalItems`
- Reduce cases where we give a warning for `oneOf` (we can infer cases where coercion is guaranteed to be safe)
- Correction to JSON `pattern`'s regex anchoring semantics (patterns are no longer assumed to be anchored by default)
- Minor changes to JSON string generation with `minLength`/`maxLength`: UTF-16 surrogate pairs are no longer allowed
guidance-ai · Dec 17, 2024 · 2fc4151 · 2fc4151
1 parent d913f68
commit 2fc4151
Show file tree

Hide file tree

Showing 2 changed files with 77 additions and 25 deletions.
diff --git a/setup.py b/setup.py
@@ -29,7 +29,7 @@
     "referencing",
     "requests",
     "tiktoken>=0.3",
-    "llguidance==0.5.0",
+    "llguidance==0.5.1rc0",
 ]
 
 # Our basic list of 'extras'

diff --git a/tests/unit/library/json/test_json.py b/tests/unit/library/json/test_json.py
@@ -1,6 +1,7 @@
 import json
 import re
 from json import dumps as json_dumps
+import warnings
 
 import pytest
 from jsonschema import ValidationError, validate
@@ -345,16 +346,6 @@ def test_regex(self, my_string: str):
         # The actual check
         generate_and_check(my_string, schema_obj)
 
-    def test_regex_no_min_max_length(self):
-        schema = """{ "type": "string", "pattern": "a[A-Z]", "minLength": 1 }"""
-        schema_obj = json.loads(schema)
-
-        lm = models.Mock("".encode())
-
-        with pytest.raises(ValueError) as ve:
-            lm += gen_json(schema=schema_obj)
-        assert ve.value.args[0] == "If a pattern is specified, minLength and maxLength must be unspecified."
-
     @pytest.mark.parametrize(
         ["bad_string", "good_bytes", "failure_byte", "allowed_bytes"],
         [
@@ -574,6 +565,20 @@ def test_unsatisfiable_length(self):
             _ = gen_json(schema=schema)
         assert ve.value.args[0] == "Unsatisfiable schema: minLength (10) is greater than maxLength (5)"
 
+    @pytest.mark.parametrize("length", range(2, 7))
+    @pytest.mark.parametrize("character", ["a", "b"])
+    def test_pattern_length_intersection(self, length, character):
+        schema = {"type": "string", "minLength": 3, "maxLength": 5, "pattern": "^a+$"}
+        string = character * length
+        if length < 3 or length > 5 or character != "a":
+            with pytest.raises(ValidationError):
+                # Sanity check
+                validate(instance=string, schema=schema)
+            check_match_failure(bad_string=json_dumps(string), schema_obj=schema)
+        else:
+            # Sanity check
+            validate(instance=string, schema=schema)
+            generate_and_check(string, schema)
 
 class TestSimpleObject:
     # These are objects without cross references
@@ -1403,22 +1408,69 @@ def test_oneOf_simple(self, target_obj):
         # The actual check
         generate_and_check(target_obj, schema_obj)
 
-    @pytest.mark.parametrize("target_obj", [123, True])
-    def test_oneOf_compound(self, target_obj):
-        schema = """{
-        "oneOf" : [{ "type": "integer" }, { "type": "boolean" }]
-        }
-        """
-        # First sanity check what we're setting up
-        schema_obj = json.loads(schema)
-        validate(instance=target_obj, schema=schema_obj)
+    @pytest.mark.parametrize(
+        "schema, instances",
+        [
+            # Simple case, disjoint types
+            ({"oneOf": [{"type": "integer"}, {"type": "boolean"}]}, [123, True]),
+            # Simple case, disjoint enums
+            ({"oneOf": [{"enum": ["a", "b", "c"]}, {"enum": [1,2,3]}]}, ["a", "b", "c", 1, 2, 3]),
+            # More complex case, discriminated union
+            (
+                {
+                    "oneOf": [
+                        # Only one of them needs the prop key to be required
+                        {"type": "object", "properties": {"prop": {"const": "foo"}}, "required": ["prop"]},
+                        {"type": "object", "properties": {"prop": {"const": "bar"}}}
+                    ]
+                },
+                [{"prop": "foo"}, {"prop": "bar"}]
+            ),
+            # Enums made disjoint by type
+            (
+                {"oneOf": [{"enum": [1,2,"foo"]}, {"enum": [2,3,"bar"]}], "type": "string"},
+                ["foo", "bar"]
+            ),
+        ]
+    )
+    def test_oneOf_disjoint(self, schema, instances):
+        for instance in instances:
+            # First sanity check what we're setting up
+            validate(instance=instance, schema=schema)
 
-        # The actual check; we expect a warning here because oneOf is not fully supported
-        with pytest.warns() as record:
-            generate_and_check(target_obj, schema_obj)
-        assert len(record) == 1
-        assert record[0].message.args[0].startswith("oneOf not fully supported")
+            # The actual check; we assert NO warning here because oneOf is disjoint
+            with warnings.catch_warnings():
+                warnings.simplefilter("error")
+                generate_and_check(instance, schema)
 
+    @pytest.mark.parametrize(
+        "schema, instances",
+        [
+            # Overlapping enums
+            ({"oneOf": [{"enum": ["a", "b", "c"]}, {"enum": ["c", 2, 3]}]}, ["a", "b", 2, 3]),
+            # More complex case, object without proper discriminator
+            (
+                {
+                    "oneOf": [
+                        # Neither branch requires the prop key, so the branches overlap (e.g. {} matches both)
+                        {"type": "object", "properties": {"prop": {"const": "foo"}}},
+                        {"type": "object", "properties": {"prop": {"const": "bar"}}}
+                    ]
+                },
+                [{"prop": "foo"}, {"prop": "bar"}]
+            )
+        ]
+    )
+    def test_oneOf_overlap(self, schema, instances):
+        for instance in instances:
+            # First sanity check what we're setting up
+            validate(instance=instance, schema=schema)
+
+            # The actual check; assert a warning here because oneOf is not disjoint and we can't guarantee correctness
+            with pytest.warns() as record:
+                generate_and_check(instance, schema)
+            assert len(record) == 1
+            assert record[0].message.args[0] == "oneOf not fully supported, falling back to anyOf. This may cause validation errors in some cases."
 
 class TestEnum:
     simple_schema = """{