From 8617190a3f5a0dc1ef9c68077ed2c5ce92ddce34 Mon Sep 17 00:00:00 2001 From: Dobiichi-Origami <56953648+Dobiichi-Origami@users.noreply.github.com> Date: Tue, 14 May 2024 17:08:29 +0800 Subject: [PATCH 1/7] adding support for baidu qianfan and Ernie --- guidance/models/_qianfan.py | 177 ++++++++++++++++++++++++++++++++++++ 1 file changed, 177 insertions(+) create mode 100644 guidance/models/_qianfan.py diff --git a/guidance/models/_qianfan.py b/guidance/models/_qianfan.py new file mode 100644 index 000000000..d412e02ff --- /dev/null +++ b/guidance/models/_qianfan.py @@ -0,0 +1,177 @@ +import copy +import re +from ._model import Chat +from ._grammarless import Grammarless, GrammarlessEngine + +_image_token_pattern = re.compile(r"<\|_image:(.*)\|>") + + +class ClassUnavailableException(Exception): + pass + + +class QianfanAI(Grammarless): + def __init__( + self, + model=None, + echo=True, + max_streaming_tokens=None, + timeout=0.5, + compute_log_probs=False, + **kwargs, + ): + """Build a new QianfanAI model object that represents a model in a given state.""" + + # if we are called directly (as opposed to through super()) then we convert ourselves to a more specific subclass if possible + if self.__class__ is QianfanAI: + raise ClassUnavailableException("Cannot use `QianfanAI` directly, please use `QianfanAIChat` or `QianfanAICompletion` instead") + + engine_map = { + QianfanAIChat: QianfanAIChatEngine, + QianfanAICompletion: QianfanAICompletionEngine, + } + + super().__init__( + engine=engine_map[self.__class__]( + model=model, + max_streaming_tokens=max_streaming_tokens, + timeout=timeout, + compute_log_probs=compute_log_probs, + **kwargs, + ), + echo=echo, + ) + + +class QianfanAIEngine(GrammarlessEngine): + + def __init__( + self, + model, + max_streaming_tokens, + timeout, + compute_log_probs, + **kwargs, + ): + try: + from qianfan import ChatCompletion, Completion + except ModuleNotFoundError: + raise Exception( + "Please install the Baidu Qianfan package using `pip install qianfan` " + "in order to use guidance.models.QianfanAI!" + ) + + assert ( + not compute_log_probs + ), "We don't support compute_log_probs=True yet for QianfanAIEngine!" 
+ self.model_name = model + + self.model_obj = ChatCompletion(model=model, **kwargs) if self.__class__ is QianfanAIChatEngine else Completion(model=model, **kwargs) + + self.extra_arguments = copy.deepcopy(kwargs) + self.extra_arguments.pop("endpoint") if "endpoint" in kwargs else None + + super().__init__(None, max_streaming_tokens, timeout, compute_log_probs) + + +class QianfanAIChat(QianfanAI, Chat): + pass + + +class QianfanAIChatEngine(QianfanAIEngine): + def _generator(self, prompt, temperature): + + # find the system text + pos = 0 + + system_start = b"<|im_start|>system\n" + user_start = b"<|im_start|>user\n" + assistant_start = b"<|im_start|>assistant\n" + role_end = b"<|im_end|>" + + # find the system text + system_text = "" + if prompt.startswith(system_start): + pos += len(system_start) + system_end_pos = prompt.find(role_end) + system_text = prompt[pos:system_end_pos].decode("utf8") + pos = system_end_pos + len(role_end) + + # find the user/assistant pairs + messages = [] + valid_end = False + while True: + + # find the user text + if prompt[pos:].startswith(user_start): + pos += len(user_start) + end_pos = prompt[pos:].find(role_end) + if end_pos < 0: + break + messages.append( + dict( + role="user", + content=prompt[pos: pos + end_pos].decode("utf8"), + ) + ) + pos += end_pos + len(role_end) + elif prompt[pos:].startswith(assistant_start): + pos += len(assistant_start) + end_pos = prompt[pos:].find(role_end) + if end_pos < 0: + valid_end = True + break + messages.append( + dict( + role="assistant", + content=prompt[pos: pos + end_pos].decode("utf8"), + ) + ) + pos += end_pos + len(role_end) + else: + raise Exception( + "It looks like your prompt is not a well formed chat prompt! Please enclose all model state appends inside chat role blocks like `user()` or `assistant()`." + ) + + self._data = prompt[:pos] + + assert len(messages) > 0, "Bad chat format! No chat blocks were defined." + assert ( + messages[-1]["role"] == "user" + ), "Bad chat format! There must be a user() role before the last assistant() role." + assert valid_end, "Bad chat format! You must generate inside assistant() roles." 
+ + if temperature == 0.0: + temperature = 0.0001 + + input_kwargs = {"temperature": temperature} + input_kwargs.update(self.extra_arguments) + + if system_text: + input_kwargs["system"] = system_text + + input_kwargs["stream"] = True + + result_iter = self.model_obj.do(messages, **input_kwargs) + for response in result_iter: + yield response.body["result"].encode("utf8") + + +class QianfanAICompletion(QianfanAI): + pass + + +class QianfanAICompletionEngine(QianfanAIEngine): + def _generator(self, prompt, temperature): + if temperature == 0.0: + temperature = 0.0001 + + input_kwargs = {"temperature": temperature} + input_kwargs.update(self.extra_arguments) + input_kwargs["stream"] = True + + self._data = prompt + + result_iter = self.model_obj.do(prompt.decode("utf8"), **input_kwargs) + for response in result_iter: + yield response.body["result"].encode("utf8") \ No newline at end of file From 574bb932b0a749dbab950285f6b2f4ffbc04248b Mon Sep 17 00:00:00 2001 From: Dobiichi-Origami <56953648+Dobiichi-Origami@users.noreply.github.com> Date: Wed, 5 Jun 2024 14:57:20 +0800 Subject: [PATCH 2/7] refactor --- guidance/models/_qianfan.py | 61 +++++++++++++++++-------------------- 1 file changed, 28 insertions(+), 33 deletions(-) diff --git a/guidance/models/_qianfan.py b/guidance/models/_qianfan.py index d412e02ff..73e1a6d48 100644 --- a/guidance/models/_qianfan.py +++ b/guidance/models/_qianfan.py @@ -1,9 +1,16 @@ import copy -import re -from ._model import Chat + +import typing + from ._grammarless import Grammarless, GrammarlessEngine -_image_token_pattern = re.compile(r"<\|_image:(.*)\|>") + +try: + import qianfan + + client_class: typing.Optional[typing.Type[qianfan.ChatCompletion]] = qianfan.ChatCompletion +except ImportError: + client_class = None class ClassUnavailableException(Exception): @@ -18,25 +25,21 @@ def __init__( max_streaming_tokens=None, timeout=0.5, compute_log_probs=False, + is_chat_model=True, **kwargs, ): """Build a new QianfanAI model object that represents a model in a given state.""" - # if we are called directly (as opposed to through super()) then we convert ourselves to a more specific subclass if possible - if self.__class__ is QianfanAI: - raise ClassUnavailableException("Cannot use `QianfanAI` directly, please use `QianfanAIChat` or `QianfanAICompletion` instead") - - engine_map = { - QianfanAIChat: QianfanAIChatEngine, - QianfanAICompletion: QianfanAICompletionEngine, - } + if client_class is None: + raise ClassUnavailableException("Please execute `pip install qianfan` before using QianfanAI component") super().__init__( - engine=engine_map[self.__class__]( + engine=QianfanAIEngine( model=model, max_streaming_tokens=max_streaming_tokens, timeout=timeout, compute_log_probs=compute_log_probs, + is_chat_model=is_chat_model, **kwargs, ), echo=echo, @@ -51,35 +54,33 @@ def __init__( max_streaming_tokens, timeout, compute_log_probs, + is_chat_model=True, **kwargs, ): - try: - from qianfan import ChatCompletion, Completion - except ModuleNotFoundError: - raise Exception( - "Please install the Baidu Qianfan package using `pip install qianfan` " - "in order to use guidance.models.QianfanAI!" - ) + if client_class is None: + raise ClassUnavailableException("Please execute `pip install qianfan` before using QianfanAI component") assert ( not compute_log_probs ), "We don't support compute_log_probs=True yet for QianfanAIEngine!" 
+ self.model_name = model - self.model_obj = ChatCompletion(model=model, **kwargs) if self.__class__ is QianfanAIChatEngine else Completion(model=model, **kwargs) + self.is_chat_model = is_chat_model + self.model_obj = qianfan.ChatCompletion(model=model, **kwargs) if self.is_chat_model else qianfan.Completion(model=model, **kwargs) self.extra_arguments = copy.deepcopy(kwargs) self.extra_arguments.pop("endpoint") if "endpoint" in kwargs else None super().__init__(None, max_streaming_tokens, timeout, compute_log_probs) + def _generator(self, prompt, temperature): + if self.is_chat_model: + return self._chat_generator(prompt, temperature) -class QianfanAIChat(QianfanAI, Chat): - pass - + return self._completion_generator(prompt, temperature) -class QianfanAIChatEngine(QianfanAIEngine): - def _generator(self, prompt, temperature): + def _chat_generator(self, prompt, temperature): # find the system text pos = 0 @@ -156,13 +157,7 @@ def _generator(self, prompt, temperature): for response in result_iter: yield response.body["result"].encode("utf8") - -class QianfanAICompletion(QianfanAI): - pass - - -class QianfanAICompletionEngine(QianfanAIEngine): - def _generator(self, prompt, temperature): + def _completion_generator(self, prompt, temperature): if temperature == 0.0: temperature = 0.0001 @@ -174,4 +169,4 @@ def _generator(self, prompt, temperature): result_iter = self.model_obj.do(prompt.decode("utf8"), **input_kwargs) for response in result_iter: - yield response.body["result"].encode("utf8") \ No newline at end of file + yield response.body["result"].encode("utf8") From 08af3e604c81e229d1bbf8745ab9477346f89570 Mon Sep 17 00:00:00 2001 From: Dobiichi-Origami <56953648+Dobiichi-Origami@users.noreply.github.com> Date: Wed, 5 Jun 2024 23:24:17 +0800 Subject: [PATCH 3/7] fix mypy issue --- guidance/models/_qianfan.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/guidance/models/_qianfan.py b/guidance/models/_qianfan.py index 73e1a6d48..9a2444c87 100644 --- a/guidance/models/_qianfan.py +++ b/guidance/models/_qianfan.py @@ -6,7 +6,7 @@ try: - import qianfan + import qianfan # type: ignore client_class: typing.Optional[typing.Type[qianfan.ChatCompletion]] = qianfan.ChatCompletion except ImportError: From 7ae2f5214f9efd15045171a166cf99820ea73af0 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 19 Jun 2024 12:44:25 -0700 Subject: [PATCH 4/7] Allow generating arbitrary (schemaless) JSON (#892) Calling `guidance.json` with an empty schema generates arbitrary JSON. This closes #887 -- to quote @wjn0, there are several motivations for this: - APIs such as OpenAI allow users to request only valid JSON be generated sans schema, so in some sense this would give feature parity for local LLMs. - Large JSON schemas often include "arbitrary" properties, e.g. properties that are allowed to be any valid JSON value: https://json-schema.org/understanding-json-schema/basics#hello-world! 
--- guidance/library/_json.py | 156 +++++++++++++----- tests/unit/library/test_json.py | 270 +++++++++++++++++++++++++++++++- 2 files changed, 384 insertions(+), 42 deletions(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index 40736a995..846b5f81b 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -1,4 +1,5 @@ from json import dumps as json_dumps +from enum import Enum from typing import ( Any, Callable, @@ -34,7 +35,47 @@ def _to_compact_json(target: Any) -> str: return json_dumps(target, separators=(",", ":")) -_DEFS_KEYS = ["$defs", "definitions"] +class Keyword(str, Enum): + ANYOF = "anyOf" + ALLOF = "allOf" + REF = "$ref" + CONST = "const" + ENUM = "enum" + TYPE = "type" + + +KEYS = {member.value for member in Keyword} + +DEFS_KEYS = {"$defs", "definitions"} + +IGNORED_KEYS = { + "$schema", + "$id", + "$comment", + "title", + "description", + "default", + "examples", + "required", # TODO: implement and remove from ignored list +} + +TYPE_SPECIFIC_KEYS = { + "array": {"items", "prefixItems", "minItems", "maxItems"}, + "object": {"properties", "additionalProperties"}, +} + + +def validate_json_node_keys(node: Mapping[str, Any]): + keys = set(node.keys()) + valid_keys = KEYS | IGNORED_KEYS | DEFS_KEYS + if Keyword.TYPE in node: + valid_keys |= TYPE_SPECIFIC_KEYS.get(node[Keyword.TYPE], set()) + invalid_keys = keys - valid_keys + if invalid_keys: + raise ValueError( + f"JSON schema had keys that could not be processed: {invalid_keys}" + f"\nSchema: {node}" + ) @guidance(stateless=True) @@ -73,21 +114,25 @@ def _gen_json_string(lm): def _gen_json_object( lm, *, - properties: Union[Mapping[str, Any], None], - additional_properties: Union[Mapping[str, Any], None], + properties: Mapping[str, Any], + additional_properties: Union[bool, Mapping[str, Any]], definitions: Mapping[str, Callable[[], GrammarFunction]], ): + if additional_properties is True: + # True means that anything goes + additional_properties = {} + lm += "{" if properties: lm += _process_properties(properties=properties, definitions=definitions) - if properties and additional_properties: + if properties and additional_properties is not False: lm += optional( "," + _process_additional_properties( additional_properties=additional_properties, definitions=definitions ) ) - elif additional_properties: + elif additional_properties is not False: lm += optional( _process_additional_properties( additional_properties=additional_properties, definitions=definitions @@ -138,19 +183,20 @@ def _process_additional_properties( def _gen_json_array( lm, *, - prefix_items_schema: Optional[Sequence[Mapping[str, Any]]], - item_schema: Optional[Mapping[str, Any]], + prefix_items_schema: Sequence[Mapping[str, Any]], + item_schema: Union[bool, Mapping[str, Any]], min_items: int, max_items: Optional[int], definitions: Mapping[str, Callable[[], GrammarFunction]], ): - if prefix_items_schema is None: - prefix_items_schema = [] + if item_schema is True: + # True means that anything goes + item_schema = {} - if len(prefix_items_schema) < min_items and item_schema is None: + if len(prefix_items_schema) < min_items and item_schema is False: raise ValueError( - "No items schema provided, but prefixItems has too few elements " - f"({len(prefix_items_schema)}) to satisfy minItems ({min_items})" + f"PrefixItems has too few elements ({len(prefix_items_schema)}) to" + f" satisfy minItems ({min_items}) but no extra items were allowed" ) if max_items is not None and max_items < min_items: @@ -168,7 +214,7 @@ def 
_gen_json_array( for i in range(n_to_add): if i < len(prefix_items_schema): schema = prefix_items_schema[i] - elif item_schema is not None: + elif item_schema is not False: schema = item_schema else: assert i >= min_items @@ -181,7 +227,7 @@ def _gen_json_array( else: optional_items.append(item) - if max_items is None and item_schema is not None: + if max_items is None and item_schema is not False: # Add an infinite tail of items item = _gen_json(json_schema=item_schema, definitions=definitions) optional_items.append(item + zero_or_more("," + item)) @@ -235,42 +281,66 @@ def _process_enum(lm, *, options: Sequence[Mapping[str, Any]]): return lm + select(options=all_opts) +@guidance(stateless=True) +def _gen_json_any(lm): + return lm + select( + [ + _gen_json(json_schema={"type": "null"}, definitions={}), + _gen_json(json_schema={"type": "boolean"}, definitions={}), + _gen_json(json_schema={"type": "integer"}, definitions={}), + _gen_json(json_schema={"type": "number"}, definitions={}), + _gen_json(json_schema={"type": "string"}, definitions={}), + # Recursive cases + _gen_json( + json_schema={ + "type": "array", + "items": True, + }, + definitions={}, + ), + _gen_json( + json_schema={ + "type": "object", + "additionalProperties": True, + }, + definitions={}, + ), + ] + ) + + @guidance(stateless=True) def _gen_json( lm, json_schema: Mapping[str, Any], definitions: Mapping[str, Callable[[], GrammarFunction]], ): - ANYOF_STRING = "anyOf" - if ANYOF_STRING in json_schema: + validate_json_node_keys(json_schema) + + if Keyword.ANYOF in json_schema: return lm + _process_anyOf( - anyof_list=json_schema[ANYOF_STRING], definitions=definitions + anyof_list=json_schema[Keyword.ANYOF], definitions=definitions ) - ALLOF_STRING = "allOf" - if ALLOF_STRING in json_schema: - allof_list = json_schema[ALLOF_STRING] + if Keyword.ALLOF in json_schema: + allof_list = json_schema[Keyword.ALLOF] if len(allof_list) != 1: raise ValueError("Only support allOf with exactly one item") return lm + _gen_json(allof_list[0], definitions) - REF_STRING = "$ref" - if REF_STRING in json_schema: + if Keyword.REF in json_schema: return lm + _get_definition( - reference=json_schema[REF_STRING], definitions=definitions + reference=json_schema[Keyword.REF], definitions=definitions ) - CONST_STRING = "const" - if CONST_STRING in json_schema: - return lm + _to_compact_json(json_schema[CONST_STRING]) + if Keyword.CONST in json_schema: + return lm + _to_compact_json(json_schema[Keyword.CONST]) - ENUM_STRING = "enum" - if ENUM_STRING in json_schema: - return lm + _process_enum(options=json_schema["enum"]) + if Keyword.ENUM in json_schema: + return lm + _process_enum(options=json_schema[Keyword.ENUM]) - TYPE_STRING = "type" - if TYPE_STRING in json_schema: - target_type = json_schema["type"] + if Keyword.TYPE in json_schema: + target_type = json_schema[Keyword.TYPE] if target_type == "null": return lm + "null" if target_type == "boolean": @@ -283,21 +353,21 @@ def _gen_json( return lm + _gen_json_string() if target_type == "array": return lm + _gen_json_array( - prefix_items_schema=json_schema.get("prefixItems"), - item_schema=json_schema.get("items"), + prefix_items_schema=json_schema.get("prefixItems", []), + item_schema=json_schema.get("items", True), min_items=json_schema.get("minItems", 0), max_items=json_schema.get("maxItems"), definitions=definitions, ) if target_type == "object": return lm + _gen_json_object( - properties=json_schema.get("properties"), - additional_properties=json_schema.get("additionalProperties"), + 
properties=json_schema.get("properties", {}), + additional_properties=json_schema.get("additionalProperties", True), definitions=definitions, ) raise ValueError(f"Unsupported type in schema: {target_type}") - raise ValueError(f"Can't process JSON node: {json_schema}") + return lm + _gen_json_any() @guidance(stateless=True) @@ -306,10 +376,11 @@ def json( name: Optional[str] = None, *, schema: Union[ + None, Mapping[str, Any], Type["pydantic.BaseModel"], "pydantic.TypeAdapter", - ], + ] = None, temperature: float = 0.0, ): """Generate valid JSON according to the supplied JSON schema or `pydantic` model. @@ -347,8 +418,9 @@ def json( If this is not None then the the results of the generation will be saved as a variable on the Model object (so you can access the result as ``lm["var_name"]``). - schema : Union[Mapping[str, Any], Type[pydantic.BaseModel], pydantic.TypeAdapter] + schema : Union[None, Mapping[str, Any], Type[pydantic.BaseModel], pydantic.TypeAdapter] One of: + - None, in which case any valid JSON will be generated - A JSON schema object. This is a JSON schema string which has been passed to ``json.loads()`` - A subclass of ``pydantic.BaseModel`` - An instance of ``pydantic.TypeAdapter`` @@ -357,11 +429,13 @@ def json( # Raises jsonschema.exceptions.SchemaError or ValueError # if schema is not valid jsonschema.validators.Draft202012Validator.check_schema(schema) + elif schema is None: + schema = {} else: schema = pydantic_to_json_schema(schema) definitions: Mapping[str, Callable[[], GrammarFunction]] = {} - for dk in _DEFS_KEYS: + for dk in DEFS_KEYS: if dk in schema: assert len(definitions) == 0, "Found duplicate definitions" definitions = _build_definitions(schema[dk]) @@ -401,7 +475,7 @@ def _get_definition( ): assert definitions is not None target_definition = None - for dk in _DEFS_KEYS: + for dk in DEFS_KEYS: ref_start = f"#/{dk}/" if reference.startswith(ref_start): target_name = reference[len(ref_start) :] diff --git a/tests/unit/library/test_json.py b/tests/unit/library/test_json.py index 942f8e33f..2b1753c24 100644 --- a/tests/unit/library/test_json.py +++ b/tests/unit/library/test_json.py @@ -320,7 +320,8 @@ def test_bad_object(self, bad_string, good_bytes, failure_byte, allowed_bytes): "type": "object", "properties": { "a" : {"type": "integer"} - } + }, + "additionalProperties": false } """ schema_obj = json.loads(schema) @@ -635,6 +636,7 @@ def test_bad_with_prefix( ): schema_obj = { "prefixItems": self.prefix_schema_obj, + "items": False, "minItems": min_items, "maxItems": max_items, "type": "array", @@ -858,6 +860,39 @@ def test_nested_ref(self, temperature): # The actual check generate_and_check(target_obj, schema_obj, desired_temperature=temperature) + @pytest.mark.parametrize("temperature", [None, 0.1, 1]) + def test_multiple_refs_to_same_def(self, temperature): + schema = """{ + "$defs": { + "A": { + "properties": { + "name": { + "type": "string" + } + }, + "type": "object" + } + }, + "properties": { + "A1": { + "$ref": "#/$defs/A" + }, + "A2": { + "$ref": "#/$defs/A" + } + }, + "type": "object" + }""" + + target_obj = dict(A1=dict(name="Romulus"), A2=dict(name="Remus")) + + # First sanity check what we're setting up + schema_obj = json.loads(schema) + validate(instance=target_obj, schema=schema_obj) + + # The actual check + generate_and_check(target_obj, schema_obj, desired_temperature=temperature) + class TestAnyOf: @pytest.mark.parametrize("target_obj", [123, True]) @@ -1348,3 +1383,236 @@ def test_linked_list(self, target_obj): # The actual check 
generate_and_check(target_obj, schema_obj) + + +class TestEmptySchemas: + empty_schema = "{}" + nested_empty_schema_with_props = """{ + "properties" : { + "a": {}, + "b": {"type": "number"} + }, + "type" : "object" + }""" + + @pytest.mark.parametrize( + "target_obj", + [ + 1, + "2", + False, + [1, 2, 3], + {"a": 1}, + None, + [{"a": 1}], + {"a": [1, 2, 3]}, + {"a": {"b": 1}}, + ], + ) + @pytest.mark.parametrize("temperature", [None, 0.1, 1]) + def test_empty_schema(self, target_obj, temperature): + # First sanity check what we're setting up + schema_obj = json.loads(self.empty_schema) + validate(instance=target_obj, schema=schema_obj) + + # The actual check + generate_and_check(target_obj, schema_obj, desired_temperature=temperature) + + @pytest.mark.parametrize( + "bad_string, good_bytes, failure_byte, allowed_bytes", + [ + # {} is not carte blanche for malformed JSON + ("{a:1}", b"{", b"a", {Byte(b'"'), Byte(b"}")}), + ( + "[1,2} ", + b"[1,2", + b"}", + {Byte(b","), Byte(b"]"), Byte(b"e"), Byte(b"."), *INTEGER_FOLLOWING}, + ), + ("123a", b"123", b"a", {Byte(b"e"), Byte(b"."), *INTEGER_FOLLOWING}), + ( + "]", + b"", + b"]", + { + Byte(b"["), + Byte(b"{"), + Byte(b'"'), + Byte(b"t"), + Byte(b"f"), + Byte(b"n"), + *INTEGER_LEADING, + }, + ), + ], + ) + def test_bad_empty_schema( + self, bad_string, good_bytes, failure_byte, allowed_bytes + ): + schema_obj = json.loads(self.empty_schema) + check_match_failure( + bad_string=bad_string, + good_bytes=good_bytes, + failure_byte=failure_byte, + allowed_bytes=allowed_bytes, + schema_obj=schema_obj, + ) + + @pytest.mark.parametrize( + "schema_obj", + [ + # Empty property + {"type": "object", "properties": { "a": {} }}, + # Empty reference + {"type": "object", "properties": {"a": {"$ref": "#/$defs/A"}}, "$defs": {"A": {}}}, + ] + ) + @pytest.mark.parametrize( + "target_obj", + [ + {"a": 1}, + {"a": "2"}, + {"a": False}, + {"a": [1, 2, 3]}, + {"a": {"b": 1}}, + {"a": None}, + {"a": [{"b": 1}]}, + {"a": {"b": [1, 2, 3]}}, + {"a": {"b": {"c": 1}}}, + ], + ) + @pytest.mark.parametrize("temperature", [None, 0.1, 1]) + def test_nested_empty_schema(self, schema_obj, target_obj, temperature): + # First sanity check what we're setting up + validate(instance=target_obj, schema=schema_obj) + + # The actual check + generate_and_check(target_obj, schema_obj, desired_temperature=temperature) + + @pytest.mark.parametrize( + "schema_obj", + [ + # Empty property + {"type": "object", "properties": { "a": {} }}, + # Empty reference + {"type": "object", "properties": {"a": {"$ref": "#/$defs/A"}}, "$defs": {"A": {}}}, + ] + ) + @pytest.mark.parametrize( + "bad_obj, good_bytes, failure_byte, allowed_bytes", + [ + # Missing property -- presence of {} deeper in the schema isn't carte blanche + ({"b": 42}, b'{"', b"b", {Byte(b"a")}), + ], + ) + def test_nested_empty_schema_bad( + self, schema_obj, bad_obj, good_bytes, failure_byte, allowed_bytes + ): + bad_string = _to_compact_json(bad_obj) + check_match_failure( + bad_string=bad_string, + good_bytes=good_bytes, + failure_byte=failure_byte, + allowed_bytes=allowed_bytes, + schema_obj=schema_obj, + ) + + @pytest.mark.parametrize( + "target_obj", + [ + {"a": 1, "b": 2}, + {"a": "2", "b": 1.998}, + {"a": False, "b": -3.14}, + {"a": [1, 2, 3], "b": 42}, + {"a": {"b": 1}, "b": 0.2}, + {"a": None, "b": 5e-4}, + {"a": [{"b": 1}], "b": -5e2}, + {"a": {"b": [1, 2, 3]}, "b": 1}, + {"a": {"b": {"c": 1}}, "b": -1}, + ], + ) + @pytest.mark.parametrize("temperature", [None, 0.1, 1]) + def test_nested_empty_schema_with_props(self, 
target_obj, temperature): + # First sanity check what we're setting up + schema_obj = json.loads(self.nested_empty_schema_with_props) + validate(instance=target_obj, schema=schema_obj) + + # The actual check + generate_and_check(target_obj, schema_obj, desired_temperature=temperature) + + @pytest.mark.parametrize( + "bad_obj, good_bytes, failure_byte, allowed_bytes", + [ + # Missing property -- presence of {} deeper in the schema isn't carte blanche + ({"b": 42}, b'{"', b"b", {Byte(b"a")}), + ], + ) + def test_nested_empty_schema_with_props_bad( + self, bad_obj, good_bytes, failure_byte, allowed_bytes + ): + schema_obj = json.loads(self.nested_empty_schema_with_props) + + bad_string = _to_compact_json(bad_obj) + check_match_failure( + bad_string=bad_string, + good_bytes=good_bytes, + failure_byte=failure_byte, + allowed_bytes=allowed_bytes, + schema_obj=schema_obj, + ) + + @pytest.mark.parametrize( + "schema_obj", + [ + {"type": "array"}, + {"type": "array", "items": {}}, + {"type": "array", "items": True}, + ], + ) + def test_items(self, schema_obj): + schema_obj = {"type": "array"} + generate_and_check( + [1, 0.4, "hello", False, None, {"a": 42}, [1, 2, 3, "four"]], schema_obj + ) + + def test_no_items(self): + schema_obj = {"type": "array", "items": False} + check_match_failure( + bad_string="[42]", + good_bytes=b"[", + failure_byte=b"4", + allowed_bytes={Byte(b"]")}, # array must be empty + schema_obj=schema_obj, + ) + + @pytest.mark.parametrize( + "schema_obj", + [ + {"type": "object"}, + {"type": "object", "additionalProperties": {}}, + {"type": "object", "additionalProperties": True}, + ], + ) + def test_additionalProperties(self, schema_obj): + generate_and_check( + { + "a": 1, + "b": 0.4, + "c": "hello", + "d": False, + "e": None, + "f": {"a": 42}, + "g": [1, 2, 3, "four"], + }, + schema_obj, + ) + + def test_no_additionalProperties(self): + schema_obj = {"type": "object", "additionalProperties": False} + check_match_failure( + bad_string='{"a": 42}', + good_bytes=b"{", + failure_byte=b'"', + allowed_bytes={Byte(b"}")}, # object must be empty + schema_obj=schema_obj, + ) From c796eeb9d498bcc63117cb0872e4d4d8d8ffae60 Mon Sep 17 00:00:00 2001 From: Richard Edgar Date: Thu, 20 Jun 2024 14:04:15 -0400 Subject: [PATCH 5/7] [Bug] Exclude llama-cpp-python version (#915) The latest release of llama-cpp-python (0.2.79) is causing issues with one of our tests (on Windows). The test in question is `test_repeat_calls`, and assumes that at T=0 (the default for `gen()`), then we can repeatedly call a LlamaCpp model and get the same result. This isn't happening, although stepping through the test itself with a debugger, I don't see anything untoward (I'm not seeing a pile up of previous prompts for example). For now, exclude the latest llama-cpp-python version, but we may want to revisit this test if the problem persists. 
--- .github/workflows/action_plain_basic_tests.yml | 2 +- tests/model_specific/test_llama_cpp.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/action_plain_basic_tests.yml b/.github/workflows/action_plain_basic_tests.yml index 929ce5347..e939239db 100644 --- a/.github/workflows/action_plain_basic_tests.yml +++ b/.github/workflows/action_plain_basic_tests.yml @@ -39,7 +39,7 @@ jobs: run: | pip install sentencepiece pip uninstall -y llama-cpp-python - pip install "llama-cpp-python!=0.2.58" + pip install "llama-cpp-python!=0.2.58,!=0.2.79" - name: Run tests (except server) shell: bash run: | diff --git a/tests/model_specific/test_llama_cpp.py b/tests/model_specific/test_llama_cpp.py index 647a41aa0..246690b26 100644 --- a/tests/model_specific/test_llama_cpp.py +++ b/tests/model_specific/test_llama_cpp.py @@ -50,6 +50,7 @@ def test_llama_cpp_select2(llamacpp_model: guidance.models.Model): def test_repeat_calls(llamacpp_model: guidance.models.Model): + # llama-cpp-python 0.2.79 appears to have made models non-deterministic on Windows llama2 = llamacpp_model a = [] lm = llama2 + "How much is 2 + 2? " + gen(name="test", max_tokens=10) From 2261d23fdcfbd693e974521e660fd4875b9ced2e Mon Sep 17 00:00:00 2001 From: Richard Edgar Date: Fri, 21 Jun 2024 13:07:13 -0400 Subject: [PATCH 6/7] [Bug] Update Mistral chat template (#918) It appears that the Mistral chat template has [had an update](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2/commit/1296dc8fd9b21e6424c9c305c06db9ae60c03ace), so we need to match this --- guidance/chat.py | 41 ++++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/guidance/chat.py b/guidance/chat.py index faca5008d..1e90e0088 100644 --- a/guidance/chat.py +++ b/guidance/chat.py @@ -1,37 +1,40 @@ -import warnings -import uuid import inspect +import warnings + +from typing import Dict, Union + class ChatTemplate: """Contains template for all chat and instruct tuned models.""" - def get_role_start(self, role_name, **kwargs): + def get_role_start(self, role_name: str, **kwargs): raise NotImplementedError( "You need to use a ChatTemplate subclass that overrides the get_role_start method" ) - def get_role_end(self, role_name=None): + def get_role_end(self, role_name: Union[str, None] = None): raise NotImplementedError( "You need to use a ChatTemplate subclass that overrides the get_role_start method" ) - -class ChatTemplateCache: + + +class ChatTemplateCache: def __init__(self): - self._cache = {} + self._cache: Dict[str, ChatTemplate] = {} - def __getitem__(self, key): + def __getitem__(self, key: str) -> ChatTemplate: key_compact = key.replace(" ", "") return self._cache[key_compact] - - def __setitem__(self, key, value): + def __setitem__(self, key: str, value): key_compact = key.replace(" ", "") self._cache[key_compact] = value - def __contains__(self, key): + def __contains__(self, key: str): key_compact = key.replace(" ", "") return key_compact in self._cache - + + # Feels weird having to instantiate this, but it's a singleton for all purposes # TODO [HN]: Add an alias system so we can instantiate with other simple keys (e.g. "llama2" instead of the full template string) CHAT_TEMPLATE_CACHE = ChatTemplateCache() @@ -76,7 +79,7 @@ def load_template_class(chat_template=None): # By default, use the ChatML Template. Warnings to user will happen downstream only if they use chat roles. 
return ChatMLTemplate - + def _template_class_from_string(template_str): """Utility method to try to create a chat template class from a string.""" @@ -99,7 +102,7 @@ def get_role_start(self, role_name): def get_role_end(self, role_name=None): return "<|im_end|>\n" - + CHAT_TEMPLATE_CACHE[chatml_template] = ChatMLTemplate @@ -156,7 +159,7 @@ def get_role_start(self, role_name): def get_role_end(self, role_name=None): return "<|eot_id|>" - + CHAT_TEMPLATE_CACHE[llama3_template] = Llama3ChatTemplate # -------------------------------------------------- @@ -178,7 +181,7 @@ def get_role_start(self, role_name): def get_role_end(self, role_name=None): return "<|end|>" - + CHAT_TEMPLATE_CACHE[phi3_template] = Phi3ChatTemplate @@ -186,7 +189,7 @@ def get_role_end(self, role_name=None): # @@@@ Mistral-7B-Instruct-v0.2 @@@@ # -------------------------------------------------- # [05/08/24] https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2/blob/main/tokenizer_config.json#L42 -mistral_7b_instruct_template = "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}" +mistral_7b_instruct_template = "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}" class Mistral7BInstructChatTemplate(ChatTemplate): # available_roles = ["user", "assistant"] template_str = mistral_7b_instruct_template @@ -206,5 +209,5 @@ def get_role_end(self, role_name=None): return "" else: raise UnsupportedRoleException(role_name, self) - -CHAT_TEMPLATE_CACHE[mistral_7b_instruct_template] = Mistral7BInstructChatTemplate \ No newline at end of file + +CHAT_TEMPLATE_CACHE[mistral_7b_instruct_template] = Mistral7BInstructChatTemplate From a06ba636697524dbec279e31a09bb8abea361c36 Mon Sep 17 00:00:00 2001 From: Harsha-Nori Date: Fri, 21 Jun 2024 12:50:30 -0700 Subject: [PATCH 7/7] Fixing update to mistral chat template (#919) --- guidance/chat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/guidance/chat.py b/guidance/chat.py index 1e90e0088..aab8e13b1 100644 --- a/guidance/chat.py +++ b/guidance/chat.py @@ -198,7 +198,7 @@ def get_role_start(self, role_name): if role_name == "user": return "[INST] " elif role_name == "assistant": - return "" + return " " else: raise UnsupportedRoleException(role_name, self)