From 8617190a3f5a0dc1ef9c68077ed2c5ce92ddce34 Mon Sep 17 00:00:00 2001 From: Dobiichi-Origami <56953648+Dobiichi-Origami@users.noreply.github.com> Date: Tue, 14 May 2024 17:08:29 +0800 Subject: [PATCH 1/7] adding support for baidu qianfan and Ernie --- guidance/models/_qianfan.py | 177 ++++++++++++++++++++++++++++++++++++ 1 file changed, 177 insertions(+) create mode 100644 guidance/models/_qianfan.py diff --git a/guidance/models/_qianfan.py b/guidance/models/_qianfan.py new file mode 100644 index 000000000..d412e02ff --- /dev/null +++ b/guidance/models/_qianfan.py @@ -0,0 +1,177 @@ +import copy +import re +from ._model import Chat +from ._grammarless import Grammarless, GrammarlessEngine + +_image_token_pattern = re.compile(r"<\|_image:(.*)\|>") + + +class ClassUnavailableException(Exception): + pass + + +class QianfanAI(Grammarless): + def __init__( + self, + model=None, + echo=True, + max_streaming_tokens=None, + timeout=0.5, + compute_log_probs=False, + **kwargs, + ): + """Build a new QianfanAI model object that represents a model in a given state.""" + + # if we are called directly (as opposed to through super()) then we convert ourselves to a more specific subclass if possible + if self.__class__ is QianfanAI: + raise ClassUnavailableException("Cannot use `QianfanAI` directly, please use `QianfanAIChat` or `QianfanAICompletion` instead") + + engine_map = { + QianfanAIChat: QianfanAIChatEngine, + QianfanAICompletion: QianfanAICompletionEngine, + } + + super().__init__( + engine=engine_map[self.__class__]( + model=model, + max_streaming_tokens=max_streaming_tokens, + timeout=timeout, + compute_log_probs=compute_log_probs, + **kwargs, + ), + echo=echo, + ) + + +class QianfanAIEngine(GrammarlessEngine): + + def __init__( + self, + model, + max_streaming_tokens, + timeout, + compute_log_probs, + **kwargs, + ): + try: + from qianfan import ChatCompletion, Completion + except ModuleNotFoundError: + raise Exception( + "Please install the Baidu Qianfan package using `pip install qianfan` " + "in order to use guidance.models.QianfanAI!" + ) + + assert ( + not compute_log_probs + ), "We don't support compute_log_probs=True yet for QianfanAIEngine!" 
+ self.model_name = model + + self.model_obj = ChatCompletion(model=model, **kwargs) if self.__class__ is QianfanAIChatEngine else Completion(model=model, **kwargs) + + self.extra_arguments = copy.deepcopy(kwargs) + self.extra_arguments.pop("endpoint") if "endpoint" in kwargs else None + + super().__init__(None, max_streaming_tokens, timeout, compute_log_probs) + + +class QianfanAIChat(QianfanAI, Chat): + pass + + +class QianfanAIChatEngine(QianfanAIEngine): + def _generator(self, prompt, temperature): + + # find the system text + pos = 0 + + system_start = b"<|im_start|>system\n" + user_start = b"<|im_start|>user\n" + assistant_start = b"<|im_start|>assistant\n" + role_end = b"<|im_end|>" + + # find the system text + system_text = "" + if prompt.startswith(system_start): + pos += len(system_start) + system_end_pos = prompt.find(role_end) + system_text = prompt[pos:system_end_pos].decode("utf8") + pos = system_end_pos + len(role_end) + + # find the user/assistant pairs + messages = [] + valid_end = False + while True: + + # find the user text + if prompt[pos:].startswith(user_start): + pos += len(user_start) + end_pos = prompt[pos:].find(role_end) + if end_pos < 0: + break + messages.append( + dict( + role="user", + content=prompt[pos: pos + end_pos].decode("utf8"), + ) + ) + pos += end_pos + len(role_end) + elif prompt[pos:].startswith(assistant_start): + pos += len(assistant_start) + end_pos = prompt[pos:].find(role_end) + if end_pos < 0: + valid_end = True + break + messages.append( + dict( + role="assistant", + content=prompt[pos: pos + end_pos].decode("utf8"), + ) + ) + pos += end_pos + len(role_end) + else: + raise Exception( + "It looks like your prompt is not a well formed chat prompt! Please enclose all model state appends inside chat role blocks like `user()` or `assistant()`." + ) + + self._data = prompt[:pos] + + assert len(messages) > 0, "Bad chat format! No chat blocks were defined." + assert ( + messages[-1]["role"] == "user" + ), "Bad chat format! There must be a user() role before the last assistant() role." + assert valid_end, "Bad chat format! You must generate inside assistant() roles." 
+ + if temperature == 0.0: + temperature = 0.0001 + + input_kwargs = {"temperature": temperature} + input_kwargs.update(self.extra_arguments) + + if system_text: + input_kwargs["system"] = system_text + + input_kwargs["stream"] = True + + result_iter = self.model_obj.do(messages, **input_kwargs) + for response in result_iter: + yield response.body["result"].encode("utf8") + + +class QianfanAICompletion(QianfanAI): + pass + + +class QianfanAICompletionEngine(QianfanAIEngine): + def _generator(self, prompt, temperature): + if temperature == 0.0: + temperature = 0.0001 + + input_kwargs = {"temperature": temperature} + input_kwargs.update(self.extra_arguments) + input_kwargs["stream"] = True + + self._data = prompt + + result_iter = self.model_obj.do(prompt.decode("utf8"), **input_kwargs) + for response in result_iter: + yield response.body["result"].encode("utf8") \ No newline at end of file From 574bb932b0a749dbab950285f6b2f4ffbc04248b Mon Sep 17 00:00:00 2001 From: Dobiichi-Origami <56953648+Dobiichi-Origami@users.noreply.github.com> Date: Wed, 5 Jun 2024 14:57:20 +0800 Subject: [PATCH 2/7] refactor --- guidance/models/_qianfan.py | 61 +++++++++++++++++-------------------- 1 file changed, 28 insertions(+), 33 deletions(-) diff --git a/guidance/models/_qianfan.py b/guidance/models/_qianfan.py index d412e02ff..73e1a6d48 100644 --- a/guidance/models/_qianfan.py +++ b/guidance/models/_qianfan.py @@ -1,9 +1,16 @@ import copy -import re -from ._model import Chat + +import typing + from ._grammarless import Grammarless, GrammarlessEngine -_image_token_pattern = re.compile(r"<\|_image:(.*)\|>") + +try: + import qianfan + + client_class: typing.Optional[typing.Type[qianfan.ChatCompletion]] = qianfan.ChatCompletion +except ImportError: + client_class = None class ClassUnavailableException(Exception): @@ -18,25 +25,21 @@ def __init__( max_streaming_tokens=None, timeout=0.5, compute_log_probs=False, + is_chat_model=True, **kwargs, ): """Build a new QianfanAI model object that represents a model in a given state.""" - # if we are called directly (as opposed to through super()) then we convert ourselves to a more specific subclass if possible - if self.__class__ is QianfanAI: - raise ClassUnavailableException("Cannot use `QianfanAI` directly, please use `QianfanAIChat` or `QianfanAICompletion` instead") - - engine_map = { - QianfanAIChat: QianfanAIChatEngine, - QianfanAICompletion: QianfanAICompletionEngine, - } + if client_class is None: + raise ClassUnavailableException("Please execute `pip install qianfan` before using QianfanAI component") super().__init__( - engine=engine_map[self.__class__]( + engine=QianfanAIEngine( model=model, max_streaming_tokens=max_streaming_tokens, timeout=timeout, compute_log_probs=compute_log_probs, + is_chat_model=is_chat_model, **kwargs, ), echo=echo, @@ -51,35 +54,33 @@ def __init__( max_streaming_tokens, timeout, compute_log_probs, + is_chat_model=True, **kwargs, ): - try: - from qianfan import ChatCompletion, Completion - except ModuleNotFoundError: - raise Exception( - "Please install the Baidu Qianfan package using `pip install qianfan` " - "in order to use guidance.models.QianfanAI!" - ) + if client_class is None: + raise ClassUnavailableException("Please execute `pip install qianfan` before using QianfanAI component") assert ( not compute_log_probs ), "We don't support compute_log_probs=True yet for QianfanAIEngine!" 
+ self.model_name = model - self.model_obj = ChatCompletion(model=model, **kwargs) if self.__class__ is QianfanAIChatEngine else Completion(model=model, **kwargs) + self.is_chat_model = is_chat_model + self.model_obj = qianfan.ChatCompletion(model=model, **kwargs) if self.is_chat_model else qianfan.Completion(model=model, **kwargs) self.extra_arguments = copy.deepcopy(kwargs) self.extra_arguments.pop("endpoint") if "endpoint" in kwargs else None super().__init__(None, max_streaming_tokens, timeout, compute_log_probs) + def _generator(self, prompt, temperature): + if self.is_chat_model: + return self._chat_generator(prompt, temperature) -class QianfanAIChat(QianfanAI, Chat): - pass - + return self._completion_generator(prompt, temperature) -class QianfanAIChatEngine(QianfanAIEngine): - def _generator(self, prompt, temperature): + def _chat_generator(self, prompt, temperature): # find the system text pos = 0 @@ -156,13 +157,7 @@ def _generator(self, prompt, temperature): for response in result_iter: yield response.body["result"].encode("utf8") - -class QianfanAICompletion(QianfanAI): - pass - - -class QianfanAICompletionEngine(QianfanAIEngine): - def _generator(self, prompt, temperature): + def _completion_generator(self, prompt, temperature): if temperature == 0.0: temperature = 0.0001 @@ -174,4 +169,4 @@ def _generator(self, prompt, temperature): result_iter = self.model_obj.do(prompt.decode("utf8"), **input_kwargs) for response in result_iter: - yield response.body["result"].encode("utf8") \ No newline at end of file + yield response.body["result"].encode("utf8") From 08af3e604c81e229d1bbf8745ab9477346f89570 Mon Sep 17 00:00:00 2001 From: Dobiichi-Origami <56953648+Dobiichi-Origami@users.noreply.github.com> Date: Wed, 5 Jun 2024 23:24:17 +0800 Subject: [PATCH 3/7] fix mypy issue --- guidance/models/_qianfan.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/guidance/models/_qianfan.py b/guidance/models/_qianfan.py index 73e1a6d48..9a2444c87 100644 --- a/guidance/models/_qianfan.py +++ b/guidance/models/_qianfan.py @@ -6,7 +6,7 @@ try: - import qianfan + import qianfan # type: ignore client_class: typing.Optional[typing.Type[qianfan.ChatCompletion]] = qianfan.ChatCompletion except ImportError: From 7ae2f5214f9efd15045171a166cf99820ea73af0 Mon Sep 17 00:00:00 2001 From: Hudson Cooper Date: Wed, 19 Jun 2024 12:44:25 -0700 Subject: [PATCH 4/7] Allow generating arbitrary (schemaless) JSON (#892) Calling `guidance.json` with an empty schema generates arbitrary JSON. This closes #887 -- to quote @wjn0, there are several motivations for this: - APIs such as OpenAI allow users to request only valid JSON be generated sans schema, so in some sense this would give feature parity for local LLMs. - Large JSON schemas often include "arbitrary" properties, e.g. properties that are allowed to be any valid JSON value: https://json-schema.org/understanding-json-schema/basics#hello-world! 
--- guidance/library/_json.py | 156 +++++++++++++----- tests/unit/library/test_json.py | 270 +++++++++++++++++++++++++++++++- 2 files changed, 384 insertions(+), 42 deletions(-) diff --git a/guidance/library/_json.py b/guidance/library/_json.py index 40736a995..846b5f81b 100644 --- a/guidance/library/_json.py +++ b/guidance/library/_json.py @@ -1,4 +1,5 @@ from json import dumps as json_dumps +from enum import Enum from typing import ( Any, Callable, @@ -34,7 +35,47 @@ def _to_compact_json(target: Any) -> str: return json_dumps(target, separators=(",", ":")) -_DEFS_KEYS = ["$defs", "definitions"] +class Keyword(str, Enum): + ANYOF = "anyOf" + ALLOF = "allOf" + REF = "$ref" + CONST = "const" + ENUM = "enum" + TYPE = "type" + + +KEYS = {member.value for member in Keyword} + +DEFS_KEYS = {"$defs", "definitions"} + +IGNORED_KEYS = { + "$schema", + "$id", + "$comment", + "title", + "description", + "default", + "examples", + "required", # TODO: implement and remove from ignored list +} + +TYPE_SPECIFIC_KEYS = { + "array": {"items", "prefixItems", "minItems", "maxItems"}, + "object": {"properties", "additionalProperties"}, +} + + +def validate_json_node_keys(node: Mapping[str, Any]): + keys = set(node.keys()) + valid_keys = KEYS | IGNORED_KEYS | DEFS_KEYS + if Keyword.TYPE in node: + valid_keys |= TYPE_SPECIFIC_KEYS.get(node[Keyword.TYPE], set()) + invalid_keys = keys - valid_keys + if invalid_keys: + raise ValueError( + f"JSON schema had keys that could not be processed: {invalid_keys}" + f"\nSchema: {node}" + ) @guidance(stateless=True) @@ -73,21 +114,25 @@ def _gen_json_string(lm): def _gen_json_object( lm, *, - properties: Union[Mapping[str, Any], None], - additional_properties: Union[Mapping[str, Any], None], + properties: Mapping[str, Any], + additional_properties: Union[bool, Mapping[str, Any]], definitions: Mapping[str, Callable[[], GrammarFunction]], ): + if additional_properties is True: + # True means that anything goes + additional_properties = {} + lm += "{" if properties: lm += _process_properties(properties=properties, definitions=definitions) - if properties and additional_properties: + if properties and additional_properties is not False: lm += optional( "," + _process_additional_properties( additional_properties=additional_properties, definitions=definitions ) ) - elif additional_properties: + elif additional_properties is not False: lm += optional( _process_additional_properties( additional_properties=additional_properties, definitions=definitions @@ -138,19 +183,20 @@ def _process_additional_properties( def _gen_json_array( lm, *, - prefix_items_schema: Optional[Sequence[Mapping[str, Any]]], - item_schema: Optional[Mapping[str, Any]], + prefix_items_schema: Sequence[Mapping[str, Any]], + item_schema: Union[bool, Mapping[str, Any]], min_items: int, max_items: Optional[int], definitions: Mapping[str, Callable[[], GrammarFunction]], ): - if prefix_items_schema is None: - prefix_items_schema = [] + if item_schema is True: + # True means that anything goes + item_schema = {} - if len(prefix_items_schema) < min_items and item_schema is None: + if len(prefix_items_schema) < min_items and item_schema is False: raise ValueError( - "No items schema provided, but prefixItems has too few elements " - f"({len(prefix_items_schema)}) to satisfy minItems ({min_items})" + f"PrefixItems has too few elements ({len(prefix_items_schema)}) to" + f" satisfy minItems ({min_items}) but no extra items were allowed" ) if max_items is not None and max_items < min_items: @@ -168,7 +214,7 @@ def 
_gen_json_array( for i in range(n_to_add): if i < len(prefix_items_schema): schema = prefix_items_schema[i] - elif item_schema is not None: + elif item_schema is not False: schema = item_schema else: assert i >= min_items @@ -181,7 +227,7 @@ def _gen_json_array( else: optional_items.append(item) - if max_items is None and item_schema is not None: + if max_items is None and item_schema is not False: # Add an infinite tail of items item = _gen_json(json_schema=item_schema, definitions=definitions) optional_items.append(item + zero_or_more("," + item)) @@ -235,42 +281,66 @@ def _process_enum(lm, *, options: Sequence[Mapping[str, Any]]): return lm + select(options=all_opts) +@guidance(stateless=True) +def _gen_json_any(lm): + return lm + select( + [ + _gen_json(json_schema={"type": "null"}, definitions={}), + _gen_json(json_schema={"type": "boolean"}, definitions={}), + _gen_json(json_schema={"type": "integer"}, definitions={}), + _gen_json(json_schema={"type": "number"}, definitions={}), + _gen_json(json_schema={"type": "string"}, definitions={}), + # Recursive cases + _gen_json( + json_schema={ + "type": "array", + "items": True, + }, + definitions={}, + ), + _gen_json( + json_schema={ + "type": "object", + "additionalProperties": True, + }, + definitions={}, + ), + ] + ) + + @guidance(stateless=True) def _gen_json( lm, json_schema: Mapping[str, Any], definitions: Mapping[str, Callable[[], GrammarFunction]], ): - ANYOF_STRING = "anyOf" - if ANYOF_STRING in json_schema: + validate_json_node_keys(json_schema) + + if Keyword.ANYOF in json_schema: return lm + _process_anyOf( - anyof_list=json_schema[ANYOF_STRING], definitions=definitions + anyof_list=json_schema[Keyword.ANYOF], definitions=definitions ) - ALLOF_STRING = "allOf" - if ALLOF_STRING in json_schema: - allof_list = json_schema[ALLOF_STRING] + if Keyword.ALLOF in json_schema: + allof_list = json_schema[Keyword.ALLOF] if len(allof_list) != 1: raise ValueError("Only support allOf with exactly one item") return lm + _gen_json(allof_list[0], definitions) - REF_STRING = "$ref" - if REF_STRING in json_schema: + if Keyword.REF in json_schema: return lm + _get_definition( - reference=json_schema[REF_STRING], definitions=definitions + reference=json_schema[Keyword.REF], definitions=definitions ) - CONST_STRING = "const" - if CONST_STRING in json_schema: - return lm + _to_compact_json(json_schema[CONST_STRING]) + if Keyword.CONST in json_schema: + return lm + _to_compact_json(json_schema[Keyword.CONST]) - ENUM_STRING = "enum" - if ENUM_STRING in json_schema: - return lm + _process_enum(options=json_schema["enum"]) + if Keyword.ENUM in json_schema: + return lm + _process_enum(options=json_schema[Keyword.ENUM]) - TYPE_STRING = "type" - if TYPE_STRING in json_schema: - target_type = json_schema["type"] + if Keyword.TYPE in json_schema: + target_type = json_schema[Keyword.TYPE] if target_type == "null": return lm + "null" if target_type == "boolean": @@ -283,21 +353,21 @@ def _gen_json( return lm + _gen_json_string() if target_type == "array": return lm + _gen_json_array( - prefix_items_schema=json_schema.get("prefixItems"), - item_schema=json_schema.get("items"), + prefix_items_schema=json_schema.get("prefixItems", []), + item_schema=json_schema.get("items", True), min_items=json_schema.get("minItems", 0), max_items=json_schema.get("maxItems"), definitions=definitions, ) if target_type == "object": return lm + _gen_json_object( - properties=json_schema.get("properties"), - additional_properties=json_schema.get("additionalProperties"), + 
properties=json_schema.get("properties", {}), + additional_properties=json_schema.get("additionalProperties", True), definitions=definitions, ) raise ValueError(f"Unsupported type in schema: {target_type}") - raise ValueError(f"Can't process JSON node: {json_schema}") + return lm + _gen_json_any() @guidance(stateless=True) @@ -306,10 +376,11 @@ def json( name: Optional[str] = None, *, schema: Union[ + None, Mapping[str, Any], Type["pydantic.BaseModel"], "pydantic.TypeAdapter", - ], + ] = None, temperature: float = 0.0, ): """Generate valid JSON according to the supplied JSON schema or `pydantic` model. @@ -347,8 +418,9 @@ def json( If this is not None then the the results of the generation will be saved as a variable on the Model object (so you can access the result as ``lm["var_name"]``). - schema : Union[Mapping[str, Any], Type[pydantic.BaseModel], pydantic.TypeAdapter] + schema : Union[None, Mapping[str, Any], Type[pydantic.BaseModel], pydantic.TypeAdapter] One of: + - None, in which case any valid JSON will be generated - A JSON schema object. This is a JSON schema string which has been passed to ``json.loads()`` - A subclass of ``pydantic.BaseModel`` - An instance of ``pydantic.TypeAdapter`` @@ -357,11 +429,13 @@ def json( # Raises jsonschema.exceptions.SchemaError or ValueError # if schema is not valid jsonschema.validators.Draft202012Validator.check_schema(schema) + elif schema is None: + schema = {} else: schema = pydantic_to_json_schema(schema) definitions: Mapping[str, Callable[[], GrammarFunction]] = {} - for dk in _DEFS_KEYS: + for dk in DEFS_KEYS: if dk in schema: assert len(definitions) == 0, "Found duplicate definitions" definitions = _build_definitions(schema[dk]) @@ -401,7 +475,7 @@ def _get_definition( ): assert definitions is not None target_definition = None - for dk in _DEFS_KEYS: + for dk in DEFS_KEYS: ref_start = f"#/{dk}/" if reference.startswith(ref_start): target_name = reference[len(ref_start) :] diff --git a/tests/unit/library/test_json.py b/tests/unit/library/test_json.py index 942f8e33f..2b1753c24 100644 --- a/tests/unit/library/test_json.py +++ b/tests/unit/library/test_json.py @@ -320,7 +320,8 @@ def test_bad_object(self, bad_string, good_bytes, failure_byte, allowed_bytes): "type": "object", "properties": { "a" : {"type": "integer"} - } + }, + "additionalProperties": false } """ schema_obj = json.loads(schema) @@ -635,6 +636,7 @@ def test_bad_with_prefix( ): schema_obj = { "prefixItems": self.prefix_schema_obj, + "items": False, "minItems": min_items, "maxItems": max_items, "type": "array", @@ -858,6 +860,39 @@ def test_nested_ref(self, temperature): # The actual check generate_and_check(target_obj, schema_obj, desired_temperature=temperature) + @pytest.mark.parametrize("temperature", [None, 0.1, 1]) + def test_multiple_refs_to_same_def(self, temperature): + schema = """{ + "$defs": { + "A": { + "properties": { + "name": { + "type": "string" + } + }, + "type": "object" + } + }, + "properties": { + "A1": { + "$ref": "#/$defs/A" + }, + "A2": { + "$ref": "#/$defs/A" + } + }, + "type": "object" + }""" + + target_obj = dict(A1=dict(name="Romulus"), A2=dict(name="Remus")) + + # First sanity check what we're setting up + schema_obj = json.loads(schema) + validate(instance=target_obj, schema=schema_obj) + + # The actual check + generate_and_check(target_obj, schema_obj, desired_temperature=temperature) + class TestAnyOf: @pytest.mark.parametrize("target_obj", [123, True]) @@ -1348,3 +1383,236 @@ def test_linked_list(self, target_obj): # The actual check 
generate_and_check(target_obj, schema_obj) + + +class TestEmptySchemas: + empty_schema = "{}" + nested_empty_schema_with_props = """{ + "properties" : { + "a": {}, + "b": {"type": "number"} + }, + "type" : "object" + }""" + + @pytest.mark.parametrize( + "target_obj", + [ + 1, + "2", + False, + [1, 2, 3], + {"a": 1}, + None, + [{"a": 1}], + {"a": [1, 2, 3]}, + {"a": {"b": 1}}, + ], + ) + @pytest.mark.parametrize("temperature", [None, 0.1, 1]) + def test_empty_schema(self, target_obj, temperature): + # First sanity check what we're setting up + schema_obj = json.loads(self.empty_schema) + validate(instance=target_obj, schema=schema_obj) + + # The actual check + generate_and_check(target_obj, schema_obj, desired_temperature=temperature) + + @pytest.mark.parametrize( + "bad_string, good_bytes, failure_byte, allowed_bytes", + [ + # {} is not carte blanche for malformed JSON + ("{a:1}", b"{", b"a", {Byte(b'"'), Byte(b"}")}), + ( + "[1,2} ", + b"[1,2", + b"}", + {Byte(b","), Byte(b"]"), Byte(b"e"), Byte(b"."), *INTEGER_FOLLOWING}, + ), + ("123a", b"123", b"a", {Byte(b"e"), Byte(b"."), *INTEGER_FOLLOWING}), + ( + "]", + b"", + b"]", + { + Byte(b"["), + Byte(b"{"), + Byte(b'"'), + Byte(b"t"), + Byte(b"f"), + Byte(b"n"), + *INTEGER_LEADING, + }, + ), + ], + ) + def test_bad_empty_schema( + self, bad_string, good_bytes, failure_byte, allowed_bytes + ): + schema_obj = json.loads(self.empty_schema) + check_match_failure( + bad_string=bad_string, + good_bytes=good_bytes, + failure_byte=failure_byte, + allowed_bytes=allowed_bytes, + schema_obj=schema_obj, + ) + + @pytest.mark.parametrize( + "schema_obj", + [ + # Empty property + {"type": "object", "properties": { "a": {} }}, + # Empty reference + {"type": "object", "properties": {"a": {"$ref": "#/$defs/A"}}, "$defs": {"A": {}}}, + ] + ) + @pytest.mark.parametrize( + "target_obj", + [ + {"a": 1}, + {"a": "2"}, + {"a": False}, + {"a": [1, 2, 3]}, + {"a": {"b": 1}}, + {"a": None}, + {"a": [{"b": 1}]}, + {"a": {"b": [1, 2, 3]}}, + {"a": {"b": {"c": 1}}}, + ], + ) + @pytest.mark.parametrize("temperature", [None, 0.1, 1]) + def test_nested_empty_schema(self, schema_obj, target_obj, temperature): + # First sanity check what we're setting up + validate(instance=target_obj, schema=schema_obj) + + # The actual check + generate_and_check(target_obj, schema_obj, desired_temperature=temperature) + + @pytest.mark.parametrize( + "schema_obj", + [ + # Empty property + {"type": "object", "properties": { "a": {} }}, + # Empty reference + {"type": "object", "properties": {"a": {"$ref": "#/$defs/A"}}, "$defs": {"A": {}}}, + ] + ) + @pytest.mark.parametrize( + "bad_obj, good_bytes, failure_byte, allowed_bytes", + [ + # Missing property -- presence of {} deeper in the schema isn't carte blanche + ({"b": 42}, b'{"', b"b", {Byte(b"a")}), + ], + ) + def test_nested_empty_schema_bad( + self, schema_obj, bad_obj, good_bytes, failure_byte, allowed_bytes + ): + bad_string = _to_compact_json(bad_obj) + check_match_failure( + bad_string=bad_string, + good_bytes=good_bytes, + failure_byte=failure_byte, + allowed_bytes=allowed_bytes, + schema_obj=schema_obj, + ) + + @pytest.mark.parametrize( + "target_obj", + [ + {"a": 1, "b": 2}, + {"a": "2", "b": 1.998}, + {"a": False, "b": -3.14}, + {"a": [1, 2, 3], "b": 42}, + {"a": {"b": 1}, "b": 0.2}, + {"a": None, "b": 5e-4}, + {"a": [{"b": 1}], "b": -5e2}, + {"a": {"b": [1, 2, 3]}, "b": 1}, + {"a": {"b": {"c": 1}}, "b": -1}, + ], + ) + @pytest.mark.parametrize("temperature", [None, 0.1, 1]) + def test_nested_empty_schema_with_props(self, 
target_obj, temperature): + # First sanity check what we're setting up + schema_obj = json.loads(self.nested_empty_schema_with_props) + validate(instance=target_obj, schema=schema_obj) + + # The actual check + generate_and_check(target_obj, schema_obj, desired_temperature=temperature) + + @pytest.mark.parametrize( + "bad_obj, good_bytes, failure_byte, allowed_bytes", + [ + # Missing property -- presence of {} deeper in the schema isn't carte blanche + ({"b": 42}, b'{"', b"b", {Byte(b"a")}), + ], + ) + def test_nested_empty_schema_with_props_bad( + self, bad_obj, good_bytes, failure_byte, allowed_bytes + ): + schema_obj = json.loads(self.nested_empty_schema_with_props) + + bad_string = _to_compact_json(bad_obj) + check_match_failure( + bad_string=bad_string, + good_bytes=good_bytes, + failure_byte=failure_byte, + allowed_bytes=allowed_bytes, + schema_obj=schema_obj, + ) + + @pytest.mark.parametrize( + "schema_obj", + [ + {"type": "array"}, + {"type": "array", "items": {}}, + {"type": "array", "items": True}, + ], + ) + def test_items(self, schema_obj): + schema_obj = {"type": "array"} + generate_and_check( + [1, 0.4, "hello", False, None, {"a": 42}, [1, 2, 3, "four"]], schema_obj + ) + + def test_no_items(self): + schema_obj = {"type": "array", "items": False} + check_match_failure( + bad_string="[42]", + good_bytes=b"[", + failure_byte=b"4", + allowed_bytes={Byte(b"]")}, # array must be empty + schema_obj=schema_obj, + ) + + @pytest.mark.parametrize( + "schema_obj", + [ + {"type": "object"}, + {"type": "object", "additionalProperties": {}}, + {"type": "object", "additionalProperties": True}, + ], + ) + def test_additionalProperties(self, schema_obj): + generate_and_check( + { + "a": 1, + "b": 0.4, + "c": "hello", + "d": False, + "e": None, + "f": {"a": 42}, + "g": [1, 2, 3, "four"], + }, + schema_obj, + ) + + def test_no_additionalProperties(self): + schema_obj = {"type": "object", "additionalProperties": False} + check_match_failure( + bad_string='{"a": 42}', + good_bytes=b"{", + failure_byte=b'"', + allowed_bytes={Byte(b"}")}, # object must be empty + schema_obj=schema_obj, + ) From c796eeb9d498bcc63117cb0872e4d4d8d8ffae60 Mon Sep 17 00:00:00 2001 From: Richard Edgar Date: Thu, 20 Jun 2024 14:04:15 -0400 Subject: [PATCH 5/7] [Bug] Exclude llama-cpp-python version (#915) The latest release of llama-cpp-python (0.2.79) is causing issues with one of our tests (on Windows). The test in question is `test_repeat_calls`, and assumes that at T=0 (the default for `gen()`), then we can repeatedly call a LlamaCpp model and get the same result. This isn't happening, although stepping through the test itself with a debugger, I don't see anything untoward (I'm not seeing a pile up of previous prompts for example). For now, exclude the latest llama-cpp-python version, but we may want to revisit this test if the problem persists. 
--- .github/workflows/action_plain_basic_tests.yml | 2 +- tests/model_specific/test_llama_cpp.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/action_plain_basic_tests.yml b/.github/workflows/action_plain_basic_tests.yml index 929ce5347..e939239db 100644 --- a/.github/workflows/action_plain_basic_tests.yml +++ b/.github/workflows/action_plain_basic_tests.yml @@ -39,7 +39,7 @@ jobs: run: | pip install sentencepiece pip uninstall -y llama-cpp-python - pip install "llama-cpp-python!=0.2.58" + pip install "llama-cpp-python!=0.2.58,!=0.2.79" - name: Run tests (except server) shell: bash run: | diff --git a/tests/model_specific/test_llama_cpp.py b/tests/model_specific/test_llama_cpp.py index 647a41aa0..246690b26 100644 --- a/tests/model_specific/test_llama_cpp.py +++ b/tests/model_specific/test_llama_cpp.py @@ -50,6 +50,7 @@ def test_llama_cpp_select2(llamacpp_model: guidance.models.Model): def test_repeat_calls(llamacpp_model: guidance.models.Model): + # llama-cpp-python 0.2.79 appears to have made models non-deterministic on Windows llama2 = llamacpp_model a = [] lm = llama2 + "How much is 2 + 2? " + gen(name="test", max_tokens=10) From 2261d23fdcfbd693e974521e660fd4875b9ced2e Mon Sep 17 00:00:00 2001 From: Richard Edgar Date: Fri, 21 Jun 2024 13:07:13 -0400 Subject: [PATCH 6/7] [Bug] Update Mistral chat template (#918) It appears that the Mistral chat template has [had an update](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2/commit/1296dc8fd9b21e6424c9c305c06db9ae60c03ace), so we need to match this --- guidance/chat.py | 41 ++++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/guidance/chat.py b/guidance/chat.py index faca5008d..1e90e0088 100644 --- a/guidance/chat.py +++ b/guidance/chat.py @@ -1,37 +1,40 @@ -import warnings -import uuid import inspect +import warnings + +from typing import Dict, Union + class ChatTemplate: """Contains template for all chat and instruct tuned models.""" - def get_role_start(self, role_name, **kwargs): + def get_role_start(self, role_name: str, **kwargs): raise NotImplementedError( "You need to use a ChatTemplate subclass that overrides the get_role_start method" ) - def get_role_end(self, role_name=None): + def get_role_end(self, role_name: Union[str, None] = None): raise NotImplementedError( "You need to use a ChatTemplate subclass that overrides the get_role_start method" ) - -class ChatTemplateCache: + + +class ChatTemplateCache: def __init__(self): - self._cache = {} + self._cache: Dict[str, ChatTemplate] = {} - def __getitem__(self, key): + def __getitem__(self, key: str) -> ChatTemplate: key_compact = key.replace(" ", "") return self._cache[key_compact] - - def __setitem__(self, key, value): + def __setitem__(self, key: str, value): key_compact = key.replace(" ", "") self._cache[key_compact] = value - def __contains__(self, key): + def __contains__(self, key: str): key_compact = key.replace(" ", "") return key_compact in self._cache - + + # Feels weird having to instantiate this, but it's a singleton for all purposes # TODO [HN]: Add an alias system so we can instantiate with other simple keys (e.g. "llama2" instead of the full template string) CHAT_TEMPLATE_CACHE = ChatTemplateCache() @@ -76,7 +79,7 @@ def load_template_class(chat_template=None): # By default, use the ChatML Template. Warnings to user will happen downstream only if they use chat roles. 
return ChatMLTemplate - + def _template_class_from_string(template_str): """Utility method to try to create a chat template class from a string.""" @@ -99,7 +102,7 @@ def get_role_start(self, role_name): def get_role_end(self, role_name=None): return "<|im_end|>\n" - + CHAT_TEMPLATE_CACHE[chatml_template] = ChatMLTemplate @@ -156,7 +159,7 @@ def get_role_start(self, role_name): def get_role_end(self, role_name=None): return "<|eot_id|>" - + CHAT_TEMPLATE_CACHE[llama3_template] = Llama3ChatTemplate # -------------------------------------------------- @@ -178,7 +181,7 @@ def get_role_start(self, role_name): def get_role_end(self, role_name=None): return "<|end|>" - + CHAT_TEMPLATE_CACHE[phi3_template] = Phi3ChatTemplate @@ -186,7 +189,7 @@ def get_role_end(self, role_name=None): # @@@@ Mistral-7B-Instruct-v0.2 @@@@ # -------------------------------------------------- # [05/08/24] https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2/blob/main/tokenizer_config.json#L42 -mistral_7b_instruct_template = "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}" +mistral_7b_instruct_template = "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}" class Mistral7BInstructChatTemplate(ChatTemplate): # available_roles = ["user", "assistant"] template_str = mistral_7b_instruct_template @@ -206,5 +209,5 @@ def get_role_end(self, role_name=None): return "" else: raise UnsupportedRoleException(role_name, self) - -CHAT_TEMPLATE_CACHE[mistral_7b_instruct_template] = Mistral7BInstructChatTemplate \ No newline at end of file + +CHAT_TEMPLATE_CACHE[mistral_7b_instruct_template] = Mistral7BInstructChatTemplate From a06ba636697524dbec279e31a09bb8abea361c36 Mon Sep 17 00:00:00 2001 From: Harsha-Nori Date: Fri, 21 Jun 2024 12:50:30 -0700 Subject: [PATCH 7/7] Fixing update to mistral chat template (#919) --- guidance/chat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/guidance/chat.py b/guidance/chat.py index 1e90e0088..aab8e13b1 100644 --- a/guidance/chat.py +++ b/guidance/chat.py @@ -198,7 +198,7 @@ def get_role_start(self, role_name): if role_name == "user": return "[INST] " elif role_name == "assistant": - return "" + return " " else: raise UnsupportedRoleException(role_name, self)