Skip to content

Commit

Permalink
enhance: Rename tokenizer_params to analyzer_params (#2323)
Browse files Browse the repository at this point in the history
Signed-off-by: aoiasd <[email protected]>
  • Loading branch information
aoiasd authored Nov 5, 2024
1 parent 47c71af commit 55800a6
Show file tree
Hide file tree
Showing 5 changed files with 6 additions and 6 deletions.
2 changes: 1 addition & 1 deletion examples/hello_bm25.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@
fields = [
FieldSchema(name="id", dtype=DataType.INT64, is_primary=True, auto_id=True),
FieldSchema(name="sparse", dtype=DataType.SPARSE_FLOAT_VECTOR),
FieldSchema(name="document", dtype=DataType.VARCHAR, max_length=1000, enable_tokenizer=True),
FieldSchema(name="document", dtype=DataType.VARCHAR, max_length=1000, enable_analyzer=True),
]

bm25_function = Function(
Expand Down
2 changes: 1 addition & 1 deletion examples/hello_hybrid_bm25.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def random_embedding(texts):
# Use auto generated id as primary key
FieldSchema(name="pk", dtype=DataType.VARCHAR, is_primary=True, auto_id=True, max_length=100),
# Store the original text so it can be retrieved based on semantic distance
FieldSchema(name="text", dtype=DataType.VARCHAR, max_length=512, enable_tokenizer=True),
FieldSchema(name="text", dtype=DataType.VARCHAR, max_length=512, enable_analyzer=True),
# We need a sparse vector field to perform full text search with BM25,
# but you don't need to provide data for it when inserting data.
FieldSchema(name="sparse_vector", dtype=DataType.SPARSE_FLOAT_VECTOR),
Expand Down
2 changes: 1 addition & 1 deletion examples/milvus_client/bm25.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

schema = milvus_client.create_schema()
schema.add_field("id", DataType.INT64, is_primary=True, auto_id=False)
schema.add_field("document_content", DataType.VARCHAR, max_length=9000, enable_tokenizer=True)
schema.add_field("document_content", DataType.VARCHAR, max_length=9000, enable_analyzer=True)
schema.add_field("sparse_vector", DataType.SPARSE_FLOAT_VECTOR)

bm25_function = Function(
Expand Down
4 changes: 2 additions & 2 deletions pymilvus/orm/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
"max_length",
"max_capacity",
"enable_match",
"enable_tokenizer",
"tokenizer_params",
"enable_analyzer",
"analyzer_params",
)

CALC_DIST_IDS = "ids"
Expand Down
2 changes: 1 addition & 1 deletion pymilvus/orm/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -451,7 +451,7 @@ def _parse_type_params(self):
if self._kwargs[k].lower() == "false":
self._type_params[k] = False
continue
if k == "tokenizer_params":
if k == "analyzer_params":
# TODO: a more complicated JSON value may be reordered, which
# can still cause server_schema == schema to be False.
# A better approach is needed.
Expand Down

0 comments on commit 55800a6

Please sign in to comment.