Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Python: Introducing vector search to the redis collections #9664

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion python/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,8 @@ qdrant = [
]
redis = [
"redis[hiredis] ~= 5.0",
"types-redis ~= 4.6.0.20240425"
"types-redis ~= 4.6.0.20240425",
"redisvl >= 0.3.6",
]
usearch = [
"usearch ~= 2.9",
Expand Down Expand Up @@ -206,3 +207,4 @@ name = "semantic_kernel"
requires = ["flit-core >= 3.9,<4.0"]
build-backend = "flit_core.buildapi"


90 changes: 59 additions & 31 deletions python/samples/concepts/memory/new_memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,28 +10,32 @@
import numpy as np

from semantic_kernel import Kernel
from semantic_kernel.connectors.ai.open_ai import OpenAIEmbeddingPromptExecutionSettings, OpenAITextEmbedding
from semantic_kernel.connectors.ai.open_ai.services.azure_text_embedding import AzureTextEmbedding
from semantic_kernel.connectors.memory.azure_ai_search import AzureAISearchCollection
from semantic_kernel.connectors.memory.azure_cosmos_db.azure_cosmos_db_no_sql_collection import (
AzureCosmosDBNoSQLCollection,
from semantic_kernel.connectors.ai.open_ai import (
AzureTextEmbedding,
OpenAIEmbeddingPromptExecutionSettings,
OpenAITextEmbedding,
)
from semantic_kernel.connectors.memory.azure_ai_search import AzureAISearchCollection
from semantic_kernel.connectors.memory.azure_cosmos_db import AzureCosmosDBNoSQLCollection
from semantic_kernel.connectors.memory.in_memory import InMemoryVectorCollection
from semantic_kernel.connectors.memory.postgres.postgres_collection import PostgresCollection
from semantic_kernel.connectors.memory.postgres import PostgresCollection
from semantic_kernel.connectors.memory.qdrant import QdrantCollection
from semantic_kernel.connectors.memory.redis import RedisHashsetCollection, RedisJsonCollection
from semantic_kernel.connectors.memory.weaviate.weaviate_collection import WeaviateCollection
from semantic_kernel.connectors.memory.weaviate import WeaviateCollection
from semantic_kernel.data import (
DistanceFunction,
IndexKind,
VectorizedSearchMixin,
VectorSearchFilter,
VectorSearchOptions,
VectorStoreRecordCollection,
VectorStoreRecordDataField,
VectorStoreRecordKeyField,
VectorStoreRecordUtils,
VectorStoreRecordVectorField,
VectorTextSearchMixin,
vectorstoremodel,
)
from semantic_kernel.data.const import DistanceFunction, IndexKind
from semantic_kernel.data.vector_search.vector_search_options import VectorSearchOptions
from semantic_kernel.data.vector_search.vectorized_search import VectorizedSearchMixin


def get_data_model_array(index_kind: IndexKind, distance_function: DistanceFunction) -> type:
Expand All @@ -50,11 +54,12 @@ class DataModelArray:
deserialize_function=np.array,
),
] = None
other: str | None = None
id: Annotated[str, VectorStoreRecordKeyField()] = field(default_factory=lambda: str(uuid4()))
content: Annotated[
str, VectorStoreRecordDataField(has_embedding=True, embedding_property_name="vector", property_type="str")
] = "content1"
title: Annotated[str, VectorStoreRecordDataField(property_type="str", is_full_text_searchable=True)] = "title"
tag: Annotated[str, VectorStoreRecordDataField(property_type="str", is_filterable=True)] = "tag"

return DataModelArray

Expand All @@ -73,11 +78,12 @@ class DataModelList:
property_type="float",
),
] = None
other: str | None = None
id: Annotated[str, VectorStoreRecordKeyField()] = field(default_factory=lambda: str(uuid4()))
content: Annotated[
str, VectorStoreRecordDataField(has_embedding=True, embedding_property_name="vector", property_type="str")
] = "content1"
title: Annotated[str, VectorStoreRecordDataField(property_type="str", is_full_text_searchable=True)] = "title"
tag: Annotated[str, VectorStoreRecordDataField(property_type="str", is_filterable=True)] = "tag"

return DataModelList

Expand Down Expand Up @@ -121,7 +127,7 @@ class DataModelList:
collection_name=collection_name,
prefix_collection_name_to_key_names=True,
),
"redis_hashset": lambda: RedisHashsetCollection[DataModel](
"redis_hash": lambda: RedisHashsetCollection[DataModel](
data_model_type=DataModel,
collection_name=collection_name,
prefix_collection_name_to_key_names=True,
Expand All @@ -146,6 +152,13 @@ class DataModelList:
}


def print_record(record) -> None:
    """Print the id, content and (when present) the vector head of a record.

    Args:
        record: Any object exposing ``id``, ``content`` and ``vector``
            attributes. ``vector`` may be ``None``, in which case the
            vector line is omitted.
    """
    print(f" Found id: {record.id}")
    print(f" Content: {record.content}")
    # Only the first five components are shown to keep the output readable.
    if record.vector is not None:
        print(f" Vector (first five): {record.vector[:5]}")


async def main(collection: str, use_azure_openai: bool, embedding_model: str):
print("-" * 30)
kernel = Kernel()
Expand All @@ -159,44 +172,59 @@ async def main(collection: str, use_azure_openai: bool, embedding_model: str):
print(f"Creating {collection} collection!")
await record_collection.create_collection_if_not_exists()

record1 = DataModel(content="Semantic Kernel is awesome", id="e6103c03-487f-4d7d-9c23-4723651c17f4")
record1 = DataModel(
content="Semantic Kernel is awesome",
id="e6103c03-487f-4d7d-9c23-4723651c17f4",
title="Semantic Kernel Languages",
tag="general",
)
record2 = DataModel(
content="Semantic Kernel is available in dotnet, python and Java.",
id="09caec77-f7e1-466a-bcec-f1d51c5b15be",
title="Semantic Kernel Languages",
tag="general",
)

print("Adding records!")
records = await VectorStoreRecordUtils(kernel).add_vector_to_records(
[record1, record2], data_model_type=DataModel
)

keys = await record_collection.upsert_batch(records)
print(f" Upserted {keys=}")
print("Getting records!")
results = await record_collection.get_batch([record1.id, record2.id])
if results:
for result in results:
print(f" Found id: {result.id}")
print(f" Content: {result.content}")
if result.vector is not None:
print(f" Vector (first five): {result.vector[:5]}")
for record in results:
print_record(record)
else:
print("Nothing found...")
options = VectorSearchOptions(
vector_field_name="vector",
include_vectors=True,
filter=VectorSearchFilter.equal_to("tag", "general"),
)
if isinstance(record_collection, VectorTextSearchMixin):
print("-" * 30)
print("Using text search")
search_results = await record_collection.text_search("python", options)
if search_results.total_count == 0:
print("\nNothing found...\n")
else:
[print_record(result.record) async for result in search_results.results]
if isinstance(record_collection, VectorizedSearchMixin):
print("-" * 30)
print("Using vectorized search, the distance function is set to cosine_similarity.")
print("This means that the higher the score the more similar.")
print("Using vectorized search, for `python`")
print("The distance function is set to the default of the store.")
search_results = await record_collection.vectorized_search(
vector=(await embedder.generate_raw_embeddings(["python"]))[0],
options=VectorSearchOptions(vector_field_name="vector", include_vectors=True),
vector=(await embedder.generate_raw_embeddings(["python"]))[0], options=options
)
results = [record async for record in search_results.results]
for result in results:
print(f" Found id: {result.record.id}")
print(f" Content: {result.record.content}")
if result.record.vector is not None:
print(f" Vector (first five): {result.record.vector[:5]}")
print(f" Score: {result.score:.4f}")
print("")
if search_results.total_count == 0:
print("\nNothing found...\n")
else:
async for result in search_results.results:
print_record(result.record)
print(f" Score: {result.score:.4f}\n")
print("-" * 30)
print("Deleting collection!")
await record_collection.delete_collection()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,10 @@
}

# Lookup table from DistanceFunction members to VectorSearchAlgorithmMetric values.
# Both COSINE_SIMILARITY and COSINE_DISTANCE resolve to the same COSINE metric.
# The plain string key "default" is the fallback entry (COSINE); presumably it is
# looked up when a field declares no distance function — confirm against callers.
DISTANCE_FUNCTION_MAP = {
DistanceFunction.COSINE_SIMILARITY: VectorSearchAlgorithmMetric.COSINE,
DistanceFunction.COSINE_DISTANCE: VectorSearchAlgorithmMetric.COSINE,
DistanceFunction.DOT_PROD: VectorSearchAlgorithmMetric.DOT_PRODUCT,
DistanceFunction.EUCLIDEAN_DISTANCE: VectorSearchAlgorithmMetric.EUCLIDEAN,
DistanceFunction.HAMMING: VectorSearchAlgorithmMetric.HAMMING,
"default": VectorSearchAlgorithmMetric.COSINE,
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -145,12 +145,21 @@ def data_model_definition_to_azure_ai_search_index(
algorithm_configuration_name=algo_name,
)
)
algo_class, algo_params = INDEX_ALGORITHM_MAP[field.index_kind or "default"]
try:
algo_class, algo_params = INDEX_ALGORITHM_MAP[field.index_kind or "default"]
except KeyError as e:
raise ServiceInitializationError(f"Error: {field.index_kind} not found in INDEX_ALGORITHM_MAP.") from e
try:
distance_metric = DISTANCE_FUNCTION_MAP[field.distance_function or "default"]
except KeyError as e:
raise ServiceInitializationError(
f"Error: {field.distance_function} not found in DISTANCE_FUNCTION_MAP."
) from e
search_algos.append(
algo_class(
name=algo_name,
parameters=algo_params(
metric=DISTANCE_FUNCTION_MAP[field.distance_function or "default"],
metric=distance_metric,
),
)
)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Copyright (c) Microsoft. All rights reserved.

from semantic_kernel.connectors.memory.azure_cosmos_db.azure_cosmos_db_no_sql_collection import (
AzureCosmosDBNoSQLCollection,
)
from semantic_kernel.connectors.memory.azure_cosmos_db.azure_cosmos_db_no_sql_composite_key import (
AzureCosmosDBNoSQLCompositeKey,
)
from semantic_kernel.connectors.memory.azure_cosmos_db.azure_cosmos_db_no_sql_settings import AzureCosmosDBNoSQLSettings
from semantic_kernel.connectors.memory.azure_cosmos_db.azure_cosmos_db_no_sql_store import AzureCosmosDBNoSQLStore

__all__ = [
"AzureCosmosDBNoSQLCollection",
"AzureCosmosDBNoSQLCompositeKey",
"AzureCosmosDBNoSQLSettings",
"AzureCosmosDBNoSQLStore",
]
6 changes: 6 additions & 0 deletions python/semantic_kernel/connectors/memory/redis/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from enum import Enum

from redis.commands.search.indexDefinition import IndexType
from redisvl.schema import StorageType

from semantic_kernel.data.const import DistanceFunction

Expand All @@ -18,6 +19,11 @@ class RedisCollectionTypes(str, Enum):
RedisCollectionTypes.HASHSET: IndexType.HASH,
}

# Lookup table from RedisCollectionTypes members to the redisvl StorageType enum:
# JSON collections map to StorageType.JSON, hashset collections to StorageType.HASH.
STORAGE_TYPE_MAP = {
RedisCollectionTypes.JSON: StorageType.JSON,
RedisCollectionTypes.HASHSET: StorageType.HASH,
}

DISTANCE_FUNCTION_MAP = {
DistanceFunction.COSINE_SIMILARITY: "COSINE",
DistanceFunction.DOT_PROD: "IP",
Expand Down
Loading
Loading