Cannot change the default indexType from AutoIndex to HNSW when creating an index in the Milvus collection via the RESTful API #38463
-
I have successfully created a Milvus collection and inserted data directly without any noticeable challenges. Now, I want to create a Milvus collection using the RESTful API as detailed in the documentation at Create Collection. I utilized the following code, which I adapted from the provided link, to create the collection, configure it, insert data, and then create an index:

import time
import requests
import json
import numpy as np
def generate_random_vector(dim: int = 512):
    """Return a random vector scaled to unit L2 norm.

    Components are drawn with ``np.random.uniform(-1, 1, dim)`` and the
    result is divided by its own L2 norm.

    Args:
        dim: Number of components. Defaults to 512, which matches the
            ``"dimension"`` sent by ``create_collection`` in this example.

    Returns:
        A 1-D NumPy array of length ``dim``.

    Raises:
        ValueError: If ``dim`` is smaller than 1.
    """
    if dim < 1:
        raise ValueError("dim must be a positive integer")
    float_array = np.random.uniform(-1, 1, dim)
    normalized_array = float_array / np.linalg.norm(float_array, ord=2)
    return normalized_array
def create_collection(collection_name: str, server_address: str):
    """POST a create-collection request and return the decoded JSON reply.

    The request goes to ``{server_address}/v2/vectordb/collections/create``.
    The body is intentionally kept exactly as posted in this discussion.

    NOTE(review): the ``describe_index`` output quoted later in this thread
    reports ``AUTOINDEX`` for a collection created with this body, and the
    reply says a request without a ``schema`` gets an automatic index.
    Confirm against the Milvus RESTful reference before reusing this payload.

    Args:
        collection_name: Value sent as ``collectionName``.
        server_address: Base URL of the server, without a trailing slash.

    Returns:
        The response body parsed by ``response.json()``.
    """
    vector_field = "Embedding_Features"
    primary_field = "IDs_Features"
    metric = "COSINE"

    # Key order is the same as in the original literal so the serialized
    # request body is unchanged.
    request_body = {
        "collectionName": collection_name,
        "dimension": 512,
        "metricType": metric,
        "vectorFieldName": vector_field,
        "primaryFieldName": primary_field,
        "idType": "Int64",
        "indexType": "HNSW",
        "primaryKey": {"name": primary_field, "type": "INT64", "primaryKey": True},
        "vectorField": {
            "name": vector_field,
            "type": "FLOAT_VECTOR",
            "primaryKey": False,
        },
        "indexes": [
            {
                "fieldName": vector_field,
                "indexName": vector_field,
                "metricType": metric,
            }
        ],
        "auto_index": False,
    }
    request_headers = {
        "x-api-key": "YOUR_API_KEY",
        "Content-Type": "application/json",
    }
    endpoint = f"{server_address}/v2/vectordb/collections/create"
    reply = requests.post(endpoint, headers=request_headers, data=json.dumps(request_body))
    return reply.json()
def validate_collection(collection_name: str, server_address: str):
    """POST a describe-collection request and return the decoded JSON reply.

    Despite the name, no checks are performed here: the function only sends
    ``{"collectionName": collection_name}`` to
    ``{server_address}/v2/vectordb/collections/describe`` and hands back
    whatever the server answered.

    Args:
        collection_name: Value sent as ``collectionName``.
        server_address: Base URL of the server, without a trailing slash.

    Returns:
        The response body parsed by ``response.json()``.
    """
    endpoint = f"{server_address}/v2/vectordb/collections/describe"
    body = json.dumps({"collectionName": collection_name})
    request_headers = {
        "x-api-key": "YOUR_API_KEY",
        "Content-Type": "application/json",
    }
    return requests.post(endpoint, headers=request_headers, data=body).json()
def drop_collection(collection_name: str, server_address: str):
    """POST a drop-collection request and return the decoded JSON reply.

    The request goes to ``{server_address}/v2/vectordb/collections/drop``
    with ``{"collectionName": collection_name}`` as its JSON body.

    Args:
        collection_name: Value sent as ``collectionName``.
        server_address: Base URL of the server, without a trailing slash.

    Returns:
        The response body parsed by ``response.json()``.
    """
    endpoint = f"{server_address}/v2/vectordb/collections/drop"
    body = json.dumps({"collectionName": collection_name})
    request_headers = {
        "x-api-key": "YOUR_API_KEY",
        "Content-Type": "application/json",
    }
    return requests.post(endpoint, headers=request_headers, data=body).json()
def insert_data_into_collection(
    collection_name: str,
    start_id: int,
    n: int,
    ids_name: str,
    features_name: str,
    server_address: str,
):
    """Insert ``n`` rows of random vectors and return the decoded JSON reply.

    Each row carries an integer id and a vector produced by
    ``generate_random_vector()``. Ids run from ``start_id + 1`` to
    ``start_id + n`` inclusive. The rows are POSTed to
    ``{server_address}/v2/vectordb/entities/insert``.

    Args:
        collection_name: Value sent as ``collectionName``.
        start_id: Offset for the generated ids; the first id is
            ``start_id + 1``.
        n: Number of rows to generate.
        ids_name: Key under which each row's id is stored. Previously this
            argument was ignored and ``"IDs_Features"`` was always used.
        features_name: Key under which each row's vector is stored.
            Previously ignored in favour of ``"Embedding_Features"``.
        server_address: Base URL of the server, without a trailing slash.

    Returns:
        The response body parsed by ``response.json()``.
    """
    url = f"{server_address}/v2/vectordb/entities/insert"
    # list() turns the NumPy array into a sequence json.dumps can serialize.
    data = [
        {ids_name: start_id + i + 1, features_name: list(generate_random_vector())}
        for i in range(n)
    ]
    payload = json.dumps({"data": data, "collectionName": collection_name})
    headers = {
        "x-api-key": "YOUR_API_KEY",
        "Content-Type": "application/json",
    }
    response = requests.post(url, headers=headers, data=payload)
    return response.json()
def describe_collection(collection_name: str, server_address: str):
    """POST a describe-collection request and return the decoded JSON reply.

    The request goes to ``{server_address}/v2/vectordb/collections/describe``
    with ``{"collectionName": collection_name}`` as its JSON body.

    Args:
        collection_name: Value sent as ``collectionName``.
        server_address: Base URL of the server, without a trailing slash.

    Returns:
        The response body parsed by ``response.json()``.
    """
    endpoint = f"{server_address}/v2/vectordb/collections/describe"
    body = json.dumps({"collectionName": collection_name})
    request_headers = {
        "x-api-key": "YOUR_API_KEY",
        "Content-Type": "application/json",
    }
    return requests.post(endpoint, headers=request_headers, data=body).json()
def describe_index(collection_name: str, server_address: str):
    """POST a describe-index request and return the decoded JSON reply.

    The request goes to ``{server_address}/v2/vectordb/indexes/describe``.
    The index name is fixed to ``"Embedding_Features"``.

    Args:
        collection_name: Value sent as ``collectionName``.
        server_address: Base URL of the server, without a trailing slash.

    Returns:
        The response body parsed by ``response.json()``.
    """
    endpoint = f"{server_address}/v2/vectordb/indexes/describe"
    # Same key order as the original literal: indexName first.
    request_body = {
        "indexName": "Embedding_Features",
        "collectionName": collection_name,
    }
    request_headers = {
        "x-api-key": "YOUR_API_KEY",
        "Content-Type": "application/json",
    }
    reply = requests.post(endpoint, headers=request_headers, data=json.dumps(request_body))
    return reply.json()
def create_index(collection_name: str, server_address: str):
url = f"{server_address}/v2/vectordb/indexes/create"
payload = json.dumps({
"collectionName": collection_name,
"indexParams": [{
"metricType": "COSINE",
"index_type": "HNSW",
"fieldName": "Embedding_Features",
"params": {"M": 128, "efConstruction": 256},
}],
})
headers = {
"x-api-key": "YOUR_API_KEY",
"Content-Type": "application/json",
}
response = requests.post(url, headers=headers, data=payload)
return response.json()

When I attempted to create the collection using:
create_collection(collection_name=collection_name, server_address=server_address)
I received the following message:
{'code': 0, 'data': {}}
Subsequently, when I described the collection:
describe_collection(collection_name=collection_name, server_address=server_address)
I got this response:
{'code': 0,
'data': {'aliases': [],
'autoId': False,
'collectionID': 454176377651168636,
'collectionName': 'collection1',
'consistencyLevel': 'Bounded',
'description': '',
'enableDynamicField': True,
'fields': [{'autoId': False,
'clusteringKey': False,
'description': '',
'id': 100,
'name': 'IDs_Features',
'nullable': False,
'partitionKey': False,
'primaryKey': True,
'type': 'Int64'},
{'autoId': False,
'clusteringKey': False,
'description': '',
'id': 101,
'name': 'Embedding_Features',
'nullable': False,
'params': [{'key': 'dim', 'value': '512'}],
'partitionKey': False,
'primaryKey': False,
'type': 'FloatVector'}],
'functions': None,
'indexes': [{'fieldName': 'Embedding_Features',
'indexName': 'Embedding_Features',
'metricType': 'COSINE'}],
'load': 'LoadStateLoading',
'partitionsNum': 1,
'properties': [],
'shardsNum': 1},
'message': ''}

When I described the collection index:
describe_index(collection_name=collection_name, server_address=server_address)
I received this message:
{'code': 0,
'data': [{'failReason': '',
'fieldName': 'Embedding_Features',
'indexName': 'Embedding_Features',
'indexState': 'Finished',
'indexType': 'AUTOINDEX',
'indexedRows': 0,
'metricType': 'COSINE',
'pendingRows': 0,
'totalRows': 0}]}

This indicated that the index type was set to AUTOINDEX, despite my configuration specifying HNSW, which caused issues with indexing. Afterwards, I inserted 10,000 rows into the collection:

number_vectors = 10000
for i in range(0, number_vectors, 500):
response = insert_data_into_collection(
collection_name=collection_name,
start_id=i,
n=500,
ids_name="IDs_Features",
features_name="Embedding_Features",
server_address=server_address,
)
if response["data"]["insertCount"] == 500:
print(f"Great! inserted ids {i} to {i+500} successfully")
else:
print(f"There are some errors for {i}")
time.sleep(1)

Finally, when I tried to create the index:
create_index(collection_name=collection_name, server_address=server_address)
I encountered the following error:
{'code': 65535, 'message': 'only metric type can be passed when use AutoIndex'}
Is it possible for me to set the indexType to HNSW and either ignore or remove the default AutoIndex value?

Update 1
By adjusting the create_index function as follows:
def create_index(collection_name: str, server_address: str):
url = f"{server_address}/v2/vectordb/indexes/create"
payload = json.dumps(
{
"collectionName": collection_name,
"indexParams": [
{
"metricType": "COSINE",
"fieldName": "Embedding_Features",
"params": {"M": 128, "efConstruction": 256, "index_type": "HNSW"},
}
],
}
)
# Example: {'metric_type': 'IP', 'index_type': 'HNSW', 'params': {'nlist': 1024, 'efConstruction': 40, 'M': 1024}}
headers = {
"x-api-key": "YOUR_API_KEY",
"Content-Type": "application/json",
}
response = requests.post(url, headers=headers, data=payload)
response = json.loads(response.text)
return response

I encountered the following error:
{'code': 65535,
'message': 'CreateIndex failed: at most one distinct index is allowed per field'} |
Beta Was this translation helpful? Give feedback.
Replies: 2 comments
-
@smellthemoon |
Beta Was this translation helpful? Give feedback.
-
It seems that you want to create a specific index when creating a collection. It is not recommended to use the quick create collection method here. If you do not specify the schema parameter, an autoindex will be automatically created for you when creating the collection. This cannot be changed later. You can follow the CUSTOM SETUP WITH INDEX instructions in https://milvus.io/api-reference/restful/v2.5.x/v2/Collection%20(v2)/Create.md, example here `export TOKEN="root:Milvus" curl --request POST |
Beta Was this translation helpful? Give feedback.
It seems that you want to create a specific index when creating a collection. It is not recommended to use the quick create collection method here. If you do not specify the schema parameter, an autoindex will be automatically created for you when creating the collection. This cannot be changed later. You can follow the CUSTOM SETUP WITH INDEX instructions in https://milvus.io/api-reference/restful/v2.5.x/v2/Collection%20(v2)/Create.md, example here `export TOKEN="root:Milvus"
curl --request POST
--url "${CLUSTER_ENDPOINT}/v2/vectordb/collections/create"
--header "Authorization: Bearer ${TOKEN}"
--header "Content-Type: application/json"
-d '{
"collectionName": "custom_setup_indexed",
…