From b9788dc2f167a51f8cf8a7644c2c85cef7d8a1da Mon Sep 17 00:00:00 2001
From: zhuwenxing <wenxing.zhu@zilliz.com>
Date: Fri, 8 Nov 2024 14:08:41 +0800
Subject: [PATCH] test: add rbac backup restore check in ci

Signed-off-by: zhuwenxing <wenxing.zhu@zilliz.com>
---
 example/prepare_data.py | 210 ++++++++++++++++++++--------------------
 example/verify_data.py  | 208 ++++++++++++++++++++-------------------
 2 files changed, 213 insertions(+), 205 deletions(-)

diff --git a/example/prepare_data.py b/example/prepare_data.py
index 531e0700..067e0303 100644
--- a/example/prepare_data.py
+++ b/example/prepare_data.py
@@ -15,108 +15,108 @@
     FieldSchema, CollectionSchema, DataType,
     Collection,
 )
-
-fmt = "\n=== {:30} ===\n"
-search_latency_fmt = "search latency = {:.4f}s"
-num_entities, dim = 3000, 8
-
-#################################################################################
-# 1. connect to Milvus
-# Add a new connection alias `default` for Milvus server in `localhost:19530`
-# Actually the "default" alias is a buildin in PyMilvus.
-# If the address of Milvus is the same as `localhost:19530`, you can omit all
-# parameters and call the method as: `connections.connect()`.
-#
-# Note: the `using` parameter of the following methods is default to "default".
-print(fmt.format("start connecting to Milvus"))
-
-host = os.environ.get('MILVUS_HOST')
-if host == None:
-    host = "localhost"
-print(fmt.format(f"Milvus host: {host}"))
-connections.connect("default", host=host, port="19530")
-
-has = utility.has_collection("hello_milvus")
-print(f"Does collection hello_milvus exist in Milvus: {has}")
-
-#################################################################################
-# 2. create collection
-# We're going to create a collection with 3 fields.
-# +-+------------+------------+------------------+------------------------------+
-# | | field name | field type | other attributes |       field description      |
-# +-+------------+------------+------------------+------------------------------+
-# |1|    "pk"    |    Int64   |  is_primary=True |      "primary field"         |
-# | |            |            |   auto_id=False  |                              |
-# +-+------------+------------+------------------+------------------------------+
-# |2|  "random"  |    Double  |                  |      "a double field"        |
-# +-+------------+------------+------------------+------------------------------+
-# |3|"embeddings"| FloatVector|     dim=8        |  "float vector with dim 8"   |
-# +-+------------+------------+------------------+------------------------------+
-fields = [
-    FieldSchema(name="pk", dtype=DataType.INT64, is_primary=True, auto_id=False),
-    FieldSchema(name="random", dtype=DataType.DOUBLE),
-    FieldSchema(name="var", dtype=DataType.VARCHAR, max_length=65535),
-    FieldSchema(name="embeddings", dtype=DataType.FLOAT_VECTOR, dim=dim)
-]
-
-schema = CollectionSchema(fields, "hello_milvus")
-
-print(fmt.format("Create collection `hello_milvus`"))
-hello_milvus = Collection("hello_milvus", schema, consistency_level="Strong")
-
-################################################################################
-# 3. insert data
-# We are going to insert 3000 rows of data into `hello_milvus`
-# Data to be inserted must be organized in fields.
-#
-# The insert() method returns:
-# - either automatically generated primary keys by Milvus if auto_id=True in the schema;
-# - or the existing primary key field from the entities if auto_id=False in the schema.
-
-print(fmt.format("Start inserting entities"))
-rng = np.random.default_rng(seed=19530)
-entities = [
-    # provide the pk field because `auto_id` is set to False
-    [i for i in range(num_entities)],
-    rng.random(num_entities).tolist(),  # field random, only supports list
-    [str(i) for i in range(num_entities)],
-    rng.random((num_entities, dim)),    # field embeddings, supports numpy.ndarray and list
-]
-
-insert_result = hello_milvus.insert(entities)
-hello_milvus.flush()
-print(f"Number of entities in hello_milvus: {hello_milvus.num_entities}")  # check the num_entites
-
-# create another collection
-fields2 = [
-    FieldSchema(name="pk", dtype=DataType.INT64, is_primary=True, auto_id=True),
-    FieldSchema(name="random", dtype=DataType.DOUBLE),
-    FieldSchema(name="var", dtype=DataType.VARCHAR, max_length=65535),
-    FieldSchema(name="embeddings", dtype=DataType.FLOAT_VECTOR, dim=dim)
-]
-
-schema2 = CollectionSchema(fields2, "hello_milvus2")
-
-print(fmt.format("Create collection `hello_milvus2`"))
-hello_milvus2 = Collection("hello_milvus2", schema2, consistency_level="Strong")
-
-entities2 = [
-    rng.random(num_entities).tolist(),  # field random, only supports list
-    [str(i) for i in range(num_entities)],
-    rng.random((num_entities, dim)),    # field embeddings, supports numpy.ndarray and list
-]
-
-insert_result2 = hello_milvus2.insert(entities2)
-hello_milvus2.flush()
-insert_result2 = hello_milvus2.insert(entities2)
-hello_milvus2.flush()
-
-# index_params = {"index_type": "IVF_FLAT", "params": {"nlist": 128}, "metric_type": "L2"}
-# hello_milvus.create_index("embeddings", index_params)
-# hello_milvus2.create_index(field_name="var",index_name="scalar_index")
-
-# index_params2 = {"index_type": "Trie"}
-# hello_milvus2.create_index("var", index_params2)
-
-print(f"Number of entities in hello_milvus2: {hello_milvus2.num_entities}")  # check the num_entites
-
+import argparse
+
+
+
+def main(uri="http://127.0.0.1:19530", token="root:Milvus"):
+    """Connect to Milvus at `uri` and fill `hello_milvus` and `hello_milvus2` with seeded random rows."""
+    fmt = "\n=== {:30} ===\n"
+    num_entities, dim = 3000, 8
+
+    #################################################################################
+    # 1. connect to Milvus
+    # Register the connection alias `default` for the Milvus server at `uri`,
+    # authenticating with `token`.
+    # Actually the "default" alias is built into PyMilvus.
+    #
+    # Note: the `using` parameter of the following methods defaults to "default",
+    # so none of the calls below passes it.
+    print(fmt.format("start connecting to Milvus"))
+    print(fmt.format(f"Milvus uri: {uri}"))
+    connections.connect("default", uri=uri, token=token)
+
+    has = utility.has_collection("hello_milvus")
+    print(f"Does collection hello_milvus exist in Milvus: {has}")
+
+    #################################################################################
+    # 2. create collection
+    # We're going to create a collection with 4 fields.
+    # +-+------------+------------+------------------+------------------------------+
+    # | | field name | field type | other attributes |       field description      |
+    # +-+------------+------------+------------------+------------------------------+
+    # |1|    "pk"    |    Int64   |  is_primary=True |      "primary field"         |
+    # | |            |            |   auto_id=False  |                              |
+    # +-+------------+------------+------------------+------------------------------+
+    # |2|  "random"  |    Double  |                  |      "a double field"        |
+    # +-+------------+------------+------------------+------------------------------+
+    # |3|   "var"    |   VarChar  | max_length=65535 |      "a varchar field"       |
+    # +-+------------+------------+------------------+------------------------------+
+    # |4|"embeddings"| FloatVector|     dim=8        |  "float vector with dim 8"   |
+    # +-+------------+------------+------------------+------------------------------+
+    fields = [
+        FieldSchema(name="pk", dtype=DataType.INT64, is_primary=True, auto_id=False),
+        FieldSchema(name="random", dtype=DataType.DOUBLE),
+        FieldSchema(name="var", dtype=DataType.VARCHAR, max_length=65535),
+        FieldSchema(name="embeddings", dtype=DataType.FLOAT_VECTOR, dim=dim)
+    ]
+    schema = CollectionSchema(fields, "hello_milvus")
+
+    print(fmt.format("Create collection `hello_milvus`"))
+    hello_milvus = Collection("hello_milvus", schema, consistency_level="Strong")
+
+    ################################################################################
+    # 3. insert data
+    # We are going to insert 3000 rows of data into `hello_milvus`.
+    # Data to be inserted must be organized in fields (one column per field).
+    #
+    # The insert() method returns the primary keys (generated by Milvus if auto_id=True,
+    # taken from the entities if auto_id=False); this script has no use for them.
+
+    print(fmt.format("Start inserting entities"))
+    rng = np.random.default_rng(seed=19530)
+    entities = [
+        # provide the pk field because `auto_id` is set to False
+        list(range(num_entities)),
+        rng.random(num_entities).tolist(),  # field random, only supports list
+        [str(i) for i in range(num_entities)],  # field var
+        rng.random((num_entities, dim)),  # field embeddings, supports numpy.ndarray and list
+    ]
+
+    hello_milvus.insert(entities)
+    hello_milvus.flush()
+    print(f"Number of entities in hello_milvus: {hello_milvus.num_entities}")  # check the num_entities
+
+    # create another collection, this time with `auto_id=True` on the primary key
+    fields2 = [
+        FieldSchema(name="pk", dtype=DataType.INT64, is_primary=True, auto_id=True),
+        FieldSchema(name="random", dtype=DataType.DOUBLE),
+        FieldSchema(name="var", dtype=DataType.VARCHAR, max_length=65535),
+        FieldSchema(name="embeddings", dtype=DataType.FLOAT_VECTOR, dim=dim)
+    ]
+
+    schema2 = CollectionSchema(fields2, "hello_milvus2")
+
+    print(fmt.format("Create collection `hello_milvus2`"))
+    hello_milvus2 = Collection("hello_milvus2", schema2, consistency_level="Strong")
+
+    entities2 = [  # no pk column: `auto_id=True` in fields2
+        rng.random(num_entities).tolist(),  # field random, only supports list
+        [str(i) for i in range(num_entities)],  # field var
+        rng.random((num_entities, dim)),  # field embeddings, supports numpy.ndarray and list
+    ]
+
+    hello_milvus2.insert(entities2)
+    hello_milvus2.flush()
+    hello_milvus2.insert(entities2)  # the same batch is inserted a second time, as in the original script
+    hello_milvus2.flush()
+
+    print(f"Number of entities in hello_milvus2: {hello_milvus2.num_entities}")  # check the num_entities
+
+
+if __name__ == "__main__":  # script entry point: read the CLI flags, then prepare the data
+    parser = argparse.ArgumentParser(description="prepare data")
+    parser.add_argument("--uri", type=str, default="http://127.0.0.1:19530", help="Milvus server uri")
+    parser.add_argument("--token", type=str, default="root:Milvus", help="Milvus server token")
+    args = parser.parse_args()
+    main(uri=args.uri, token=args.token)
diff --git a/example/verify_data.py b/example/verify_data.py
index df2058b0..01db7218 100644
--- a/example/verify_data.py
+++ b/example/verify_data.py
@@ -7,107 +7,115 @@
     FieldSchema, CollectionSchema, DataType,
     Collection,
 )
+import argparse
 
-fmt = "\n=== {:30} ===\n"
-search_latency_fmt = "search latency = {:.4f}s"
-num_entities, dim = 3000, 8
-rng = np.random.default_rng(seed=19530)
-entities = [
-    # provide the pk field because `auto_id` is set to False
-    [i for i in range(num_entities)],
-    rng.random(num_entities).tolist(),  # field random, only supports list
-    rng.random((num_entities, dim)),    # field embeddings, supports numpy.ndarray and list
-]
-
-################################################################################
-# 1. get recovered collection hello_milvus_recover
-print(fmt.format("start connecting to Milvus"))
-host = os.environ.get('MILVUS_HOST')
-if host == None:
-    host = "localhost"
-print(fmt.format(f"Milvus host: {host}"))
-connections.connect("default", host=host, port="19530")
-
-recover_collections = ["hello_milvus_recover", "hello_milvus2_recover"]
-
-for recover_collection_name in recover_collections:
-    has = utility.has_collection(recover_collection_name)
-    print(f"Does collection {recover_collection_name} exist in Milvus: {has}")
-    recover_collection = Collection(recover_collection_name)
-    print(recover_collection.schema)
-    recover_collection.flush()
-
-    print(f"Number of entities in Milvus: {recover_collection_name} : {recover_collection.num_entities}")  # check the num_entites
 
-    ################################################################################
-    # 4. create index
-    # We are going to create an IVF_FLAT index for hello_milvus_recover collection.
-    # create_index() can only be applied to `FloatVector` and `BinaryVector` fields.
-    print(fmt.format("Start Creating index IVF_FLAT"))
-    index = {
-        "index_type": "IVF_FLAT",
-        "metric_type": "L2",
-        "params": {"nlist": 128},
-    }
-
-    recover_collection.create_index("embeddings", index)
+def main(uri="http://127.0.0.1:19530", token="root:Milvus"):
+    """Index, load, search and query each recovered collection on the Milvus server at `uri`, then drop it."""
+    fmt = "\n=== {:30} ===\n"
+    search_latency_fmt = "search latency = {:.4f}s"
+    num_entities, dim = 3000, 8
+    rng = np.random.default_rng(seed=19530)  # same seed and draw order as prepare_data.py uses for `hello_milvus`
+    entities = [
+        list(range(num_entities)),  # pk column; nothing is inserted here, kept to mirror prepare_data.py
+        rng.random(num_entities).tolist(),  # field random; this draw keeps the RNG stream aligned
+        rng.random((num_entities, dim)),    # field embeddings; its last two rows become the query vectors
+    ]
 
     ################################################################################
-    # 5. search, query, and hybrid search
-    # After data were inserted into Milvus and indexed, you can perform:
-    # - search based on vector similarity
-    # - query based on scalar filtering(boolean, int, etc.)
-    # - hybrid search based on vector similarity and scalar filtering.
-    #
-
-    # Before conducting a search or a query, you need to load the data in `hello_milvus` into memory.
-    print(fmt.format("Start loading"))
-    recover_collection.load()
-
-    # -----------------------------------------------------------------------------
-    # search based on vector similarity
-    print(fmt.format("Start searching based on vector similarity"))
-    vectors_to_search = entities[-1][-2:]
-    search_params = {
-        "metric_type": "L2",
-        "params": {"nprobe": 10},
-    }
-
-    start_time = time.time()
-    result = recover_collection.search(vectors_to_search, "embeddings", search_params, limit=3, output_fields=["random"])
-    end_time = time.time()
-
-    for hits in result:
-        for hit in hits:
-            print(f"hit: {hit}, random field: {hit.entity.get('random')}")
-    print(search_latency_fmt.format(end_time - start_time))
-
-    # -----------------------------------------------------------------------------
-    # query based on scalar filtering(boolean, int, etc.)
-    print(fmt.format("Start querying with `random > 0.5`"))
-
-    start_time = time.time()
-    result = recover_collection.query(expr="random > 0.5", output_fields=["random", "embeddings"])
-    end_time = time.time()
-
-    print(f"query result:\n-{result[0]}")
-    print(search_latency_fmt.format(end_time - start_time))
-
-    # -----------------------------------------------------------------------------
-    # hybrid search
-    print(fmt.format("Start hybrid searching with `random > 0.5`"))
-
-    start_time = time.time()
-    result = recover_collection.search(vectors_to_search, "embeddings", search_params, limit=3, expr="random > 0.5", output_fields=["random"])
-    end_time = time.time()
-
-    for hits in result:
-        for hit in hits:
-            print(f"hit: {hit}, random field: {hit.entity.get('random')}")
-    print(search_latency_fmt.format(end_time - start_time))
-
-    ###############################################################################
-    # 7. drop collection
-    # Finally, drop the hello_milvus, hello_milvus_recover collection
-    print(fmt.format(f"Drop collection {recover_collection_name}"))
-    utility.drop_collection(recover_collection_name)
\ No newline at end of file
+    # 1. connect to Milvus and name the recovered collections to verify
+    print(fmt.format("start connecting to Milvus"))
+    print(fmt.format(f"Milvus uri: {uri}"))
+    connections.connect("default", uri=uri, token=token)
+
+    recover_collections = ["hello_milvus_recover", "hello_milvus2_recover"]
+
+    for recover_collection_name in recover_collections:
+        has = utility.has_collection(recover_collection_name)
+        print(f"Does collection {recover_collection_name} exist in Milvus: {has}")  # reported only; `has` does not gate the steps below
+        recover_collection = Collection(recover_collection_name)
+        print(recover_collection.schema)
+        recover_collection.flush()
+
+        print(f"Number of entities in Milvus: {recover_collection_name} : {recover_collection.num_entities}")  # check the num_entities
+
+        ################################################################################
+        # 2. create index
+        # We are going to create an IVF_FLAT index for the current recovered collection.
+        # create_index() can only be applied to `FloatVector` and `BinaryVector` fields.
+        print(fmt.format("Start Creating index IVF_FLAT"))
+        index = {
+            "index_type": "IVF_FLAT",
+            "metric_type": "L2",
+            "params": {"nlist": 128},
+        }
+
+        recover_collection.create_index("embeddings", index)
+
+        ################################################################################
+        # 3. search, query, and hybrid search
+        # After data were inserted into Milvus and indexed, you can perform:
+        # - search based on vector similarity
+        # - query based on scalar filtering(boolean, int, etc.)
+        # - hybrid search based on vector similarity and scalar filtering.
+        #
+
+        # Before conducting a search or a query, you need to load the recovered collection into memory.
+        print(fmt.format("Start loading"))
+        recover_collection.load()
+
+        # -----------------------------------------------------------------------------
+        # search based on vector similarity
+        print(fmt.format("Start searching based on vector similarity"))
+        vectors_to_search = entities[-1][-2:]  # last two rows of the locally regenerated embeddings
+        search_params = {
+            "metric_type": "L2",
+            "params": {"nprobe": 10},
+        }
+
+        start_time = time.time()
+        result = recover_collection.search(vectors_to_search, "embeddings", search_params, limit=3, output_fields=["random"])
+        end_time = time.time()
+
+        for hits in result:
+            for hit in hits:
+                print(f"hit: {hit}, random field: {hit.entity.get('random')}")
+        print(search_latency_fmt.format(end_time - start_time))
+
+        # -----------------------------------------------------------------------------
+        # query based on scalar filtering(boolean, int, etc.)
+        print(fmt.format("Start querying with `random > 0.5`"))
+
+        start_time = time.time()
+        result = recover_collection.query(expr="random > 0.5", output_fields=["random", "embeddings"])
+        end_time = time.time()
+
+        print(f"query result:\n-{result[0]}")  # NOTE(review): indexing [0] would raise IndexError if the query returned no rows
+        print(search_latency_fmt.format(end_time - start_time))
+
+        # -----------------------------------------------------------------------------
+        # hybrid search
+        print(fmt.format("Start hybrid searching with `random > 0.5`"))
+
+        start_time = time.time()
+        result = recover_collection.search(vectors_to_search, "embeddings", search_params, limit=3, expr="random > 0.5", output_fields=["random"])
+        end_time = time.time()
+
+        for hits in result:
+            for hit in hits:
+                print(f"hit: {hit}, random field: {hit.entity.get('random')}")
+        print(search_latency_fmt.format(end_time - start_time))
+
+        ###############################################################################
+        # 4. drop collection
+        # Finally, drop the recovered collection that was just verified.
+        print(fmt.format(f"Drop collection {recover_collection_name}"))
+        utility.drop_collection(recover_collection_name)
+
+
+if __name__ == "__main__":  # script entry point: read the CLI flags, then run the verification
+    parser = argparse.ArgumentParser(description="verify data")
+    parser.add_argument("--uri", type=str, default="http://127.0.0.1:19530", help="Milvus server uri")
+    parser.add_argument("--token", type=str, default="root:Milvus", help="Milvus server token")
+    args = parser.parse_args()
+    main(args.uri, args.token)