From dc50cbab6b43c23fde0b0c4030df0611b4bbdc26 Mon Sep 17 00:00:00 2001
From: Jo Kristian Bergum
Date: Thu, 19 Oct 2023 21:38:15 +0200
Subject: [PATCH] Reduce number of operations to be within test timeout

---
 vespa/test_integration_vespa_cloud_vector_search.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/vespa/test_integration_vespa_cloud_vector_search.py b/vespa/test_integration_vespa_cloud_vector_search.py
index 56a7bcc5..5c3d22fe 100644
--- a/vespa/test_integration_vespa_cloud_vector_search.py
+++ b/vespa/test_integration_vespa_cloud_vector_search.py
@@ -80,7 +80,7 @@ def test_vector_indexing_and_query(self):
         self.assertEqual(200, self.app.get_application_status().status_code)

         from datasets import load_dataset
-        sample_size = 100000
+        sample_size = 10000
         # streaming=True pages the data from S3. This is needed to avoid memory issues when loading the dataset.
         dataset = load_dataset("KShivendu/dbpedia-entities-openai-1M", split="train", streaming=True).take(sample_size)
         # Map does not page, this allows chaining of maps where the lambda is yielding the next document.
@@ -103,7 +103,7 @@ def callback(response:VespaResponse, id:str):
             callbacks +=1

         start = time.time()
-        self.app.feed_iterable(iter=docs, schema="vector", namespace="benchmark", callback=callback, max_workers=48, max_connections=48)
+        self.app.feed_iterable(iter=docs, schema="vector", namespace="benchmark", callback=callback, max_workers=48, max_connections=48, max_queue_size=4000)
         self.assertEqual(ok, sample_size)
         duration = time.time() - start
         docs_per_second = sample_size / duration
@@ -124,12 +124,12 @@ def callback(response:VespaResponse, id:str):
         ok = 0
         callbacks = 0
         start_time = time.time()
-        dataset = load_dataset("KShivendu/dbpedia-entities-openai-1M", split="train", streaming=True).take(10000)
+        dataset = load_dataset("KShivendu/dbpedia-entities-openai-1M", split="train", streaming=True).take(100)
         feed_with_wrong_field = dataset.map(lambda x: {"id": x["_id"], "fields": {"id": x["_id"], "vector":x["openai"]}})
         faulty_docs = list(feed_with_wrong_field)
         self.app.feed_iterable(iter=faulty_docs, schema="vector", namespace="benchmark", callback=callback, max_workers=48, max_connections=48)
         self.assertEqual(ok, 0)
-        self.assertEqual(callbacks, 10000)
+        self.assertEqual(callbacks, 100)

         # Async test to compare time
         ok = 0