From 94893bc9879c374dbdfd13da5b6efc8fefc564f8 Mon Sep 17 00:00:00 2001 From: thomasht86 Date: Thu, 15 Aug 2024 08:27:57 +0200 Subject: [PATCH 1/2] add approved commit sha of action --- .github/workflows/integration-except-cloud.yml | 2 +- .github/workflows/notebooks-cloud.yml | 2 +- .github/workflows/notebooks-except-cloud.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/integration-except-cloud.yml b/.github/workflows/integration-except-cloud.yml index 1fbcd4ca..5460d9c1 100644 --- a/.github/workflows/integration-except-cloud.yml +++ b/.github/workflows/integration-except-cloud.yml @@ -13,7 +13,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Free disk space - uses: jlumbroso/free-disk-space@main + uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be - uses: actions/checkout@v4 diff --git a/.github/workflows/notebooks-cloud.yml b/.github/workflows/notebooks-cloud.yml index 2c3a9a4d..988ba1a5 100644 --- a/.github/workflows/notebooks-cloud.yml +++ b/.github/workflows/notebooks-cloud.yml @@ -37,7 +37,7 @@ jobs: steps: - name: Free disk space - uses: jlumbroso/free-disk-space@main + uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be - uses: actions/checkout@v4 diff --git a/.github/workflows/notebooks-except-cloud.yml b/.github/workflows/notebooks-except-cloud.yml index 38bbff30..d197a3f8 100644 --- a/.github/workflows/notebooks-except-cloud.yml +++ b/.github/workflows/notebooks-except-cloud.yml @@ -33,7 +33,7 @@ jobs: notebook: ${{fromJson(needs.get-non-cloud-notebooks.outputs.notebooks)}} steps: - name: Free disk space - uses: jlumbroso/free-disk-space@main + uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be - uses: actions/checkout@v4 From 116a62b9734d5cbd02e3dae3c95d49db63804498 Mon Sep 17 00:00:00 2001 From: thomasht86 Date: Thu, 15 Aug 2024 10:11:24 +0200 Subject: [PATCH 2/2] remove cli feeding in notebook --- .../examples/feed_performance_cloud.ipynb | 1680 ++--------------- 1 file changed, 158 insertions(+), 1522 deletions(-) diff --git a/docs/sphinx/source/examples/feed_performance_cloud.ipynb b/docs/sphinx/source/examples/feed_performance_cloud.ipynb index 85fc8e6e..358116be 100644 --- a/docs/sphinx/source/examples/feed_performance_cloud.ipynb +++ b/docs/sphinx/source/examples/feed_performance_cloud.ipynb @@ -228,37 +228,35 @@ "name": "stdout", "output_type": "stream", "text": [ - "Deployment started in run 3 of dev-aws-us-east-1c for vespa-team.feedperformancecloud. This may take a few minutes the first time.\n", - "INFO [08:04:48] Deploying platform version 8.387.10 and application dev build 3 for dev-aws-us-east-1c of default ...\n", - "INFO [08:04:48] Using CA signed certificate version 1\n", - "INFO [08:04:49] Using 1 nodes in container cluster 'feedperformancecloud_container'\n", - "WARNING [08:04:50] Auto-overriding validation which would be disallowed in production: certificate-removal: Data plane certificate(s) from cluster 'feedperformancecloud_container' is removed (removed certificates: [CN=cloud.vespa.example]) This can cause client connection issues.. 
To allow this add certificate-removal to validation-overrides.xml, see https://docs.vespa.ai/en/reference/validation-overrides.html\n", - "INFO [08:04:50] Using 1 nodes in container cluster 'feedperformancecloud_container'\n", - "WARNING [08:04:53] Auto-overriding validation which would be disallowed in production: certificate-removal: Data plane certificate(s) from cluster 'feedperformancecloud_container' is removed (removed certificates: [CN=cloud.vespa.example]) This can cause client connection issues.. To allow this add certificate-removal to validation-overrides.xml, see https://docs.vespa.ai/en/reference/validation-overrides.html\n", - "INFO [08:04:55] Session 303878 for tenant 'vespa-team' prepared and activated.\n", - "INFO [08:04:55] ######## Details for all nodes ########\n", - "INFO [08:04:55] h95731a.dev.aws-us-east-1c.vespa-external.aws.oath.cloud: expected to be UP\n", - "INFO [08:04:55] --- platform vespa/cloud-tenant-rhel8:8.387.10\n", - "INFO [08:04:55] --- container on port 4080 has not started \n", - "INFO [08:04:55] --- metricsproxy-container on port 19092 has config generation 303870, wanted is 303878\n", - "INFO [08:04:55] h95729b.dev.aws-us-east-1c.vespa-external.aws.oath.cloud: expected to be UP\n", - "INFO [08:04:55] --- platform vespa/cloud-tenant-rhel8:8.387.10\n", - "INFO [08:04:55] --- storagenode on port 19102 has config generation 303870, wanted is 303878\n", - "INFO [08:04:55] --- searchnode on port 19107 has config generation 303878, wanted is 303878\n", - "INFO [08:04:55] --- distributor on port 19111 has config generation 303878, wanted is 303878\n", - "INFO [08:04:55] --- metricsproxy-container on port 19092 has config generation 303878, wanted is 303878\n", - "INFO [08:04:55] h93272g.dev.aws-us-east-1c.vespa-external.aws.oath.cloud: expected to be UP\n", - "INFO [08:04:55] --- platform vespa/cloud-tenant-rhel8:8.387.10\n", - "INFO [08:04:55] --- logserver-container on port 4080 has config generation 303878, wanted is 303878\n", - "INFO [08:04:55] --- metricsproxy-container on port 19092 has config generation 303878, wanted is 303878\n", - "INFO [08:04:55] h93272h.dev.aws-us-east-1c.vespa-external.aws.oath.cloud: expected to be UP\n", - "INFO [08:04:55] --- platform vespa/cloud-tenant-rhel8:8.387.10\n", - "INFO [08:04:55] --- container-clustercontroller on port 19050 has config generation 303878, wanted is 303878\n", - "INFO [08:04:55] --- metricsproxy-container on port 19092 has config generation 303878, wanted is 303878\n", - "INFO [08:05:03] Found endpoints:\n", - "INFO [08:05:03] - dev.aws-us-east-1c\n", - "INFO [08:05:03] |-- https://b48e8812.bc737822.z.vespa-app.cloud/ (cluster 'feedperformancecloud_container')\n", - "INFO [08:05:04] Deployment of new application complete!\n", + "Deployment started in run 9 of dev-aws-us-east-1c for vespa-team.feedperformancecloud. This may take a few minutes the first time.\n", + "INFO [07:22:29] Deploying platform version 8.392.14 and application dev build 7 for dev-aws-us-east-1c of default ...\n", + "INFO [07:22:30] Using CA signed certificate version 1\n", + "INFO [07:22:30] Using 1 nodes in container cluster 'feedperformancecloud_container'\n", + "WARNING [07:22:33] Auto-overriding validation which would be disallowed in production: certificate-removal: Data plane certificate(s) from cluster 'feedperformancecloud_container' is removed (removed certificates: [CN=cloud.vespa.example]) This can cause client connection issues.. 
To allow this add certificate-removal to validation-overrides.xml, see https://docs.vespa.ai/en/reference/validation-overrides.html\n", + "INFO [07:22:34] Session 304192 for tenant 'vespa-team' prepared and activated.\n", + "INFO [07:22:35] ######## Details for all nodes ########\n", + "INFO [07:22:35] h95731a.dev.aws-us-east-1c.vespa-external.aws.oath.cloud: expected to be UP\n", + "INFO [07:22:35] --- platform vespa/cloud-tenant-rhel8:8.392.14\n", + "INFO [07:22:35] --- container on port 4080 has not started \n", + "INFO [07:22:35] --- metricsproxy-container on port 19092 has config generation 304192, wanted is 304192\n", + "INFO [07:22:35] h95729b.dev.aws-us-east-1c.vespa-external.aws.oath.cloud: expected to be UP\n", + "INFO [07:22:35] --- platform vespa/cloud-tenant-rhel8:8.392.14\n", + "INFO [07:22:35] --- storagenode on port 19102 has config generation 304192, wanted is 304192\n", + "INFO [07:22:35] --- searchnode on port 19107 has config generation 304192, wanted is 304192\n", + "INFO [07:22:35] --- distributor on port 19111 has config generation 304192, wanted is 304192\n", + "INFO [07:22:35] --- metricsproxy-container on port 19092 has config generation 304192, wanted is 304192\n", + "INFO [07:22:35] h93272g.dev.aws-us-east-1c.vespa-external.aws.oath.cloud: expected to be UP\n", + "INFO [07:22:35] --- platform vespa/cloud-tenant-rhel8:8.392.14\n", + "INFO [07:22:35] --- logserver-container on port 4080 has config generation 304192, wanted is 304192\n", + "INFO [07:22:35] --- metricsproxy-container on port 19092 has config generation 304192, wanted is 304192\n", + "INFO [07:22:35] h93272h.dev.aws-us-east-1c.vespa-external.aws.oath.cloud: expected to be UP\n", + "INFO [07:22:35] --- platform vespa/cloud-tenant-rhel8:8.392.14\n", + "INFO [07:22:35] --- container-clustercontroller on port 19050 has config generation 304192, wanted is 304192\n", + "INFO [07:22:35] --- metricsproxy-container on port 19092 has config generation 304192, wanted is 304192\n", + "INFO [07:22:42] Found endpoints:\n", + "INFO [07:22:42] - dev.aws-us-east-1c\n", + "INFO [07:22:42] |-- https://b48e8812.bc737822.z.vespa-app.cloud/ (cluster 'feedperformancecloud_container')\n", + "INFO [07:22:44] Deployment of new application complete!\n", "Found mtls endpoint for feedperformancecloud_container\n", "URL: https://b48e8812.bc737822.z.vespa-app.cloud/\n", "Connecting to https://b48e8812.bc737822.z.vespa-app.cloud/\n", @@ -729,53 +727,34 @@ "output_type": "stream", "text": [ "Using mtls_key_cert Authentication against endpoint https://b48e8812.bc737822.z.vespa-app.cloud//ApplicationStatus\n", - "9.478203773498535\n", + "7.062151908874512\n", "Deleting data\n", "--------------------------------------------------\n", "Starting feed with params:\n", "FeedParams(name='5000_1_64_feed_async_iterable', num_docs=5000, max_connections=1, function_name='feed_async_iterable', max_workers=64, max_queue_size=2500)\n", - "32.890751123428345\n", + "20.979923963546753\n", "Deleting data\n", "--------------------------------------------------\n", "Starting feed with params:\n", "FeedParams(name='10000_1_64_feed_async_iterable', num_docs=10000, max_connections=1, function_name='feed_async_iterable', max_workers=64, max_queue_size=2500)\n", - "77.85460019111633\n", + "41.321199893951416\n", + "Deleting data\n", + "--------------------------------------------------\n", + "Starting feed with params:\n", + "FeedParams(name='1000_64_64_feed_iterable', num_docs=1000, max_connections=64, function_name='feed_iterable', max_workers=64, 
max_queue_size=2500)\n", + "16.278107166290283\n", + "Deleting data\n", + "--------------------------------------------------\n", + "Starting feed with params:\n", + "FeedParams(name='5000_64_64_feed_iterable', num_docs=5000, max_connections=64, function_name='feed_iterable', max_workers=64, max_queue_size=2500)\n", + "78.27990508079529\n", + "Deleting data\n", + "--------------------------------------------------\n", + "Starting feed with params:\n", + "FeedParams(name='10000_64_64_feed_iterable', num_docs=10000, max_connections=64, function_name='feed_iterable', max_workers=64, max_queue_size=2500)\n", + "156.38266611099243\n", "Deleting data\n" ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Exception in thread Thread-7:\n", - "Traceback (most recent call last):\n", - " File \"/Users/thomas/.pyenv/versions/3.9.19/lib/python3.9/threading.py\", line 980, in _bootstrap_inner\n", - " self.run()\n", - " File \"/Users/thomas/.pyenv/versions/3.9.19/envs/pyvespa-dev/lib/python3.9/site-packages/ipykernel/ipkernel.py\", line 766, in run_closure\n", - " _threading_Thread_run(self)\n", - " File \"/Users/thomas/.pyenv/versions/3.9.19/lib/python3.9/threading.py\", line 917, in run\n", - " self._target(*self._args, **self._kwargs)\n", - " File \"/Users/thomas/Repos/pyvespa/vespa/application.py\", line 480, in _consumer\n", - " future: Future = executor.submit(_submit, doc, sync_session)\n", - " File \"/Users/thomas/.pyenv/versions/3.9.19/lib/python3.9/concurrent/futures/thread.py\", line 167, in submit\n", - " raise RuntimeError('cannot schedule new futures after shutdown')\n", - "RuntimeError: cannot schedule new futures after shutdown\n" - ] - }, - { - "ename": "KeyboardInterrupt", - "evalue": "", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[17], line 22\u001b[0m\n\u001b[1;32m 20\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDeleting data\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 21\u001b[0m time\u001b[38;5;241m.\u001b[39msleep(\u001b[38;5;241m3\u001b[39m)\n\u001b[0;32m---> 22\u001b[0m \u001b[43mdelete_data\u001b[49m\u001b[43m(\u001b[49m\u001b[43mapp\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[43m)\u001b[49m\n", - "Cell \u001b[0;32mIn[15], line 6\u001b[0m, in \u001b[0;36mdelete_data\u001b[0;34m(app, data)\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdelete_data\u001b[39m(app: Vespa, data: Iterable[Dict]):\n\u001b[0;32m----> 6\u001b[0m \u001b[43mapp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfeed_iterable\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 7\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43miter\u001b[39;49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 8\u001b[0m \u001b[43m \u001b[49m\u001b[43mschema\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mdoc\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 9\u001b[0m \u001b[43m \u001b[49m\u001b[43mnamespace\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mpyvespa-feed\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 10\u001b[0m \u001b[43m 
\u001b[49m\u001b[43moperation_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mdelete\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 11\u001b[0m \u001b[43m \u001b[49m\u001b[43mcallback\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcallback\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 12\u001b[0m \u001b[43m \u001b[49m\u001b[43mmax_workers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m16\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 13\u001b[0m \u001b[43m \u001b[49m\u001b[43mmax_connections\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m16\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 14\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/Repos/pyvespa/vespa/application.py:579\u001b[0m, in \u001b[0;36mVespa.feed_iterable\u001b[0;34m(self, iter, schema, namespace, callback, operation_type, max_queue_size, max_workers, max_connections, **kwargs)\u001b[0m\n\u001b[1;32m 577\u001b[0m consumer_thread\u001b[38;5;241m.\u001b[39mstart()\n\u001b[1;32m 578\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m doc \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28miter\u001b[39m:\n\u001b[0;32m--> 579\u001b[0m \u001b[43mqueue\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mput\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdoc\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mblock\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m 580\u001b[0m queue\u001b[38;5;241m.\u001b[39mput(\u001b[38;5;28;01mNone\u001b[39;00m, block\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m 581\u001b[0m queue\u001b[38;5;241m.\u001b[39mjoin()\n", - "File \u001b[0;32m~/.pyenv/versions/3.9.19/lib/python3.9/queue.py:140\u001b[0m, in \u001b[0;36mQueue.put\u001b[0;34m(self, item, block, timeout)\u001b[0m\n\u001b[1;32m 138\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m timeout \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 139\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_qsize() \u001b[38;5;241m>\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmaxsize:\n\u001b[0;32m--> 140\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnot_full\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mwait\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 141\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m timeout \u001b[38;5;241m<\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[1;32m 142\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtimeout\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m must be a non-negative number\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", - "File \u001b[0;32m~/.pyenv/versions/3.9.19/lib/python3.9/threading.py:312\u001b[0m, in \u001b[0;36mCondition.wait\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m 310\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m: \u001b[38;5;66;03m# restore state no matter what (e.g., KeyboardInterrupt)\u001b[39;00m\n\u001b[1;32m 311\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m timeout \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 312\u001b[0m 
\u001b[43mwaiter\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43macquire\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 313\u001b[0m gotit \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[1;32m 314\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", - "\u001b[0;31mKeyboardInterrupt\u001b[0m: " - ] } ], "source": [ @@ -849,8 +828,8 @@ " feed_async_iterable\n", " 64\n", " 2500\n", - " 9.478204\n", - " 105.505223\n", + " 7.062152\n", + " 141.599899\n", " \n", " \n", " 1\n", @@ -860,8 +839,8 @@ " feed_async_iterable\n", " 64\n", " 2500\n", - " 32.890751\n", - " 152.018419\n", + " 20.979924\n", + " 238.323075\n", " \n", " \n", " 2\n", @@ -871,8 +850,41 @@ " feed_async_iterable\n", " 64\n", " 2500\n", - " 77.854600\n", - " 128.444562\n", + " 41.321200\n", + " 242.006525\n", + " \n", + " \n", + " 3\n", + " 1000_64_64_feed_iterable\n", + " 1000\n", + " 64\n", + " feed_iterable\n", + " 64\n", + " 2500\n", + " 16.278107\n", + " 61.432204\n", + " \n", + " \n", + " 4\n", + " 5000_64_64_feed_iterable\n", + " 5000\n", + " 64\n", + " feed_iterable\n", + " 64\n", + " 2500\n", + " 78.279905\n", + " 63.873353\n", + " \n", + " \n", + " 5\n", + " 10000_64_64_feed_iterable\n", + " 10000\n", + " 64\n", + " feed_iterable\n", + " 64\n", + " 2500\n", + " 156.382666\n", + " 63.945706\n", " \n", " \n", "\n", @@ -883,16 +895,25 @@ "0 1000_1_64_feed_async_iterable 1000 1 \n", "1 5000_1_64_feed_async_iterable 5000 1 \n", "2 10000_1_64_feed_async_iterable 10000 1 \n", + "3 1000_64_64_feed_iterable 1000 64 \n", + "4 5000_64_64_feed_iterable 5000 64 \n", + "5 10000_64_64_feed_iterable 10000 64 \n", "\n", - " function_name max_workers max_queue_size feed_time \\\n", - "0 feed_async_iterable 64 2500 9.478204 \n", - "1 feed_async_iterable 64 2500 32.890751 \n", - "2 feed_async_iterable 64 2500 77.854600 \n", + " function_name max_workers max_queue_size feed_time \\\n", + "0 feed_async_iterable 64 2500 7.062152 \n", + "1 feed_async_iterable 64 2500 20.979924 \n", + "2 feed_async_iterable 64 2500 41.321200 \n", + "3 feed_iterable 64 2500 16.278107 \n", + "4 feed_iterable 64 2500 78.279905 \n", + "5 feed_iterable 64 2500 156.382666 \n", "\n", " requests_per_second \n", - "0 105.505223 \n", - "1 152.018419 \n", - "2 128.444562 " + "0 141.599899 \n", + "1 238.323075 \n", + "2 242.006525 \n", + "3 61.432204 \n", + "4 63.873353 \n", + "5 63.945706 " ] }, "execution_count": 18, @@ -921,7 +942,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 19, "id": "b94ef835", "metadata": {}, "outputs": [ @@ -970,9 +991,53 @@ ], "xaxis": "x", "y": [ - 105.5052227085519, - 152.01841944066945, - 128.44456172727297 + 141.59989942206852, + 238.32307536898847, + 242.00652511699684 + ], + "yaxis": "y" + }, + { + "customdata": [ + [ + 64, + 2500, + 64 + ], + [ + 64, + 2500, + 64 + ], + [ + 64, + 2500, + 64 + ] + ], + "hovertemplate": "function_name=feed_iterable
<br>Number of Documents=%{x}<br>Requests per Second=%{y}<br>max_workers=%{customdata[0]}<br>max_queue_size=%{customdata[1]}<br>
max_connections=%{customdata[2]}", + "legendgroup": "feed_iterable", + "marker": { + "color": "#EF553B", + "opacity": 0.7, + "size": 12, + "symbol": "circle" + }, + "mode": "markers", + "name": "feed_iterable", + "orientation": "v", + "showlegend": true, + "type": "scatter", + "x": [ + 1000, + 5000, + 10000 + ], + "xaxis": "x", + "y": [ + 61.43220398934725, + 63.87335287184283, + 63.94570605991914 ], "yaxis": "y" } @@ -1919,1460 +1984,31 @@ "source": [ "Interesting. Let's try to summarize the insights we got from this experiment:\n", "\n", - "- The `feed_async_iterable` method is approximately 3x faster than the `feed_iterable` method.\n", - "- Note that this will vary depending on the network latency between the client and the Vespa instance.\n" - ] - }, - { - "cell_type": "markdown", - "id": "5630e540", - "metadata": {}, - "source": [ - "## Feeding with Vespa CLI\n", - "\n", - "[Vespa CLI](https://docs.vespa.ai/en/vespa-cli) is a command-line interface for interacting with Vespa.\n", - "\n", - "Among many useful features are a `vespa feed` command that is the recommended way of feeding large datasets into Vespa.\n", - "This is optimized for high feeding performance, and it will be interesting to get a feel for how performant feeding to a local Vespa instance is using the CLI.\n", - "\n", - "Note that comparing feeding with the CLI is not entirely fair, as the CLI relies on prepared data files, while the pyvespa methods are streaming the data before feeding it.\n" - ] - }, - { - "cell_type": "markdown", - "id": "f82d48fd", - "metadata": {}, - "source": [ - "## Prepare the data for Vespa CLI\n", - "\n", - "Vespa CLI can feed data from either many .json files or a single .jsonl file with many documents.\n", - "The json format needs to be in the following format:\n", - "\n", - "```json\n", - "{\n", - " \"put\": \"id:namespace:document-type::document-id\",\n", - " \"fields\": {\n", - " \"field1\": \"value1\",\n", - " \"field2\": \"value2\"\n", - " }\n", - "}\n", - "```\n", - "\n", - "Where, `put` is the document operation in this case. 
Other allowed operations are `get`, `update` and `remove`.\n", - "\n", - "For reference, see https://docs.vespa.ai/en/vespa-cli#cheat-sheet\n", - "\n", - "### Getting the datasets as .jsonl files\n", - "\n", - "Now, let`s save the dataset to 3 different jsonl files of 1k, 5k, and 10k documents.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "9b377ee3", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Getting dataset with 1000 docs...\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Creating json from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 164.05ba/s]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Getting dataset with 5000 docs...\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Creating json from Arrow format: 100%|██████████| 5/5 [00:00<00:00, 301.13ba/s]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Getting dataset with 10000 docs...\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Creating json from Arrow format: 100%|██████████| 10/10 [00:00<00:00, 302.73ba/s]\n" - ] - } - ], - "source": [ - "for n in num_docs:\n", - " print(f\"Getting dataset with {n} docs...\")\n", - " # First, let's load the dataset in non-streaming mode this time, as we want to save it to a jsonl file\n", - " dataset_cli = load_dataset(\n", - " \"Cohere/wikipedia-2023-11-embed-multilingual-v3\",\n", - " \"simple\",\n", - " split=f\"train[:{n}]\", # Notice the slicing here, see https://huggingface.co/docs/datasets/loading#slice-splits\n", - " streaming=False,\n", - " )\n", - " # Map to the format expected by the CLI.\n", - " # Note that this differs a little bit from the format expected by the Python API.\n", - " dataset_cli = dataset_cli.map(\n", - " lambda x: {\n", - " \"put\": f\"id:pyvespa-feed:doc::{x['_id']}-json\",\n", - " \"fields\": {\"text\": x[\"text\"]},\n", - " }\n", - " ).select_columns([\"put\", \"fields\"])\n", - " # Save to a jsonl file\n", - " assert len(dataset_cli) == n\n", - " dataset_cli.to_json(f\"vespa_feed-{n}.json\", orient=\"records\", lines=True)" + "- The `feed_async_iterable` method is approximately 3x faster than the `feed_iterable` method for this specific setup.\n", + "- Note that this will vary depending on the network latency between the client and the Vespa instance.\n", + "- If you are feeding from a cloud instance with less latency to the Vespa instance, the difference between the methods will be less, and the `feed_iterable` method might even be faster.\n" ] }, { "cell_type": "markdown", - "id": "fe173828", - "metadata": {}, - "source": [ - "Let's look at the first line of one of the saved files to verify the format.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "cd95d014", + "id": "2d91581b", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'fields': {'text': 'April (Apr.) is the fourth month of the year in the '\n", - " 'Julian and Gregorian calendars, and comes between March '\n", - " 'and May. 
It is one of the four months to have 30 days.'},\n", - " 'put': 'id:pyvespa-feed:doc::20231101.simple_1_0-json'}\n" - ] - } - ], "source": [ - "from pprint import pprint\n", - "import json\n", - "\n", - "with open(\"vespa_feed-1000.json\", \"r\") as f:\n", - " sample = f.readline()\n", - " pprint(json.loads(sample))" + "- Still prefer to use the [Vespa CLI](https://docs.vespa.ai/en/vespa-cli) if you _really_ care about performance. 🚀\n", + "- If you want to use pyvespa, prefer the `feed_async_iterable`- method, if you are I/O-bound.\n" ] }, { "cell_type": "markdown", - "id": "68374774", - "metadata": {}, - "source": [ - "Ok, now we are ready to feed the data using Vespa CLI.\n", - "We also want to capture the output of feed statistics for each file.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "id": "2ea17d30", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Feeding 1000 docs...\n", - "{'feeder.error.count': 0,\n", - " 'feeder.inflight.count': 0,\n", - " 'feeder.ok.count': 1000,\n", - " 'feeder.ok.rate': 196.932,\n", - " 'feeder.operation.count': 1000,\n", - " 'feeder.seconds': 5.078,\n", - " 'http.exception.count': 0,\n", - " 'http.request.MBps': 0.058,\n", - " 'http.request.bytes': 293011,\n", - " 'http.request.count': 1000,\n", - " 'http.response.MBps': 0.025,\n", - " 'http.response.bytes': 129388,\n", - " 'http.response.code.counts': {'200': 1000},\n", - " 'http.response.count': 1000,\n", - " 'http.response.error.count': 0,\n", - " 'http.response.latency.millis.avg': 141,\n", - " 'http.response.latency.millis.max': 624,\n", - " 'http.response.latency.millis.min': 127}\n", - "Feeding 5000 docs...\n", - "{'feeder.error.count': 0,\n", - " 'feeder.inflight.count': 0,\n", - " 'feeder.ok.count': 5000,\n", - " 'feeder.ok.rate': 302.62,\n", - " 'feeder.operation.count': 5000,\n", - " 'feeder.seconds': 16.522,\n", - " 'http.exception.count': 0,\n", - " 'http.request.MBps': 0.088,\n", - " 'http.request.bytes': 1450480,\n", - " 'http.request.count': 5000,\n", - " 'http.response.MBps': 0.04,\n", - " 'http.response.bytes': 652778,\n", - " 'http.response.code.counts': {'200': 5000},\n", - " 'http.response.count': 5000,\n", - " 'http.response.error.count': 0,\n", - " 'http.response.latency.millis.avg': 135,\n", - " 'http.response.latency.millis.max': 532,\n", - " 'http.response.latency.millis.min': 125}\n", - "Feeding 10000 docs...\n", - "{'feeder.error.count': 0,\n", - " 'feeder.inflight.count': 0,\n", - " 'feeder.ok.count': 10000,\n", - " 'feeder.ok.rate': 660.536,\n", - " 'feeder.operation.count': 10000,\n", - " 'feeder.seconds': 15.139,\n", - " 'http.exception.count': 0,\n", - " 'http.request.MBps': 0.192,\n", - " 'http.request.bytes': 2905878,\n", - " 'http.request.count': 10000,\n", - " 'http.response.MBps': 0.087,\n", - " 'http.response.bytes': 1317226,\n", - " 'http.response.code.counts': {'200': 10000},\n", - " 'http.response.count': 10000,\n", - " 'http.response.error.count': 0,\n", - " 'http.response.latency.millis.avg': 139,\n", - " 'http.response.latency.millis.max': 563,\n", - " 'http.response.latency.millis.min': 125}\n" - ] - } - ], - "source": [ - "import subprocess\n", - "\n", - "cli_results = {}\n", - "for n in num_docs:\n", - " print(f\"Feeding {n} docs...\")\n", - " # Run the CLI command to feed the data\n", - " command = f\"vespa feed vespa_feed-{n}.json\"\n", - " results = subprocess.run(command, shell=True, capture_output=True, text=True)\n", - " result_dict = json.loads(results.stdout)\n", - " 
pprint(result_dict)\n", - " cli_results[n] = result_dict" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "id": "be2c18ae", + "id": "28591491", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{1000: {'feeder.operation.count': 1000,\n", - " 'feeder.seconds': 5.078,\n", - " 'feeder.ok.count': 1000,\n", - " 'feeder.ok.rate': 196.932,\n", - " 'feeder.error.count': 0,\n", - " 'feeder.inflight.count': 0,\n", - " 'http.request.count': 1000,\n", - " 'http.request.bytes': 293011,\n", - " 'http.request.MBps': 0.058,\n", - " 'http.exception.count': 0,\n", - " 'http.response.count': 1000,\n", - " 'http.response.bytes': 129388,\n", - " 'http.response.MBps': 0.025,\n", - " 'http.response.error.count': 0,\n", - " 'http.response.latency.millis.min': 127,\n", - " 'http.response.latency.millis.avg': 141,\n", - " 'http.response.latency.millis.max': 624,\n", - " 'http.response.code.counts': {'200': 1000}},\n", - " 5000: {'feeder.operation.count': 5000,\n", - " 'feeder.seconds': 16.522,\n", - " 'feeder.ok.count': 5000,\n", - " 'feeder.ok.rate': 302.62,\n", - " 'feeder.error.count': 0,\n", - " 'feeder.inflight.count': 0,\n", - " 'http.request.count': 5000,\n", - " 'http.request.bytes': 1450480,\n", - " 'http.request.MBps': 0.088,\n", - " 'http.exception.count': 0,\n", - " 'http.response.count': 5000,\n", - " 'http.response.bytes': 652778,\n", - " 'http.response.MBps': 0.04,\n", - " 'http.response.error.count': 0,\n", - " 'http.response.latency.millis.min': 125,\n", - " 'http.response.latency.millis.avg': 135,\n", - " 'http.response.latency.millis.max': 532,\n", - " 'http.response.code.counts': {'200': 5000}},\n", - " 10000: {'feeder.operation.count': 10000,\n", - " 'feeder.seconds': 15.139,\n", - " 'feeder.ok.count': 10000,\n", - " 'feeder.ok.rate': 660.536,\n", - " 'feeder.error.count': 0,\n", - " 'feeder.inflight.count': 0,\n", - " 'http.request.count': 10000,\n", - " 'http.request.bytes': 2905878,\n", - " 'http.request.MBps': 0.192,\n", - " 'http.exception.count': 0,\n", - " 'http.response.count': 10000,\n", - " 'http.response.bytes': 1317226,\n", - " 'http.response.MBps': 0.087,\n", - " 'http.response.error.count': 0,\n", - " 'http.response.latency.millis.min': 125,\n", - " 'http.response.latency.millis.avg': 139,\n", - " 'http.response.latency.millis.max': 563,\n", - " 'http.response.code.counts': {'200': 10000}}}" - ] - }, - "execution_count": 25, - "metadata": {}, - "output_type": "execute_result" - } - ], "source": [ - "cli_results" + "## Cleanup\n" ] }, { "cell_type": "code", "execution_count": 26, - "id": "812bac07", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
      | name      | num_docs | max_connections | function_name | max_workers | max_queue_size | feed_time | requests_per_second
    0 | 1000_cli  | 1000     | unknown         | cli           | unknown     | n/a            | 5.078     | 196.927924
    1 | 5000_cli  | 5000     | unknown         | cli           | unknown     | n/a            | 16.522    | 302.626801
    2 | 10000_cli | 10000    | unknown         | cli           | unknown     | n/a            | 15.139    | 660.545611
\n", - "
" - ], - "text/plain": [ - " name num_docs max_connections function_name max_workers \\\n", - "0 1000_cli 1000 unknown cli unknown \n", - "1 5000_cli 5000 unknown cli unknown \n", - "2 10000_cli 10000 unknown cli unknown \n", - "\n", - " max_queue_size feed_time requests_per_second \n", - "0 n/a 5.078 196.927924 \n", - "1 n/a 16.522 302.626801 \n", - "2 n/a 15.139 660.545611 " - ] - }, - "execution_count": 26, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# Let's add the CLI results to the DataFrame\n", - "df_cli = pd.DataFrame(\n", - " [\n", - " {\n", - " \"name\": f\"{n}_cli\",\n", - " \"num_docs\": n,\n", - " \"max_connections\": \"unknown\",\n", - " \"function_name\": \"cli\",\n", - " \"max_workers\": \"unknown\",\n", - " \"max_queue_size\": \"n/a\",\n", - " \"feed_time\": result[\"feeder.seconds\"],\n", - " }\n", - " for n, result in cli_results.items()\n", - " ]\n", - ")\n", - "df_cli[\"requests_per_second\"] = df_cli[\"num_docs\"] / df_cli[\"feed_time\"]\n", - "df_cli" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "id": "b3395710", - "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.plotly.v1+json": { - "config": { - "plotlyServerURL": "https://plot.ly" - }, - "data": [ - { - "customdata": [ - [ - 64, - 2500, - 1 - ], - [ - 64, - 2500, - 1 - ], - [ - 64, - 2500, - 1 - ] - ], - "hovertemplate": "function_name=feed_async_iterable
<br>Number of Documents=%{x}<br>Requests per Second=%{y}<br>max_workers=%{customdata[0]}<br>max_queue_size=%{customdata[1]}<br>
max_connections=%{customdata[2]}", - "legendgroup": "feed_async_iterable", - "marker": { - "color": "#636efa", - "opacity": 0.7, - "size": 12, - "symbol": "circle" - }, - "mode": "markers", - "name": "feed_async_iterable", - "orientation": "v", - "showlegend": true, - "type": "scatter", - "x": [ - 1000, - 5000, - 10000 - ], - "xaxis": "x", - "y": [ - 105.5052227085519, - 152.01841944066945, - 128.44456172727297 - ], - "yaxis": "y" - }, - { - "customdata": [ - [ - "unknown", - "n/a", - "unknown" - ], - [ - "unknown", - "n/a", - "unknown" - ], - [ - "unknown", - "n/a", - "unknown" - ] - ], - "hovertemplate": "function_name=cli
<br>Number of Documents=%{x}<br>Requests per Second=%{y}<br>max_workers=%{customdata[0]}<br>max_queue_size=%{customdata[1]}<br>
max_connections=%{customdata[2]}", - "legendgroup": "cli", - "marker": { - "color": "#EF553B", - "opacity": 0.7, - "size": 12, - "symbol": "circle" - }, - "mode": "markers", - "name": "cli", - "orientation": "v", - "showlegend": true, - "type": "scatter", - "x": [ - 1000, - 5000, - 10000 - ], - "xaxis": "x", - "y": [ - 196.92792437967702, - 302.6268006294638, - 660.545610674417 - ], - "yaxis": "y" - } - ], - "layout": { - "font": { - "size": 16 - }, - "legend": { - "title": { - "font": { - "size": 16 - }, - "text": "Function Details" - }, - "tracegroupgap": 0, - "x": 800, - "xanchor": "auto", - "y": 1, - "yanchor": "auto" - }, - "template": { - "data": { - "bar": [ - { - "error_x": { - "color": "#2a3f5f" - }, - "error_y": { - "color": "#2a3f5f" - }, - "marker": { - "line": { - "color": "white", - "width": 0.5 - }, - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "bar" - } - ], - "barpolar": [ - { - "marker": { - "line": { - "color": "white", - "width": 0.5 - }, - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "barpolar" - } - ], - "carpet": [ - { - "aaxis": { - "endlinecolor": "#2a3f5f", - "gridcolor": "#C8D4E3", - "linecolor": "#C8D4E3", - "minorgridcolor": "#C8D4E3", - "startlinecolor": "#2a3f5f" - }, - "baxis": { - "endlinecolor": "#2a3f5f", - "gridcolor": "#C8D4E3", - "linecolor": "#C8D4E3", - "minorgridcolor": "#C8D4E3", - "startlinecolor": "#2a3f5f" - }, - "type": "carpet" - } - ], - "choropleth": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "choropleth" - } - ], - "contour": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "contour" - } - ], - "contourcarpet": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "contourcarpet" - } - ], - "heatmap": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "heatmap" - } - ], - "heatmapgl": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "heatmapgl" - } - ], - "histogram": [ - { - "marker": { - "pattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - } - }, - "type": "histogram" - } - ], - "histogram2d": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - 
"#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "histogram2d" - } - ], - "histogram2dcontour": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "histogram2dcontour" - } - ], - "mesh3d": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "type": "mesh3d" - } - ], - "parcoords": [ - { - "line": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "parcoords" - } - ], - "pie": [ - { - "automargin": true, - "type": "pie" - } - ], - "scatter": [ - { - "fillpattern": { - "fillmode": "overlay", - "size": 10, - "solidity": 0.2 - }, - "type": "scatter" - } - ], - "scatter3d": [ - { - "line": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatter3d" - } - ], - "scattercarpet": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattercarpet" - } - ], - "scattergeo": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattergeo" - } - ], - "scattergl": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattergl" - } - ], - "scattermapbox": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scattermapbox" - } - ], - "scatterpolar": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterpolar" - } - ], - "scatterpolargl": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterpolargl" - } - ], - "scatterternary": [ - { - "marker": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "type": "scatterternary" - } - ], - "surface": [ - { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - }, - "colorscale": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "type": "surface" - } - ], - "table": [ - { - "cells": { - "fill": { - "color": "#EBF0F8" - }, - "line": { - "color": "white" - } - }, - "header": { - "fill": { - "color": "#C8D4E3" - }, - "line": { - "color": "white" - } - }, - "type": "table" - } - ] - }, - "layout": { - "annotationdefaults": { - "arrowcolor": "#2a3f5f", - "arrowhead": 0, - "arrowwidth": 1 - }, - "autotypenumbers": "strict", - "coloraxis": { - "colorbar": { - "outlinewidth": 0, - "ticks": "" - } - }, - "colorscale": { - "diverging": [ - [ - 0, - "#8e0152" - ], - [ - 0.1, - 
"#c51b7d" - ], - [ - 0.2, - "#de77ae" - ], - [ - 0.3, - "#f1b6da" - ], - [ - 0.4, - "#fde0ef" - ], - [ - 0.5, - "#f7f7f7" - ], - [ - 0.6, - "#e6f5d0" - ], - [ - 0.7, - "#b8e186" - ], - [ - 0.8, - "#7fbc41" - ], - [ - 0.9, - "#4d9221" - ], - [ - 1, - "#276419" - ] - ], - "sequential": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ], - "sequentialminus": [ - [ - 0, - "#0d0887" - ], - [ - 0.1111111111111111, - "#46039f" - ], - [ - 0.2222222222222222, - "#7201a8" - ], - [ - 0.3333333333333333, - "#9c179e" - ], - [ - 0.4444444444444444, - "#bd3786" - ], - [ - 0.5555555555555556, - "#d8576b" - ], - [ - 0.6666666666666666, - "#ed7953" - ], - [ - 0.7777777777777778, - "#fb9f3a" - ], - [ - 0.8888888888888888, - "#fdca26" - ], - [ - 1, - "#f0f921" - ] - ] - }, - "colorway": [ - "#636efa", - "#EF553B", - "#00cc96", - "#ab63fa", - "#FFA15A", - "#19d3f3", - "#FF6692", - "#B6E880", - "#FF97FF", - "#FECB52" - ], - "font": { - "color": "#2a3f5f" - }, - "geo": { - "bgcolor": "white", - "lakecolor": "white", - "landcolor": "white", - "showlakes": true, - "showland": true, - "subunitcolor": "#C8D4E3" - }, - "hoverlabel": { - "align": "left" - }, - "hovermode": "closest", - "mapbox": { - "style": "light" - }, - "paper_bgcolor": "white", - "plot_bgcolor": "white", - "polar": { - "angularaxis": { - "gridcolor": "#EBF0F8", - "linecolor": "#EBF0F8", - "ticks": "" - }, - "bgcolor": "white", - "radialaxis": { - "gridcolor": "#EBF0F8", - "linecolor": "#EBF0F8", - "ticks": "" - } - }, - "scene": { - "xaxis": { - "backgroundcolor": "white", - "gridcolor": "#DFE8F3", - "gridwidth": 2, - "linecolor": "#EBF0F8", - "showbackground": true, - "ticks": "", - "zerolinecolor": "#EBF0F8" - }, - "yaxis": { - "backgroundcolor": "white", - "gridcolor": "#DFE8F3", - "gridwidth": 2, - "linecolor": "#EBF0F8", - "showbackground": true, - "ticks": "", - "zerolinecolor": "#EBF0F8" - }, - "zaxis": { - "backgroundcolor": "white", - "gridcolor": "#DFE8F3", - "gridwidth": 2, - "linecolor": "#EBF0F8", - "showbackground": true, - "ticks": "", - "zerolinecolor": "#EBF0F8" - } - }, - "shapedefaults": { - "line": { - "color": "#2a3f5f" - } - }, - "ternary": { - "aaxis": { - "gridcolor": "#DFE8F3", - "linecolor": "#A2B1C6", - "ticks": "" - }, - "baxis": { - "gridcolor": "#DFE8F3", - "linecolor": "#A2B1C6", - "ticks": "" - }, - "bgcolor": "white", - "caxis": { - "gridcolor": "#DFE8F3", - "linecolor": "#A2B1C6", - "ticks": "" - } - }, - "title": { - "x": 0.05 - }, - "xaxis": { - "automargin": true, - "gridcolor": "#EBF0F8", - "linecolor": "#EBF0F8", - "ticks": "", - "title": { - "standoff": 15 - }, - "zerolinecolor": "#EBF0F8", - "zerolinewidth": 2 - }, - "yaxis": { - "automargin": true, - "gridcolor": "#EBF0F8", - "linecolor": "#EBF0F8", - "ticks": "", - "title": { - "standoff": 15 - }, - "zerolinecolor": "#EBF0F8", - "zerolinewidth": 2 - } - } - }, - "title": { - "text": "Performance: Requests per Second vs. 
Number of Documents" - }, - "width": 800, - "xaxis": { - "anchor": "y", - "domain": [ - 0, - 1 - ], - "ticktext": [ - "1k", - "5k", - "10k" - ], - "tickvals": [ - 1000, - 5000, - 10000 - ], - "title": { - "text": "Number of Documents" - }, - "type": "log" - }, - "yaxis": { - "anchor": "x", - "domain": [ - 0, - 1 - ], - "title": { - "text": "Requests per Second" - } - } - } - } - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "df_total = pd.concat([df, df_cli])\n", - "\n", - "plot_performance(df_total)" - ] - }, - { - "cell_type": "markdown", - "id": "1f745d73", - "metadata": {}, - "source": [ - "As you can tell, the CLI is still almost 2x faster than the `feed_async_iterable` method.\n", - "\n", - "We might improve the performance of the `feed_async_iterable` method by introducing parallelism (threading) for that method as well.\n" - ] - }, - { - "cell_type": "markdown", - "id": "18fc6282", - "metadata": {}, - "source": [ - "## Conclusion\n" - ] - }, - { - "cell_type": "markdown", - "id": "2d91581b", - "metadata": {}, - "source": [ - "- Prefer to use the CLI if you care about performance. 🚀\n", - "- If you want to use pyvespa, prefer the `feed_async_iterable`- method, if you are I/O-bound.\n" - ] - }, - { - "cell_type": "markdown", - "id": "28591491", - "metadata": {}, - "source": [ - "## Cleanup\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, "id": "e5064bd2", "metadata": {}, "outputs": [