initial commit of ch2 lab

vespa-engine · Sep 17, 2024 · fcd66dd · fcd66dd
1 parent 5273d70
commit fcd66dd
Show file tree

Hide file tree

Showing 7 changed files with 25,396 additions and 0 deletions.
diff --git a/examples/training-artifacts/101/ch2/ecommerce/.vespaignore b/examples/training-artifacts/101/ch2/ecommerce/.vespaignore
@@ -0,0 +1,7 @@
+# This file excludes unnecessary files from the application package. See
+# https://docs.vespa.ai/en/reference/vespaignore.html for more information.
+.DS_Store
+.gitignore
+README.md
+ext/
+**/*.bak
diff --git a/examples/training-artifacts/101/ch2/ecommerce/ext/generate_jsonl.py b/examples/training-artifacts/101/ch2/ecommerce/ext/generate_jsonl.py
@@ -0,0 +1,21 @@
+import csv
+import json
+from pathlib import Path
+
+# Resolve the input and output next to this script so the conversion works
+# from any checkout location rather than from one developer's home directory.
+_EXT_DIR = Path(__file__).resolve().parent
+csv_file_path = str(_EXT_DIR / 'myntra_products_catalog.csv')
+jsonl_file_path = str(_EXT_DIR / 'products.jsonl')
+
+def csv_to_jsonl(csv_file, jsonl_file):
+    """Convert a CSV file into a JSONL file of Vespa "put" operations.
+
+    Each CSV row becomes one JSON line. The document id is built from the
+    row's ``ProductID`` column, and ``fields`` holds the row's columns with
+    leading and trailing whitespace stripped from every value.
+
+    Args:
+        csv_file: Path of the UTF-8 CSV file to read. Its header row must
+            contain a ``ProductID`` column.
+        jsonl_file: Path of the UTF-8 JSONL file to write; an existing file
+            is overwritten.
+
+    Raises:
+        KeyError: If a row has no ``ProductID`` value.
+    """
+    # newline='' hands line-ending handling to the csv module, so quoted
+    # values that contain embedded newlines are parsed correctly.
+    with open(csv_file, mode='r', encoding='utf-8', newline='') as csvfile, \
+            open(jsonl_file, mode='w', encoding='utf-8') as jsonlfile:
+        for row in csv.DictReader(csvfile):
+            # DictReader pads short rows with None and collects surplus cells
+            # in a list under the key None. Neither has strip(), so absent
+            # values and surplus cells are omitted instead of crashing.
+            fields = {
+                name: value.strip()
+                for name, value in row.items()
+                if name is not None and value is not None
+            }
+
+            # Create a Vespa write command
+            write_command = {
+                "put": "id:ecommerce:product::" + fields['ProductID'],
+                "fields": fields,
+            }
+            jsonlfile.write(json.dumps(write_command) + '\n')
+
+if __name__ == '__main__':
+    # Run the conversion only when executed as a script, so importing this
+    # module does not read or overwrite any files.
+    csv_to_jsonl(csv_file_path, jsonl_file_path)
diff --git a/examples/training-artifacts/101/ch2/ecommerce/ext/myntra_products_catalog.csv b/examples/training-artifacts/101/ch2/ecommerce/ext/myntra_products_catalog.csv
diff --git a/examples/training-artifacts/101/ch2/ecommerce/ext/products.jsonl b/examples/training-artifacts/101/ch2/ecommerce/ext/products.jsonl
diff --git a/examples/training-artifacts/101/ch2/ecommerce/schemas/product.sd b/examples/training-artifacts/101/ch2/ecommerce/schemas/product.sd
@@ -0,0 +1,21 @@
+# Schema for the "product" document type of the ecommerce application.
+# NOTE(review): field1/field2 look like placeholder names. generate_jsonl.py
+# feeds every CSV column as a document field, and queries.http references
+# Description and PrimaryColor, none of which are declared here - confirm
+# whether the real fields are meant to be filled in as part of the lab.
+schema product {
+    document product {
+        # Kept as an attribute and included in the document summary.
+        field field1 type string {
+            indexing: summary | attribute
+        }
+
+        # Added to the text index and included in the document summary.
+        field field2 type string {
+            indexing: summary | index
+        }
+    }
+
+    # Groups both fields under the name "default" so they can be queried together.
+    fieldset default {
+        fields: field1, field2
+    }
+
+    # First-phase ranking uses nativeRank computed over both fields.
+    rank-profile default {
+        first-phase {
+            expression: nativeRank(field1,field2)
+        }
+    }
+}
diff --git a/examples/training-artifacts/101/ch2/ecommerce/services.xml b/examples/training-artifacts/101/ch2/ecommerce/services.xml
@@ -0,0 +1,84 @@
+<?xml version="1.0" encoding="utf-8" ?>
+<!-- Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -->
+<services version="1.0" xmlns:deploy="vespa" xmlns:preprocess="properties">
+
+    <!--
+        A container cluster handles incoming requests to the application and processes those requests,
+        and their results. The processing to do and the APIs to expose can be provided by Vespa
+        or by the application through Java components supplied as part of the application.
+
+        See:
+          - Reference: https://docs.vespa.ai/en/reference/services-container.html
+    -->
+    <container id="default" version="1.0">
+
+          <clients>
+            <!-- 
+              Required for mTLS to still work. The configuration
+              below is equivalent to using a single
+              security/clients.pem.
+            -->
+            <client id="mtls" permissions="read,write">
+              <certificate file="security/clients.pem"/>
+            </client>
+
+            <!--
+              The below sets up a client with read and write permissions.
+              Note that the "myToken" token must be created in
+              the console before deploying.
+            -->
+            <client id="tokenClient" permissions="read,write">
+              <token id="myToken"/>
+            </client>
+          </clients>
+        <!--
+            <document-api> tells the container that it should accept documents for indexing. Through the
+            Document REST API you can PUT new documents, UPDATE existing documents, and DELETE documents
+            already in the cluster.
+
+            Documents sent to the Document REST API will be passed through document processors on the way
+            to the content cluster.
+
+            See:
+             - Reference: https://docs.vespa.ai/en/reference/services-container.html#document-api
+             - Operations: https://docs.vespa.ai/en/document-v1-api-guide.html
+        -->
+        <document-api/>
+
+        <!--
+            <search> tells the container to answer queries and serve results for those queries.
+            Inside the <search /> cluster you can configure chains of "searchers" -
+            Java components processing the query and/or result.
+
+            See:
+             - Reference: https://docs.vespa.ai/en/query-api.html
+             - Searchers: https://docs.vespa.ai/en/searcher-development.html
+        -->
+        <search/>
+
+        <!--
+            <nodes> specifies the nodes that should run this cluster.
+        -->
+        <nodes>
+            <node hostalias="node1" />
+        </nodes>
+    </container>
+
+    <!--
+        <content/> content clusters store application data, maintain indexes and execute the
+        distributed parts of a query.
+
+        See:
+          - Reference: https://docs.vespa.ai/en/reference/services-content.html
+    -->
+    <content id="ecommercetest" version="1.0">
+        <min-redundancy>2</min-redundancy>
+        <documents>
+            <document type="product" mode="index" />
+        </documents>
+        <nodes>
+            <node hostalias="node1" distribution-key="0" />
+        </nodes>
+    </content>
+
+</services>
diff --git a/examples/training-artifacts/101/ch2/queries.http b/examples/training-artifacts/101/ch2/queries.http
@@ -0,0 +1,10 @@
+### query
+curl -H "Content-Type:application/json" https://e01bdc0f.ee10cd00.z.vespa-app.cloud/search/ -d '{
+  "yql": "select * from product where Description contains \"premium cotton\" limit 5",
+  "presentation.summary": "short"
+}'
+
+### group by color
+curl -H "Content-Type:application/json" https://e01bdc0f.ee10cd00.z.vespa-app.cloud/search/ -d '{
+  "yql": "select * from product where true | all(group(PrimaryColor) each(output(count())))"
+}'