Skip to content

Commit

Permalink
Merge pull request #1453 from vespa-engine/radu/training
Browse files Browse the repository at this point in the history
initial Vespa 101 ch1 lab sample app
  • Loading branch information
kkraune authored Sep 17, 2024
2 parents 7ae99ab + fcd66dd commit 74e68c9
Show file tree
Hide file tree
Showing 12 changed files with 25,516 additions and 0 deletions.
4 changes: 4 additions & 0 deletions examples/training-artifacts/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Packaged application archives, at any depth (presumably build output of a deploy; verify)
**/application.zip
# security/ directories at any depth; services.xml in this commit loads security/clients.pem from one
**/security/
# IDE project directories named .idea
**/.idea/
# Backup files ending in .bak
**/*.bak
7 changes: 7 additions & 0 deletions examples/training-artifacts/101/ch1/ecommerce/.vespaignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# This file excludes unnecessary files from the application package. See
# https://docs.vespa.ai/en/reference/vespaignore.html for more information.
# Operating-system and repository housekeeping files
.DS_Store
.gitignore
README.md
# Auxiliary material kept beside the app (presumably feed data and helper scripts; verify)
ext/
# Backup files at any depth
**/*.bak
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{"put": "id:ecommerce:product::1", "fields": {"title": "phone", "price": 2.0}}
{"put": "id:ecommerce:product::2", "fields": {"title": "pen", "price": 4.2}}
{"put": "id:ecommerce:product::3", "fields": {"title": "new laptop", "price": 3.5}}

21 changes: 21 additions & 0 deletions examples/training-artifacts/101/ch1/ecommerce/schemas/product.sd
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Schema for the "product" document type.
# NOTE(review): the sample feed documents shipped with this app put the fields
# "title" and "price", which are not declared here. Confirm whether
# field1/field2 are placeholders to be renamed as part of the lab, or whether
# the schema should already declare title (string) and price (numeric).
schema product {
document product {
# String field included in document summaries and stored as an attribute.
field field1 type string {
indexing: summary | attribute
}

# String field included in document summaries and added to the text index.
field field2 type string {
indexing: summary | index
}
}

# Groups both fields under the fieldset named "default".
fieldset default {
fields: field1, field2
}

# Rank profile named "default": the first-phase score is nativeRank computed over both fields.
rank-profile default {
first-phase {
expression: nativeRank(field1,field2)
}
}
}
84 changes: 84 additions & 0 deletions examples/training-artifacts/101/ch1/ecommerce/services.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
<?xml version="1.0" encoding="utf-8" ?>
<!-- Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -->
<services version="1.0" xmlns:deploy="vespa" xmlns:preprocess="properties">

<!--
A container cluster handles incoming requests to the application and processes those requests
and their results. The processing to do and the APIs to expose can be provided by Vespa
or by the application through Java components supplied as part of the application.
See:
- Reference: https://docs.vespa.ai/en/reference/services-container.html
-->
<container id="default" version="1.0">

<clients>
<!--
Required for mTLS to keep working. The configuration
below is equivalent to using a single
security/clients.pem file.
-->
<client id="mtls" permissions="read,write">
<certificate file="security/clients.pem"/>
</client>

<!--
The below sets up a token-authenticated client with read and
write permissions. Note that the "myToken" token must be
created in the console before deploying.
-->
<client id="tokenClient" permissions="read,write">
<token id="myToken"/>
</client>
</clients>
<!--
<document-api> tells the container that it should accept documents for indexing. Through the
Document REST API you can PUT new documents, UPDATE existing documents, and DELETE documents
already in the cluster.
Documents sent to the Document REST API will be passed through document processors on the way
to the content cluster.
See:
- Reference: https://docs.vespa.ai/en/reference/services-container.html#document-api
- Operations: https://docs.vespa.ai/en/document-v1-api-guide.html
-->
<document-api/>

<!--
<search> tells the container to answer queries and serve results for those queries.
Inside the <search> element you can configure chains of "searchers":
Java components processing the query and/or result.
See:
- Reference: https://docs.vespa.ai/en/query-api.html
- Searchers: https://docs.vespa.ai/en/searcher-development.html
-->
<search/>

<!--
<nodes> specifies the nodes that should run this cluster.
-->
<nodes>
<node hostalias="node1" />
</nodes>
</container>

<!--
<content> clusters store application data, maintain indexes and execute the
distributed parts of a query.
See:
- Reference: https://docs.vespa.ai/en/reference/services-content.html
NOTE(review): min-redundancy is 2 while only one node is listed below; confirm
this is intended for the target deployment environment.
-->
<content id="ecommercetest" version="1.0">
<min-redundancy>2</min-redundancy>
<!-- Document types held by this cluster; "product" is the type defined in schemas/product.sd. -->
<documents>
<document type="product" mode="index" />
</documents>
<nodes>
<node hostalias="node1" distribution-key="0" />
</nodes>
</content>

</services>
7 changes: 7 additions & 0 deletions examples/training-artifacts/101/ch2/ecommerce/.vespaignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# This file excludes unnecessary files from the application package. See
# https://docs.vespa.ai/en/reference/vespaignore.html for more information.
# Operating-system and repository housekeeping files
.DS_Store
.gitignore
README.md
# Auxiliary material kept beside the app; the CSV-to-JSONL converter in this
# commit reads its input from, and writes its output to, an ext/ directory
ext/
# Backup files at any depth
**/*.bak
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import argparse
import csv
import json
from pathlib import Path

# Default input/output locations, resolved relative to the directory that holds
# this script instead of one developer's home directory, so the script runs
# unchanged from any checkout.
# NOTE(review): assumes the script sits next to the downloaded CSV (the original
# absolute paths pointed into the application's ext/ directory) - confirm, or
# pass explicit paths on the command line.
_SCRIPT_DIR = Path(__file__).resolve().parent
csv_file_path = str(_SCRIPT_DIR / 'myntra_products_catalog.csv')
jsonl_file_path = str(_SCRIPT_DIR / 'products.jsonl')


def csv_to_jsonl(csv_file, jsonl_file):
    """Convert a product CSV into a JSONL file of Vespa "put" operations.

    Each CSV row becomes one JSON line of the form
    ``{"put": "id:ecommerce:product::<ProductID>", "fields": {...}}`` where
    ``fields`` holds every column of the row with surrounding whitespace
    stripped.

    Args:
        csv_file: Path of the UTF-8 CSV file to read. It must have a header
            row containing a ``ProductID`` column.
        jsonl_file: Path of the JSONL file to write (overwritten if present).

    Raises:
        KeyError: If a row has no ``ProductID`` value.
        OSError: If either file cannot be opened.
    """
    # newline='' hands line-ending handling to the csv module, so quoted
    # fields that contain embedded newlines are parsed correctly.
    with open(csv_file, mode='r', encoding='utf-8', newline='') as csvfile, \
            open(jsonl_file, mode='w', encoding='utf-8') as jsonlfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            # Strip leading and trailing whitespace from all values.
            # DictReader reports a short row's missing columns as None and
            # collects surplus cells in a list under the key None; neither
            # has a usable string value, so both are left out of the document
            # instead of crashing on .strip().
            fields = {
                key: value.strip()
                for key, value in row.items()
                if key is not None and value is not None
            }

            # Create a Vespa write command
            write_command = {
                "put": "id:ecommerce:product::" + fields['ProductID'],
                "fields": fields,
            }
            jsonlfile.write(json.dumps(write_command) + '\n')


def main(argv=None):
    """Command-line entry point.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.
            Accepts two optional positional arguments, the input CSV path and
            the output JSONL path, falling back to ``csv_file_path`` and
            ``jsonl_file_path``.
    """
    parser = argparse.ArgumentParser(
        description='Convert a product catalog CSV into Vespa JSONL put operations.')
    parser.add_argument('csv_file', nargs='?', default=csv_file_path,
                        help='input CSV file (default: %(default)s)')
    parser.add_argument('jsonl_file', nargs='?', default=jsonl_file_path,
                        help='output JSONL file (default: %(default)s)')
    args = parser.parse_args(argv)
    csv_to_jsonl(args.csv_file, args.jsonl_file)


# Run the conversion only when executed as a script, so importing this module
# has no file-system side effects.
if __name__ == '__main__':
    main()
Loading

0 comments on commit 74e68c9

Please sign in to comment.