Skip to content

Commit

Permalink
initial commit of ch2 lab
Browse files Browse the repository at this point in the history
  • Loading branch information
radu-gheorghe committed Sep 17, 2024
1 parent 5273d70 commit fcd66dd
Show file tree
Hide file tree
Showing 7 changed files with 25,396 additions and 0 deletions.
7 changes: 7 additions & 0 deletions examples/training-artifacts/101/ch2/ecommerce/.vespaignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# This file excludes unnecessary files from the application package. See
# https://docs.vespa.ai/en/reference/vespaignore.html for more information.
.DS_Store
.gitignore
README.md
ext/
**/*.bak
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import csv
import json

# Input CSV and output JSONL locations passed to csv_to_jsonl() below.
# NOTE(review): both are absolute paths under /Users/radu, i.e. specific to
# one machine; adjust them to your own checkout before running.
csv_file_path = '/Users/radu/gits/sample-apps/examples/training-artifacts/101/ch2/ecommerce/ext/myntra_products_catalog.csv'
jsonl_file_path = '/Users/radu/gits/sample-apps/examples/training-artifacts/101/ch2/ecommerce/ext/products.jsonl'

def csv_to_jsonl(csv_file, jsonl_file):
    """Convert a CSV file into a JSONL feed of Vespa ``put`` operations.

    Every CSV row becomes one JSON line of the form
    ``{"put": "id:ecommerce:product::<ProductID>", "fields": {...}}`` where
    ``fields`` holds the row's columns with surrounding whitespace stripped.

    Args:
        csv_file: Path of the CSV file to read. Its header row must contain
            a ``ProductID`` column.
        jsonl_file: Path of the JSONL file to write (overwritten if present).

    Raises:
        KeyError: If a row has no ``ProductID`` value.
        OSError: If either file cannot be opened.
    """
    # newline='' leaves newline handling to the csv module, as its
    # documentation requires for file objects passed to a reader.
    with open(csv_file, mode='r', encoding='utf-8', newline='') as csvfile, \
            open(jsonl_file, mode='w', encoding='utf-8') as jsonlfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            # Strip leading and trailing whitespace from all values.
            # DictReader fills cells missing from short rows with None and
            # collects surplus cells in a list under the key None; neither
            # can be stripped nor maps to a real column, so both are dropped.
            fields = {
                key: value.strip()
                for key, value in row.items()
                if key is not None and value is not None
            }

            # Create a Vespa write command
            write_command = {
                "put": "id:ecommerce:product::" + fields['ProductID'],
                "fields": fields,
            }
            jsonlfile.write(json.dumps(write_command) + '\n')

# Run the conversion only when this file is executed as a script, so that
# importing the module (e.g. to reuse csv_to_jsonl) has no filesystem side
# effects.
if __name__ == "__main__":
    csv_to_jsonl(csv_file_path, jsonl_file_path)
12,762 changes: 12,762 additions & 0 deletions examples/training-artifacts/101/ch2/ecommerce/ext/myntra_products_catalog.csv

Large diffs are not rendered by default.

12,491 changes: 12,491 additions & 0 deletions examples/training-artifacts/101/ch2/ecommerce/ext/products.jsonl

Large diffs are not rendered by default.

21 changes: 21 additions & 0 deletions examples/training-artifacts/101/ch2/ecommerce/schemas/product.sd
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Schema for the "product" document type.
# NOTE(review): field1/field2 look like placeholders - the feed script and the
# sample queries in this commit use names such as ProductID, Description and
# PrimaryColor; confirm the intended field list.
schema product {
document product {
# String field kept as an attribute and included in document summaries.
field field1 type string {
indexing: summary | attribute
}

# String field added to the text index and included in document summaries.
field field2 type string {
indexing: summary | index
}
}

# Groups field1 and field2 so they can be queried together as "default".
fieldset default {
fields: field1, field2
}

# Default rank profile: first-phase score is nativeRank over both fields.
rank-profile default {
first-phase {
expression: nativeRank(field1,field2)
}
}
}
84 changes: 84 additions & 0 deletions examples/training-artifacts/101/ch2/ecommerce/services.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
<?xml version="1.0" encoding="utf-8" ?>
<!-- Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -->
<services version="1.0" xmlns:deploy="vespa" xmlns:preprocess="properties">

<!--
A container cluster handles incoming requests to the application and processes those requests,
and their results. The processing to do and the APIs to expose can be provided by Vespa
or by the application through Java components supplied as part of the application.
See:
- Reference: https://docs.vespa.ai/en/reference/services-container.html
-->
<container id="default" version="1.0">

<clients>
<!--
Required for mTLS to still work, the below
configuration is equivalent to using a single
security/clients.pem
-->
<client id="mtls" permissions="read,write">
<certificate file="security/clients.pem"/>
</client>

<!--
The below sets up a client with read and write permissions.
Note that the "myToken" token must be created in
the console before deploying.
-->
<client id="tokenClient" permissions="read,write">
<token id="myToken"/>
</client>
</clients>
<!--
<document-api> tells the container that it should accept documents for indexing. Through the
Document REST API you can PUT new documents, UPDATE existing documents, and DELETE documents
already in the cluster.
Documents sent to the Document REST API will be passed through document processors on the way
to the content cluster.
See:
- Reference: https://docs.vespa.ai/en/reference/services-container.html#document-api
- Operations: https://docs.vespa.ai/en/document-v1-api-guide.html
-->
<document-api/>

<!--
<search> tells the container to answer queries and serve results for those queries.
Inside the <search /> cluster you can configure chains of "searchers" -
Java components processing the query and/or result.
See:
- Reference: https://docs.vespa.ai/en/query-api.html
- Searchers: https://docs.vespa.ai/en/searcher-development.html
-->
<search/>

<!--
<nodes> specifies the nodes that should run this cluster.
-->
<nodes>
<node hostalias="node1" />
</nodes>
</container>

<!--
<content/> content clusters store application data, maintain indexes and execute the
distributed parts of a query.
See:
- Reference: https://docs.vespa.ai/en/reference/services-content.html
-->
<content id="ecommercetest" version="1.0">
<min-redundancy>2</min-redundancy>
<documents>
<document type="product" mode="index" />
</documents>
<nodes>
<node hostalias="node1" distribution-key="0" />
</nodes>
</content>

</services>
10 changes: 10 additions & 0 deletions examples/training-artifacts/101/ch2/queries.http
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
### query
# Text search: returns at most 5 products whose Description matches
# "premium cotton", rendered with the "short" document summary.
# NOTE(review): neither a Description field nor a "short" summary class is
# defined in the product.sd shown in this commit - confirm they exist in the
# deployed schema.
curl -H "Content-Type:application/json" https://e01bdc0f.ee10cd00.z.vespa-app.cloud/search/ -d '{
"yql": "select * from product where Description contains \"premium cotton\" limit 5",
"presentation.summary": "short"
}'

### group by color
# Grouping: matches every product (where true) and outputs the number of
# documents for each distinct PrimaryColor value.
curl -H "Content-Type:application/json" https://e01bdc0f.ee10cd00.z.vespa-app.cloud/search/ -d '{
"yql": "select * from product where true | all(group(PrimaryColor) each(output(count())))"
}'

0 comments on commit fcd66dd

Please sign in to comment.