Skip to content

Commit

Permalink
Add namespace support and improve examples
Browse files Browse the repository at this point in the history
  • Loading branch information
mfortman11 committed Jan 30, 2024
1 parent 9ba4547 commit 6c46990
Show file tree
Hide file tree
Showing 8 changed files with 98 additions and 154 deletions.
13 changes: 11 additions & 2 deletions examples/astradb/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,27 @@ Here are two sample scripts which work well with the sample data in the Astra Po

- `ASTRA_DB_APPLICATION_TOKEN`: The generated app token for your Astra database
- `ASTRA_DB_ENDPOINT`: The API endpoint for your Astra database
- `ASTRA_DB_NAMESPACE`: (Optional) The namespace where your collection is stored; defaults to `default_keyspace`
- `OPENAI_API_KEY`: Your OpenAI key

2. `cd` into the `examples` directory
3. run `npm i`

## Load the data
## Example load and query

Loads and queries a simple vectorstore with some documents about Astra DB

run `ts-node astradb/example`

## Movie Reviews Example

### Load the data

This sample loads the same dataset of movie reviews as the Astra Portal sample dataset. (Feel free to load the data in your Astra Data Explorer to compare.)

run `ts-node astradb/load`

## Use RAG to Query the data
### Use RAG to Query the data

Check out your data in the Astra Data Explorer and change the sample query as you see fit.

Expand Down
55 changes: 55 additions & 0 deletions examples/astradb/example.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import {
AstraDBVectorStore,
Document,
storageContextFromDefaults,
VectorStoreIndex,
} from "llamaindex";

// Name of the Astra DB collection that this example creates and then connects to.
const collectionName = "test_collection";

/**
 * End-to-end example: indexes three short facts about AstraDB in an
 * AstraDBVectorStore-backed VectorStoreIndex, asks the resulting query engine
 * to describe AstraDB, and prints the response.
 *
 * Any error raised along the way is caught and written to stderr via
 * console.error rather than being rethrown.
 */
async function main() {
  try {
    // Text/id pairs that become the example documents (id is stored as metadata).
    const facts = [
      { text: "AstraDB is built on Apache Cassandra", id: 123 },
      { text: "AstraDB is a NoSQL DB", id: 456 },
      { text: "AstraDB supports vector search", id: 789 },
    ];
    const docs = facts.map(
      ({ text, id }) => new Document({ text, metadata: { id } }),
    );

    // Create the collection with 1536-dimension cosine vectors, then connect to it.
    const vectorStore = new AstraDBVectorStore();
    await vectorStore.create(collectionName, {
      vector: { dimension: 1536, metric: "cosine" },
    });
    await vectorStore.connect(collectionName);

    // Build the index on top of a storage context that uses the Astra vector store.
    const storageContext = await storageContextFromDefaults({ vectorStore });
    const index = await VectorStoreIndex.fromDocuments(docs, {
      storageContext,
    });

    const answer = await index.asQueryEngine().query({
      query: "Describe AstraDB.",
    });

    console.log(answer.toString());
  } catch (err) {
    console.error(err);
  }
}

// Run the example; main() catches and logs its own errors.
main();
4 changes: 2 additions & 2 deletions examples/astradb/load.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@ const collectionName = "movie_reviews";
async function main() {
try {
const reader = new PapaCSVReader(false);
const docs = await reader.loadData("../data/movie_reviews.csv");
const docs = await reader.loadData("./data/movie_reviews.csv");

const astraVS = new AstraDBVectorStore();
const astraVS = new AstraDBVectorStore({ contentKey: "reviewtext" });
await astraVS.create(collectionName, {
vector: { dimension: 1536, metric: "cosine" },
});
Expand Down
2 changes: 1 addition & 1 deletion examples/astradb/query.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ const collectionName = "movie_reviews";

async function main() {
try {
const astraVS = new AstraDBVectorStore();
const astraVS = new AstraDBVectorStore({ contentKey: "reviewtext" });
await astraVS.connect(collectionName);

const ctx = serviceContextFromDefaults();
Expand Down
2 changes: 1 addition & 1 deletion examples/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"name": "examples",
"private": true,
"dependencies": {
"@datastax/astra-db-ts": "^0.1.2",
"@datastax/astra-db-ts": "^0.1.4",
"@notionhq/client": "^2.2.14",
"@pinecone-database/pinecone": "^1.1.2",
"chromadb": "^1.7.3",
Expand Down
50 changes: 23 additions & 27 deletions packages/core/src/storage/vectorStore/AstraDBVectorStore.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,7 @@ export class AstraDBVectorStore implements VectorStore {

astraDBClient: AstraDB;
idKey: string;
contentKey: string | undefined; // if undefined the entirety of the node aside from the id and embedding will be stored as content
metadataKey: string;
contentKey: string;

private collection: Collection | undefined;

Expand All @@ -22,6 +21,7 @@ export class AstraDBVectorStore implements VectorStore {
params?: {
token: string;
endpoint: string;
namespace: string;
};
},
) {
Expand All @@ -40,12 +40,15 @@ export class AstraDBVectorStore implements VectorStore {
if (!endpoint) {
throw new Error("Must specify ASTRA_DB_ENDPOINT via env variable.");
}
this.astraDBClient = new AstraDB(token, endpoint);
const namespace =
init?.params?.namespace ??
process.env.ASTRA_DB_NAMESPACE ??
"default_keyspace";
this.astraDBClient = new AstraDB(token, endpoint, namespace);
}

this.idKey = init?.idKey ?? "_id";
this.contentKey = init?.contentKey;
this.metadataKey = init?.metadataKey ?? "metadata";
this.contentKey = init?.contentKey ?? "text";
}

/**
Expand Down Expand Up @@ -102,12 +105,13 @@ export class AstraDBVectorStore implements VectorStore {
if (!nodes || nodes.length === 0) {
return [];
}

const dataToInsert = nodes.map((node) => {
return {
_id: node.id_,
$vector: node.getEmbedding(),
content: node.getContent(MetadataMode.ALL),
metadata: node.metadata,
$vector: node.embedding,
[this.idKey]: node.id_,
[this.contentKey]: node.getContent(MetadataMode.NONE),
...node.metadata,
};
});

Expand All @@ -122,8 +126,7 @@ export class AstraDBVectorStore implements VectorStore {

for (const batch of batchData) {
console.debug(`Inserting batch of size ${batch.length}`);

const result = await collection.insertMany(batch);
await collection.insertMany(batch);
}

return dataToInsert.map((node) => node._id);
Expand Down Expand Up @@ -185,26 +188,19 @@ export class AstraDBVectorStore implements VectorStore {
const similarities: number[] = [];

await cursor.forEach(async (row: Record<string, any>) => {
const id = row[this.idKey];
const embedding = row.$vector;
const similarity = row.$similarity;
const metadata = row[this.metadataKey];

// Remove fields from content
delete row[this.idKey];
delete row.$similarity;
delete row.$vector;
delete row[this.metadataKey];

const content = this.contentKey
? row[this.contentKey]
: JSON.stringify(row);
const {
[this.idKey]: id,
[this.contentKey]: content,
$vector: embedding,
$similarity: similarity,
...metadata
} = row;

const node = new Document({
id_: id,
text: content,
metadata: metadata ?? {},
embedding: embedding,
metadata,
embedding,
});

ids.push(id);
Expand Down
1 change: 1 addition & 0 deletions packages/eslint-config-custom/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ module.exports = {

"ASTRA_DB_APPLICATION_TOKEN",
"ASTRA_DB_ENDPOINT",
"ASTRA_DB_NAMESPACE",

"AZURE_OPENAI_KEY",
"AZURE_OPENAI_ENDPOINT",
Expand Down
Loading

0 comments on commit 6c46990

Please sign in to comment.