Skip to content

Commit

Permalink
update async_add method to follow pinecone-client example
Browse files Browse the repository at this point in the history
  • Loading branch information
DosticJelena committed Oct 16, 2023
1 parent 084a95b commit fab147b
Showing 1 changed file with 38 additions and 18 deletions.
56 changes: 38 additions & 18 deletions llama_index/vector_stores/pinecone.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@

# Number of entries per upsert batch; async_add passes this to async_upload
# as its batch_size argument.
DEFAULT_BATCH_SIZE = 200

# Capacity of the asyncio.Semaphore that async_add creates to bound how many
# batch uploads may be in flight at the same time.
SEM_MAX_CONCURRENT = 10

# Module-level logger, named after this module.
_logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -98,6 +100,14 @@ def _to_pinecone_filter(standard_filters: MetadataFilters) -> dict:
return filters


async def async_upload(index, vectors, batch_size, semaphore):
    """Upsert ``vectors`` into ``index`` in concurrent batches.

    ``vectors`` is split into chunks with ``iter_batch``; every chunk is sent
    through a blocking ``index.upsert`` call executed in a worker thread via
    ``asyncio.to_thread``. ``semaphore`` bounds how many of those calls run at
    the same time.

    Args:
        index: Object exposing an ``upsert(batch)`` method.
        vectors: Entries to upload.
        batch_size: Maximum number of entries sent per ``upsert`` call.
        semaphore: Async context manager (e.g. ``asyncio.Semaphore``) used to
            limit the number of concurrent uploads.

    Returns:
        None. The coroutine completes only after every batch was upserted.

    Raises:
        Any exception raised by ``index.upsert``; it propagates through
        ``asyncio.gather`` to the caller.
    """

    async def send_batch(batch):
        async with semaphore:
            # The call is intentionally blocking (no ``async_req=True``).
            # With ``async_req=True`` the client only queues the request and
            # returns a pending-result handle nobody waits on: the semaphore
            # would be released before the upsert ran and failures would be
            # silently dropped. Running the blocking call in a thread keeps
            # the event loop free while still honouring the concurrency limit.
            return await asyncio.to_thread(index.upsert, batch)

    await asyncio.gather(
        *[send_batch(chunk) for chunk in iter_batch(vectors, size=batch_size)]
    )


# Message stating that the `pinecone` package is missing and how to install it.
# NOTE(review): presumably used when importing `pinecone` fails; the use site
# is outside this view, so confirm.
import_err_msg = (
"`pinecone` package not found, please run `pip install pinecone-client`"
)
Expand Down Expand Up @@ -224,39 +234,42 @@ def from_params(
def class_name(cls) -> str:
    """Return the string identifier of this vector store class."""
    # NOTE(review): the literal is spelled "Pincone" (not "Pinecone"). It is a
    # runtime value that callers may already depend on, so it is left as-is;
    # confirm before renaming.
    return "PinconeVectorStore"

def add(
self,
nodes: List[BaseNode],
) -> List[str]:
"""Add nodes to index.
Args:
nodes: List[BaseNode]: list of nodes with embeddings
"""
ids = []
def _prepare_entries_for_upsert(self, nodes: List[BaseNode]) -> List[Dict]:
entries = []
for node in nodes:
node_id = node.node_id

metadata = node_to_metadata_dict(
node, remove_text=False, flat_metadata=self.flat_metadata
)

entry = {
ID_KEY: node_id,
ID_KEY: node.node_id,
VECTOR_KEY: node.get_embedding(),
METADATA_KEY: metadata,
}
if self.add_sparse_vector and self._tokenizer is not None:

if self.add_sparse_vector:
sparse_vector = generate_sparse_vectors(
[node.get_content(metadata_mode=MetadataMode.EMBED)],
self._tokenizer,
)[0]
entry[SPARSE_VECTOR_KEY] = sparse_vector

ids.append(node_id)
entries.append(entry)

return entries

def add(
self,
nodes: List[BaseNode],
) -> List[str]:
"""Add nodes to index.
Args:
nodes: List[BaseNode]: list of nodes with embeddings
"""

entries = self._prepare_entries_for_upsert(nodes)

[
self._pinecone_index.upsert(
Expand All @@ -266,7 +279,7 @@ def add(
for batch in iter_batch(entries, self.batch_size)
]

return ids
return [entry[ID_KEY] for entry in entries]

async def async_add(
self,
Expand All @@ -280,7 +293,14 @@ async def async_add(
Returns:
List[str]: List of IDs of the added documents.
"""
return await asyncio.to_thread(self.add, nodes) # type: ignore

entries = self._prepare_entries_for_upsert(nodes)

semaphore = asyncio.Semaphore(SEM_MAX_CONCURRENT)
await async_upload(self._pinecone_index, entries, DEFAULT_BATCH_SIZE, semaphore)

return [entry[ID_KEY] for entry in entries]


def delete(self, ref_doc_id: str, **delete_kwargs: Any) -> None:
"""
Expand Down

0 comments on commit fab147b

Please sign in to comment.