Skip to content

Commit

Permalink
Enable database reading in recursive notion crawl
Browse files Browse the repository at this point in the history
  • Loading branch information
Weves committed Nov 3, 2023
1 parent 174f544 commit 68b23b6
Showing 1 changed file with 49 additions and 0 deletions.
49 changes: 49 additions & 0 deletions backend/danswer/connectors/notion/connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,50 @@ def _fetch_page(self, page_id: str) -> NotionPage:
raise e
return NotionPage(**res.json())

@retry(tries=3, delay=1, backoff=2)
def _fetch_database(
    self, database_id: str, cursor: str | None = None
) -> dict[str, Any]:
    """Query one page of a database's entries via the Notion API.

    Sends a POST to the ``/v1/databases/{database_id}/query`` endpoint
    using ``self.headers``.

    Args:
        database_id: ID of the Notion database to query.
        cursor: Pagination cursor from a previous response. When falsy,
            no request body is sent and the query starts at the beginning.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.exceptions.HTTPError: If the API answers with an error
            status. The error is logged and re-raised, so the ``@retry``
            decorator (tries=3, delay=1, backoff=2) sees it.
    """
    logger.debug(f"Fetching database for ID '{database_id}'")
    query_url = f"https://api.notion.com/v1/databases/{database_id}/query"
    body = {"start_cursor": cursor} if cursor else None
    # NOTE(review): no timeout is passed, so a stalled connection blocks
    # indefinitely - consider adding one consistent with the rest of the file.
    res = requests.post(query_url, headers=self.headers, json=body)
    try:
        res.raise_for_status()
    except requests.exceptions.HTTPError:
        # Error responses are not guaranteed to carry JSON (e.g. an HTML
        # page from a gateway). Fall back to the raw text so a decode
        # failure never masks the original HTTP error.
        try:
            error_detail = res.json()
        except ValueError:
            error_detail = res.text
        logger.exception(f"Error fetching database - {error_detail}")
        raise
    return res.json()

def _read_pages_from_database(self, database_id: str) -> list[str]:
    """Collect the IDs of all pages contained in a database.

    Walks every page of query results for ``database_id``. Entries whose
    ``object`` is ``"page"`` contribute their ID directly; entries whose
    ``object`` is ``"database"`` are expanded recursively and contribute
    the page IDs found inside them. Any other entry type is ignored.
    """
    page_ids: list[str] = []
    next_cursor = None
    has_more = True

    while has_more:
        response = self._fetch_database(database_id, next_cursor)

        for entry in response["results"]:
            obj_id = entry["id"]
            entry_kind = entry["object"]

            if entry_kind == "page":
                logger.debug(
                    f"Found page with ID '{obj_id}' in database '{database_id}'"
                )
                page_ids.append(obj_id)
                continue

            if entry_kind == "database":
                logger.debug(
                    f"Found database with ID '{obj_id}' in database '{database_id}'"
                )
                page_ids += self._read_pages_from_database(obj_id)

        # A null cursor means the API has no further result pages.
        next_cursor = response["next_cursor"]
        has_more = next_cursor is not None

    return page_ids

def _read_blocks(
self, page_block_id: str
) -> tuple[list[tuple[str, str]], list[str]]:
Expand Down Expand Up @@ -145,12 +189,17 @@ def _read_blocks(
if result_type == "child_page":
child_pages.append(result_block_id)
else:
logger.debug(f"Entering sub-block: {result_block_id}")
subblock_result_lines, subblock_child_pages = self._read_blocks(
result_block_id
)
logger.debug(f"Finished sub-block: {result_block_id}")
result_lines.extend(subblock_result_lines)
child_pages.extend(subblock_child_pages)

if result_type == "child_database" and self.recursive_index_enabled:
child_pages.extend(self._read_pages_from_database(result_block_id))

cur_result_text = "\n".join(cur_result_text_arr)
if cur_result_text:
result_lines.append((cur_result_text, result_block_id))
Expand Down

1 comment on commit 68b23b6

@vercel
Copy link

@vercel vercel bot commented on 68b23b6 Nov 3, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.