Skip to content

Commit

Permalink
Merge pull request #39 from paulparkinson/main
Browse files Browse the repository at this point in the history
  • Loading branch information
paulparkinson authored Aug 16, 2024
2 parents 2eb2923 + 1f93eb1 commit 304e423
Show file tree
Hide file tree
Showing 7 changed files with 28 additions and 25 deletions.
7 changes: 2 additions & 5 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,15 +1,12 @@
FROM maven:3.6.3-openjdk-11 AS maven_build
FROM maven:3.8.4-openjdk-17 AS maven_build
COPY pom.xml /tmp/pom.xml
COPY env.properties /tmp/env.properties
COPY src /tmp/src
COPY lib /tmp/lib
COPY lib/oci-java-sdk-generativeai-3.25.1-preview1-20230906.204234-1.jar /tmp/lib/oci-java-sdk-generativeai-3.25.1-preview1-20230906.204234-1.jar
WORKDIR /tmp/

RUN mvn org.apache.maven.plugins:maven-install-plugin:2.5.2:install-file -Dfile=/tmp/lib/oci-java-sdk-generativeai-3.25.1-preview1-20230906.204234-1.jar
RUN mvn -f /tmp/pom.xml clean package

FROM openjdk
FROM openjdk:17
EXPOSE 8080

CMD ls /tmp
Expand Down
9 changes: 7 additions & 2 deletions python-rag-chatbot/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,16 @@
## TODO instructions

- setup ~/.oci/config
- set yourcompartmentid
- oci session authenticate and make sure authentication is successful
- use a region where OCI Generative AI is available; currently we are using the Chicago region (us-chicago-1).
- run pip install -r requirements.txt
- check your config_rag.py file: make sure the API endpoint belongs to the Chicago region, and select the db you want to use (e.g. Chroma DB or Oracle DB)
- set your compartment OCID (compartment_id) inside both init_rag_streamlit_exp.py and init_rag.py
- to change the db type, modify the config file; the logic that selects the db is inside create_vector_store
- podman run -d --name 23ai -p 1521:1521 -e ORACLE_PWD=<password> -v oracle-volume:/Users/pparkins/oradata container-registry.oracle.com/database/free:latest
- create/config vector tablespace and user
- add oracle database info for use in init_rag_streamlit.py / init_rag_streamlit_exp.py
- run run_oracle_bot.sh /run_oracle_bot_exp.sh
- run ./run_oracle_bot_exp.sh


## Documentation
Expand Down
8 changes: 2 additions & 6 deletions python-rag-chatbot/config_rag.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,14 @@
DEBUG = False

# book to use for augmentation
# BOOK1 = "APISpec.pdf"

BOOK1 = "pdfFiles/sharding-adg-addshard-cookbook-3610618.pdf"
BOOK2 = "pdfFiles/globally-distributed-autonomous-database.pdf"
# BOOK4 = "OnBoardingGuide.pdf"
# BOOK5 = "CreateWorkFlow.pdf"
# BOOK6 = "Team Onboarding.pdf"
# BOOK7 = "workflow.pdf"
BOOK3 = "pdfFiles/oracle-database-23c.pdf"
BOOK4 = "pdfFiles/oracle-globally-distributed-database-guide.pdf"
BOOK5 = "pdfFiles/Oracle True cache.pdf"
BOOK6 = "pdfFiles/Trobleshooting.pdf"
# BOOK12 = "OsdCode.pdf"


BOOK_LIST = [BOOK1, BOOK2, BOOK3, BOOK4, BOOK5, BOOK6]

Expand Down
4 changes: 2 additions & 2 deletions python-rag-chatbot/init_rag.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@

# private configs
CONFIG_PROFILE = "DEFAULT"
COMPARTMENT_OCID = "ocid1.compartment.oc1..yourcompartmentid"
COMPARTMENT_OCID = "ocid1.compartment.yourcomparmentid"
oci_config = oci.config.from_file("~/.oci/config", CONFIG_PROFILE)
COHERE_API_KEY = oci_config['security_token_file']
DEBUG = True
Expand Down Expand Up @@ -155,7 +155,7 @@ def initialize_rag_chain():

llm = OCIGenAI(
service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com",
compartment_id="ocid1.compartment.oc1..yourcompartmentid",
compartment_id="ocid1.compartment.oc1..yourcompartment",
model_kwargs={"max_tokens": 1000},
auth_type='SECURITY_TOKEN',
)
Expand Down
2 changes: 1 addition & 1 deletion python-rag-chatbot/init_rag_streamlit.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@

# private configs
CONFIG_PROFILE = "DEFAULT"
COMPARTMENT_OCID = "ocid1.compartment.oc1..aaaaaaaajdyhd7dqnix2avhlckbhhkkcl3cujzyuz6jzyzonadca3i66pqjq"
COMPARTMENT_OCID = "ocid1.compartment.oc1..yourcompartment"
oci_config = oci.config.from_file("~/.oci/config", CONFIG_PROFILE)
COHERE_API_KEY = oci_config['key_file']
#
Expand Down
8 changes: 5 additions & 3 deletions python-rag-chatbot/init_rag_streamlit_exp.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@

# private configs
CONFIG_PROFILE = "DEFAULT"
COMPARTMENT_OCID = "ocid1.compartment.oc1..yourcompartmentid"
COMPARTMENT_OCID = "ocid1.compartment.oc1..yourcompartment"
oci_config = oci.config.from_file("~/.oci/config", CONFIG_PROFILE)
COHERE_API_KEY = oci_config['security_token_file']

Expand Down Expand Up @@ -134,6 +134,7 @@ def split_in_chunks(all_pages):
splits = post_process(splits)

print(f"Splitted the pdf in {len(splits)} chunks...")
print("Size of splits: " + str(text_splitter.__sizeof__()) + "bytes")

return splits

Expand Down Expand Up @@ -252,7 +253,7 @@ def build_llm(llm_type):
llm = OCIGenAI(
model_id="cohere.command",
service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com",
compartment_id="ocid1.compartment.oc1..yourcompartmentid",
compartment_id="ocid1.compartment.oc1..yourCompartment",
model_kwargs={"max_tokens": 200},
auth_type='SECURITY_TOKEN',
)
Expand Down Expand Up @@ -284,13 +285,14 @@ def initialize_rag_chain():

# 3. Load embeddings model
embedder = create_cached_embedder()

print("Size of splits---: " + str(document_splits.__sizeof__()) + "bytes")
# 4. Create a Vectore Store and store embeddings
vectorstore = create_vector_store(VECTOR_STORE_NAME, document_splits, embedder)

# 5. Create a retriever
# increased num. of docs to 5 (default to 4)
# added optionally a reranker

retriever = create_retriever(vectorstore)

# 6. Build the LLM
Expand Down
15 changes: 9 additions & 6 deletions python-rag-chatbot/oracle_bot_exp.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,18 @@
import streamlit as st
import time
import traceback
import sys

from init_rag_streamlit_exp import initialize_rag_chain, get_answer

from streamlit_feedback import streamlit_feedback

def process_feedback(feedback_value):
st.write("Feedback value:", feedback_value)
with open("feedback.txt", "a", encoding="utf-8") as f:
f.write(f"{feedback_value}\n")
st.write("Processing feedback value:", feedback_value) # Debugging output
try:
with open("feedback.txt", "a", encoding="utf-8") as f:
f.write(f"{feedback_value}\n")
st.write("Feedback successfully written to file.") # Debugging output
except Exception as e:
st.error(f"Error writing to file: {e}")
traceback.print_exc()

def reset_conversation():
st.session_state.messages = []
Expand Down Expand Up @@ -53,6 +55,7 @@ def reset_conversation():
if not st.session_state.feedback_rendered:
def _submit_feedback(feedback_value, *args, **kwargs):
print("Feedback submitted:", feedback_value, file=sys.stderr) # Redirect to stderr
st.write("Feedback value received for submission:", feedback_value) # Debugging output
process_feedback(feedback_value)
st.session_state.feedback_rendered = False

Expand Down

0 comments on commit 304e423

Please sign in to comment.