From 8b58c13c22b86a1105eee3dd7af6b15f6645394b Mon Sep 17 00:00:00 2001 From: Victor Martin Date: Wed, 26 Jun 2024 17:56:17 +0200 Subject: [PATCH 1/2] improve Java service and fix issue with IAM and OKE workload for GenAI Service --- K8S.md | 27 +++-- backend/build.gradle | 2 +- .../config/ClientConfigurationBean.java | 19 ---- .../config/GenerativeAiClientConfig.java | 93 ---------------- .../GenerativeAiInferenceClientConfig.java | 92 ---------------- .../backend/controller/GenAIController.java | 10 +- .../controller/PDFConvertorController.java | 26 +++-- .../backend/controller/PromptController.java | 13 +-- .../backend/controller/SummaryController.java | 33 ------ .../service/GenerativeAiClientService.java | 100 ++++++++++++++++++ .../GenerativeAiInferenceClientService.java | 98 +++++++++++++++++ .../backend/service/OCIGenAIService.java | 15 ++- backend/src/main/resources/application.yaml | 1 - deploy/k8s/backend/application.yaml.mustache | 3 +- deploy/k8s/backend/backend.yaml | 2 +- deploy/k8s/backend/kustomization.yaml | 5 +- .../{genai-sa.yaml => service-account.yaml} | 2 +- deploy/k8s/ingress/ingress.yaml | 7 ++ deploy/k8s/ingress/kustomization.yaml | 3 +- deploy/k8s/web/kustomization.yaml | 3 +- deploy/terraform/iam.tf | 15 ++- deploy/terraform/oke.tf | 2 +- scripts/kustom.mjs | 27 ++++- scripts/package.json | 2 +- web/Dockerfile | 6 +- web/nginx/default.conf | 14 +++ web/src/Summary.jsx | 23 +++- 27 files changed, 353 insertions(+), 290 deletions(-) delete mode 100644 backend/src/main/java/dev/victormartin/oci/genai/backend/backend/config/ClientConfigurationBean.java delete mode 100644 backend/src/main/java/dev/victormartin/oci/genai/backend/backend/config/GenerativeAiClientConfig.java delete mode 100644 backend/src/main/java/dev/victormartin/oci/genai/backend/backend/config/GenerativeAiInferenceClientConfig.java delete mode 100644 backend/src/main/java/dev/victormartin/oci/genai/backend/backend/controller/SummaryController.java create mode 100644 
backend/src/main/java/dev/victormartin/oci/genai/backend/backend/service/GenerativeAiClientService.java create mode 100644 backend/src/main/java/dev/victormartin/oci/genai/backend/backend/service/GenerativeAiInferenceClientService.java rename deploy/k8s/backend/{genai-sa.yaml => service-account.yaml} (62%) create mode 100644 web/nginx/default.conf diff --git a/K8S.md b/K8S.md index ec9f6803..910fb93e 100644 --- a/K8S.md +++ b/K8S.md @@ -115,7 +115,7 @@ npx zx scripts/kustom.mjs ### Kubernetes Deployment ```bash -export KUBECONFIG="deploy/terraform/generated/kubeconfig" +export KUBECONFIG="$(pwd)/deploy/terraform/generated/kubeconfig" ``` ```bash @@ -129,28 +129,39 @@ kubectl apply -k deploy/k8s/overlays/prod Run `get deploy` a few times: ```bash -kubectl get deploy +kubectl get deploy -n backend ``` Wait for all deployments to be `Ready` and `Available`. ``` -NAME READY UP-TO-DATE AVAILABLE AGE -backend 1/1 1 1 3m28s -web 1/1 1 1 3m21s +NAME READY UP-TO-DATE AVAILABLE AGE +backend 1/1 1 1 3m28s +ingress-nginx-controller 1/1 1 1 3m17s +web 1/1 1 1 3m21s ``` Access your application: ```bash echo $(kubectl get service \ - -n ingress-nginx \ + -n backend \ -o jsonpath='{.items[?(@.spec.type=="LoadBalancer")].status.loadBalancer.ingress[0].ip}') ``` -> This command will list the services on the `ingress-nginx` namespace and filter for the Load Balancer. If the response is an empty string, wait a bit and execute the command again. The Load Balancer takes a bit of time to create the Public IP address. +> This command will list the Load Balancer services on the `backend` namespace. If the response is an empty string, wait a bit and execute the command again. The Load Balancer takes a bit of time to create the Public IP address. -Take the Public IP to your browser. +Paste the Public IP address on your browser and test your new Generative AI website deployed in Kubernetes. 
+ +Remember to visit SQL Developer Web on the OCI Console for your Oracle Database and run some queries to investigate the historical of prompts. + +```sql +SELECT * FROM interactions; +``` + +```bash +cd ../.. +``` ## Clean up diff --git a/backend/build.gradle b/backend/build.gradle index 9deb513a..7be7e3b7 100644 --- a/backend/build.gradle +++ b/backend/build.gradle @@ -5,7 +5,7 @@ plugins { } group = 'dev.victormartin.oci.genai.backend' -version = '0.0.3' +version = '0.0.5' java { sourceCompatibility = '17' diff --git a/backend/src/main/java/dev/victormartin/oci/genai/backend/backend/config/ClientConfigurationBean.java b/backend/src/main/java/dev/victormartin/oci/genai/backend/backend/config/ClientConfigurationBean.java deleted file mode 100644 index 1bcc9425..00000000 --- a/backend/src/main/java/dev/victormartin/oci/genai/backend/backend/config/ClientConfigurationBean.java +++ /dev/null @@ -1,19 +0,0 @@ -package dev.victormartin.oci.genai.backend.backend.config; - -import com.oracle.bmc.ClientConfiguration; -import com.oracle.bmc.retrier.RetryConfiguration; -import org.springframework.context.annotation.Bean; -import org.springframework.context.annotation.Configuration; - -@Configuration -public class ClientConfigurationBean { - @Bean - public ClientConfiguration clientConfiguration() { - ClientConfiguration clientConfiguration = - ClientConfiguration.builder() - .readTimeoutMillis(240000) - .retryConfiguration(RetryConfiguration.NO_RETRY_CONFIGURATION) - .build(); - return clientConfiguration; - } -} diff --git a/backend/src/main/java/dev/victormartin/oci/genai/backend/backend/config/GenerativeAiClientConfig.java b/backend/src/main/java/dev/victormartin/oci/genai/backend/backend/config/GenerativeAiClientConfig.java deleted file mode 100644 index 5ff82466..00000000 --- a/backend/src/main/java/dev/victormartin/oci/genai/backend/backend/config/GenerativeAiClientConfig.java +++ /dev/null @@ -1,93 +0,0 @@ -package dev.victormartin.oci.genai.backend.backend.config; 
- -import com.oracle.bmc.ClientConfiguration; -import com.oracle.bmc.ConfigFileReader; -import com.oracle.bmc.Region; -import com.oracle.bmc.auth.AuthenticationDetailsProvider; -import com.oracle.bmc.auth.ConfigFileAuthenticationDetailsProvider; -import com.oracle.bmc.auth.okeworkloadidentity.OkeWorkloadIdentityAuthenticationDetailsProvider; -import com.oracle.bmc.generativeai.GenerativeAiClient; -import jakarta.annotation.PostConstruct; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.beans.factory.annotation.Value; -import org.springframework.context.annotation.Bean; -import org.springframework.context.annotation.Configuration; -import org.springframework.core.env.Environment; - -import java.io.IOException; - -@Configuration -public class GenerativeAiClientConfig { - - Logger logger = LoggerFactory.getLogger(GenerativeAiClientConfig.class); - - @Autowired - private Environment environment; - - @Autowired - ClientConfiguration clientConfiguration; - - @Value("${genai.endpoint}") - private String ENDPOINT; - @Value("${genai.region}") - private String regionCode; - @Value("${genai.config.location}") - private String CONFIG_LOCATION; - @Value("${genai.config.profile}") - private String CONFIG_PROFILE; - - @Value("${genai.chat_model_id}") - private String chatModelId; - - @Value("${genai.summarization_model_id}") - private String summarizationModelId; - - private Region region; - - @PostConstruct - private void postConstruct() { - logger.info("Region Code: " + regionCode); - region = Region.fromRegionCode(regionCode); - } - - @Bean - GenerativeAiClient genAiClient() throws IOException { - String[] activeProfiles = environment.getActiveProfiles(); - String profile = activeProfiles[0]; - if (profile.equals("production")) { - return instancePrincipalConfig(); - } else { - return localConfig(); - } - } - - GenerativeAiClient instancePrincipalConfig() throws IOException { 
- final OkeWorkloadIdentityAuthenticationDetailsProvider okeProvider = new OkeWorkloadIdentityAuthenticationDetailsProvider.OkeWorkloadIdentityAuthenticationDetailsProviderBuilder() - .build(); - // final InstancePrincipalsAuthenticationDetailsProvider provider = - // new - // InstancePrincipalsAuthenticationDetailsProvider.InstancePrincipalsAuthenticationDetailsProviderBuilder().build(); - - GenerativeAiClient generativeAiClient = new GenerativeAiClient(okeProvider, clientConfiguration); - generativeAiClient.setRegion(okeProvider.getRegion()); - generativeAiClient.setEndpoint(ENDPOINT); - return generativeAiClient; - } - - GenerativeAiClient localConfig() throws IOException { - // Configuring the AuthenticationDetailsProvider. It's assuming there is a - // default OCI config file - // "~/.oci/config", and a profile in that config with the name defined in - // CONFIG_PROFILE variable. - final ConfigFileReader.ConfigFile configFile = ConfigFileReader.parse(CONFIG_LOCATION, CONFIG_PROFILE); - final AuthenticationDetailsProvider provider = new ConfigFileAuthenticationDetailsProvider(configFile); - - GenerativeAiClient generativeAiClient = new GenerativeAiClient(provider, - clientConfiguration); - generativeAiClient.setEndpoint(ENDPOINT); - generativeAiClient.setRegion(region); - return generativeAiClient; - } -} diff --git a/backend/src/main/java/dev/victormartin/oci/genai/backend/backend/config/GenerativeAiInferenceClientConfig.java b/backend/src/main/java/dev/victormartin/oci/genai/backend/backend/config/GenerativeAiInferenceClientConfig.java deleted file mode 100644 index 56a513ef..00000000 --- a/backend/src/main/java/dev/victormartin/oci/genai/backend/backend/config/GenerativeAiInferenceClientConfig.java +++ /dev/null @@ -1,92 +0,0 @@ -package dev.victormartin.oci.genai.backend.backend.config; - -import com.oracle.bmc.ClientConfiguration; -import com.oracle.bmc.ConfigFileReader; -import com.oracle.bmc.Region; -import 
com.oracle.bmc.auth.AuthenticationDetailsProvider; -import com.oracle.bmc.auth.ConfigFileAuthenticationDetailsProvider; -import com.oracle.bmc.auth.InstancePrincipalsAuthenticationDetailsProvider; -import com.oracle.bmc.generativeaiinference.GenerativeAiInferenceClient; -import jakarta.annotation.PostConstruct; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.beans.factory.annotation.Value; -import org.springframework.context.annotation.Bean; -import org.springframework.context.annotation.Configuration; -import org.springframework.core.env.Environment; - -import java.io.IOException; - -@Configuration -public class GenerativeAiInferenceClientConfig { - - Logger logger = LoggerFactory.getLogger(GenerativeAiInferenceClientConfig.class); - - @Autowired - private Environment environment; - - @Autowired - ClientConfiguration clientConfiguration; - - @Value("${genai.endpoint}") - private String ENDPOINT; - @Value("${genai.region}") - private String regionCode; - @Value("${genai.config.location}") - private String CONFIG_LOCATION; - @Value("${genai.config.profile}") - private String CONFIG_PROFILE; - - @Value("${genai.chat_model_id}") - private String modelChatId; - - @Value("${genai.summarization_model_id}") - private String modelSummarizationId; - - private Region region; - - @PostConstruct - private void postConstruct() { - logger.info("Region Code: " + regionCode); - region = Region.fromRegionCode(regionCode); - } - - @Bean - GenerativeAiInferenceClient genAiInferenceClient() throws IOException { - String[] activeProfiles = environment.getActiveProfiles(); - String profile = activeProfiles[0]; - logger.info("Profile: " + profile); - if (profile.equals("production")) { - return instancePrincipalConfig(); - } else { - return localConfig(); - } - } - - GenerativeAiInferenceClient instancePrincipalConfig() throws IOException { - final 
InstancePrincipalsAuthenticationDetailsProvider provider = new InstancePrincipalsAuthenticationDetailsProvider.InstancePrincipalsAuthenticationDetailsProviderBuilder() - .build(); - - GenerativeAiInferenceClient generativeAiInferenceClient = new GenerativeAiInferenceClient(provider, - clientConfiguration); - generativeAiInferenceClient.setEndpoint(ENDPOINT); - generativeAiInferenceClient.setRegion(provider.getRegion()); - return generativeAiInferenceClient; - } - - GenerativeAiInferenceClient localConfig() throws IOException { - // Configuring the AuthenticationDetailsProvider. It's assuming there is a - // default OCI config file - // "~/.oci/config", and a profile in that config with the name defined in - // CONFIG_PROFILE variable. - final ConfigFileReader.ConfigFile configFile = ConfigFileReader.parse(CONFIG_LOCATION, CONFIG_PROFILE); - final AuthenticationDetailsProvider provider = new ConfigFileAuthenticationDetailsProvider(configFile); - - GenerativeAiInferenceClient generativeAiInferenceClient = new GenerativeAiInferenceClient(provider, - clientConfiguration); - generativeAiInferenceClient.setEndpoint(ENDPOINT); - generativeAiInferenceClient.setRegion(region); - return generativeAiInferenceClient; - } -} diff --git a/backend/src/main/java/dev/victormartin/oci/genai/backend/backend/controller/GenAIController.java b/backend/src/main/java/dev/victormartin/oci/genai/backend/backend/controller/GenAIController.java index bd8d1d98..9564e33c 100644 --- a/backend/src/main/java/dev/victormartin/oci/genai/backend/backend/controller/GenAIController.java +++ b/backend/src/main/java/dev/victormartin/oci/genai/backend/backend/controller/GenAIController.java @@ -5,6 +5,7 @@ import com.oracle.bmc.generativeai.requests.ListModelsRequest; import com.oracle.bmc.generativeai.responses.ListModelsResponse; import dev.victormartin.oci.genai.backend.backend.dao.GenAiModel; +import dev.victormartin.oci.genai.backend.backend.service.GenerativeAiClientService; import org.slf4j.Logger; 
import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; @@ -23,17 +24,14 @@ public class GenAIController { private String COMPARTMENT_ID; @Autowired - private GenerativeAiClient generativeAiClient; - - // repository - - // constructor + private GenerativeAiClientService generativeAiClientService; @GetMapping("/api/genai/models") public List getModels() { logger.info("getModels()"); ListModelsRequest listModelsRequest = ListModelsRequest.builder().compartmentId(COMPARTMENT_ID).build(); - ListModelsResponse response = generativeAiClient.listModels(listModelsRequest); + GenerativeAiClient client = generativeAiClientService.getClient(); + ListModelsResponse response = client.listModels(listModelsRequest); return response.getModelCollection().getItems().stream().map(m -> { List capabilities = m.getCapabilities().stream().map(ModelCapability::getValue).collect(Collectors.toList()); GenAiModel model = new GenAiModel(m.getId(),m.getDisplayName(), m.getVendor(), m.getVersion(), diff --git a/backend/src/main/java/dev/victormartin/oci/genai/backend/backend/controller/PDFConvertorController.java b/backend/src/main/java/dev/victormartin/oci/genai/backend/backend/controller/PDFConvertorController.java index 5af08852..86d480f8 100644 --- a/backend/src/main/java/dev/victormartin/oci/genai/backend/backend/controller/PDFConvertorController.java +++ b/backend/src/main/java/dev/victormartin/oci/genai/backend/backend/controller/PDFConvertorController.java @@ -1,6 +1,8 @@ package dev.victormartin.oci.genai.backend.backend.controller; +import com.oracle.bmc.model.BmcException; +import dev.victormartin.oci.genai.backend.backend.dao.Answer; import dev.victormartin.oci.genai.backend.backend.service.OCIGenAIService; import dev.victormartin.oci.genai.backend.backend.service.PDFConvertorService; import org.slf4j.Logger; @@ -32,11 +34,8 @@ public class PDFConvertorController { @Autowired PDFConvertorService pdfConvertorService; - @Autowired - 
SummaryController summaryController; - @PostMapping("/api/upload") - public String fileUploading(@RequestParam("file") MultipartFile multipartFile) { + public Answer fileUploading(@RequestParam("file") MultipartFile multipartFile) { String filename = StringUtils.cleanPath(multipartFile.getOriginalFilename()); log.info("File uploaded {} {} bytes ({})", filename, multipartFile.getSize(), multipartFile.getContentType()); try { @@ -53,14 +52,27 @@ public String fileUploading(@RequestParam("file") MultipartFile multipartFile) { String convertedText = pdfConvertorService.convert(file.getAbsolutePath()); String summaryText = ociGenAIService.summaryText(convertedText, summarizationModelId); log.info("Summary text: {}(...)", summaryText.substring(0, 40)); - summaryController.handleSummary(summaryText); - return summaryText; + Answer answer = new Answer(summaryText, ""); + return answer; } catch (MaxUploadSizeExceededException maxUploadSizeExceededException) { log.error(maxUploadSizeExceededException.getMessage()); throw new RuntimeException(maxUploadSizeExceededException); + } catch (BmcException exception) { + log.error("Message: {}", exception.getMessage()); + log.error("Original Message: {}", exception.getOriginalMessage()); + log.error("Unmodified Message: {}", exception.getUnmodifiedMessage()); + log.error("Service Details: {}", exception.getServiceDetails()); + log.error("Status Code: {}", exception.getStatusCode()); + String unmodifiedMessage = exception.getUnmodifiedMessage(); + int statusCode = exception.getStatusCode(); + String errorMessage = statusCode + " " + unmodifiedMessage; + log.error(errorMessage); + Answer answer = new Answer("", errorMessage); + return answer; } catch (Exception e) { log.error(e.getMessage()); - throw new RuntimeException(e); + Answer answer = new Answer("", e.getMessage()); + return answer; } } } \ No newline at end of file diff --git a/backend/src/main/java/dev/victormartin/oci/genai/backend/backend/controller/PromptController.java 
b/backend/src/main/java/dev/victormartin/oci/genai/backend/backend/controller/PromptController.java index 6eda8fe4..6f6fb90a 100644 --- a/backend/src/main/java/dev/victormartin/oci/genai/backend/backend/controller/PromptController.java +++ b/backend/src/main/java/dev/victormartin/oci/genai/backend/backend/controller/PromptController.java @@ -26,9 +26,6 @@ public class PromptController { @Value("${genai.chat_model_id}") private String hardcodedChatModelId; - @Value("${genai.summarization_model_id}") - private String hardcodedSummarizationModelId; - @Autowired private final InteractionRepository interactionRepository; @@ -53,19 +50,23 @@ public Answer handlePrompt(Prompt prompt) { interaction.setRequest(promptEscaped); Interaction saved = interactionRepository.save(interaction); try { - if (prompt.content() == null || prompt.content().length() < 1) { + if (prompt.content().isEmpty()) { throw new InvalidPromptRequest(); } // if (prompt.modelId() == null || // !prompt.modelId().startsWith("ocid1.generativeaimodel.")) { throw new // InvalidPromptRequest(); } - String responseFromGenAI = genAI.request(promptEscaped, hardcodedChatModelId); + String responseFromGenAI = genAI.resolvePrompt(promptEscaped, hardcodedChatModelId); saved.setDatetimeResponse(new Date()); saved.setResponse(responseFromGenAI); interactionRepository.save(saved); return new Answer(responseFromGenAI, ""); } catch (BmcException exception) { - logger.error(exception.getOriginalMessage()); + logger.error("Message: {}", exception.getMessage()); + logger.error("Original Message: {}", exception.getOriginalMessage()); + logger.error("Unmodified Message: {}", exception.getUnmodifiedMessage()); + logger.error("Service Details: {}", exception.getServiceDetails()); + logger.error("Status Code: {}", exception.getStatusCode()); String unmodifiedMessage = exception.getUnmodifiedMessage(); int statusCode = exception.getStatusCode(); String errorMessage = statusCode + " " + unmodifiedMessage; diff --git 
a/backend/src/main/java/dev/victormartin/oci/genai/backend/backend/controller/SummaryController.java b/backend/src/main/java/dev/victormartin/oci/genai/backend/backend/controller/SummaryController.java deleted file mode 100644 index 8c49ed5f..00000000 --- a/backend/src/main/java/dev/victormartin/oci/genai/backend/backend/controller/SummaryController.java +++ /dev/null @@ -1,33 +0,0 @@ -package dev.victormartin.oci.genai.backend.backend.controller; - -import com.oracle.bmc.model.BmcException; -import dev.victormartin.oci.genai.backend.backend.InvalidPromptRequest; -import dev.victormartin.oci.genai.backend.backend.dao.Answer; -import dev.victormartin.oci.genai.backend.backend.dao.Prompt; -import dev.victormartin.oci.genai.backend.backend.data.Interaction; -import dev.victormartin.oci.genai.backend.backend.data.InteractionRepository; -import dev.victormartin.oci.genai.backend.backend.service.OCIGenAIService; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.beans.factory.annotation.Value; -import org.springframework.http.HttpStatus; -import org.springframework.messaging.handler.annotation.MessageMapping; -import org.springframework.messaging.simp.annotation.SendToUser; -import org.springframework.messaging.simp.annotation.SubscribeMapping; -import org.springframework.stereotype.Controller; -import org.springframework.web.util.HtmlUtils; - -import java.util.Date; - -@Controller -public class SummaryController { - Logger logger = LoggerFactory.getLogger(SummaryController.class); - - @SendToUser("/queue/summary") - public Answer handleSummary(String summary) { - logger.info("handleSummary"); - return new Answer(summary , ""); - } - -} diff --git a/backend/src/main/java/dev/victormartin/oci/genai/backend/backend/service/GenerativeAiClientService.java b/backend/src/main/java/dev/victormartin/oci/genai/backend/backend/service/GenerativeAiClientService.java new file mode 
100644 index 00000000..fe9bb00e --- /dev/null +++ b/backend/src/main/java/dev/victormartin/oci/genai/backend/backend/service/GenerativeAiClientService.java @@ -0,0 +1,100 @@ +package dev.victormartin.oci.genai.backend.backend.service; + +import com.oracle.bmc.ConfigFileReader; +import com.oracle.bmc.Region; +import com.oracle.bmc.auth.AuthenticationDetailsProvider; +import com.oracle.bmc.auth.ConfigFileAuthenticationDetailsProvider; +import com.oracle.bmc.auth.InstancePrincipalsAuthenticationDetailsProvider; +import com.oracle.bmc.auth.okeworkloadidentity.OkeWorkloadIdentityAuthenticationDetailsProvider; +import com.oracle.bmc.generativeai.GenerativeAiClient; +import jakarta.annotation.PostConstruct; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.core.env.Environment; +import org.springframework.stereotype.Service; + +import java.io.IOException; + +@Service +public class GenerativeAiClientService { + + Logger log = LoggerFactory.getLogger(GenerativeAiClientService.class); + + @Autowired + private Environment environment; + + private GenerativeAiClient client; + + @Value("${genai.region}") + private String regionCode; + @Value("${genai.config.location}") + private String CONFIG_LOCATION; + @Value("${genai.config.profile}") + private String CONFIG_PROFILE; + + @PostConstruct + private void postConstruct() { + String[] activeProfiles = environment.getActiveProfiles(); + String profile = activeProfiles[0]; + switch (profile) { + case "oke": + okeGenAiClient(); + break; + case "compute": + instancePrincipalClient(); + break; + default: + localClient(); + break; + } + } + + private void okeGenAiClient() { + final OkeWorkloadIdentityAuthenticationDetailsProvider provider = new OkeWorkloadIdentityAuthenticationDetailsProvider + .OkeWorkloadIdentityAuthenticationDetailsProviderBuilder() + .build(); + 
GenerativeAiClient okeClient = GenerativeAiClient.builder() + .region(Region.fromRegionCode(regionCode)) + .build(provider); + setClient(okeClient); + } + + + private void instancePrincipalClient() { + final InstancePrincipalsAuthenticationDetailsProvider provider = new InstancePrincipalsAuthenticationDetailsProvider + .InstancePrincipalsAuthenticationDetailsProviderBuilder() + .build(); + + GenerativeAiClient instancePrinciplaClient = GenerativeAiClient.builder() + .region(Region.fromRegionCode(regionCode)) + .build(provider); + setClient(instancePrinciplaClient); + } + + private void localClient() { + final ConfigFileReader.ConfigFile configFile; + try { + configFile = ConfigFileReader.parse(CONFIG_LOCATION, CONFIG_PROFILE); + } catch (IOException e) { + log.error("Failed to load config file at {}", CONFIG_LOCATION); + log.error(e.getMessage()); + throw new RuntimeException(e); + } + final AuthenticationDetailsProvider provider = new ConfigFileAuthenticationDetailsProvider(configFile); + + GenerativeAiClient localClient = GenerativeAiClient.builder() + .region(Region.fromRegionCode(regionCode)) + .build(provider); + setClient(localClient); + } + + public GenerativeAiClient getClient() { + return client; + } + + public void setClient(GenerativeAiClient client) { + this.client = client; + } +} diff --git a/backend/src/main/java/dev/victormartin/oci/genai/backend/backend/service/GenerativeAiInferenceClientService.java b/backend/src/main/java/dev/victormartin/oci/genai/backend/backend/service/GenerativeAiInferenceClientService.java new file mode 100644 index 00000000..8df936c6 --- /dev/null +++ b/backend/src/main/java/dev/victormartin/oci/genai/backend/backend/service/GenerativeAiInferenceClientService.java @@ -0,0 +1,98 @@ +package dev.victormartin.oci.genai.backend.backend.service; + +import com.oracle.bmc.ConfigFileReader; +import com.oracle.bmc.Region; +import com.oracle.bmc.auth.AuthenticationDetailsProvider; +import 
com.oracle.bmc.auth.ConfigFileAuthenticationDetailsProvider; +import com.oracle.bmc.auth.InstancePrincipalsAuthenticationDetailsProvider; +import com.oracle.bmc.auth.okeworkloadidentity.OkeWorkloadIdentityAuthenticationDetailsProvider; +import com.oracle.bmc.generativeai.GenerativeAiClient; +import com.oracle.bmc.generativeaiinference.GenerativeAiInferenceClient; +import jakarta.annotation.PostConstruct; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.core.env.Environment; +import org.springframework.stereotype.Service; + +import java.io.IOException; + +@Service +public class GenerativeAiInferenceClientService { + + Logger log = LoggerFactory.getLogger(GenerativeAiInferenceClientService.class); + + private GenerativeAiInferenceClient client; + + @Autowired + private Environment environment; + + @Value("${genai.region}") + private String regionCode; + @Value("${genai.config.location}") + private String CONFIG_LOCATION; + @Value("${genai.config.profile}") + private String CONFIG_PROFILE; + + @PostConstruct + private void postConstruct() { + String[] activeProfiles = environment.getActiveProfiles(); + String profile = activeProfiles[0]; + log.info("Profile: {}", profile); + switch (profile) { + case "oke": + okeGenAiClient(); + break; + case "compute": + instancePrincipalClient(); + break; + default: + localConfig(); + break; + } + } + + private void okeGenAiClient() { + final OkeWorkloadIdentityAuthenticationDetailsProvider provider = new OkeWorkloadIdentityAuthenticationDetailsProvider + .OkeWorkloadIdentityAuthenticationDetailsProviderBuilder() + .build(); + GenerativeAiInferenceClient okeClient = GenerativeAiInferenceClient.builder() + .region(Region.fromRegionCode(regionCode)) + .build(provider); + setClient(okeClient); + } + + private void instancePrincipalClient() { + final 
InstancePrincipalsAuthenticationDetailsProvider provider = new InstancePrincipalsAuthenticationDetailsProvider.InstancePrincipalsAuthenticationDetailsProviderBuilder() + .build(); + + GenerativeAiInferenceClient okeClient = GenerativeAiInferenceClient.builder() + .region(Region.fromRegionCode(regionCode)) + .build(provider); + setClient(okeClient); + } + + private void localConfig() { + final ConfigFileReader.ConfigFile configFile; + try { + configFile = ConfigFileReader.parse(CONFIG_LOCATION, CONFIG_PROFILE); + } catch (IOException e) { + throw new RuntimeException(e); + } + final AuthenticationDetailsProvider provider = new ConfigFileAuthenticationDetailsProvider(configFile); + + GenerativeAiInferenceClient okeClient = GenerativeAiInferenceClient.builder() + .region(Region.fromRegionCode(regionCode)) + .build(provider); + setClient(okeClient); + } + + public GenerativeAiInferenceClient getClient() { + return client; + } + + public void setClient(GenerativeAiInferenceClient client) { + this.client = client; + } +} diff --git a/backend/src/main/java/dev/victormartin/oci/genai/backend/backend/service/OCIGenAIService.java b/backend/src/main/java/dev/victormartin/oci/genai/backend/backend/service/OCIGenAIService.java index ca4a5356..6da6f0aa 100644 --- a/backend/src/main/java/dev/victormartin/oci/genai/backend/backend/service/OCIGenAIService.java +++ b/backend/src/main/java/dev/victormartin/oci/genai/backend/backend/service/OCIGenAIService.java @@ -18,9 +18,9 @@ public class OCIGenAIService { private String COMPARTMENT_ID; @Autowired - private GenerativeAiInferenceClient generativeAiInferenceClient; + private GenerativeAiInferenceClientService generativeAiInferenceClientService; - public String request(String input, String modelId) { + public String resolvePrompt(String input, String modelId) { // Build generate text request, send, and get response CohereLlmInferenceRequest llmInferenceRequest = CohereLlmInferenceRequest.builder() @@ -41,10 +41,14 @@ public String 
request(String input, String modelId) { GenerateTextRequest generateTextRequest = GenerateTextRequest.builder() .generateTextDetails(generateTextDetails) .build(); - GenerateTextResponse generateTextResponse = generativeAiInferenceClient.generateText(generateTextRequest); + GenerativeAiInferenceClient client = generativeAiInferenceClientService.getClient(); + GenerateTextResponse generateTextResponse = client.generateText(generateTextRequest); CohereLlmInferenceResponse response = (CohereLlmInferenceResponse) generateTextResponse.getGenerateTextResult().getInferenceResponse(); - String responseTexts = response.getGeneratedTexts().stream().map(t -> t.getText()).collect(Collectors.joining(",")); + String responseTexts = response.getGeneratedTexts() + .stream() + .map(t -> t.getText()) + .collect(Collectors.joining(",")); return responseTexts; } @@ -57,7 +61,8 @@ public String summaryText(String input, String modelId) { SummarizeTextRequest request = SummarizeTextRequest.builder() .summarizeTextDetails(summarizeTextDetails) .build(); - SummarizeTextResponse summarizeTextResponse = generativeAiInferenceClient.summarizeText(request); + GenerativeAiInferenceClient client = generativeAiInferenceClientService.getClient(); + SummarizeTextResponse summarizeTextResponse = client.summarizeText(request); String summaryText = summarizeTextResponse.getSummarizeTextResult().getSummary(); return summaryText; } diff --git a/backend/src/main/resources/application.yaml b/backend/src/main/resources/application.yaml index 395a64e1..5f004124 100644 --- a/backend/src/main/resources/application.yaml +++ b/backend/src/main/resources/application.yaml @@ -24,7 +24,6 @@ oracle: fanEnabled: true genai: - endpoint: "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com" region: "US_CHICAGO_1" config: location: "~/.oci/config" diff --git a/deploy/k8s/backend/application.yaml.mustache b/deploy/k8s/backend/application.yaml.mustache index 3b1de91f..e0e21e86 100644 --- 
a/deploy/k8s/backend/application.yaml.mustache +++ b/deploy/k8s/backend/application.yaml.mustache @@ -2,7 +2,7 @@ spring: main: banner-mode: "off" profiles: - active: production + active: oke datasource: driver-class-name: oracle.jdbc.OracleDriver url: jdbc:oracle:thin:@{{{db_service}}}_high?TNS_ADMIN={{{path_to_wallet}}} @@ -24,7 +24,6 @@ oracle: fanEnabled: true genai: - endpoint: "https://inference.generativeai.{{{region_name}}}.oci.oraclecloud.com" region: "{{{region_name}}}" compartment_id: "{{{compartment_ocid}}}" chat_model_id: "{{{genai_model_chat_ocid}}}" diff --git a/deploy/k8s/backend/backend.yaml b/deploy/k8s/backend/backend.yaml index b2a3e4cd..c8f36484 100644 --- a/deploy/k8s/backend/backend.yaml +++ b/deploy/k8s/backend/backend.yaml @@ -20,7 +20,7 @@ spec: labels: app: backend spec: - serviceAccountName: genai-sa + serviceAccountName: oci-service-account automountServiceAccountToken: true initContainers: - name: unzip diff --git a/deploy/k8s/backend/kustomization.yaml b/deploy/k8s/backend/kustomization.yaml index 2e18e01e..4af50e44 100644 --- a/deploy/k8s/backend/kustomization.yaml +++ b/deploy/k8s/backend/kustomization.yaml @@ -1,5 +1,5 @@ resources: - - genai-sa.yaml + - service-account.yaml - backend.yaml - backend-svc.yaml configMapGenerator: @@ -8,4 +8,5 @@ configMapGenerator: - application.yaml - name: wallet-zip files: - - wallet/wallet.zip \ No newline at end of file + - wallet/wallet.zip +namespace: backend \ No newline at end of file diff --git a/deploy/k8s/backend/genai-sa.yaml b/deploy/k8s/backend/service-account.yaml similarity index 62% rename from deploy/k8s/backend/genai-sa.yaml rename to deploy/k8s/backend/service-account.yaml index bcf1f435..03f9ef59 100644 --- a/deploy/k8s/backend/genai-sa.yaml +++ b/deploy/k8s/backend/service-account.yaml @@ -1,4 +1,4 @@ apiVersion: v1 kind: ServiceAccount metadata: - name: genai-sa + name: oci-service-account diff --git a/deploy/k8s/ingress/ingress.yaml b/deploy/k8s/ingress/ingress.yaml index 
2e846edc..27faecd5 100644 --- a/deploy/k8s/ingress/ingress.yaml +++ b/deploy/k8s/ingress/ingress.yaml @@ -23,6 +23,13 @@ spec: name: backend port: number: 8080 + - path: /summary + pathType: Prefix + backend: + service: + name: web + port: + number: 80 - path: / pathType: Prefix backend: diff --git a/deploy/k8s/ingress/kustomization.yaml b/deploy/k8s/ingress/kustomization.yaml index 91171f17..6196e176 100644 --- a/deploy/k8s/ingress/kustomization.yaml +++ b/deploy/k8s/ingress/kustomization.yaml @@ -8,4 +8,5 @@ secretGenerator: files: - .certs/tls.crt - .certs/tls.key - type: "kubernetes.io/tls" \ No newline at end of file + type: "kubernetes.io/tls" +namespace: backend \ No newline at end of file diff --git a/deploy/k8s/web/kustomization.yaml b/deploy/k8s/web/kustomization.yaml index 42d4e11e..66829d63 100644 --- a/deploy/k8s/web/kustomization.yaml +++ b/deploy/k8s/web/kustomization.yaml @@ -1,3 +1,4 @@ resources: - web.yaml - - web-svc.yaml \ No newline at end of file + - web-svc.yaml +namespace: backend \ No newline at end of file diff --git a/deploy/terraform/iam.tf b/deploy/terraform/iam.tf index 216b9cbb..e249e6e6 100644 --- a/deploy/terraform/iam.tf +++ b/deploy/terraform/iam.tf @@ -1,16 +1,23 @@ locals { - oke_policy_name = "${local.project_name}_${local.deploy_id}_oke" - ocir_group_name = "${local.project_name}-${local.deploy_id}-group" + oke_policy_name = "${local.project_name}_${local.deploy_id}_oke" + ocir_group_name = "${local.project_name}-${local.deploy_id}-group" } +# TODO: restrict permissions; the whole generative-ai-family is too much, +# but it doesn't seem to work at any level other than tenancy resource "oci_identity_policy" "allow-oke-genai-policy" { provider = oci.home compartment_id = var.tenancy_ocid name = "${local.oke_policy_name}" description = "Allow OKE workload to manage gen ai service for ${local.project_name} ${local.deploy_id}" statements = [ - "Allow any-user to manage generative-ai-family in compartment id ${var.compartment_ocid} where all {
request.principal.type = 'workload', request.principal.namespace = 'default', request.principal.service_account = 'genai-sa', request.principal.cluster_id = '${module.oke.cluster_id}' }", - "Allow any-user to manage generative-ai-model in compartment id ${var.compartment_ocid} where all { request.principal.type = 'workload', request.principal.namespace = 'default', request.principal.service_account = 'genai-sa', request.principal.cluster_id = '${module.oke.cluster_id}' }" + "Allow any-user to manage generative-ai-family in tenancy where all { request.principal.type = 'workload', request.principal.namespace = 'backend', request.principal.service_account = 'oci-service-account', request.principal.cluster_id = '${module.oke.cluster_id}'}" + # "Allow any-user to manage generative-ai-chat in tenancy where all { request.principal.type = 'workload', request.principal.namespace = 'backend', request.principal.service_account = 'oci-service-account', request.principal.cluster_id = '${module.oke.cluster_id}'}", + # "Allow any-user to manage generative-ai-text-generation in tenancy where all { request.principal.type = 'workload', request.principal.namespace = 'backend', request.principal.service_account = 'oci-service-account', request.principal.cluster_id = '${module.oke.cluster_id}'}", + # "Allow any-user to manage generative-ai-text-summarization in tenancy where all { request.principal.type = 'workload', request.principal.namespace = 'backend', request.principal.service_account = 'oci-service-account', request.principal.cluster_id = '${module.oke.cluster_id}'}", + # "Allow any-user to manage generative-ai-text-embedding in tenancy where all { request.principal.type = 'workload', request.principal.namespace = 'backend', request.principal.service_account = 'oci-service-account', request.principal.cluster_id = '${module.oke.cluster_id}'}", + # "Allow any-user to manage generative-ai-work-request in tenancy where all { request.principal.type = 'workload', 
request.principal.namespace = 'backend', request.principal.service_account = 'oci-service-account', request.principal.cluster_id = '${module.oke.cluster_id}'}", + # "Allow any-user to manage generative-ai-model in tenancy where all { request.principal.type = 'workload', request.principal.namespace = 'backend', request.principal.service_account = 'oci-service-account', request.principal.cluster_id = '${module.oke.cluster_id}'}" ] } diff --git a/deploy/terraform/oke.tf b/deploy/terraform/oke.tf index 96547bec..9b2a6cab 100644 --- a/deploy/terraform/oke.tf +++ b/deploy/terraform/oke.tf @@ -111,7 +111,7 @@ module "oke" { worker_pools = { node_pool_1 = { - shape = "VM.Standard.E4.Flex", + shape = "VM.Standard.E5.Flex", ocpus = 1, memory = 32, boot_volume_size = 120, diff --git a/scripts/kustom.mjs b/scripts/kustom.mjs index bde1d658..08205bd6 100644 --- a/scripts/kustom.mjs +++ b/scripts/kustom.mjs @@ -19,7 +19,7 @@ const namespace = config.get("namespace"); const regionName = config.get("regionName"); const regionKey = config.get("regionKey"); const webVersion = config.get("webVersion"); -const backendVersion = config.get("webVersion"); +const backendVersion = config.get("backendVersion"); const certFullchain = config.get("certFullchain"); const certPrivateKey = config.get("certPrivateKey"); const genAiModelChat = config.get("genAiModelChat"); @@ -31,6 +31,9 @@ await createBackendProperties(); await createProdKustomization(); await copyCerts(); await copyWallet(); + +const namespaceName = "backend"; +await createNamespace(namespaceName); await createRegistrySecret(); async function createBackendProperties() { @@ -94,6 +97,27 @@ async function copyWallet() { console.log(`File ${chalk.green(walletSourcePath)} copied`); } +async function createNamespace(namespaceName = "default") { + try { + const { exitCode, stdout } = + await $`KUBECONFIG="deploy/terraform/generated/kubeconfig" \ + kubectl create ns ${namespaceName} -o yaml \ + --dry-run=client | \ + 
KUBECONFIG="deploy/terraform/generated/kubeconfig" kubectl apply -f -`; + if (exitCode !== 0) { + exitWithError("namespace not created"); + } else { + console.log( + `Namespace ${chalk.green( + namespaceName + )} created on Kubernetes cluster: ${stdout}` + ); + } + } catch (error) { + exitWithError(error.stderr); + } +} + async function createRegistrySecret() { const user = config.get("ocir_user"); const email = config.get("ocir_user_email"); @@ -108,6 +132,7 @@ async function createRegistrySecret() { --docker-username=${namespace}/${user} \ --docker-password=${token} \ --docker-email=${email} \ + -n ${namespaceName} \ -o yaml | \ KUBECONFIG="deploy/terraform/generated/kubeconfig" kubectl apply -f -`; if (exitCode !== 0) { diff --git a/scripts/package.json b/scripts/package.json index 4658e1a2..c3b7db54 100644 --- a/scripts/package.json +++ b/scripts/package.json @@ -19,4 +19,4 @@ "mustache": "^4.2.0", "underscore": "^1.13.6" } -} +} \ No newline at end of file diff --git a/web/Dockerfile b/web/Dockerfile index d65dd954..3da6f1c4 100644 --- a/web/Dockerfile +++ b/web/Dockerfile @@ -20,5 +20,9 @@ RUN npm run build FROM --platform=linux/amd64 nginx:1.23-alpine-slim COPY --from=builder /usr/src/web/dist/ /usr/share/nginx/html/ +RUN rm /etc/nginx/conf.d/default.conf +COPY nginx/default.conf /etc/nginx/conf.d -EXPOSE 80 \ No newline at end of file +EXPOSE 80 + +CMD ["nginx", "-g", "daemon off;"] \ No newline at end of file diff --git a/web/nginx/default.conf b/web/nginx/default.conf new file mode 100644 index 00000000..ecb9e436 --- /dev/null +++ b/web/nginx/default.conf @@ -0,0 +1,14 @@ +server { + listen 80; + + location / { + root /usr/share/nginx/html; + index index.html index.htm; + try_files $uri $uri/ /index.html; + } + + error_page 500 502 503 504 /50x.html; + location = /50x.html { + root /usr/share/nginx/html; + } +} \ No newline at end of file diff --git a/web/src/Summary.jsx b/web/src/Summary.jsx index cd72cda4..63362c55 100644 --- a/web/src/Summary.jsx +++ 
b/web/src/Summary.jsx @@ -1,4 +1,11 @@ -import { Box, Button, Snackbar, Stack, TextField } from "@mui/material"; +import { + Box, + Button, + Snackbar, + Stack, + TextField, + Typography, +} from "@mui/material"; import { useEffect, useState } from "react"; import { useForm } from "react-hook-form"; import { useStomp } from "./stompHook"; @@ -8,6 +15,7 @@ function Summary() { const [waiting, setWaiting] = useState(false); const [showError, setShowError] = useState(false); const [errorMessage, setErrorMessage] = useState(); + const [summary, setSummary] = useState(""); const { subscribe, unsubscribe, isConnected } = useStomp(); useEffect(() => { @@ -20,6 +28,7 @@ function Summary() { } else { console.log("/user/queue/summary"); console.log(message); + setSummary(message); } }); } @@ -37,8 +46,15 @@ function Summary() { method: "POST", body: formData, }); - const text = await res.text(); - console.log(text); + const responseData = await res.json(); + const { content, errorMessage } = responseData; + if (errorMessage.length) { + setErrorMessage(errorMessage); + setShowError(true); + } else { + console.log(content); + setSummary(content); + } }; return ( @@ -53,6 +69,7 @@ function Summary() { + {summary.length && {summary}} Date: Thu, 27 Jun 2024 01:12:28 +0200 Subject: [PATCH 2/2] fix text generation model selection at setenv --- scripts/setenv.mjs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/setenv.mjs b/scripts/setenv.mjs index 00732357..ea185678 100644 --- a/scripts/setenv.mjs +++ b/scripts/setenv.mjs @@ -118,7 +118,7 @@ async function setLatestGenAIModelChat() { config.get("compartmentId"), config.get("regionName"), "cohere", - "CHAT" + "TEXT_GENERATION" ); const { id, vendor: vendorName, version, capabilities } = latestVersionModel;