From 3ae460497e7ef04e04c411e20477083fb4ccaedc Mon Sep 17 00:00:00 2001 From: Enrico Olivelli Date: Wed, 20 Sep 2023 10:35:26 +0200 Subject: [PATCH] [logging] Do not print secrets while opening a connection to the Vector database (#446) --- .../webcrawler-source/chatbot.yaml | 2 +- .../webcrawler-source/crawler.yaml | 4 +- .../ai/datasource/CassandraDataSource.java | 4 +- .../api/util/ConfigurationUtils.java | 47 +++++++++++++++++++ 4 files changed, 53 insertions(+), 4 deletions(-) diff --git a/examples/applications/webcrawler-source/chatbot.yaml b/examples/applications/webcrawler-source/chatbot.yaml index b8c967d2f..a049c345c 100644 --- a/examples/applications/webcrawler-source/chatbot.yaml +++ b/examples/applications/webcrawler-source/chatbot.yaml @@ -67,7 +67,7 @@ pipeline: messages: - role: system content: | - An user is going to perform a questions, he documents below may help you in answering to their questions. + An user is going to perform a questions, The documents below may help you in answering to their questions. Please try to leverage them in your answer as much as possible. Take into consideration that the user is always asking questions about the LangStream project. If you provide code or YAML snippets, please explicitly state that they are examples. diff --git a/examples/applications/webcrawler-source/crawler.yaml b/examples/applications/webcrawler-source/crawler.yaml index affca5180..9bec4b98d 100644 --- a/examples/applications/webcrawler-source/crawler.yaml +++ b/examples/applications/webcrawler-source/crawler.yaml @@ -31,7 +31,7 @@ pipeline: reindex-interval-seconds: 3600 max-error-count: 5 max-urls: 1000 - max-depth: 10 + max-depth: 50 handle-robots-file: true user-agent: "" # this is computed automatically, but you can override it scan-html-documents: true @@ -53,7 +53,7 @@ pipeline: - name: "Detect language" type: "language-detector" configuration: - allowedLanguages: ["en"] + allowedLanguages: ["en", "fr"] property: "language" - name: "Split into chunks" type: "text-splitter" diff --git a/langstream-agents/langstream-ai-agents/src/main/java/com/datastax/oss/streaming/ai/datasource/CassandraDataSource.java b/langstream-agents/langstream-ai-agents/src/main/java/com/datastax/oss/streaming/ai/datasource/CassandraDataSource.java index 0c57634c2..53423f114 100644 --- a/langstream-agents/langstream-ai-agents/src/main/java/com/datastax/oss/streaming/ai/datasource/CassandraDataSource.java +++ b/langstream-agents/langstream-ai-agents/src/main/java/com/datastax/oss/streaming/ai/datasource/CassandraDataSource.java @@ -74,7 +74,9 @@ protected TypeCodec createCodec( @Override public void initialize(Map dataSourceConfig) { - log.info("Initializing AstraDBDataSource with config {}", dataSourceConfig); + log.info( + "Initializing CassandraDataSource with config {}", + ConfigurationUtils.redactSecrets(dataSourceConfig)); this.astraToken = ConfigurationUtils.getString("token", "", dataSourceConfig); this.astraEnvironment = ConfigurationUtils.getString("environment", "PROD", dataSourceConfig); diff --git a/langstream-api/src/main/java/ai/langstream/api/util/ConfigurationUtils.java b/langstream-api/src/main/java/ai/langstream/api/util/ConfigurationUtils.java index 4172a522f..595caac1b 100644 --- a/langstream-api/src/main/java/ai/langstream/api/util/ConfigurationUtils.java +++ b/langstream-api/src/main/java/ai/langstream/api/util/ConfigurationUtils.java @@ -15,8 +15,10 @@ */ package ai.langstream.api.util; +import java.util.ArrayList; import java.util.Collection; import java.util.Collections; +import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -252,4 +254,49 @@ public static void requiredListField( "Expecting a list in the field '" + name + "' in " + definition.get()); } } + + /** + * Remove all the secrets from the configuration. This method is used to avoid logging secrets + * + * @param object + * @return the object without secrets + */ + public static Object redactSecrets(Object object) { + if (object == null) { + return null; + } + + if (object instanceof List list) { + List other = new ArrayList<>(list.size()); + list.forEach(o -> other.add(redactSecrets(o))); + return other; + } + if (object instanceof Set set) { + Set other = new HashSet<>(set.size()); + set.forEach(o -> other.add(redactSecrets(o))); + return other; + } + + if (object instanceof Map map) { + Map other = new HashMap<>(); + map.forEach( + (k, v) -> { + String keyLowercase = (String.valueOf(k)).toLowerCase(); + if (keyLowercase.contains("password") + || keyLowercase.contains("pwd") + || keyLowercase.contains("secure") + || keyLowercase.contains("secret") + || keyLowercase.contains("serviceaccountjson") + || keyLowercase.contains("access-key") + || keyLowercase.contains("token")) { + other.put(k, ""); + } else { + other.put(k, redactSecrets(v)); + } + }); + return other; + } + + return object; + } }