diff --git a/docker/replace-agent-in-runtime-tester.sh b/dev/replace-agent-in-runtime-tester.sh similarity index 100% rename from docker/replace-agent-in-runtime-tester.sh rename to dev/replace-agent-in-runtime-tester.sh diff --git a/docker/metrics/dashboards/apps.json b/docker/metrics/dashboards/apps.json new file mode 100644 index 000000000..467ac5aff --- /dev/null +++ b/docker/metrics/dashboards/apps.json @@ -0,0 +1,2037 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "datasource", + "uid": "grafana" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "description": "Apps related metrics", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 1, + "links": [], + "liveNow": false, + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 131, + "panels": [], + "repeat": "agent", + "repeatDirection": "h", + "title": "OpenAI Consumption", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 8, + "x": 0, + "y": 1 + }, + "id": 132, + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.5.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum by (agent_id) (openai_embeddings_num_calls_total)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "Http requests - {{agent_id}}", + "metric": "jvm_memory_bytes_committed", + "range": true, + "refId": "A", + "step": 20 + }, + { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "editorMode": "code", + "expr": "sum by (agent_id) (openai_embeddings_num_errors_total)", + "hide": false, + "instant": false, + "legendFormat": "Errors - {{agent_id}}", + "range": true, + "refId": "B" + } + ], + "title": "OpenAI Embeddings requests", + "transformations": [], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + 
"axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 8, + "x": 8, + "y": 1 + }, + "id": 133, + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.5.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum by (agent_id) (openai_embeddings_num_texts_total)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "Texts - {{agent_id}}", + "metric": "jvm_memory_bytes_committed", + "range": true, + "refId": "A", + "step": 20 + } + ], + "title": "OpenAI Embeddings Texts", + "transformations": [], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 8, + "x": 16, + "y": 1 + }, + "id": 134, + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.5.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum by (agent_id) (openai_embeddings_prompt_tokens_total) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "Prompt tokens - {{agent_id}}", + "metric": "jvm_memory_bytes_committed", + "range": true, + "refId": "A", + "step": 20 + }, + { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "editorMode": "code", + "expr": "sum by (agent_id) (openai_embeddings_total_tokens_total)", + "hide": false, + "instant": false, + "legendFormat": "Total tokens - {{agent_id}}", + "range": true, + "refId": "B" + } + ], + "title": 
"OpenAI Embeddings Tokens", + "transformations": [], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 8, + "x": 0, + "y": 7 + }, + "id": 135, + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.5.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum by (agent_id) (openai_chat_completions_num_calls_total)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "Http requests - {{agent_id}}", + "metric": "jvm_memory_bytes_committed", + "range": true, + "refId": "A", + "step": 20 + }, + { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "editorMode": "code", + "expr": "sum by (agent_id) (openai_chat_completions_num_errors_total)", + "hide": false, + "instant": false, + "legendFormat": "Errors - {{agent_id}}", + "range": true, + "refId": "B" + } + ], + "title": "OpenAI Chat Completions requests", + "transformations": [], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 8, + "x": 8, + "y": 7 + }, + "id": 137, + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.5.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + 
"editorMode": "code", + "exemplar": false, + "expr": "sum by (agent_id) (openai_chat_completions_num_calls_total)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "Http requests - {{agent_id}}", + "metric": "jvm_memory_bytes_committed", + "range": true, + "refId": "A", + "step": 20 + }, + { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "editorMode": "code", + "expr": "sum by (agent_id) (openai_chat_completions_num_errors_total)", + "hide": false, + "instant": false, + "legendFormat": "Errors - {{agent_id}}", + "range": true, + "refId": "B" + } + ], + "title": "OpenAI Chat Completions requests", + "transformations": [], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 8, + "x": 0, + "y": 13 + }, + "id": 136, + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.5.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum by (agent_id) (openai_text_completions_prompt_tokens_total) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "Prompt tokens - {{agent_id}}", + "metric": "jvm_memory_bytes_committed", + "range": true, + "refId": "A", + "step": 20 + }, + { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "editorMode": "code", + "expr": "sum by (agent_id) (openai_text_completions_total_tokens_total)", + "hide": false, + "instant": false, + "legendFormat": "Completions tokens - {{agent_id}}", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "editorMode": "code", + "expr": "sum by (agent_id) (openai_text_completions_completions_tokens_total)", + "hide": false, + "instant": false, + "legendFormat": "Total tokens - {{agent_id}}", + "range": true, + "refId": "C" + } + ], + "title": "OpenAI Text Completions Tokens", + "transformations": [], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": 
"line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 8, + "x": 8, + "y": 13 + }, + "id": 138, + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.5.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum by (agent_id) (openai_text_completions_num_calls_total)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "Http requests - {{agent_id}}", + "metric": "jvm_memory_bytes_committed", + "range": true, + "refId": "A", + "step": 20 + }, + { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "editorMode": "code", + "expr": "sum by (agent_id) (openai_text_completions_num_errors_total)", + "hide": false, + "instant": false, + "legendFormat": "Errors - {{agent_id}}", + "range": true, + "refId": "B" + } + ], + "title": "OpenAI Text Completions requests", + "transformations": [], + "type": "timeseries" + }, + { + "collapsed": false, + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 19 + }, + "id": 41, + "panels": [], + "repeat": "tenant", + "title": "Tenant: $tenant", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "transparent", + "value": null + }, + { + "color": "red", + "value": 100 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 8, + "x": 0, + "y": 20 + }, + "id": 4, + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.5.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "irate(process_cpu_seconds_total{namespace=~\"ls-$tenant\", pod=~\"$application-$agent-.*\"}[60s]) * 100\n\n", + 
"format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "process_cpu_seconds_total", + "range": true, + "refId": "A", + "step": 20 + } + ], + "title": "CPU", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 8, + "x": 8, + "y": 20 + }, + "id": 1, + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.5.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum by (pod) (jvm_memory_bytes_used{namespace=~\"ls-$tenant\",pod=~\"$application-$agent-.*\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "Used - {{pod}}", + "metric": "jvm_memory_bytes_committed", + "range": true, + "refId": "A", + "step": 20 + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum by (pod) (jvm_memory_bytes_max{namespace=~\"ls-$tenant\",pod=~\"$application-$agent-.*\"})", + "hide": false, + "legendFormat": "Max: {{pod}}", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum by (pod) (jvm_memory_bytes_committed{namespace=~\"ls-$tenant\",pod=~\"$application-$agent-.*\"})", + "hide": false, + "legendFormat": "Committed: {{pod}}", + "range": true, + "refId": "C" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "kube_pod_container_resource_limits{namespace=~\"ls-$tenant\", pod=~\"$application-$agent-.*\", resource=\"memory\"}", + "hide": false, + "legendFormat": "Kube limit: {{pod}}", + "range": true, + "refId": "D" + } + ], + "title": "Heap Memory", + "transformations": [], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + 
"scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "old count" + }, + "properties": [ + { + "id": "unit", + "value": "short" + }, + { + "id": "custom.axisPlacement", + "value": "hidden" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "young count" + }, + "properties": [ + { + "id": "unit", + "value": "short" + }, + { + "id": "custom.axisPlacement", + "value": "hidden" + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 8, + "x": 16, + "y": 20 + }, + "id": 3, + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.5.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "increase(jvm_gc_collection_seconds_sum{namespace=~\"ls-$tenant\",pod=~\"$application-$agent-.*\"}[60s])", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "jvm_gc_collection_seconds_count", + "range": true, + "refId": "B", + "step": 20 + } + ], + "title": "GC Pauses", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "transparent", + "value": null + } + ] + }, + "unit": "string" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 8, + "x": 0, + "y": 26 + }, + "id": 118, + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.5.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(kube_pod_status_ready{namespace=~\"ls-$tenant\", condition=\"true\", pod=~\"$application-$agent-.*\"}) by (pod)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "process_cpu_seconds_total", + "range": true, + "refId": "A", + "step": 20 + } + ], + "title": "Running executors", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": 
false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "transparent", + "value": null + } + ] + }, + "unit": "string" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 8, + "x": 8, + "y": 26 + }, + "id": 129, + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.5.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(kube_pod_status_ready{namespace=~\"ls-$tenant\", condition=\"false\", pod=~\"$application-$agent-.*\"}) by (pod)", + "hide": false, + "legendFormat": "__auto", + "range": true, + "refId": "B" + } + ], + "title": "Failing executors", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "transparent", + "value": null + } + ] + }, + "unit": "string" + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 8, + "x": 16, + "y": 26 + }, + "id": 130, + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "pluginVersion": "8.5.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "\nsum(label_replace(\n kube_pod_status_phase{namespace=~\"ls-$tenant\", phase=~\"Failed|Unknown\", pod=~\"langstream-runtime-deployer-.*\"},\n \"application1\",\"$1\",\"pod\", \"(.*)-(.*)\"\n )\n)by(application1)", + "hide": false, + "legendFormat": "__auto", + "range": true, + "refId": "B" + } + ], + "title": "Failed deployments", + "transformations": [], + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 32 + }, + "id": 109, + "panels": [], + "title": "Global", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + 
"axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 33 + }, + "id": 87, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.5.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(kube_pod_status_ready{namespace=~\"ls-.*\", condition=\"true\"})", + "legendFormat": "Running", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(kube_pod_status_ready{namespace=~\"ls-.*\", condition=\"false\"})", + "hide": false, + "legendFormat": "Failing", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(kube_pod_status_ready{namespace=~\"ls-.*\"})", + "hide": false, + "legendFormat": "Total", + "range": true, + "refId": "C" + } + ], + "title": "Executors ", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 33 + }, + "id": 106, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.5.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(kube_pod_container_resource_limits{namespace=~\"ls-$tenant\", pod=~\"$application-$agent-.*\", resource=\"cpu\"}) by (namespace)", + "legendFormat": "{{namespace}}", + "range": true, + "refId": "A" + } + ], + "title": "CPU limits", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, 
+ "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 33 + }, + "id": 107, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.5.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "editorMode": "code", + "expr": "sum(kube_pod_container_resource_limits{namespace=~\"ls-$tenant\", pod=~\"$application-$agent-.*\", resource=\"memory\"}) by (namespace)", + "legendFormat": "{{namespace}}", + "range": true, + "refId": "A" + } + ], + "title": "Memory limits", + "type": "timeseries" + } + ], + "refresh": "5s", + "schemaVersion": 38, + "tags": [], + "templating": { + "list": [ + { + "allValue": "", + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "definition": "jvm_memory_bytes_used{namespace=~\".+\"}", + "hide": 0, + "includeAll": true, + "label": "Tenant", + "multi": false, + "name": "tenant", + "options": [], + "query": { + "query": "jvm_memory_bytes_used{namespace=~\".+\"}", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "/.*[^_]namespace=\\\"ls-([^\\\"]+)\\\".*/", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "definition": "jvm_memory_bytes_used{namespace=~\"ls-($tenant|All)\",langstream_application=~\".+\"}", + "hide": 0, + "includeAll": true, + "label": "Application", + "multi": true, + "name": "application", + "options": [], + "query": { + "query": "jvm_memory_bytes_used{namespace=~\"ls-($tenant|All)\",langstream_application=~\".+\"}", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": "/.*[^_]langstream_application=\\\"([^\\\"]+)\\\".*/", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "prometheus", + "uid": "prometheus" + }, + "definition": "jvm_memory_bytes_used{namespace=~\"ls-($tenant|All)\",langstream_application=~\"($application|All)\",langstream_agent=~\".+\"}", + "hide": 0, + "includeAll": true, + "label": "Agent", + "multi": true, + "name": "agent", + "options": [], + "query": { + "query": 
"jvm_memory_bytes_used{namespace=~\"ls-($tenant|All)\",langstream_application=~\"($application|All)\",langstream_agent=~\".+\"}", + "refId": "StandardVariableQuery" + }, + "refresh": 2, + "regex": "/.*[^_]langstream_agent=\\\"([^\\\"]+)\\\".*/", + "skipUrlSync": false, + "sort": 1, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-5m", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Langstream / Apps", + "uid": "rGuiWUzSk", + "version": 1, + "weekStart": "" +} \ No newline at end of file diff --git a/docker/metrics/prometheus.yml b/docker/metrics/prometheus.yml new file mode 100644 index 000000000..181132408 --- /dev/null +++ b/docker/metrics/prometheus.yml @@ -0,0 +1,28 @@ +# +# Copyright DataStax, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +global: + scrape_interval: 15s + +scrape_configs: + - job_name: 'langstream' + static_configs: + - targets: ['host.docker.internal:8790'] + labels: + namespace: 'ls-docker' + pod: 'app-docker-local' + langstream_application: 'app' + langstream_agent: 'docker' diff --git a/docker/metrics/provisioning/dashboards/dashboards.yml b/docker/metrics/provisioning/dashboards/dashboards.yml new file mode 100644 index 000000000..c7738f200 --- /dev/null +++ b/docker/metrics/provisioning/dashboards/dashboards.yml @@ -0,0 +1,30 @@ +# +# Copyright DataStax, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +apiVersion: 1 + +providers: + - name: 'langstream' + orgId: 1 + folder: '' + folderUid: '123123' + type: file + disableDeletion: false + updateIntervalSeconds: 10 + allowUiUpdates: true + options: + path: /var/lib/grafana/dashboards + foldersFromFilesStructure: true diff --git a/docker/metrics/provisioning/datasources/prometheus-datasources.yml b/docker/metrics/provisioning/datasources/prometheus-datasources.yml new file mode 100644 index 000000000..9a6f3aa27 --- /dev/null +++ b/docker/metrics/provisioning/datasources/prometheus-datasources.yml @@ -0,0 +1,23 @@ +# +# Copyright DataStax, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +apiVersion: 1 +datasources: + - name: prometheus + type: prometheus + access: proxy + url: http://host.docker.internal:9090 + isDefault: true diff --git a/docker/metrics/run-local-grafana.sh b/docker/metrics/run-local-grafana.sh new file mode 100755 index 000000000..bd267b412 --- /dev/null +++ b/docker/metrics/run-local-grafana.sh @@ -0,0 +1,34 @@ +# +# Copyright DataStax, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +#!/bin/bash + +HERE=$(dirname $0) +docker rm -f prometheus +docker run -d -p 9090:9090 --name prometheus -v $HERE/prometheus.yml:/etc/prometheus/prometheus.yml prom/prometheus +docker rm -f grafana + +docker run -d -p 3000:3000 --name=grafana \ + -e "GF_SECURITY_ADMIN_USER=admin" \ + -e "GF_SECURITY_ADMIN_PASSWORD=admin" \ + -v $HERE/provisioning:/etc/grafana/provisioning \ + -v $HERE/dashboards:/var/lib/grafana/dashboards \ + grafana/grafana + + +echo "Open Grafana at http://localhost:3000/" +echo "Username: admin" +echo "Password: admin" diff --git a/examples/applications/flare/flare-text-generator.yaml b/examples/applications/flare/flare-text-generator.yaml index fdb9ff947..c207921ab 100644 --- a/examples/applications/flare/flare-text-generator.yaml +++ b/examples/applications/flare/flare-text-generator.yaml @@ -100,6 +100,7 @@ pipeline: logprobs: 5 logprobs-field: "value.tokens" max-tokens: 100 + stream: false prompt: - | There is a list of documents that you must use to perform your task. 
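Editor's note: the files above wire up a local observability stack — prometheus.yml scrapes the dockerized LangStream app on host.docker.internal:8790, the provisioning files auto-load the Prometheus datasource and the apps.json dashboard, and run-local-grafana.sh starts the two containers. A quick way to confirm the wiring before opening Grafana is to hit the Prometheus and Grafana HTTP APIs directly; the sketch below is illustrative only and assumes the containers started by run-local-grafana.sh are running and an agent is already exposing metrics on port 8790 as configured above.

```bash
#!/bin/bash
# Illustrative smoke test for the local metrics stack (not part of this change).
# Assumes Prometheus on localhost:9090 and Grafana on localhost:3000, as published
# by run-local-grafana.sh, with the default admin/admin credentials set there.

# The 'langstream' scrape job defined in prometheus.yml should report a healthy target.
curl -s 'http://localhost:9090/api/v1/targets' | grep -o '"health":"[^"]*"'

# Once an agent has called OpenAI, the counters plotted by apps.json become queryable.
curl -s 'http://localhost:9090/api/v1/query?query=openai_chat_completions_num_calls_total'

# The provisioned "Langstream / Apps" dashboard should be listed by Grafana.
curl -s -u admin:admin 'http://localhost:3000/api/search?query=Langstream'
```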
diff --git a/langstream-agents/langstream-ai-agents/src/main/java/ai/langstream/ai/agents/services/impl/OpenAICompletionService.java b/langstream-agents/langstream-ai-agents/src/main/java/ai/langstream/ai/agents/services/impl/OpenAICompletionService.java index 64aea35a6..e2186682a 100644 --- a/langstream-agents/langstream-ai-agents/src/main/java/ai/langstream/ai/agents/services/impl/OpenAICompletionService.java +++ b/langstream-agents/langstream-ai-agents/src/main/java/ai/langstream/ai/agents/services/impl/OpenAICompletionService.java @@ -19,12 +19,14 @@ import static ai.langstream.api.util.ConfigurationUtils.getDouble; import static ai.langstream.api.util.ConfigurationUtils.getInteger; +import ai.langstream.api.runner.code.MetricsReporter; import com.azure.ai.openai.OpenAIAsyncClient; import com.azure.ai.openai.models.ChatCompletionsOptions; import com.azure.ai.openai.models.ChatRole; import com.azure.ai.openai.models.CompletionsFinishReason; import com.azure.ai.openai.models.CompletionsLogProbabilityModel; import com.azure.ai.openai.models.CompletionsOptions; +import com.azure.ai.openai.models.CompletionsUsage; import com.datastax.oss.streaming.ai.completions.ChatChoice; import com.datastax.oss.streaming.ai.completions.ChatCompletions; import com.datastax.oss.streaming.ai.completions.ChatMessage; @@ -41,6 +43,7 @@ import java.util.concurrent.atomic.AtomicReference; import java.util.function.Consumer; import java.util.stream.Collectors; +import lombok.Getter; import lombok.SneakyThrows; import lombok.extern.slf4j.Slf4j; import reactor.core.publisher.Flux; @@ -50,8 +53,68 @@ public class OpenAICompletionService implements CompletionsService { private final OpenAIAsyncClient client; - public OpenAICompletionService(OpenAIAsyncClient client) { + private final MetricsReporter.Counter textTotalTokens; + private final MetricsReporter.Counter textPromptTokens; + private final MetricsReporter.Counter textCompletionTokens; + private final MetricsReporter.Counter textNumCalls; + private final MetricsReporter.Counter textNumErrors; + + private final MetricsReporter.Counter chatTotalTokens; + private final MetricsReporter.Counter chatPromptTokens; + private final MetricsReporter.Counter chatCompletionTokens; + private final MetricsReporter.Counter chatNumCalls; + private final MetricsReporter.Counter chatNumErrors; + + public OpenAICompletionService(OpenAIAsyncClient client, MetricsReporter metricsReporter) { this.client = client; + + this.chatTotalTokens = + metricsReporter.counter( + "openai_chat_completions_total_tokens", + "Total number of tokens exchanged with OpenAI Chat Completions"); + this.chatPromptTokens = + metricsReporter.counter( + "openai_chat_completions_prompt_tokens", + "Total number of prompt tokens sent to OpenAI Chat Completions"); + this.chatCompletionTokens = + metricsReporter.counter( + "openai_chat_completions_completions_tokens", + "Total number of completions tokens received from OpenAI Chat Completions"); + + this.chatNumCalls = + metricsReporter.counter( + "openai_chat_completions_num_calls", + "Total number of calls to OpenAI Chat Completions"); + + this.chatNumErrors = + metricsReporter.counter( + "openai_chat_completions_num_errors", + "Total number of errors while calling OpenAI Chat Completions"); + + this.textTotalTokens = + metricsReporter.counter( + "openai_text_completions_total_tokens", + "Total number of tokens exchanged with OpenAI Text Completions"); + + this.textCompletionTokens = + metricsReporter.counter( + "openai_text_completions_completions_tokens", + 
"Total number of completions tokens received from OpenAI Text Completions"); + + this.textPromptTokens = + metricsReporter.counter( + "openai_text_completions_prompt_tokens", + "Total number of prompt tokens sent to OpenAI Text Completions"); + + this.textNumCalls = + metricsReporter.counter( + "openai_text_completions_num_calls", + "Total number of calls to OpenAI Text Completions"); + + this.textNumErrors = + metricsReporter.counter( + "openai_text_completions_num_errors", + "Total number of errors while calling OpenAI Text Completions"); } @Override @@ -81,6 +144,7 @@ public CompletableFuture getChatCompletions( .setPresencePenalty(getDouble("presence-penalty", null, options)) .setFrequencyPenalty(getDouble("frequency-penalty", null, options)); ChatCompletions result = new ChatCompletions(); + chatNumCalls.count(1); // this is the default behavior, as it is async // it works even if the streamingChunksConsumer is null if (chatCompletionsOptions.isStream()) { @@ -98,6 +162,7 @@ public CompletableFuture getChatCompletions( log.error( "Internal error while processing the streaming response", error); + chatNumErrors.count(1); finished.completeExceptionally(error); }) .doOnNext(chatCompletionsConsumer) @@ -110,17 +175,38 @@ public CompletableFuture getChatCompletions( new ChatChoice( chatCompletionsConsumer .buildTotalAnswerMessage()))); + chatTotalTokens.count(chatCompletionsConsumer.getTotalTokens().intValue()); + chatPromptTokens.count( + chatCompletionsConsumer.getPromptTokens().intValue()); + chatCompletionTokens.count( + chatCompletionsConsumer.getCompletionTokens().intValue()); return result; }); } else { - com.azure.ai.openai.models.ChatCompletions chatCompletions = + CompletableFuture resultHandle = client.getChatCompletions((String) options.get("model"), chatCompletionsOptions) - .block(); - result.setChoices( - chatCompletions.getChoices().stream() - .map(c -> new ChatChoice(convertMessage(c))) - .collect(Collectors.toList())); - return CompletableFuture.completedFuture(result); + .toFuture() + .thenApply( + chatCompletions -> { + result.setChoices( + chatCompletions.getChoices().stream() + .map(c -> new ChatChoice(convertMessage(c))) + .collect(Collectors.toList())); + CompletionsUsage usage = chatCompletions.getUsage(); + if (usage != null) { + chatTotalTokens.count(usage.getTotalTokens()); + chatPromptTokens.count(usage.getPromptTokens()); + chatCompletionTokens.count(usage.getCompletionTokens()); + } + return result; + }); + + resultHandle.exceptionally( + error -> { + chatNumErrors.count(1); + return null; + }); + return resultHandle; } } @@ -142,12 +228,17 @@ private static class ChatCompletionsConsumer private final AtomicReference role = new AtomicReference<>(); private final StringWriter totalAnswer = new StringWriter(); + @Getter private final AtomicInteger totalTokens = new AtomicInteger(); + @Getter private final AtomicInteger promptTokens = new AtomicInteger(); + + @Getter private final AtomicInteger completionTokens = new AtomicInteger(); + private final StringWriter writer = new StringWriter(); private final AtomicInteger numberOfChunks = new AtomicInteger(); private final int minChunksPerMessage; - private AtomicInteger currentChunkSize = new AtomicInteger(1); - private AtomicInteger index = new AtomicInteger(); + private final AtomicInteger currentChunkSize = new AtomicInteger(1); + private final AtomicInteger index = new AtomicInteger(); public ChatCompletionsConsumer( StreamingChunksConsumer streamingChunksConsumer, @@ -166,6 +257,14 @@ public synchronized void 
accept( com.azure.ai.openai.models.ChatCompletions chatCompletions) { List choices = chatCompletions.getChoices(); String answerId = chatCompletions.getId(); + log.info("Chat completions chunk: {}", chatCompletions); + log.info("Chat completions chunk:usage: {}", chatCompletions.getUsage()); + if (chatCompletions.getUsage() != null) { + totalTokens.addAndGet(chatCompletions.getUsage().getTotalTokens()); + completionTokens.addAndGet(chatCompletions.getUsage().getCompletionTokens()); + promptTokens.addAndGet(chatCompletions.getUsage().getPromptTokens()); + } + if (!choices.isEmpty()) { com.azure.ai.openai.models.ChatChoice first = choices.get(0); CompletionsFinishReason finishReason = first.getFinishReason(); @@ -235,6 +334,7 @@ public CompletableFuture getTextCompletions( // this is the default behavior, as it is async // it works even if the streamingChunksConsumer is null final String model = (String) options.get("model"); + textNumCalls.count(1); if (completionsOptions.isStream()) { CompletableFuture finished = new CompletableFuture<>(); Flux flux = @@ -249,6 +349,7 @@ public CompletableFuture getTextCompletions( log.error( "Internal error while processing the streaming response", error); + textNumErrors.count(1); finished.completeExceptionally(error); }) .doOnNext(textCompletionsConsumer) @@ -260,20 +361,47 @@ public CompletableFuture getTextCompletions( new TextCompletionResult.LogProbInformation( textCompletionsConsumer.logProbsTokens, textCompletionsConsumer.logProbsTokenLogProbabilities); + textTotalTokens.count(textCompletionsConsumer.getTotalTokens().intValue()); + textPromptTokens.count( + textCompletionsConsumer.getPromptTokens().intValue()); + textCompletionTokens.count( + textCompletionsConsumer.getCompletionTokens().intValue()); return new TextCompletionResult( textCompletionsConsumer.totalAnswer.toString(), logProbs); }); } else { - com.azure.ai.openai.models.Completions completions = - client.getCompletions(model, completionsOptions).block(); - final String text = completions.getChoices().get(0).getText(); - CompletionsLogProbabilityModel logprobs = completions.getChoices().get(0).getLogprobs(); - TextCompletionResult.LogProbInformation logProbs = - completions.getChoices().get(0).getLogprobs() != null - ? new TextCompletionResult.LogProbInformation( - logprobs.getTokens(), logprobs.getTokenLogProbabilities()) - : new TextCompletionResult.LogProbInformation(null, null); - return CompletableFuture.completedFuture(new TextCompletionResult(text, logProbs)); + CompletableFuture resultHandle = + client.getCompletions(model, completionsOptions) + .toFuture() + .thenApply( + completions -> { + CompletionsUsage usage = completions.getUsage(); + if (usage != null) { + textTotalTokens.count(usage.getTotalTokens()); + textPromptTokens.count(usage.getPromptTokens()); + textCompletionTokens.count(usage.getCompletionTokens()); + } + final String text = + completions.getChoices().get(0).getText(); + CompletionsLogProbabilityModel logprobs = + completions.getChoices().get(0).getLogprobs(); + TextCompletionResult.LogProbInformation logProbs = + completions.getChoices().get(0).getLogprobs() + != null + ? 
new TextCompletionResult + .LogProbInformation( + logprobs.getTokens(), + logprobs.getTokenLogProbabilities()) + : new TextCompletionResult + .LogProbInformation(null, null); + return new TextCompletionResult(text, logProbs); + }); + resultHandle.exceptionally( + error -> { + textNumErrors.count(1); + return null; + }); + return resultHandle; } } @@ -282,17 +410,21 @@ private static class TextCompletionsConsumer private final StreamingChunksConsumer streamingChunksConsumer; private final CompletableFuture finished; - private final AtomicReference role = new AtomicReference<>(); private final StringWriter totalAnswer = new StringWriter(); + @Getter private final AtomicInteger totalTokens = new AtomicInteger(); + @Getter private final AtomicInteger promptTokens = new AtomicInteger(); + + @Getter private final AtomicInteger completionTokens = new AtomicInteger(); + private final StringWriter writer = new StringWriter(); private final AtomicInteger numberOfChunks = new AtomicInteger(); private final int minChunksPerMessage; public List logProbsTokens = new ArrayList<>(); public List logProbsTokenLogProbabilities = new ArrayList<>(); - private AtomicInteger currentChunkSize = new AtomicInteger(1); - private AtomicInteger index = new AtomicInteger(); + private final AtomicInteger currentChunkSize = new AtomicInteger(1); + private final AtomicInteger index = new AtomicInteger(); private final AtomicBoolean firstChunk = new AtomicBoolean(true); @@ -313,6 +445,11 @@ public TextCompletionsConsumer( public synchronized void accept(com.azure.ai.openai.models.Completions completions) { List choices = completions.getChoices(); String answerId = completions.getId(); + if (completions.getUsage() != null) { + totalTokens.addAndGet(completions.getUsage().getTotalTokens()); + completionTokens.addAndGet(completions.getUsage().getCompletionTokens()); + promptTokens.addAndGet(completions.getUsage().getPromptTokens()); + } if (!choices.isEmpty()) { com.azure.ai.openai.models.Choice first = choices.get(0); diff --git a/langstream-agents/langstream-ai-agents/src/main/java/com/datastax/oss/streaming/ai/embeddings/OpenAIEmbeddingsService.java b/langstream-agents/langstream-ai-agents/src/main/java/com/datastax/oss/streaming/ai/embeddings/OpenAIEmbeddingsService.java index 44eb587d1..ebaa57399 100644 --- a/langstream-agents/langstream-ai-agents/src/main/java/com/datastax/oss/streaming/ai/embeddings/OpenAIEmbeddingsService.java +++ b/langstream-agents/langstream-ai-agents/src/main/java/com/datastax/oss/streaming/ai/embeddings/OpenAIEmbeddingsService.java @@ -30,7 +30,6 @@ public class OpenAIEmbeddingsService implements EmbeddingsService { private final OpenAIAsyncClient openAIClient; private final String model; - private final MetricsReporter metricsReporter; private final MetricsReporter.Counter totalTokens; private final MetricsReporter.Counter promptTokens; @@ -42,7 +41,6 @@ public OpenAIEmbeddingsService( OpenAIAsyncClient openAIClient, String model, MetricsReporter metricsReporter) { this.openAIClient = openAIClient; this.model = model; - this.metricsReporter = metricsReporter; this.totalTokens = metricsReporter.counter( "openai_embeddings_total_tokens", diff --git a/langstream-agents/langstream-ai-agents/src/main/java/com/datastax/oss/streaming/ai/services/OpenAIServiceProvider.java b/langstream-agents/langstream-ai-agents/src/main/java/com/datastax/oss/streaming/ai/services/OpenAIServiceProvider.java index e7e554f74..d6eefe249 100644 --- 
a/langstream-agents/langstream-ai-agents/src/main/java/com/datastax/oss/streaming/ai/services/OpenAIServiceProvider.java +++ b/langstream-agents/langstream-ai-agents/src/main/java/com/datastax/oss/streaming/ai/services/OpenAIServiceProvider.java @@ -42,7 +42,7 @@ public OpenAIServiceProvider(OpenAIAsyncClient client, MetricsReporter metricsRe @Override public CompletionsService getCompletionsService(Map additionalConfiguration) { - return new OpenAICompletionService(client); + return new OpenAICompletionService(client, metricsReporter); } @Override diff --git a/langstream-agents/langstream-ai-agents/src/test/java/com/datastax/oss/streaming/ai/ChatCompletionsStepTest.java b/langstream-agents/langstream-ai-agents/src/test/java/com/datastax/oss/streaming/ai/ChatCompletionsStepTest.java index afd9fce91..b4ea5ea1e 100644 --- a/langstream-agents/langstream-ai-agents/src/test/java/com/datastax/oss/streaming/ai/ChatCompletionsStepTest.java +++ b/langstream-agents/langstream-ai-agents/src/test/java/com/datastax/oss/streaming/ai/ChatCompletionsStepTest.java @@ -24,6 +24,7 @@ import static org.mockito.Mockito.when; import ai.langstream.ai.agents.services.impl.OpenAICompletionService; +import ai.langstream.api.runner.code.MetricsReporter; import com.azure.ai.openai.OpenAIAsyncClient; import com.azure.ai.openai.models.ChatCompletions; import com.azure.ai.openai.models.ChatCompletionsOptions; @@ -88,7 +89,8 @@ void setup() throws Exception { .thenReturn(Mono.just(mapper.readValue(COMPLETION, ChatCompletions.class))); when(openAIClient.getChatCompletionsStream(eq("test-model"), any())) .thenAnswer(a -> Flux.just(mapper.readValue(COMPLETION, ChatCompletions.class))); - this.completionService = new OpenAICompletionService(openAIClient); + this.completionService = + new OpenAICompletionService(openAIClient, MetricsReporter.DISABLED); } @Test
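Editor's note: beyond the new counters, the substantive change in OpenAICompletionService is that the non-streaming chat and text completion paths no longer call .block(): the Azure client's Mono is converted with .toFuture(), the payload is mapped in thenApply(...), and a side-effect-only exceptionally(...) stage increments the error counter while the returned future still propagates the failure to the caller. A minimal, self-contained sketch of that shape follows; the class name and the AtomicInteger stand-ins for MetricsReporter.Counter are illustrative only, not part of the project.

```java
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.atomic.AtomicInteger;
import reactor.core.publisher.Mono;

public class NonBlockingMetricsSketch {

    public static void main(String[] args) {
        AtomicInteger numCalls = new AtomicInteger();  // stands in for chatNumCalls
        AtomicInteger numErrors = new AtomicInteger(); // stands in for chatNumErrors

        // Simulated failing OpenAI call; in the service this is client.getChatCompletions(...).
        Mono<String> response = Mono.error(new IllegalStateException("simulated OpenAI failure"));

        numCalls.incrementAndGet();
        CompletableFuture<String> resultHandle =
                response.toFuture()                           // no .block(): stay fully asynchronous
                        .thenApply(body -> "parsed " + body); // map the payload, as thenApply does in the diff

        // Side-effect-only stage: the future returned by exceptionally() is deliberately
        // discarded, so callers still observe the original exception while the error
        // counter is incremented exactly once.
        resultHandle.exceptionally(error -> {
            numErrors.incrementAndGet();
            return null;
        });

        resultHandle.whenComplete((value, error) ->
                System.out.println("calls=" + numCalls + ", errors=" + numErrors
                        + ", failed=" + (error != null)));
    }
}
```

The same shape appears twice in the diff (chat and text completions); the two paths differ only in which counters are incremented and in how the Azure CompletionsUsage is unpacked into token counts.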