Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added response_format option to openai_transcription processor #2925

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions docs/modules/components/pages/processors/openai_transcription.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ openai_transcription:
file: "" # No default (required)
language: en # No default (optional)
prompt: "" # No default (optional)
response_format: json
```

--
Expand Down Expand Up @@ -144,4 +145,14 @@ This field supports xref:configuration:interpolation.adoc#bloblang-queries[inter
*Type*: `string`


=== `response_format`

The format of the transcription output. Must be one of: `json`, `text`, `srt`, `verbose_json`, or `vtt` (matched case-insensitively); any other value falls back to `json`.
This field supports xref:configuration:interpolation.adoc#bloblang-queries[interpolation functions].


*Type*: `string`

*Default*: `"json"`


41 changes: 40 additions & 1 deletion internal/impl/openai/transcription_processor.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"bytes"
"context"
"fmt"
"strings"

"github.com/redpanda-data/benthos/v4/public/bloblang"
"github.com/redpanda-data/benthos/v4/public/service"
Expand All @@ -22,6 +23,7 @@ const (
otspFieldFile = "file"
otspFieldLang = "language"
otspFieldPrompt = "prompt"
otspFieldFormat = "response_format"
)

func init() {
Expand Down Expand Up @@ -61,6 +63,11 @@ To learn more about audio transcription, see the: https://platform.openai.com/do
Description("Optional text to guide the model's style or continue a previous audio segment. The prompt should match the audio language.").
Optional().
Advanced(),
service.NewInterpolatedStringField(otspFieldFormat).
Description("The format of the output, in one of these options: json, text, srt, verbose_json, or vtt.").
Optional().
Default("json").
Advanced(),
)
}

Expand All @@ -87,7 +94,14 @@ func makeTranscriptionProcessor(conf *service.ParsedConfig, mgr *service.Resourc
return nil, err
}
}
return &transcriptionProcessor{b, f, l, p}, nil
var t *service.InterpolatedString
if conf.Contains(otspFieldFormat) {
t, err = conf.FieldInterpolatedString(otspFieldFormat)
if err != nil {
return nil, err
}
}
return &transcriptionProcessor{b, f, l, p, t}, nil
}

type transcriptionProcessor struct {
Expand All @@ -96,6 +110,7 @@ type transcriptionProcessor struct {
file *bloblang.Executor
lang *service.InterpolatedString
prompt *service.InterpolatedString
format *service.InterpolatedString
}

func (p *transcriptionProcessor) Process(ctx context.Context, msg *service.Message) (service.MessageBatch, error) {
Expand Down Expand Up @@ -124,6 +139,30 @@ func (p *transcriptionProcessor) Process(ctx context.Context, msg *service.Messa
}
body.Prompt = pr
}

if p.format != nil {
t, err := p.format.TryString(msg)
if err != nil {
return nil, fmt.Errorf("%s interpolation error: %w", otspFieldFormat, err)
}

var format oai.AudioResponseFormat
switch strings.ToLower(t) {
case "verbose_json":
format = oai.AudioResponseFormatVerboseJSON
case "text":
format = oai.AudioResponseFormatText
case "vtt":
format = oai.AudioResponseFormatVTT
case "srt":
format = oai.AudioResponseFormatSRT
default:
format = oai.AudioResponseFormatJSON
rockwotj marked this conversation as resolved.
Show resolved Hide resolved
}

body.Format = format
}

resp, err := p.client.CreateTranscription(ctx, body)
if err != nil {
return nil, err
Expand Down
Loading