Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[AI] Add support for Object Detection pipeline #3228

Open
wants to merge 31 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
d907edc
feat:add initial implementation of support for object detection pipeline
RUFFY-369 Nov 1, 2024
dd6598b
Merge remote-tracking branch 'upstream/ai-video' into feature/object-…
RUFFY-369 Nov 1, 2024
ba4eb5e
chore:fix remaining merge conflicts
RUFFY-369 Nov 1, 2024
ea4dba0
chore:add missing dependencies for testing locally
RUFFY-369 Nov 2, 2024
cf9e389
chore:update ai-worker commit hash
RUFFY-369 Nov 2, 2024
589b6c7
fix:build errors for go-livepeer remote-worker docker image
RUFFY-369 Nov 2, 2024
34f4df7
chore:update server package to enable pipeline processing by remote w…
RUFFY-369 Nov 9, 2024
e1a3767
chore:fix merge conflicts
RUFFY-369 Nov 14, 2024
e2ebe08
Merge remote-tracking branch 'upstream/master' into feature/object-de…
RUFFY-369 Nov 16, 2024
5b77d63
fix:make error
RUFFY-369 Nov 18, 2024
b229813
Merge remote-tracking branch 'upstream/master' into feature/object-de…
RUFFY-369 Nov 18, 2024
4bb8266
Add Gateway ETH Address to Kafka events (#3249)
thomshutt Nov 18, 2024
f096368
ci: fix PR labeler (#3254)
rickstaa Nov 18, 2024
1d66460
update input probing to use ffmpeg.GetCodecInfoBytes
ad-astra-video Nov 19, 2024
cc62271
update transcoding of result
ad-astra-video Nov 20, 2024
a4f06d9
Merge pull request #1 from ad-astra-video/object-detection
RUFFY-369 Nov 21, 2024
9e20c37
chore:update returned detection data with ObjectDetectionResponse
RUFFY-369 Nov 22, 2024
154db36
Merge remote-tracking branch 'upstream/master' into feature/object-de…
RUFFY-369 Nov 22, 2024
37b1471
chore:remove async processing for object detection
RUFFY-369 Nov 29, 2024
4d7a4b8
chore:add ObjectDetectionResponse in json result parsing as new case
RUFFY-369 Nov 29, 2024
4f09e62
remove addl object detection async, re-order ai_mediaserver.go urls, …
ad-astra-video Nov 29, 2024
d5975ad
update go.mod go.sum to remove ffmpeg-go dependency
ad-astra-video Nov 29, 2024
e9046fc
Merge pull request #2 from ad-astra-video/object-detection
RUFFY-369 Nov 29, 2024
6d537dd
chore:changes for base64 encoded video file instead of url for each f…
RUFFY-369 Dec 1, 2024
0a1e6f8
chore:nits(add on previous commit)
RUFFY-369 Dec 1, 2024
bb0c7cf
chore:add necessary code for update in ObjectDetectionResponse in run…
RUFFY-369 Dec 1, 2024
be3ebe3
fix:make error
RUFFY-369 Dec 3, 2024
c59ae30
updates for object detection pipeline
ad-astra-video Jan 2, 2025
7b609f5
fix parsing annotated video
ad-astra-video Jan 2, 2025
80c2fad
Merge pull request #3 from ad-astra-video/object-detection
RUFFY-369 Jan 6, 2025
a18ef7e
chore:resolve merge conflicts
RUFFY-369 Jan 6, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions common/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -462,6 +462,21 @@ func ParseEthAddr(strJsonKey string) (string, error) {
return "", errors.New("Error parsing address from keyfile")
}

// GetInputVideoInfo probes the uploaded video with ffmpeg.GetCodecInfoBytes and
// returns the detected media format.
//
// An error is returned when the file contents cannot be read, when probing
// fails, or when the reported duration (DurSecs) is not positive. On error the
// returned MediaFormatInfo is the zero value and must not be used.
func GetInputVideoInfo(video types.File) (ffmpeg.MediaFormatInfo, error) {
	data, err := video.Bytes()
	if err != nil {
		// Surface the read failure instead of probing data that could not be read.
		return ffmpeg.MediaFormatInfo{}, err
	}

	_, mediaFormat, err := ffmpeg.GetCodecInfoBytes(data)
	if err != nil {
		return ffmpeg.MediaFormatInfo{}, errors.New("Error getting codec info")
	}

	// Reject inputs whose duration is zero or negative.
	if int64(mediaFormat.DurSecs) <= 0 {
		return ffmpeg.MediaFormatInfo{}, errors.New("video duration calculation failed")
	}

	return mediaFormat, nil
}

// CalculateAudioDuration calculates audio file duration using the lpms/ffmpeg package.
func CalculateAudioDuration(audio types.File) (int64, error) {
read, err := audio.Reader()
Expand Down
1 change: 1 addition & 0 deletions core/ai.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ type AI interface {
ImageToText(context.Context, worker.GenImageToTextMultipartRequestBody) (*worker.ImageToTextResponse, error)
TextToSpeech(context.Context, worker.GenTextToSpeechJSONRequestBody) (*worker.AudioResponse, error)
LiveVideoToVideo(context.Context, worker.GenLiveVideoToVideoJSONRequestBody) (*worker.LiveVideoToVideoResponse, error)
ObjectDetection(context.Context, worker.GenObjectDetectionMultipartRequestBody) (*worker.ObjectDetectionResponse, error)
Warm(context.Context, string, string, worker.RunnerEndpoint, worker.OptimizationFlags) error
Stop(context.Context) error
HasCapacity(string, string) bool
Expand Down
9 changes: 9 additions & 0 deletions core/ai_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -667,6 +667,15 @@ func (a *stubAIWorker) LiveVideoToVideo(ctx context.Context, req worker.GenLiveV
return &worker.LiveVideoToVideoResponse{}, nil
}

// ObjectDetection is the stubAIWorker implementation of the object-detection
// call. It ignores ctx and req and always returns a fixed response populated
// with placeholder values, together with a nil error.
//
// NOTE(review): the Video field is written as an untyped composite literal
// ({Url: ...}); confirm this matches the worker.ObjectDetectionResponse
// definition in the pinned ai-worker version and compiles.
func (a *stubAIWorker) ObjectDetection(ctx context.Context, req worker.GenObjectDetectionMultipartRequestBody) (*worker.ObjectDetectionResponse, error) {
return &worker.ObjectDetectionResponse{
Video: {Url: "http://example.com/frames1.mp4"},
ConfidenceScores: "confidence_scores",
Labels: "labels",
DetectionBoxes: "detection_boxes",
DetectionPts: "detection_pts"}, nil
}

// Warm is a no-op in the stub worker: every argument is ignored and nil is
// always returned.
func (a *stubAIWorker) Warm(ctx context.Context, arg1, arg2 string, endpoint worker.RunnerEndpoint, flags worker.OptimizationFlags) error {
return nil
}
Expand Down
78 changes: 77 additions & 1 deletion core/ai_worker.go
Original file line number Diff line number Diff line change
Expand Up @@ -465,6 +465,21 @@
}
resp.Audio.Url = osUrl

results = resp
case worker.ObjectDetectionResponse:
if resp.Video.Url != "" {

Check failure on line 470 in core/ai_worker.go

View workflow job for this annotation

GitHub Actions / Build binaries for darwin-arm64

resp.Video undefined (type worker.ObjectDetectionResponse has no field or method Video)

Check failure on line 470 in core/ai_worker.go

View workflow job for this annotation

GitHub Actions / Build binaries for darwin-amd64

resp.Video undefined (type worker.ObjectDetectionResponse has no field or method Video)

Check failure on line 470 in core/ai_worker.go

View workflow job for this annotation

GitHub Actions / Build binaries for linux-cpu-arm64

resp.Video undefined (type worker.ObjectDetectionResponse has no field or method Video)

Check failure on line 470 in core/ai_worker.go

View workflow job for this annotation

GitHub Actions / Build binaries for linux-gpu-arm64

resp.Video undefined (type worker.ObjectDetectionResponse has no field or method Video)

Check failure on line 470 in core/ai_worker.go

View workflow job for this annotation

GitHub Actions / Build binaries for linux-cpu-amd64

resp.Video undefined (type worker.ObjectDetectionResponse has no field or method Video)

Check failure on line 470 in core/ai_worker.go

View workflow job for this annotation

GitHub Actions / Build binaries for windows-cpu-amd64

resp.Video undefined (type worker.ObjectDetectionResponse has no field or method Video)

Check failure on line 470 in core/ai_worker.go

View workflow job for this annotation

GitHub Actions / Build binaries for linux-gpu-amd64

resp.Video undefined (type worker.ObjectDetectionResponse has no field or method Video)
err := worker.ReadVideoB64DataUrl(resp.Video.Url, &buf)

Check failure on line 471 in core/ai_worker.go

View workflow job for this annotation

GitHub Actions / Build binaries for darwin-arm64

undefined: worker.ReadVideoB64DataUrl

Check failure on line 471 in core/ai_worker.go

View workflow job for this annotation

GitHub Actions / Build binaries for darwin-arm64

resp.Video undefined (type worker.ObjectDetectionResponse has no field or method Video)

Check failure on line 471 in core/ai_worker.go

View workflow job for this annotation

GitHub Actions / Build binaries for darwin-amd64

undefined: worker.ReadVideoB64DataUrl

Check failure on line 471 in core/ai_worker.go

View workflow job for this annotation

GitHub Actions / Build binaries for darwin-amd64

resp.Video undefined (type worker.ObjectDetectionResponse has no field or method Video)

Check failure on line 471 in core/ai_worker.go

View workflow job for this annotation

GitHub Actions / Build binaries for linux-cpu-arm64

undefined: worker.ReadVideoB64DataUrl

Check failure on line 471 in core/ai_worker.go

View workflow job for this annotation

GitHub Actions / Build binaries for linux-cpu-arm64

resp.Video undefined (type worker.ObjectDetectionResponse has no field or method Video)

Check failure on line 471 in core/ai_worker.go

View workflow job for this annotation

GitHub Actions / Build binaries for linux-gpu-arm64

undefined: worker.ReadVideoB64DataUrl

Check failure on line 471 in core/ai_worker.go

View workflow job for this annotation

GitHub Actions / Build binaries for linux-gpu-arm64

resp.Video undefined (type worker.ObjectDetectionResponse has no field or method Video)

Check failure on line 471 in core/ai_worker.go

View workflow job for this annotation

GitHub Actions / Build binaries for linux-cpu-amd64

undefined: worker.ReadVideoB64DataUrl

Check failure on line 471 in core/ai_worker.go

View workflow job for this annotation

GitHub Actions / Build binaries for linux-cpu-amd64

resp.Video undefined (type worker.ObjectDetectionResponse has no field or method Video)

Check failure on line 471 in core/ai_worker.go

View workflow job for this annotation

GitHub Actions / Build binaries for windows-cpu-amd64

undefined: worker.ReadVideoB64DataUrl

Check failure on line 471 in core/ai_worker.go

View workflow job for this annotation

GitHub Actions / Build binaries for windows-cpu-amd64

resp.Video undefined (type worker.ObjectDetectionResponse has no field or method Video)

Check failure on line 471 in core/ai_worker.go

View workflow job for this annotation

GitHub Actions / Build binaries for linux-gpu-amd64

undefined: worker.ReadVideoB64DataUrl

Check failure on line 471 in core/ai_worker.go

View workflow job for this annotation

GitHub Actions / Build binaries for linux-gpu-amd64

resp.Video undefined (type worker.ObjectDetectionResponse has no field or method Video)
if err != nil {
return nil, err
}

osUrl, err := storage.OS.SaveData(ctx, fileName, bytes.NewBuffer(buf.Bytes()), nil, 0)
if err != nil {
return nil, err
}
resp.Video.Url = osUrl

Check failure on line 480 in core/ai_worker.go

View workflow job for this annotation

GitHub Actions / Build binaries for darwin-arm64

resp.Video undefined (type worker.ObjectDetectionResponse has no field or method Video)

Check failure on line 480 in core/ai_worker.go

View workflow job for this annotation

GitHub Actions / Build binaries for darwin-amd64

resp.Video undefined (type worker.ObjectDetectionResponse has no field or method Video)

Check failure on line 480 in core/ai_worker.go

View workflow job for this annotation

GitHub Actions / Build binaries for linux-cpu-arm64

resp.Video undefined (type worker.ObjectDetectionResponse has no field or method Video)

Check failure on line 480 in core/ai_worker.go

View workflow job for this annotation

GitHub Actions / Build binaries for linux-gpu-arm64

resp.Video undefined (type worker.ObjectDetectionResponse has no field or method Video)

Check failure on line 480 in core/ai_worker.go

View workflow job for this annotation

GitHub Actions / Build binaries for linux-cpu-amd64

resp.Video undefined (type worker.ObjectDetectionResponse has no field or method Video)

Check failure on line 480 in core/ai_worker.go

View workflow job for this annotation

GitHub Actions / Build binaries for windows-cpu-amd64

resp.Video undefined (type worker.ObjectDetectionResponse has no field or method Video)

Check failure on line 480 in core/ai_worker.go

View workflow job for this annotation

GitHub Actions / Build binaries for linux-gpu-amd64

resp.Video undefined (type worker.ObjectDetectionResponse has no field or method Video)
}

results = resp
}

Expand Down Expand Up @@ -510,6 +525,19 @@
delete(results.Files, fileName)

results.Results = resp
case worker.ObjectDetectionResponse:
if resp.Video.Url != "" {

Check failure on line 529 in core/ai_worker.go

View workflow job for this annotation

GitHub Actions / Build binaries for darwin-arm64

resp.Video undefined (type worker.ObjectDetectionResponse has no field or method Video)

Check failure on line 529 in core/ai_worker.go

View workflow job for this annotation

GitHub Actions / Build binaries for darwin-amd64

resp.Video undefined (type worker.ObjectDetectionResponse has no field or method Video)

Check failure on line 529 in core/ai_worker.go

View workflow job for this annotation

GitHub Actions / Build binaries for linux-cpu-arm64

resp.Video undefined (type worker.ObjectDetectionResponse has no field or method Video)

Check failure on line 529 in core/ai_worker.go

View workflow job for this annotation

GitHub Actions / Build binaries for linux-gpu-arm64

resp.Video undefined (type worker.ObjectDetectionResponse has no field or method Video)

Check failure on line 529 in core/ai_worker.go

View workflow job for this annotation

GitHub Actions / Build binaries for linux-cpu-amd64

resp.Video undefined (type worker.ObjectDetectionResponse has no field or method Video)

Check failure on line 529 in core/ai_worker.go

View workflow job for this annotation

GitHub Actions / Build binaries for windows-cpu-amd64

resp.Video undefined (type worker.ObjectDetectionResponse has no field or method Video)

Check failure on line 529 in core/ai_worker.go

View workflow job for this annotation

GitHub Actions / Build binaries for linux-gpu-amd64

resp.Video undefined (type worker.ObjectDetectionResponse has no field or method Video)
fileName := resp.Video.Url

Check failure on line 530 in core/ai_worker.go

View workflow job for this annotation

GitHub Actions / Build binaries for darwin-arm64

resp.Video undefined (type worker.ObjectDetectionResponse has no field or method Video)

Check failure on line 530 in core/ai_worker.go

View workflow job for this annotation

GitHub Actions / Build binaries for darwin-amd64

resp.Video undefined (type worker.ObjectDetectionResponse has no field or method Video)

Check failure on line 530 in core/ai_worker.go

View workflow job for this annotation

GitHub Actions / Build binaries for linux-cpu-arm64

resp.Video undefined (type worker.ObjectDetectionResponse has no field or method Video)

Check failure on line 530 in core/ai_worker.go

View workflow job for this annotation

GitHub Actions / Build binaries for linux-gpu-arm64

resp.Video undefined (type worker.ObjectDetectionResponse has no field or method Video)

Check failure on line 530 in core/ai_worker.go

View workflow job for this annotation

GitHub Actions / Build binaries for linux-cpu-amd64

resp.Video undefined (type worker.ObjectDetectionResponse has no field or method Video)

Check failure on line 530 in core/ai_worker.go

View workflow job for this annotation

GitHub Actions / Build binaries for windows-cpu-amd64

resp.Video undefined (type worker.ObjectDetectionResponse has no field or method Video)

Check failure on line 530 in core/ai_worker.go

View workflow job for this annotation

GitHub Actions / Build binaries for linux-gpu-amd64

resp.Video undefined (type worker.ObjectDetectionResponse has no field or method Video)
osUrl, err := storage.OS.SaveData(ctx, fileName, bytes.NewReader(results.Files[fileName]), nil, 0)
if err != nil {
return nil, err
}

resp.Video.Url = osUrl

Check failure on line 536 in core/ai_worker.go

View workflow job for this annotation

GitHub Actions / Build binaries for darwin-arm64

resp.Video undefined (type worker.ObjectDetectionResponse has no field or method Video)

Check failure on line 536 in core/ai_worker.go

View workflow job for this annotation

GitHub Actions / Build binaries for darwin-amd64

resp.Video undefined (type worker.ObjectDetectionResponse has no field or method Video)

Check failure on line 536 in core/ai_worker.go

View workflow job for this annotation

GitHub Actions / Build binaries for linux-cpu-arm64

resp.Video undefined (type worker.ObjectDetectionResponse has no field or method Video)

Check failure on line 536 in core/ai_worker.go

View workflow job for this annotation

GitHub Actions / Build binaries for linux-gpu-arm64

resp.Video undefined (type worker.ObjectDetectionResponse has no field or method Video)

Check failure on line 536 in core/ai_worker.go

View workflow job for this annotation

GitHub Actions / Build binaries for linux-cpu-amd64

resp.Video undefined (type worker.ObjectDetectionResponse has no field or method Video)

Check failure on line 536 in core/ai_worker.go

View workflow job for this annotation

GitHub Actions / Build binaries for windows-cpu-amd64

resp.Video undefined (type worker.ObjectDetectionResponse has no field or method Video)

Check failure on line 536 in core/ai_worker.go

View workflow job for this annotation

GitHub Actions / Build binaries for linux-gpu-amd64

resp.Video undefined (type worker.ObjectDetectionResponse has no field or method Video)
delete(results.Files, fileName)

results.Results = resp
}
}

// no file response to save, response is text
Expand Down Expand Up @@ -884,6 +912,50 @@
return res.Results, nil
}

// ObjectDetection runs an object-detection request for the given requestID.
//
// When the node has a local AIWorker the request is processed there and the
// result is stored via saveLocalAIWorkerResults. Otherwise the input video is
// saved with SaveAIRequestInput, the job is handed to the AIWorkerManager, and
// the remote result is stored via saveRemoteAIWorkerResults.
func (orch *orchestrator) ObjectDetection(ctx context.Context, requestID string, req worker.GenObjectDetectionMultipartRequestBody) (interface{}, error) {
	// Combined orchestrator/AI worker: the local worker handles the job.
	if orch.node.AIWorker != nil {
		localResp, localErr := orch.node.ObjectDetection(ctx, req)
		if localErr != nil {
			clog.Errorf(ctx, "Error processing with local ai worker err=%q", localErr)
			if monitor.Enabled {
				monitor.AIResultSaveError(ctx, "object-detection", *req.ModelId, string(monitor.SegmentUploadErrorUnknown))
			}
			return nil, localErr
		}
		return orch.node.saveLocalAIWorkerResults(ctx, *localResp, requestID, "video/mp4")
	}

	// No local worker: a remote AI worker processes the job, so the input
	// video is stored first and referenced by URL.
	payload, err := req.Video.Bytes()
	if err != nil {
		return nil, err
	}

	savedInputURL, err := orch.SaveAIRequestInput(ctx, requestID, payload)
	if err != nil {
		return nil, err
	}
	// Reset the file held by the request now that the input has been saved
	// (presumably so the raw bytes are not forwarded with the job; confirm).
	req.Video.InitFromBytes(nil, "")

	jobData := AIJobRequestData{Request: req, InputUrl: savedInputURL}
	remoteRes, err := orch.node.AIWorkerManager.Process(ctx, requestID, "object-detection", *req.ModelId, savedInputURL, jobData)
	if err != nil {
		return nil, err
	}

	remoteRes, err = orch.node.saveRemoteAIWorkerResults(ctx, remoteRes, requestID)
	if err != nil {
		clog.Errorf(ctx, "Error saving remote ai result err=%q", err)
		if monitor.Enabled {
			monitor.AIResultSaveError(ctx, "object-detection", *req.ModelId, string(monitor.SegmentUploadErrorUnknown))
		}
		return nil, err
	}

	return remoteRes.Results, nil
}

// only used for sending work to remote AI worker
func (orch *orchestrator) SaveAIRequestInput(ctx context.Context, requestID string, fileData []byte) (string, error) {
node := orch.node
Expand Down Expand Up @@ -1062,7 +1134,11 @@
return n.AIWorker.LiveVideoToVideo(ctx, req)
}

// ObjectDetection forwards the object-detection request to the node's
// AIWorker and returns its response and error unchanged.
func (n *LivepeerNode) ObjectDetection(ctx context.Context, req worker.GenObjectDetectionMultipartRequestBody) (*worker.ObjectDetectionResponse, error) {
return n.AIWorker.ObjectDetection(ctx, req)
}

// transcodeFrames converts a series of image URLs into a video segment for the image-to-video and object-detection pipeline.
func (n *LivepeerNode) transcodeFrames(ctx context.Context, sessionID string, urls []string, inProfile ffmpeg.VideoProfile, outProfile ffmpeg.VideoProfile) *TranscodeResult {
ctx = clog.AddOrchSessionID(ctx, sessionID)

Expand Down
3 changes: 3 additions & 0 deletions core/capabilities.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ const (
Capability_ImageToText Capability = 34
Capability_LiveVideoToVideo Capability = 35
Capability_TextToSpeech Capability = 36
Capability_ObjectDetection Capability = 37
)

var CapabilityNameLookup = map[Capability]string{
Expand Down Expand Up @@ -124,6 +125,7 @@ var CapabilityNameLookup = map[Capability]string{
Capability_ImageToText: "Image to text",
Capability_LiveVideoToVideo: "Live video to video",
Capability_TextToSpeech: "Text to speech",
Capability_ObjectDetection: "Object detection",
}

var CapabilityTestLookup = map[Capability]CapabilityTest{
Expand Down Expand Up @@ -217,6 +219,7 @@ func OptionalCapabilities() []Capability {
Capability_SegmentAnything2,
Capability_ImageToText,
Capability_TextToSpeech,
Capability_ObjectDetection,
}
}

Expand Down
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -257,3 +257,5 @@ require (
lukechampine.com/blake3 v1.2.1 // indirect
rsc.io/tmplfunc v0.0.3 // indirect
)

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note to remove before merging.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just give me a heads up before we merge and I will get it removed in the last commit

replace github.com/livepeer/ai-worker => github.com/RUFFY-369/ai-worker v0.8.1-0.20241102154421-60e5d350c2df
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ github.com/Masterminds/semver/v3 v3.2.1 h1:RN9w6+7QoMeJVGyfmbcgs28Br8cvmnucEXnY0
github.com/Masterminds/semver/v3 v3.2.1/go.mod h1:qvl/7zhW3nngYb5+80sSMF+FG2BjYrf8m9wsX0PNOMQ=
github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY=
github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU=
github.com/RUFFY-369/ai-worker v0.8.1-0.20241102154421-60e5d350c2df h1:UL+t2GVDDk20eypLFcbbqiO95OmlyzjARCkBUbvVNmc=
github.com/RUFFY-369/ai-worker v0.8.1-0.20241102154421-60e5d350c2df/go.mod h1:GjQuPmz69UO53WVtqzB9Ygok5MmKCGNuobbfMXH7zgw=
github.com/RaveNoX/go-jsoncommentstrip v1.0.0/go.mod h1:78ihd09MekBnJnxpICcwzCMzGrKSKYe4AqU6PDYYpjk=
github.com/Shopify/goreferrer v0.0.0-20181106222321-ec9c9a553398/go.mod h1:a1uqRtAwp2Xwc6WNPJEufxJ7fx3npB4UV/JOLmbu5I0=
github.com/StackExchange/wmi v1.2.1 h1:VIkavFPXSjcnS+O8yTq7NI32k0R5Aj+v39y29VYDOSA=
Expand Down
27 changes: 27 additions & 0 deletions server/ai_http.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ func startAIServer(lp *lphttp) error {
lp.transRPC.Handle("/image-to-text", oapiReqValidator(aiHttpHandle(lp, multipartDecoder[worker.GenImageToTextMultipartRequestBody])))
lp.transRPC.Handle("/text-to-speech", oapiReqValidator(aiHttpHandle(lp, jsonDecoder[worker.GenTextToSpeechJSONRequestBody])))
lp.transRPC.Handle("/live-video-to-video", oapiReqValidator(lp.StartLiveVideoToVideo()))
lp.transRPC.Handle("/object-detection", oapiReqValidator(aiHttpHandle(&lp, multipartDecoder[worker.GenObjectDetectionMultipartRequestBody])))
// Additionally, there is the '/aiResults' endpoint registered in server/rpc.go

return nil
Expand Down Expand Up @@ -470,6 +471,20 @@ func handleAIRequest(ctx context.Context, w http.ResponseWriter, r *http.Request
// TTS pricing is typically in characters, including punctuation.
words := utf8.RuneCountInString(*v.Text)
outPixels = int64(1000 * words)
case worker.GenObjectDetectionMultipartRequestBody:
pipeline = "object-detection"
cap = core.Capability_ObjectDetection
modelID = *v.ModelId
mediaFormat, err := common.GetInputVideoInfo(v.Video)
if err != nil {
respondWithError(w, err.Error(), http.StatusBadRequest)
}

submitFn = func(ctx context.Context) (interface{}, error) {
return orch.ObjectDetection(ctx, requestID, v)
}
// Calculate the output pixels using the video profile
outPixels = int64(mediaFormat.Width) * int64(mediaFormat.Height) * int64(mediaFormat.FPS) * mediaFormat.DurSecs
default:
respondWithError(w, "Unknown request type", http.StatusBadRequest)
return
Expand Down Expand Up @@ -575,6 +590,8 @@ func handleAIRequest(ctx context.Context, w http.ResponseWriter, r *http.Request
latencyScore = CalculateImageToTextLatencyScore(took, outPixels)
case worker.GenTextToSpeechJSONRequestBody:
latencyScore = CalculateTextToSpeechLatencyScore(took, outPixels)
case worker.GenObjectDetectionMultipartRequestBody:
latencyScore = CalculateObjectDetectionLatencyScore(took, outPixels)
}

var pricePerAIUnit float64
Expand Down Expand Up @@ -786,6 +803,16 @@ func parseMultiPartResult(body io.Reader, boundary string, pipeline string) core
}
case "text-to-speech":
var parsedResp worker.AudioResponse
err := json.Unmarshal(body, &parsedResp)
if err != nil {
glog.Error("Error getting results json:", err)
wkrResult.Err = err
break
}
results = parsedResp
case "object-detection":
var parsedResp worker.ObjectDetectionResponse

err := json.Unmarshal(body, &parsedResp)
if err != nil {
glog.Error("Error getting results json:", err)
Expand Down
1 change: 1 addition & 0 deletions server/ai_mediaserver.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ func startAIMediaServer(ls *LivepeerServer) error {
ls.HTTPMux.Handle("/segment-anything-2", oapiReqValidator(aiMediaServerHandle(ls, multipartDecoder[worker.GenSegmentAnything2MultipartRequestBody], processSegmentAnything2)))
ls.HTTPMux.Handle("/image-to-text", oapiReqValidator(aiMediaServerHandle(ls, multipartDecoder[worker.GenImageToTextMultipartRequestBody], processImageToText)))
ls.HTTPMux.Handle("/text-to-speech", oapiReqValidator(aiMediaServerHandle(ls, jsonDecoder[worker.GenTextToSpeechJSONRequestBody], processTextToSpeech)))
ls.HTTPMux.Handle("/object-detection", oapiReqValidator(aiMediaServerHandle(ls, multipartDecoder[worker.GenObjectDetectionMultipartRequestBody], processObjectDetection)))

// This is called by the media server when the stream is ready
ls.HTTPMux.Handle("/live/video-to-video/{stream}/start", ls.StartLiveVideo())
Expand Down
Loading
Loading