Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add OpenAI integration ✨ #142

Merged
merged 14 commits into from
Dec 18, 2024
66 changes: 66 additions & 0 deletions app/components/AiButton.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import { useRoomContext } from '~/hooks/useRoomContext'
import type { ClientMessage, User } from '~/types/Messages'
import { AiPushToTalkButtion } from './AiPushToTalkButton'
import { Button } from './Button'
import { Trigger } from './Dialog'
import { InviteAiDialog } from './InviteAiDialog'
import { RecordAiVoiceActivity } from './RecordAiVoiceActivity'

/**
 * Secondary-styled button that asks the server to remove the AI from the room
 * by sending a `disableAi` message over the room websocket.
 */
function RemoveAiButton() {
  const websocket = useRoomContext().room.websocket

  // Serialise the message once per click and hand it to the socket.
  const handleRemoveClick = () => {
    const message = { type: 'disableAi' } satisfies ClientMessage
    websocket.send(JSON.stringify(message))
  }

  return (
    <Button
      displayType="secondary"
      className="text-xs"
      onClick={handleRemoveClick}
    >
      Remove AI
    </Button>
  )
}

/**
 * Entry point for the AI controls shown in the room.
 *
 * - Shows the AI error text from room state when one is set.
 * - When a user with id `'ai'` is present in the room, renders the remove
 *   button, the push-to-talk button and the AI voice activity recorder.
 * - Otherwise renders the invite dialog with its trigger button, which is
 *   disabled while the AI connection is pending.
 *
 * @param props.recordActivity Callback forwarded to `RecordAiVoiceActivity`.
 */
export function AiButton(props: { recordActivity: (user: User) => void }) {
  const { roomState } = useRoomContext().room
  const { connectionPending, error } = roomState.ai
  const aiUser = roomState.users.find((candidate) => candidate.id === 'ai')

  // Controls shown once the AI participant has joined.
  const activeAiControls = aiUser ? (
    <>
      <RemoveAiButton />
      <AiPushToTalkButtion />
      <RecordAiVoiceActivity
        user={aiUser}
        recordActivity={props.recordActivity}
      />
    </>
  ) : null

  // Dialog trigger shown while no AI participant is in the room.
  const inviteControls = (
    <InviteAiDialog>
      <Trigger asChild>
        <Button
          className="text-xs flex items-center gap-2"
          disabled={connectionPending}
        >
          <span>Invite AI</span>
        </Button>
      </Trigger>
    </InviteAiDialog>
  )

  return (
    <>
      {error && <span className="text-red-800 dark:text-red-500">{error}</span>}
      {aiUser ? activeAiControls : inviteControls}
    </>
  )
}
137 changes: 137 additions & 0 deletions app/components/AiPushToTalkButton.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
import { useEffect, useMemo, useRef, useState } from 'react'
import { switchMap } from 'rxjs'
import { useStateObservable, useSubscribedState } from '~/hooks/rxjsHooks'
import { useRoomContext } from '~/hooks/useRoomContext'
import type { ClientMessage } from '~/types/Messages'
import { playSound } from '~/utils/playSound'
import { inaudibleAudioTrack$ } from '~/utils/rxjs/inaudibleAudioTrack$'
import { Button } from './Button'

/**
 * Tracks whether a "hold" control is currently engaged, either by holding a
 * keyboard key or by pressing the button the returned ref is attached to.
 *
 * Engaging is ignored while `disabled` is true. Releasing is always honoured
 * and is applied after a 200 ms delay; a new press within that window cancels
 * the pending release.
 * NOTE(review): the delay presumably avoids clipping the end of speech and
 * flicker on quick re-presses — confirm the intent.
 *
 * @param key Keyboard key (compared case-insensitively against `event.key`).
 * @param disabled When true, key/pointer presses do not engage the hold.
 * @returns A tuple of `[isHeldDown, buttonRef]`; attach `buttonRef` to the
 *   button that should act as the pointer-driven hold target.
 */
function useButtonIsHeldDown({
  key,
  disabled,
}: {
  key: string
  disabled: boolean
}) {
  const [keyIsHeldDown, setKeyIsHeldDown] = useState(false)
  const buttonRef = useRef<HTMLButtonElement>(null)

  useEffect(() => {
    const button = buttonRef.current
    const targetKey = key.toLowerCase()
    let timeout = -1
    // True only between a pointerdown on the button and the matching release,
    // so unrelated clicks elsewhere on the page never release a held key.
    let pointerIsDown = false

    const setTrue = () => {
      if (!disabled) {
        setKeyIsHeldDown(true)
        clearTimeout(timeout)
      }
    }
    const setFalse = () => {
      // Drop any release already scheduled so only one timer is ever pending
      // and it can always be cancelled by the next press or by cleanup.
      clearTimeout(timeout)
      timeout = window.setTimeout(() => {
        setKeyIsHeldDown(false)
      }, 200)
    }

    // Some synthetic keyboard events (e.g. dispatched by browser autofill)
    // carry no `key`, so check the type before lower-casing it.
    const isTargetKey = (e: KeyboardEvent) =>
      typeof e.key === 'string' && e.key.toLowerCase() === targetKey

    const onKeyDown = (e: KeyboardEvent) => {
      if (isTargetKey(e)) {
        setTrue()
      }
    }

    const onKeyUp = (e: KeyboardEvent) => {
      if (isTargetKey(e)) {
        setFalse()
      }
    }

    const onPointerDown = () => {
      if (disabled) return
      pointerIsDown = true
      setTrue()
    }

    // Listened for on window so that releasing (or cancelling) the pointer
    // outside the button still ends the hold.
    const onPointerRelease = () => {
      if (!pointerIsDown) return
      pointerIsDown = false
      setFalse()
    }

    // `blur` is dispatched on window (and does not bubble), so this is where
    // loss of focus while the key is held must be observed.
    const onWindowBlur = () => {
      pointerIsDown = false
      setFalse()
    }

    document.addEventListener('keydown', onKeyDown)
    document.addEventListener('keyup', onKeyUp)
    window.addEventListener('blur', onWindowBlur)
    button?.addEventListener('pointerdown', onPointerDown)
    window.addEventListener('pointerup', onPointerRelease)
    window.addEventListener('pointercancel', onPointerRelease)

    return () => {
      clearTimeout(timeout)
      document.removeEventListener('keydown', onKeyDown)
      document.removeEventListener('keyup', onKeyUp)
      window.removeEventListener('blur', onWindowBlur)
      button?.removeEventListener('pointerdown', onPointerDown)
      window.removeEventListener('pointerup', onPointerRelease)
      window.removeEventListener('pointercancel', onPointerRelease)
    }
  }, [disabled, key])

  return [keyIsHeldDown, buttonRef] as const
}

/**
 * Push-to-talk button for speaking to the AI participant.
 *
 * While the button (or the `a` key) is held, the public audio track is pushed
 * to the peer and a `requestAiControl` message carrying the pushed track is
 * sent; otherwise an inaudible track is pushed and `relenquishAiControl` is
 * sent. The button is disabled while a different user controls the AI, and
 * the `aiReady` sound is played whenever `controllingUser` changes to a
 * defined value.
 */
export function AiPushToTalkButtion() {
  const { peer, room, userMedia } = useRoomContext()
  const { websocket } = room
  const { controllingUser } = room.roomState.ai
  const { turnMicOn, publicAudioTrack$ } = userMedia

  const hasControl = controllingUser === websocket.id
  // Someone holds control of the AI and it is not this client.
  const disabled = !hasControl && controllingUser !== undefined

  const [holdingTalkButton, talkButtonRef] = useButtonIsHeldDown({
    key: 'a',
    disabled,
  })

  // Swap between the real microphone track and an inaudible one depending on
  // whether the talk control is currently held.
  const holdingTalkButton$ = useStateObservable(holdingTalkButton)
  const audioTrack$ = useMemo(() => {
    const pickTrack = (talking: boolean) =>
      talking ? publicAudioTrack$ : inaudibleAudioTrack$
    return holdingTalkButton$.pipe(switchMap(pickTrack))
  }, [holdingTalkButton$, publicAudioTrack$])

  const pushedAiAudioTrack$ = useMemo(
    () => peer.pushTrack(audioTrack$),
    [audioTrack$, peer]
  )
  const pushedAiAudioTrack = useSubscribedState(pushedAiAudioTrack$)

  useEffect(() => {
    const sendMessage = (message: ClientMessage) =>
      websocket.send(JSON.stringify(message))

    // NOTE(review): this branch also runs on mount and whenever the pushed
    // track changes while not holding — confirm the server treats a
    // relinquish from a non-controlling user as a no-op.
    if (!holdingTalkButton || !pushedAiAudioTrack) {
      console.log('🤖 Relinquishing ai control!')
      sendMessage({ type: 'relenquishAiControl' })
      return
    }

    turnMicOn()
    console.log('🤖 Requesting ai control')
    sendMessage({ type: 'requestAiControl', track: pushedAiAudioTrack })
  }, [holdingTalkButton, pushedAiAudioTrack, turnMicOn, websocket])

  useEffect(() => {
    if (controllingUser === undefined) return
    playSound('aiReady')
  }, [controllingUser])

  const label = hasControl ? 'Speaking to Ai...' : 'Hold to talk to AI'

  return (
    <Button
      ref={talkButtonRef}
      disabled={disabled}
      className="text-xs select-none"
    >
      {label}
    </Button>
  )
}
12 changes: 6 additions & 6 deletions app/components/Button.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -6,23 +6,23 @@ import { cn } from '~/utils/style'
const displayTypeMap = {
primary: [
'text-white',
'bg-orange-500 hover:bg-orange-600',
'border-orange-500 hover:border-orange-600',
'bg-orange-500 hover:bg-orange-600 active:bg-orange-700 active:bg-orange-800',
'border-orange-500 hover:border-orange-600 active:border-orange-700 active:border-orange-800',
],
secondary: [
'text-zinc-900 dark:text-zinc-100',
'bg-zinc-200 hover:bg-zinc-300 dark:bg-zinc-700 dark:hover:bg-zinc-600',
'bg-zinc-200 hover:bg-zinc-300 dark:bg-zinc-700 dark:hover:bg-zinc-600 active:bg-zinc-400 dark:active:bg-zinc-700',
'border-zinc-200 hover:border-zinc-300 dark:border-zinc-700 dark:hover:border-zinc-600',
],
ghost: [
'text-white hover:text-zinc-900',
'text-white dark:text-zinc-800 hover:text-zinc-900',
'bg-transparent hover:bg-white',
'border-transparent hover:border-white',
],
danger: [
'text-white',
'bg-red-600 hover:bg-red-700',
'border-red-600 hover:border-red-700',
'bg-red-600 hover:bg-red-700 active:bg-red-800',
'border-red-600 hover:border-red-700 active:border-red-800',
],
}

Expand Down
87 changes: 87 additions & 0 deletions app/components/InviteAiDialog.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
import { useSearchParams } from '@remix-run/react'
import { useState, type ReactNode } from 'react'
import { useRoomContext } from '~/hooks/useRoomContext'
import type { ClientMessage } from '~/types/Messages'
import { Button } from './Button'
import { Dialog, DialogContent, DialogOverlay, Portal } from './Dialog'

/**
 * Dialog with a form for inviting the AI into the room.
 *
 * The form collects `instructions` and `voice`; both are pre-filled from the
 * URL search params of the same names when present. Submitting sends an
 * `enableAi` message containing the form fields over the room websocket and
 * closes the dialog.
 *
 * @param props.children Rendered inside the dialog root, ahead of the portal
 *   (e.g. the trigger element).
 */
export function InviteAiDialog(props: { children?: ReactNode }) {
  const [open, setOpen] = useState(false)
  const websocket = useRoomContext().room.websocket
  const [params] = useSearchParams()

  // Initial field values: URL overrides first, built-in defaults otherwise.
  const initialInstructions =
    params.get('instructions') ??
    `You are a helpful and concise AI assistant for a video chat application called Orange Meets.`
  const initialVoice = params.get('voice') ?? 'ash'

  const fieldClassName = 'bg-gray-100 dark:bg-zinc-800 w-full'

  return (
    <Dialog open={open} onOpenChange={setOpen}>
      {props.children}
      <Portal>
        <DialogOverlay />
        <DialogContent>
          <form
            className="flex flex-col gap-4 mt-8"
            onSubmit={(event) => {
              event.preventDefault()
              // Every named form control becomes a field of the message.
              const fields = Object.fromEntries(
                new FormData(event.currentTarget)
              )
              const message = {
                type: 'enableAi',
                ...fields,
              } satisfies ClientMessage
              websocket.send(JSON.stringify(message))
              setOpen(false)
            }}
          >
            <div className="flex flex-col gap-2">
              <div>
                <label className="font-medium" htmlFor="instructions">
                  Instructions
                </label>
              </div>

              <div>
                <textarea
                  id="instructions"
                  name="instructions"
                  className={fieldClassName}
                  rows={15}
                  defaultValue={initialInstructions}
                />
              </div>
            </div>
            <div className="flex flex-col gap-2">
              <div>
                <label className="font-medium" htmlFor="voice">
                  Voice
                </label>
              </div>

              <div>
                <select
                  id="voice"
                  name="voice"
                  className={fieldClassName}
                  defaultValue={initialVoice}
                >
                  <option value="ash">Ash</option>
                  <option value="ballad">Ballad</option>
                </select>
              </div>
            </div>

            <Button type="submit" className="self-end text-xs">
              Invite AI
            </Button>
          </form>
        </DialogContent>
      </Portal>
    </Dialog>
  )
}
15 changes: 12 additions & 3 deletions app/components/Participant.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import { Flipped } from 'react-flip-toolkit'
import { combineLatest, fromEvent, map, of, switchMap } from 'rxjs'
import { useSubscribedState } from '~/hooks/rxjsHooks'
import { useDeadPulledTrackMonitor } from '~/hooks/useDeadPulledTrackMonitor'
import useIsSpeaking from '~/hooks/useIsSpeaking'
import { useRoomContext } from '~/hooks/useRoomContext'
import { useUserMetadata } from '~/hooks/useUserMetadata'
import type { User } from '~/types/Messages'
Expand All @@ -23,6 +24,7 @@ import { HoverFade } from './HoverFade'
import { Icon } from './Icon/Icon'
import { MuteUserButton } from './MuteUserButton'
import { OptionalLink } from './OptionalLink'
import { usePulledAudioTrack } from './PullAudioTracks'
import { Tooltip } from './Tooltip'
import { VideoSrcObject } from './VideoSrcObject'

Expand Down Expand Up @@ -78,6 +80,13 @@ export const Participant = forwardRef<
const { data } = useUserMetadata(user.name)
const { traceLink, peer, dataSaverMode } = useRoomContext()
const peerConnection = useSubscribedState(peer.peerConnection$)
const isAi = user.id === 'ai'
const aiAudioTrack = usePulledAudioTrack(
isAi ? user.tracks.audio : undefined
)
const isSpeaking =
useIsSpeaking(user.id === 'ai' ? aiAudioTrack : undefined) ||
user.speaking

useDeadPulledTrackMonitor(
user.tracks.video,
Expand Down Expand Up @@ -153,7 +162,7 @@ export const Participant = forwardRef<
</div>
) : (
<span className="relative grid w-full h-full uppercase rounded-full place-items-center bg-zinc-500">
{user.speaking && (
{isSpeaking && (
<AudioGlow
type="text"
className="absolute uppercase"
Expand Down Expand Up @@ -204,7 +213,7 @@ export const Participant = forwardRef<
<div className="absolute left-4 top-4">
{user.tracks.audioEnabled &&
user.tracks.videoEnabled &&
user.speaking && <AudioIndicator audioTrack={audioTrack} />}
isSpeaking && <AudioIndicator audioTrack={audioTrack} />}

{!user.tracks.audioEnabled && !user.tracks.audioUnavailable && (
<Tooltip content="Mic is turned off">
Expand Down Expand Up @@ -266,7 +275,7 @@ export const Participant = forwardRef<
</Tooltip>
)}
</div>
{(user.speaking || user.raisedHand) && (
{(isSpeaking || user.raisedHand) && (
<div
className={cn(
'pointer-events-none absolute inset-0 h-full w-full border-4 border-orange-400',
Expand Down
Loading
Loading