diff --git a/Assets/Samples/Stream Video & Audio Chat SDK/0.7.0/Video & Audio Chat Example Project/Scripts/UI/UIManager.cs b/Assets/Samples/Stream Video & Audio Chat SDK/0.7.0/Video & Audio Chat Example Project/Scripts/UI/UIManager.cs index 843fcbf3..0e248450 100644 --- a/Assets/Samples/Stream Video & Audio Chat SDK/0.7.0/Video & Audio Chat Example Project/Scripts/UI/UIManager.cs +++ b/Assets/Samples/Stream Video & Audio Chat SDK/0.7.0/Video & Audio Chat Example Project/Scripts/UI/UIManager.cs @@ -46,8 +46,7 @@ public bool HasUserAuthorizedCameraPermission() { #if UNITY_STANDALONE return true; //StreamTodo: check if this is true for all platforms -#endif -#if UNITY_ANDROID +#elif UNITY_ANDROID return Permission.HasUserAuthorizedPermission(Permission.Camera); #else Debug.LogError($"Handling permissions not implemented for platform: " + Application.platform); @@ -81,8 +80,7 @@ public bool HasUserAuthorizedMicrophonePermission() { #if UNITY_STANDALONE return true; //StreamTodo: check if this is true for all platforms -#endif -#if UNITY_ANDROID +#elif UNITY_ANDROID return Permission.HasUserAuthorizedPermission(Permission.Microphone); #else Debug.LogError($"Handling permissions not implemented for platform: " + Application.platform); diff --git a/Packages/StreamVideo/Editor/StreamEditorTools.cs b/Packages/StreamVideo/Editor/StreamEditorTools.cs index 9a441704..ce249fd2 100644 --- a/Packages/StreamVideo/Editor/StreamEditorTools.cs +++ b/Packages/StreamVideo/Editor/StreamEditorTools.cs @@ -25,6 +25,10 @@ public static void ToggleStreamDebugModeCompilerFlag() [MenuItem(MenuPrefix + "Toggle " + StreamLocalSfuModeEnabledCompilerFlag + " compiler flag")] public static void ToggleStreamLocalSfuCompilerFlag() => ToggleCompilerFlag(StreamLocalSfuModeEnabledCompilerFlag); + + [MenuItem(MenuPrefix + "Toggle " + StreamAudioBenchmarkEnabledCompilerFlag + " compiler flag")] + public static void ToggleStreamAudioBenchmarkCompilerFlag() + => 
ToggleCompilerFlag(StreamAudioBenchmarkEnabledCompilerFlag); public static void BuildSampleApp() { @@ -160,5 +164,6 @@ public static void SetStreamTestsEnabledCompilerFlag(string flagKeyword, bool en private const string StreamTestsEnabledCompilerFlag = "STREAM_TESTS_ENABLED"; private const string StreamDebugModeEnabledCompilerFlag = "STREAM_DEBUG_ENABLED"; private const string StreamLocalSfuModeEnabledCompilerFlag = "STREAM_LOCAL_SFU"; + private const string StreamAudioBenchmarkEnabledCompilerFlag = "STREAM_AUDIO_BENCHMARK_ENABLED"; } } \ No newline at end of file diff --git a/Packages/StreamVideo/Runtime/Core/LowLevelClient/RtcSession.cs b/Packages/StreamVideo/Runtime/Core/LowLevelClient/RtcSession.cs index 749e413f..55bb8262 100644 --- a/Packages/StreamVideo/Runtime/Core/LowLevelClient/RtcSession.cs +++ b/Packages/StreamVideo/Runtime/Core/LowLevelClient/RtcSession.cs @@ -166,6 +166,12 @@ public RtcSession(SfuWebSocket sfuWebSocket, Func httpC var statsCollector = new UnityWebRtcStatsCollector(this, _serializer); _statsSender = new WebRtcStatsSender(this, statsCollector, _timeService, _logs); + + //StreamTodo: enable this only if a special mode e.g. compiler flag +#if STREAM_AUDIO_BENCHMARK_ENABLED + _logs.Warning($"Audio benchmark enabled. Waiting for a special video stream to measure audio-video sync. 
Check {nameof(VideoAudioSyncBenchmark)} summary for more details."); + _videoAudioSyncBenchmark = new VideoAudioSyncBenchmark(_timeService, _logs); +#endif } public void Dispose() @@ -184,6 +190,7 @@ public void Update() _sfuWebSocket.Update(); Publisher?.Update(); _statsSender.Update(); + _videoAudioSyncBenchmark?.Update(); //StreamTodo: we could remove this if we'd maintain a collection of tracks and update them directly if (ActiveCall != null) @@ -270,6 +277,7 @@ public async Task StartAsync(StreamCall call) //StreamTodo: validate when this state should set CallState = CallingState.Joined; + _videoAudioSyncBenchmark?.Init(call); } public async Task StopAsync() @@ -278,6 +286,7 @@ public async Task StopAsync() //StreamTodo: check with js definition of "offline" CallState = CallingState.Offline; await _sfuWebSocket.DisconnectAsync(WebSocketCloseStatus.NormalClosure, "Video session stopped"); + _videoAudioSyncBenchmark?.Finish(); } //StreamTodo: call by call.reconnectOrSwitchSfu() @@ -324,6 +333,8 @@ public void TrySetVideoTrackEnabled(bool isEnabled) private readonly IStreamClientConfig _config; private readonly Func _httpClientFactory; private readonly WebRtcStatsSender _statsSender; + private readonly VideoAudioSyncBenchmark _videoAudioSyncBenchmark; + private readonly SdpMungeUtils _sdpMungeUtils = new SdpMungeUtils(); private readonly List _pendingIceTrickleRequests = new List(); private readonly PublisherVideoSettings _publisherVideoSettings = PublisherVideoSettings.Default; @@ -340,7 +351,6 @@ private readonly Dictionary _videoResolutionByParticipa private bool _trackSubscriptionRequested; private bool _trackSubscriptionRequestInProgress; - private SdpMungeUtils _sdpMungeUtils = new SdpMungeUtils(); private AudioSource _audioInput; private WebCamTexture _videoInput; private Camera _videoSceneInput; diff --git a/Packages/StreamVideo/Runtime/Core/StatefulModels/Tracks/StreamAudioTrack.cs 
b/Packages/StreamVideo/Runtime/Core/StatefulModels/Tracks/StreamAudioTrack.cs index 39b72943..43e61c10 100644 --- a/Packages/StreamVideo/Runtime/Core/StatefulModels/Tracks/StreamAudioTrack.cs +++ b/Packages/StreamVideo/Runtime/Core/StatefulModels/Tracks/StreamAudioTrack.cs @@ -1,10 +1,13 @@ -using Unity.WebRTC; +using System; +using Unity.WebRTC; using UnityEngine; namespace StreamVideo.Core.StatefulModels.Tracks { public class StreamAudioTrack : BaseStreamTrack { + internal AudioSource TargetAudioSource; + public StreamAudioTrack(AudioStreamTrack track) : base(track) { @@ -13,9 +16,16 @@ public StreamAudioTrack(AudioStreamTrack track) public void SetAudioSourceTarget(AudioSource audioSource) { - audioSource.SetTrack(Track); - audioSource.loop = true; - audioSource.Play(); + if (audioSource == null) + { + // ArgumentNullException(string) expects the parameter name, not a message + throw new ArgumentNullException(nameof(audioSource)); + } + + TargetAudioSource = audioSource; + TargetAudioSource.SetTrack(Track); + TargetAudioSource.loop = true; + TargetAudioSource.Play(); } } } \ No newline at end of file diff --git a/Packages/StreamVideo/Runtime/Core/StatefulModels/Tracks/StreamVideoTrack.cs b/Packages/StreamVideo/Runtime/Core/StatefulModels/Tracks/StreamVideoTrack.cs index 040a16b7..182ef21c 100644 --- a/Packages/StreamVideo/Runtime/Core/StatefulModels/Tracks/StreamVideoTrack.cs +++ b/Packages/StreamVideo/Runtime/Core/StatefulModels/Tracks/StreamVideoTrack.cs @@ -7,11 +7,13 @@ namespace StreamVideo.Core.StatefulModels.Tracks { public class StreamVideoTrack : BaseStreamTrack { - public StreamVideoTrack(MediaStreamTrack track) + internal RenderTexture TargetTexture => _targetTexture; + + public StreamVideoTrack(MediaStreamTrack track) : base(track) { } - + //StreamTodo: can we remove Unity dependency? 
public void SetRenderTarget(RawImage targetImage) { @@ -39,7 +41,7 @@ internal override void Update() _targetTexture = new RenderTexture(source.width, source.height, 0, RenderTextureFormat.Default); _targetImage.texture = _targetTexture; } - + var sizeRatio = (float)source.width / source.height; var sizeChanged = source.width != _targetTexture.width || source.height != _targetTexture.height; @@ -55,13 +57,13 @@ internal override void Update() _targetTexture.height = source.height; _targetTexture.Create(); } - + //StreamTodo: debug this size, it can get to negative values var rect = _targetImage.GetComponent(); var current = rect.sizeDelta; - rect.sizeDelta = new Vector2(current.x, current.x * (1/sizeRatio)); - - //StreamTodo: investigate if copying texture is really necessary. Perhaps we can just use the texture from the VideoStreamTrack + rect.sizeDelta = new Vector2(current.x, current.x * (1 / sizeRatio)); + + //StreamTodo: PERFORMANCE investigate if copying texture is really necessary. Perhaps we can just use the texture from the VideoStreamTrack. Test cross-platform //StreamTodo: use CopyTexture if available on this GPU Graphics.Blit(source, _targetTexture);
diff --git a/Packages/StreamVideo/Runtime/Core/Utils/VideoAudioSyncBenchmark.cs b/Packages/StreamVideo/Runtime/Core/Utils/VideoAudioSyncBenchmark.cs
new file mode 100644
index 00000000..42db4b1b
--- /dev/null
+++ b/Packages/StreamVideo/Runtime/Core/Utils/VideoAudioSyncBenchmark.cs
@@ -0,0 +1,345 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using StreamVideo.Core.StatefulModels;
+using StreamVideo.Core.StatefulModels.Tracks;
+using StreamVideo.Libs.Logs;
+using StreamVideo.Libs.Time;
+using UnityEngine;
+
+namespace StreamVideo.Core.Utils
+{
+    /// <summary>
+    /// This tool is used to benchmark the audio and video sync.
+    ///
+    /// How it works:
+    /// A video stream of black frames interrupted by short sequences of white frames is expected. The white frames should be accompanied by a sound beep.
+    /// The tool detects each switch to bright frames and expects to receive a beep sound at the same time.
+    /// The time between the frame switch and the sound beep is the audio and video sync delay.
+    /// </summary>
+    internal class VideoAudioSyncBenchmark
+    {
+        /// <summary>
+        /// Delays measured by a benchmark run, one entry per bright frame and beep sound pair.
+        /// </summary>
+        internal class Results
+        {
+            /// <summary>
+            /// Beep sound time minus bright frame time for every pair; positive when the beep was detected later.
+            /// </summary>
+            public IReadOnlyList<float> AudioDelays => _audioDelays;
+
+            public static Results GenerateResults(IEnumerable<float> audioDelays)
+            {
+                var results = new Results();
+                results._audioDelays.AddRange(audioDelays);
+                return results;
+            }
+
+            private readonly List<float> _audioDelays = new List<float>();
+        }
+
+        public VideoAudioSyncBenchmark(ITimeService timeService, ILogs logs)
+        {
+            _timeService = timeService ?? throw new ArgumentNullException(nameof(timeService));
+            _logs = logs ?? throw new ArgumentNullException(nameof(logs));
+        }
+
+        /// <summary>
+        /// Starts looking for the tracks of the first remote participant of <paramref name="call"/>.
+        /// If no remote participant is present yet, waits for the next participant to join.
+        /// </summary>
+        /// <exception cref="ArgumentNullException">Thrown when <paramref name="call"/> is null</exception>
+        public void Init(IStreamCall call)
+        {
+            if (call == null)
+            {
+                throw new ArgumentNullException(nameof(call));
+            }
+
+            // A repeated Init must not leave the handler of the previous one subscribed
+            CancelPendingSubscription();
+
+            Log("Init benchmark");
+            var remoteParticipant = call.Participants.FirstOrDefault(p => !p.IsLocalParticipant);
+            if (remoteParticipant != null)
+            {
+                Log("Remote participant found. Getting video and audio tracks");
+                GetVideoAndAudioTracks(remoteParticipant);
+                return;
+            }
+
+            Log("Remote participant not found. Waiting for participant to join");
+            call.ParticipantJoined += CallOnParticipantJoined;
+            _cancelPendingSubscription = () => call.ParticipantJoined -= CallOnParticipantJoined;
+
+            void CallOnParticipantJoined(IStreamVideoCallParticipant participant)
+            {
+                CancelPendingSubscription();
+
+                Log("Participant joined. Getting video and audio tracks");
+                GetVideoAndAudioTracks(participant);
+            }
+        }
+
+        /// <summary>
+        /// Inspects the current video frame and audio output. Does nothing until both tracks and their render targets are set.
+        /// </summary>
+        public void Update()
+        {
+            if (!IsEnabled())
+            {
+                return;
+            }
+
+            EvaluateVideoFrame(_videoTrack.TargetTexture);
+            EvaluateAudioFrame(_audioTrack.TargetAudioSource);
+        }
+
+        /// <summary>
+        /// Stops the benchmark, logs the measured delays and resets the collected state.
+        /// </summary>
+        /// <returns>Measured delays, or null when the benchmark was never started</returns>
+        public Results Finish()
+        {
+            // Otherwise a late ParticipantJoined or TrackAdded event would start the benchmark after it was finished
+            CancelPendingSubscription();
+
+            // Tracks are checked instead of IsEnabled() so that delays collected so far are still reported
+            // when the render targets are gone by the time the session stops
+            if (_videoTrack == null || _audioTrack == null)
+            {
+                return default;
+            }
+
+            Log("Benchmark finished. Generating results.");
+            var results = Results.GenerateResults(EnumerateResults());
+            Clear();
+            return results;
+
+            IEnumerable<float> EnumerateResults()
+            {
+                var frameAndSoundPairs = Math.Min(_brightFramesReceivedAt.Count, _beepSoundReceivedAt.Count);
+
+                if (_brightFramesReceivedAt.Count != _beepSoundReceivedAt.Count)
+                {
+                    Log("Warning. Bright frames and beep sounds count mismatch: " + _brightFramesReceivedAt.Count +
+                        " vs " + _beepSoundReceivedAt.Count);
+                }
+
+                if (frameAndSoundPairs == 0)
+                {
+                    // Averaging zero pairs would divide by zero and log NaN
+                    Log("No bright frame and beep sound pairs recorded. Nothing to average.");
+                    yield break;
+                }
+
+                var totalDiff = 0f;
+                for (int i = 0; i < frameAndSoundPairs; i++)
+                {
+                    var brightFrameTime = _brightFramesReceivedAt[i];
+                    var beepSoundTime = _beepSoundReceivedAt[i];
+
+                    var timeDiff = beepSoundTime - brightFrameTime;
+                    Log("Audio and video sync delay: " + timeDiff);
+                    totalDiff += timeDiff;
+                    yield return timeDiff;
+                }
+
+                var averageDiff = totalDiff / frameAndSoundPairs;
+                Log("All delays parsed. Average audio and video sync delay: " + averageDiff);
+            }
+        }
+
+        // Determined via testing
+        private const float BeepAudioVolumeThreshold = 0.001f;
+
+        private const string LogsPrefix = "[VideoAudioSyncBenchmark] ";
+
+        private readonly ITimeService _timeService;
+        private readonly ILogs _logs;
+
+        private readonly List<float> _brightFramesReceivedAt = new List<float>();
+        private readonly List<float> _beepSoundReceivedAt = new List<float>();
+        private readonly float[] _audioBuffer = new float[2048 * 2 * 2];
+
+        private StreamVideoTrack _videoTrack;
+        private StreamAudioTrack _audioTrack;
+
+        private Texture2D _textureBuffer;
+        private bool _prevIsBrightFrame;
+        private bool _prevIsBeepSound;
+
+        // Removes the ParticipantJoined or TrackAdded handler that is still waiting, if any
+        private Action _cancelPendingSubscription;
+
+        private bool IsEnabled()
+            => _videoTrack != null && _videoTrack.TargetTexture != null && _audioTrack != null &&
+               _audioTrack.TargetAudioSource != null;
+
+        private void CancelPendingSubscription()
+        {
+            var cancel = _cancelPendingSubscription;
+            _cancelPendingSubscription = null;
+            cancel?.Invoke();
+        }
+
+        private void GetVideoAndAudioTracks(IStreamVideoCallParticipant participant)
+        {
+            if (participant.VideoTrack != null && participant.AudioTrack != null)
+            {
+                Log("Video and audio tracks already present. Starting benchmark");
+                Start(participant.VideoTrack as StreamVideoTrack, participant.AudioTrack as StreamAudioTrack);
+                return;
+            }
+
+            Log("Waiting for video and audio tracks to be added");
+            participant.TrackAdded += ParticipantOnTrackAdded;
+            _cancelPendingSubscription = () => participant.TrackAdded -= ParticipantOnTrackAdded;
+
+            void ParticipantOnTrackAdded(IStreamVideoCallParticipant streamVideoCallParticipant, IStreamTrack track)
+            {
+                if (participant.VideoTrack == null || participant.AudioTrack == null)
+                {
+                    return;
+                }
+
+                CancelPendingSubscription();
+                Log("Video and audio tracks received. Starting benchmark");
+                Start(participant.VideoTrack as StreamVideoTrack, participant.AudioTrack as StreamAudioTrack);
+            }
+        }
+
+        // StreamTodo: we could start after receiving specific sequence of frames like r -> g > r > b -> start
+        private void Start(StreamVideoTrack videoTrack, StreamAudioTrack audioTrack)
+        {
+            _videoTrack = videoTrack ?? throw new ArgumentNullException(nameof(videoTrack));
+            _audioTrack = audioTrack ?? throw new ArgumentNullException(nameof(audioTrack));
+        }
+
+        private void EvaluateVideoFrame(RenderTexture texture)
+        {
+            var buffer = GetTextureBuffer(texture);
+
+            // ReadPixels reads from RenderTexture.active. The previous target is restored so that other code
+            // relying on it is not affected by the benchmark
+            var previousActive = RenderTexture.active;
+            RenderTexture.active = texture;
+            try
+            {
+                buffer.ReadPixels(new Rect(0, 0, texture.width, texture.height), 0, 0);
+            }
+            finally
+            {
+                RenderTexture.active = previousActive;
+            }
+
+            buffer.Apply();
+
+            var isBrightFrame = IsBrightFrame(buffer);
+
+            if (!_prevIsBrightFrame && isBrightFrame)
+            {
+                Log("Bright frame detected at: " +
+                    _timeService.Time); //StreamTodo: count how many bright frames received in sequence
+                _brightFramesReceivedAt.Add(_timeService.Time);
+            }
+
+            _prevIsBrightFrame = isBrightFrame;
+        }
+
+        private void EvaluateAudioFrame(AudioSource audioSource)
+        {
+            audioSource.GetOutputData(_audioBuffer, 0);
+
+            // NOTE(review): assumes 44.1 kHz audio output - confirm against AudioSettings.outputSampleRate
+            const float sampleRate = 44100;
+            var samplesPerFrame = sampleRate * _timeService.DeltaTime;
+
+            // Whole number of samples to inspect, never more than the buffer holds
+            var sampleCount = (int)Math.Ceiling(Math.Min(samplesPerFrame, _audioBuffer.Length));
+
+            float maxVolume = 0;
+            for (var i = 0; i < sampleCount; i++)
+            {
+                var volume = Mathf.Abs(_audioBuffer[i]);
+                if (volume > maxVolume)
+                {
+                    maxVolume = volume;
+                }
+            }
+
+            var isBeep = maxVolume > BeepAudioVolumeThreshold;
+
+            if (isBeep && !_prevIsBeepSound)
+            {
+                Log($"Beep sound detected at: {_timeService.Time} and max volume: {maxVolume}");
+                _beepSoundReceivedAt.Add(_timeService.Time);
+            }
+
+            _prevIsBeepSound = isBeep;
+        }
+
+        private Texture2D GetTextureBuffer(RenderTexture texture)
+        {
+            if (_textureBuffer == null || _textureBuffer.width != texture.width ||
+                _textureBuffer.height != texture.height)
+            {
+                // Textures created from code have to be destroyed explicitly before being replaced
+                DestroyTextureBuffer();
+                _textureBuffer = new Texture2D(texture.width, texture.height, TextureFormat.RGB24, mipChain: false);
+            }
+
+            return _textureBuffer;
+        }
+
+        // Expects the RGB24 buffer without mipmaps created by GetTextureBuffer: each raw byte is one 0-255 color channel
+        private static bool IsBrightFrame(Texture2D texture)
+        {
+            // A single read of the raw bytes replaces one GetPixel call per pixel
+            var channels = texture.GetRawTextureData<byte>();
+            if (channels.Length == 0)
+            {
+                return false;
+            }
+
+            long totalBrightness = 0;
+            for (var i = 0; i < channels.Length; i++)
+            {
+                totalBrightness += channels[i];
+            }
+
+            var averageBrightness = totalBrightness / (channels.Length * 255f);
+            return averageBrightness > 0.5f;
+        }
+
+        private void Log(string message) => _logs.Warning(LogsPrefix + message);
+
+        private void Clear()
+        {
+            _brightFramesReceivedAt.Clear();
+            _beepSoundReceivedAt.Clear();
+
+            _videoTrack = null;
+            _audioTrack = null;
+
+            // A later run must not inherit the frame and sound state of this one
+            _prevIsBrightFrame = false;
+            _prevIsBeepSound = false;
+
+            DestroyTextureBuffer();
+        }
+
+        private void DestroyTextureBuffer()
+        {
+            if (_textureBuffer == null)
+            {
+                return;
+            }
+
+            // Fully qualified because System.Object is in scope as well
+            UnityEngine.Object.Destroy(_textureBuffer);
+            _textureBuffer = null;
+        }
+    }
+}
\ No newline at end of file
diff --git a/Packages/StreamVideo/Runtime/Core/Utils/VideoAudioSyncBenchmark.cs.meta b/Packages/StreamVideo/Runtime/Core/Utils/VideoAudioSyncBenchmark.cs.meta
new file mode 100644
index 00000000..e76a982e
--- /dev/null
+++ b/Packages/StreamVideo/Runtime/Core/Utils/VideoAudioSyncBenchmark.cs.meta
@@ -0,0 +1,3 @@
+fileFormatVersion: 2
+guid: 5a4f54e0883f4c01bbe71d7400e64cb4
+timeCreated: 1729241854
\ No newline at end of file
diff --git a/Packages/StreamVideo/Samples~/VideoChat/Scripts/UI/UIManager.cs b/Packages/StreamVideo/Samples~/VideoChat/Scripts/UI/UIManager.cs
index 843fcbf3..0e248450 100644
--- a/Packages/StreamVideo/Samples~/VideoChat/Scripts/UI/UIManager.cs
+++ 
b/Packages/StreamVideo/Samples~/VideoChat/Scripts/UI/UIManager.cs @@ -46,8 +46,7 @@ public bool HasUserAuthorizedCameraPermission() { #if UNITY_STANDALONE return true; //StreamTodo: check if this is true for all platforms -#endif -#if UNITY_ANDROID +#elif UNITY_ANDROID return Permission.HasUserAuthorizedPermission(Permission.Camera); #else Debug.LogError($"Handling permissions not implemented for platform: " + Application.platform); @@ -81,8 +80,7 @@ public bool HasUserAuthorizedMicrophonePermission() { #if UNITY_STANDALONE return true; //StreamTodo: check if this is true for all platforms -#endif -#if UNITY_ANDROID +#elif UNITY_ANDROID return Permission.HasUserAuthorizedPermission(Permission.Microphone); #else Debug.LogError($"Handling permissions not implemented for platform: " + Application.platform);