Skip to content

Commit

Permalink
Vulkan: Feedback loop detection and barriers (#7226)
Browse files Browse the repository at this point in the history
* Vulkan: Feedback loop improvements

This PR allows the Vulkan backend to detect attachment feedback loops. These are currently used in the following ways:

- Partial use of VK_EXT_attachment_feedback_loop_layout
  - All renderable textures have AttachmentFeedbackLoopBitExt
  - Compile pipelines with Color/DepthStencil feedback loop flags when present
- Support using FragmentBarrier for feedback loops (fixes regressions from https://github.com/Ryujinx/Ryujinx/pull/7012 )

TODO:
- AMD GPUs may need layout transitions before textures can be used properly in feedback loops.
- Use dynamic state for feedback loops. The background pipeline will always miss since feedback loop state isn't known on the GPU project.
- How is the barrier dependency flag used? (DXVK just ignores it, and there is no Vulkan validation for it...)
- Improve subpass dependencies to fix validation errors

* Mark field readonly

* Add feedback loop dynamic state

* fix: add MoltenVK resolver workaround

fix: add MoltenVK resolver workaround

* Formatting

* Fix more complaints

* RADV dcc workaround

* Use dynamic state properly, cleanup.

* Use aspects flags in more places
  • Loading branch information
riperiperi authored Sep 2, 2024
1 parent fdd7ee7 commit ca59c3f
Show file tree
Hide file tree
Showing 18 changed files with 538 additions and 74 deletions.
24 changes: 22 additions & 2 deletions src/Ryujinx.Common/GraphicsDriver/DriverUtilities.cs
Original file line number Diff line number Diff line change
@@ -1,13 +1,33 @@
using Ryujinx.Common.Utilities;
using System;

namespace Ryujinx.Common.GraphicsDriver
{
public static class DriverUtilities
{
/// <summary>
/// Appends one or more flags to a comma-separated environment variable,
/// keeping whatever value the variable already holds.
/// </summary>
/// <param name="envVar">Name of the environment variable to extend.</param>
/// <param name="newFlags">Flag text to append (already comma-separated if it holds several flags).</param>
private static void AddMesaFlags(string envVar, string newFlags)
{
    string existingFlags = Environment.GetEnvironmentVariable(envVar);

    // Treat an empty value like an unset one; otherwise the result would
    // start with a stray comma (e.g. ",nodcc").
    string flags = string.IsNullOrEmpty(existingFlags) ? newFlags : $"{existingFlags},{newFlags}";

    OsUtils.SetEnvironmentVariableNoCaching(envVar, flags);
}

/// <summary>
/// Applies driver-related environment configuration: on Linux it appends the
/// "nodcc" flag to RADV_DEBUG, then it applies the OpenGL threading setting.
/// </summary>
/// <param name="oglThreading">Value forwarded to <see cref="ToggleOGLThreading"/>.</param>
public static void InitDriverConfig(bool oglThreading)
{
    const string RadvDebugVariable = "RADV_DEBUG";
    const string NoDccFlag = "nodcc";

    bool runningOnLinux = OperatingSystem.IsLinux();

    if (runningOnLinux)
    {
        // RADV DCC workaround: the flag is added to any flags the user already set.
        AddMesaFlags(RadvDebugVariable, NoDccFlag);
    }

    ToggleOGLThreading(oglThreading);
}

public static void ToggleOGLThreading(bool enabled)
{
Environment.SetEnvironmentVariable("mesa_glthread", enabled.ToString().ToLower());
Environment.SetEnvironmentVariable("__GL_THREADED_OPTIMIZATIONS", enabled ? "1" : "0");
OsUtils.SetEnvironmentVariableNoCaching("mesa_glthread", enabled.ToString().ToLower());
OsUtils.SetEnvironmentVariableNoCaching("__GL_THREADED_OPTIMIZATIONS", enabled ? "1" : "0");

try
{
Expand Down
24 changes: 24 additions & 0 deletions src/Ryujinx.Common/Utilities/OsUtils.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
using System;
using System.Diagnostics;
using System.Runtime.InteropServices;

namespace Ryujinx.Common.Utilities
{
/// <summary>
/// Operating-system helpers that reach below the managed runtime.
/// </summary>
public partial class OsUtils
{
    // libc setenv: returns 0 on success, -1 on failure (errno set).
    [LibraryImport("libc", SetLastError = true)]
    private static partial int setenv([MarshalAs(UnmanagedType.LPStr)] string name, [MarshalAs(UnmanagedType.LPStr)] string value, int overwrite);

    // libc unsetenv: returns 0 on success, -1 on failure (errno set).
    [LibraryImport("libc", SetLastError = true)]
    private static partial int unsetenv([MarshalAs(UnmanagedType.LPStr)] string name);

    /// <summary>
    /// Sets an environment variable both in the managed environment and, on
    /// non-Windows systems, in the native process environment through libc.
    /// </summary>
    /// <param name="key">Name of the environment variable.</param>
    /// <param name="value">
    /// Value to assign. A null value removes the variable, matching the
    /// behavior of <see cref="Environment.SetEnvironmentVariable(string, string)"/>.
    /// </param>
    public static void SetEnvironmentVariableNoCaching(string key, string value)
    {
        // Set the value in the cached environment variables, too.
        Environment.SetEnvironmentVariable(key, value);

        if (!OperatingSystem.IsWindows())
        {
            // The managed call above deletes the variable when value is null, but native
            // setenv requires a valid string, so removal has to go through unsetenv.
            int res = value == null ? unsetenv(key) : setenv(key, value, 1);

            Debug.Assert(res != -1, $"Updating native environment variable '{key}' failed (errno {Marshal.GetLastPInvokeError()}).");
        }
    }
}
}
39 changes: 26 additions & 13 deletions src/Ryujinx.Graphics.Vulkan/BarrierBatch.cs
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,12 @@ private enum IncoherentBarrierType
CommandBuffer
}

private bool _feedbackLoopActive;
private PipelineStageFlags _incoherentBufferWriteStages;
private PipelineStageFlags _incoherentTextureWriteStages;
private PipelineStageFlags _extraStages;
private IncoherentBarrierType _queuedIncoherentBarrier;
private bool _queuedFeedbackLoopBarrier;

public BarrierBatch(VulkanRenderer gd)
{
Expand All @@ -53,17 +55,6 @@ public static (AccessFlags Access, PipelineStageFlags Stages) GetSubpassAccessSu
stages |= PipelineStageFlags.TransformFeedbackBitExt;
}

if (!gd.IsTBDR)
{
// Desktop GPUs can transform image barriers into memory barriers.

access |= AccessFlags.DepthStencilAttachmentWriteBit | AccessFlags.ColorAttachmentWriteBit;
access |= AccessFlags.DepthStencilAttachmentReadBit | AccessFlags.ColorAttachmentReadBit;

stages |= PipelineStageFlags.EarlyFragmentTestsBit | PipelineStageFlags.LateFragmentTestsBit;
stages |= PipelineStageFlags.ColorAttachmentOutputBit;
}

return (access, stages);
}

Expand Down Expand Up @@ -178,23 +169,43 @@ public unsafe void FlushMemoryBarrier(ShaderCollection program, bool inRenderPas
}

_queuedIncoherentBarrier = IncoherentBarrierType.None;
_queuedFeedbackLoopBarrier = false;
}
else if (_feedbackLoopActive && _queuedFeedbackLoopBarrier)
{
// Feedback loop barrier.

MemoryBarrier barrier = new MemoryBarrier()
{
SType = StructureType.MemoryBarrier,
SrcAccessMask = AccessFlags.ShaderWriteBit,
DstAccessMask = AccessFlags.ShaderReadBit
};

QueueBarrier(barrier, PipelineStageFlags.FragmentShaderBit, PipelineStageFlags.AllGraphicsBit);

_queuedFeedbackLoopBarrier = false;
}

_feedbackLoopActive = false;
}
}

public unsafe void Flush(CommandBufferScoped cbs, bool inRenderPass, RenderPassHolder rpHolder, Action endRenderPass)
{
Flush(cbs, null, inRenderPass, rpHolder, endRenderPass);
Flush(cbs, null, false, inRenderPass, rpHolder, endRenderPass);
}

public unsafe void Flush(CommandBufferScoped cbs, ShaderCollection program, bool inRenderPass, RenderPassHolder rpHolder, Action endRenderPass)
public unsafe void Flush(CommandBufferScoped cbs, ShaderCollection program, bool feedbackLoopActive, bool inRenderPass, RenderPassHolder rpHolder, Action endRenderPass)
{
if (program != null)
{
_incoherentBufferWriteStages |= program.IncoherentBufferWriteStages | _extraStages;
_incoherentTextureWriteStages |= program.IncoherentTextureWriteStages;
}

_feedbackLoopActive |= feedbackLoopActive;

FlushMemoryBarrier(program, inRenderPass);

if (!inRenderPass && rpHolder != null)
Expand Down Expand Up @@ -406,6 +417,8 @@ private void QueueIncoherentBarrier(IncoherentBarrierType type)
{
_queuedIncoherentBarrier = type;
}

_queuedFeedbackLoopBarrier = true;
}

public void QueueTextureBarrier()
Expand Down
64 changes: 45 additions & 19 deletions src/Ryujinx.Graphics.Vulkan/DescriptorSetUpdater.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
using Silk.NET.Vulkan;
using System;
using System.Buffers;
using System.Collections.Generic;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using CompareOp = Ryujinx.Graphics.GAL.CompareOp;
Expand Down Expand Up @@ -42,30 +43,30 @@ public BufferRef(Auto<DisposableBuffer> buffer, ref BufferRange range)
private record struct TextureRef
{
public ShaderStage Stage;
public TextureStorage Storage;
public Auto<DisposableImageView> View;
public TextureView View;
public Auto<DisposableImageView> ImageView;
public Auto<DisposableSampler> Sampler;

public TextureRef(ShaderStage stage, TextureStorage storage, Auto<DisposableImageView> view, Auto<DisposableSampler> sampler)
public TextureRef(ShaderStage stage, TextureView view, Auto<DisposableImageView> imageView, Auto<DisposableSampler> sampler)
{
Stage = stage;
Storage = storage;
View = view;
ImageView = imageView;
Sampler = sampler;
}
}

private record struct ImageRef
{
public ShaderStage Stage;
public TextureStorage Storage;
public Auto<DisposableImageView> View;
public TextureView View;
public Auto<DisposableImageView> ImageView;

public ImageRef(ShaderStage stage, TextureStorage storage, Auto<DisposableImageView> view)
public ImageRef(ShaderStage stage, TextureView view, Auto<DisposableImageView> imageView)
{
Stage = stage;
Storage = storage;
View = view;
ImageView = imageView;
}
}

Expand Down Expand Up @@ -124,6 +125,8 @@ private enum DirtyFlags
private readonly TextureView _dummyTexture;
private readonly SamplerHolder _dummySampler;

public List<TextureView> FeedbackLoopHazards { get; private set; }

public DescriptorSetUpdater(VulkanRenderer gd, Device device)
{
_gd = gd;
Expand Down Expand Up @@ -209,10 +212,15 @@ public DescriptorSetUpdater(VulkanRenderer gd, Device device)
_templateUpdater = new();
}

public void Initialize()
public void Initialize(bool isMainPipeline)
{
MemoryOwner<byte> dummyTextureData = MemoryOwner<byte>.RentCleared(4);
_dummyTexture.SetData(dummyTextureData);

if (isMainPipeline)
{
FeedbackLoopHazards = new();
}
}

private static bool BindingOverlaps(ref DescriptorBufferInfo info, int bindingOffset, int offset, int size)
Expand Down Expand Up @@ -275,6 +283,18 @@ internal void Rebind(Auto<DisposableBuffer> buffer, int offset, int size)

public void InsertBindingBarriers(CommandBufferScoped cbs)
{
if ((FeedbackLoopHazards?.Count ?? 0) > 0)
{
// Clear existing hazards - they will be rebuilt.

foreach (TextureView hazard in FeedbackLoopHazards)
{
hazard.DecrementHazardUses();
}

FeedbackLoopHazards.Clear();
}

foreach (ResourceBindingSegment segment in _program.BindingSegments[PipelineBase.TextureSetIndex])
{
if (segment.Type == ResourceType.TextureAndSampler)
Expand All @@ -284,7 +304,7 @@ public void InsertBindingBarriers(CommandBufferScoped cbs)
for (int i = 0; i < segment.Count; i++)
{
ref var texture = ref _textureRefs[segment.Binding + i];
texture.Storage?.QueueWriteToReadBarrier(cbs, AccessFlags.ShaderReadBit, texture.Stage.ConvertToPipelineStageFlags());
texture.View?.PrepareForUsage(cbs, texture.Stage.ConvertToPipelineStageFlags(), FeedbackLoopHazards);
}
}
else
Expand All @@ -305,7 +325,7 @@ public void InsertBindingBarriers(CommandBufferScoped cbs)
for (int i = 0; i < segment.Count; i++)
{
ref var image = ref _imageRefs[segment.Binding + i];
image.Storage?.QueueWriteToReadBarrier(cbs, AccessFlags.ShaderReadBit, image.Stage.ConvertToPipelineStageFlags());
image.View?.PrepareForUsage(cbs, image.Stage.ConvertToPipelineStageFlags(), FeedbackLoopHazards);
}
}
else
Expand Down Expand Up @@ -385,9 +405,12 @@ public void SetImage(
}
else if (image is TextureView view)
{
view.Storage.QueueWriteToReadBarrier(cbs, AccessFlags.ShaderReadBit, stage.ConvertToPipelineStageFlags());
ref ImageRef iRef = ref _imageRefs[binding];

_imageRefs[binding] = new(stage, view.Storage, view.GetView(imageFormat).GetIdentityImageView());
iRef.View?.ClearUsage(FeedbackLoopHazards);
view?.PrepareForUsage(cbs, stage.ConvertToPipelineStageFlags(), FeedbackLoopHazards);

iRef = new(stage, view, view.GetView(imageFormat).GetIdentityImageView());
}
else
{
Expand Down Expand Up @@ -486,9 +509,12 @@ public void SetTextureAndSampler(
}
else if (texture is TextureView view)
{
view.Storage.QueueWriteToReadBarrier(cbs, AccessFlags.ShaderReadBit, stage.ConvertToPipelineStageFlags());
ref TextureRef iRef = ref _textureRefs[binding];

iRef.View?.ClearUsage(FeedbackLoopHazards);
view?.PrepareForUsage(cbs, stage.ConvertToPipelineStageFlags(), FeedbackLoopHazards);

_textureRefs[binding] = new(stage, view.Storage, view.GetImageView(), ((SamplerHolder)sampler)?.GetSampler());
iRef = new(stage, view, view.GetImageView(), ((SamplerHolder)sampler)?.GetSampler());
}
else
{
Expand All @@ -510,7 +536,7 @@ public void SetTextureAndSamplerIdentitySwizzle(
{
view.Storage.QueueWriteToReadBarrier(cbs, AccessFlags.ShaderReadBit, stage.ConvertToPipelineStageFlags());

_textureRefs[binding] = new(stage, view.Storage, view.GetIdentityImageView(), ((SamplerHolder)sampler)?.GetSampler());
_textureRefs[binding] = new(stage, view, view.GetIdentityImageView(), ((SamplerHolder)sampler)?.GetSampler());

SignalDirty(DirtyFlags.Texture);
}
Expand Down Expand Up @@ -836,7 +862,7 @@ private void UpdateAndBind(CommandBufferScoped cbs, ShaderCollection program, in
ref var texture = ref textures[i];
ref var refs = ref _textureRefs[binding + i];

texture.ImageView = refs.View?.Get(cbs).Value ?? default;
texture.ImageView = refs.ImageView?.Get(cbs).Value ?? default;
texture.Sampler = refs.Sampler?.Get(cbs).Value ?? default;

if (texture.ImageView.Handle == 0)
Expand Down Expand Up @@ -886,7 +912,7 @@ private void UpdateAndBind(CommandBufferScoped cbs, ShaderCollection program, in

for (int i = 0; i < count; i++)
{
images[i].ImageView = _imageRefs[binding + i].View?.Get(cbs).Value ?? default;
images[i].ImageView = _imageRefs[binding + i].ImageView?.Get(cbs).Value ?? default;
}

tu.Push<DescriptorImageInfo>(images[..count]);
Expand Down Expand Up @@ -957,7 +983,7 @@ private void UpdateAndBindTexturesWithoutTemplate(CommandBufferScoped cbs, Shade
ref var texture = ref textures[i];
ref var refs = ref _textureRefs[binding + i];

texture.ImageView = refs.View?.Get(cbs).Value ?? default;
texture.ImageView = refs.ImageView?.Get(cbs).Value ?? default;
texture.Sampler = refs.Sampler?.Get(cbs).Value ?? default;

if (texture.ImageView.Handle == 0)
Expand Down
12 changes: 12 additions & 0 deletions src/Ryujinx.Graphics.Vulkan/FeedbackLoopAspects.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
using System;

namespace Ryujinx.Graphics.Vulkan
{
/// <summary>
/// Bit flags naming the aspects (color and/or depth) that take part in a feedback loop.
/// Values can be combined with bitwise OR.
/// </summary>
[Flags]
internal enum FeedbackLoopAspects
{
    /// <summary>No aspect selected.</summary>
    None = 0x0,

    /// <summary>Color aspect (bit 0).</summary>
    Color = 0x1,

    /// <summary>Depth aspect (bit 1).</summary>
    Depth = 0x2,
}
}
21 changes: 21 additions & 0 deletions src/Ryujinx.Graphics.Vulkan/FramebufferParams.cs
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,27 @@ public void AddStoreOpUsage()
_depthStencil?.Storage?.AddStoreOpUsage(true);
}

/// <summary>
/// Calls ClearBindings on the storage behind the depth-stencil attachment and
/// behind every non-null canonical color attachment.
/// </summary>
public void ClearBindings()
{
    // Storage is null-guarded the same way AddStoreOpUsage does it: a view
    // without a backing storage has no bindings to clear.
    _depthStencil?.Storage?.ClearBindings();

    for (int i = 0; i < _colorsCanonical.Length; i++)
    {
        _colorsCanonical[i]?.Storage?.ClearBindings();
    }
}

/// <summary>
/// Registers each attachment view (depth-stencil and every non-null canonical
/// color view) as a binding on its own storage.
/// </summary>
public void AddBindings()
{
    // Storage is null-guarded the same way AddStoreOpUsage does it: a view
    // without a backing storage has nothing to register against.
    _depthStencil?.Storage?.AddBinding(_depthStencil);

    for (int i = 0; i < _colorsCanonical.Length; i++)
    {
        TextureView color = _colorsCanonical[i];
        color?.Storage?.AddBinding(color);
    }
}

public (RenderPassHolder rpHolder, Auto<DisposableFramebuffer> framebuffer) GetPassAndFramebuffer(
VulkanRenderer gd,
Device device,
Expand Down
6 changes: 6 additions & 0 deletions src/Ryujinx.Graphics.Vulkan/HardwareCapabilities.cs
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ readonly struct HardwareCapabilities
public readonly bool SupportsViewportArray2;
public readonly bool SupportsHostImportedMemory;
public readonly bool SupportsDepthClipControl;
public readonly bool SupportsAttachmentFeedbackLoop;
public readonly bool SupportsDynamicAttachmentFeedbackLoop;
public readonly uint SubgroupSize;
public readonly SampleCountFlags SupportedSampleCounts;
public readonly PortabilitySubsetFlags PortabilitySubset;
Expand Down Expand Up @@ -84,6 +86,8 @@ public HardwareCapabilities(
bool supportsViewportArray2,
bool supportsHostImportedMemory,
bool supportsDepthClipControl,
bool supportsAttachmentFeedbackLoop,
bool supportsDynamicAttachmentFeedbackLoop,
uint subgroupSize,
SampleCountFlags supportedSampleCounts,
PortabilitySubsetFlags portabilitySubset,
Expand Down Expand Up @@ -121,6 +125,8 @@ public HardwareCapabilities(
SupportsViewportArray2 = supportsViewportArray2;
SupportsHostImportedMemory = supportsHostImportedMemory;
SupportsDepthClipControl = supportsDepthClipControl;
SupportsAttachmentFeedbackLoop = supportsAttachmentFeedbackLoop;
SupportsDynamicAttachmentFeedbackLoop = supportsDynamicAttachmentFeedbackLoop;
SubgroupSize = subgroupSize;
SupportedSampleCounts = supportedSampleCounts;
PortabilitySubset = portabilitySubset;
Expand Down
Loading

0 comments on commit ca59c3f

Please sign in to comment.