From fd3d3c4158e517b33afbb2f5890d606f042c0b45 Mon Sep 17 00:00:00 2001
From: squidbus <175574877+squidbus@users.noreply.github.com>
Date: Mon, 17 Feb 2025 06:13:39 -0800
Subject: [PATCH] shader_recompiler: Implement AMD buffer bounds checking behavior. (#2448)

* shader_recompiler: Implement AMD buffer bounds checking behavior.

* shader_recompiler: Use SRT flatbuf for bounds check size.

* shader_recompiler: Fix buffer atomic bounds check.

* buffer_cache: Prevent false image-to-buffer sync.

  Lowering vertex fetch to formatted buffer surfaced an issue where a CPU
  modified range may be overwritten with stale GPU modified image data.

* Address review comments.
---
 .../backend/spirv/emit_spirv_atomic.cpp       |  26 ++-
 .../spirv/emit_spirv_context_get_set.cpp      | 185 +++++++++++-----
 .../backend/spirv/emit_spirv_special.cpp      |   2 +-
 .../backend/spirv/spirv_emit_context.cpp      | 101 +++++++---
 .../backend/spirv/spirv_emit_context.h        |   7 +-
 .../frontend/fetch_shader.cpp                 |   9 +-
 src/shader_recompiler/frontend/fetch_shader.h |   2 +
 .../frontend/translate/translate.cpp          |  90 ++++++---
 .../frontend/translate/translate.h            |   2 +
 .../frontend/translate/vector_memory.cpp      |   4 +
 src/shader_recompiler/info.h                  |  30 +--
 src/shader_recompiler/ir/reg.h                |   1 +
 src/shader_recompiler/profile.h               |   1 +
 src/video_core/buffer_cache/buffer_cache.cpp  |   6 +-
 .../renderer_vulkan/vk_instance.cpp           |  37 ++--
 src/video_core/renderer_vulkan/vk_instance.h  |  18 +-
 .../renderer_vulkan/vk_pipeline_cache.cpp     |   1 +
 .../renderer_vulkan/vk_rasterizer.cpp         |   1 -
 .../renderer_vulkan/vk_rasterizer.h           |  11 +-
 19 files changed, 376 insertions(+), 158 deletions(-)

diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
index 92cfcbb0f..4faa99fe8 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
@@ -21,6 +21,28 @@ Id SharedAtomicU32(EmitContext& ctx, Id offset, Id value,
     return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value);
 }
 
+Id BufferAtomicU32BoundsCheck(EmitContext& ctx, Id index, Id buffer_size, auto emit_func) {
+    if (Sirit::ValidId(buffer_size)) {
+        // Bounds checking enabled, wrap in a conditional branch to make sure that
+        // the atomic is not mistakenly executed when the index is out of bounds.
+        const Id in_bounds = ctx.OpULessThan(ctx.U1[1], index, buffer_size);
+        const Id ib_label = ctx.OpLabel();
+        const Id oob_label = ctx.OpLabel();
+        const Id end_label = ctx.OpLabel();
+        ctx.OpBranchConditional(in_bounds, ib_label, oob_label);
+        ctx.AddLabel(ib_label);
+        const Id ib_result = emit_func();
+        ctx.OpBranch(end_label);
+        ctx.AddLabel(oob_label);
+        const Id oob_result = ctx.u32_zero_value;
+        ctx.OpBranch(end_label);
+        ctx.AddLabel(end_label);
+        return ctx.OpPhi(ctx.U32[1], ib_result, ib_label, oob_result, oob_label);
+    }
+    // Bounds checking not enabled, just perform the atomic operation.
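For reference, the behavior this helper models is that an out-of-bounds buffer atomic performs no memory access and its result is zero, which is what the in-bounds/out-of-bounds labels and the closing OpPhi encode. A rough host-side sketch of those semantics (illustrative names, not emulator API; std::atomic_ref needs C++20):

#include <atomic>
#include <cstdint>
#include <span>

// Out-of-bounds buffer atomics are dropped and yield zero; in-bounds ones behave normally.
uint32_t BufferAtomicAdd(std::span<uint32_t> buffer, uint32_t index, uint32_t value) {
    if (index < buffer.size()) {
        std::atomic_ref<uint32_t> word{buffer[index]};
        return word.fetch_add(value); // returns the previous value, like the SPIR-V atomic
    }
    return 0; // no side effect when out of bounds
}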
+    return emit_func();
+}
+
 Id BufferAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value,
                    Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
     const auto& buffer = ctx.buffers[handle];
@@ -31,7 +53,9 @@ Id BufferAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id
     const auto [id, pointer_type] = buffer[EmitContext::BufferAlias::U32];
     const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index);
     const auto [scope, semantics]{AtomicArgs(ctx)};
-    return (ctx.*atomic_func)(ctx.U32[1], ptr, scope, semantics, value);
+    return BufferAtomicU32BoundsCheck(ctx, index, buffer.size_dwords, [&] {
+        return (ctx.*atomic_func)(ctx.U32[1], ptr, scope, semantics, value);
+    });
 }
 
 Id ImageAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id value,
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index cc7b7e097..e4071bb95 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -178,14 +178,21 @@ Id EmitReadConstBuffer(EmitContext& ctx, u32 handle, Id index) {
     index = ctx.OpIAdd(ctx.U32[1], index, buffer.offset_dwords);
     const auto [id, pointer_type] = buffer[BufferAlias::U32];
     const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
-    return ctx.OpLoad(ctx.U32[1], ptr);
+    const Id result{ctx.OpLoad(ctx.U32[1], ptr)};
+
+    if (Sirit::ValidId(buffer.size_dwords)) {
+        const Id in_bounds = ctx.OpULessThan(ctx.U1[1], index, buffer.size_dwords);
+        return ctx.OpSelect(ctx.U32[1], in_bounds, result, ctx.u32_zero_value);
+    } else {
+        return result;
+    }
 }
 
 Id EmitReadStepRate(EmitContext& ctx, int rate_idx) {
+    const auto index{rate_idx == 0 ? PushData::Step0Index : PushData::Step1Index};
     return ctx.OpLoad(
         ctx.U32[1],
         ctx.OpAccessChain(ctx.TypePointer(spv::StorageClass::PushConstant, ctx.U32[1]),
-                          ctx.push_data_block,
-                          rate_idx == 0 ? ctx.u32_zero_value : ctx.u32_one_value));
+                          ctx.push_data_block, ctx.ConstU32(index)));
 }
 
 static Id EmitGetAttributeForGeometry(EmitContext& ctx, IR::Attribute attr, u32 comp, Id index) {
@@ -402,8 +409,30 @@ void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value) {
     ctx.OpStore(pointer, value);
 }
 
+template <u32 N>
+static Id EmitLoadBufferBoundsCheck(EmitContext& ctx, Id index, Id buffer_size, Id result,
+                                    bool is_float) {
+    if (Sirit::ValidId(buffer_size)) {
+        // Bounds checking enabled, wrap in a select.
+        const auto result_type = is_float ? ctx.F32[N] : ctx.U32[N];
+        auto compare_index = index;
+        auto zero_value = is_float ? ctx.f32_zero_value : ctx.u32_zero_value;
+        if (N > 1) {
+            compare_index = ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(N - 1));
+            std::array<Id, N> zero_ids;
+            zero_ids.fill(zero_value);
+            zero_value = ctx.ConstantComposite(result_type, zero_ids);
+        }
+        const Id in_bounds = ctx.OpULessThan(ctx.U1[1], compare_index, buffer_size);
+        return ctx.OpSelect(result_type, in_bounds, result, zero_value);
+    }
+    // Bounds checking not enabled, just return the plain value.
+    return result;
+}
+
 template <u32 N, BufferAlias alias>
-static Id EmitLoadBufferB32xN(EmitContext& ctx, u32 handle, Id address) {
+static Id EmitLoadBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
+    const auto flags = inst->Flags<IR::BufferInstInfo>();
     const auto& spv_buffer = ctx.buffers[handle];
     if (Sirit::ValidId(spv_buffer.offset)) {
         address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
@@ -411,31 +440,42 @@ static Id EmitLoadBufferB32xN(EmitContext& ctx, u32 handle, Id address) {
     const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
     const auto& data_types = alias == BufferAlias::U32 ? ctx.U32 : ctx.F32;
     const auto [id, pointer_type] = spv_buffer[alias];
-    if constexpr (N == 1) {
-        const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
-        return ctx.OpLoad(data_types[1], ptr);
-    } else {
-        boost::container::static_vector<Id, N> ids;
-        for (u32 i = 0; i < N; i++) {
-            const Id index_i = ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(i));
-            const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index_i)};
-            ids.push_back(ctx.OpLoad(data_types[1], ptr));
+
+    boost::container::static_vector<Id, N> ids;
+    for (u32 i = 0; i < N; i++) {
+        const Id index_i = i == 0 ? index : ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(i));
+        const Id ptr_i = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index_i);
+        const Id result_i = ctx.OpLoad(data_types[1], ptr_i);
+        if (!flags.typed) {
+            // Untyped loads have bounds checking per-component.
+            ids.push_back(EmitLoadBufferBoundsCheck<1>(ctx, index_i, spv_buffer.size_dwords,
                                                        result_i, alias == BufferAlias::F32));
+        } else {
+            ids.push_back(result_i);
         }
-        return ctx.OpCompositeConstruct(data_types[N], ids);
     }
+
+    const Id result = N == 1 ? ids[0] : ctx.OpCompositeConstruct(data_types[N], ids);
+    if (flags.typed) {
+        // Typed loads have single bounds check for the whole load.
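In host terms, the two checking modes behave like this sketch (illustrative names, not emulator API): a typed or format load is validated once against the last dword it touches and returns all zeros when that check fails, while an untyped load validates each dword and zero-fills only the components that fall outside the buffer.

#include <array>
#include <cstdint>
#include <span>

// Sketch of a 4-dword load under the two bounds-checking modes.
std::array<uint32_t, 4> LoadDwordX4(std::span<const uint32_t> buffer, uint32_t index, bool typed) {
    std::array<uint32_t, 4> result{};
    if (typed) {
        // Single check covering the whole load; any overrun zeroes the entire result.
        if (index + 3 < buffer.size()) {
            for (uint32_t i = 0; i < 4; ++i) {
                result[i] = buffer[index + i];
            }
        }
        return result;
    }
    // Per-component checks; only out-of-bounds dwords read back as zero.
    for (uint32_t i = 0; i < 4; ++i) {
        result[i] = index + i < buffer.size() ? buffer[index + i] : 0u;
    }
    return result;
}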
+        return EmitLoadBufferBoundsCheck<N>(ctx, index, spv_buffer.size_dwords, result,
+                                            alias == BufferAlias::F32);
+    }
+    return result;
 }
 
-Id EmitLoadBufferU8(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
+Id EmitLoadBufferU8(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
     const auto& spv_buffer = ctx.buffers[handle];
     if (Sirit::ValidId(spv_buffer.offset)) {
         address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
     }
     const auto [id, pointer_type] = spv_buffer[BufferAlias::U8];
     const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address)};
-    return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, ptr));
+    const Id result{ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, ptr))};
+    return EmitLoadBufferBoundsCheck<1>(ctx, address, spv_buffer.size, result, false);
 }
 
-Id EmitLoadBufferU16(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
+Id EmitLoadBufferU16(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
     const auto& spv_buffer = ctx.buffers[handle];
     if (Sirit::ValidId(spv_buffer.offset)) {
         address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
@@ -443,47 +483,73 @@ Id EmitLoadBufferU16(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
     const auto [id, pointer_type] = spv_buffer[BufferAlias::U16];
     const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(1u));
     const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
-    return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, ptr));
+    const Id result{ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, ptr))};
+    return EmitLoadBufferBoundsCheck<1>(ctx, index, spv_buffer.size_shorts, result, false);
 }
 
-Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
-    return EmitLoadBufferB32xN<1, BufferAlias::U32>(ctx, handle, address);
+Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
+    return EmitLoadBufferB32xN<1, BufferAlias::U32>(ctx, inst, handle, address);
 }
 
-Id EmitLoadBufferU32x2(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
-    return EmitLoadBufferB32xN<2, BufferAlias::U32>(ctx, handle, address);
+Id EmitLoadBufferU32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
+    return EmitLoadBufferB32xN<2, BufferAlias::U32>(ctx, inst, handle, address);
 }
 
-Id EmitLoadBufferU32x3(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
-    return EmitLoadBufferB32xN<3, BufferAlias::U32>(ctx, handle, address);
+Id EmitLoadBufferU32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
+    return EmitLoadBufferB32xN<3, BufferAlias::U32>(ctx, inst, handle, address);
 }
 
-Id EmitLoadBufferU32x4(EmitContext& ctx, IR::Inst*, u32 handle, Id address) {
-    return EmitLoadBufferB32xN<4, BufferAlias::U32>(ctx, handle, address);
+Id EmitLoadBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
+    return EmitLoadBufferB32xN<4, BufferAlias::U32>(ctx, inst, handle, address);
 }
 
 Id EmitLoadBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    return EmitLoadBufferB32xN<1, BufferAlias::F32>(ctx, handle, address);
+    return EmitLoadBufferB32xN<1, BufferAlias::F32>(ctx, inst, handle, address);
 }
 
 Id EmitLoadBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    return EmitLoadBufferB32xN<2, BufferAlias::F32>(ctx, handle, address);
+    return EmitLoadBufferB32xN<2, BufferAlias::F32>(ctx, inst, handle, address);
 }
 
 Id EmitLoadBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    return EmitLoadBufferB32xN<3, BufferAlias::F32>(ctx, handle, address);
+    return EmitLoadBufferB32xN<3, BufferAlias::F32>(ctx, inst, handle, address);
 }
 
 Id EmitLoadBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
-    return EmitLoadBufferB32xN<4, BufferAlias::F32>(ctx, handle, address);
+    return EmitLoadBufferB32xN<4, BufferAlias::F32>(ctx, inst, handle, address);
 }
 
 Id EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
     UNREACHABLE_MSG("SPIR-V instruction");
 }
 
+template <u32 N>
+void EmitStoreBufferBoundsCheck(EmitContext& ctx, Id index, Id buffer_size, auto emit_func) {
+    if (Sirit::ValidId(buffer_size)) {
+        // Bounds checking enabled, wrap in a conditional branch.
+        auto compare_index = index;
+        if (N > 1) {
+            compare_index = ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(N - 1));
+        }
+        const Id in_bounds = ctx.OpULessThan(ctx.U1[1], compare_index, buffer_size);
+        const Id in_bounds_label = ctx.OpLabel();
+        const Id merge_label = ctx.OpLabel();
+        ctx.OpSelectionMerge(merge_label, spv::SelectionControlMask::MaskNone);
+        ctx.OpBranchConditional(in_bounds, in_bounds_label, merge_label);
+        ctx.AddLabel(in_bounds_label);
+        emit_func();
+        ctx.OpBranch(merge_label);
+        ctx.AddLabel(merge_label);
+        return;
+    }
+    // Bounds checking not enabled, just perform the store.
+    emit_func();
+}
+
 template <u32 N, BufferAlias alias>
-static void EmitStoreBufferB32xN(EmitContext& ctx, u32 handle, Id address, Id value) {
+static void EmitStoreBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address,
+                                 Id value) {
+    const auto flags = inst->Flags<IR::BufferInstInfo>();
     const auto& spv_buffer = ctx.buffers[handle];
     if (Sirit::ValidId(spv_buffer.offset)) {
         address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
@@ -491,15 +557,27 @@ static void EmitStoreBufferB32xN(EmitContext& ctx, u32 handle, Id address, Id va
     const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
     const auto& data_types = alias == BufferAlias::U32 ? ctx.U32 : ctx.F32;
     const auto [id, pointer_type] = spv_buffer[alias];
-    if constexpr (N == 1) {
-        const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
-        ctx.OpStore(ptr, value);
-    } else {
+
+    auto store = [&] {
         for (u32 i = 0; i < N; i++) {
-            const Id index_i = ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(i));
-            const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index_i);
-            ctx.OpStore(ptr, ctx.OpCompositeExtract(data_types[1], value, i));
+            const Id index_i = i == 0 ? index : ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(i));
+            const Id ptr_i = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index_i);
+            const Id value_i = N == 1 ? value : ctx.OpCompositeExtract(data_types[1], value, i);
+            auto store_i = [&]() { ctx.OpStore(ptr_i, value_i); };
+            if (!flags.typed) {
+                // Untyped stores have bounds checking per-component.
+                EmitStoreBufferBoundsCheck<1>(ctx, index_i, spv_buffer.size_dwords, store_i);
+            } else {
+                store_i();
+            }
         }
+    };
+
+    if (flags.typed) {
+        // Typed stores have single bounds check for the whole store.
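Stores mirror the load rules: when the size is known, a typed store is guarded by one branch covering the last dword written, while an untyped store guards each dword separately, so out-of-bounds components are simply dropped. Roughly (illustrative names, not emulator API):

#include <cstdint>
#include <span>

// Sketch of a 2-dword store under the two checking modes.
void StoreDwordX2(std::span<uint32_t> buffer, uint32_t index, uint32_t lo, uint32_t hi,
                  bool typed) {
    if (typed) {
        if (index + 1 < buffer.size()) { // one check for the whole store
            buffer[index] = lo;
            buffer[index + 1] = hi;
        }
        return;
    }
    if (index < buffer.size()) { // per-component checks
        buffer[index] = lo;
    }
    if (index + 1 < buffer.size()) {
        buffer[index + 1] = hi;
    }
}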
+ EmitStoreBufferBoundsCheck(ctx, index, spv_buffer.size_dwords, store); + } else { + store(); } } @@ -510,7 +588,8 @@ void EmitStoreBufferU8(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id v } const auto [id, pointer_type] = spv_buffer[BufferAlias::U8]; const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address)}; - ctx.OpStore(ptr, ctx.OpUConvert(ctx.U8, value)); + const Id result{ctx.OpUConvert(ctx.U8, value)}; + EmitStoreBufferBoundsCheck<1>(ctx, address, spv_buffer.size, [&] { ctx.OpStore(ptr, result); }); } void EmitStoreBufferU16(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) { @@ -521,39 +600,41 @@ void EmitStoreBufferU16(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id const auto [id, pointer_type] = spv_buffer[BufferAlias::U16]; const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(1u)); const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)}; - ctx.OpStore(ptr, ctx.OpUConvert(ctx.U16, value)); + const Id result{ctx.OpUConvert(ctx.U16, value)}; + EmitStoreBufferBoundsCheck<1>(ctx, index, spv_buffer.size_shorts, + [&] { ctx.OpStore(ptr, result); }); } -void EmitStoreBufferU32(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) { - EmitStoreBufferB32xN<1, BufferAlias::U32>(ctx, handle, address, value); +void EmitStoreBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { + EmitStoreBufferB32xN<1, BufferAlias::U32>(ctx, inst, handle, address, value); } -void EmitStoreBufferU32x2(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) { - EmitStoreBufferB32xN<2, BufferAlias::U32>(ctx, handle, address, value); +void EmitStoreBufferU32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { + EmitStoreBufferB32xN<2, BufferAlias::U32>(ctx, inst, handle, address, value); } -void EmitStoreBufferU32x3(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) { - EmitStoreBufferB32xN<3, BufferAlias::U32>(ctx, handle, address, value); +void EmitStoreBufferU32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { + EmitStoreBufferB32xN<3, BufferAlias::U32>(ctx, inst, handle, address, value); } -void EmitStoreBufferU32x4(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) { - EmitStoreBufferB32xN<4, BufferAlias::U32>(ctx, handle, address, value); +void EmitStoreBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { + EmitStoreBufferB32xN<4, BufferAlias::U32>(ctx, inst, handle, address, value); } void EmitStoreBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { - EmitStoreBufferB32xN<1, BufferAlias::F32>(ctx, handle, address, value); + EmitStoreBufferB32xN<1, BufferAlias::F32>(ctx, inst, handle, address, value); } void EmitStoreBufferF32x2(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { - EmitStoreBufferB32xN<2, BufferAlias::F32>(ctx, handle, address, value); + EmitStoreBufferB32xN<2, BufferAlias::F32>(ctx, inst, handle, address, value); } void EmitStoreBufferF32x3(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { - EmitStoreBufferB32xN<3, BufferAlias::F32>(ctx, handle, address, value); + EmitStoreBufferB32xN<3, BufferAlias::F32>(ctx, inst, handle, address, value); } void EmitStoreBufferF32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { - EmitStoreBufferB32xN<4, BufferAlias::F32>(ctx, handle, address, value); + EmitStoreBufferB32xN<4, BufferAlias::F32>(ctx, inst, handle, address, value); } void 
EmitStoreBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value) { diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp index 724550cd6..fe7bd3356 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_special.cpp @@ -14,7 +14,7 @@ void EmitPrologue(EmitContext& ctx) { if (ctx.info.loads.Get(IR::Attribute::WorkgroupIndex)) { ctx.DefineWorkgroupIndex(); } - ctx.DefineBufferOffsets(); + ctx.DefineBufferProperties(); } void ConvertDepthMode(EmitContext& ctx) { diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index da20dc691..7c25d1477 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -192,8 +192,27 @@ EmitContext::SpirvAttribute EmitContext::GetAttributeInfo(AmdGpu::NumberFormat f UNREACHABLE_MSG("Invalid attribute type {}", fmt); } -void EmitContext::DefineBufferOffsets() { - for (BufferDefinition& buffer : buffers) { +Id EmitContext::GetBufferSize(const u32 sharp_idx) { + const auto& srt_flatbuf = buffers.back(); + ASSERT(srt_flatbuf.buffer_type == BufferType::ReadConstUbo); + const auto [id, pointer_type] = srt_flatbuf[BufferAlias::U32]; + + const auto rsrc1{ + OpLoad(U32[1], OpAccessChain(pointer_type, id, u32_zero_value, ConstU32(sharp_idx + 1)))}; + const auto rsrc2{ + OpLoad(U32[1], OpAccessChain(pointer_type, id, u32_zero_value, ConstU32(sharp_idx + 2)))}; + + const auto stride{OpBitFieldUExtract(U32[1], rsrc1, ConstU32(16u), ConstU32(14u))}; + const auto num_records{rsrc2}; + + const auto stride_zero{OpIEqual(U1[1], stride, u32_zero_value)}; + const auto stride_size{OpIMul(U32[1], num_records, stride)}; + return OpSelect(U32[1], stride_zero, num_records, stride_size); +} + +void EmitContext::DefineBufferProperties() { + for (u32 i = 0; i < buffers.size(); i++) { + BufferDefinition& buffer = buffers[i]; if (buffer.buffer_type != BufferType::Guest) { continue; } @@ -208,6 +227,22 @@ void EmitContext::DefineBufferOffsets() { Name(buffer.offset, fmt::format("buf{}_off", binding)); buffer.offset_dwords = OpShiftRightLogical(U32[1], buffer.offset, ConstU32(2U)); Name(buffer.offset_dwords, fmt::format("buf{}_dword_off", binding)); + + // Only need to load size if performing bounds checks and the buffer is both guest and not + // inline. 
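GetBufferSize() above reads the second and third dwords of the guest V# back out of the SRT flatbuf and turns them into a byte size. The arithmetic it emits is equivalent to this sketch (field layout per the GCN buffer resource descriptor; names are illustrative):

#include <cstdint>

// Derive a byte size from V# dwords 1 and 2, as the emitted SPIR-V does.
uint32_t BufferSizeBytes(uint32_t rsrc_word1, uint32_t rsrc_word2) {
    const uint32_t stride = (rsrc_word1 >> 16) & 0x3FFFu; // 14-bit record stride in bytes
    const uint32_t num_records = rsrc_word2;
    // A zero stride means num_records is already a byte count; otherwise the buffer
    // holds num_records records of stride bytes each.
    return stride == 0 ? num_records : num_records * stride;
}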
+ if (!profile.supports_robust_buffer_access && buffer.buffer_type == BufferType::Guest) { + const BufferResource& desc = info.buffers[i]; + if (desc.sharp_idx == std::numeric_limits::max()) { + buffer.size = ConstU32(desc.inline_cbuf.GetSize()); + } else { + buffer.size = GetBufferSize(desc.sharp_idx); + } + Name(buffer.size, fmt::format("buf{}_size", binding)); + buffer.size_shorts = OpShiftRightLogical(U32[1], buffer.size, ConstU32(1U)); + Name(buffer.size_shorts, fmt::format("buf{}_short_size", binding)); + buffer.size_dwords = OpShiftRightLogical(U32[1], buffer.size, ConstU32(2U)); + Name(buffer.size_dwords, fmt::format("buf{}_dword_size", binding)); + } } } @@ -589,34 +624,34 @@ void EmitContext::DefineOutputs() { void EmitContext::DefinePushDataBlock() { // Create push constants block for instance steps rates - const Id struct_type{Name(TypeStruct(U32[1], U32[1], U32[4], U32[4], U32[4], U32[4], U32[4], - U32[4], F32[1], F32[1], F32[1], F32[1]), + const Id struct_type{Name(TypeStruct(U32[1], U32[1], F32[1], F32[1], F32[1], F32[1], U32[4], + U32[4], U32[4], U32[4], U32[4], U32[4]), "AuxData")}; Decorate(struct_type, spv::Decoration::Block); - MemberName(struct_type, 0, "sr0"); - MemberName(struct_type, 1, "sr1"); - MemberName(struct_type, Shader::PushData::BufOffsetIndex + 0, "buf_offsets0"); - MemberName(struct_type, Shader::PushData::BufOffsetIndex + 1, "buf_offsets1"); - MemberName(struct_type, Shader::PushData::UdRegsIndex + 0, "ud_regs0"); - MemberName(struct_type, Shader::PushData::UdRegsIndex + 1, "ud_regs1"); - MemberName(struct_type, Shader::PushData::UdRegsIndex + 2, "ud_regs2"); - MemberName(struct_type, Shader::PushData::UdRegsIndex + 3, "ud_regs3"); - MemberName(struct_type, Shader::PushData::XOffsetIndex, "xoffset"); - MemberName(struct_type, Shader::PushData::YOffsetIndex, "yoffset"); - MemberName(struct_type, Shader::PushData::XScaleIndex, "xscale"); - MemberName(struct_type, Shader::PushData::YScaleIndex, "yscale"); - MemberDecorate(struct_type, 0, spv::Decoration::Offset, 0U); - MemberDecorate(struct_type, 1, spv::Decoration::Offset, 4U); - MemberDecorate(struct_type, Shader::PushData::BufOffsetIndex + 0, spv::Decoration::Offset, 8U); - MemberDecorate(struct_type, Shader::PushData::BufOffsetIndex + 1, spv::Decoration::Offset, 24U); - MemberDecorate(struct_type, Shader::PushData::UdRegsIndex + 0, spv::Decoration::Offset, 40U); - MemberDecorate(struct_type, Shader::PushData::UdRegsIndex + 1, spv::Decoration::Offset, 56U); - MemberDecorate(struct_type, Shader::PushData::UdRegsIndex + 2, spv::Decoration::Offset, 72U); - MemberDecorate(struct_type, Shader::PushData::UdRegsIndex + 3, spv::Decoration::Offset, 88U); - MemberDecorate(struct_type, Shader::PushData::XOffsetIndex, spv::Decoration::Offset, 104U); - MemberDecorate(struct_type, Shader::PushData::YOffsetIndex, spv::Decoration::Offset, 108U); - MemberDecorate(struct_type, Shader::PushData::XScaleIndex, spv::Decoration::Offset, 112U); - MemberDecorate(struct_type, Shader::PushData::YScaleIndex, spv::Decoration::Offset, 116U); + MemberName(struct_type, PushData::Step0Index, "sr0"); + MemberName(struct_type, PushData::Step1Index, "sr1"); + MemberName(struct_type, PushData::XOffsetIndex, "xoffset"); + MemberName(struct_type, PushData::YOffsetIndex, "yoffset"); + MemberName(struct_type, PushData::XScaleIndex, "xscale"); + MemberName(struct_type, PushData::YScaleIndex, "yscale"); + MemberName(struct_type, PushData::UdRegsIndex + 0, "ud_regs0"); + MemberName(struct_type, PushData::UdRegsIndex + 1, "ud_regs1"); + 
MemberName(struct_type, PushData::UdRegsIndex + 2, "ud_regs2"); + MemberName(struct_type, PushData::UdRegsIndex + 3, "ud_regs3"); + MemberName(struct_type, PushData::BufOffsetIndex + 0, "buf_offsets0"); + MemberName(struct_type, PushData::BufOffsetIndex + 1, "buf_offsets1"); + MemberDecorate(struct_type, PushData::Step0Index, spv::Decoration::Offset, 0U); + MemberDecorate(struct_type, PushData::Step1Index, spv::Decoration::Offset, 4U); + MemberDecorate(struct_type, PushData::XOffsetIndex, spv::Decoration::Offset, 8U); + MemberDecorate(struct_type, PushData::YOffsetIndex, spv::Decoration::Offset, 12U); + MemberDecorate(struct_type, PushData::XScaleIndex, spv::Decoration::Offset, 16U); + MemberDecorate(struct_type, PushData::YScaleIndex, spv::Decoration::Offset, 20U); + MemberDecorate(struct_type, PushData::UdRegsIndex + 0, spv::Decoration::Offset, 24U); + MemberDecorate(struct_type, PushData::UdRegsIndex + 1, spv::Decoration::Offset, 40U); + MemberDecorate(struct_type, PushData::UdRegsIndex + 2, spv::Decoration::Offset, 56U); + MemberDecorate(struct_type, PushData::UdRegsIndex + 3, spv::Decoration::Offset, 72U); + MemberDecorate(struct_type, PushData::BufOffsetIndex + 0, spv::Decoration::Offset, 88U); + MemberDecorate(struct_type, PushData::BufOffsetIndex + 1, spv::Decoration::Offset, 104U); push_data_block = DefineVar(struct_type, spv::StorageClass::PushConstant); Name(push_data_block, "push_data"); interfaces.push_back(push_data_block); @@ -661,12 +696,22 @@ EmitContext::BufferSpv EmitContext::DefineBuffer(bool is_storage, bool is_writte break; default: Name(id, fmt::format("{}_{}", is_storage ? "ssbo" : "ubo", binding.buffer)); + break; } interfaces.push_back(id); return {id, pointer_type}; }; void EmitContext::DefineBuffers() { + if (!profile.supports_robust_buffer_access && !info.has_readconst) { + // In case ReadConstUbo has not already been bound by IR and is needed + // to query buffer sizes, bind it now. 
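The reordered push-constant block and its explicit offsets above assume the matching host-side layout of Shader::PushData (reordered the same way in info.h further down). A sketch with the offsets spelled out; the array extents mirror the NumUserDataRegs and NumBuffers constants from that header:

#include <array>
#include <cstddef>
#include <cstdint>

// Host-side mirror of the decorated layout: six scalars, four uvec4s of user-data
// registers, then two uvec4s worth of per-buffer byte offsets.
struct PushDataLayout {
    uint32_t step0;                      // offset 0
    uint32_t step1;                      // offset 4
    float xoffset;                       // offset 8
    float yoffset;                       // offset 12
    float xscale;                        // offset 16
    float yscale;                        // offset 20
    std::array<uint32_t, 16> ud_regs;    // offset 24, uvec4s at 24/40/56/72
    std::array<uint8_t, 32> buf_offsets; // offset 88, uvec4s at 88/104
};
static_assert(offsetof(PushDataLayout, ud_regs) == 24);
static_assert(offsetof(PushDataLayout, buf_offsets) == 88);
static_assert(sizeof(PushDataLayout) == 120);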
+ info.buffers.push_back({ + .used_types = IR::Type::U32, + .inline_cbuf = AmdGpu::Buffer::Null(), + .buffer_type = BufferType::ReadConstUbo, + }); + } for (const auto& desc : info.buffers) { const auto buf_sharp = desc.GetSharp(info); const bool is_storage = desc.IsStorage(buf_sharp, profile); diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index 0fe6e336c..784748658 100644 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -43,7 +43,7 @@ public: Id Def(const IR::Value& value); - void DefineBufferOffsets(); + void DefineBufferProperties(); void DefineInterpolatedAttribs(); void DefineWorkgroupIndex(); @@ -248,6 +248,9 @@ public: BufferType buffer_type; Id offset; Id offset_dwords; + Id size; + Id size_shorts; + Id size_dwords; std::array aliases; const BufferSpv& operator[](BufferAlias alias) const { @@ -307,6 +310,8 @@ private: Id DefineFloat32ToUfloatM5(u32 mantissa_bits, std::string_view name); Id DefineUfloatM5ToFloat32(u32 mantissa_bits, std::string_view name); + + Id GetBufferSize(u32 sharp_idx); }; } // namespace Shader::Backend::SPIRV diff --git a/src/shader_recompiler/frontend/fetch_shader.cpp b/src/shader_recompiler/frontend/fetch_shader.cpp index 8ae664d79..55508b0f2 100644 --- a/src/shader_recompiler/frontend/fetch_shader.cpp +++ b/src/shader_recompiler/frontend/fetch_shader.cpp @@ -9,6 +9,12 @@ namespace Shader::Gcn { +const u32* GetFetchShaderCode(const Info& info, u32 sgpr_base) { + const u32* code; + std::memcpy(&code, &info.user_data[sgpr_base], sizeof(code)); + return code; +} + /** * s_load_dwordx4 s[8:11], s[2:3], 0x00 * s_load_dwordx4 s[12:15], s[2:3], 0x04 @@ -38,9 +44,8 @@ std::optional ParseFetchShader(const Shader::Info& info) { if (!info.has_fetch_shader) { return std::nullopt; } - const u32* code; - std::memcpy(&code, &info.user_data[info.fetch_shader_sgpr_base], sizeof(code)); + const auto* code = GetFetchShaderCode(info, info.fetch_shader_sgpr_base); FetchShaderData data{.code = code}; GcnCodeSlice code_slice(code, code + std::numeric_limits::max()); GcnDecodeContext decoder; diff --git a/src/shader_recompiler/frontend/fetch_shader.h b/src/shader_recompiler/frontend/fetch_shader.h index 080b0eb22..837caafa0 100644 --- a/src/shader_recompiler/frontend/fetch_shader.h +++ b/src/shader_recompiler/frontend/fetch_shader.h @@ -64,6 +64,8 @@ struct FetchShaderData { } }; +const u32* GetFetchShaderCode(const Info& info, u32 sgpr_base); + std::optional ParseFetchShader(const Shader::Info& info); } // namespace Shader::Gcn diff --git a/src/shader_recompiler/frontend/translate/translate.cpp b/src/shader_recompiler/frontend/translate/translate.cpp index 7f1bcb33e..230f3917f 100644 --- a/src/shader_recompiler/frontend/translate/translate.cpp +++ b/src/shader_recompiler/frontend/translate/translate.cpp @@ -4,6 +4,7 @@ #include "common/config.h" #include "common/io_file.h" #include "common/path_util.h" +#include "shader_recompiler/frontend/decode.h" #include "shader_recompiler/frontend/fetch_shader.h" #include "shader_recompiler/frontend/translate/translate.h" #include "shader_recompiler/info.h" @@ -470,8 +471,29 @@ void Translator::SetDst64(const InstOperand& operand, const IR::U64F64& value_ra void Translator::EmitFetch(const GcnInst& inst) { // Read the pointer to the fetch shader assembly. 
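GetFetchShaderCode() above recovers the host pointer to the fetch shader: two consecutive 32-bit user-data registers hold the 64-bit address, so it is reassembled with memcpy rather than a type-punning cast. A minimal sketch (illustrative names, not emulator API):

#include <cstdint>
#include <cstring>

// Reassemble a 64-bit code pointer from two consecutive 32-bit user-data SGPRs.
const uint32_t* ReadFetchShaderPointer(const uint32_t* user_data, uint32_t sgpr_base) {
    const uint32_t* code = nullptr;
    std::memcpy(&code, &user_data[sgpr_base], sizeof(code)); // copies two SGPRs on a 64-bit host
    return code;
}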
+ const auto code_sgpr_base = inst.src[0].code; + if (!profile.supports_robust_buffer_access) { + // The fetch shader must be inlined to access as regular buffers, so that + // bounds checks can be emitted to emulate robust buffer access. + const auto* code = GetFetchShaderCode(info, code_sgpr_base); + GcnCodeSlice slice(code, code + std::numeric_limits::max()); + GcnDecodeContext decoder; + + // Decode and save instructions + u32 sub_pc = 0; + while (!slice.atEnd()) { + const auto sub_inst = decoder.decodeInstruction(slice); + if (sub_inst.opcode == Opcode::S_SETPC_B64) { + // Assume we're swapping back to the main shader. + break; + } + TranslateInstruction(sub_inst, sub_pc++); + } + return; + } + info.has_fetch_shader = true; - info.fetch_shader_sgpr_base = inst.src[0].code; + info.fetch_shader_sgpr_base = code_sgpr_base; const auto fetch_data = ParseFetchShader(info); ASSERT(fetch_data.has_value()); @@ -520,6 +542,40 @@ void Translator::LogMissingOpcode(const GcnInst& inst) { info.translation_failed = true; } +void Translator::TranslateInstruction(const GcnInst& inst, const u32 pc) { + // Emit instructions for each category. + switch (inst.category) { + case InstCategory::DataShare: + EmitDataShare(inst); + break; + case InstCategory::VectorInterpolation: + EmitVectorInterpolation(inst); + break; + case InstCategory::ScalarMemory: + EmitScalarMemory(inst); + break; + case InstCategory::VectorMemory: + EmitVectorMemory(inst); + break; + case InstCategory::Export: + EmitExport(inst); + break; + case InstCategory::FlowControl: + EmitFlowControl(pc, inst); + break; + case InstCategory::ScalarALU: + EmitScalarAlu(inst); + break; + case InstCategory::VectorALU: + EmitVectorAlu(inst); + break; + case InstCategory::DebugProfile: + break; + default: + UNREACHABLE(); + } +} + void Translate(IR::Block* block, u32 pc, std::span inst_list, Info& info, const RuntimeInfo& runtime_info, const Profile& profile) { if (inst_list.empty()) { @@ -537,37 +593,7 @@ void Translate(IR::Block* block, u32 pc, std::span inst_list, Inf continue; } - // Emit instructions for each category. 
- switch (inst.category) { - case InstCategory::DataShare: - translator.EmitDataShare(inst); - break; - case InstCategory::VectorInterpolation: - translator.EmitVectorInterpolation(inst); - break; - case InstCategory::ScalarMemory: - translator.EmitScalarMemory(inst); - break; - case InstCategory::VectorMemory: - translator.EmitVectorMemory(inst); - break; - case InstCategory::Export: - translator.EmitExport(inst); - break; - case InstCategory::FlowControl: - translator.EmitFlowControl(pc, inst); - break; - case InstCategory::ScalarALU: - translator.EmitScalarAlu(inst); - break; - case InstCategory::VectorALU: - translator.EmitVectorAlu(inst); - break; - case InstCategory::DebugProfile: - break; - default: - UNREACHABLE(); - } + translator.TranslateInstruction(inst, pc); } } diff --git a/src/shader_recompiler/frontend/translate/translate.h b/src/shader_recompiler/frontend/translate/translate.h index 563881a8e..b4919213b 100644 --- a/src/shader_recompiler/frontend/translate/translate.h +++ b/src/shader_recompiler/frontend/translate/translate.h @@ -58,6 +58,8 @@ public: explicit Translator(IR::Block* block_, Info& info, const RuntimeInfo& runtime_info, const Profile& profile); + void TranslateInstruction(const GcnInst& inst, u32 pc); + // Instruction categories void EmitPrologue(); void EmitFetch(const GcnInst& inst); diff --git a/src/shader_recompiler/frontend/translate/vector_memory.cpp b/src/shader_recompiler/frontend/translate/vector_memory.cpp index 0b911eb57..bfbe937a1 100644 --- a/src/shader_recompiler/frontend/translate/vector_memory.cpp +++ b/src/shader_recompiler/frontend/translate/vector_memory.cpp @@ -195,6 +195,7 @@ void Translator::BUFFER_LOAD(u32 num_dwords, bool is_typed, const GcnInst& inst) buffer_info.inst_offset.Assign(mubuf.offset); buffer_info.globally_coherent.Assign(mubuf.glc); buffer_info.system_coherent.Assign(mubuf.slc); + buffer_info.typed.Assign(is_typed); if (is_typed) { const auto& mtbuf = inst.control.mtbuf; const auto dmft = static_cast(mtbuf.dfmt); @@ -241,6 +242,7 @@ void Translator::BUFFER_LOAD_FORMAT(u32 num_dwords, const GcnInst& inst) { buffer_info.inst_offset.Assign(mubuf.offset); buffer_info.globally_coherent.Assign(mubuf.glc); buffer_info.system_coherent.Assign(mubuf.slc); + buffer_info.typed.Assign(true); const IR::Value handle = ir.CompositeConstruct(ir.GetScalarReg(sharp), ir.GetScalarReg(sharp + 1), @@ -283,6 +285,7 @@ void Translator::BUFFER_STORE(u32 num_dwords, bool is_typed, const GcnInst& inst buffer_info.inst_offset.Assign(mubuf.offset); buffer_info.globally_coherent.Assign(mubuf.glc); buffer_info.system_coherent.Assign(mubuf.slc); + buffer_info.typed.Assign(is_typed); if (is_typed) { const auto& mtbuf = inst.control.mtbuf; const auto dmft = static_cast(mtbuf.dfmt); @@ -339,6 +342,7 @@ void Translator::BUFFER_STORE_FORMAT(u32 num_dwords, const GcnInst& inst) { buffer_info.inst_offset.Assign(mubuf.offset); buffer_info.globally_coherent.Assign(mubuf.glc); buffer_info.system_coherent.Assign(mubuf.slc); + buffer_info.typed.Assign(true); const IR::VectorReg src_reg{inst.src[1].code}; diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h index 13f310cf8..8dcf9c5c4 100644 --- a/src/shader_recompiler/info.h +++ b/src/shader_recompiler/info.h @@ -23,6 +23,10 @@ namespace Shader { static constexpr size_t NumUserDataRegs = 16; +static constexpr size_t NumImages = 64; +static constexpr size_t NumBuffers = 32; +static constexpr size_t NumSamplers = 16; +static constexpr size_t NumFMasks = 8; enum class TextureType : u32 { Color1D, @@ 
-63,7 +67,7 @@ struct BufferResource { [[nodiscard]] constexpr AmdGpu::Buffer GetSharp(const Info& info) const noexcept; }; -using BufferResourceList = boost::container::small_vector; +using BufferResourceList = boost::container::small_vector; struct ImageResource { u32 sharp_idx; @@ -74,7 +78,7 @@ struct ImageResource { [[nodiscard]] constexpr AmdGpu::Image GetSharp(const Info& info) const noexcept; }; -using ImageResourceList = boost::container::small_vector; +using ImageResourceList = boost::container::small_vector; struct SamplerResource { u32 sharp_idx; @@ -84,31 +88,33 @@ struct SamplerResource { constexpr AmdGpu::Sampler GetSharp(const Info& info) const noexcept; }; -using SamplerResourceList = boost::container::small_vector; +using SamplerResourceList = boost::container::small_vector; struct FMaskResource { u32 sharp_idx; constexpr AmdGpu::Image GetSharp(const Info& info) const noexcept; }; -using FMaskResourceList = boost::container::small_vector; +using FMaskResourceList = boost::container::small_vector; struct PushData { - static constexpr u32 BufOffsetIndex = 2; - static constexpr u32 UdRegsIndex = 4; - static constexpr u32 XOffsetIndex = 8; - static constexpr u32 YOffsetIndex = 9; - static constexpr u32 XScaleIndex = 10; - static constexpr u32 YScaleIndex = 11; + static constexpr u32 Step0Index = 0; + static constexpr u32 Step1Index = 1; + static constexpr u32 XOffsetIndex = 2; + static constexpr u32 YOffsetIndex = 3; + static constexpr u32 XScaleIndex = 4; + static constexpr u32 YScaleIndex = 5; + static constexpr u32 UdRegsIndex = 6; + static constexpr u32 BufOffsetIndex = UdRegsIndex + NumUserDataRegs / 4; u32 step0; u32 step1; - std::array buf_offsets; - std::array ud_regs; float xoffset; float yoffset; float xscale; float yscale; + std::array ud_regs; + std::array buf_offsets; void AddOffset(u32 binding, u32 offset) { ASSERT(offset < 256 && binding < buf_offsets.size()); diff --git a/src/shader_recompiler/ir/reg.h b/src/shader_recompiler/ir/reg.h index 19e0da3dd..3ee7c4355 100644 --- a/src/shader_recompiler/ir/reg.h +++ b/src/shader_recompiler/ir/reg.h @@ -51,6 +51,7 @@ union BufferInstInfo { BitField<2, 12, u32> inst_offset; BitField<14, 1, u32> system_coherent; BitField<15, 1, u32> globally_coherent; + BitField<16, 1, u32> typed; }; enum class ScalarReg : u32 { diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h index 53d940b79..43d2b87d4 100644 --- a/src/shader_recompiler/profile.h +++ b/src/shader_recompiler/profile.h @@ -25,6 +25,7 @@ struct Profile { bool support_legacy_vertex_attributes{}; bool supports_image_load_store_lod{}; bool supports_native_cube_calc{}; + bool supports_robust_buffer_access{}; bool has_broken_spirv_clamp{}; bool lower_left_origin_mode{}; bool needs_manual_interpolation{}; diff --git a/src/video_core/buffer_cache/buffer_cache.cpp b/src/video_core/buffer_cache/buffer_cache.cpp index ccb45c095..7eb4ea9e1 100644 --- a/src/video_core/buffer_cache/buffer_cache.cpp +++ b/src/video_core/buffer_cache/buffer_cache.cpp @@ -608,7 +608,11 @@ bool BufferCache::SynchronizeBufferFromImage(Buffer& buffer, VAddr device_addr, return false; } Image& image = texture_cache.GetImage(image_id); - if (False(image.flags & ImageFlagBits::GpuModified)) { + // Only perform sync if image is: + // - GPU modified; otherwise there are no changes to synchronize. + // - Not CPU modified; otherwise we could overwrite CPU changes with stale GPU changes. 
+ if (False(image.flags & ImageFlagBits::GpuModified) || + True(image.flags & ImageFlagBits::CpuDirty)) { return false; } ASSERT_MSG(device_addr == image.info.guest_address, diff --git a/src/video_core/renderer_vulkan/vk_instance.cpp b/src/video_core/renderer_vulkan/vk_instance.cpp index a17f8c9c2..f01401569 100644 --- a/src/video_core/renderer_vulkan/vk_instance.cpp +++ b/src/video_core/renderer_vulkan/vk_instance.cpp @@ -210,9 +210,6 @@ bool Instance::CreateDevice() { vk::PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT, vk::PhysicalDevicePortabilitySubsetFeaturesKHR>(); features = feature_chain.get().features; -#ifdef __APPLE__ - portability_features = feature_chain.get(); -#endif const vk::StructureChain properties_chain = physical_device.getProperties2< vk::PhysicalDeviceProperties2, vk::PhysicalDeviceVulkan11Properties, @@ -258,16 +255,19 @@ bool Instance::CreateDevice() { add_extension(VK_KHR_SWAPCHAIN_EXTENSION_NAME); add_extension(VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME); add_extension(VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME); - dynamic_color_write_mask = add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME); - if (dynamic_color_write_mask) { - dynamic_color_write_mask = - feature_chain.get() - .extendedDynamicState3ColorWriteMask; + dynamic_state_3 = add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME); + if (dynamic_state_3) { + dynamic_state_3_features = + feature_chain.get(); + LOG_INFO(Render_Vulkan, "- extendedDynamicState3ColorWriteMask: {}", + dynamic_state_3_features.extendedDynamicState3ColorWriteMask); } - null_descriptor = add_extension(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME); - if (null_descriptor) { - null_descriptor = - feature_chain.get().nullDescriptor; + robustness2 = add_extension(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME); + if (robustness2) { + robustness2_features = feature_chain.get(); + LOG_INFO(Render_Vulkan, "- robustBufferAccess2: {}", + robustness2_features.robustBufferAccess2); + LOG_INFO(Render_Vulkan, "- nullDescriptor: {}", robustness2_features.nullDescriptor); } custom_border_color = add_extension(VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME); depth_clip_control = add_extension(VK_EXT_DEPTH_CLIP_CONTROL_EXTENSION_NAME); @@ -284,6 +284,9 @@ bool Instance::CreateDevice() { #ifdef __APPLE__ // Required by Vulkan spec if supported. 
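The vk_instance.cpp changes above follow the usual query-then-enable pattern for VK_EXT_robustness2 and VK_EXT_extended_dynamic_state3: read the feature structs back from the physical-device chain, log what the driver reports, and only request the bits that are actually supported at device creation. A minimal sketch of the query side in vulkan-hpp (illustrative helper, not emulator API):

#include <vulkan/vulkan.hpp>

// Query VK_EXT_robustness2 features; only features reported here may be enabled
// when creating the device, otherwise validation layers flag an error.
vk::PhysicalDeviceRobustness2FeaturesEXT QueryRobustness2(vk::PhysicalDevice physical_device) {
    const auto chain =
        physical_device.getFeatures2<vk::PhysicalDeviceFeatures2,
                                     vk::PhysicalDeviceRobustness2FeaturesEXT>();
    return chain.get<vk::PhysicalDeviceRobustness2FeaturesEXT>();
}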
portability_subset = add_extension(VK_KHR_PORTABILITY_SUBSET_EXTENSION_NAME); + if (portability_subset) { + portability_features = feature_chain.get(); + } #endif const auto family_properties = physical_device.getQueueFamilyProperties(); @@ -387,13 +390,15 @@ bool Instance::CreateDevice() { .customBorderColorWithoutFormat = true, }, vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT{ - .extendedDynamicState3ColorWriteMask = true, + .extendedDynamicState3ColorWriteMask = + dynamic_state_3_features.extendedDynamicState3ColorWriteMask, }, vk::PhysicalDeviceDepthClipControlFeaturesEXT{ .depthClipControl = true, }, vk::PhysicalDeviceRobustness2FeaturesEXT{ - .nullDescriptor = true, + .robustBufferAccess2 = robustness2_features.robustBufferAccess2, + .nullDescriptor = robustness2_features.nullDescriptor, }, vk::PhysicalDeviceVertexInputDynamicStateFeaturesEXT{ .vertexInputDynamicState = true, @@ -420,13 +425,13 @@ bool Instance::CreateDevice() { if (!custom_border_color) { device_chain.unlink(); } - if (!dynamic_color_write_mask) { + if (!dynamic_state_3) { device_chain.unlink(); } if (!depth_clip_control) { device_chain.unlink(); } - if (!null_descriptor) { + if (!robustness2) { device_chain.unlink(); } if (!vertex_input_dynamic_state) { diff --git a/src/video_core/renderer_vulkan/vk_instance.h b/src/video_core/renderer_vulkan/vk_instance.h index 682824044..bdd92cba9 100644 --- a/src/video_core/renderer_vulkan/vk_instance.h +++ b/src/video_core/renderer_vulkan/vk_instance.h @@ -99,9 +99,10 @@ public: return depth_clip_control; } - /// Returns true when dynamic color write mask state is supported + /// Returns true when the extendedDynamicState3ColorWriteMask feature of + /// VK_EXT_extended_dynamic_state3 is supported. bool IsDynamicColorWriteMaskSupported() const { - return dynamic_color_write_mask; + return dynamic_state_3 && dynamic_state_3_features.extendedDynamicState3ColorWriteMask; } /// Returns true when VK_EXT_vertex_input_dynamic_state is supported. @@ -109,9 +110,14 @@ public: return vertex_input_dynamic_state; } + /// Returns true when the robustBufferAccess2 feature of VK_EXT_robustness2 is supported. + bool IsRobustBufferAccess2Supported() const { + return robustness2 && robustness2_features.robustBufferAccess2; + } + /// Returns true when the nullDescriptor feature of VK_EXT_robustness2 is supported. bool IsNullDescriptorSupported() const { - return null_descriptor; + return robustness2 && robustness2_features.nullDescriptor; } /// Returns true when VK_KHR_fragment_shader_barycentric is supported. 
@@ -303,6 +309,8 @@ private: vk::PhysicalDevicePushDescriptorPropertiesKHR push_descriptor_props; vk::PhysicalDeviceFeatures features; vk::PhysicalDevicePortabilitySubsetFeaturesKHR portability_features; + vk::PhysicalDeviceExtendedDynamicState3FeaturesEXT dynamic_state_3_features; + vk::PhysicalDeviceRobustness2FeaturesEXT robustness2_features; vk::DriverIdKHR driver_id; vk::UniqueDebugUtilsMessengerEXT debug_callback{}; std::string vendor_name; @@ -317,9 +325,9 @@ private: bool custom_border_color{}; bool fragment_shader_barycentric{}; bool depth_clip_control{}; - bool dynamic_color_write_mask{}; + bool dynamic_state_3{}; bool vertex_input_dynamic_state{}; - bool null_descriptor{}; + bool robustness2{}; bool list_restart{}; bool legacy_vertex_attributes{}; bool shader_stencil_export{}; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 6ac7f7e43..3db22d585 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -200,6 +200,7 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_, .support_legacy_vertex_attributes = instance_.IsLegacyVertexAttributesSupported(), .supports_image_load_store_lod = instance_.IsImageLoadStoreLodSupported(), .supports_native_cube_calc = instance_.IsAmdGcnShaderSupported(), + .supports_robust_buffer_access = instance_.IsRobustBufferAccess2Supported(), .needs_manual_interpolation = instance.IsFragmentShaderBarycentricSupported() && instance.GetDriverID() == vk::DriverId::eNvidiaProprietary, .needs_lds_barriers = instance.GetDriverID() == vk::DriverId::eNvidiaProprietary || diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 816f149b0..4d58c0ea3 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -447,7 +447,6 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) { set_writes.clear(); buffer_barriers.clear(); buffer_infos.clear(); - buffer_views.clear(); image_infos.clear(); // Bind resource buffers and textures. diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 292944a10..3b45fd52e 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -110,18 +110,17 @@ private: std::pair, 8> cb_descs; std::optional> db_desc; - boost::container::static_vector image_infos; - boost::container::static_vector buffer_views; - boost::container::static_vector buffer_infos; - boost::container::static_vector bound_images; + boost::container::static_vector image_infos; + boost::container::static_vector buffer_infos; + boost::container::static_vector bound_images; Pipeline::DescriptorWrites set_writes; Pipeline::BufferBarriers buffer_barriers; using BufferBindingInfo = std::pair; - boost::container::static_vector buffer_bindings; + boost::container::static_vector buffer_bindings; using ImageBindingInfo = std::pair; - boost::container::static_vector image_bindings; + boost::container::static_vector image_bindings; }; } // namespace Vulkan
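Taken together: when the device exposes robustBufferAccess2, out-of-bounds buffer reads already return zero (or zero-filled vectors) and out-of-bounds writes are discarded, which matches the GCN behavior games rely on, so supports_robust_buffer_access lets the recompiler skip its own checks; otherwise buffer sizes are read from the guest V#s and the select/branch guards from the earlier hunks are emitted. A sketch of that decision (hypothetical helper, not part of the patch):

// The Vulkan feature decides whether the shader recompiler must emit manual
// AMD-style bounds checks.
struct BoundsCheckPolicy {
    bool load_buffer_sizes; // emit GetBufferSize() for each guest buffer in the prologue
    bool emit_guards;       // wrap loads/stores/atomics as in the SPIR-V hunks above
};

BoundsCheckPolicy ChoosePolicy(bool robust_buffer_access2_supported) {
    const bool manual = !robust_buffer_access2_supported;
    return {.load_buffer_sizes = manual, .emit_guards = manual};
}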