diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
index 02f290140..b5b18eed1 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp
@@ -300,7 +300,7 @@ void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ct
     if (stage == LogicalStage::TessellationControl || stage == LogicalStage::TessellationEval) {
         ctx.AddCapability(spv::Capability::Tessellation);
     }
-    if (info.dma_types != IR::Type::Void) {
+    if (info.uses_dma) {
         ctx.AddCapability(spv::Capability::PhysicalStorageBufferAddresses);
         ctx.AddExtension("SPV_KHR_physical_storage_buffer");
     }
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
index 97e455ff8..3c833b87d 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_atomic.cpp
@@ -7,7 +7,11 @@
 #include "shader_recompiler/backend/spirv/spirv_emit_context.h"
 
 namespace Shader::Backend::SPIRV {
+
 namespace {
+using PointerType = EmitContext::PointerType;
+using PointerSize = EmitContext::PointerSize;
+
 std::pair<Id, Id> AtomicArgs(EmitContext& ctx) {
     const Id scope{ctx.ConstU32(static_cast<u32>(spv::Scope::Device))};
     const Id semantics{ctx.u32_zero_value};
@@ -61,14 +65,13 @@ Id BufferAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id
             return ctx.U32[1];
         }
     }();
-    if (Sirit::ValidId(buffer.offset)) {
-        address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
+    if (const Id offset = buffer.Offset(PointerSize::B32); Sirit::ValidId(offset)) {
+        address = ctx.OpIAdd(ctx.U32[1], address, offset);
     }
-    const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
-    const auto [id, pointer_type] = buffer[EmitContext::PointerType::U32];
-    const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index);
+    const auto [id, pointer_type] = buffer.Alias(PointerType::U32);
+    const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address);
     const auto [scope, semantics]{AtomicArgs(ctx)};
-    return AccessBoundsCheck<32, 1, is_float>(ctx, index, buffer.size_dwords, [&] {
+    return AccessBoundsCheck<32, 1, is_float>(ctx, address, buffer.Size(PointerSize::B32), [&] {
         return (ctx.*atomic_func)(type, ptr, scope, semantics, value);
     });
 }
@@ -76,14 +79,13 @@ Id BufferAtomicU32IncDec(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address,
                          Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id)) {
     const auto& buffer = ctx.buffers[handle];
-    if (Sirit::ValidId(buffer.offset)) {
-        address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
+    if (const Id offset = buffer.Offset(PointerSize::B32); Sirit::ValidId(offset)) {
+        address = ctx.OpIAdd(ctx.U32[1], address, offset);
     }
-    const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
-    const auto [id, pointer_type] = buffer[EmitContext::PointerType::U32];
-    const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index);
+    const auto [id, pointer_type] = buffer.Alias(PointerType::U32);
+    const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address);
     const auto [scope, semantics]{AtomicArgs(ctx)};
-    return AccessBoundsCheck<32>(ctx, index, buffer.size_dwords, [&] {
+    return AccessBoundsCheck<32>(ctx, address, buffer.Size(PointerSize::B32), [&] {
         return (ctx.*atomic_func)(ctx.U32[1], ptr, scope, semantics);
     });
 }
@@ -92,14 +94,13 @@ Id BufferAtomicU32CmpSwap(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre
                           Id cmp_value,
                           Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id, Id, Id)) {
     const auto& buffer = ctx.buffers[handle];
-    if (Sirit::ValidId(buffer.offset)) {
-        address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
+    if (const Id offset = buffer.Offset(PointerSize::B32); Sirit::ValidId(offset)) {
+        address = ctx.OpIAdd(ctx.U32[1], address, offset);
     }
-    const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
-    const auto [id, pointer_type] = buffer[EmitContext::PointerType::U32];
-    const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index);
+    const auto [id, pointer_type] = buffer.Alias(PointerType::U32);
+    const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address);
     const auto [scope, semantics]{AtomicArgs(ctx)};
-    return AccessBoundsCheck<32>(ctx, index, buffer.size_dwords, [&] {
+    return AccessBoundsCheck<32>(ctx, address, buffer.Size(PointerSize::B32), [&] {
         return (ctx.*atomic_func)(ctx.U32[1], ptr, scope, semantics, semantics, value, cmp_value);
     });
 }
@@ -107,14 +108,13 @@ Id BufferAtomicU32CmpSwap(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre
 Id BufferAtomicU64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value,
                    Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
     const auto& buffer = ctx.buffers[handle];
-    if (Sirit::ValidId(buffer.offset)) {
-        address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset);
+    if (const Id offset = buffer.Offset(PointerSize::B64); Sirit::ValidId(offset)) {
+        address = ctx.OpIAdd(ctx.U32[1], address, offset);
     }
-    const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(3u));
-    const auto [id, pointer_type] = buffer[EmitContext::PointerType::U64];
-    const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index);
+    const auto [id, pointer_type] = buffer.Alias(PointerType::U64);
+    const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address);
     const auto [scope, semantics]{AtomicArgs(ctx)};
-    return AccessBoundsCheck<64>(ctx, index, buffer.size_qwords, [&] {
+    return AccessBoundsCheck<64>(ctx, address, buffer.Size(PointerSize::B64), [&] {
         return (ctx.*atomic_func)(ctx.U64, ptr, scope, semantics, value);
     });
 }
@@ -360,7 +360,7 @@ Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id co
 
 Id EmitDataAppend(EmitContext& ctx, u32 gds_addr, u32 binding) {
     const auto& buffer = ctx.buffers[binding];
-    const auto [id, pointer_type] = buffer[EmitContext::PointerType::U32];
+    const auto [id, pointer_type] = buffer.Alias(PointerType::U32);
     const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, ctx.ConstU32(gds_addr));
     const auto [scope, semantics]{AtomicArgs(ctx)};
     return ctx.OpAtomicIIncrement(ctx.U32[1], ptr, scope, semantics);
@@ -368,7 +368,7 @@ Id EmitDataAppend(EmitContext& ctx, u32 gds_addr, u32 binding) {
 
 Id EmitDataConsume(EmitContext& ctx, u32 gds_addr, u32 binding) {
     const auto& buffer = ctx.buffers[binding];
-    const auto [id, pointer_type] = buffer[EmitContext::PointerType::U32];
+    const auto [id, pointer_type] = buffer.Alias(PointerType::U32);
     const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, ctx.ConstU32(gds_addr));
     const auto [scope, semantics]{AtomicArgs(ctx)};
     return ctx.OpAtomicIDecrement(ctx.U32[1], ptr, scope, semantics);
diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
index ccbe54d0a..564fb3f80 100644
--- a/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
+++ b/src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
@@ -3,6 +3,7 @@
 
 #include "common/assert.h"
 #include "common/logging/log.h"
+#include "shader_recompiler/backend/spirv/emit_spirv_bounds.h"
 #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
 #include "shader_recompiler/backend/spirv/spirv_emit_context.h"
 #include "shader_recompiler/ir/attribute.h"
@@ -11,8 +12,6 @@
 
 #include
 
-#include "emit_spirv_bounds.h"
-
 namespace Shader::Backend::SPIRV {
 namespace {
@@ -164,6 +163,7 @@ void EmitGetGotoVariable(EmitContext&) {
 }
 
 using PointerType = EmitContext::PointerType;
+using PointerSize = EmitContext::PointerSize;
 
 Id EmitReadConst(EmitContext& ctx, IR::Inst* inst, Id addr, Id offset) {
     const u32 flatbuf_off_dw = inst->Flags<u32>();
@@ -179,14 +179,15 @@ Id EmitReadConst(EmitContext& ctx, IR::Inst* inst, Id addr, Id offset) {
 template <PointerType type>
 Id ReadConstBuffer(EmitContext& ctx, u32 handle, Id index) {
     const auto& buffer = ctx.buffers[handle];
-    index = ctx.OpIAdd(ctx.U32[1], index, buffer.offset_dwords);
-    const auto [id, pointer_type] = buffer[type];
+    if (const Id offset = buffer.Offset(PointerSize::B32); Sirit::ValidId(offset)) {
+        index = ctx.OpIAdd(ctx.U32[1], index, offset);
+    }
+    const auto [id, pointer_type] = buffer.Alias(type);
     const auto value_type = type == PointerType::U32 ? ctx.U32[1] : ctx.F32[1];
     const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
     const Id result{ctx.OpLoad(value_type, ptr)};
-
-    if (Sirit::ValidId(buffer.size_dwords)) {
-        const Id in_bounds = ctx.OpULessThan(ctx.U1[1], index, buffer.size_dwords);
+    if (const Id size = buffer.Size(PointerSize::B32); Sirit::ValidId(size)) {
+        const Id in_bounds = ctx.OpULessThan(ctx.U1[1], index, size);
         return ctx.OpSelect(value_type, in_bounds, result, ctx.u32_zero_value);
     }
     return result;
@@ -419,25 +420,24 @@ void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value) {
 }
 
 template <u32 N, PointerType alias>
 static Id EmitLoadBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
+    constexpr bool is_float = alias == PointerType::F32;
     const auto flags = inst->Flags<IR::BufferInstInfo>();
     const auto& spv_buffer = ctx.buffers[handle];
-    if (Sirit::ValidId(spv_buffer.offset)) {
-        address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
+    if (const Id offset = spv_buffer.Offset(PointerSize::B32); Sirit::ValidId(offset)) {
+        address = ctx.OpIAdd(ctx.U32[1], address, offset);
     }
-    const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
     const auto& data_types = alias == PointerType::U32 ? ctx.U32 : ctx.F32;
-    const auto [id, pointer_type] = spv_buffer[alias];
+    const auto [id, pointer_type] = spv_buffer.Alias(alias);
     boost::container::static_vector<Id, N> ids;
     for (u32 i = 0; i < N; i++) {
-        const Id index_i = i == 0 ? index : ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(i));
+        const Id index_i = i == 0 ? address : ctx.OpIAdd(ctx.U32[1], address, ctx.ConstU32(i));
         const Id ptr_i = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index_i);
         const Id result_i = ctx.OpLoad(data_types[1], ptr_i);
         if (!flags.typed) {
             // Untyped loads have bounds checking per-component.
-            ids.push_back(LoadAccessBoundsCheck < 32, 1,
-                          alias ==
-                              PointerType::F32 > (ctx, index_i, spv_buffer.size_dwords, result_i));
+            ids.push_back(LoadAccessBoundsCheck<32, 1, is_float>(
+                ctx, index_i, spv_buffer.Size(PointerSize::B32), result_i));
         } else {
             ids.push_back(result_i);
         }
@@ -446,33 +446,32 @@ static Id EmitLoadBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id a
     const Id result = N == 1 ? ids[0] : ctx.OpCompositeConstruct(data_types[N], ids);
     if (flags.typed) {
         // Typed loads have single bounds check for the whole load.
-        return LoadAccessBoundsCheck < 32, N,
-               alias == PointerType::F32 > (ctx, index, spv_buffer.size_dwords, result);
+        return LoadAccessBoundsCheck<32, N, is_float>(ctx, address,
+                                                      spv_buffer.Size(PointerSize::B32), result);
     }
     return result;
 }
 
 Id EmitLoadBufferU8(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
     const auto& spv_buffer = ctx.buffers[handle];
-    if (Sirit::ValidId(spv_buffer.offset)) {
-        address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
+    if (const Id offset = spv_buffer.Offset(PointerSize::B8); Sirit::ValidId(offset)) {
+        address = ctx.OpIAdd(ctx.U32[1], address, offset);
     }
-    const auto [id, pointer_type] = spv_buffer[PointerType::U8];
+    const auto [id, pointer_type] = spv_buffer.Alias(PointerType::U8);
     const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address)};
     const Id result{ctx.OpLoad(ctx.U8, ptr)};
-    return LoadAccessBoundsCheck<8>(ctx, address, spv_buffer.size, result);
+    return LoadAccessBoundsCheck<8>(ctx, address, spv_buffer.Size(PointerSize::B8), result);
 }
 
 Id EmitLoadBufferU16(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
     const auto& spv_buffer = ctx.buffers[handle];
-    if (Sirit::ValidId(spv_buffer.offset)) {
-        address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
+    if (const Id offset = spv_buffer.Offset(PointerSize::B16); Sirit::ValidId(offset)) {
+        address = ctx.OpIAdd(ctx.U32[1], address, offset);
     }
-    const auto [id, pointer_type] = spv_buffer[PointerType::U16];
-    const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(1u));
-    const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
+    const auto [id, pointer_type] = spv_buffer.Alias(PointerType::U16);
+    const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address)};
     const Id result{ctx.OpLoad(ctx.U16, ptr)};
-    return LoadAccessBoundsCheck<16>(ctx, index, spv_buffer.size_shorts, result);
+    return LoadAccessBoundsCheck<16>(ctx, address, spv_buffer.Size(PointerSize::B16), result);
 }
 
 Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
@@ -493,14 +492,13 @@ Id EmitLoadBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address)
 
 Id EmitLoadBufferU64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
     const auto& spv_buffer = ctx.buffers[handle];
-    if (Sirit::ValidId(spv_buffer.offset)) {
-        address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
+    if (const Id offset = spv_buffer.Offset(PointerSize::B64); Sirit::ValidId(offset)) {
+        address = ctx.OpIAdd(ctx.U32[1], address, offset);
     }
-    const auto [id, pointer_type] = spv_buffer[PointerType::U64];
-    const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(3u));
-    const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u64_zero_value, index)};
+    const auto [id, pointer_type] = spv_buffer.Alias(PointerType::U64);
+    const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u64_zero_value, address)};
     const Id result{ctx.OpLoad(ctx.U64, ptr)};
-    return LoadAccessBoundsCheck<64>(ctx, index, spv_buffer.size_qwords, result);
+    return LoadAccessBoundsCheck<64>(ctx, address, spv_buffer.Size(PointerSize::B64), result);
 }
 
 Id EmitLoadBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
@@ -526,18 +524,18 @@ Id EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addr
 
 template <u32 N, PointerType alias>
 static void EmitStoreBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address,
                                  Id value) {
+    constexpr bool is_float = alias == PointerType::F32;
     const auto flags = inst->Flags<IR::BufferInstInfo>();
     const auto& spv_buffer = ctx.buffers[handle];
-    if (Sirit::ValidId(spv_buffer.offset)) {
-        address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
+    if (const Id offset = spv_buffer.Offset(PointerSize::B32); Sirit::ValidId(offset)) {
+        address = ctx.OpIAdd(ctx.U32[1], address, offset);
     }
-    const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
     const auto& data_types = alias == PointerType::U32 ? ctx.U32 : ctx.F32;
-    const auto [id, pointer_type] = spv_buffer[alias];
+    const auto [id, pointer_type] = spv_buffer.Alias(alias);
     auto store = [&] {
         for (u32 i = 0; i < N; i++) {
-            const Id index_i = i == 0 ? index : ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(i));
+            const Id index_i = i == 0 ? address : ctx.OpIAdd(ctx.U32[1], address, ctx.ConstU32(i));
             const Id ptr_i = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index_i);
             const Id value_i = N == 1 ? value : ctx.OpCompositeExtract(data_types[1], value, i);
             auto store_i = [&] {
@@ -546,8 +544,8 @@ static void EmitStoreBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, I
             };
             if (!flags.typed) {
                 // Untyped stores have bounds checking per-component.
-                AccessBoundsCheck<32, 1, alias == PointerType::F32>(
-                    ctx, index_i, spv_buffer.size_dwords, store_i);
+                AccessBoundsCheck<32, 1, is_float>(ctx, index_i, spv_buffer.Size(PointerSize::B32),
+                                                   store_i);
             } else {
                 store_i();
             }
@@ -557,8 +555,7 @@ static void EmitStoreBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, I
 
     if (flags.typed) {
         // Typed stores have single bounds check for the whole store.
-        AccessBoundsCheck<32, N, alias == PointerType::F32>(ctx, index, spv_buffer.size_dwords,
-                                                            store);
+        AccessBoundsCheck<32, N, is_float>(ctx, address, spv_buffer.Size(PointerSize::B32), store);
     } else {
         store();
     }
@@ -566,12 +563,12 @@ static void EmitStoreBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, I
 
 void EmitStoreBufferU8(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) {
     const auto& spv_buffer = ctx.buffers[handle];
-    if (Sirit::ValidId(spv_buffer.offset)) {
-        address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
+    if (const Id offset = spv_buffer.Offset(PointerSize::B8); Sirit::ValidId(offset)) {
+        address = ctx.OpIAdd(ctx.U32[1], address, offset);
     }
-    const auto [id, pointer_type] = spv_buffer[PointerType::U8];
+    const auto [id, pointer_type] = spv_buffer.Alias(PointerType::U8);
     const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address)};
-    AccessBoundsCheck<8>(ctx, address, spv_buffer.size, [&] {
+    AccessBoundsCheck<8>(ctx, address, spv_buffer.Size(PointerSize::B8), [&] {
         ctx.OpStore(ptr, value);
         return Id{};
     });
@@ -579,13 +576,12 @@ void EmitStoreBufferU8(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id v
 
 void EmitStoreBufferU16(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) {
     const auto& spv_buffer = ctx.buffers[handle];
-    if (Sirit::ValidId(spv_buffer.offset)) {
-        address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
+    if (const Id offset = spv_buffer.Offset(PointerSize::B16); Sirit::ValidId(offset)) {
+        address = ctx.OpIAdd(ctx.U32[1], address, offset);
     }
-    const auto [id, pointer_type] = spv_buffer[PointerType::U16];
-    const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(1u));
-    const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
-    AccessBoundsCheck<16>(ctx, index, spv_buffer.size_shorts, [&] {
+    const auto [id, pointer_type] = spv_buffer.Alias(PointerType::U16);
+    const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address)};
+    AccessBoundsCheck<16>(ctx, address, spv_buffer.Size(PointerSize::B16), [&] {
         ctx.OpStore(ptr, value);
         return Id{};
     });
@@ -609,13 +605,12 @@ void EmitStoreBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre
 
 void EmitStoreBufferU64(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) {
     const auto& spv_buffer = ctx.buffers[handle];
-    if (Sirit::ValidId(spv_buffer.offset)) {
-        address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset);
+    if (const Id offset = spv_buffer.Offset(PointerSize::B64); Sirit::ValidId(offset)) {
+        address = ctx.OpIAdd(ctx.U32[1], address, offset);
    }
-    const auto [id, pointer_type] = spv_buffer[PointerType::U64];
-    const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(3u));
-    const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u64_zero_value, index)};
-    AccessBoundsCheck<64>(ctx, index, spv_buffer.size_qwords, [&] {
+    const auto [id, pointer_type] = spv_buffer.Alias(PointerType::U64);
+    const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u64_zero_value, address)};
+    AccessBoundsCheck<64>(ctx, address, spv_buffer.Size(PointerSize::B64), [&] {
        ctx.OpStore(ptr, value);
        return Id{};
    });
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
index 567c059ae..524914ad4 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp
@@ -71,7 +71,7 @@ EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_inf
                          Bindings& binding_)
     : Sirit::Module(profile_.supported_spirv), info{info_}, runtime_info{runtime_info_},
       profile{profile_}, stage{info.stage}, l_stage{info.l_stage}, binding{binding_} {
-    if (info.dma_types != IR::Type::Void) {
+    if (info.uses_dma) {
         SetMemoryModel(spv::AddressingModel::PhysicalStorageBuffer64, spv::MemoryModel::GLSL450);
     } else {
         SetMemoryModel(spv::AddressingModel::Logical, spv::MemoryModel::GLSL450);
@@ -169,34 +169,8 @@ void EmitContext::DefineArithmeticTypes() {
     if (info.uses_fp64) {
         frexp_result_f64 = Name(TypeStruct(F64[1], S32[1]), "frexp_result_f64");
     }
-
-    if (True(info.dma_types & IR::Type::F64)) {
-        physical_pointer_types[PointerType::F64] =
-            TypePointer(spv::StorageClass::PhysicalStorageBuffer, F64[1]);
-    }
-    if (True(info.dma_types & IR::Type::U64)) {
-        physical_pointer_types[PointerType::U64] =
-            TypePointer(spv::StorageClass::PhysicalStorageBuffer, U64);
-    }
-    if (True(info.dma_types & IR::Type::F32)) {
-        physical_pointer_types[PointerType::F32] =
-            TypePointer(spv::StorageClass::PhysicalStorageBuffer, F32[1]);
-    }
-    if (True(info.dma_types & IR::Type::U32)) {
-        physical_pointer_types[PointerType::U32] =
-            TypePointer(spv::StorageClass::PhysicalStorageBuffer, U32[1]);
-    }
-    if (True(info.dma_types & IR::Type::F16)) {
-        physical_pointer_types[PointerType::F16] =
-            TypePointer(spv::StorageClass::PhysicalStorageBuffer, F16[1]);
-    }
-    if (True(info.dma_types & IR::Type::U16)) {
-        physical_pointer_types[PointerType::U16] =
-            TypePointer(spv::StorageClass::PhysicalStorageBuffer, U16);
-    }
-    if (True(info.dma_types & IR::Type::U8)) {
-        physical_pointer_types[PointerType::U8] =
-            TypePointer(spv::StorageClass::PhysicalStorageBuffer, U8);
+    if (info.uses_dma) {
+        physical_pointer_type_u32 = TypePointer(spv::StorageClass::PhysicalStorageBuffer, U32[1]);
     }
 }
 
@@ -239,7 +213,7 @@ Id EmitContext::GetBufferSize(const u32 sharp_idx) {
     // Can this be done with memory access? Like we do now with ReadConst
     const auto& srt_flatbuf = buffers[flatbuf_index];
     ASSERT(srt_flatbuf.buffer_type == BufferType::Flatbuf);
-    const auto [id, pointer_type] = srt_flatbuf[PointerType::U32];
+    const auto [id, pointer_type] = srt_flatbuf.Alias(PointerType::U32);
     const auto rsrc1{
         OpLoad(U32[1], OpAccessChain(pointer_type, id, u32_zero_value, ConstU32(sharp_idx + 1)))};
@@ -255,39 +229,70 @@
 }
 
 void EmitContext::DefineBufferProperties() {
+    if (!profile.needs_buffer_offsets && profile.supports_robust_buffer_access) {
+        return;
+    }
     for (u32 i = 0; i < buffers.size(); i++) {
-        BufferDefinition& buffer = buffers[i];
+        auto& buffer = buffers[i];
+        const auto& desc = info.buffers[i];
+        const u32 binding = buffer.binding;
         if (buffer.buffer_type != BufferType::Guest) {
             continue;
         }
-        const u32 binding = buffer.binding;
-        const u32 half = PushData::BufOffsetIndex + (binding >> 4);
-        const u32 comp = (binding & 0xf) >> 2;
-        const u32 offset = (binding & 0x3) << 3;
-        const Id ptr{OpAccessChain(TypePointer(spv::StorageClass::PushConstant, U32[1]),
-                                   push_data_block, ConstU32(half), ConstU32(comp))};
-        const Id value{OpLoad(U32[1], ptr)};
-        buffer.offset = OpBitFieldUExtract(U32[1], value, ConstU32(offset), ConstU32(8U));
-        Name(buffer.offset, fmt::format("buf{}_off", binding));
-        buffer.offset_dwords = OpShiftRightLogical(U32[1], buffer.offset, ConstU32(2U));
-        Name(buffer.offset_dwords, fmt::format("buf{}_dword_off", binding));
-        // Only need to load size if performing bounds checks and the buffer is both guest and not
-        // inline.
-        if (!profile.supports_robust_buffer_access && buffer.buffer_type == BufferType::Guest) {
-            const BufferResource& desc = info.buffers[i];
-            if (desc.sharp_idx == std::numeric_limits<u32>::max()) {
-                buffer.size = ConstU32(desc.inline_cbuf.GetSize());
-            } else {
-                buffer.size = GetBufferSize(desc.sharp_idx);
+        // Only load and apply buffer offsets if host GPU alignment is larger than guest.
+        if (profile.needs_buffer_offsets) {
+            const u32 half = PushData::BufOffsetIndex + (binding >> 4);
+            const u32 comp = (binding & 0xf) >> 2;
+            const u32 offset = (binding & 0x3) << 3;
+            const Id ptr{OpAccessChain(TypePointer(spv::StorageClass::PushConstant, U32[1]),
+                                       push_data_block, ConstU32(half), ConstU32(comp))};
+            const Id value{OpLoad(U32[1], ptr)};
+
+            const Id buf_offset{OpBitFieldUExtract(U32[1], value, ConstU32(offset), ConstU32(8U))};
+            Name(buf_offset, fmt::format("buf{}_off", binding));
+            buffer.Offset(PointerSize::B8) = buf_offset;
+
+            if (True(desc.used_types & IR::Type::U16)) {
+                const Id buf_word_offset{OpShiftRightLogical(U32[1], buf_offset, ConstU32(1U))};
+                Name(buf_word_offset, fmt::format("buf{}_word_off", binding));
+                buffer.Offset(PointerSize::B16) = buf_word_offset;
+            }
+            if (True(desc.used_types & IR::Type::U32)) {
+                const Id buf_dword_offset{OpShiftRightLogical(U32[1], buf_offset, ConstU32(2U))};
+                Name(buf_dword_offset, fmt::format("buf{}_dword_off", binding));
+                buffer.Offset(PointerSize::B32) = buf_dword_offset;
+            }
+            if (True(desc.used_types & IR::Type::U64)) {
+                const Id buf_qword_offset{OpShiftRightLogical(U32[1], buf_offset, ConstU32(3U))};
+                Name(buf_qword_offset, fmt::format("buf{}_qword_off", binding));
+                buffer.Offset(PointerSize::B64) = buf_qword_offset;
+            }
+        }
+
+        // Only load size if performing bounds checks.
+        if (!profile.supports_robust_buffer_access) {
+            const Id buf_size{desc.sharp_idx == std::numeric_limits<u32>::max()
+                                  ? ConstU32(desc.inline_cbuf.GetSize())
+                                  : GetBufferSize(desc.sharp_idx)};
+            Name(buf_size, fmt::format("buf{}_size", binding));
+            buffer.Size(PointerSize::B8) = buf_size;
+
+            if (True(desc.used_types & IR::Type::U16)) {
+                const Id buf_word_size{OpShiftRightLogical(U32[1], buf_size, ConstU32(1U))};
+                Name(buf_word_size, fmt::format("buf{}_short_size", binding));
+                buffer.Size(PointerSize::B16) = buf_word_size;
+            }
+            if (True(desc.used_types & IR::Type::U32)) {
+                const Id buf_dword_size{OpShiftRightLogical(U32[1], buf_size, ConstU32(2U))};
+                Name(buf_dword_size, fmt::format("buf{}_dword_size", binding));
+                buffer.Size(PointerSize::B32) = buf_dword_size;
+            }
+            if (True(desc.used_types & IR::Type::U64)) {
+                const Id buf_qword_size{OpShiftRightLogical(U32[1], buf_size, ConstU32(3U))};
+                Name(buf_qword_size, fmt::format("buf{}_qword_size", binding));
+                buffer.Size(PointerSize::B64) = buf_qword_size;
             }
-            Name(buffer.size, fmt::format("buf{}_size", binding));
-            buffer.size_shorts = OpShiftRightLogical(U32[1], buffer.size, ConstU32(1U));
-            Name(buffer.size_shorts, fmt::format("buf{}_short_size", binding));
-            buffer.size_dwords = OpShiftRightLogical(U32[1], buffer.size, ConstU32(2U));
-            Name(buffer.size_dwords, fmt::format("buf{}_dword_size", binding));
-            buffer.size_qwords = OpShiftRightLogical(U32[1], buffer.size, ConstU32(3U));
-            Name(buffer.size_qwords, fmt::format("buf{}_qword_size", binding));
         }
     }
 }
@@ -779,8 +784,7 @@ EmitContext::BufferSpv EmitContext::DefineBuffer(bool is_storage, bool is_writte
 };
 
 void EmitContext::DefineBuffers() {
-    if (!profile.supports_robust_buffer_access &&
-        info.readconst_types == Info::ReadConstType::None) {
+    if (!profile.supports_robust_buffer_access && !info.uses_dma) {
         // In case Flatbuf has not already been bound by IR and is needed
         // to query buffer sizes, bind it now.
         info.buffers.push_back({
@@ -809,23 +813,23 @@
         // Define aliases depending on the shader usage.
         auto& spv_buffer = buffers.emplace_back(binding.buffer++, desc.buffer_type);
         if (True(desc.used_types & IR::Type::U64)) {
-            spv_buffer[PointerType::U64] =
+            spv_buffer.Alias(PointerType::U64) =
                 DefineBuffer(is_storage, desc.is_written, 3, desc.buffer_type, U64);
         }
         if (True(desc.used_types & IR::Type::U32)) {
-            spv_buffer[PointerType::U32] =
+            spv_buffer.Alias(PointerType::U32) =
                 DefineBuffer(is_storage, desc.is_written, 2, desc.buffer_type, U32[1]);
         }
         if (True(desc.used_types & IR::Type::F32)) {
-            spv_buffer[PointerType::F32] =
+            spv_buffer.Alias(PointerType::F32) =
                 DefineBuffer(is_storage, desc.is_written, 2, desc.buffer_type, F32[1]);
         }
         if (True(desc.used_types & IR::Type::U16)) {
-            spv_buffer[PointerType::U16] =
+            spv_buffer.Alias(PointerType::U16) =
                 DefineBuffer(is_storage, desc.is_written, 1, desc.buffer_type, U16);
         }
         if (True(desc.used_types & IR::Type::U8)) {
-            spv_buffer[PointerType::U8] =
+            spv_buffer.Alias(PointerType::U8) =
                 DefineBuffer(is_storage, desc.is_written, 0, desc.buffer_type, U8);
         }
         ++binding.unified;
@@ -1154,7 +1158,7 @@ Id EmitContext::DefineGetBdaPointer() {
         const auto page{OpShiftRightLogical(U64, address, caching_pagebits)};
         const auto page32{OpUConvert(U32[1], page)};
         const auto& bda_buffer{buffers[bda_pagetable_index]};
-        const auto [bda_buffer_id, bda_pointer_type] = bda_buffer[PointerType::U64];
+        const auto [bda_buffer_id, bda_pointer_type] = bda_buffer.Alias(PointerType::U64);
         const auto bda_ptr{OpAccessChain(bda_pointer_type, bda_buffer_id, u32_zero_value, page32)};
         const auto bda{OpLoad(U64, bda_ptr)};
 
@@ -1166,14 +1170,14 @@ Id EmitContext::DefineGetBdaPointer() {
         // First time acces, mark as fault
         AddLabel(fault_label);
         const auto& fault_buffer{buffers[fault_buffer_index]};
-        const auto [fault_buffer_id, fault_pointer_type] = fault_buffer[PointerType::U8];
-        const auto page_div8{OpShiftRightLogical(U32[1], page32, ConstU32(3U))};
-        const auto page_mod8{OpBitwiseAnd(U32[1], page32, ConstU32(7U))};
-        const auto page_mask{OpShiftLeftLogical(U8, u8_one_value, page_mod8)};
+        const auto [fault_buffer_id, fault_pointer_type] = fault_buffer.Alias(PointerType::U32);
+        const auto page_div32{OpShiftRightLogical(U32[1], page32, ConstU32(5U))};
+        const auto page_mod32{OpBitwiseAnd(U32[1], page32, ConstU32(31U))};
+        const auto page_mask{OpShiftLeftLogical(U32[1], u32_one_value, page_mod32)};
         const auto fault_ptr{
-            OpAccessChain(fault_pointer_type, fault_buffer_id, u32_zero_value, page_div8)};
-        const auto fault_value{OpLoad(U8, fault_ptr)};
-        const auto fault_value_masked{OpBitwiseOr(U8, fault_value, page_mask)};
+            OpAccessChain(fault_pointer_type, fault_buffer_id, u32_zero_value, page_div32)};
+        const auto fault_value{OpLoad(U32[1], fault_ptr)};
+        const auto fault_value_masked{OpBitwiseOr(U32[1], fault_value, page_mask)};
         OpStore(fault_ptr, fault_value_masked);
 
         // Return null pointer
@@ -1211,14 +1215,15 @@ Id EmitContext::DefineReadConst(bool dynamic) {
    const auto offset_bytes{OpShiftLeftLogical(U32[1], offset, ConstU32(2U))};
    const auto addr{OpIAdd(U64, base_addr, OpUConvert(U64, offset_bytes))};
 
-    const auto result = EmitMemoryRead(U32[1], addr, [&]() {
+    const auto result = EmitDwordMemoryRead(addr, [&]() {
        if (dynamic) {
            return u32_zero_value;
        } else {
            const auto& flatbuf_buffer{buffers[flatbuf_index]};
            ASSERT(flatbuf_buffer.binding >= 0 &&
                   flatbuf_buffer.buffer_type == BufferType::Flatbuf);
-            const auto [flatbuf_buffer_id, flatbuf_pointer_type] = flatbuf_buffer[PointerType::U32];
+            const auto [flatbuf_buffer_id, flatbuf_pointer_type] =
+                flatbuf_buffer.Alias(PointerType::U32);
            const auto ptr{OpAccessChain(flatbuf_pointer_type, flatbuf_buffer_id, u32_zero_value,
                                         flatbuf_offset)};
            return OpLoad(U32[1], ptr);
@@ -1239,7 +1244,7 @@ void EmitContext::DefineFunctions() {
         uf11_to_f32 = DefineUfloatM5ToFloat32(6, "uf11_to_f32");
         uf10_to_f32 = DefineUfloatM5ToFloat32(5, "uf10_to_f32");
     }
-    if (info.dma_types != IR::Type::Void) {
+    if (info.uses_dma) {
         get_bda_pointer = DefineGetBdaPointer();
     }
diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
index 1eb7d05c6..f8c6416e8 100644
--- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h
+++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h
@@ -42,17 +42,6 @@ public:
                 Bindings& binding);
     ~EmitContext();
 
-    enum class PointerType : u32 {
-        U8,
-        U16,
-        F16,
-        U32,
-        F32,
-        U64,
-        F64,
-        NumAlias,
-    };
-
     Id Def(const IR::Value& value);
 
     void DefineBufferProperties();
@@ -155,25 +144,7 @@ public:
         return last_label;
     }
 
-    PointerType PointerTypeFromType(Id type) {
-        if (type.value == U8.value)
-            return PointerType::U8;
-        if (type.value == U16.value)
-            return PointerType::U16;
-        if (type.value == F16[1].value)
-            return PointerType::F16;
-        if (type.value == U32[1].value)
-            return PointerType::U32;
-        if (type.value == F32[1].value)
-            return PointerType::F32;
-        if (type.value == U64.value)
-            return PointerType::U64;
-        if (type.value == F64[1].value)
-            return PointerType::F64;
-        UNREACHABLE_MSG("Unknown type for pointer");
-    }
-
-    Id EmitMemoryRead(Id type, Id address, auto&& fallback) {
+    Id EmitDwordMemoryRead(Id address, auto&& fallback) {
         const Id available_label = OpLabel();
         const Id fallback_label = OpLabel();
         const Id merge_label = OpLabel();
@@ -185,10 +156,8 @@ public:
 
         // Available
         AddLabel(available_label);
-        const auto pointer_type = PointerTypeFromType(type);
-        const Id pointer_type_id = physical_pointer_types[pointer_type];
-        const Id addr_ptr = OpConvertUToPtr(pointer_type_id, addr);
-        const Id result = OpLoad(type, addr_ptr, spv::MemoryAccessMask::Aligned, 4u);
+        const Id addr_ptr = OpConvertUToPtr(physical_pointer_type_u32, addr);
+        const Id result = OpLoad(U32[1], addr_ptr, spv::MemoryAccessMask::Aligned, 4u);
         OpBranch(merge_label);
 
         // Fallback
@@ -199,7 +168,7 @@ public:
 
         // Merge
         AddLabel(merge_label);
         const Id final_result =
-            OpPhi(type, fallback_result, fallback_label, result, available_label);
+            OpPhi(U32[1], fallback_result, fallback_label, result, available_label);
         return final_result;
     }
@@ -314,6 +283,24 @@ public:
         bool is_storage = false;
     };
 
+    enum class PointerType : u32 {
+        U8,
+        U16,
+        U32,
+        F32,
+        U64,
+        F64,
+        NumAlias,
+    };
+
+    enum class PointerSize : u32 {
+        B8,
+        B16,
+        B32,
+        B64,
+        NumClass,
+    };
+
     struct BufferSpv {
         Id id;
         Id pointer_type;
@@ -322,32 +309,23 @@ public:
     struct BufferDefinition {
         u32 binding;
         BufferType buffer_type;
-        Id offset;
-        Id offset_dwords;
-        Id size;
-        Id size_shorts;
-        Id size_dwords;
-        Id size_qwords;
+        std::array<Id, u32(PointerSize::NumClass)> offsets;
+        std::array<Id, u32(PointerSize::NumClass)> sizes;
         std::array<BufferSpv, u32(PointerType::NumAlias)> aliases;
 
-        const BufferSpv& operator[](PointerType alias) const {
-            return aliases[u32(alias)];
+        template <typename Self>
+        auto& Alias(this Self& self, PointerType alias) {
+            return self.aliases[u32(alias)];
         }
 
-        BufferSpv& operator[](PointerType alias) {
-            return aliases[u32(alias)];
-        }
-    };
-
-    struct PhysicalPointerTypes {
-        std::array<Id, u32(PointerType::NumAlias)> types;
-
-        const Id& operator[](PointerType type) const {
-            return types[u32(type)];
+        template <typename Self>
+        auto& Offset(this Self& self, PointerSize size) {
+            return self.offsets[u32(size)];
         }
 
-        Id& operator[](PointerType type) {
-            return types[u32(type)];
+        template <typename Self>
+        auto& Size(this Self& self, PointerSize size) {
+            return self.sizes[u32(size)];
         }
     };
 
@@ -356,12 +334,12 @@ public:
     boost::container::small_vector buffers;
     boost::container::small_vector images;
     boost::container::small_vector samplers;
-    PhysicalPointerTypes physical_pointer_types;
     std::unordered_map first_to_last_label_map;
 
     size_t flatbuf_index{};
     size_t bda_pagetable_index{};
     size_t fault_buffer_index{};
+    Id physical_pointer_type_u32;
 
     Id sampler_type{};
     Id sampler_pointer_type{};
diff --git a/src/shader_recompiler/frontend/translate/scalar_alu.cpp b/src/shader_recompiler/frontend/translate/scalar_alu.cpp
index 7beb594c3..48f977f49 100644
--- a/src/shader_recompiler/frontend/translate/scalar_alu.cpp
+++ b/src/shader_recompiler/frontend/translate/scalar_alu.cpp
@@ -1,7 +1,6 @@
 // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
-#include
 #include "common/assert.h"
 #include "shader_recompiler/frontend/translate/translate.h"
 
diff --git a/src/shader_recompiler/info.h b/src/shader_recompiler/info.h
index 6777c4769..b2b03bbbf 100644
--- a/src/shader_recompiler/info.h
+++ b/src/shader_recompiler/info.h
@@ -238,7 +238,7 @@ struct Info {
         Dynamic = 1 << 1,
     };
     ReadConstType readconst_types{};
-    IR::Type dma_types{IR::Type::Void};
+    bool uses_dma{false};
 
     explicit Info(Stage stage_, LogicalStage l_stage_, ShaderParams params)
         : stage{stage_}, l_stage{l_stage_}, pgm_hash{params.hash}, pgm_base{params.Base()},
diff --git a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
index 2e9b78f0e..f758d8e7b 100644
--- a/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
+++ b/src/shader_recompiler/ir/passes/resource_tracking_pass.cpp
@@ -105,6 +105,49 @@ IR::Type BufferDataType(const IR::Inst& inst, AmdGpu::NumberFormat num_format) {
     }
 }
 
+u32 BufferAddressShift(const IR::Inst& inst, AmdGpu::DataFormat data_format) {
+    switch (inst.GetOpcode()) {
+    case IR::Opcode::LoadBufferU8:
+    case IR::Opcode::StoreBufferU8:
+        return 0;
+    case IR::Opcode::LoadBufferU16:
+    case IR::Opcode::StoreBufferU16:
+        return 1;
+    case IR::Opcode::LoadBufferU64:
+    case IR::Opcode::StoreBufferU64:
+    case IR::Opcode::BufferAtomicIAdd64:
+        return 3;
+    case IR::Opcode::LoadBufferFormatF32:
+    case IR::Opcode::StoreBufferFormatF32: {
+        switch (data_format) {
+        case AmdGpu::DataFormat::Format8:
+            return 0;
+        case AmdGpu::DataFormat::Format8_8:
+        case AmdGpu::DataFormat::Format16:
+            return 1;
+        case AmdGpu::DataFormat::Format8_8_8_8:
+        case AmdGpu::DataFormat::Format16_16:
+        case AmdGpu::DataFormat::Format10_11_11:
+        case AmdGpu::DataFormat::Format2_10_10_10:
+        case AmdGpu::DataFormat::Format16_16_16_16:
+        case AmdGpu::DataFormat::Format32:
+        case AmdGpu::DataFormat::Format32_32:
+        case AmdGpu::DataFormat::Format32_32_32:
+        case AmdGpu::DataFormat::Format32_32_32_32:
+            return 2;
+        default:
+            return 0;
+        }
+        break;
+    }
+    case IR::Opcode::ReadConstBuffer:
+        // Provided address is already in dwords
+        return 0;
+    default:
+        return 2;
+    }
+}
+
 bool IsImageAtomicInstruction(const IR::Inst& inst) {
     switch (inst.GetOpcode()) {
     case IR::Opcode::ImageAtomicIAdd32:
@@ -496,6 +539,22 @@ void PatchDataRingAccess(IR::Block& block, IR::Inst& inst, Info& info, Descripto
 IR::U32 CalculateBufferAddress(IR::IREmitter& ir, const IR::Inst& inst, const Info& info,
                                const AmdGpu::Buffer& buffer, u32 stride) {
     const auto inst_info = inst.Flags<IR::BufferInstInfo>();
+    const u32 inst_offset = inst_info.inst_offset.Value();
+    const auto is_inst_typed = inst_info.inst_data_fmt != AmdGpu::DataFormat::FormatInvalid;
+    const auto data_format = is_inst_typed
+                                 ? AmdGpu::RemapDataFormat(inst_info.inst_data_fmt.Value())
+                                 : buffer.GetDataFmt();
+    const u32 shift = BufferAddressShift(inst, data_format);
+    const u32 mask = (1 << shift) - 1;
+
+    // If address calculation is of the form "index * const_stride + offset" with offset constant
+    // and both const_stride and offset are divisible with the element size, apply shift directly.
+    if (inst_info.index_enable && !inst_info.offset_enable && !buffer.swizzle_enable &&
+        !buffer.add_tid_enable && (stride & mask) == 0 && (inst_offset & mask) == 0) {
+        // buffer_offset = index * (const_stride >> shift) + (inst_offset >> shift)
+        const IR::U32 index = IR::U32{inst.Arg(1)};
+        return ir.IAdd(ir.IMul(index, ir.Imm32(stride >> shift)), ir.Imm32(inst_offset >> shift));
+    }
 
     // index = (inst_idxen ? vgpr_index : 0) + (const_add_tid_enable ? thread_id[5:0] : 0)
     IR::U32 index = ir.Imm32(0U);
@@ -512,7 +571,7 @@ IR::U32 CalculateBufferAddress(IR::IREmitter& ir, const IR::Inst& inst, const In
         index = ir.IAdd(index, thread_id);
     }
     // offset = (inst_offen ? vgpr_offset : 0) + inst_offset
-    IR::U32 offset = ir.Imm32(inst_info.inst_offset.Value());
+    IR::U32 offset = ir.Imm32(inst_offset);
     if (inst_info.offset_enable) {
         const IR::U32 vgpr_offset = inst_info.index_enable
                                         ? IR::U32{ir.CompositeExtract(inst.Arg(1), 1)}
@@ -545,6 +604,9 @@ IR::U32 CalculateBufferAddress(IR::IREmitter& ir, const IR::Inst& inst, const In
         // buffer_offset = index * const_stride + offset
         buffer_offset = ir.IAdd(ir.IMul(index, const_stride), offset);
     }
+    if (shift != 0) {
+        buffer_offset = ir.ShiftRightLogical(buffer_offset, ir.Imm32(shift));
+    }
     return buffer_offset;
 }
 
diff --git a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp
index b3b4ac36a..797d8bb4a 100644
--- a/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp
+++ b/src/shader_recompiler/ir/passes/shader_info_collection_pass.cpp
@@ -102,7 +102,7 @@ void Visit(Info& info, const IR::Inst& inst) {
         info.uses_lane_id = true;
         break;
     case IR::Opcode::ReadConst:
-        if (info.readconst_types == Info::ReadConstType::None) {
+        if (!info.uses_dma) {
            info.buffers.push_back({
                .used_types = IR::Type::U32,
                // We can't guarantee that flatbuf will not grow past UBO
@@ -116,7 +116,7 @@ void Visit(Info& info, const IR::Inst& inst) {
        } else {
            info.readconst_types |= Info::ReadConstType::Dynamic;
        }
-        info.dma_types |= IR::Type::U32;
+        info.uses_dma = true;
         break;
     case IR::Opcode::PackUfloat10_11_11:
         info.uses_pack_10_11_11 = true;
@@ -130,21 +130,22 @@ void Visit(Info& info, const IR::Inst& inst) {
 }
 
 void CollectShaderInfoPass(IR::Program& program) {
+    auto& info = program.info;
     for (IR::Block* const block : program.post_order_blocks) {
         for (IR::Inst& inst : block->Instructions()) {
-            Visit(program.info, inst);
+            Visit(info, inst);
         }
     }
 
-    if (program.info.dma_types != IR::Type::Void) {
-        program.info.buffers.push_back({
+    if (info.uses_dma) {
+        info.buffers.push_back({
             .used_types = IR::Type::U64,
             .inline_cbuf = AmdGpu::Buffer::Placeholder(VideoCore::BufferCache::BDA_PAGETABLE_SIZE),
             .buffer_type = BufferType::BdaPagetable,
             .is_written = true,
         });
-        program.info.buffers.push_back({
-            .used_types = IR::Type::U8,
+        info.buffers.push_back({
+            .used_types = IR::Type::U32,
             .inline_cbuf = AmdGpu::Buffer::Placeholder(VideoCore::BufferCache::FAULT_BUFFER_SIZE),
             .buffer_type = BufferType::FaultBuffer,
             .is_written = true,
diff --git a/src/shader_recompiler/profile.h b/src/shader_recompiler/profile.h
index bcdf86962..d7eb307b6 100644
--- a/src/shader_recompiler/profile.h
+++ b/src/shader_recompiler/profile.h
@@ -35,7 +35,7 @@ struct Profile {
     bool lower_left_origin_mode{};
     bool needs_manual_interpolation{};
     bool needs_lds_barriers{};
-    u64 min_ssbo_alignment{};
+    bool needs_buffer_offsets{};
     u64 max_ubo_size{};
     u32 max_viewport_width{};
     u32 max_viewport_height{};
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 1d8ac4823..831995339 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -225,6 +225,7 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
             instance.GetDriverID() == vk::DriverId::eNvidiaProprietary,
         .needs_lds_barriers = instance.GetDriverID() == vk::DriverId::eNvidiaProprietary ||
                               instance.GetDriverID() == vk::DriverId::eMoltenvk,
+        .needs_buffer_offsets = instance.StorageMinAlignment() > 4,
         // When binding a UBO, we calculate its size considering the offset in the larger buffer
         // cache underlying resource. In some cases, it may produce sizes exceeding the system
         // maximum allowed UBO range, so we need to reduce the threshold to prevent issues.
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 9dea5ceea..fbeaaf9dc 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -468,17 +468,12 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) {
         stage->PushUd(binding, push_data);
         BindBuffers(*stage, binding, push_data);
         BindTextures(*stage, binding);
-
-        uses_dma |= stage->dma_types != Shader::IR::Type::Void;
+        uses_dma |= stage->uses_dma;
     }
 
-    pipeline->BindResources(set_writes, buffer_barriers, push_data);
-
     if (uses_dma && !fault_process_pending) {
         // We only use fault buffer for DMA right now.
         {
-            // TODO: GPU might have written to memory (for example with EVENT_WRITE_EOP)
-            // we need to account for that and synchronize.
             Common::RecursiveSharedLock lock{mapped_ranges_mutex};
             for (auto& range : mapped_ranges) {
                 buffer_cache.SynchronizeBuffersInRange(range.lower(),
@@ -490,6 +485,8 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) {
 
     fault_process_pending |= uses_dma;
 
+    pipeline->BindResources(set_writes, buffer_barriers, push_data);
+
     return true;
 }
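
The sketches below are illustrative only and are not part of the patch. First, the address pre-shift added to CalculateBufferAddress: buffer opcodes now receive element indices rather than byte offsets, so the SPIR-V emitters no longer shift addresses themselves. When the access has the form index * const_stride + inst_offset and both terms are multiples of the element size, the pass folds the shift into the constants instead of emitting a ShiftRightLogical. A minimal standalone sketch of that identity, with a hypothetical function name:

    #include <cstdint>

    // Equals (index * stride + inst_offset) >> shift whenever stride and inst_offset are
    // both multiples of (1u << shift) -- the same condition the pass checks with bit masks.
    std::uint32_t FoldedElementIndex(std::uint32_t index, std::uint32_t stride,
                                     std::uint32_t inst_offset, std::uint32_t shift) {
        return index * (stride >> shift) + (inst_offset >> shift);
    }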
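
Second, the fault-buffer marking in DefineGetBdaPointer: the fault buffer still holds one bit per BDA page, but it is now addressed as 32-bit words instead of bytes (matching the FaultBuffer descriptor's switch from IR::Type::U8 to IR::Type::U32), so the word index is page >> 5 and the bit index is page & 31. A host-side sketch of the equivalent marking, assuming the readback side decodes bits the same way (names hypothetical):

    #include <cstdint>
    #include <vector>

    // One bit per page; mirrors the load / bitwise-or / store sequence the shader emits.
    void MarkPageFaulted(std::vector<std::uint32_t>& fault_words, std::uint32_t page) {
        const std::uint32_t word = page >> 5;  // page / 32
        const std::uint32_t bit = page & 31u;  // page % 32
        fault_words[word] |= 1u << bit;
    }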
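
Finally, the new BufferDefinition accessors rely on C++23 explicit object parameters ("deducing this") so a single member template serves both const and non-const callers, replacing the old operator[] pair. A minimal illustration of the pattern with hypothetical names:

    #include <array>
    #include <cstddef>

    struct Example {
        std::array<int, 4> values{};

        // Self deduces to Example& or const Example&, so one definition covers both
        // access paths, in the same spirit as BufferDefinition::Alias/Offset/Size above.
        template <typename Self>
        auto& At(this Self& self, std::size_t i) {
            return self.values[i];
        }
    };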