shader_recompiler: Optimize general case of buffer addressing (#3159)

* shader_recompiler: Simplify dma types

Only U32 is needed for S_LOAD_DWORD

* shader_recompiler: Perform address shift on IR level

Buffer instructions now expect the address in the data unit they operate on. Performing the shift at the IR level allows some operations to be optimized away in the common case.
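
A standalone sketch of the arithmetic this enables (illustrative only; DwordIndex and its constants are made up here, not recompiler code): when the address has the form index * const_stride + inst_offset and both constants are multiples of the element size, the element-size shift folds into the constants and no runtime shift is emitted, which is the fast path added to CalculateBufferAddress below.

    #include <cstdint>

    // Dword (4-byte) addressing, shift = 2. When stride and offset are both
    // dword-aligned, the shift folds into the constants at compile time.
    constexpr uint32_t DwordIndex(uint32_t index, uint32_t stride, uint32_t offset) {
        constexpr uint32_t shift = 2;
        constexpr uint32_t mask = (1u << shift) - 1u;
        if ((stride & mask) == 0 && (offset & mask) == 0) {
            return index * (stride >> shift) + (offset >> shift); // no runtime shift
        }
        return (index * stride + offset) >> shift; // general case: shift at runtime
    }
    static_assert(DwordIndex(3, 16, 8) == (3 * 16 + 8) / 4);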

* shader_recompiler: Optimize common buffer access pattern

* emit_spirv: Use 32-bit integer ops for fault buffer

Few GPUs have 8-bit bitwise OR operations, so emulating them would likely require some overhead in the driver.
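
A rough CPU-side analogue of the new emission (illustrative only; MarkPageFault and fault_words are not names from the codebase): the fault buffer is treated as a bitmap of 32-bit words, so marking a page takes a single 32-bit load, OR, and store, mirroring the page_div32 / page_mod32 / page_mask sequence in the SPIR-V below.

    #include <cstdint>
    #include <vector>

    // Set the fault bit for `page` in a bitmap backed by 32-bit words.
    void MarkPageFault(std::vector<uint32_t>& fault_words, uint32_t page) {
        const uint32_t word = page >> 5;  // page / 32
        const uint32_t bit = page & 31u;  // page % 32
        fault_words[word] |= 1u << bit;   // single 32-bit bitwise OR
    }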

* resource_tracking_pass: Fix texel buffer shift
TheTurtle 2025-06-26 12:14:36 +03:00 committed by GitHub
parent 6eaec7a004
commit a49b13fe66
12 changed files with 271 additions and 233 deletions

View file

@ -300,7 +300,7 @@ void SetupCapabilities(const Info& info, const Profile& profile, EmitContext& ct
if (stage == LogicalStage::TessellationControl || stage == LogicalStage::TessellationEval) { if (stage == LogicalStage::TessellationControl || stage == LogicalStage::TessellationEval) {
ctx.AddCapability(spv::Capability::Tessellation); ctx.AddCapability(spv::Capability::Tessellation);
} }
if (info.dma_types != IR::Type::Void) { if (info.uses_dma) {
ctx.AddCapability(spv::Capability::PhysicalStorageBufferAddresses); ctx.AddCapability(spv::Capability::PhysicalStorageBufferAddresses);
ctx.AddExtension("SPV_KHR_physical_storage_buffer"); ctx.AddExtension("SPV_KHR_physical_storage_buffer");
} }

View file

@ -7,7 +7,11 @@
#include "shader_recompiler/backend/spirv/spirv_emit_context.h" #include "shader_recompiler/backend/spirv/spirv_emit_context.h"
namespace Shader::Backend::SPIRV { namespace Shader::Backend::SPIRV {
namespace { namespace {
using PointerType = EmitContext::PointerType;
using PointerSize = EmitContext::PointerSize;
std::pair<Id, Id> AtomicArgs(EmitContext& ctx) { std::pair<Id, Id> AtomicArgs(EmitContext& ctx) {
const Id scope{ctx.ConstU32(static_cast<u32>(spv::Scope::Device))}; const Id scope{ctx.ConstU32(static_cast<u32>(spv::Scope::Device))};
const Id semantics{ctx.u32_zero_value}; const Id semantics{ctx.u32_zero_value};
@ -61,14 +65,13 @@ Id BufferAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id
return ctx.U32[1]; return ctx.U32[1];
} }
}(); }();
if (Sirit::ValidId(buffer.offset)) { if (const Id offset = buffer.Offset(PointerSize::B32); Sirit::ValidId(offset)) {
address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset); address = ctx.OpIAdd(ctx.U32[1], address, offset);
} }
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u)); const auto [id, pointer_type] = buffer.Alias(PointerType::U32);
const auto [id, pointer_type] = buffer[EmitContext::PointerType::U32]; const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address);
const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index);
const auto [scope, semantics]{AtomicArgs(ctx)}; const auto [scope, semantics]{AtomicArgs(ctx)};
return AccessBoundsCheck<32, 1, is_float>(ctx, index, buffer.size_dwords, [&] { return AccessBoundsCheck<32, 1, is_float>(ctx, address, buffer.Size(PointerSize::B32), [&] {
return (ctx.*atomic_func)(type, ptr, scope, semantics, value); return (ctx.*atomic_func)(type, ptr, scope, semantics, value);
}); });
} }
@ -76,14 +79,13 @@ Id BufferAtomicU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id
Id BufferAtomicU32IncDec(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id BufferAtomicU32IncDec(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address,
Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id)) { Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id)) {
const auto& buffer = ctx.buffers[handle]; const auto& buffer = ctx.buffers[handle];
if (Sirit::ValidId(buffer.offset)) { if (const Id offset = buffer.Offset(PointerSize::B32); Sirit::ValidId(offset)) {
address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset); address = ctx.OpIAdd(ctx.U32[1], address, offset);
} }
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u)); const auto [id, pointer_type] = buffer.Alias(PointerType::U32);
const auto [id, pointer_type] = buffer[EmitContext::PointerType::U32]; const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address);
const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index);
const auto [scope, semantics]{AtomicArgs(ctx)}; const auto [scope, semantics]{AtomicArgs(ctx)};
return AccessBoundsCheck<32>(ctx, index, buffer.size_dwords, [&] { return AccessBoundsCheck<32>(ctx, address, buffer.Size(PointerSize::B32), [&] {
return (ctx.*atomic_func)(ctx.U32[1], ptr, scope, semantics); return (ctx.*atomic_func)(ctx.U32[1], ptr, scope, semantics);
}); });
} }
@ -92,14 +94,13 @@ Id BufferAtomicU32CmpSwap(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre
Id cmp_value, Id cmp_value,
Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id, Id, Id)) { Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id, Id, Id)) {
const auto& buffer = ctx.buffers[handle]; const auto& buffer = ctx.buffers[handle];
if (Sirit::ValidId(buffer.offset)) { if (const Id offset = buffer.Offset(PointerSize::B32); Sirit::ValidId(offset)) {
address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset); address = ctx.OpIAdd(ctx.U32[1], address, offset);
} }
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u)); const auto [id, pointer_type] = buffer.Alias(PointerType::U32);
const auto [id, pointer_type] = buffer[EmitContext::PointerType::U32]; const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address);
const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index);
const auto [scope, semantics]{AtomicArgs(ctx)}; const auto [scope, semantics]{AtomicArgs(ctx)};
return AccessBoundsCheck<32>(ctx, index, buffer.size_dwords, [&] { return AccessBoundsCheck<32>(ctx, address, buffer.Size(PointerSize::B32), [&] {
return (ctx.*atomic_func)(ctx.U32[1], ptr, scope, semantics, semantics, value, cmp_value); return (ctx.*atomic_func)(ctx.U32[1], ptr, scope, semantics, semantics, value, cmp_value);
}); });
} }
@ -107,14 +108,13 @@ Id BufferAtomicU32CmpSwap(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre
Id BufferAtomicU64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value, Id BufferAtomicU64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, Id value,
Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) { Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
const auto& buffer = ctx.buffers[handle]; const auto& buffer = ctx.buffers[handle];
if (Sirit::ValidId(buffer.offset)) { if (const Id offset = buffer.Offset(PointerSize::B64); Sirit::ValidId(offset)) {
address = ctx.OpIAdd(ctx.U32[1], address, buffer.offset); address = ctx.OpIAdd(ctx.U32[1], address, offset);
} }
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(3u)); const auto [id, pointer_type] = buffer.Alias(PointerType::U64);
const auto [id, pointer_type] = buffer[EmitContext::PointerType::U64]; const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address);
const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index);
const auto [scope, semantics]{AtomicArgs(ctx)}; const auto [scope, semantics]{AtomicArgs(ctx)};
return AccessBoundsCheck<64>(ctx, index, buffer.size_qwords, [&] { return AccessBoundsCheck<64>(ctx, address, buffer.Size(PointerSize::B64), [&] {
return (ctx.*atomic_func)(ctx.U64, ptr, scope, semantics, value); return (ctx.*atomic_func)(ctx.U64, ptr, scope, semantics, value);
}); });
} }
@ -360,7 +360,7 @@ Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id co
Id EmitDataAppend(EmitContext& ctx, u32 gds_addr, u32 binding) { Id EmitDataAppend(EmitContext& ctx, u32 gds_addr, u32 binding) {
const auto& buffer = ctx.buffers[binding]; const auto& buffer = ctx.buffers[binding];
const auto [id, pointer_type] = buffer[EmitContext::PointerType::U32]; const auto [id, pointer_type] = buffer.Alias(PointerType::U32);
const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, ctx.ConstU32(gds_addr)); const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, ctx.ConstU32(gds_addr));
const auto [scope, semantics]{AtomicArgs(ctx)}; const auto [scope, semantics]{AtomicArgs(ctx)};
return ctx.OpAtomicIIncrement(ctx.U32[1], ptr, scope, semantics); return ctx.OpAtomicIIncrement(ctx.U32[1], ptr, scope, semantics);
@ -368,7 +368,7 @@ Id EmitDataAppend(EmitContext& ctx, u32 gds_addr, u32 binding) {
Id EmitDataConsume(EmitContext& ctx, u32 gds_addr, u32 binding) { Id EmitDataConsume(EmitContext& ctx, u32 gds_addr, u32 binding) {
const auto& buffer = ctx.buffers[binding]; const auto& buffer = ctx.buffers[binding];
const auto [id, pointer_type] = buffer[EmitContext::PointerType::U32]; const auto [id, pointer_type] = buffer.Alias(PointerType::U32);
const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, ctx.ConstU32(gds_addr)); const Id ptr = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, ctx.ConstU32(gds_addr));
const auto [scope, semantics]{AtomicArgs(ctx)}; const auto [scope, semantics]{AtomicArgs(ctx)};
return ctx.OpAtomicIDecrement(ctx.U32[1], ptr, scope, semantics); return ctx.OpAtomicIDecrement(ctx.U32[1], ptr, scope, semantics);

View file

@ -3,6 +3,7 @@
#include "common/assert.h" #include "common/assert.h"
#include "common/logging/log.h" #include "common/logging/log.h"
#include "shader_recompiler/backend/spirv/emit_spirv_bounds.h"
#include "shader_recompiler/backend/spirv/emit_spirv_instructions.h" #include "shader_recompiler/backend/spirv/emit_spirv_instructions.h"
#include "shader_recompiler/backend/spirv/spirv_emit_context.h" #include "shader_recompiler/backend/spirv/spirv_emit_context.h"
#include "shader_recompiler/ir/attribute.h" #include "shader_recompiler/ir/attribute.h"
@ -11,8 +12,6 @@
#include <magic_enum/magic_enum.hpp> #include <magic_enum/magic_enum.hpp>
#include "emit_spirv_bounds.h"
namespace Shader::Backend::SPIRV { namespace Shader::Backend::SPIRV {
namespace { namespace {
@ -164,6 +163,7 @@ void EmitGetGotoVariable(EmitContext&) {
} }
using PointerType = EmitContext::PointerType; using PointerType = EmitContext::PointerType;
using PointerSize = EmitContext::PointerSize;
Id EmitReadConst(EmitContext& ctx, IR::Inst* inst, Id addr, Id offset) { Id EmitReadConst(EmitContext& ctx, IR::Inst* inst, Id addr, Id offset) {
const u32 flatbuf_off_dw = inst->Flags<u32>(); const u32 flatbuf_off_dw = inst->Flags<u32>();
@ -179,14 +179,15 @@ Id EmitReadConst(EmitContext& ctx, IR::Inst* inst, Id addr, Id offset) {
template <PointerType type> template <PointerType type>
Id ReadConstBuffer(EmitContext& ctx, u32 handle, Id index) { Id ReadConstBuffer(EmitContext& ctx, u32 handle, Id index) {
const auto& buffer = ctx.buffers[handle]; const auto& buffer = ctx.buffers[handle];
index = ctx.OpIAdd(ctx.U32[1], index, buffer.offset_dwords); if (const Id offset = buffer.Offset(PointerSize::B32); Sirit::ValidId(offset)) {
const auto [id, pointer_type] = buffer[type]; index = ctx.OpIAdd(ctx.U32[1], index, offset);
}
const auto [id, pointer_type] = buffer.Alias(type);
const auto value_type = type == PointerType::U32 ? ctx.U32[1] : ctx.F32[1]; const auto value_type = type == PointerType::U32 ? ctx.U32[1] : ctx.F32[1];
const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)}; const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
const Id result{ctx.OpLoad(value_type, ptr)}; const Id result{ctx.OpLoad(value_type, ptr)};
if (const Id size = buffer.Size(PointerSize::B32); Sirit::ValidId(size)) {
if (Sirit::ValidId(buffer.size_dwords)) { const Id in_bounds = ctx.OpULessThan(ctx.U1[1], index, size);
const Id in_bounds = ctx.OpULessThan(ctx.U1[1], index, buffer.size_dwords);
return ctx.OpSelect(value_type, in_bounds, result, ctx.u32_zero_value); return ctx.OpSelect(value_type, in_bounds, result, ctx.u32_zero_value);
} }
return result; return result;
@ -419,25 +420,24 @@ void EmitSetPatch(EmitContext& ctx, IR::Patch patch, Id value) {
template <u32 N, PointerType alias> template <u32 N, PointerType alias>
static Id EmitLoadBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) { static Id EmitLoadBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
constexpr bool is_float = alias == PointerType::F32;
const auto flags = inst->Flags<IR::BufferInstInfo>(); const auto flags = inst->Flags<IR::BufferInstInfo>();
const auto& spv_buffer = ctx.buffers[handle]; const auto& spv_buffer = ctx.buffers[handle];
if (Sirit::ValidId(spv_buffer.offset)) { if (const Id offset = spv_buffer.Offset(PointerSize::B32); Sirit::ValidId(offset)) {
address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset); address = ctx.OpIAdd(ctx.U32[1], address, offset);
} }
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
const auto& data_types = alias == PointerType::U32 ? ctx.U32 : ctx.F32; const auto& data_types = alias == PointerType::U32 ? ctx.U32 : ctx.F32;
const auto [id, pointer_type] = spv_buffer[alias]; const auto [id, pointer_type] = spv_buffer.Alias(alias);
boost::container::static_vector<Id, N> ids; boost::container::static_vector<Id, N> ids;
for (u32 i = 0; i < N; i++) { for (u32 i = 0; i < N; i++) {
const Id index_i = i == 0 ? index : ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(i)); const Id index_i = i == 0 ? address : ctx.OpIAdd(ctx.U32[1], address, ctx.ConstU32(i));
const Id ptr_i = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index_i); const Id ptr_i = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index_i);
const Id result_i = ctx.OpLoad(data_types[1], ptr_i); const Id result_i = ctx.OpLoad(data_types[1], ptr_i);
if (!flags.typed) { if (!flags.typed) {
// Untyped loads have bounds checking per-component. // Untyped loads have bounds checking per-component.
ids.push_back(LoadAccessBoundsCheck < 32, 1, ids.push_back(LoadAccessBoundsCheck<32, 1, is_float>(
alias == ctx, index_i, spv_buffer.Size(PointerSize::B32), result_i));
PointerType::F32 > (ctx, index_i, spv_buffer.size_dwords, result_i));
} else { } else {
ids.push_back(result_i); ids.push_back(result_i);
} }
@ -446,33 +446,32 @@ static Id EmitLoadBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id a
const Id result = N == 1 ? ids[0] : ctx.OpCompositeConstruct(data_types[N], ids); const Id result = N == 1 ? ids[0] : ctx.OpCompositeConstruct(data_types[N], ids);
if (flags.typed) { if (flags.typed) {
// Typed loads have single bounds check for the whole load. // Typed loads have single bounds check for the whole load.
return LoadAccessBoundsCheck < 32, N, return LoadAccessBoundsCheck<32, N, is_float>(ctx, address,
alias == PointerType::F32 > (ctx, index, spv_buffer.size_dwords, result); spv_buffer.Size(PointerSize::B32), result);
} }
return result; return result;
} }
Id EmitLoadBufferU8(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) { Id EmitLoadBufferU8(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
const auto& spv_buffer = ctx.buffers[handle]; const auto& spv_buffer = ctx.buffers[handle];
if (Sirit::ValidId(spv_buffer.offset)) { if (const Id offset = spv_buffer.Offset(PointerSize::B8); Sirit::ValidId(offset)) {
address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset); address = ctx.OpIAdd(ctx.U32[1], address, offset);
} }
const auto [id, pointer_type] = spv_buffer[PointerType::U8]; const auto [id, pointer_type] = spv_buffer.Alias(PointerType::U8);
const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address)}; const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address)};
const Id result{ctx.OpLoad(ctx.U8, ptr)}; const Id result{ctx.OpLoad(ctx.U8, ptr)};
return LoadAccessBoundsCheck<8>(ctx, address, spv_buffer.size, result); return LoadAccessBoundsCheck<8>(ctx, address, spv_buffer.Size(PointerSize::B8), result);
} }
Id EmitLoadBufferU16(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) { Id EmitLoadBufferU16(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
const auto& spv_buffer = ctx.buffers[handle]; const auto& spv_buffer = ctx.buffers[handle];
if (Sirit::ValidId(spv_buffer.offset)) { if (const Id offset = spv_buffer.Offset(PointerSize::B16); Sirit::ValidId(offset)) {
address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset); address = ctx.OpIAdd(ctx.U32[1], address, offset);
} }
const auto [id, pointer_type] = spv_buffer[PointerType::U16]; const auto [id, pointer_type] = spv_buffer.Alias(PointerType::U16);
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(1u)); const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address)};
const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)};
const Id result{ctx.OpLoad(ctx.U16, ptr)}; const Id result{ctx.OpLoad(ctx.U16, ptr)};
return LoadAccessBoundsCheck<16>(ctx, index, spv_buffer.size_shorts, result); return LoadAccessBoundsCheck<16>(ctx, address, spv_buffer.Size(PointerSize::B16), result);
} }
Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) { Id EmitLoadBufferU32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
@ -493,14 +492,13 @@ Id EmitLoadBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address)
Id EmitLoadBufferU64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) { Id EmitLoadBufferU64(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
const auto& spv_buffer = ctx.buffers[handle]; const auto& spv_buffer = ctx.buffers[handle];
if (Sirit::ValidId(spv_buffer.offset)) { if (const Id offset = spv_buffer.Offset(PointerSize::B64); Sirit::ValidId(offset)) {
address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset); address = ctx.OpIAdd(ctx.U32[1], address, offset);
} }
const auto [id, pointer_type] = spv_buffer[PointerType::U64]; const auto [id, pointer_type] = spv_buffer.Alias(PointerType::U64);
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(3u)); const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u64_zero_value, address)};
const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u64_zero_value, index)};
const Id result{ctx.OpLoad(ctx.U64, ptr)}; const Id result{ctx.OpLoad(ctx.U64, ptr)};
return LoadAccessBoundsCheck<64>(ctx, index, spv_buffer.size_qwords, result); return LoadAccessBoundsCheck<64>(ctx, address, spv_buffer.Size(PointerSize::B64), result);
} }
Id EmitLoadBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) { Id EmitLoadBufferF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address) {
@ -526,18 +524,18 @@ Id EmitLoadBufferFormatF32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addr
template <u32 N, PointerType alias> template <u32 N, PointerType alias>
static void EmitStoreBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address, static void EmitStoreBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address,
Id value) { Id value) {
constexpr bool is_float = alias == PointerType::F32;
const auto flags = inst->Flags<IR::BufferInstInfo>(); const auto flags = inst->Flags<IR::BufferInstInfo>();
const auto& spv_buffer = ctx.buffers[handle]; const auto& spv_buffer = ctx.buffers[handle];
if (Sirit::ValidId(spv_buffer.offset)) { if (const Id offset = spv_buffer.Offset(PointerSize::B32); Sirit::ValidId(offset)) {
address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset); address = ctx.OpIAdd(ctx.U32[1], address, offset);
} }
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(2u));
const auto& data_types = alias == PointerType::U32 ? ctx.U32 : ctx.F32; const auto& data_types = alias == PointerType::U32 ? ctx.U32 : ctx.F32;
const auto [id, pointer_type] = spv_buffer[alias]; const auto [id, pointer_type] = spv_buffer.Alias(alias);
auto store = [&] { auto store = [&] {
for (u32 i = 0; i < N; i++) { for (u32 i = 0; i < N; i++) {
const Id index_i = i == 0 ? index : ctx.OpIAdd(ctx.U32[1], index, ctx.ConstU32(i)); const Id index_i = i == 0 ? address : ctx.OpIAdd(ctx.U32[1], address, ctx.ConstU32(i));
const Id ptr_i = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index_i); const Id ptr_i = ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index_i);
const Id value_i = N == 1 ? value : ctx.OpCompositeExtract(data_types[1], value, i); const Id value_i = N == 1 ? value : ctx.OpCompositeExtract(data_types[1], value, i);
auto store_i = [&] { auto store_i = [&] {
@ -546,8 +544,8 @@ static void EmitStoreBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, I
}; };
if (!flags.typed) { if (!flags.typed) {
// Untyped stores have bounds checking per-component. // Untyped stores have bounds checking per-component.
AccessBoundsCheck<32, 1, alias == PointerType::F32>( AccessBoundsCheck<32, 1, is_float>(ctx, index_i, spv_buffer.Size(PointerSize::B32),
ctx, index_i, spv_buffer.size_dwords, store_i); store_i);
} else { } else {
store_i(); store_i();
} }
@ -557,8 +555,7 @@ static void EmitStoreBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, I
if (flags.typed) { if (flags.typed) {
// Typed stores have single bounds check for the whole store. // Typed stores have single bounds check for the whole store.
AccessBoundsCheck<32, N, alias == PointerType::F32>(ctx, index, spv_buffer.size_dwords, AccessBoundsCheck<32, N, is_float>(ctx, address, spv_buffer.Size(PointerSize::B32), store);
store);
} else { } else {
store(); store();
} }
@ -566,12 +563,12 @@ static void EmitStoreBufferB32xN(EmitContext& ctx, IR::Inst* inst, u32 handle, I
void EmitStoreBufferU8(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) { void EmitStoreBufferU8(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) {
const auto& spv_buffer = ctx.buffers[handle]; const auto& spv_buffer = ctx.buffers[handle];
if (Sirit::ValidId(spv_buffer.offset)) { if (const Id offset = spv_buffer.Offset(PointerSize::B8); Sirit::ValidId(offset)) {
address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset); address = ctx.OpIAdd(ctx.U32[1], address, offset);
} }
const auto [id, pointer_type] = spv_buffer[PointerType::U8]; const auto [id, pointer_type] = spv_buffer.Alias(PointerType::U8);
const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address)}; const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address)};
AccessBoundsCheck<8>(ctx, address, spv_buffer.size, [&] { AccessBoundsCheck<8>(ctx, address, spv_buffer.Size(PointerSize::B8), [&] {
ctx.OpStore(ptr, value); ctx.OpStore(ptr, value);
return Id{}; return Id{};
}); });
@ -579,13 +576,12 @@ void EmitStoreBufferU8(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id v
void EmitStoreBufferU16(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) { void EmitStoreBufferU16(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) {
const auto& spv_buffer = ctx.buffers[handle]; const auto& spv_buffer = ctx.buffers[handle];
if (Sirit::ValidId(spv_buffer.offset)) { if (const Id offset = spv_buffer.Offset(PointerSize::B16); Sirit::ValidId(offset)) {
address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset); address = ctx.OpIAdd(ctx.U32[1], address, offset);
} }
const auto [id, pointer_type] = spv_buffer[PointerType::U16]; const auto [id, pointer_type] = spv_buffer.Alias(PointerType::U16);
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(1u)); const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, address)};
const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u32_zero_value, index)}; AccessBoundsCheck<16>(ctx, address, spv_buffer.Size(PointerSize::B16), [&] {
AccessBoundsCheck<16>(ctx, index, spv_buffer.size_shorts, [&] {
ctx.OpStore(ptr, value); ctx.OpStore(ptr, value);
return Id{}; return Id{};
}); });
@ -609,13 +605,12 @@ void EmitStoreBufferU32x4(EmitContext& ctx, IR::Inst* inst, u32 handle, Id addre
void EmitStoreBufferU64(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) { void EmitStoreBufferU64(EmitContext& ctx, IR::Inst*, u32 handle, Id address, Id value) {
const auto& spv_buffer = ctx.buffers[handle]; const auto& spv_buffer = ctx.buffers[handle];
if (Sirit::ValidId(spv_buffer.offset)) { if (const Id offset = spv_buffer.Offset(PointerSize::B64); Sirit::ValidId(offset)) {
address = ctx.OpIAdd(ctx.U32[1], address, spv_buffer.offset); address = ctx.OpIAdd(ctx.U32[1], address, offset);
} }
const auto [id, pointer_type] = spv_buffer[PointerType::U64]; const auto [id, pointer_type] = spv_buffer.Alias(PointerType::U64);
const Id index = ctx.OpShiftRightLogical(ctx.U32[1], address, ctx.ConstU32(3u)); const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u64_zero_value, address)};
const Id ptr{ctx.OpAccessChain(pointer_type, id, ctx.u64_zero_value, index)}; AccessBoundsCheck<64>(ctx, address, spv_buffer.Size(PointerSize::B64), [&] {
AccessBoundsCheck<64>(ctx, index, spv_buffer.size_qwords, [&] {
ctx.OpStore(ptr, value); ctx.OpStore(ptr, value);
return Id{}; return Id{};
}); });

View file

@ -71,7 +71,7 @@ EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_inf
Bindings& binding_) Bindings& binding_)
: Sirit::Module(profile_.supported_spirv), info{info_}, runtime_info{runtime_info_}, : Sirit::Module(profile_.supported_spirv), info{info_}, runtime_info{runtime_info_},
profile{profile_}, stage{info.stage}, l_stage{info.l_stage}, binding{binding_} { profile{profile_}, stage{info.stage}, l_stage{info.l_stage}, binding{binding_} {
if (info.dma_types != IR::Type::Void) { if (info.uses_dma) {
SetMemoryModel(spv::AddressingModel::PhysicalStorageBuffer64, spv::MemoryModel::GLSL450); SetMemoryModel(spv::AddressingModel::PhysicalStorageBuffer64, spv::MemoryModel::GLSL450);
} else { } else {
SetMemoryModel(spv::AddressingModel::Logical, spv::MemoryModel::GLSL450); SetMemoryModel(spv::AddressingModel::Logical, spv::MemoryModel::GLSL450);
@ -169,34 +169,8 @@ void EmitContext::DefineArithmeticTypes() {
if (info.uses_fp64) { if (info.uses_fp64) {
frexp_result_f64 = Name(TypeStruct(F64[1], S32[1]), "frexp_result_f64"); frexp_result_f64 = Name(TypeStruct(F64[1], S32[1]), "frexp_result_f64");
} }
if (info.uses_dma) {
if (True(info.dma_types & IR::Type::F64)) { physical_pointer_type_u32 = TypePointer(spv::StorageClass::PhysicalStorageBuffer, U32[1]);
physical_pointer_types[PointerType::F64] =
TypePointer(spv::StorageClass::PhysicalStorageBuffer, F64[1]);
}
if (True(info.dma_types & IR::Type::U64)) {
physical_pointer_types[PointerType::U64] =
TypePointer(spv::StorageClass::PhysicalStorageBuffer, U64);
}
if (True(info.dma_types & IR::Type::F32)) {
physical_pointer_types[PointerType::F32] =
TypePointer(spv::StorageClass::PhysicalStorageBuffer, F32[1]);
}
if (True(info.dma_types & IR::Type::U32)) {
physical_pointer_types[PointerType::U32] =
TypePointer(spv::StorageClass::PhysicalStorageBuffer, U32[1]);
}
if (True(info.dma_types & IR::Type::F16)) {
physical_pointer_types[PointerType::F16] =
TypePointer(spv::StorageClass::PhysicalStorageBuffer, F16[1]);
}
if (True(info.dma_types & IR::Type::U16)) {
physical_pointer_types[PointerType::U16] =
TypePointer(spv::StorageClass::PhysicalStorageBuffer, U16);
}
if (True(info.dma_types & IR::Type::U8)) {
physical_pointer_types[PointerType::U8] =
TypePointer(spv::StorageClass::PhysicalStorageBuffer, U8);
} }
} }
@ -239,7 +213,7 @@ Id EmitContext::GetBufferSize(const u32 sharp_idx) {
// Can this be done with memory access? Like we do now with ReadConst // Can this be done with memory access? Like we do now with ReadConst
const auto& srt_flatbuf = buffers[flatbuf_index]; const auto& srt_flatbuf = buffers[flatbuf_index];
ASSERT(srt_flatbuf.buffer_type == BufferType::Flatbuf); ASSERT(srt_flatbuf.buffer_type == BufferType::Flatbuf);
const auto [id, pointer_type] = srt_flatbuf[PointerType::U32]; const auto [id, pointer_type] = srt_flatbuf.Alias(PointerType::U32);
const auto rsrc1{ const auto rsrc1{
OpLoad(U32[1], OpAccessChain(pointer_type, id, u32_zero_value, ConstU32(sharp_idx + 1)))}; OpLoad(U32[1], OpAccessChain(pointer_type, id, u32_zero_value, ConstU32(sharp_idx + 1)))};
@ -255,39 +229,70 @@ Id EmitContext::GetBufferSize(const u32 sharp_idx) {
} }
void EmitContext::DefineBufferProperties() { void EmitContext::DefineBufferProperties() {
if (!profile.needs_buffer_offsets && profile.supports_robust_buffer_access) {
return;
}
for (u32 i = 0; i < buffers.size(); i++) { for (u32 i = 0; i < buffers.size(); i++) {
BufferDefinition& buffer = buffers[i]; auto& buffer = buffers[i];
const auto& desc = info.buffers[i];
const u32 binding = buffer.binding;
if (buffer.buffer_type != BufferType::Guest) { if (buffer.buffer_type != BufferType::Guest) {
continue; continue;
} }
const u32 binding = buffer.binding;
// Only load and apply buffer offsets if host GPU alignment is larger than guest.
if (profile.needs_buffer_offsets) {
const u32 half = PushData::BufOffsetIndex + (binding >> 4); const u32 half = PushData::BufOffsetIndex + (binding >> 4);
const u32 comp = (binding & 0xf) >> 2; const u32 comp = (binding & 0xf) >> 2;
const u32 offset = (binding & 0x3) << 3; const u32 offset = (binding & 0x3) << 3;
const Id ptr{OpAccessChain(TypePointer(spv::StorageClass::PushConstant, U32[1]), const Id ptr{OpAccessChain(TypePointer(spv::StorageClass::PushConstant, U32[1]),
push_data_block, ConstU32(half), ConstU32(comp))}; push_data_block, ConstU32(half), ConstU32(comp))};
const Id value{OpLoad(U32[1], ptr)}; const Id value{OpLoad(U32[1], ptr)};
buffer.offset = OpBitFieldUExtract(U32[1], value, ConstU32(offset), ConstU32(8U));
Name(buffer.offset, fmt::format("buf{}_off", binding));
buffer.offset_dwords = OpShiftRightLogical(U32[1], buffer.offset, ConstU32(2U));
Name(buffer.offset_dwords, fmt::format("buf{}_dword_off", binding));
// Only need to load size if performing bounds checks and the buffer is both guest and not const Id buf_offset{OpBitFieldUExtract(U32[1], value, ConstU32(offset), ConstU32(8U))};
// inline. Name(buf_offset, fmt::format("buf{}_off", binding));
if (!profile.supports_robust_buffer_access && buffer.buffer_type == BufferType::Guest) { buffer.Offset(PointerSize::B8) = buf_offset;
const BufferResource& desc = info.buffers[i];
if (desc.sharp_idx == std::numeric_limits<u32>::max()) { if (True(desc.used_types & IR::Type::U16)) {
buffer.size = ConstU32(desc.inline_cbuf.GetSize()); const Id buf_word_offset{OpShiftRightLogical(U32[1], buf_offset, ConstU32(1U))};
} else { Name(buf_word_offset, fmt::format("buf{}_word_off", binding));
buffer.size = GetBufferSize(desc.sharp_idx); buffer.Offset(PointerSize::B16) = buf_word_offset;
}
if (True(desc.used_types & IR::Type::U32)) {
const Id buf_dword_offset{OpShiftRightLogical(U32[1], buf_offset, ConstU32(2U))};
Name(buf_dword_offset, fmt::format("buf{}_dword_off", binding));
buffer.Offset(PointerSize::B32) = buf_dword_offset;
}
if (True(desc.used_types & IR::Type::U64)) {
const Id buf_qword_offset{OpShiftRightLogical(U32[1], buf_offset, ConstU32(3U))};
Name(buf_qword_offset, fmt::format("buf{}_qword_off", binding));
buffer.Offset(PointerSize::B64) = buf_qword_offset;
}
}
// Only load size if performing bounds checks.
if (!profile.supports_robust_buffer_access) {
const Id buf_size{desc.sharp_idx == std::numeric_limits<u32>::max()
? ConstU32(desc.inline_cbuf.GetSize())
: GetBufferSize(desc.sharp_idx)};
Name(buf_size, fmt::format("buf{}_size", binding));
buffer.Size(PointerSize::B8) = buf_size;
if (True(desc.used_types & IR::Type::U16)) {
const Id buf_word_size{OpShiftRightLogical(U32[1], buf_size, ConstU32(1U))};
Name(buf_word_size, fmt::format("buf{}_short_size", binding));
buffer.Size(PointerSize::B16) = buf_word_size;
}
if (True(desc.used_types & IR::Type::U32)) {
const Id buf_dword_size{OpShiftRightLogical(U32[1], buf_size, ConstU32(2U))};
Name(buf_dword_size, fmt::format("buf{}_dword_size", binding));
buffer.Size(PointerSize::B32) = buf_dword_size;
}
if (True(desc.used_types & IR::Type::U64)) {
const Id buf_qword_size{OpShiftRightLogical(U32[1], buf_size, ConstU32(3U))};
Name(buf_qword_size, fmt::format("buf{}_qword_size", binding));
buffer.Size(PointerSize::B64) = buf_qword_size;
} }
Name(buffer.size, fmt::format("buf{}_size", binding));
buffer.size_shorts = OpShiftRightLogical(U32[1], buffer.size, ConstU32(1U));
Name(buffer.size_shorts, fmt::format("buf{}_short_size", binding));
buffer.size_dwords = OpShiftRightLogical(U32[1], buffer.size, ConstU32(2U));
Name(buffer.size_dwords, fmt::format("buf{}_dword_size", binding));
buffer.size_qwords = OpShiftRightLogical(U32[1], buffer.size, ConstU32(3U));
Name(buffer.size_qwords, fmt::format("buf{}_qword_size", binding));
} }
} }
} }
@ -779,8 +784,7 @@ EmitContext::BufferSpv EmitContext::DefineBuffer(bool is_storage, bool is_writte
}; };
void EmitContext::DefineBuffers() { void EmitContext::DefineBuffers() {
if (!profile.supports_robust_buffer_access && if (!profile.supports_robust_buffer_access && !info.uses_dma) {
info.readconst_types == Info::ReadConstType::None) {
// In case Flatbuf has not already been bound by IR and is needed // In case Flatbuf has not already been bound by IR and is needed
// to query buffer sizes, bind it now. // to query buffer sizes, bind it now.
info.buffers.push_back({ info.buffers.push_back({
@ -809,23 +813,23 @@ void EmitContext::DefineBuffers() {
// Define aliases depending on the shader usage. // Define aliases depending on the shader usage.
auto& spv_buffer = buffers.emplace_back(binding.buffer++, desc.buffer_type); auto& spv_buffer = buffers.emplace_back(binding.buffer++, desc.buffer_type);
if (True(desc.used_types & IR::Type::U64)) { if (True(desc.used_types & IR::Type::U64)) {
spv_buffer[PointerType::U64] = spv_buffer.Alias(PointerType::U64) =
DefineBuffer(is_storage, desc.is_written, 3, desc.buffer_type, U64); DefineBuffer(is_storage, desc.is_written, 3, desc.buffer_type, U64);
} }
if (True(desc.used_types & IR::Type::U32)) { if (True(desc.used_types & IR::Type::U32)) {
spv_buffer[PointerType::U32] = spv_buffer.Alias(PointerType::U32) =
DefineBuffer(is_storage, desc.is_written, 2, desc.buffer_type, U32[1]); DefineBuffer(is_storage, desc.is_written, 2, desc.buffer_type, U32[1]);
} }
if (True(desc.used_types & IR::Type::F32)) { if (True(desc.used_types & IR::Type::F32)) {
spv_buffer[PointerType::F32] = spv_buffer.Alias(PointerType::F32) =
DefineBuffer(is_storage, desc.is_written, 2, desc.buffer_type, F32[1]); DefineBuffer(is_storage, desc.is_written, 2, desc.buffer_type, F32[1]);
} }
if (True(desc.used_types & IR::Type::U16)) { if (True(desc.used_types & IR::Type::U16)) {
spv_buffer[PointerType::U16] = spv_buffer.Alias(PointerType::U16) =
DefineBuffer(is_storage, desc.is_written, 1, desc.buffer_type, U16); DefineBuffer(is_storage, desc.is_written, 1, desc.buffer_type, U16);
} }
if (True(desc.used_types & IR::Type::U8)) { if (True(desc.used_types & IR::Type::U8)) {
spv_buffer[PointerType::U8] = spv_buffer.Alias(PointerType::U8) =
DefineBuffer(is_storage, desc.is_written, 0, desc.buffer_type, U8); DefineBuffer(is_storage, desc.is_written, 0, desc.buffer_type, U8);
} }
++binding.unified; ++binding.unified;
@ -1154,7 +1158,7 @@ Id EmitContext::DefineGetBdaPointer() {
const auto page{OpShiftRightLogical(U64, address, caching_pagebits)}; const auto page{OpShiftRightLogical(U64, address, caching_pagebits)};
const auto page32{OpUConvert(U32[1], page)}; const auto page32{OpUConvert(U32[1], page)};
const auto& bda_buffer{buffers[bda_pagetable_index]}; const auto& bda_buffer{buffers[bda_pagetable_index]};
const auto [bda_buffer_id, bda_pointer_type] = bda_buffer[PointerType::U64]; const auto [bda_buffer_id, bda_pointer_type] = bda_buffer.Alias(PointerType::U64);
const auto bda_ptr{OpAccessChain(bda_pointer_type, bda_buffer_id, u32_zero_value, page32)}; const auto bda_ptr{OpAccessChain(bda_pointer_type, bda_buffer_id, u32_zero_value, page32)};
const auto bda{OpLoad(U64, bda_ptr)}; const auto bda{OpLoad(U64, bda_ptr)};
@ -1166,14 +1170,14 @@ Id EmitContext::DefineGetBdaPointer() {
// First time acces, mark as fault // First time acces, mark as fault
AddLabel(fault_label); AddLabel(fault_label);
const auto& fault_buffer{buffers[fault_buffer_index]}; const auto& fault_buffer{buffers[fault_buffer_index]};
const auto [fault_buffer_id, fault_pointer_type] = fault_buffer[PointerType::U8]; const auto [fault_buffer_id, fault_pointer_type] = fault_buffer.Alias(PointerType::U32);
const auto page_div8{OpShiftRightLogical(U32[1], page32, ConstU32(3U))}; const auto page_div32{OpShiftRightLogical(U32[1], page32, ConstU32(5U))};
const auto page_mod8{OpBitwiseAnd(U32[1], page32, ConstU32(7U))}; const auto page_mod32{OpBitwiseAnd(U32[1], page32, ConstU32(31U))};
const auto page_mask{OpShiftLeftLogical(U8, u8_one_value, page_mod8)}; const auto page_mask{OpShiftLeftLogical(U32[1], u32_one_value, page_mod32)};
const auto fault_ptr{ const auto fault_ptr{
OpAccessChain(fault_pointer_type, fault_buffer_id, u32_zero_value, page_div8)}; OpAccessChain(fault_pointer_type, fault_buffer_id, u32_zero_value, page_div32)};
const auto fault_value{OpLoad(U8, fault_ptr)}; const auto fault_value{OpLoad(U32[1], fault_ptr)};
const auto fault_value_masked{OpBitwiseOr(U8, fault_value, page_mask)}; const auto fault_value_masked{OpBitwiseOr(U32[1], fault_value, page_mask)};
OpStore(fault_ptr, fault_value_masked); OpStore(fault_ptr, fault_value_masked);
// Return null pointer // Return null pointer
@ -1211,14 +1215,15 @@ Id EmitContext::DefineReadConst(bool dynamic) {
const auto offset_bytes{OpShiftLeftLogical(U32[1], offset, ConstU32(2U))}; const auto offset_bytes{OpShiftLeftLogical(U32[1], offset, ConstU32(2U))};
const auto addr{OpIAdd(U64, base_addr, OpUConvert(U64, offset_bytes))}; const auto addr{OpIAdd(U64, base_addr, OpUConvert(U64, offset_bytes))};
const auto result = EmitMemoryRead(U32[1], addr, [&]() { const auto result = EmitDwordMemoryRead(addr, [&]() {
if (dynamic) { if (dynamic) {
return u32_zero_value; return u32_zero_value;
} else { } else {
const auto& flatbuf_buffer{buffers[flatbuf_index]}; const auto& flatbuf_buffer{buffers[flatbuf_index]};
ASSERT(flatbuf_buffer.binding >= 0 && ASSERT(flatbuf_buffer.binding >= 0 &&
flatbuf_buffer.buffer_type == BufferType::Flatbuf); flatbuf_buffer.buffer_type == BufferType::Flatbuf);
const auto [flatbuf_buffer_id, flatbuf_pointer_type] = flatbuf_buffer[PointerType::U32]; const auto [flatbuf_buffer_id, flatbuf_pointer_type] =
flatbuf_buffer.Alias(PointerType::U32);
const auto ptr{OpAccessChain(flatbuf_pointer_type, flatbuf_buffer_id, u32_zero_value, const auto ptr{OpAccessChain(flatbuf_pointer_type, flatbuf_buffer_id, u32_zero_value,
flatbuf_offset)}; flatbuf_offset)};
return OpLoad(U32[1], ptr); return OpLoad(U32[1], ptr);
@ -1239,7 +1244,7 @@ void EmitContext::DefineFunctions() {
uf11_to_f32 = DefineUfloatM5ToFloat32(6, "uf11_to_f32"); uf11_to_f32 = DefineUfloatM5ToFloat32(6, "uf11_to_f32");
uf10_to_f32 = DefineUfloatM5ToFloat32(5, "uf10_to_f32"); uf10_to_f32 = DefineUfloatM5ToFloat32(5, "uf10_to_f32");
} }
if (info.dma_types != IR::Type::Void) { if (info.uses_dma) {
get_bda_pointer = DefineGetBdaPointer(); get_bda_pointer = DefineGetBdaPointer();
} }

View file

@ -42,17 +42,6 @@ public:
Bindings& binding); Bindings& binding);
~EmitContext(); ~EmitContext();
enum class PointerType : u32 {
U8,
U16,
F16,
U32,
F32,
U64,
F64,
NumAlias,
};
Id Def(const IR::Value& value); Id Def(const IR::Value& value);
void DefineBufferProperties(); void DefineBufferProperties();
@ -155,25 +144,7 @@ public:
return last_label; return last_label;
} }
PointerType PointerTypeFromType(Id type) { Id EmitDwordMemoryRead(Id address, auto&& fallback) {
if (type.value == U8.value)
return PointerType::U8;
if (type.value == U16.value)
return PointerType::U16;
if (type.value == F16[1].value)
return PointerType::F16;
if (type.value == U32[1].value)
return PointerType::U32;
if (type.value == F32[1].value)
return PointerType::F32;
if (type.value == U64.value)
return PointerType::U64;
if (type.value == F64[1].value)
return PointerType::F64;
UNREACHABLE_MSG("Unknown type for pointer");
}
Id EmitMemoryRead(Id type, Id address, auto&& fallback) {
const Id available_label = OpLabel(); const Id available_label = OpLabel();
const Id fallback_label = OpLabel(); const Id fallback_label = OpLabel();
const Id merge_label = OpLabel(); const Id merge_label = OpLabel();
@ -185,10 +156,8 @@ public:
// Available // Available
AddLabel(available_label); AddLabel(available_label);
const auto pointer_type = PointerTypeFromType(type); const Id addr_ptr = OpConvertUToPtr(physical_pointer_type_u32, addr);
const Id pointer_type_id = physical_pointer_types[pointer_type]; const Id result = OpLoad(U32[1], addr_ptr, spv::MemoryAccessMask::Aligned, 4u);
const Id addr_ptr = OpConvertUToPtr(pointer_type_id, addr);
const Id result = OpLoad(type, addr_ptr, spv::MemoryAccessMask::Aligned, 4u);
OpBranch(merge_label); OpBranch(merge_label);
// Fallback // Fallback
@ -199,7 +168,7 @@ public:
// Merge // Merge
AddLabel(merge_label); AddLabel(merge_label);
const Id final_result = const Id final_result =
OpPhi(type, fallback_result, fallback_label, result, available_label); OpPhi(U32[1], fallback_result, fallback_label, result, available_label);
return final_result; return final_result;
} }
@ -314,6 +283,24 @@ public:
bool is_storage = false; bool is_storage = false;
}; };
enum class PointerType : u32 {
U8,
U16,
U32,
F32,
U64,
F64,
NumAlias,
};
enum class PointerSize : u32 {
B8,
B16,
B32,
B64,
NumClass,
};
struct BufferSpv { struct BufferSpv {
Id id; Id id;
Id pointer_type; Id pointer_type;
@ -322,32 +309,23 @@ public:
struct BufferDefinition { struct BufferDefinition {
u32 binding; u32 binding;
BufferType buffer_type; BufferType buffer_type;
Id offset; std::array<Id, u32(PointerSize::NumClass)> offsets;
Id offset_dwords; std::array<Id, u32(PointerSize::NumClass)> sizes;
Id size;
Id size_shorts;
Id size_dwords;
Id size_qwords;
std::array<BufferSpv, u32(PointerType::NumAlias)> aliases; std::array<BufferSpv, u32(PointerType::NumAlias)> aliases;
const BufferSpv& operator[](PointerType alias) const { template <class Self>
return aliases[u32(alias)]; auto& Alias(this Self& self, PointerType alias) {
return self.aliases[u32(alias)];
} }
BufferSpv& operator[](PointerType alias) { template <class Self>
return aliases[u32(alias)]; auto& Offset(this Self& self, PointerSize size) {
} return self.offsets[u32(size)];
};
struct PhysicalPointerTypes {
std::array<Id, u32(PointerType::NumAlias)> types;
const Id& operator[](PointerType type) const {
return types[u32(type)];
} }
Id& operator[](PointerType type) { template <class Self>
return types[u32(type)]; auto& Size(this Self& self, PointerSize size) {
return self.sizes[u32(size)];
} }
}; };
@ -356,12 +334,12 @@ public:
boost::container::small_vector<BufferDefinition, 16> buffers; boost::container::small_vector<BufferDefinition, 16> buffers;
boost::container::small_vector<TextureDefinition, 8> images; boost::container::small_vector<TextureDefinition, 8> images;
boost::container::small_vector<Id, 4> samplers; boost::container::small_vector<Id, 4> samplers;
PhysicalPointerTypes physical_pointer_types;
std::unordered_map<u32, Id> first_to_last_label_map; std::unordered_map<u32, Id> first_to_last_label_map;
size_t flatbuf_index{}; size_t flatbuf_index{};
size_t bda_pagetable_index{}; size_t bda_pagetable_index{};
size_t fault_buffer_index{}; size_t fault_buffer_index{};
Id physical_pointer_type_u32;
Id sampler_type{}; Id sampler_type{};
Id sampler_pointer_type{}; Id sampler_pointer_type{};

View file

@ -1,7 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
#include <bit>
#include "common/assert.h" #include "common/assert.h"
#include "shader_recompiler/frontend/translate/translate.h" #include "shader_recompiler/frontend/translate/translate.h"

View file

@ -238,7 +238,7 @@ struct Info {
Dynamic = 1 << 1, Dynamic = 1 << 1,
}; };
ReadConstType readconst_types{}; ReadConstType readconst_types{};
IR::Type dma_types{IR::Type::Void}; bool uses_dma{false};
explicit Info(Stage stage_, LogicalStage l_stage_, ShaderParams params) explicit Info(Stage stage_, LogicalStage l_stage_, ShaderParams params)
: stage{stage_}, l_stage{l_stage_}, pgm_hash{params.hash}, pgm_base{params.Base()}, : stage{stage_}, l_stage{l_stage_}, pgm_hash{params.hash}, pgm_base{params.Base()},

View file

@ -105,6 +105,49 @@ IR::Type BufferDataType(const IR::Inst& inst, AmdGpu::NumberFormat num_format) {
} }
} }
u32 BufferAddressShift(const IR::Inst& inst, AmdGpu::DataFormat data_format) {
switch (inst.GetOpcode()) {
case IR::Opcode::LoadBufferU8:
case IR::Opcode::StoreBufferU8:
return 0;
case IR::Opcode::LoadBufferU16:
case IR::Opcode::StoreBufferU16:
return 1;
case IR::Opcode::LoadBufferU64:
case IR::Opcode::StoreBufferU64:
case IR::Opcode::BufferAtomicIAdd64:
return 3;
case IR::Opcode::LoadBufferFormatF32:
case IR::Opcode::StoreBufferFormatF32: {
switch (data_format) {
case AmdGpu::DataFormat::Format8:
return 0;
case AmdGpu::DataFormat::Format8_8:
case AmdGpu::DataFormat::Format16:
return 1;
case AmdGpu::DataFormat::Format8_8_8_8:
case AmdGpu::DataFormat::Format16_16:
case AmdGpu::DataFormat::Format10_11_11:
case AmdGpu::DataFormat::Format2_10_10_10:
case AmdGpu::DataFormat::Format16_16_16_16:
case AmdGpu::DataFormat::Format32:
case AmdGpu::DataFormat::Format32_32:
case AmdGpu::DataFormat::Format32_32_32:
case AmdGpu::DataFormat::Format32_32_32_32:
return 2;
default:
return 0;
}
break;
}
case IR::Opcode::ReadConstBuffer:
// Provided address is already in dwords
return 0;
default:
return 2;
}
}
bool IsImageAtomicInstruction(const IR::Inst& inst) { bool IsImageAtomicInstruction(const IR::Inst& inst) {
switch (inst.GetOpcode()) { switch (inst.GetOpcode()) {
case IR::Opcode::ImageAtomicIAdd32: case IR::Opcode::ImageAtomicIAdd32:
@ -496,6 +539,22 @@ void PatchDataRingAccess(IR::Block& block, IR::Inst& inst, Info& info, Descripto
IR::U32 CalculateBufferAddress(IR::IREmitter& ir, const IR::Inst& inst, const Info& info, IR::U32 CalculateBufferAddress(IR::IREmitter& ir, const IR::Inst& inst, const Info& info,
const AmdGpu::Buffer& buffer, u32 stride) { const AmdGpu::Buffer& buffer, u32 stride) {
const auto inst_info = inst.Flags<IR::BufferInstInfo>(); const auto inst_info = inst.Flags<IR::BufferInstInfo>();
const u32 inst_offset = inst_info.inst_offset.Value();
const auto is_inst_typed = inst_info.inst_data_fmt != AmdGpu::DataFormat::FormatInvalid;
const auto data_format = is_inst_typed
? AmdGpu::RemapDataFormat(inst_info.inst_data_fmt.Value())
: buffer.GetDataFmt();
const u32 shift = BufferAddressShift(inst, data_format);
const u32 mask = (1 << shift) - 1;
// If address calculation is of the form "index * const_stride + offset" with offset constant
// and both const_stride and offset are divisible with the element size, apply shift directly.
if (inst_info.index_enable && !inst_info.offset_enable && !buffer.swizzle_enable &&
!buffer.add_tid_enable && (stride & mask) == 0 && (inst_offset & mask) == 0) {
// buffer_offset = index * (const_stride >> shift) + (inst_offset >> shift)
const IR::U32 index = IR::U32{inst.Arg(1)};
return ir.IAdd(ir.IMul(index, ir.Imm32(stride >> shift)), ir.Imm32(inst_offset >> shift));
}
// index = (inst_idxen ? vgpr_index : 0) + (const_add_tid_enable ? thread_id[5:0] : 0) // index = (inst_idxen ? vgpr_index : 0) + (const_add_tid_enable ? thread_id[5:0] : 0)
IR::U32 index = ir.Imm32(0U); IR::U32 index = ir.Imm32(0U);
@ -512,7 +571,7 @@ IR::U32 CalculateBufferAddress(IR::IREmitter& ir, const IR::Inst& inst, const In
index = ir.IAdd(index, thread_id); index = ir.IAdd(index, thread_id);
} }
// offset = (inst_offen ? vgpr_offset : 0) + inst_offset // offset = (inst_offen ? vgpr_offset : 0) + inst_offset
IR::U32 offset = ir.Imm32(inst_info.inst_offset.Value()); IR::U32 offset = ir.Imm32(inst_offset);
if (inst_info.offset_enable) { if (inst_info.offset_enable) {
const IR::U32 vgpr_offset = inst_info.index_enable const IR::U32 vgpr_offset = inst_info.index_enable
? IR::U32{ir.CompositeExtract(inst.Arg(1), 1)} ? IR::U32{ir.CompositeExtract(inst.Arg(1), 1)}
@ -545,6 +604,9 @@ IR::U32 CalculateBufferAddress(IR::IREmitter& ir, const IR::Inst& inst, const In
// buffer_offset = index * const_stride + offset // buffer_offset = index * const_stride + offset
buffer_offset = ir.IAdd(ir.IMul(index, const_stride), offset); buffer_offset = ir.IAdd(ir.IMul(index, const_stride), offset);
} }
if (shift != 0) {
buffer_offset = ir.ShiftRightLogical(buffer_offset, ir.Imm32(shift));
}
return buffer_offset; return buffer_offset;
} }

View file

@ -102,7 +102,7 @@ void Visit(Info& info, const IR::Inst& inst) {
info.uses_lane_id = true; info.uses_lane_id = true;
break; break;
case IR::Opcode::ReadConst: case IR::Opcode::ReadConst:
if (info.readconst_types == Info::ReadConstType::None) { if (!info.uses_dma) {
info.buffers.push_back({ info.buffers.push_back({
.used_types = IR::Type::U32, .used_types = IR::Type::U32,
// We can't guarantee that flatbuf will not grow past UBO // We can't guarantee that flatbuf will not grow past UBO
@ -116,7 +116,7 @@ void Visit(Info& info, const IR::Inst& inst) {
} else { } else {
info.readconst_types |= Info::ReadConstType::Dynamic; info.readconst_types |= Info::ReadConstType::Dynamic;
} }
info.dma_types |= IR::Type::U32; info.uses_dma = true;
break; break;
case IR::Opcode::PackUfloat10_11_11: case IR::Opcode::PackUfloat10_11_11:
info.uses_pack_10_11_11 = true; info.uses_pack_10_11_11 = true;
@ -130,21 +130,22 @@ void Visit(Info& info, const IR::Inst& inst) {
} }
void CollectShaderInfoPass(IR::Program& program) { void CollectShaderInfoPass(IR::Program& program) {
auto& info = program.info;
for (IR::Block* const block : program.post_order_blocks) { for (IR::Block* const block : program.post_order_blocks) {
for (IR::Inst& inst : block->Instructions()) { for (IR::Inst& inst : block->Instructions()) {
Visit(program.info, inst); Visit(info, inst);
} }
} }
if (program.info.dma_types != IR::Type::Void) { if (info.uses_dma) {
program.info.buffers.push_back({ info.buffers.push_back({
.used_types = IR::Type::U64, .used_types = IR::Type::U64,
.inline_cbuf = AmdGpu::Buffer::Placeholder(VideoCore::BufferCache::BDA_PAGETABLE_SIZE), .inline_cbuf = AmdGpu::Buffer::Placeholder(VideoCore::BufferCache::BDA_PAGETABLE_SIZE),
.buffer_type = BufferType::BdaPagetable, .buffer_type = BufferType::BdaPagetable,
.is_written = true, .is_written = true,
}); });
program.info.buffers.push_back({ info.buffers.push_back({
.used_types = IR::Type::U8, .used_types = IR::Type::U32,
.inline_cbuf = AmdGpu::Buffer::Placeholder(VideoCore::BufferCache::FAULT_BUFFER_SIZE), .inline_cbuf = AmdGpu::Buffer::Placeholder(VideoCore::BufferCache::FAULT_BUFFER_SIZE),
.buffer_type = BufferType::FaultBuffer, .buffer_type = BufferType::FaultBuffer,
.is_written = true, .is_written = true,

View file

@ -35,7 +35,7 @@ struct Profile {
bool lower_left_origin_mode{}; bool lower_left_origin_mode{};
bool needs_manual_interpolation{}; bool needs_manual_interpolation{};
bool needs_lds_barriers{}; bool needs_lds_barriers{};
u64 min_ssbo_alignment{}; bool needs_buffer_offsets{};
u64 max_ubo_size{}; u64 max_ubo_size{};
u32 max_viewport_width{}; u32 max_viewport_width{};
u32 max_viewport_height{}; u32 max_viewport_height{};

View file

@ -225,6 +225,7 @@ PipelineCache::PipelineCache(const Instance& instance_, Scheduler& scheduler_,
instance.GetDriverID() == vk::DriverId::eNvidiaProprietary, instance.GetDriverID() == vk::DriverId::eNvidiaProprietary,
.needs_lds_barriers = instance.GetDriverID() == vk::DriverId::eNvidiaProprietary || .needs_lds_barriers = instance.GetDriverID() == vk::DriverId::eNvidiaProprietary ||
instance.GetDriverID() == vk::DriverId::eMoltenvk, instance.GetDriverID() == vk::DriverId::eMoltenvk,
.needs_buffer_offsets = instance.StorageMinAlignment() > 4,
// When binding a UBO, we calculate its size considering the offset in the larger buffer // When binding a UBO, we calculate its size considering the offset in the larger buffer
// cache underlying resource. In some cases, it may produce sizes exceeding the system // cache underlying resource. In some cases, it may produce sizes exceeding the system
// maximum allowed UBO range, so we need to reduce the threshold to prevent issues. // maximum allowed UBO range, so we need to reduce the threshold to prevent issues.

View file

@ -468,17 +468,12 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) {
stage->PushUd(binding, push_data); stage->PushUd(binding, push_data);
BindBuffers(*stage, binding, push_data); BindBuffers(*stage, binding, push_data);
BindTextures(*stage, binding); BindTextures(*stage, binding);
uses_dma |= stage->uses_dma;
uses_dma |= stage->dma_types != Shader::IR::Type::Void;
} }
pipeline->BindResources(set_writes, buffer_barriers, push_data);
if (uses_dma && !fault_process_pending) { if (uses_dma && !fault_process_pending) {
// We only use fault buffer for DMA right now. // We only use fault buffer for DMA right now.
{ {
// TODO: GPU might have written to memory (for example with EVENT_WRITE_EOP)
// we need to account for that and synchronize.
Common::RecursiveSharedLock lock{mapped_ranges_mutex}; Common::RecursiveSharedLock lock{mapped_ranges_mutex};
for (auto& range : mapped_ranges) { for (auto& range : mapped_ranges) {
buffer_cache.SynchronizeBuffersInRange(range.lower(), buffer_cache.SynchronizeBuffersInRange(range.lower(),
@ -490,6 +485,8 @@ bool Rasterizer::BindResources(const Pipeline* pipeline) {
fault_process_pending |= uses_dma; fault_process_pending |= uses_dma;
pipeline->BindResources(set_writes, buffer_barriers, push_data);
return true; return true;
} }